From 0e98902865b4fc2a43233a1f3752be305afbdb07 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Fri, 20 Dec 2024 06:55:15 +0000 Subject: [PATCH 001/492] CP-52074: Add systemctl enable and disable API Signed-off-by: Bengang Yuan --- ocaml/forkexecd/lib/fe_systemctl.ml | 8 +++++-- ocaml/forkexecd/lib/fe_systemctl.mli | 3 +++ ocaml/xapi/xapi_systemctl.ml | 31 ++++++++++++++++++++++++---- ocaml/xapi/xapi_systemctl.mli | 10 +++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index 046396002ca..97af823be67 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -121,15 +121,19 @@ let stop ~service = Xapi_stdext_unix.Unixext.unlink_safe destination ; status -let is_active ~service = +let status ~command ~service = let status = Forkhelpers.safe_close_and_exec None None None [] systemctl - ["is-active"; "--quiet"; service] + [command; "--quiet"; service] |> Forkhelpers.waitpid |> snd in Unix.WEXITED 0 = status +let is_active ~service = status ~command:"is-active" ~service + +let is_enabled ~service = status ~command:"is-enabled" ~service + (** path to service file *) let path service = Filename.concat run_path (service ^ ".service") diff --git a/ocaml/forkexecd/lib/fe_systemctl.mli b/ocaml/forkexecd/lib/fe_systemctl.mli index 5ba44c4e290..908987f8432 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.mli +++ b/ocaml/forkexecd/lib/fe_systemctl.mli @@ -45,6 +45,9 @@ val start_transient : val is_active : service:string -> bool (** [is_active ~service] checks whether the [service] is still running *) +val is_enabled : service:string -> bool +(** [is_enabled ~service] checks whether the [service] is enabled *) + val show : service:string -> status (** [shows ~service] retrieves the exitcodes and PIDs of the specified [service] *) diff --git a/ocaml/xapi/xapi_systemctl.ml b/ocaml/xapi/xapi_systemctl.ml index 1dbca594a0c..448d565a266 100644 --- a/ocaml/xapi/xapi_systemctl.ml +++ b/ocaml/xapi/xapi_systemctl.ml @@ -19,7 +19,7 @@ module D = Debug.Make (struct let name = "xapi_systemctl" end) open D -type t = Start | Stop | Restart +type t = Start | Stop | Restart | Enable | Disable exception Systemctl_fail of string @@ -30,6 +30,10 @@ let to_string = function "stop" | Restart -> "restart" + | Enable -> + "enable" + | Disable -> + "disable" let perform ~wait_until_success ~service ~timeout op = let op_str = op |> to_string in @@ -42,8 +46,17 @@ let perform ~wait_until_success ~service ~timeout op = if wait_until_success then ( if op = Restart then Thread.delay 0.1 ; let is_active = Fe_systemctl.is_active ~service in + let is_enabled = Fe_systemctl.is_enabled ~service in let success_cond () = - match op with Start | Restart -> is_active | Stop -> is_active |> not + match op with + | Start | Restart -> + is_active + | Stop -> + is_active |> not + | Enable -> + is_enabled + | Disable -> + is_enabled |> not in try Helpers.retry_until_timeout ~timeout @@ -62,7 +75,17 @@ let restart ?(timeout = 5.) ~wait_until_success service = perform ~wait_until_success ~service ~timeout Restart let stop ?(timeout = 5.) ~wait_until_success service = - perform ~wait_until_success ~service ~timeout Stop + if Fe_systemctl.is_active ~service then + perform ~wait_until_success ~service ~timeout Stop let start ?(timeout = 5.) ~wait_until_success service = - perform ~wait_until_success ~service ~timeout Start + if not (Fe_systemctl.is_active ~service) then + perform ~wait_until_success ~service ~timeout Start + +let disable ?(timeout = 5.) ~wait_until_success service = + if Fe_systemctl.is_enabled ~service then + perform ~wait_until_success ~service ~timeout Disable + +let enable ?(timeout = 5.) ~wait_until_success service = + if not (Fe_systemctl.is_enabled ~service) then + perform ~wait_until_success ~service ~timeout Enable diff --git a/ocaml/xapi/xapi_systemctl.mli b/ocaml/xapi/xapi_systemctl.mli index 2660839f9b7..b552afebbbc 100644 --- a/ocaml/xapi/xapi_systemctl.mli +++ b/ocaml/xapi/xapi_systemctl.mli @@ -15,6 +15,10 @@ (* Exception about systemctl operation like start/stop failed *) exception Systemctl_fail of string +type t = Start | Stop | Restart | Enable | Disable + +val to_string : t -> string + (* start a service with systemctl *) val start : ?timeout:float -> wait_until_success:bool -> string -> unit @@ -23,3 +27,9 @@ val stop : ?timeout:float -> wait_until_success:bool -> string -> unit (* restart a service with systemctl *) val restart : ?timeout:float -> wait_until_success:bool -> string -> unit + +(* enable a service with systemctl *) +val enable : ?timeout:float -> wait_until_success:bool -> string -> unit + +(* disable a service with systemctl *) +val disable : ?timeout:float -> wait_until_success:bool -> string -> unit From cbab3b68da8142d6b6e23ed1e7128b0fd1651446 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 2 Jan 2025 16:54:10 +0800 Subject: [PATCH 002/492] CP-52074: Add enable and disable ssh API on host Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_errors.ml | 6 ++++++ ocaml/idl/datamodel_host.ml | 24 +++++++++++++++++++++++ ocaml/xapi-cli-server/cli_frontend.ml | 26 +++++++++++++++++++++++++ ocaml/xapi-cli-server/cli_operations.ml | 20 +++++++++++++++++++ ocaml/xapi-consts/api_errors.ml | 4 ++++ ocaml/xapi/message_forwarding.ml | 14 +++++++++++++ ocaml/xapi/xapi_host.ml | 20 +++++++++++++++++++ ocaml/xapi/xapi_host.mli | 4 ++++ 8 files changed, 118 insertions(+) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index fed2f830db1..4804905ef5a 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2010,6 +2010,12 @@ let _ = error Api_errors.too_many_groups [] ~doc:"VM can only belong to one group." () ; + error Api_errors.enable_ssh_failed ["host"] + ~doc:"Failed to enable SSH access." () ; + + error Api_errors.disable_ssh_failed ["host"] + ~doc:"Failed to disable SSH access." () ; + message (fst Api_messages.ha_pool_overcommitted) ~doc: diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 78b68a35722..737cfd83680 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -2338,6 +2338,28 @@ let emergency_clear_mandatory_guidance = ~doc:"Clear the pending mandatory guidance on this host" ~allowed_roles:_R_LOCAL_ROOT_ONLY () +let enable_ssh = + call ~name:"enable_ssh" + ~doc: + "Enable SSH access on the host. It will start the service sshd only if \ + it is not running. It will also enable the service sshd only if it is \ + not enabled. A newly joined host in the pool or an ejected host from \ + the pool would keep the original status." + ~lifecycle:[] + ~params:[(Ref _host, "self", "The host")] + ~allowed_roles:_R_POOL_ADMIN () + +let disable_ssh = + call ~name:"disable_ssh" + ~doc: + "Disable SSH access on the host. It will stop the service sshd only if \ + it is running. It will also disable the service sshd only if it is \ + enabled. A newly joined host in the pool or an ejected host from the \ + pool would keep the original status." + ~lifecycle:[] + ~params:[(Ref _host, "self", "The host")] + ~allowed_roles:_R_POOL_ADMIN () + let latest_synced_updates_applied_state = Enum ( "latest_synced_updates_applied_state" @@ -2494,6 +2516,8 @@ let t = ; set_https_only ; apply_recommended_guidances ; emergency_clear_mandatory_guidance + ; enable_ssh + ; disable_ssh ] ~contents: ([ diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 3de231f3cad..19655a1cb2a 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -1048,6 +1048,32 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Host_selectors] } ) + ; ( "host-enable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Enable SSH access on the host. It will start the service sshd only \ + if it is not running. It will also enable the service sshd only if \ + it is not enabled. A newly joined host in the pool or an ejected \ + host from the pool would keep the original status." + ; implementation= No_fd Cli_operations.host_enable_ssh + ; flags= [Host_selectors] + } + ) + ; ( "host-disable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Disable SSH access on the host. It will stop the service sshd only \ + if it is running. It will also disable the service sshd only if it \ + is enabled. A newly joined host in the pool or an ejected host from \ + the pool would keep the original status." + ; implementation= No_fd Cli_operations.host_disable_ssh + ; flags= [Host_selectors] + } + ) ; ( "host-emergency-clear-mandatory-guidance" , { reqd= [] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 1e8ba0f3b37..6b928b47f8e 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -7729,6 +7729,26 @@ let host_apply_updates _printer rpc session_id params = params ["hash"] ) +let host_enable_ssh _printer rpc session_id params = + ignore + (do_host_op rpc session_id + (fun _ host -> + let host = host.getref () in + Client.Host.enable_ssh ~rpc ~session_id ~self:host + ) + params [] + ) + +let host_disable_ssh _printer rpc session_id params = + ignore + (do_host_op rpc session_id + (fun _ host -> + let host = host.getref () in + Client.Host.disable_ssh ~rpc ~session_id ~self:host + ) + params [] + ) + module SDN_controller = struct let introduce printer rpc session_id params = let port = diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 54bdd6f6660..57fcad842fe 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1403,3 +1403,7 @@ let telemetry_next_collection_too_late = let illegal_in_fips_mode = add_error "ILLEGAL_IN_FIPS_MODE" let too_many_groups = add_error "TOO_MANY_GROUPS" + +let enable_ssh_failed = add_error "ENABLE_SSH_FAILED" + +let disable_ssh_failed = add_error "DISABLE_SSH_FAILED" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 63b27076a1a..6768ca7ea7a 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -4154,6 +4154,20 @@ functor let emergency_clear_mandatory_guidance ~__context = info "Host.emergency_clear_mandatory_guidance" ; Local.Host.emergency_clear_mandatory_guidance ~__context + + let enable_ssh ~__context ~self = + info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; + let local_fn = Local.Host.enable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self (fun session_id rpc -> + Client.Host.enable_ssh ~rpc ~session_id ~self + ) + + let disable_ssh ~__context ~self = + info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; + let local_fn = Local.Host.disable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self (fun session_id rpc -> + Client.Host.disable_ssh ~rpc ~session_id ~self + ) end module Host_crashdump = struct diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index cd6ae3a7d35..b543cfa63dc 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3127,3 +3127,23 @@ let emergency_clear_mandatory_guidance ~__context = info "%s: %s is cleared" __FUNCTION__ s ) ; Db.Host.set_pending_guidances ~__context ~self ~value:[] + +let enable_ssh ~__context ~self = + try + Xapi_systemctl.enable ~wait_until_success:false "sshd" ; + Xapi_systemctl.start ~wait_until_success:false "sshd" + with _ -> + raise + (Api_errors.Server_error + (Api_errors.enable_ssh_failed, [Ref.string_of self]) + ) + +let disable_ssh ~__context ~self = + try + Xapi_systemctl.disable ~wait_until_success:false "sshd" ; + Xapi_systemctl.stop ~wait_until_success:false "sshd" + with _ -> + raise + (Api_errors.Server_error + (Api_errors.disable_ssh_failed, [Ref.string_of self]) + ) diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index f8fe73f8379..a2ec3b31831 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -561,3 +561,7 @@ val set_https_only : __context:Context.t -> self:API.ref_host -> value:bool -> unit val emergency_clear_mandatory_guidance : __context:Context.t -> unit + +val enable_ssh : __context:Context.t -> self:API.ref_host -> unit + +val disable_ssh : __context:Context.t -> self:API.ref_host -> unit From 944a91d5c5a9025d32cb4a1d11467576d43b8d71 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 2 Jan 2025 17:05:06 +0800 Subject: [PATCH 003/492] CP-52074: Add enable and disable ssh API on pool Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_errors.ml | 6 +++++ ocaml/idl/datamodel_pool.ml | 20 +++++++++++++++ ocaml/sdk-gen/go/gen_go_helper.ml | 1 + ocaml/xapi-cli-server/cli_frontend.ml | 22 ++++++++++++++++ ocaml/xapi-cli-server/cli_operations.ml | 8 ++++++ ocaml/xapi-consts/api_errors.ml | 4 +++ ocaml/xapi/message_forwarding.ml | 8 ++++++ ocaml/xapi/xapi_pool.ml | 34 +++++++++++++++++++++++++ ocaml/xapi/xapi_pool.mli | 4 +++ 9 files changed, 107 insertions(+) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 4804905ef5a..954c9d7452e 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2016,6 +2016,12 @@ let _ = error Api_errors.disable_ssh_failed ["host"] ~doc:"Failed to disable SSH access." () ; + error Api_errors.enable_ssh_partially_failed ["hosts"] + ~doc:"Some of hosts failed to enable SSH access." () ; + + error Api_errors.disable_ssh_partially_failed ["hosts"] + ~doc:"Some of hosts failed to disable SSH access." () ; + message (fst Api_messages.ha_pool_overcommitted) ~doc: diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index ab0d1669788..6e468367132 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1539,6 +1539,24 @@ let get_guest_secureboot_readiness = ~result:(pool_guest_secureboot_readiness, "The readiness of the pool") ~allowed_roles:_R_POOL_OP () +let enable_ssh = + call ~name:"enable_ssh" + ~doc: + "Enable SSH access on all hosts in the pool. It's a helper which calls \ + host.enable_ssh for all the hosts in the pool." + ~lifecycle:[] + ~params:[(Ref _pool, "self", "The pool")] + ~allowed_roles:_R_POOL_ADMIN () + +let disable_ssh = + call ~name:"disable_ssh" + ~doc: + "Disable SSH access on all hosts in the pool. It's a helper which calls \ + host.disable_ssh for all the hosts in the pool." + ~lifecycle:[] + ~params:[(Ref _pool, "self", "The pool")] + ~allowed_roles:_R_POOL_ADMIN () + (** A pool class *) let t = create_obj ~in_db:true @@ -1633,6 +1651,8 @@ let t = ; set_ext_auth_cache_size ; set_ext_auth_cache_expiry ; get_guest_secureboot_readiness + ; enable_ssh + ; disable_ssh ] ~contents: ([ diff --git a/ocaml/sdk-gen/go/gen_go_helper.ml b/ocaml/sdk-gen/go/gen_go_helper.ml index 47540f55ef7..84b91260ae2 100644 --- a/ocaml/sdk-gen/go/gen_go_helper.ml +++ b/ocaml/sdk-gen/go/gen_go_helper.ml @@ -38,6 +38,7 @@ let acronyms = ; "db" ; "xml" ; "eof" + ; "ssh" ] |> StringSet.of_list diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 19655a1cb2a..82c642da64c 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -3131,6 +3131,28 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "pool-enable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Enable SSH access on all hosts in the pool. It's a helper which \ + calls host.enable_ssh for all the hosts in the pool." + ; implementation= No_fd Cli_operations.pool_enable_ssh + ; flags= [] + } + ) + ; ( "pool-disable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Disable SSH access on all hosts in the pool. It's a helper which \ + calls host.disable_ssh for all the hosts in the pool." + ; implementation= No_fd Cli_operations.pool_disable_ssh + ; flags= [] + } + ) ; ( "host-ha-xapi-healthcheck" , { reqd= [] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 6b928b47f8e..f8e1a0606ae 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -6779,6 +6779,14 @@ let pool_sync_bundle fd _printer rpc session_id params = | None -> failwith "Required parameter not found: filename" +let pool_enable_ssh _printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + Client.Pool.enable_ssh ~rpc ~session_id ~self:pool + +let pool_disable_ssh _printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + Client.Pool.disable_ssh ~rpc ~session_id ~self:pool + let host_restore fd _printer rpc session_id params = let filename = List.assoc "file-name" params in let op _ host = diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 57fcad842fe..9bd3937a89d 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1407,3 +1407,7 @@ let too_many_groups = add_error "TOO_MANY_GROUPS" let enable_ssh_failed = add_error "ENABLE_SSH_FAILED" let disable_ssh_failed = add_error "DISABLE_SSH_FAILED" + +let enable_ssh_partially_failed = add_error "ENABLE_SSH_PARTIALLY_FAILED" + +let disable_ssh_partially_failed = add_error "DISABLE_SSH_PARTIALLY_FAILED" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 6768ca7ea7a..186516dcae5 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -1185,6 +1185,14 @@ functor let get_guest_secureboot_readiness ~__context ~self = info "%s: pool='%s'" __FUNCTION__ (pool_uuid ~__context self) ; Local.Pool.get_guest_secureboot_readiness ~__context ~self + + let enable_ssh ~__context ~self = + info "%s: pool = '%s'" __FUNCTION__ (pool_uuid ~__context self) ; + Local.Pool.enable_ssh ~__context ~self + + let disable_ssh ~__context ~self = + info "%s: pool = '%s'" __FUNCTION__ (pool_uuid ~__context self) ; + Local.Pool.disable_ssh ~__context ~self end module VM = struct diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 2f471932c14..14a830712c6 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3952,3 +3952,37 @@ let put_bundle_handler (req : Request.t) s _ = | None -> () ) + +module Ssh = struct + let operate ~__context ~action ~error = + let hosts = Db.Host.get_all ~__context in + Helpers.call_api_functions ~__context (fun rpc session_id -> + let failed_hosts = + List.fold_left + (fun failed_hosts host -> + try + action ~rpc ~session_id ~self:host ; + failed_hosts + with _ -> Ref.string_of host :: failed_hosts + ) + [] hosts + in + match failed_hosts with + | [] -> + () + | _ -> + raise (Api_errors.Server_error (error, failed_hosts)) + ) + + let enable ~__context ~self:_ = + operate ~__context ~action:Client.Host.enable_ssh + ~error:Api_errors.enable_ssh_partially_failed + + let disable ~__context ~self:_ = + operate ~__context ~action:Client.Host.disable_ssh + ~error:Api_errors.disable_ssh_partially_failed +end + +let enable_ssh = Ssh.enable + +let disable_ssh = Ssh.disable diff --git a/ocaml/xapi/xapi_pool.mli b/ocaml/xapi/xapi_pool.mli index 835a356f782..d17e667fbb1 100644 --- a/ocaml/xapi/xapi_pool.mli +++ b/ocaml/xapi/xapi_pool.mli @@ -434,3 +434,7 @@ val get_guest_secureboot_readiness : -> API.pool_guest_secureboot_readiness val put_bundle_handler : Http.Request.t -> Unix.file_descr -> 'a -> unit + +val enable_ssh : __context:Context.t -> self:API.ref_pool -> unit + +val disable_ssh : __context:Context.t -> self:API.ref_pool -> unit From 43729fafd06c207054c4037619baf22a9a23175d Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 17 Mar 2024 10:37:24 +0000 Subject: [PATCH 004/492] IH-533: Remove usage of forkexecd daemon to execute processes Forkexecd was written to avoid some issues with Ocaml and multi-threading. Instead use C code to launch processes and avoid these issues. Interface remains unchanged from Ocaml side but implementation rely entirely on C code. vfork() is used to avoid performance memory issue. Reap of the processes are done directly. Code automatically reap child processes to avoid zombies. One small helper is used to better separate Ocaml and C code and handling syslog redirection. This allows to better debug in case of issues. Syslog handling is done in a separate process allowing to restart the toolstack and keep launched programs running; note that even with forkexecd daemon one process was used for this purpose. Code tries to keep compatibility with forkexecd, in particular: - SIGPIPE is ignored in the parent; - /dev/null is open with O_WRONLY even for stdin; - file descriptors are limited to 1024. We use close_range (if available) to reduce system calls to close file descriptors. Cgroup is set to avoid systemd closing processes on toolstack restart. There's a fuzzer program to check file remapping algorithm; for this reason the algorithm is in a separate file. To turn internal debug on you need to set FORKEXECD_DEBUG_LOGS C preprocessor macro to 1. Signed-off-by: Frediano Ziglio --- Makefile | 4 +- ocaml/forkexecd/dune | 21 ++ ocaml/forkexecd/helper/Makefile | 33 ++ ocaml/forkexecd/helper/algo_fuzzer.c | 246 ++++++++++++++ ocaml/forkexecd/helper/close_from.c | 86 +++++ ocaml/forkexecd/helper/close_from.h | 19 ++ ocaml/forkexecd/helper/logs.c | 149 +++++++++ ocaml/forkexecd/helper/logs.h | 70 ++++ ocaml/forkexecd/helper/redirect_algo.h | 210 ++++++++++++ ocaml/forkexecd/helper/syslog.c | 101 ++++++ ocaml/forkexecd/helper/syslog.h | 21 ++ ocaml/forkexecd/helper/vfork_helper.c | 446 +++++++++++++++++++++++++ ocaml/forkexecd/helper/vfork_helper.h | 23 ++ ocaml/forkexecd/lib/dune | 8 +- ocaml/forkexecd/lib/fe_stubs.c | 416 +++++++++++++++++++++++ ocaml/forkexecd/lib/forkhelpers.ml | 173 ++++++++-- ocaml/forkexecd/test/dune | 2 +- ocaml/forkexecd/test/fe_test.sh | 8 +- ocaml/forkexecd/test/syslog.c | 64 ++++ ocaml/libs/stunnel/stunnel.ml | 5 - 20 files changed, 2065 insertions(+), 40 deletions(-) create mode 100644 ocaml/forkexecd/dune create mode 100644 ocaml/forkexecd/helper/Makefile create mode 100644 ocaml/forkexecd/helper/algo_fuzzer.c create mode 100644 ocaml/forkexecd/helper/close_from.c create mode 100644 ocaml/forkexecd/helper/close_from.h create mode 100644 ocaml/forkexecd/helper/logs.c create mode 100644 ocaml/forkexecd/helper/logs.h create mode 100644 ocaml/forkexecd/helper/redirect_algo.h create mode 100644 ocaml/forkexecd/helper/syslog.c create mode 100644 ocaml/forkexecd/helper/syslog.h create mode 100644 ocaml/forkexecd/helper/vfork_helper.c create mode 100644 ocaml/forkexecd/helper/vfork_helper.h create mode 100644 ocaml/forkexecd/lib/fe_stubs.c diff --git a/Makefile b/Makefile index 7f7386bf6b1..8b7a19bcd8d 100644 --- a/Makefile +++ b/Makefile @@ -153,7 +153,7 @@ DUNE_IU_PACKAGES1+=xapi-client xapi-schema xapi-consts xapi-cli-protocol xapi-da DUNE_IU_PACKAGES1+=xen-api-client xen-api-client-lwt rrdd-plugin rrd-transport DUNE_IU_PACKAGES1+=gzip http-lib pciutil sexpr stunnel uuid xml-light2 zstd xapi-compression safe-resources DUNE_IU_PACKAGES1+=message-switch message-switch-cli message-switch-core message-switch-lwt -DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl forkexec xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli +DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli DUNE_IU_PACKAGES1+=xapi-nbd varstored-guard xapi-log xapi-open-uri xapi-tracing xapi-tracing-export xapi-expiry-alerts cohttp-posix DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk DUNE_IU_PACKAGES1+=xapi-stdext-date xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools @@ -173,7 +173,7 @@ DUNE_IU_PACKAGES3=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$( install-dune3: dune install $(DUNE_IU_PACKAGES3) -DUNE_IU_PACKAGES4=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --libexecdir=/usr/libexec --mandir=$(MANDIR) vhd-tool +DUNE_IU_PACKAGES4=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --libexecdir=/usr/libexec --mandir=$(MANDIR) vhd-tool forkexec install-dune4: dune install $(DUNE_IU_PACKAGES4) diff --git a/ocaml/forkexecd/dune b/ocaml/forkexecd/dune new file mode 100644 index 00000000000..40d4a7eb7c6 --- /dev/null +++ b/ocaml/forkexecd/dune @@ -0,0 +1,21 @@ +(data_only_dirs helper) + +(rule + (deps (source_tree helper)) + (targets vfork_helper) + (package forkexec) + (action + (no-infer + (progn + (chdir helper (run make)) + (copy helper/vfork_helper vfork_helper) + ) + ) + ) +) + +(install + (package forkexec) + (section libexec_root) + (files (vfork_helper as xapi/vfork_helper)) +) diff --git a/ocaml/forkexecd/helper/Makefile b/ocaml/forkexecd/helper/Makefile new file mode 100644 index 00000000000..2bfc3b07e35 --- /dev/null +++ b/ocaml/forkexecd/helper/Makefile @@ -0,0 +1,33 @@ +## Set some macro but not override environment ones +CFLAGS ?= -O2 -g -Wall -Werror +LDFLAGS ?= + +all:: vfork_helper + +clean:: + rm -f vfork_helper *.o + +%.o: %.c + $(CC) $(CFLAGS) -MMD -MP -MF $@.d -c -o $@ $< + +vfork_helper: vfork_helper.o close_from.o syslog.o + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -pthread + +-include $(wildcard *.o.d) + +## Fuzzer uses AFL (American Fuzzy Lop). +## +## Use "make fuzz" to build and launch the fuzzer +## +## Use "make show" to look at the first failures (if found). + +fuzz:: + afl-gcc $(CFLAGS) -Wall -Werror -o algo_fuzzer algo_fuzzer.c + rm -rf testcase_dir + mkdir testcase_dir + echo maomaoamaoaoao > testcase_dir/test1 + rm -rf findings_dir/ + afl-fuzz -i testcase_dir -o findings_dir -D -- ./algo_fuzzer + +show:: + cat "$$(ls -1 findings_dir/default/crashes/id* | head -1)" | ./algo_fuzzer diff --git a/ocaml/forkexecd/helper/algo_fuzzer.c b/ocaml/forkexecd/helper/algo_fuzzer.c new file mode 100644 index 00000000000..97eabd48d2c --- /dev/null +++ b/ocaml/forkexecd/helper/algo_fuzzer.c @@ -0,0 +1,246 @@ + +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#undef NDEBUG +#define DEBUG 1 + +#if DEBUG +#define log(fmt, ...) printf(fmt "\n", ##__VA_ARGS__) +#else +#define log(fmt, ...) do {} while(0) +#endif + +// include as first file to make sure header is self contained +#include "redirect_algo.h" + +#include +#include +#include +#include +#include +#include +#include + +static int fake_close(int fd); + +typedef struct { + bool open; + bool cloexec; + char *name; +} fd; + +#define NUM_FDS 4096 +static fd fds[NUM_FDS]; + +static bool +fake_close_fds_from(int fd_from) +{ + for (int fd = fd_from; fd < NUM_FDS; ++fd) + fake_close(fd); + + return true; +} + +#define O_WRONLY 1 +static int +fake_open(const char *fn, int dummy) +{ + for (int i = 0; i < NUM_FDS; ++i) + if (!fds[i].open) { + assert(fds[i].name == NULL); + fds[i].name = strdup(fn); + fds[i].open = true; + fds[i].cloexec = false; + return i; + } + assert(0); + return -1; +} + +static int +fake_close(int fd) +{ + assert(fd >= 0); + assert(fd < NUM_FDS); + if (!fds[fd].open) { + errno = EBADF; + return -1; + } + fds[fd].open = false; + free(fds[fd].name); + fds[fd].name = NULL; + return 0; +} + +static int +fake_dup2(int from, int to) +{ + assert(from >= 0 && from < NUM_FDS); + assert(to >= 0 && to < NUM_FDS); + assert(fds[from].open); + assert(from != to); + free(fds[to].name); + fds[to].open = true; + fds[to].name = strdup(fds[from].name); + fds[to].cloexec = false; + return 0; +} + +static int +fake_fcntl(int fd) +{ + assert(fd >= 0 && fd < NUM_FDS); + assert(fds[fd].open); + fds[fd].cloexec = false; + return 0; +} + +int main(int argc, char **argv) +{ + // Input where a given FD goes?? + // No, not enough, can be duplicated. + // Numbers >4096 in 2 bytes not file descriptor, + // (-1 for standard, skip for normal). + // We should add some random fds. + enum { MAX_FILE_BUF = 2048 }; + uint16_t file_buf[MAX_FILE_BUF]; + size_t read = fread(file_buf, 2, MAX_FILE_BUF, stdin); + if (read < 3) + return 0; + + static const char standard_names[][8] = { + "stdin", "stdout", "stderr" + }; + int num_mappings = 0; + uint16_t *num = file_buf; + mapping mappings[MAX_FILE_BUF]; + int i = 0; + for (i = 0; i < 3; ++i) { + mapping *m = &mappings[num_mappings++]; + m->uuid = standard_names[i]; + uint16_t n = *num++; + m->current_fd = n < NUM_FDS ? n : -1; + m->wanted_fd = i; + } + for (; i < read; ++i) { + uint16_t n = *num++; + if (n >= NUM_FDS) + continue; + + mapping *m = &mappings[num_mappings++]; + m->current_fd = n; + m->wanted_fd = -1; + char buf[64]; + sprintf(buf, "file%d", i); + m->uuid = strdup(buf); + } + if (num_mappings > MAX_TOTAL_MAPPINGS) + return 0; + + for (unsigned n = 0; n < num_mappings; ++n) { + mapping *m = &mappings[n]; + int fd = m->current_fd; + if (fd < 0) + continue; + fake_close(fd); + fds[fd].open = true; + fds[fd].name = strdup(m->uuid); + fds[fd].cloexec = true; + } + + // Check in the final file mapping all valid mappings + // have an open file descriptor. + // There should be no duplicate numbers in current_fd. + // current_fd must be in a range. + // Only if wanted_fd >= 0 current_fd can be -1. + // There should be a correspondance between input and output names. + // If current_fd was -1 it will still be -1. + // If wanted_fd >= 0 current_fd should be the same. + + fd_operation operations[MAX_OPERATIONS]; + int num_operations = + redirect_mappings(mappings, num_mappings, operations); + assert(num_operations > 0); + assert(num_operations <= MAX_OPERATIONS); + + for (int i = 0; i < num_operations; ++i) { + const fd_operation* op = &operations[i]; + log("op %d %d %d", op->fd_from, op->fd_to, op->operation); + switch (op->operation) { + case FD_OP_DUP: + if (op->fd_from == op->fd_to) + fake_fcntl(op->fd_from); + else + fake_dup2(op->fd_from, op->fd_to); + break; + case FD_OP_MOVE: + assert(op->fd_from != op->fd_to); + fake_dup2(op->fd_from, op->fd_to); + fake_close(op->fd_from); + break; + case FD_OP_DEVNULL: + // first close old, then create new one + fake_close(op->fd_to); + // TODO ideally we want read only for input for Ocaml did the same... + assert(fake_open("/dev/null", O_WRONLY) == op->fd_to); + break; + case FD_OP_CLOSE_FROM: + fake_close_fds_from(op->fd_from); + break; + default: + assert(0); + } + } + + // check files opened + for (int fd = 0; fd < NUM_FDS; ++fd) + assert(fds[fd].open == (fd < num_mappings)); + + for (int fd = 0; fd < num_mappings; ++fd) { + assert(fds[fd].cloexec == false); + log("file %d %s", fd, fds[fd].name); + } + + // Check in the final file mapping all valid mappings + // has an open file descriptor. + bool already_found[NUM_FDS] = { false, }; + for (unsigned n = 0; n < num_mappings; ++n) { + const int fd = mappings[n].current_fd; + const int wanted = mappings[n].wanted_fd; + if (fd >= 0) { + assert(fd < NUM_FDS); + assert(fds[fd].open); + + // There should be no duplicate numbers in current_fd. + assert(!already_found[fd]); + already_found[fd] = true; + } else { + // Only if wanted_fd >= 0 current_fd can be -1. + assert(mappings[n].wanted_fd >= 0); + assert(fd == -1); + } + + // If wanted_fd >= 0 current_fd should be the same. + if (wanted >= 0) + assert(wanted == fd || fd == -1); + + // current_fd must be in a range. + assert(fd >= -1); + assert(fd < num_mappings); + } + + // There should be a correspondance between input and output names. + // If current_fd was -1 it will still be -1. +} diff --git a/ocaml/forkexecd/helper/close_from.c b/ocaml/forkexecd/helper/close_from.c new file mode 100644 index 00000000000..14e724109df --- /dev/null +++ b/ocaml/forkexecd/helper/close_from.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "close_from.h" + +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#endif + +// try to use close_range on Linux even if not defined by headers +#if defined(__linux__) && !defined(SYS_close_range) +# if defined(__alpha__) +# define SYS_close_range 546 +# elif defined(__amd64__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__hppa__) || defined(__i386__) || \ + defined(__ia64__) || defined(__m68k__) || defined(__mips__) || \ + defined(__powerpc__) || defined(__powerpc64__) || defined(__sparc__) || \ + defined(__s390x__) +# define SYS_close_range 436 +# endif +#endif + +bool +close_fds_from(int fd_from) +{ + // first method, use close_range +#if (defined(__linux__) && defined(SYS_close_range)) \ + || (defined(__FreeBSD__) && defined(CLOSE_RANGE_CLOEXEC)) + static bool close_range_supported = true; + if (close_range_supported) { +#if defined(__linux__) + if (syscall(SYS_close_range, fd_from, ~0U, 0) == 0) +#else + if (close_range(fd_from, ~0U, 0) == 0) +#endif + return true; + + if (errno == ENOSYS) + close_range_supported = false; + } +#endif + + // second method, read fds list from /proc + DIR *dir = opendir("/proc/self/fd"); + if (dir) { + const int dir_fd = dirfd(dir); + struct dirent *ent; + while ((ent = readdir(dir)) != NULL) { + char *end = NULL; + unsigned long fd = strtoul(ent->d_name, &end, 10); + if (end == NULL || *end) + continue; + if (fd >= fd_from && fd != dir_fd) + close(fd); + } + closedir(dir); + return true; + } + + // third method, use just a loop + struct rlimit limit; + if (getrlimit(RLIMIT_NOFILE, &limit) < 0) + return false; + for (int fd = fd_from; fd < limit.rlim_cur; ++ fd) + close(fd); + + return true; +} diff --git a/ocaml/forkexecd/helper/close_from.h b/ocaml/forkexecd/helper/close_from.h new file mode 100644 index 00000000000..2d1ae77e527 --- /dev/null +++ b/ocaml/forkexecd/helper/close_from.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +#include + +bool close_fds_from(int fd); diff --git a/ocaml/forkexecd/helper/logs.c b/ocaml/forkexecd/helper/logs.c new file mode 100644 index 00000000000..2f7fab23c2f --- /dev/null +++ b/ocaml/forkexecd/helper/logs.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "logs.h" + +#if FORKEXECD_DEBUG_LOGS + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define FILE_SIZE (32 * 1024) + +struct priv_mapped_logs { + uint32_t size; + + // Flags, we use characters instead of binary so + // easily see them easily with different tools. + char flags[4]; + char filename[64]; + pid_t pid; + int num; +}; + +// flags order +enum { SUCCESS, FAILURE }; + +mapped_logs mapped_logs_open(void) +{ + static int last_num = 0; + + // create a mapped file with a given size, will write header as structure + // and update using memory + mkdir("/tmp/fe_repl", 0755); + + char tmpl[] = "/tmp/fe_repl/logXXXXXX"; + int fd = mkstemp(tmpl); + if (!fd) + caml_raise_out_of_memory(); + + if (ftruncate(fd, FILE_SIZE) < 0) { + close(fd); + caml_raise_out_of_memory(); + } + + priv_mapped_logs *l = mmap(NULL, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (l == MAP_FAILED) { + close(fd); + caml_raise_out_of_memory(); + } + close(fd); + + l->size = sizeof(*l); + memcpy(l->flags, "____", 4); + strncpy(l->filename, tmpl, sizeof(l->filename)); + l->pid = getpid(); + l->num = ++last_num; + + return (mapped_logs){l}; +} + +#define DEFINE_RANGE(start, end) \ + char *start = (char*) logs.priv + sizeof(priv_mapped_logs); \ + char *const end = (char*) logs.priv + FILE_SIZE + +void mapped_logs_close(mapped_logs logs) +{ + if (!logs.priv) + return; + DEFINE_RANGE(start, end); + bool written = false; + bool success = logs.priv->flags[FAILURE] == '_' && logs.priv->flags[SUCCESS] != '_'; + if (!success) { + FILE *f = fopen("/tmp/fe_repl/all_logs", "a"); + if (f) { + end[-1] = 0; + size_t len = strlen(start); + written = (fwrite(start, 1, len, f) == len); + fclose(f); + } + } + if (written || success) + unlink(logs.priv->filename); + munmap(logs.priv, FILE_SIZE); +} + +void mapped_logs_failure(mapped_logs logs) +{ + if (!logs.priv) + return; + logs.priv->flags[FAILURE] = 'F'; +} + +void mapped_logs_success(mapped_logs logs) +{ + if (!logs.priv) + return; + logs.priv->flags[SUCCESS] = 'S'; +} + +void mapped_logs_add(mapped_logs logs, const char *fmt, ...) +{ + if (!logs.priv) + return; + int save_errno = errno; + DEFINE_RANGE(start, end); + start += strlen(start); + if (start >= end -1) { + errno = save_errno; + return; // no more space + } + size_t len = end - start; + int l = snprintf(start, len, "%d:%d ", (int) logs.priv->pid, logs.priv->num); + if (l >= len) { + errno = save_errno; + return; + } + start += l; + len -= l; + va_list ap; + va_start(ap, fmt); + vsnprintf(start, len, fmt, ap); + va_end(ap); + + errno = save_errno; +} +#endif diff --git a/ocaml/forkexecd/helper/logs.h b/ocaml/forkexecd/helper/logs.h new file mode 100644 index 00000000000..b9f396d6c34 --- /dev/null +++ b/ocaml/forkexecd/helper/logs.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +// Definitions to write logs into memory mapped objects. +// We use a memory mapped object here because we close file descriptors +// so writing to file using them would cause logs to be lost. + +#pragma once + +#if !defined(FORKEXECD_DEBUG_LOGS) +#define FORKEXECD_DEBUG_LOGS 0 +#endif + +#if (FORKEXECD_DEBUG_LOGS) != 0 && (FORKEXECD_DEBUG_LOGS) != 1 +#error Expected FORKEXECD_DEBUG_LOGS to be defined either 0 or 1 +#endif + +typedef struct priv_mapped_logs priv_mapped_logs; +typedef struct mapped_logs mapped_logs; + +#if FORKEXECD_DEBUG_LOGS +struct mapped_logs { + priv_mapped_logs *priv; +}; +#define NULL_MAPPED_LOGS ((mapped_logs){0}) +mapped_logs mapped_logs_open(void); +void mapped_logs_close(mapped_logs logs); + +// Add a log entry, similar to printf. +void mapped_logs_add(mapped_logs logs, const char *fmt, ...); + +// Mark as failed, any failure will keep the log. +void mapped_logs_failure(mapped_logs logs); + +// Mark as successful, if successful and no failure during +// execution the log will be removed. +void mapped_logs_success(mapped_logs logs); +#else +// Use an empty structure, compiler will strip it passing +// it as a parameter without the needs to change the source +// code. +struct mapped_logs {}; +#define NULL_MAPPED_LOGS ((mapped_logs){}) +static inline mapped_logs mapped_logs_open(void) { + return (mapped_logs){}; +} + +static inline void mapped_logs_close(mapped_logs logs) { +} + +static inline void mapped_logs_failure(mapped_logs logs) { +} + +static inline void mapped_logs_success(mapped_logs logs) { +} + +#define mapped_logs_add(...) \ + do {} while(0) +#endif diff --git a/ocaml/forkexecd/helper/redirect_algo.h b/ocaml/forkexecd/helper/redirect_algo.h new file mode 100644 index 00000000000..d86978cd351 --- /dev/null +++ b/ocaml/forkexecd/helper/redirect_algo.h @@ -0,0 +1,210 @@ +/* Algorithm used to remap file handles before executing a process. + * The algorithm is separated in a different file in order to reuse for + * fuzzing it. + */ + +#pragma once + +#if !defined(DEBUG) +#define DEBUG 0 +#endif + +#if (DEBUG) != 0 && (DEBUG) != 1 +#error Expected DEBUG to be defined either 0 or 1 +#endif + +#ifndef log +#error Expected log macro to be defined +#endif + +#include +#include +#include + +typedef struct { + const char *uuid; + int current_fd; + int wanted_fd; +} mapping; + +typedef struct { + // source file + int fd_from; + // destination file + short fd_to; + // see FD_OP_ constants + uint8_t operation; +} fd_operation; + +typedef enum { + // Duplicate from fd_from to fd_to. + // If fd_from is the same as fd_to remove FD_CLOEXEC flag. + FD_OP_DUP, + // Duplicate from fd_from to fd_to and close fd_from. + FD_OP_MOVE, + // Open /dev/null on fd_to. + FD_OP_DEVNULL, + // Close from fd_from to the sky! + FD_OP_CLOSE_FROM, +} FD_OP; + +#define MAX_OPERATIONS 1024 +#define MAX_TOTAL_MAPPINGS (MAX_OPERATIONS - 4) + +static uint16_t remap_fds(mapping *const mappings, unsigned num_mappings, int from, int to); + +// Given the passed mappings update them (current_fd) and returns the +// requested operations to do the job. +// First 3 mappings should refer to standard file descriptors (stdin, +// stdout, stderr). +// Returns the number of operations to perform or negative if error. +static int +redirect_mappings(mapping *const mappings, const unsigned num_mappings, fd_operation *operations) +{ + mapping *const end_mappings = mappings + num_mappings; + uint16_t used_fds[MAX_OPERATIONS] = {0,}; + fd_operation *ops = operations; + +#define DUMP_MAPPINGS do { \ + if (DEBUG) { \ + for (unsigned i = 0; i < num_mappings; ++i) { \ + const mapping *m __attribute__((unused)) = &mappings[i]; \ + log("mapping %s %d %d", m->uuid, m->current_fd, m->wanted_fd); \ + } \ + char lbuf[MAX_OPERATIONS* 16]; \ + lbuf[0] = 0; \ + for (int i = 0; i < MAX_OPERATIONS; ++i) { \ + if (used_fds[i]) \ + sprintf(strchr(lbuf, 0), "%d=%d,", i, used_fds[i]); \ + } \ + log("used %s", lbuf); \ + } \ +} while(0); + + log("handle"); + + // parse all mappings + standard fds, mark ones using range 0-MAX_OPERATIONS + for (mapping *m = mappings; m < end_mappings; ++m) { + if (m->current_fd < 0 || m->current_fd >= MAX_OPERATIONS) + continue; + used_fds[m->current_fd]++; + } + DUMP_MAPPINGS; + + // Move standard file descriptors out of the way. + // Maximum 3 operations. + log("move standard fds away"); + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (current_fd < 0 || current_fd > 2) + continue; + // find first available fd to use + int fd = 3; + while (used_fds[fd]) + ++fd; + *ops++ = (fd_operation){ current_fd, fd, FD_OP_DUP }; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[current_fd] = 0; + used_fds[fd] = changed; + } + DUMP_MAPPINGS; + + // Move standard fds into proper positions + // Maximum 3 operations (standard fds to be moved). + log("move standard fds correctly"); + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (current_fd < 0 || m->wanted_fd < 0) + continue; + int fd = m->wanted_fd; + FD_OP op = FD_OP_DUP; + if (current_fd >= num_mappings) { + // move + op = FD_OP_MOVE; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[fd] = changed; + } else { + // duplicate + m->current_fd = fd; + if (--used_fds[current_fd] == 0) + op = FD_OP_MOVE; + used_fds[fd] = 1; + } + *ops++ = (fd_operation){ current_fd, fd, op }; + } + DUMP_MAPPINGS; + + // Remove cloexec on range [3, 3 + num mappings). + // Maximum no standard mappings operations. + log("remove cloexec flags"); + for (int fd = 3; fd < num_mappings; ++fd) { + if (!used_fds[fd]) + continue; + log("remove cloexec from %d", fd); + *ops++ = (fd_operation){ fd, fd, FD_OP_DUP }; + } + DUMP_MAPPINGS; + + // Move all fds left in range [3, 3 + num mappings). + // Maximum no standard mapping operations; then sum with + // the above is the no standard mapping operations. + log("move all fds left in range"); + int last_free = 3; + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (m->wanted_fd >= 0) + continue; + if (current_fd < num_mappings && used_fds[current_fd] == 1) + continue; + while (used_fds[last_free]) + ++last_free; + int fd = last_free; + // TODO copied from above + FD_OP op = FD_OP_DUP; + if (current_fd >= num_mappings) { + // move + op = FD_OP_MOVE; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[fd] = changed; + } else { + // duplicate + m->current_fd = fd; + if (--used_fds[current_fd] == 0) + op = FD_OP_MOVE; + used_fds[fd] = 1; + } + *ops++ = (fd_operation){ current_fd, fd, op }; + } + DUMP_MAPPINGS; + + // Close extra fds. + *ops++ = (fd_operation){ num_mappings, 0, FD_OP_CLOSE_FROM }; + + // Create missing standard fds. + // Maximum standard mapping operations, but not the above, + // so the sum with move the standard is 3. + for (int fd = 0; fd < 3; ++fd) { + if (used_fds[fd]) + continue; + *ops++ = (fd_operation){ fd, fd, FD_OP_DEVNULL }; + } + + return ops - operations; +} + +static uint16_t +remap_fds(mapping *const mappings, unsigned num_mappings, int from, int to) +{ + uint16_t res = 0; + for (unsigned i = 0; i < num_mappings; ++i) { + mapping *m = &mappings[i]; + if (m->current_fd == from) { + m->current_fd = to; + res++; + } + } + return res; +} diff --git a/ocaml/forkexecd/helper/syslog.c b/ocaml/forkexecd/helper/syslog.c new file mode 100644 index 00000000000..bf584370920 --- /dev/null +++ b/ocaml/forkexecd/helper/syslog.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "syslog.h" + +#include +#include +#include + +static inline bool ocaml_isprint(const char c) +{ + return c >= ' ' && c < 0x7f; +} + +static inline size_t quoted_length(const char c) +{ + return c == '\\' ? 2 : + ocaml_isprint(c) ? 1 : + 4; +} + +static const char hex[] = "0123456789ABCDEF"; + +static inline void write_quoted(char *const p, const char c) +{ + if (c == '\\') { + p[0] = p[1] = c; + } else if (ocaml_isprint(c)) { + p[0] = c; + } else { + p[0] = '\\'; + p[1] = 'x'; + p[2] = hex[(c>>4)&0xf]; + p[3] = hex[c&0xf]; + } +} + +static void syslog_line(const char *line, const char *key, int child_pid) +{ + syslog(LOG_DAEMON|LOG_INFO, "%s[%d]: %s", key, child_pid, line); +} + +// Quote and forward every line from "fd" to the syslog. +// "fd" will be closed. +bool forward_to_syslog(int fd, const char *key, int child_pid) +{ +#define syslog_line(line) syslog_line(line, key, child_pid) + FILE *f = fdopen(fd, "r"); + char quoted_buf[64000]; + char *dest = quoted_buf; + char *const dest_end = quoted_buf + sizeof(quoted_buf) - sizeof(" ...") - 1; + bool overflowed = false; + while (true) { + int ch = getc_unlocked(f); + + if (!overflowed && dest != quoted_buf && (ch == '\n' || ch == EOF)) { + *dest = 0; + syslog_line(quoted_buf); + } + + if (ch == EOF) { + bool res = !!feof(f); + fclose(f); + return res; + } + + if (ch == '\n') { + overflowed = false; + dest = quoted_buf; + continue; + } + + if (overflowed) + continue; + + const size_t quoted_len = quoted_length(ch); + if (dest + quoted_len >= dest_end) { + strcpy(dest, " ..."); + syslog_line(quoted_buf); + overflowed = true; + continue; + } + write_quoted(dest, ch); + dest += quoted_len; + } +} diff --git a/ocaml/forkexecd/helper/syslog.h b/ocaml/forkexecd/helper/syslog.h new file mode 100644 index 00000000000..5ce466bc418 --- /dev/null +++ b/ocaml/forkexecd/helper/syslog.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +#include + +// Quote and forward every line from "fd" to the syslog. +// "fd" will be closed. +bool forward_to_syslog(int fd, const char *key, int child_pid); diff --git a/ocaml/forkexecd/helper/vfork_helper.c b/ocaml/forkexecd/helper/vfork_helper.c new file mode 100644 index 00000000000..434afba6126 --- /dev/null +++ b/ocaml/forkexecd/helper/vfork_helper.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "close_from.h" +#include "syslog.h" +#include "logs.h" +#include "vfork_helper.h" + +#define log(...) do {} while(0) +#include "redirect_algo.h" + +typedef struct { + char **args; + mapping *mappings; + fd_operation operations[MAX_OPERATIONS]; + int err; + const char *err_func; +} exec_info; + +static void adjust_args(char **args, mapping *const mappings, unsigned num_mappings); +static void reset_signal_handlers(void); +static void clear_cgroup(void); +static const char *get_arg(int *argc, char ***argv); +static int get_fd(int *argc, char ***argv); +static void error(int err, const char *msg, ...); +static void init_syslog(const char *key, bool redirect_stderr_to_stdout); + +static int error_fd = -1; + +int main(int argc, char **argv) +{ + unsigned num_mappings = 3; + bool redirect_stderr_to_stdout = false; + const char *key = NULL; + struct rlimit nofile_limit; + mapping mappings_buf[MAX_TOTAL_MAPPINGS]; + exec_info info[1] = { NULL, }; + const char *directory = "/"; + + mapped_logs logs = mapped_logs_open(); +#undef log +#define log(fmt, ...) mapped_logs_add(logs, fmt "\n", ## __VA_ARGS__) +#define log_fail(fmt, ...) do {\ + mapped_logs_failure(logs); \ + mapped_logs_add(logs, fmt "\n", ## __VA_ARGS__); \ +} while(0) + + log("starting"); + + info->mappings = mappings_buf; + for (int i = 0; i < 3; ++i) { + mapping *const m = &info->mappings[i]; + m->uuid = NULL; + m->current_fd = -1; + m->wanted_fd = i; + } + + // Scan all arguments, check them and collect some information. + ++argv; + --argc; + for (;;) { + // we must have an argument left + const char *arg = get_arg(&argc, &argv); + + // next must be a single letter option + if (arg[0] != '-' || arg[1] == 0 || arg[2] != 0) { + log_fail("invalid option %s", arg); + mapped_logs_close(logs); + error(EINVAL, "Invalid option %s", arg); + } + + // final "--" + if (arg[1] == '-') + break; + + switch (arg[1]) { + case 'I': // stdin + info->mappings[0].current_fd = get_fd(&argc, &argv); + break; + case 'O': // stdout + info->mappings[1].current_fd = get_fd(&argc, &argv); + break; + case 'E': // stderr + info->mappings[2].current_fd = get_fd(&argc, &argv); + break; + case 'm': { // mapping + if (num_mappings >= MAX_TOTAL_MAPPINGS) { + log_fail("too many mappings"); + mapped_logs_close(logs); + error(EINVAL, "Too many mappings"); + } + const char *uuid = get_arg(&argc, &argv); + if (strlen(uuid) != 36) { + log_fail("invalid mapping"); + mapped_logs_close(logs); + error(EINVAL, "Invalid mapping UUID"); + } + const int fd = get_fd(&argc, &argv); + mapping* const m = &info->mappings[num_mappings++]; + m->uuid = uuid; + m->current_fd = fd; + m->wanted_fd = -1; + } + break; + case 's': // syslog (with key) + key = get_arg(&argc, &argv); + break; + case 'S': // syslog stderr to stdout + redirect_stderr_to_stdout = true; + break; + case 'd': + directory = get_arg(&argc, &argv); + break; + case 'e': { // error file descriptor + error_fd = get_fd(&argc, &argv); + if (num_mappings >= MAX_TOTAL_MAPPINGS) { + log_fail("too many mappings"); + mapped_logs_close(logs); + error(EINVAL, "Too many mappings"); + } + mapping* const m = &info->mappings[num_mappings++]; + m->uuid = NULL; + m->current_fd = error_fd; + m->wanted_fd = -1; + } + break; + default: + log_fail("invalid option %s", arg); + mapped_logs_close(logs); + error(EINVAL, "Invalid option %s", arg); + } + } + + if (argc < 1) { + log_fail("no args"); + mapped_logs_close(logs); + error(EINVAL, "No command arguments"); + } + + info->args = argv; + + if (getrlimit(RLIMIT_NOFILE, &nofile_limit) < 0) { + int err = errno; + log_fail("getrlimit error"); + mapped_logs_close(logs); + error(err, "getrlimit"); + } + + sigset_t sigset; + + // Compute the file operations we need to do for the file mappings + int num_operations = + redirect_mappings(info->mappings, num_mappings, info->operations); + + if (FORKEXECD_DEBUG_LOGS) { + for (size_t n = 0; info->args[n]; ++n) + log("arg %zd %s", n, info->args[n]); + } + + // Rename all command line. + adjust_args(info->args, info->mappings, num_mappings); + + if (FORKEXECD_DEBUG_LOGS) { + for (size_t n = 0; info->args[n]; ++n) + log("arg %zd %s", n, info->args[n]); + } + + reset_signal_handlers(); + + // "." is a no-op, don't call chdir + if (strcmp(directory, ".") != 0 && chdir(directory) < 0) { + int err = errno; + log_fail("chdir %d", err); + mapped_logs_close(logs); + error(err, "chdir"); + } + + // Clear cgroup otherwise systemd will shutdown processes if + // toolstack is restarted. + clear_cgroup(); + + if (setsid() < 0) { + int err = errno; + log_fail("setsid %d", errno); + mapped_logs_close(logs); + error(err, "setsid"); + } + + // Redirect file descriptors. + int err = 0; + const char *err_func = NULL; + for (int i = 0; i < num_operations && err == 0; ++i) { + const fd_operation* const op = &info->operations[i]; + log("op %d %d %d", op->fd_from, op->fd_to, op->operation); + switch (op->operation) { + case FD_OP_DUP: + if (op->fd_from == op->fd_to) { + // These file descriptors came from another process, + // so surely they have the CLOEXEC flag set, nothing + // to do. + break; + } else { + err_func = "dup2"; + if (dup2(op->fd_from, op->fd_to) < 0) + err = errno; + // Track last file descriptor. + // File descriptors are usually duplicated in order to be + // replaced later. + if (op->fd_from == error_fd) + error_fd = op->fd_to; + } + break; + case FD_OP_MOVE: + err_func = "dup2"; + if (dup2(op->fd_from, op->fd_to) < 0) + err = errno; + if (op->fd_from == error_fd) + error_fd = op->fd_to; + close(op->fd_from); + break; + case FD_OP_DEVNULL: + // first close old, then create new one + close(op->fd_to); + // TODO ideally we want read only for input for Ocaml did the same... + err_func = "open"; + errno = 0; + if (open("/dev/null", O_WRONLY) != op->fd_to) + err = errno ? errno : EBADF; + break; + case FD_OP_CLOSE_FROM: + close_fds_from(op->fd_from); + break; + default: + err_func = "safe_exec"; + err = EINVAL; + } + } + if (err != 0) { + info->err = err; + info->err_func = err_func; + log_fail("redirect error %d in %s", err, err_func); + mapped_logs_close(logs); + error(err, "%s", err_func); + } + + if (key) + init_syslog(key, redirect_stderr_to_stdout); + + // Limit number of files limits to standard limit to avoid + // creating bugs with old programs. + if (nofile_limit.rlim_cur > 1024) { + nofile_limit.rlim_cur = 1024; + setrlimit(RLIMIT_NOFILE, &nofile_limit); + } + + // Reset signal mask, inherited by the process we are going to execute + sigemptyset(&sigset); + pthread_sigmask(SIG_SETMASK, &sigset, NULL); + + log("execv..."); + mapped_logs_success(logs); + if (error_fd >= 0) + close(error_fd); + execv(info->args[0], info->args); + log_fail("execve failed %d", errno); + // Here we could set err and err_func but we kept compatibility + // with forkexecd daemon. + exit(errno == ENOENT ? 127 : 126); +} + +static void +adjust_args(char **args, mapping *const mappings, unsigned num_mappings) +{ + for (; *args; ++args) { + char *arg = *args; + size_t len = strlen(arg); + if (len < 36) + continue; + + // replace uuid with file descriptor + char *uuid = arg + len - 36; + for (unsigned i = 0; i < num_mappings; ++i) { + const mapping *m = &mappings[i]; + if (m->uuid == NULL || strcmp(m->uuid, uuid) != 0) + continue; + sprintf(uuid, "%d", m->current_fd); + } + } +} + +static void +reset_signal_handlers(void) +{ + for (int sig = 1; sig < NSIG; ++sig) { + // these signals can't be overridden + if (sig == SIGKILL || sig == SIGSTOP) + continue; + + // Set signal dispositions. + // This avoids inherit unwanted overrides. + // Also prevent handling unwanted signal handler, especially using vfork(). + // Use ignore SIGPIPE for compatibility with forkexecd. + signal(sig, sig == SIGPIPE ? SIG_IGN : SIG_DFL); + } +} + +static void +clear_cgroup(void) +{ + int fd = open("/sys/fs/cgroup/systemd/cgroup.procs", O_WRONLY|O_CLOEXEC); + if (fd >= 0) { + char string_pid[32]; + int ignored __attribute__((unused)); + sprintf(string_pid, "%d\n", (int) getpid()); + ignored = write(fd, string_pid, strlen(string_pid)); + close(fd); + } +} + +static const char * +get_arg(int *argc, char ***argv) +{ + if (*argc < 0) + error(EINVAL, "Expected one more argument"); + + const char *arg = **argv; + --(*argc); + ++(*argv); + return arg; +} + +static int +get_fd(int *argc, char ***argv) +{ + const char *arg = get_arg(argc, argv); + unsigned long fd = strtoul(arg, NULL, 0); + if (fd < 0 || fd > INT_MAX) + error(EINVAL, "Expected valid file descriptor number"); + return (int) fd; +} + +static void +error(int err, const char *format, ...) +{ + if (error_fd >= 0) { + msg_t msg = { err }; + va_list ap; + va_start(ap, format); + int ignored __attribute__((unused)); + vsnprintf(msg.msg_buf, sizeof(msg.msg_buf), format, ap); + msg.msg_buf[sizeof(msg.msg_buf) - 1] = 0; + va_end(ap); + ignored = write(error_fd, &msg, offsetof(msg_t, msg_buf) + strlen(msg.msg_buf) + 1); + } + exit(125); +} + +static void +init_syslog(const char *key, bool redirect_stderr_to_stdout) +{ + int fds[2]; + if (pipe(fds) < 0) + error(errno, "pipe"); + dup2(fds[1], 1); + if (redirect_stderr_to_stdout) + dup2(fds[1], 2); + close(fds[1]); + + const int child_pid = (int) getpid(); + + pid_t pid = fork(); + if (pid < 0) + error(errno, "fork"); + + if (pid == 0) { + // child + close(0); + close(1); + if (open("/dev/null", O_RDONLY) != 0 + || open("/dev/null", O_WRONLY) != 1) + error(errno, "open"); + dup2(1, 2); + if (fds[0] != 3) { + dup2(fds[0], 3); + fds[0] = 3; + } + close_fds_from(4); + + pid = fork(); + if (pid < 0) + error(errno, "fork"); + if (pid > 0) + // parent + exit(0); + + openlog("forkexecd", 0, LOG_DAEMON); + forward_to_syslog(fds[0], key, child_pid); + exit(0); + } + + close(fds[0]); + + // parent + int status; + wait(&status); + if (!WIFEXITED(status)) + error(EPIPE, "syslogger killed by signal"); + + switch (WEXITSTATUS(status)) { + case 0: + // success + return; + case 125: + // forward error, a proper message will be forwarded + exit(125); + } + error(EPIPE, "syslogger"); +} diff --git a/ocaml/forkexecd/helper/vfork_helper.h b/ocaml/forkexecd/helper/vfork_helper.h new file mode 100644 index 00000000000..400c7d45041 --- /dev/null +++ b/ocaml/forkexecd/helper/vfork_helper.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +// Common structure to pass errors from helper to library +typedef struct { + // numeric C error + int err; + // message + char msg_buf[1000]; +} msg_t; diff --git a/ocaml/forkexecd/lib/dune b/ocaml/forkexecd/lib/dune index 662223770f4..e2b97966a2b 100644 --- a/ocaml/forkexecd/lib/dune +++ b/ocaml/forkexecd/lib/dune @@ -18,4 +18,10 @@ xapi-stdext-date xapi-tracing ) - (preprocess (per_module ((pps ppx_deriving_rpc) Fe)))) + (preprocess (per_module ((pps ppx_deriving_rpc) Fe))) + (foreign_stubs + (language c) + (names fe_stubs) + (include_dirs ../helper) + (flags :standard -Wall -Werror) + )) diff --git a/ocaml/forkexecd/lib/fe_stubs.c b/ocaml/forkexecd/lib/fe_stubs.c new file mode 100644 index 00000000000..89e14101f13 --- /dev/null +++ b/ocaml/forkexecd/lib/fe_stubs.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../helper/vfork_helper.h" + +#define FOREACH_LIST(name, list) \ + for(value name = (list); name != Val_emptylist; name = Field(name, 1)) + +// Create thread reducing stack usage to a minimum to reduce memory usage. +// Returns error number (like pthread_create). +static int create_thread_minstack(pthread_t *th, void *(*proc)(void *), void *arg); + +static inline void +reap_pid(pid_t pid) +{ + int status; + while (waitpid(pid, &status, 0) < 0 && errno == EINTR) + continue; +} + +static void * +thread_proc_reap(void *arg) +{ + pid_t pid = (pid_t) (intptr_t) arg; + + reap_pid(pid); + + return NULL; +} + +// Appends a string to *p_dest buffer. +// It updates *p_dest to point after copied string. +// Returns copied string. +static char * +append_string(char **p_dest, const char *s) +{ + char *const dest = *p_dest; + size_t const size = strlen(s) + 1; + memcpy(dest, s, size); + *p_dest = dest + size; + return dest; +} + +static char** +copy_string_list(value list) +{ + size_t strings_size = 0; + size_t list_size = 0; + char **res, **ptrs; + char *strings; + + FOREACH_LIST(item, list) { + strings_size += strlen(String_val(Field(item, 0))) + 1; + ++list_size; + } + + res = (char **) malloc(sizeof(char*) * (list_size + 1) + strings_size); + if (!res) + return NULL; + + ptrs = res; + strings = (char *) (res + (list_size + 1)); + FOREACH_LIST(item, list) + *ptrs++ = append_string(&strings, String_val(Field(item, 0))); + *ptrs = NULL; + + return res; +} + +static void +close_fd(int *const p_fd) +{ + const int fd = *p_fd; + if (fd >= 0) { + *p_fd = -1; + close(fd); + } +} + +typedef struct { + const char *err_msg; + pid_t pid; + msg_t msg; +} safe_exec_result; + +static int +safe_exec_with_helper(safe_exec_result *res, char **args, char **envs) +{ + int err = EINVAL; + char fd_string[48]; + int pipe_fds[2] = { -1, -1 }; + + res->err_msg = "safe_exec"; + + if (!args[0] || !args[1] || !args[2]) + return EINVAL; + + if (strcmp(args[1], "-e") == 0) { + if (pipe(pipe_fds) < 0) { + res->err_msg = "pipe"; + return errno; + } + sprintf(fd_string, "%d", pipe_fds[1]); + args[2] = fd_string; + } + + sigset_t sigset, old_sigset; + int cancellation_state; + + // Disable cancellation to avoid some signals. + // Glibc use some signals to handle thread cancellation. + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancellation_state); + + // Block all possible signals to avoid receiving some in the child. + // Signal mask is inherited to new process/thread will start with + // all signals disabled and we can safely change them. + sigfillset(&sigset); + pthread_sigmask(SIG_BLOCK, &sigset, &old_sigset); + + // fork + err = 0; + res->pid = vfork(); + if (res->pid < 0) { + err = errno; + } else if (res->pid == 0) { + // child + if (pipe_fds[0] >= 0) + close(pipe_fds[0]); + execve(args[0], args, envs); + // keep compatibility with forkexecd daemon. + _exit(errno == ENOENT ? 127 : 126); + } + + // Restore thread state + pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); + pthread_setcancelstate(cancellation_state, NULL); + + // We don't need writing pipe anymore and we need to detect + // if closed so we can't keep it open + close_fd(&pipe_fds[1]); + + if (err != 0) { + close_fd(&pipe_fds[0]); + res->err_msg = "vfork"; + return err; + } + + // Handle errors from helper + if (pipe_fds[0] >= 0) { + int readed; + // Note that buffer is small and written atomically by + // the helper, no reason for the kernel to split it. + while ((readed = read(pipe_fds[0], &res->msg, sizeof(res->msg))) < 0 + && errno == EINTR) + continue; + close_fd(&pipe_fds[0]); + if (readed != 0 && readed < offsetof(msg_t, msg_buf) + 1) { + // This should never happen !!! + // At this point the process is created and we have a pid so + // we cannot just return an error. + // We could try to wait the process but it should fail, let + // returns success and let caller read process status result. + return 0; + } + res->msg.msg_buf[sizeof(res->msg.msg_buf) - 1] = 0; + if (readed > 0) { + // Wait the process otherwise we'll have a zombie + reap_pid(res->pid); + + res->err_msg = res->msg.msg_buf; + return res->msg.err; + } + } + return 0; +} + +CAMLprim value +caml_safe_exec_with_helper(value args, value environment) +{ + CAMLparam2(args, environment); + + // Copy parameters to C + char **c_args = copy_string_list(args); + char **c_envs = copy_string_list(environment); + if (!c_envs || !c_args) { + free(c_envs); + free(c_args); + caml_raise_out_of_memory(); + } + + // potentially slow section, release Ocaml engine + caml_enter_blocking_section(); + + safe_exec_result res; + int err = safe_exec_with_helper(&res, c_args, c_envs); + + free(c_envs); + free(c_args); + + caml_leave_blocking_section(); + + // error, notify with an exception + if (err != 0) + unix_error(err, res.err_msg, Nothing); + + CAMLreturn(Val_int(res.pid)); +} + +CAMLprim value +caml_pidwaiter_dontwait(value pid_val) +{ + CAMLparam1(pid_val); + pid_t pid = Int_val(pid_val); + + // reap the pid to avoid zombies + pthread_t th; + if (create_thread_minstack(&th, thread_proc_reap, (void *) (intptr_t) pid) == 0) + pthread_detach(th); + + CAMLreturn(Val_unit); +} + +typedef struct { + pid_t pid; + bool timed_out; + bool stop; + struct timespec deadline; + pthread_mutex_t mtx; + pthread_cond_t cond; +} timeout_kill; + +static void * +thread_proc_timeout_kill(void *arg) +{ + timeout_kill *tm = (timeout_kill *) arg; + int res; + + do { + pthread_mutex_lock(&tm->mtx); + res = tm->stop ? 0: + pthread_cond_timedwait(&tm->cond, &tm->mtx, &tm->deadline); + pthread_mutex_unlock(&tm->mtx); + + if (res == ETIMEDOUT) { + kill(tm->pid, SIGKILL); + tm->timed_out = true; + break; + } + // handle spurious wakeups + } while (!tm->stop && res == 0); + return NULL; +} + +static int +create_thread_minstack(pthread_t *th, void *(*proc)(void *), void *arg) +{ + int res; + + // disable any possible signal handler so we can safely use a small stack + // for the thread + sigset_t sigset, old_sigset; + sigfillset(&sigset); + pthread_sigmask(SIG_BLOCK, &sigset, &old_sigset); + + pthread_attr_t th_attr; + res = pthread_attr_init(&th_attr); + if (!res) { + pthread_attr_setstacksize(&th_attr, PTHREAD_STACK_MIN); + + res = pthread_create(th, &th_attr, proc, arg); + + pthread_attr_destroy(&th_attr); + } + pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); + + return res; +} + +/* + * Wait a process with a given timeout. + * At the end of timeout (if trigger) kill the process. + * To avoid race we need to wait a specific process, but this is blocking + * and we use a timeout to implement the wait. Timer functions are per + * process, not per thread. + * Returns <0 if error, 0 if not timed out, >0 if timedout. + */ +static int +wait_process_timeout(pid_t pid, double timeout) +{ + int err; + + // compute deadline + timeout_kill tm = { pid, false, false }; + if (clock_gettime(CLOCK_MONOTONIC, &tm.deadline) < 0) + return -errno; + + double f = floor(timeout); + tm.deadline.tv_sec += f; + tm.deadline.tv_nsec += (timeout - f) * 1000000000.; + if (tm.deadline.tv_nsec >= 1000000000) { + tm.deadline.tv_nsec -= 1000000000; + tm.deadline.tv_sec += 1; + } + + pthread_condattr_t attr; + err = pthread_condattr_init(&attr); + if (err) + return -err; + err = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + if (!err) + err = pthread_cond_init(&tm.cond, &attr); + pthread_condattr_destroy(&attr); + if (err) + return -err; + + err = pthread_mutex_init(&tm.mtx, NULL); + if (err) { + pthread_cond_destroy(&tm.cond); + return -err; + } + + // Create timeout thread + pthread_t th; + err = create_thread_minstack(&th, thread_proc_timeout_kill, &tm); + if (err) { + pthread_cond_destroy(&tm.cond); + pthread_mutex_destroy(&tm.mtx); + return -err; + } + + // Wait the process, we avoid to reap the other process to avoid + // race conditions. Consider: + // - process exit; + // - we reap the thread; + // - OS reuse the pid; + // - timeout thread terminate the pid, now reused. + // Avoiding reaping the process will create a zombie process so + // the KILL would be directed to that. + siginfo_t info; + err = 0; + while (waitid(P_PID, pid, &info, WEXITED|WNOWAIT) == -1) { + if (errno != EINTR) { + err = -errno; + break; + } + } + + // Close the timeout thread + pthread_mutex_lock(&tm.mtx); + // We use also a variable to avoid races like + // - we create the thread; + // - we start waiting the process which was already exited; + // - we came here trying to close the thread; + // - thread waits for signal. + tm.stop = true; + pthread_cond_broadcast(&tm.cond); + pthread_mutex_unlock(&tm.mtx); + pthread_join(th, NULL); + + // Cleanup + pthread_cond_destroy(&tm.cond); + pthread_mutex_destroy(&tm.mtx); + + return err ? err : (tm.timed_out ? 1 : 0); +} + +CAMLprim value +caml_pidwaiter_waitpid(value timeout_value, value pid_value) +{ + CAMLparam0(); + double timeout = timeout_value == Val_none ? 0 : Double_val(Some_val(timeout_value)); + pid_t pid = Int_val(pid_value); + + caml_enter_blocking_section(); + + bool timed_out = false; + int err = 0; + if (timeout > 0) { + int res = wait_process_timeout(pid, timeout); + if (res < 0) + err = -res; + else if (res != 0) + timed_out = true; + } + + caml_leave_blocking_section(); + + if (err) + unix_error(err, "waitpid", Nothing); + + CAMLreturn(timed_out ? Val_true: Val_false); +} diff --git a/ocaml/forkexecd/lib/forkhelpers.ml b/ocaml/forkexecd/lib/forkhelpers.ml index 2407b86b924..f731eb3e5b9 100644 --- a/ocaml/forkexecd/lib/forkhelpers.ml +++ b/ocaml/forkexecd/lib/forkhelpers.ml @@ -40,12 +40,33 @@ let with_tracing ~tracing ~name f = Tracing.with_tracing ~parent:tracing ~name f let finally = Xapi_stdext_pervasives.Pervasiveext.finally -type pidty = Unix.file_descr * int +(* Use forkexecd daemon instead of vfork implementation if file is present *) +let use_daemon = Sys.file_exists "/etc/xensource/forkexec-uses-daemon" + +module FEStubs = struct + external safe_exec_with_helper : string list -> string list -> int + = "caml_safe_exec_with_helper" + + (* timeout <= 0 wait infinite *) + external pidwaiter_waitpid : ?timeout:float -> int -> bool + = "caml_pidwaiter_waitpid" + + (* do not wait for a process, release it, it won't generate a zombie process *) + external pidwaiter_dontwait : int -> unit = "caml_pidwaiter_dontwait" +end + +type waiter = Pidwaiter | Sock of Unix.file_descr + +type pidty = waiter * int (* The forking executioner has been used, therefore we need to tell *it* to waitpid *) -let string_of_pidty (fd, pid) = - Printf.sprintf "(FEFork (%d,%d))" (Fd_send_recv.int_of_fd fd) pid +let string_of_pidty (waiter, pid) = + match waiter with + | Pidwaiter -> + Printf.sprintf "(FEFork (%d))" pid + | Sock fd -> + Printf.sprintf "(FEFork (%d,%d))" (Fd_send_recv.int_of_fd fd) pid exception Subprocess_failed of int @@ -53,7 +74,7 @@ exception Subprocess_killed of int exception Subprocess_timeout -let waitpid (sock, pid) = +let waitpid_daemon sock pid = let status = Fecomms.read_raw_rpc sock in Unix.close sock ; match status with @@ -79,7 +100,7 @@ let waitpid (sock, pid) = (* [waitpid_nohang] reports the status of a socket to a process. The intention is to make this non-blocking. If the process is finished, the socket is closed and not otherwise. *) -let waitpid_nohang (sock, pid) = +let waitpid_nohang_daemon sock pid = let verbose = false in if verbose then D.debug "%s pid=%d" __FUNCTION__ pid ; let fail fmt = Printf.ksprintf failwith fmt in @@ -118,7 +139,7 @@ let waitpid_nohang (sock, pid) = fail "%s: error happened when trying to read the status. %s" __FUNCTION__ (Printexc.to_string exn) -let dontwaitpid (sock, _pid) = +let dontwaitpid_daemon sock _pid = ( try (* Try to tell the child fe that we're not going to wait for it. If the other end of the pipe has been closed then this doesn't matter, as this @@ -128,6 +149,27 @@ let dontwaitpid (sock, _pid) = ) ; Unix.close sock +let waitpid (waiter, pid) = + match waiter with + | Pidwaiter -> + Unix.waitpid [] pid + | Sock sock -> + waitpid_daemon sock pid + +let waitpid_nohang (waiter, pid) = + match waiter with + | Pidwaiter -> + Unix.waitpid [Unix.WNOHANG] pid + | Sock sock -> + waitpid_nohang_daemon sock pid + +let dontwaitpid (waiter, pid) = + match waiter with + | Pidwaiter -> + FEStubs.pidwaiter_dontwait pid + | Sock sock -> + dontwaitpid_daemon sock pid + let waitpid_fail_if_bad_exit ty = let _, status = waitpid ty in match status with @@ -140,7 +182,7 @@ let waitpid_fail_if_bad_exit ty = | Unix.WSTOPPED n -> raise (Subprocess_killed n) -let getpid (_sock, pid) = pid +let getpid (_waiter, pid) = pid type 'a result = Success of string * 'a | Failure of string * exn @@ -179,12 +221,9 @@ type syslog_stdout = | Syslog_DefaultKey | Syslog_WithKey of string -(** Safe function which forks a command, closing all fds except a whitelist and - having performed some fd operations in the child *) -let safe_close_and_exec ?tracing ?env stdin stdout stderr +let safe_close_and_exec_daemon ?tracing env stdin stdout stderr (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) - ?(redirect_stderr_to_stdout = false) (cmd : string) (args : string list) = - with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing -> + ?(redirect_stderr_to_stdout = false) args = let sock = Fecomms.open_unix_domain_sock_client ?tracing (Filename.concat runtime_path "/xapi/forker/main") @@ -228,7 +267,6 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr List.fold_left maybe_add_id_to_fd_map dest_named_fds predefined_fds in - let env = Option.value ~default:default_path_env_pair env in let syslog_stdout = match syslog_stdout with | NoSyslogging -> @@ -241,7 +279,7 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr Fecomms.write_raw_rpc ?tracing sock (Fe.Setup { - Fe.cmdargs= cmd :: args + Fe.cmdargs= args ; env= Array.to_list env ; id_to_fd_map ; syslog_stdout @@ -295,7 +333,7 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr match Fecomms.read_raw_rpc ?tracing sock with | Ok (Fe.Execed pid) -> remove_fd_from_close_list sock ; - (sock, pid) + (Sock sock, pid) | Ok status -> let msg = Printf.sprintf @@ -314,6 +352,64 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr ) close_fds +let safe_close_and_exec_vfork ?tracing env stdin stdout stderr + (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) + ?(redirect_stderr_to_stdout = false) cmd args = + let string_of_fd (fd : Unix.file_descr) = string_of_int (Obj.magic fd) in + let args = "--" :: args in + let args = if redirect_stderr_to_stdout then "-S" :: args else args in + let args = + match syslog_stdout with + | NoSyslogging -> + args + | Syslog_DefaultKey -> + "-s" :: Filename.basename cmd :: args + | Syslog_WithKey key -> + "-s" :: key :: args + in + let args = + List.fold_right + (fun (uuid, fd) args -> + Unix.clear_close_on_exec fd ; + "-m" :: uuid :: string_of_fd fd :: args + ) + fds args + in + let add_std args arg fd = + match fd with + | Some fd -> + Unix.clear_close_on_exec fd ; + arg :: string_of_fd fd :: args + | None -> + args + in + let args = add_std args "-E" stderr in + let args = add_std args "-O" stdout in + let args = add_std args "-I" stdin in + let args = "/usr/libexec/xapi/vfork_helper" :: "-e" :: "DUMMY" :: args in + (* Convert environment and add tracing variables. *) + let env = + List.append (Tracing.EnvHelpers.of_span tracing) (Array.to_list env) + in + let pid = FEStubs.safe_exec_with_helper args env in + (Pidwaiter, pid) + +(** Safe function which forks a command, closing all fds except a whitelist and + having performed some fd operations in the child *) +let safe_close_and_exec ?tracing ?env stdin stdout stderr + (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) + ?(redirect_stderr_to_stdout = false) (cmd : string) (args : string list) = + with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing -> + let args = cmd :: args in + let env = Option.value ~default:default_path_env_pair env in + + if not use_daemon then (* Build a list of arguments as helper wants. *) + safe_close_and_exec_vfork ?tracing env stdin stdout stderr fds + ~syslog_stdout ~redirect_stderr_to_stdout cmd args + else + safe_close_and_exec_daemon ?tracing env stdin stdout stderr fds + ~syslog_stdout ~redirect_stderr_to_stdout args + let execute_command_get_output_inner ?tracing ?env ?stdin ?(syslog_stdout = NoSyslogging) ?(redirect_stderr_to_stdout = false) timeout cmd args = @@ -342,7 +438,7 @@ let execute_command_get_output_inner ?tracing ?env ?stdin with_tracing ~tracing ~name:"Forkhelpers.with_logfile_err_fd" @@ fun tracing -> with_logfile_fd "execute_command_get_err" (fun err_fd -> - let sock, pid = + let waiter, pid = safe_close_and_exec ?tracing ?env (Option.map (fun (_, fd, _) -> fd) stdinandpipes) (Some out_fd) (Some err_fd) [] ~syslog_stdout @@ -354,19 +450,38 @@ let execute_command_get_output_inner ?tracing ?env ?stdin close wr ) stdinandpipes ; - ( match timeout with - | Some span -> - let timeout = Clock.Timer.span_to_s span in - Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout - | None -> - () - ) ; - with_tracing ~tracing ~name:"Forkhelpers.waitpid" @@ fun _ -> - try waitpid (sock, pid) - with Unix.(Unix_error ((EAGAIN | EWOULDBLOCK), _, _)) -> - Unix.kill pid Sys.sigkill ; - ignore (waitpid (sock, pid)) ; - raise Subprocess_timeout + match waiter with + | Pidwaiter -> + with_tracing ~tracing ~name:"Forkhelpers.waitpid" + @@ fun _ -> + let timeout = + match timeout with + | Some span -> + Clock.Timer.span_to_s span + | None -> + 0. + in + let timedout = FEStubs.pidwaiter_waitpid ~timeout pid in + let res = Unix.waitpid [] pid in + + if timedout then raise Subprocess_timeout ; + res + | Sock sock -> ( + ( match timeout with + | Some span -> + let timeout = Clock.Timer.span_to_s span in + Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout + | None -> + () + ) ; + with_tracing ~tracing ~name:"Forkhelpers.waitpid" + @@ fun _ -> + try waitpid_daemon sock pid + with Unix.(Unix_error ((EAGAIN | EWOULDBLOCK), _, _)) -> + Unix.kill pid Sys.sigkill ; + ignore (waitpid_daemon sock pid) ; + raise Subprocess_timeout + ) ) ) with diff --git a/ocaml/forkexecd/test/dune b/ocaml/forkexecd/test/dune index 689c972ca5a..bba6499fd1e 100644 --- a/ocaml/forkexecd/test/dune +++ b/ocaml/forkexecd/test/dune @@ -13,6 +13,6 @@ (rule (alias runtest) (package xapi-forkexecd) - (deps fe_test.sh fe_test.exe ../src/fe_main.exe syslog.so) + (deps fe_test.sh fe_test.exe ../src/fe_main.exe syslog.so ../vfork_helper) (action (run ./fe_test.sh))) diff --git a/ocaml/forkexecd/test/fe_test.sh b/ocaml/forkexecd/test/fe_test.sh index fe454e89802..24ee9c21791 100755 --- a/ocaml/forkexecd/test/fe_test.sh +++ b/ocaml/forkexecd/test/fe_test.sh @@ -8,7 +8,9 @@ export FE_TEST=1 SOCKET=${XDG_RUNTIME_DIR}/xapi/forker/main rm -f "$SOCKET" -LD_PRELOAD="$PWD/syslog.so" ../src/fe_main.exe & +LD_PRELOAD="$PWD/syslog.so" \ +TEST_VFORK_HELPER="$PWD/../vfork_helper" \ +../src/fe_main.exe & MAIN=$! cleanup () { kill $MAIN @@ -17,4 +19,6 @@ trap cleanup EXIT INT for _ in $(seq 1 10); do test -S ${SOCKET} || sleep 1 done -echo "" | LD_PRELOAD="$PWD/syslog.so" ./fe_test.exe 16 +echo "" | LD_PRELOAD="$PWD/syslog.so" \ +TEST_VFORK_HELPER="$PWD/../vfork_helper" \ +./fe_test.exe 16 diff --git a/ocaml/forkexecd/test/syslog.c b/ocaml/forkexecd/test/syslog.c index 2316e84a25e..10e3dc3c79f 100644 --- a/ocaml/forkexecd/test/syslog.c +++ b/ocaml/forkexecd/test/syslog.c @@ -18,6 +18,23 @@ if (!old_func) \ old_func = (typeof(name) *) dlsym(RTLD_NEXT, #name); +#define strlcpy _strlcpy +static size_t +strlcpy(char *dest, const char *src, size_t len) +{ + size_t l = strlen(src); + + if (len) { + --len; + if (l <= len) + len = l; + + memcpy(dest, src, len); + dest[len] = 0; + } + return l; +} + int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { static const char dev_log[] = "/dev/log"; @@ -119,3 +136,50 @@ void __vsyslog_chk(int priority, int flags, const char *format, va_list ap) { vsyslog_internal(priority, format, ap); } + +static char vfork_helper[256] = "/usr/libexec/xapi/vfork_helper"; +static char ld_preload[512]; + +static const char ld_prefix[] = "LD_PRELOAD="; +enum { len_prefix = sizeof(ld_prefix) - 1 }; + +__attribute__((constructor)) +static void initialize(void) +{ + const char *env; + env = getenv("TEST_VFORK_HELPER"); + if (env) + strlcpy(vfork_helper, env, sizeof(vfork_helper)); + env = getenv("LD_PRELOAD"); + if (env) { + strcpy(ld_preload, ld_prefix); + strlcpy(ld_preload + len_prefix, env, sizeof(ld_preload) - len_prefix); + } +} + +int execve(const char *pathname, char *const argv[], char *const envp[]) +{ + START(execve); + + if (strcmp(pathname, "/usr/libexec/xapi/vfork_helper") == 0) + pathname = vfork_helper; + + if (envp && ld_preload[0]) { + bool ok = false; + size_t num_env = 0; + for (char * const *e = envp; *e; ++e) { + ++num_env; + if (strncmp(*e, ld_prefix, len_prefix) == 0) + ok = true; + } + if (!ok) { + // allocate on stack, we could be inside a vfork() created process + char **new_envs = alloca(sizeof(char*) * (num_env + 2)); + *new_envs = ld_preload; + memcpy(new_envs + 1, envp, sizeof(char*) * (num_env + 1)); + envp = new_envs; + } + } + + return old_func(pathname, argv, envp); +} diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 0f5c74564c8..4991ac53273 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -110,11 +110,6 @@ type pid = | FEFork of Forkhelpers.pidty (** the forkhelpers module did it for us. *) | Nopid -(* let string_of_pid = function - | StdFork x -> Printf.sprintf "(StdFork %d)" x - | FEFork x -> Forkhelpers.string_of_pidty x - | Nopid -> "None" *) - let getpid ty = match ty with | StdFork pid -> From a5f5006cde2874edccc1e1e8102029e67600ad2f Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Tue, 18 Feb 2025 15:40:40 +0800 Subject: [PATCH 005/492] CP-53313: Add field services in VM_guest_metrics Add a new field to store service aware data from guest vms Signed-off-by: Changlei Li --- ocaml/idl/datamodel.ml | 4 ++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/schematest.ml | 2 +- ocaml/xapi-cli-server/records.ml | 12 ++++++++++++ ocaml/xapi/import.ml | 1 + ocaml/xapi/xapi_guest_agent.ml | 12 +++++++++++- ocaml/xapi/xapi_vm_helpers.ml | 1 + 8 files changed, 33 insertions(+), 3 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 2ee5f705c22..ef8e1361ce5 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -7544,6 +7544,10 @@ module VM_guest_metrics = struct ~ty:Bool ~default_value:(Some (VBool false)) "PV_drivers_detected" "At least one of the guest's devices has successfully connected to \ the backend." + ; field ~qualifier:DynamicRO ~lifecycle:[] + ~ty:(Map (String, String)) + ~default_value:(Some (VMap [])) "services" + "The guest's services data." ] () end diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 50bc585b7ac..a044c9a0f2d 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 786 +let schema_minor_vsn = 787 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 0023f8dead7..9f1b1dcf2bd 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -107,6 +107,8 @@ let prototyped_of_field = function Some "22.27.0" | "host", "last_software_update" -> Some "22.20.0" + | "VM_guest_metrics", "services" -> + Some "25.6.0-next" | "VM_guest_metrics", "netbios_name" -> Some "24.28.0" | "VM", "groups" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index d17b6cf488a..b534f108a57 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "05ac9223f9c17b07b12e328d5dc3db52" +let last_known_schema_hash = "34390a071f5df0fac8dcf9423a9111ae" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index 228ba04361b..2686f43739c 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -2416,6 +2416,18 @@ let vm_record rpc session_id vm = (xgm ()) ) () + ; make_field ~name:"services" + ~get:(fun () -> + Option.fold ~none:nid + ~some:(fun m -> get_from_map m.API.vM_guest_metrics_services) + (xgm ()) + ) + ~get_map:(fun () -> + Option.fold ~none:[] + ~some:(fun m -> m.API.vM_guest_metrics_services) + (xgm ()) + ) + () ; make_field ~name:"PV-drivers-detected" ~get:(fun () -> Option.fold ~none:nid diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index c3abaf34b00..91a900dedda 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -836,6 +836,7 @@ module GuestMetrics : HandlerTools = struct ~memory:gm_record.API.vM_guest_metrics_memory ~disks:gm_record.API.vM_guest_metrics_disks ~networks:gm_record.API.vM_guest_metrics_networks + ~services:gm_record.API.vM_guest_metrics_services ~pV_drivers_detected:gm_record.API.vM_guest_metrics_PV_drivers_detected ~other:gm_record.API.vM_guest_metrics_other ~last_updated:gm_record.API.vM_guest_metrics_last_updated diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index 7de892cdf79..00e44d5925c 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -224,6 +224,7 @@ type guest_metrics_t = { ; other: m ; memory: m ; device_id: m + ; services: m ; last_updated: float ; can_use_hotplug_vbd: API.tristate_type ; can_use_hotplug_vif: API.tristate_type @@ -289,6 +290,7 @@ let get_initial_guest_metrics (lookup : string -> string option) ; networks "xenserver/attr" "net-sriov-vf" list ] ) + and services = [] and other = List.append (to_map (other all_control)) ts and memory = to_map memory and last_updated = Unix.gettimeofday () in @@ -310,6 +312,7 @@ let get_initial_guest_metrics (lookup : string -> string option) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -326,7 +329,8 @@ let create_and_set_guest_metrics (lookup : string -> string option) ~os_version:initial_gm.os_version ~netbios_name:initial_gm.netbios_name ~pV_drivers_version:initial_gm.pv_drivers_version ~pV_drivers_up_to_date:pV_drivers_detected ~memory:[] ~disks:[] - ~networks:initial_gm.networks ~pV_drivers_detected ~other:initial_gm.other + ~networks:initial_gm.networks ~services:initial_gm.services + ~pV_drivers_detected ~other:initial_gm.other ~last_updated:(Date.of_unix_time initial_gm.last_updated) ~other_config:[] ~live:true ~can_use_hotplug_vbd:initial_gm.can_use_hotplug_vbd @@ -356,6 +360,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -390,6 +395,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other= [] ; memory= [] ; device_id= [] + ; services= [] ; last_updated= 0.0 ; can_use_hotplug_vbd= `unspecified ; can_use_hotplug_vif= `unspecified @@ -407,6 +413,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -420,6 +427,7 @@ let all (lookup : string -> string option) (list : string -> string list) || guest_metrics_cached.networks <> networks || guest_metrics_cached.other <> other || guest_metrics_cached.device_id <> device_id + || guest_metrics_cached.services <> services ) || guest_metrics_cached.can_use_hotplug_vbd <> can_use_hotplug_vbd || guest_metrics_cached.can_use_hotplug_vif <> can_use_hotplug_vif @@ -452,6 +460,8 @@ let all (lookup : string -> string option) (list : string -> string list) ~value:netbios_name ; if guest_metrics_cached.networks <> networks then Db.VM_guest_metrics.set_networks ~__context ~self:gm ~value:networks ; + if guest_metrics_cached.services <> services then + Db.VM_guest_metrics.set_services ~__context ~self:gm ~value:services ; if guest_metrics_cached.other <> other then ( Db.VM_guest_metrics.set_other ~__context ~self:gm ~value:other ; Helpers.call_api_functions ~__context (fun rpc session_id -> diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index d7f36c8f4de..3371b7e1368 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -1465,6 +1465,7 @@ let copy_guest_metrics ~__context ~vm = ~memory:all.API.vM_guest_metrics_memory ~disks:all.API.vM_guest_metrics_disks ~networks:all.API.vM_guest_metrics_networks + ~services:all.API.vM_guest_metrics_services ~pV_drivers_detected:all.API.vM_guest_metrics_PV_drivers_detected ~other:all.API.vM_guest_metrics_other ~last_updated:all.API.vM_guest_metrics_last_updated From afc92075f497d3bcb51681dab9511546a57198e7 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 19 Feb 2025 12:06:22 +0800 Subject: [PATCH 006/492] CP-53314: Read and watch /data/service in xenstore to DB 1. xenopsd backend read and watch /local/domain/%d/data/service 2. xapi_guest_agent convert the data to (string, string) list to store in VM_guest_metrics.services Signed-off-by: Changlei Li --- ocaml/xapi/xapi_guest_agent.ml | 27 ++++++++++++++++++++++++++- ocaml/xenopsd/xc/xenops_server_xen.ml | 2 ++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index 00e44d5925c..1b89586abd4 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -236,6 +236,31 @@ let dead_domains : IntSet.t ref = ref IntSet.empty let mutex = Mutex.create () +(* Parse data/service which has the following structure: + data/service// = + data/service// = + ... + data/service// = + Read and convert to [(/, )] pair list. + The list is intended to store in VM_guest_metrics.services at last *) +let get_guest_services (lookup : string -> string option) + (list : string -> string list) = + let base_path = "data/service" in + let services = list base_path in + List.fold_left + (fun acc service -> + let sub_path = base_path // service in + List.fold_left + (fun acc key -> + let full_path_key = sub_path // key in + let db_key = service // key in + let value = lookup full_path_key in + (db_key, Option.value ~default:"" value) :: acc + ) + acc (list sub_path) + ) + [] services + (* In the following functions, 'lookup' reads a key from xenstore and 'list' reads a directory from xenstore. Both are relative to the guest's domainpath. *) let get_initial_guest_metrics (lookup : string -> string option) @@ -290,7 +315,7 @@ let get_initial_guest_metrics (lookup : string -> string option) ; networks "xenserver/attr" "net-sriov-vf" list ] ) - and services = [] + and services = get_guest_services lookup list and other = List.append (to_map (other all_control)) ts and memory = to_map memory and last_updated = Unix.gettimeofday () in diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index ba3dd7e2b8a..482e017a8f9 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2849,6 +2849,7 @@ module VM = struct ; ("drivers", None, 0) ; ("data", None, 0) (* in particular avoid data/volumes which contains many entries for each disk *) + ; ("data/service", None, 1) (* data/service//*) ] |> List.fold_left (fun acc (dir, excludes, depth) -> @@ -4825,6 +4826,7 @@ module Actions = struct sprintf "/local/domain/%d/attr" domid ; sprintf "/local/domain/%d/data/updated" domid ; sprintf "/local/domain/%d/data/ts" domid + ; sprintf "/local/domain/%d/data/service" domid ; sprintf "/local/domain/%d/memory/target" domid ; sprintf "/local/domain/%d/memory/uncooperative" domid ; sprintf "/local/domain/%d/console/vnc-port" domid From a6b5b7ce1a49e3f2b1c2a091e2be6ef5fd3f1ede Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 26 Feb 2025 16:52:33 +0800 Subject: [PATCH 007/492] Fix review comments and add unit test for guest_agent Signed-off-by: Changlei Li --- ocaml/tests/test_guest_agent.ml | 84 +++++++++++++++++++++++++++ ocaml/xapi/xapi_guest_agent.ml | 24 ++++---- ocaml/xenopsd/xc/xenops_server_xen.ml | 13 ++--- 3 files changed, 100 insertions(+), 21 deletions(-) diff --git a/ocaml/tests/test_guest_agent.ml b/ocaml/tests/test_guest_agent.ml index 6d9c7d8f40b..6b74aa55544 100644 --- a/ocaml/tests/test_guest_agent.ml +++ b/ocaml/tests/test_guest_agent.ml @@ -468,9 +468,93 @@ module Initial_guest_metrics = Generic.MakeStateless (struct ] end) +module Services = Generic.MakeStateless (struct + module Io = struct + type input_t = (string * string) list + + type output_t = (string * string) list + + let string_of_input_t = Test_printers.(assoc_list string string) + + let string_of_output_t = Test_printers.(assoc_list string string) + end + + (* prototype funtions lookup and list are in Xapi_xenops.ml::update_vm *) + let lookup state key = List.assoc_opt key state + + let list_subkeys state dir = + if dir = "" then + [] + else + let dir = + if dir.[0] = '/' then + String.sub dir 1 (String.length dir - 1) + else + dir + in + let results = + List.filter_map + (fun (path, _) -> + if String.starts_with ~prefix:dir path then + let rest = + String.sub path (String.length dir) + (String.length path - String.length dir) + in + let is_sep = function '/' -> true | _ -> false in + match Astring.String.fields ~empty:false ~is_sep rest with + | x :: _ -> + Some x + | _ -> + None + else + None + ) + state + |> Xapi_stdext_std.Listext.List.setify + in + results + + let transform input = + Xapi_guest_agent.get_guest_services (lookup input) (list_subkeys input) + + let tests = + `QuickAndAutoDocumented + [ + (* no data/service *) + ([("data/key1", "v1"); ("data/key2", "v2")], []) + ; (* less than two depth in data/service *) + ([("data/service/key1", "v1"); ("data/service/key2", "v2")], []) + ; (* beyond two depth in data/service *) + ( [ + ("data/service/service-a/sub/key1", "sab-v1") + ; ("data/service/service-a/sub/key2", "sab-v2") + ] + , [("service-a/sub", "")] + ) + ; (* normal case *) + ( [ + ("data/service", "") + ; ("data/service/service-a", "") + ; ("data/service/service-b", "") + ; ("data/service/service-a/key1", "sa-v1") + ; ("data/service/service-a/key2", "sa-v2") + ; ("data/service/service-b/key1", "sb-v1") + ; ("data/service/service-b/key2", "sb-v2") + ] + , [ + ("service-a/key1", "sa-v1") + ; ("service-a/key2", "sa-v2") + ; ("service-b/key1", "sb-v1") + ; ("service-b/key2", "sb-v2") + ] + ) + ] +end) + let tests = make_suite "guest_agent_" [ ("networks", Networks.tests) ; ("get_initial_guest_metrics", Initial_guest_metrics.tests) + ; ("get_guest_services", Services.tests) ] diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index 1b89586abd4..7160737e8c3 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -247,19 +247,17 @@ let get_guest_services (lookup : string -> string option) (list : string -> string list) = let base_path = "data/service" in let services = list base_path in - List.fold_left - (fun acc service -> - let sub_path = base_path // service in - List.fold_left - (fun acc key -> - let full_path_key = sub_path // key in - let db_key = service // key in - let value = lookup full_path_key in - (db_key, Option.value ~default:"" value) :: acc - ) - acc (list sub_path) - ) - [] services + services + |> List.concat_map (fun service -> + let sub_path = base_path // service in + list sub_path + |> List.map (fun key -> + let full_path_key = sub_path // key in + let db_key = service // key in + let value = lookup full_path_key in + (db_key, Option.value ~default:"" value) + ) + ) (* In the following functions, 'lookup' reads a key from xenstore and 'list' reads a directory from xenstore. Both are relative to the guest's domainpath. *) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 482e017a8f9..5912f816e3b 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2744,9 +2744,10 @@ module VM = struct (fun port -> {Vm.protocol= Vm.Vt100; port; path= ""}) (Device.get_tc_port ~xs di.Xenctrl.domid) in - let local x = - Printf.sprintf "/local/domain/%d/%s" di.Xenctrl.domid x + let root_path = + Printf.sprintf "/local/domain/%d" di.Xenctrl.domid in + let local x = Printf.sprintf "%s/%s" root_path x in let uncooperative = try ignore_string (xs.Xs.read (local "memory/uncooperative")) ; @@ -2853,9 +2854,7 @@ module VM = struct ] |> List.fold_left (fun acc (dir, excludes, depth) -> - ls_lR ?excludes ~depth - (Printf.sprintf "/local/domain/%d" di.Xenctrl.domid) - acc dir + ls_lR ?excludes ~depth root_path acc dir ) (quota, []) |> fun (quota, acc) -> @@ -2863,9 +2862,7 @@ module VM = struct in let quota, xsdata_state = Domain.allowed_xsdata_prefixes - |> List.fold_left - (ls_lR (Printf.sprintf "/local/domain/%d" di.Xenctrl.domid)) - (quota, []) + |> List.fold_left (ls_lR root_path) (quota, []) in let path = Device_common.xenops_path_of_domain di.Xenctrl.domid From 5412b8887c3025f5ad1aba769911c6719423dc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 008/492] CP-52880: benchmark for Xapi_vdi.update_allowed_operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a XAPI database with a number of VMs/VDIs/VBDs and measure how long update_allowed_operations takes. Can't really use 2400 VMs here yet, because even with 240 VMs takes ~15s to initialize the test. ``` ╭─────────────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────╮ │name │ major-allocated │ minor-allocated │ monotonic-clock │ ├─────────────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┤ │ update_allowed_operations/VDI │ 10145.1862 mjw/run│ 7412588.8431 mnw/run│ 53244625.3769 ns/run│ ╰─────────────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────╯ update_allowed_operations/VDI (ns): { monotonic-clock per run = 53244625.376923 (confidence: 53612028.619469 to 53103082.519729); r² = Some 0.992851 } ``` Signed-off-by: Edwin Török --- .../bench/bench_vdi_allowed_operations.ml | 59 +++++++++++++++++++ ocaml/tests/bench/dune | 23 +++++++- 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 ocaml/tests/bench/bench_vdi_allowed_operations.ml diff --git a/ocaml/tests/bench/bench_vdi_allowed_operations.ml b/ocaml/tests/bench/bench_vdi_allowed_operations.ml new file mode 100644 index 00000000000..9400490fde5 --- /dev/null +++ b/ocaml/tests/bench/bench_vdi_allowed_operations.ml @@ -0,0 +1,59 @@ +open Bechamel + +module D = Debug.Make (struct let name = __MODULE__ end) + +(* tested configuration limits *) +let max_hosts = 64 + +let max_vms = (*2400*) 240 + +let max_vbds = (* 255 *) 25 + +let () = + (* a minimal harness init *) + Suite_init.harness_init () ; + (* don't spam the logs in [allocate] *) + Debug.set_level Syslog.Info + +let allocate () = + let open Test_common in + let __context = make_test_database () in + let (_sm_ref : API.ref_SM) = make_sm ~__context () in + let sr_ref = make_sr ~__context () in + let (_ : API.ref_PBD array) = + Array.init max_hosts (fun _ -> make_pbd ~__context ~sR:sr_ref ()) + in + let vms = + Array.init max_vms @@ fun _ -> + let vm_ref = make_vm ~__context () in + Array.init (max_vbds / 2) @@ fun _ -> + let vdi_ref = make_vdi ~__context ~sR:sr_ref () in + let vbd_ref = + make_vbd ~__context ~vDI:vdi_ref ~vM:vm_ref ~currently_attached:true + ~mode:`RO () + in + let vdi_ref' = make_vdi ~__context ~sR:sr_ref () in + let vbd_ref' = + make_vbd ~__context ~vDI:vdi_ref' ~vM:vm_ref ~currently_attached:true + ~mode:`RW () + in + (vdi_ref, vbd_ref, vdi_ref', vbd_ref') + in + D.info "Created test database" ; + (__context, vms) + +let test_vdi_update_allowed_operations (__context, vm_disks) = + let _, _, vdi_ref, vbd_ref = vm_disks.(0).(0) in + Db.VBD.set_currently_attached ~__context ~self:vbd_ref ~value:true ; + Xapi_vdi.update_allowed_operations ~__context ~self:vdi_ref ; + Db.VBD.set_currently_attached ~__context ~self:vbd_ref ~value:false ; + Xapi_vdi.update_allowed_operations ~__context ~self:vdi_ref + +let benchmarks = + Test.make_grouped ~name:"update_allowed_operations" + [ + Test.make_with_resource ~name:"VDI" ~allocate ~free:ignore Test.uniq + (Staged.stage test_vdi_update_allowed_operations) + ] + +let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index 0c088389dfe..61f92787759 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -1,4 +1,21 @@ (executables - (names bench_tracing bench_uuid bench_throttle2 bench_cached_reads) - (libraries tracing bechamel bechamel-notty notty.unix tracing_export threads.posix fmt notty uuid xapi_aux tests_common log xapi_internal) -) + (names + bench_tracing + bench_uuid + bench_throttle2 + bench_cached_reads + bench_vdi_allowed_operations) + (libraries + tracing + bechamel + bechamel-notty + notty.unix + tracing_export + threads.posix + fmt + notty + uuid + xapi_aux + tests_common + log + xapi_internal)) From 5b52598ed3ca54b6c6767421bfa977ad6167b1a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 009/492] CP-52880: optimize List manipulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` ╭─────────────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────╮ │name │ major-allocated │ minor-allocated │ monotonic-clock │ ├─────────────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┤ │ update_allowed_operations/VDI │ 10096.0354 mjw/run│ 7412629.8723 mnw/run│ 53075833.0400 ns/run│ ╰─────────────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────╯ update_allowed_operations/VDI (ns): { monotonic-clock per run = 53075833.040000 (confidence: 53469156.908088 to 52924201.003476); r² = Some 0.991453 } ``` Signed-off-by: Edwin Török --- ocaml/xapi/cancel_tasks.ml | 4 ++-- ocaml/xapi/xapi_vdi.ml | 28 ++++++++++++---------------- ocaml/xapi/xapi_vdi.mli | 4 ++-- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/ocaml/xapi/cancel_tasks.ml b/ocaml/xapi/cancel_tasks.ml index 690cd1026b1..acdcd2fe015 100644 --- a/ocaml/xapi/cancel_tasks.ml +++ b/ocaml/xapi/cancel_tasks.ml @@ -83,14 +83,14 @@ let update_all_allowed_operations ~__context = in let vbd_records = List.map - (fun vbd -> (vbd, Db.VBD.get_record_internal ~__context ~self:vbd)) + (fun vbd -> Db.VBD.get_record_internal ~__context ~self:vbd) all_vbds in List.iter (safe_wrapper "allowed_ops - VDIs" (fun self -> let relevant_vbds = List.filter - (fun (_, vbd_record) -> vbd_record.Db_actions.vBD_VDI = self) + (fun vbd_record -> vbd_record.Db_actions.vBD_VDI = self) vbd_records in Xapi_vdi.update_allowed_operations_internal ~__context ~self diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 3713f189040..fedf1942b06 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -155,16 +155,14 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ) ) | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) ) + records in let my_active_rw_vbd_records = List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records @@ -183,14 +181,12 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ) ) | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] ) + records in (* If the VBD is currently_attached then some operations can still be performed ie: VDI.clone (if the VM is suspended we have to have the diff --git a/ocaml/xapi/xapi_vdi.mli b/ocaml/xapi/xapi_vdi.mli index 45569a12fde..3d60ad31ff1 100644 --- a/ocaml/xapi/xapi_vdi.mli +++ b/ocaml/xapi/xapi_vdi.mli @@ -23,7 +23,7 @@ val check_operation_error : __context:Context.t -> ?sr_records:'a list -> ?pbd_records:(API.ref_PBD * API.pBD_t) list - -> ?vbd_records:(API.ref_VBD * Db_actions.vBD_t) list + -> ?vbd_records:Db_actions.vBD_t list -> bool -> Db_actions.vDI_t -> API.ref_VDI @@ -40,7 +40,7 @@ val update_allowed_operations_internal : -> self:[`VDI] API.Ref.t -> sr_records:'a list -> pbd_records:(API.ref_PBD * API.pBD_t) list - -> ?vbd_records:(API.ref_VBD * Db_actions.vBD_t) list + -> ?vbd_records:Db_actions.vBD_t list -> unit -> unit From 49400e45bc9a8ad3235a4af08c2374f42858ef6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 010/492] CP-52880: Use a Set instead of a list for checking VDI operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit O(log n) instead of O(n) complexity. Also filtering can be done more efficiently. ``` ╭─────────────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────╮ │name │ major-allocated │ minor-allocated │ monotonic-clock │ ├─────────────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┤ │ update_allowed_operations/VDI │ 9874.6062 mjw/run│ 7412584.1277 mnw/run│ 51364914.0200 ns/run│ ╰─────────────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────╯ update_allowed_operations/VDI (ns): { monotonic-clock per run = 51364914.020000 (confidence: 51756944.767313 to 51194994.874696); r² = Some 0.990799 } ``` On this test ~2-3% improvement (potentially more on larger lists). Signed-off-by: Edwin Török --- ocaml/xapi/xapi_globs.ml | 11 +++++++++++ ocaml/xapi/xapi_vdi.ml | 25 +++++++++++++++---------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index a015535dd85..1337e728f0e 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -500,6 +500,16 @@ let rpu_allowed_vm_operations = ; `update_allowed_operations ] +module Vdi_operations = struct + type t = API.vdi_operations + + (* this is more efficient than just 'let compare = Stdlib.compare', + because the compiler can specialize it to [t] without calling any runtime functions *) + let compare (a : t) (b : t) = Stdlib.compare a b +end + +module Vdi_operations_set = Set.Make (Vdi_operations) + (* Until the Ely release, the vdi_operations enum had stayed unchanged * since 2009 or earlier, but then Ely and some subsequent releases * added new members to the enum. *) @@ -517,6 +527,7 @@ let pre_ely_vdi_operations = ; `generate_config ; `blocked ] + |> Vdi_operations_set.of_list (* We might consider restricting this further. *) let rpu_allowed_vdi_operations = pre_ely_vdi_operations diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index fedf1942b06..69e04b77213 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -86,7 +86,10 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) let* () = if Helpers.rolling_upgrade_in_progress ~__context - && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) + && not + (Xapi_globs.Vdi_operations_set.mem op + Xapi_globs.rpu_allowed_vdi_operations + ) then Error (Api_errors.not_supported_during_upgrade, []) else @@ -463,7 +466,7 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records *) let all_ops = Xapi_globs.pre_ely_vdi_operations - |> List.filter (function + |> Xapi_globs.Vdi_operations_set.filter (function | `blocked -> false (* CA-260245 *) | `force_unlock -> @@ -480,18 +483,20 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records ha_enabled all self x with | Ok () -> - [x] + true | _ -> - [] + false in - List.fold_left (fun accu op -> check op @ accu) [] all_ops + all_ops |> Xapi_globs.Vdi_operations_set.filter check in let allowed = - if Helpers.rolling_upgrade_in_progress ~__context then - Xapi_stdext_std.Listext.List.intersect allowed - Xapi_globs.rpu_allowed_vdi_operations - else - allowed + ( if Helpers.rolling_upgrade_in_progress ~__context then + Xapi_globs.Vdi_operations_set.inter allowed + Xapi_globs.rpu_allowed_vdi_operations + else + allowed + ) + |> Xapi_globs.Vdi_operations_set.elements in Db.VDI.set_allowed_operations ~__context ~self ~value:allowed From 8bd5737f9af2d313fd26da14c5f24720b0907f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 011/492] CP-52880: change order of short-circuited boolean operator argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perform the cheaper check first, so that it will short-circuit the evaluation when false. ``` ╭─────────────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────╮ │name │ major-allocated │ minor-allocated │ monotonic-clock │ ├─────────────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┤ │ update_allowed_operations/VDI │ 9884.5615 mjw/run│ 7412534.5215 mnw/run│ 53189355.8923 ns/run│ ╰─────────────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────╯ update_allowed_operations/VDI (ns): { monotonic-clock per run = 53189355.892308 (confidence: 53722938.915014 to 52908047.166446); r² = Some 0.992578 } ``` Signed-off-by: Edwin Török --- ocaml/xapi/xapi_vdi.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 69e04b77213..027023e9a2e 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -99,7 +99,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) (* Don't fail with other_operation_in_progress if VDI mirroring is in progress and destroy is called as part of VDI mirroring *) let is_vdi_mirroring_in_progress = - List.exists (fun (_, op) -> op = `mirror) current_ops && op = `destroy + op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops in if List.exists (fun (_, op) -> op <> `copy) current_ops @@ -133,7 +133,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) pbd_records in let* () = - if pbds_attached = [] && List.mem op [`resize] then + if pbds_attached = [] && op = `resize then Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) else Ok () From 50a39d7e5cd51caa1e816da9d0c9867d0822841a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Apr 2024 00:12:15 +0100 Subject: [PATCH 012/492] CP-52880: Avoid O(N^2) behaviour in Xapi_vdi.update_allowed_operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Activate old xapi_vdi.update_allowed_operations optimization: get_internal_records_where does a linear scan currently, so operating on N VDIs is O(N^2). Look at the VBD records directly, like before this 2013 commit which regressed it: 5097475ded2cdd90d879833ad9efea33e1bc29eb (We are going to optimize get_record separately so it doesn't go through serialization) For now only do this when run on the coordinator to avoid potentially large number of VBD round-trip database fetches. We'll need to optimize the 'get_internal_record_where' later to also speed up the pool case. ``` ╭─────────────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────╮ │name │ major-allocated │ minor-allocated │ monotonic-clock │ ├─────────────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┤ │ update_allowed_operations/VDI │ 9205.8042 mjw/run│ 964577.0228 mnw/run│ 2868770.0725 ns/run│ ╰─────────────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────╯ update_allowed_operations/VDI (ns): { monotonic-clock per run = 2868770.072546 (confidence: 2947963.590731 to 2834338.835371); r² = Some 0.404284 } ``` Compared to the previous commit this is 18x faster. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_vdi.ml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 027023e9a2e..304fedf0b44 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -476,6 +476,15 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records ) in let all = Db.VDI.get_record_internal ~__context ~self in + let vbd_records = + match vbd_records with + | None when Pool_role.is_master () -> + all.Db_actions.vDI_VBDs + |> List.rev_map (fun self -> Db.VBD.get_record_internal ~__context ~self) + |> Option.some + | v -> + v + in let allowed = let check x = match From b6ced4474d315a56ca31dccf115a71de8b62c82c Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Mon, 10 Mar 2025 10:50:22 +0800 Subject: [PATCH 013/492] Update last_known_schema_hash and datamodel lifecycle Signed-off-by: Changlei Li --- ocaml/idl/datamodel_lifecycle.ml | 2 +- ocaml/idl/schematest.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 3299f39a264..877fa1b6626 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -110,7 +110,7 @@ let prototyped_of_field = function | "host", "last_software_update" -> Some "22.20.0" | "VM_guest_metrics", "services" -> - Some "25.6.0-next" + Some "25.10.0-next" | "VM_guest_metrics", "netbios_name" -> Some "24.28.0" | "VM", "groups" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index b534f108a57..435e2da373f 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "34390a071f5df0fac8dcf9423a9111ae" +let last_known_schema_hash = "7756b4bea0be3985c1c8f6708f04d442" let current_schema_hash : string = let open Datamodel_types in From 0b79d88c6c50a01b86a290d415634b543308f024 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 13 Mar 2025 06:23:26 +0000 Subject: [PATCH 014/492] Resolve build failure in message_forwarding.ml Change call_slave_... functions for new added ssh feature code to resolve build failure. Signed-off-by: Bengang Yuan --- ocaml/xapi/message_forwarding.ml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index aa2ff56625f..83c195f765a 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -4027,16 +4027,14 @@ functor let enable_ssh ~__context ~self = info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; let local_fn = Local.Host.enable_ssh ~self in - do_op_on ~local_fn ~__context ~host:self (fun session_id rpc -> - Client.Host.enable_ssh ~rpc ~session_id ~self - ) + let remote_fn = Client.Host.enable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self ~remote_fn let disable_ssh ~__context ~self = info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; let local_fn = Local.Host.disable_ssh ~self in - do_op_on ~local_fn ~__context ~host:self (fun session_id rpc -> - Client.Host.disable_ssh ~rpc ~session_id ~self - ) + let remote_fn = Client.Host.disable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self ~remote_fn end module Host_crashdump = struct From 18ba7e724fbd0d9454a2c62cf9684b7e10af5448 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 17 Mar 2025 01:25:33 +0000 Subject: [PATCH 015/492] Update datamodel_lifecycle.ml Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_lifecycle.ml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 38ed231cd7e..d21cad211e3 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -205,6 +205,10 @@ let prototyped_of_message = function Some "22.26.0" | "VTPM", "create" -> Some "22.26.0" + | "host", "disable_ssh" -> + Some "25.12.0-next" + | "host", "enable_ssh" -> + Some "25.12.0-next" | "host", "emergency_clear_mandatory_guidance" -> Some "24.10.0" | "host", "apply_recommended_guidances" -> @@ -223,6 +227,10 @@ let prototyped_of_message = function Some "23.30.0" | "VM", "set_groups" -> Some "24.19.1" + | "pool", "disable_ssh" -> + Some "25.12.0-next" + | "pool", "enable_ssh" -> + Some "25.12.0-next" | "pool", "get_guest_secureboot_readiness" -> Some "24.17.0" | "pool", "set_ext_auth_cache_expiry" -> From c1f02ed4f60af3545e2291da38768b1d30aa0b9e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 14 Mar 2025 08:33:29 +0000 Subject: [PATCH 016/492] CA-408126 follow-up: Fix negative ds_min and RRD values in historical archives When reading RRD archives from XML, make sure ds_min is never negative, this will convert negative values to NaNs as well. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 9d0ce1f3ffc..126442db986 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -744,7 +744,11 @@ let from_xml input = let name = get_el "name" i in let type_ = get_el "type" i in let min_hb = get_el "minimal_heartbeat" i in - let min = get_el "min" i in + (* CA-408126 - work around negative data in historical RRDs + where ds_min could have been incorrectly set to neg_infinity. + Setting ds_min to 0. means Fring.make below will turn negative + historical values to NaNs.*) + let min = max (float_of_string (get_el "min" i)) 0. in let max = get_el "max" i in ignore (get_el "last_ds" i) ; let value = get_el "value" i in @@ -767,7 +771,7 @@ let from_xml input = failwith "Bad format" ) ; ds_mrhb= float_of_string min_hb - ; ds_min= float_of_string min + ; ds_min= min ; ds_max= float_of_string max ; ds_last= VT_Unknown ; (* float_of_string "last_ds"; *) From 6e04ead07469426e1c2f84627ceac428fd212ee2 Mon Sep 17 00:00:00 2001 From: Guillaume Date: Mon, 17 Mar 2025 11:57:20 +0100 Subject: [PATCH 017/492] Add opam local switch in gitignore When using an opam local switch, ignoring the _opam directory prevents git from searching within it, reducing noise in search results. Signed-off-by: Guillaume --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2c90d7261d3..93ad844074b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _coverage/ *.install *.swp compile_flags.txt +_opam # tests xapi-db.xml From 6ba654440cfd040ebb1d1f6a60588fc343185827 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 17 Mar 2025 11:48:08 +0000 Subject: [PATCH 018/492] xenopsd: start vncterm for PVH guests Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/xenops_server_xen.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index ba3dd7e2b8a..397b478578a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2314,6 +2314,7 @@ module VM = struct (create_device_model_config vm vmextra vbds vifs vgpus vusbs) ; match vm.Vm.ty with | Vm.PV {vncterm= true; vncterm_ip= ip; _} + | Vm.PVH {vncterm= true; vncterm_ip= ip; _} | Vm.PVinPVH {vncterm= true; vncterm_ip= ip; _} -> Service.PV_Vnc.start ~xs ?ip di.Xenctrl.domid | _ -> From 24e892760447c81afc37c46d622211cbed4aafd4 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 17 Mar 2025 13:36:15 +0000 Subject: [PATCH 019/492] xenopsd: make vncterm less errorprone Previous match had a wildcard which made it easy to miss added cases, change it so all the guest types have to be enumerated. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/xenops_server_xen.ml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 397b478578a..db54f18293d 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2313,11 +2313,12 @@ module VM = struct ) (create_device_model_config vm vmextra vbds vifs vgpus vusbs) ; match vm.Vm.ty with - | Vm.PV {vncterm= true; vncterm_ip= ip; _} - | Vm.PVH {vncterm= true; vncterm_ip= ip; _} - | Vm.PVinPVH {vncterm= true; vncterm_ip= ip; _} -> - Service.PV_Vnc.start ~xs ?ip di.Xenctrl.domid - | _ -> + | PV {vncterm; vncterm_ip= ip; _} + | PVH {vncterm; vncterm_ip= ip; _} + | PVinPVH {vncterm; vncterm_ip= ip; _} -> + if vncterm then + Service.PV_Vnc.start ~xs ?ip di.Xenctrl.domid + | HVM _ -> () with Device.Ioemu_failed (name, msg) -> raise (Xenopsd_error (Failed_to_start_emulator (vm.Vm.id, name, msg))) From 05441c4425f77a41f08b0658d7adce0292d171bb Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Mon, 17 Mar 2025 16:32:05 +0000 Subject: [PATCH 020/492] Define SR_CACHING capability Signed-off-by: Mark Syms --- ocaml/tests/test_sm_features.ml | 13 +++++++++---- ocaml/xapi/smint.ml | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index 43bce4c3807..d7a63008882 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -49,6 +49,7 @@ let test_sequences = ; "SR_CACHING" ; "SR_PROBE" ; "SR_UPDATE" + ; "SR_CACHING" ; "VDI_ATTACH" ; "VDI_CLONE" ; "VDI_CONFIG_CBT" @@ -63,7 +64,8 @@ let test_sequences = ] ; smapiv1_features= [ - (Sr_probe, 1L) + (Sr_caching, 1L) + ; (Sr_probe, 1L) ; (Sr_update, 1L) ; (Vdi_attach, 1L) ; (Vdi_clone, 1L) @@ -79,7 +81,8 @@ let test_sequences = ] ; smapiv2_features= [ - "SR_PROBE/1" + "SR_CACHING/1" + ; "SR_PROBE/1" ; "SR_UPDATE/1" ; "VDI_ATTACH/1" ; "VDI_CLONE/1" @@ -97,7 +100,8 @@ let test_sequences = { capabilities= [ - "SR_PROBE" + "SR_CACHING" + ; "SR_PROBE" ; "SR_UPDATE" ; "VDI_ATTACH" ; "VDI_CLONE" @@ -113,7 +117,8 @@ let test_sequences = ] ; features= [ - ("SR_PROBE", 1L) + ("SR_CACHING", 1L) + ; ("SR_PROBE", 1L) ; ("SR_UPDATE", 1L) ; ("VDI_ATTACH", 1L) ; ("VDI_CLONE", 1L) diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index b5c290afcb7..a5809893c5f 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -36,6 +36,7 @@ module Feature = struct | Sr_metadata | Sr_trim | Sr_multipath + | Sr_caching | Vdi_create | Vdi_delete | Vdi_attach @@ -75,6 +76,7 @@ module Feature = struct ; ("SR_METADATA", Sr_metadata) ; ("SR_TRIM", Sr_trim) ; ("SR_MULTIPATH", Sr_multipath) + ; ("SR_CACHING", Sr_caching) ; ("SR_STATS", Sr_stats) ; ("VDI_CREATE", Vdi_create) ; ("VDI_DELETE", Vdi_delete) From 660df40acb481dc14839965bf2c152283a959bc9 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 18 Mar 2025 14:02:53 +0000 Subject: [PATCH 021/492] CP-52365 fix up driver-tool invocations Not all driver-tool invocations have been properly adjusted to an updated command line syntax; do it here. Signed-off-by: Christian Lindig --- ocaml/xapi/xapi_host_driver.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_host_driver.ml b/ocaml/xapi/xapi_host_driver.ml index 0416b1de33f..b44ee615c64 100644 --- a/ocaml/xapi/xapi_host_driver.ml +++ b/ocaml/xapi/xapi_host_driver.ml @@ -142,7 +142,8 @@ let select ~__context ~self ~variant = if v.API.driver_variant_hardware_present = false then no_hardware (Ref.string_of variant) ; let stdout = - Tool.call ["select"; d.API.host_driver_name; v.API.driver_variant_name] + Tool.call + ["-s"; "-n"; d.API.host_driver_name; "-v"; v.API.driver_variant_name] in info "%s: %s" __FUNCTION__ stdout ; Db.Host_driver.set_selected_variant ~__context ~self ~value:variant @@ -154,7 +155,7 @@ let select ~__context ~self ~variant = let deselect ~__context ~self = D.debug "%s driver %s" __FUNCTION__ (Ref.string_of self) ; let d = Db.Host_driver.get_record ~__context ~self in - let stdout = Tool.call ["deselect"; d.API.host_driver_name] in + let stdout = Tool.call ["-d"; d.API.host_driver_name] in info "%s: %s" __FUNCTION__ stdout ; Db.Host_driver.set_active_variant ~__context ~self ~value:Ref.null ; Db.Host_driver.set_selected_variant ~__context ~self ~value:Ref.null From 4becc093b5a4c219c81e6c51be698b1a26a897d2 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Tue, 18 Mar 2025 14:57:55 +0000 Subject: [PATCH 022/492] Update datamodel_lifecycle.ml Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_lifecycle.ml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index d21cad211e3..c842edcd5bf 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -206,9 +206,9 @@ let prototyped_of_message = function | "VTPM", "create" -> Some "22.26.0" | "host", "disable_ssh" -> - Some "25.12.0-next" + Some "25.13.0" | "host", "enable_ssh" -> - Some "25.12.0-next" + Some "25.13.0" | "host", "emergency_clear_mandatory_guidance" -> Some "24.10.0" | "host", "apply_recommended_guidances" -> @@ -228,9 +228,9 @@ let prototyped_of_message = function | "VM", "set_groups" -> Some "24.19.1" | "pool", "disable_ssh" -> - Some "25.12.0-next" + Some "25.13.0" | "pool", "enable_ssh" -> - Some "25.12.0-next" + Some "25.13.0" | "pool", "get_guest_secureboot_readiness" -> Some "24.17.0" | "pool", "set_ext_auth_cache_expiry" -> From 3608e9d68d039ecd292961d1166b1848ec50332d Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Tue, 18 Mar 2025 15:08:45 +0000 Subject: [PATCH 023/492] CA-408339: Respect xenopsd's NUMA-placement-policy default Xenopsd has an experimental feature that aims to optimise NUMA placement. This used to be configured by adding `numa-placement=true` to the file /etc/xenopsd.conf, which tells xenopsd to enable the feature. Later, an actual API was added to configure this: `host.set_numa_affinity_policy`. The expectation was that, while this new API should be preferred, the old xenopsd.conf option would still work for backwards compatibility reasons. This is particularly important for hosts that are upgraded to the new version. Unfortunately, while code exists in xenopsd to read and use the config option when it starts up, when xapi starts up immediately after xenopsd, it always overrides the NUMA config based its own DB field. The field type actually has a "default" option, but this gets translated to "any" (= no NUMA). By default, this means means that the experimental feature is disabled, no matter what the config file says, and can only be enabled through the API. The fix is for xapi to not assign a default value itself, but let xenopsd decide on the default policy. And xenopsd uses its config file to do so, as before. Signed-off-by: Rob Hoes --- ocaml/xapi-idl/xen/xenops_interface.ml | 4 +++- ocaml/xapi/xapi_xenops.ml | 6 +++--- ocaml/xenopsd/lib/xenops_server.ml | 15 ++++++++++++++- ocaml/xenopsd/xc/xenops_server_xen.ml | 5 ++++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 083c345f149..68ef01b29c9 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -501,6 +501,8 @@ module Host = struct (** best effort placement on the smallest number of NUMA nodes where possible *) [@@deriving rpcty] + type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] + type guest_agent_feature_list = guest_agent_feature list [@@deriving rpcty] end @@ -657,7 +659,7 @@ module XenopsAPI (R : RPC) = struct let numa_affinity_policy_p = Param.mk ~description:["Host NUMA affinity policy"] - ~name:"numa_affinity_policy" Host.numa_affinity_policy + ~name:"numa_affinity_policy" Host.numa_affinity_policy_opt in declare "HOST.set_numa_affinity_policy" ["Sets the host's NUMA aware VM scheduling policy"] diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 11e7f7c941f..77039e5fe3b 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -3098,11 +3098,11 @@ let set_numa_affinity_policy ~__context ~value = let open Xenops_interface.Host in match value with | `any -> - Any + Some Any | `best_effort -> - Best_effort + Some Best_effort | `default_policy -> - Any + None in Client.HOST.set_numa_affinity_policy dbg value diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 350227aa028..5325a8b29ba 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3398,8 +3398,13 @@ module VIF = struct () end +let default_numa_affinity_policy = ref Xenops_interface.Host.Any + let numa_placement = ref Xenops_interface.Host.Any +let string_of_numa_affinity_policy = + Xenops_interface.Host.(function Any -> "any" | Best_effort -> "best-effort") + module HOST = struct let stat _ dbg = Debug.with_thread_associated dbg @@ -3413,7 +3418,15 @@ module HOST = struct let set_numa_affinity_policy _ dbg = Debug.with_thread_associated dbg @@ fun policy -> debug "HOST.set_numa_affinity_policy" ; - numa_placement := policy + match policy with + | None -> + info "Enforcing default NUMA affinity policy (%s)" + (string_of_numa_affinity_policy !default_numa_affinity_policy) ; + numa_placement := !default_numa_affinity_policy + | Some p -> + info "Enforcing '%s' NUMA affinity policy" + (string_of_numa_affinity_policy p) ; + numa_placement := p let get_console_data _ dbg = Debug.with_thread_associated dbg diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index db54f18293d..6c6dd067ef7 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -5148,8 +5148,11 @@ let init () = {Xs_protocol.ACL.owner= 0; other= Xs_protocol.ACL.READ; acl= []} ) ; Device.Backend.init () ; - Xenops_server.numa_placement := + Xenops_server.default_numa_affinity_policy := if !Xenopsd.numa_placement_compat then Best_effort else Any ; + info "Default NUMA affinity policy is '%s'" + Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; + Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; Domain.numa_init () ; debug "xenstore is responding to requests" ; let () = Watcher.create_watcher_thread () in From 752a186b54ce5b672f3c9f1d8970d8e27db09600 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 19 Mar 2025 09:13:11 +0000 Subject: [PATCH 024/492] Use records when accumulating events In Xapi_event, events are accumulated by folding over the set of tables associated with a subscriber's subscription record. These events are mostly accumulated as lists within a tuple. There is no analogue of functional record update for tuples in OCaml. This means that the separate accumulations have to cite values they will not update. By introducing records, we can only cite the fields we actually change. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 126 ++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index a7412790019..8d5f0a674ef 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -525,6 +525,57 @@ let rec next ~__context = else rpc_of_events relevant +type entry = string * string * Xapi_database.Db_cache_types.Time.t + +type acc = { + creates: entry list + ; mods: entry list + ; deletes: entry list + ; last: Xapi_database.Db_cache_types.Time.t +} + +let collect_events subs tables last_generation acc table = + let open Xapi_database in + let open Db_cache_types in + let table_value = TableSet.find table tables in + let prepend_recent obj stat _ ({creates; mods; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let creates = + if created > !last_generation then + (table, obj, created) :: creates + else + creates + in + let mods = + if modified > !last_generation && not (created > !last_generation) then + (table, obj, modified) :: mods + else + mods + in + {entries with creates; mods; last} + else + entries + in + let prepend_deleted obj stat ({deletes; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let deletes = + if created <= !last_generation then + (table, obj, deleted) :: deletes + else + deletes + in + {entries with deletes; last} + else + entries + in + acc + |> Table.fold_over_recent !last_generation prepend_recent table_value + |> Table.fold_over_deleted !last_generation prepend_deleted table_value + let from_inner __context session subs from from_t timer batching = let open Xapi_database in let open From in @@ -551,75 +602,25 @@ let from_inner __context session subs from from_t timer batching = else (0L, []) in - ( msg_gen - , messages - , tableset - , List.fold_left - (fun acc table -> - (* Fold over the live objects *) - let acc = - Db_cache_types.Table.fold_over_recent !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} _ - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - ( ( if created > !last_generation then - (table, objref, created) :: creates - else - creates - ) - , ( if - modified > !last_generation - && not (created > !last_generation) - then - (table, objref, modified) :: mods - else - mods - ) - , (* Only have a mod event if we don't have a created event *) - deletes - , last - ) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - in - (* Fold over the deleted objects *) - Db_cache_types.Table.fold_over_deleted !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - if created > !last_generation then - (creates, mods, deletes, last) - (* It was created and destroyed since the last update *) - else - (creates, mods, (table, objref, deleted) :: deletes, last) - (* It might have been modified, but we can't tell now *) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - ) - ([], [], [], !last_generation) - tables - ) + let events = + let initial = + {creates= []; mods= []; deletes= []; last= !last_generation} + in + let folder = collect_events subs tableset last_generation in + List.fold_left folder initial tables + in + (msg_gen, messages, tableset, events) in (* Each event.from should have an independent subscription record *) - let msg_gen, messages, tableset, (creates, mods, deletes, last) = + let msg_gen, messages, tableset, events = with_call session subs (fun sub -> let grab_nonempty_range = Throttle.Batching.with_recursive_loop batching @@ fun self arg -> - let ( (msg_gen, messages, _tableset, (creates, mods, deletes, last)) - as result - ) = + let result = Db_lock.with_lock (fun () -> grab_range (Db_backend.make ())) in + let msg_gen, messages, _tables, events = result in + let {creates; mods; deletes; last} = events in if creates = [] && mods = [] @@ -640,6 +641,7 @@ let from_inner __context session subs from from_t timer batching = grab_nonempty_range () ) in + let {creates; mods; deletes; last} = events in last_generation := last ; let event_of op ?snapshot (table, objref, time) = { From 20aad9cf5368819e592ec846f5ba75a86140bb62 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 19 Mar 2025 09:13:50 +0000 Subject: [PATCH 025/492] Remove mutable last_generation from Xapi_event In event accumulation for event.from, the code uses a mutable variable to thread a value through event accumulation. However, this value itself is accumulated in the fold: it gets larger for each matching database event that matches a subscription. To avoid the complexity in effectively having a global, mutable variable, we drop it and make it more evident when it changes: it is changed when no events are accumulated (by grab_range). In the case that no events are accumulated, but the deadline hasn't been reached, the code tries to collect events again. It is during a retry that the last_generation needs to be bumped, as it defines the starting point by which to query the database for recent and deleted objects. In short, if no suitable events were gleaned from matching database object records since a given point, there's no point starting from there again. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 8d5f0a674ef..233abcd1eb9 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -543,13 +543,13 @@ let collect_events subs tables last_generation acc table = if Subscription.object_matches subs table obj then let last = max last (max modified deleted) in let creates = - if created > !last_generation then + if created > last_generation then (table, obj, created) :: creates else creates in let mods = - if modified > !last_generation && not (created > !last_generation) then + if modified > last_generation && not (created > last_generation) then (table, obj, modified) :: mods else mods @@ -563,7 +563,7 @@ let collect_events subs tables last_generation acc table = if Subscription.object_matches subs table obj then let last = max last (max modified deleted) in let deletes = - if created <= !last_generation then + if created <= last_generation then (table, obj, deleted) :: deletes else deletes @@ -573,8 +573,8 @@ let collect_events subs tables last_generation acc table = entries in acc - |> Table.fold_over_recent !last_generation prepend_recent table_value - |> Table.fold_over_deleted !last_generation prepend_deleted table_value + |> Table.fold_over_recent last_generation prepend_recent table_value + |> Table.fold_over_deleted last_generation prepend_deleted table_value let from_inner __context session subs from from_t timer batching = let open Xapi_database in @@ -592,9 +592,8 @@ let from_inner __context session subs from from_t timer batching = in List.filter (fun table -> Subscription.table_matches subs table) all in - let last_generation = ref from in let last_msg_gen = ref from_t in - let grab_range t = + let grab_range ~since t = let tableset = Db_cache_types.Database.tableset (Db_ref.get_database t) in let msg_gen, messages = if Subscription.table_matches subs "message" then @@ -603,10 +602,8 @@ let from_inner __context session subs from from_t timer batching = (0L, []) in let events = - let initial = - {creates= []; mods= []; deletes= []; last= !last_generation} - in - let folder = collect_events subs tableset last_generation in + let initial = {creates= []; mods= []; deletes= []; last= since} in + let folder = collect_events subs tableset since in List.fold_left folder initial tables in (msg_gen, messages, tableset, events) @@ -615,9 +612,9 @@ let from_inner __context session subs from from_t timer batching = let msg_gen, messages, tableset, events = with_call session subs (fun sub -> let grab_nonempty_range = - Throttle.Batching.with_recursive_loop batching @@ fun self arg -> + Throttle.Batching.with_recursive_loop batching @@ fun self since -> let result = - Db_lock.with_lock (fun () -> grab_range (Db_backend.make ())) + Db_lock.with_lock (fun () -> grab_range ~since (Db_backend.make ())) in let msg_gen, messages, _tables, events = result in let {creates; mods; deletes; last} = events in @@ -628,21 +625,22 @@ let from_inner __context session subs from from_t timer batching = && messages = [] && not (Clock.Timer.has_expired timer) then ( - last_generation := last ; - (* Cur_id was bumped, but nothing relevent fell out of the db. Therefore the *) + (* cur_id was bumped, but nothing relevent fell out of the database. + Therefore the last ID the client got is equivalent to the current one. *) sub.cur_id <- last ; - (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; wait2 sub last timer ; - (self [@tailcall]) arg + (* The next iteration will fold over events starting after + the last database event that matched a subscription. *) + let next = last in + (self [@tailcall]) next ) else result in - grab_nonempty_range () + grab_nonempty_range from ) in let {creates; mods; deletes; last} = events in - last_generation := last ; let event_of op ?snapshot (table, objref, time) = { id= Int64.to_string time From aff5883e5a8e7c5a0975d40340f7247a05824392 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 19 Mar 2025 09:14:09 +0000 Subject: [PATCH 026/492] Factor out event reification In order to provide event records to subscribers, we must convert the accumulated events of the form (table, objref, time) into event records. The process of doing this is simple for objects in the database. The only difference is that deletion events do not provide a snapshot (as the object has been deleted). To avoid repeating ourselves, we define an "events_of" function that accumulates event records. The function takes an argument that specifies whether an attempt to provide a snapshot should be performed. The reification of events associated with messages - which are not stored in the database - is untouched. This relies on a callback instated elsewhere. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 67 +++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 233abcd1eb9..a5203ba28bc 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -651,39 +651,48 @@ let from_inner __context session subs from from_t timer batching = ; snapshot } in - let events = - List.fold_left - (fun acc x -> - let ev = event_of `del x in - if Subscription.event_matches subs ev then ev :: acc else acc - ) - [] deletes - in - let events = - List.fold_left - (fun acc (table, objref, mtime) -> - let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `_mod ?snapshot:xml (table, objref, mtime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events mods + let events_of ~kind ?(with_snapshot = true) entries acc = + let rec go events ((table, obj, _time) as entry) = + try + let snapshot = + let serialiser = Eventgen.find_get_record table in + if with_snapshot then + serialiser ~__context ~self:obj () + else + None + in + let event = event_of kind ?snapshot entry in + if Subscription.event_matches subs event then + event :: events + else + events + with _ -> + (* CA-91931: An exception may be raised here if an object's + lifetime is too short. + + The problem is that "collect_events" and "events_of" work + on different versions of the database, so some `add and + `mod events can be lost if the corresponding object is + deleted before a snapshot is taken. + + In practice, this has only been seen with the "task" + object - which can be rapidly created and destroyed using + helper functions. + + These exceptions have been suppressed since [bc0cc5a9]. *) + events + in + List.fold_left go acc entries in let events = - List.fold_left - (fun acc (table, objref, ctime) -> - let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `add ?snapshot:xml (table, objref, ctime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events creates + [] (* Accumulate the events for objects stored in the database. *) + |> events_of ~kind:`del ~with_snapshot:false deletes + |> events_of ~kind:`_mod mods + |> events_of ~kind:`add creates in let events = + (* Messages require a special casing as their contents are not + stored in the database. *) List.fold_left (fun acc mev -> let event = From cf8ff838f93a0ed285d4b3b27f087bf72e1d269a Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 19 Mar 2025 09:14:35 +0000 Subject: [PATCH 027/492] Use record type for individual event entries Further changes to turn tuples into records. Also partially uncurries `collect_events` to make its intended use as a fold more apparent. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index a5203ba28bc..cd38814d7e2 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -525,16 +525,18 @@ let rec next ~__context = else rpc_of_events relevant -type entry = string * string * Xapi_database.Db_cache_types.Time.t +type time = Xapi_database.Db_cache_types.Time.t + +type entry = {table: string; obj: string; time: time} type acc = { creates: entry list ; mods: entry list ; deletes: entry list - ; last: Xapi_database.Db_cache_types.Time.t + ; last: time } -let collect_events subs tables last_generation acc table = +let collect_events (subs, tables, last_generation) acc table = let open Xapi_database in let open Db_cache_types in let table_value = TableSet.find table tables in @@ -544,13 +546,13 @@ let collect_events subs tables last_generation acc table = let last = max last (max modified deleted) in let creates = if created > last_generation then - (table, obj, created) :: creates + {table; obj; time= created} :: creates else creates in let mods = if modified > last_generation && not (created > last_generation) then - (table, obj, modified) :: mods + {table; obj; time= modified} :: mods else mods in @@ -564,7 +566,7 @@ let collect_events subs tables last_generation acc table = let last = max last (max modified deleted) in let deletes = if created <= last_generation then - (table, obj, deleted) :: deletes + {table; obj; time= deleted} :: deletes else deletes in @@ -603,7 +605,7 @@ let from_inner __context session subs from from_t timer batching = in let events = let initial = {creates= []; mods= []; deletes= []; last= since} in - let folder = collect_events subs tableset since in + let folder = collect_events (subs, tableset, since) in List.fold_left folder initial tables in (msg_gen, messages, tableset, events) @@ -641,18 +643,18 @@ let from_inner __context session subs from from_t timer batching = ) in let {creates; mods; deletes; last} = events in - let event_of op ?snapshot (table, objref, time) = + let event_of op ?snapshot {table; obj; time} = { id= Int64.to_string time ; ts= "0.0" ; ty= String.lowercase_ascii table ; op - ; reference= objref + ; reference= obj ; snapshot } in let events_of ~kind ?(with_snapshot = true) entries acc = - let rec go events ((table, obj, _time) as entry) = + let rec go events ({table; obj; time= _} as entry) = try let snapshot = let serialiser = Eventgen.find_get_record table in @@ -696,13 +698,14 @@ let from_inner __context session subs from from_t timer batching = List.fold_left (fun acc mev -> let event = + let table = "message" in match mev with | Message.Create (_ref, message) -> event_of `add ?snapshot:(Some (API.rpc_of_message_t message)) - ("message", Ref.string_of _ref, 0L) + {table; obj= Ref.string_of _ref; time= 0L} | Message.Del _ref -> - event_of `del ("message", Ref.string_of _ref, 0L) + event_of `del {table; obj= Ref.string_of _ref; time= 0L} in event :: acc ) From 8e0b25372738c0c1736a8537dd26406c7a5a1906 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 19 Mar 2025 14:29:19 +0000 Subject: [PATCH 028/492] xenctrlext: do not truncate the amount of memory in claims to 32 bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Int_val truncates values to a 32-bit int. Instead use Long_val, which does not suffer from this. This is a problem when claiming more than ≈ 9706GiBs for a domain. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/c_stubs/xenctrlext_stubs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c index cbf2af76145..0e427548ed4 100644 --- a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c +++ b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c @@ -678,7 +678,7 @@ CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, value domid_val int retval, the_errno; xc_interface* xch = xch_of_val(xch_val); uint32_t domid = Int_val(domid_val); - unsigned long nr_pages = Int_val(nr_pages_val); + unsigned long nr_pages = Long_val(nr_pages_val); caml_release_runtime_system(); retval = xc_domain_claim_pages(xch, domid, nr_pages); From 8bafeda1c864c3fce8b769c8e75b45c4b772d5d3 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 19 Mar 2025 11:49:47 +0000 Subject: [PATCH 029/492] CA-407177: Fix swtpm's use of SHA1 on XS9 The default crypto policy in XS9 disables use of SHA1. However, swtpm needs to use it since it advertises SHA1 support to guests. On XS9, swtpm will ship with a custom openssl configuration file for this purpose so set the appropriate environment variable to use it if the file exists. Signed-off-by: Ross Lagerwall --- ocaml/xenopsd/scripts/swtpm-wrapper | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocaml/xenopsd/scripts/swtpm-wrapper b/ocaml/xenopsd/scripts/swtpm-wrapper index dfb322e6453..63de73a796a 100755 --- a/ocaml/xenopsd/scripts/swtpm-wrapper +++ b/ocaml/xenopsd/scripts/swtpm-wrapper @@ -13,6 +13,7 @@ # GNU Lesser General Public License for more details. import os +import os.path import stat import socket import sys @@ -140,6 +141,8 @@ def main(argv): tpm_env = dict(os.environ) tpm_env["LD_LIBRARY_PATH"] = "/usr/lib:" + if os.path.exists("/etc/ssl/openssl-swtpm.cnf"): + tpm_env["OPENSSL_CONF"] = "/etc/ssl/openssl-swtpm.cnf" if needs_init or check_state_needs_init(tpm_state_file): if tpm_file is None: From c94207ea3a638f9bcfe3d558e702da215843a86e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 20 Mar 2025 11:16:52 +0000 Subject: [PATCH 030/492] forkexecd: do not tie vfork_helper to the forkexec package This makes the binary unusable for testing in xapi_forkexecd Signed-off-by: Pau Ruiz Safont --- ocaml/forkexecd/dune | 1 - 1 file changed, 1 deletion(-) diff --git a/ocaml/forkexecd/dune b/ocaml/forkexecd/dune index 40d4a7eb7c6..01b51f24ed5 100644 --- a/ocaml/forkexecd/dune +++ b/ocaml/forkexecd/dune @@ -3,7 +3,6 @@ (rule (deps (source_tree helper)) (targets vfork_helper) - (package forkexec) (action (no-infer (progn From cecdcbab5797f50acbb656fb2ab5a3a13dccf678 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 20 Mar 2025 11:46:52 +0000 Subject: [PATCH 031/492] opam: add missing dependencies to packages This makes them compilable in xs-opam Signed-off-by: Pau Ruiz Safont --- dune-project | 7 +++++-- xapi-debug.opam | 1 + xapi-stdext-threads.opam | 6 ++++-- xapi-tools.opam | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dune-project b/dune-project index af6364148da..2d8cab13744 100644 --- a/dune-project +++ b/dune-project @@ -264,6 +264,7 @@ re result rpclib + rrdd-plugin rresult sexplib sexplib0 @@ -315,6 +316,7 @@ ; 'xapi-tools' will have version ~dev, not 'master' like all the others ; because it is not in xs-opam yet rrd-transport + rrdd-plugin xapi-tracing-export xen-api-client (alcotest :with-test) @@ -721,10 +723,11 @@ This package provides an Lwt compatible interface to the library.") base-threads base-unix (alcotest :with-test) + (clock (= :version)) (fmt :with-test) - (odoc :with-doc) + mtime + (xapi-log (= :version)) (xapi-stdext-pervasives (= :version)) - (mtime :with-test) (xapi-stdext-unix (= :version)) ) ) diff --git a/xapi-debug.opam b/xapi-debug.opam index f8550f7508b..a2b7d9dd863 100644 --- a/xapi-debug.opam +++ b/xapi-debug.opam @@ -42,6 +42,7 @@ depends: [ "re" "result" "rpclib" + "rrdd-plugin" "rresult" "sexplib" "sexplib0" diff --git a/xapi-stdext-threads.opam b/xapi-stdext-threads.opam index ae64e906b29..a61529e7e09 100644 --- a/xapi-stdext-threads.opam +++ b/xapi-stdext-threads.opam @@ -11,11 +11,13 @@ depends: [ "base-threads" "base-unix" "alcotest" {with-test} + "clock" {= version} "fmt" {with-test} - "odoc" {with-doc} + "mtime" + "xapi-log" {= version} "xapi-stdext-pervasives" {= version} - "mtime" {with-test} "xapi-stdext-unix" {= version} + "odoc" {with-doc} ] build: [ ["dune" "subst"] {dev} diff --git a/xapi-tools.opam b/xapi-tools.opam index 852102302dd..da2e2ce2967 100644 --- a/xapi-tools.opam +++ b/xapi-tools.opam @@ -28,6 +28,7 @@ depends: [ "xmlm" "yojson" "rrd-transport" + "rrdd-plugin" "xapi-tracing-export" "xen-api-client" "alcotest" {with-test} From e2f1ffd4e3abf0d4123e1bb15aef9527048db3d9 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Tue, 18 Mar 2025 13:08:48 +0000 Subject: [PATCH 032/492] Simplify code by using get_trace_context Signed-off-by: Steven Woods --- ocaml/libs/tracing/tracing.ml | 5 +---- ocaml/libs/tracing/tracing_export.ml | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 4cb89d45b8a..c1cdc33692e 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -437,10 +437,7 @@ module Span = struct let to_propagation_context span = let traceparent = span |> get_context |> SpanContext.to_traceparent in - span - |> get_context - |> SpanContext.context_of_span_context - |> TraceContext.with_traceparent (Some traceparent) + span |> get_trace_context |> TraceContext.with_traceparent (Some traceparent) let with_trace_context span trace_context = let span_context = diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 592a12bbb26..5844d389e1c 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -83,10 +83,7 @@ module Content = struct ) in let tags = - let span_context = Span.get_context s in - let trace_context = - SpanContext.context_of_span_context span_context - in + let trace_context = Span.get_trace_context s in let baggage = TraceContext.baggage_of trace_context |> Option.value ~default:[] in From b39d80e7ff66a464ed92c0e84869f72496e07bc5 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 20 Mar 2025 16:32:07 +0000 Subject: [PATCH 033/492] ci: url of XS_SR_ERRORCODES.xml It got changed by https://github.com/xapi-project/sm/commit/84992502148a8a5e5639151f3e8f1c19e18cac38 Signed-off-by: Pau Ruiz Safont --- .github/workflows/setup-xapi-environment/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/setup-xapi-environment/action.yml b/.github/workflows/setup-xapi-environment/action.yml index 8381e31117b..c3126a6d157 100644 --- a/.github/workflows/setup-xapi-environment/action.yml +++ b/.github/workflows/setup-xapi-environment/action.yml @@ -18,7 +18,7 @@ runs: shell: bash run: | mkdir -p /opt/xensource/sm - wget -O /opt/xensource/sm/XE_SR_ERRORCODES.xml https://raw.githubusercontent.com/xapi-project/sm/master/drivers/XE_SR_ERRORCODES.xml + wget -O /opt/xensource/sm/XE_SR_ERRORCODES.xml https://raw.githubusercontent.com/xapi-project/sm/master/libs/sm/core/XE_SR_ERRORCODES.xml - name: Load environment file id: dotenv From bec4ca6f3a1db8306bb6ec5cdff7a399e7e8f630 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 20 Mar 2025 15:17:51 +0800 Subject: [PATCH 034/492] CA-404460: Expose Stunnel_verify_error for mismatched certificate Xapi uses stunnel to connect to remote peer and exposes certificate verify error by parsing stunnel logs. And when connect with a mismatched certificate, the log from stunnel would be: stunnel 5.60 on x86_64-koji-linux-gnu platform Compiled/running with OpenSSL 3.0.9 30 May 2023 Threading:PTHREAD Sockets:POLL,IPv6 TLS:ENGINE,OCSP,SNI Auth:LIBWRAP Reading configuration from descriptor 8 UTF-8 byte order mark not detected FIPS mode disabled Configuration successful Service [stunnel] accepted connection from unnamed socket s_connect: connected 10.63.96.116:443 Service [stunnel] connected remote server from 10.63.97.76:34138 CERT: Pre-verification error: self-signed certificate Rejected by CERT at depth=0: CN=10.63.96.116 SSL_connect: ssl/statem/statem_clnt.c:1889: error:0A000086:SSL routines::certificate verify failed Connection reset: 0 byte(s) sent to TLS, 0 byte(s) sent to socket This commit fixes the exposing of Stunnel_verify_error by checking "certificate verify failed" in the log, and expose it with reason "0A000086:SSL routines::certificate verify failed". We can find that the log "VERIFY ERROR" is not print by stunnel 5.60, which is the version of stunnel used in XS now, but it indeed was printed before: 20d6d2faf740ee5eb9b13752b076ee583fec94d8:src/verify.c: s_log(LOG_WARNING, "VERIFY ERROR: depth=%d, error=%s: %s", [gangj@xenrt10715872 stunnel]$ git branch --contains 20d6d2faf740ee5eb9b13752b076ee583fec94d8 master * private/gangj/stunnel-5.60 While we can find the log "certificate verify failed" which comes from openssl library: https://github.com/openssl/openssl/blob/openssl-3.0.9/ssl/ssl_err.c {ERR_PACK(ERR_LIB_SSL, 0, SSL_R_CERTIFICATE_VERIFY_FAILED), "certificate verify failed"}, Signed-off-by: Gang Ji --- ocaml/libs/stunnel/stunnel.ml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 93b990d8449..e6732a0c0d7 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -483,12 +483,17 @@ let check_verify_error line = let split_1 c s = match Astring.String.cut ~sep:c s with Some (x, _) -> x | None -> s in - if Astring.String.is_infix ~affix:"VERIFY ERROR: " line then - match Astring.String.find_sub ~sub:"error=" line with + (* When verified with a mismatched certificate, one line of log from stunnel + * would look like: + SSL_connect: ssl/statem/statem_clnt.c:1889: error:0A000086:SSL routines::certificate verify failed + * in this case, Stunnel_verify_error can be raised with detailed error as + * reason if it can found in the log *) + if Astring.String.is_infix ~affix:"certificate verify failed" line then + match Astring.String.find_sub ~sub:"error:" line with | Some e -> raise (Stunnel_verify_error - (split_1 "," (sub_after (e + String.length "error=") line)) + (split_1 "," (sub_after (e + String.length "error:") line)) ) | None -> raise (Stunnel_verify_error "") From 659284ed81115ed4b364d7a82c31ebda8201bde3 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 20 Mar 2025 15:34:25 +0800 Subject: [PATCH 035/492] CA-404460: Expose Stunnel_verify_error for corrupted certificate Xapi uses stunnel to connect to remote peer and exposes certificate verify error by parsing stunnel logs. And when connecting with a corrupted certificate, the log from stunnel would be: Initializing inetd mode configuration Clients allowed=500 stunnel 5.60 on x86_64-koji-linux-gnu platform Compiled/running with OpenSSL 3.0.9 30 May 2023 Threading:PTHREAD Sockets:POLL,IPv6 TLS:ENGINE,OCSP,SNI Auth:LIBWRAP errno: (*__errno_location ()) Initializing inetd mode configuration Reading configuration from descriptor 8 UTF-8 byte order mark not detected FIPS mode disabled No PRNG seeding was required stunnel default security level set: 2 Ciphers: ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256 TLSv1.3 ciphersuites: TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256:TLS_CHACHA20_POLY1305_SHA256 TLS options: 0x02100000 (+0x00000000, -0x00000000) Session resumption enabled No certificate or private key specified error queue: crypto/x509/by_file.c:234: error:05880009:x509 certificate routines::PEM lib error queue: crypto/pem/pem_info.c:169: error:0488000D:PEM routines::ASN1 lib error queue: crypto/asn1/tasn_dec.c:349: error:0688010A:asn1 encoding routines::nested asn1 error error queue: crypto/asn1/tasn_dec.c:1178: error:06800066:asn1 encoding routines::bad object header SSL_CTX_load_verify_locations: crypto/asn1/asn1_lib.c:95: error:0680009B:asn1 encoding routines::too long Inetd mode: Failed to initialize TLS context Configuration failed Deallocating temporary section defaults This commit exposes Stunnel_verify_error by checking "No certificate or private key specified" in the log, and expose it with reason "The specified certificate is corrupt". And the log "No certificate or private key specified" comes from stunnel: https://github.com/mtrojnar/stunnel/blob/9f291d5ba27f0fa45353ae87cf9ac5f05401b012/src/ctx.c#L690 /* load the certificate and private key */ if(!section->cert || !section->key) { s_log(LOG_DEBUG, "No certificate or private key specified"); return 0; /* OK */ } Signed-off-by: Gang Ji --- ocaml/libs/stunnel/stunnel.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index e6732a0c0d7..c9b18f9b4d1 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -497,6 +497,11 @@ let check_verify_error line = ) | None -> raise (Stunnel_verify_error "") + else if + Astring.String.is_infix ~affix:"No certificate or private key specified" + line + then + raise (Stunnel_verify_error "The specified certificate is corrupt") else () From 95d888eaa9736f9daf7f3dde9a342b5dd33ee5b5 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 20 Mar 2025 16:00:22 +0800 Subject: [PATCH 036/492] CA-404460: Fix the exposing of Stunnel_verify_error in check_error There are 4 error logs are checked in check_error: "Connection refused" "No host resolved" "No route to host" "Invalid argument" We can indeed find the logging in stunnel for 2 of them in stunnel 5.60, which is the version used in XS now: [gangj@xenrt10715872 stunnel]$ git grep -C 1 -wn "Connection refused" src/log.c-493- case 10061: src/log.c:494: return "Connection refused (WSAECONNREFUSED)"; src/log.c-495- case 10062: -- src/protocol.c-240- s_log(LOG_ERR, src/protocol.c:241: "SOCKS5 request failed: Connection refused"); src/protocol.c-242- break; [gangj@xenrt10715872 stunnel]$ [gangj@xenrt10715872 stunnel]$ git grep -C 1 -wn "Invalid argument" src/log.c-437- case 10022: src/log.c:438: return "Invalid argument (WSAEINVAL)"; src/log.c-439- case 10024: While the other 2 are not found: [gangj@xenrt10715872 stunnel]$ git grep -C 1 -wn "No host resolved" [gangj@xenrt10715872 stunnel]$ [gangj@xenrt10715872 stunnel]$ git grep -C 1 -wn "No route to host" [gangj@xenrt10715872 stunnel]$ But seems "No host resolved" was in the history of stunnel: ddef8f192ecfe195610000c6f6272f6b77b97e53:src/client.c: s_log(LOG_ERR, "No host resolved"); [gangj@xenrt10715872 stunnel]$ git branch --contains ddef8f192ecfe195610000c6f6272f6b77b97e53 master * private/gangj/stunnel-5.60 And I failed to find the log "No route to host" in any historical code of stunnel or openssl. So at least for the two errors "No host resolved" and "No route to host", I think we will need to test and fix them later. Signed-off-by: Gang Ji --- ocaml/libs/stunnel/stunnel.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index c9b18f9b4d1..eef9a041234 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -506,7 +506,7 @@ let check_verify_error line = () let check_error s line = - if Astring.String.is_infix ~affix:line s then + if Astring.String.is_infix ~affix:s line then raise (Stunnel_error s) let diagnose_failure st_proc = From 15df70084b5060359ad30fbe2781e9860130e78a Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 20 Mar 2025 16:28:55 +0800 Subject: [PATCH 037/492] CA-404460: expose ssl_verify_error during updates syncing Signed-off-by: Gang Ji --- ocaml/xapi/repository.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index bf418ee8b03..1ec1486a3e2 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -306,6 +306,8 @@ let sync ~__context ~self ~token ~token_id ~username ~password = with | Api_errors.Server_error (_, _) as e -> raise e + | Stunnel.Stunnel_verify_error reason -> + raise (Api_errors.Server_error (Api_errors.ssl_verify_error, [reason])) | e -> error "Failed to sync with remote YUM repository: %s" (ExnHelper.string_of_exn e) ; From 938bdb4d03fd2a991cb9ba1263005e283772a0bb Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 21 Mar 2025 02:20:53 +0000 Subject: [PATCH 038/492] CA-408550: XSI-1834: Host netbios name should be added to local hosts file to avoid DNS lookup Without adding netbios name into /etc/hosts, a DNS query for localhost name is performed when talks to Domain Controller. This waste resources, especially when XenServer is monitorred by ControllUP, who perform external auth very frequently Adding netbios name into /etc/hosts to avoid the DNS query and keep consistent with PBIS Signed-off-by: Lin Liu --- ocaml/tests/test_extauth_plugin_ADwinbind.ml | 104 ++++++++++ ocaml/xapi/extauth_plugin_ADwinbind.ml | 204 +++++++++++++------ ocaml/xapi/extauth_plugin_ADwinbind.mli | 12 ++ ocaml/xapi/helpers.ml | 23 +++ 4 files changed, 281 insertions(+), 62 deletions(-) diff --git a/ocaml/tests/test_extauth_plugin_ADwinbind.ml b/ocaml/tests/test_extauth_plugin_ADwinbind.ml index a0180ee5e25..6babeda140c 100644 --- a/ocaml/tests/test_extauth_plugin_ADwinbind.ml +++ b/ocaml/tests/test_extauth_plugin_ADwinbind.ml @@ -499,6 +499,101 @@ let test_wbinfo_exception_of_stderr = in matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) +let test_add_ipv4_localhost_to_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = HostsConfIPv4.join "hostname" "domain" inp in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + , [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4 hostname hostname.domain" + ] + ) + ; ( ["127.0.0.1 localhost hostname hostname.domain localhost.localdomain"] + , ["127.0.0.1 localhost localhost.localdomain hostname hostname.domain"] + ) + ; ( ["192.168.0.1 some_host"] + , ["127.0.0.1 hostname hostname.domain"; "192.168.0.1 some_host"] + ) + ; ([], ["127.0.0.1 hostname hostname.domain"]) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) + +let test_add_ipv4_and_ipv6_localhost_to_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = + HostsConfIPv6.join "hostname" "domain" inp |> fun lines -> + HostsConfIPv4.join ~name:"hostname" ~domain:"domain" ~lines + in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( ["127.0.0.1 localhost"] + , [ + "::1 hostname hostname.domain" + ; "127.0.0.1 localhost hostname hostname.domain" + ] + ) + ; ( ["127.0.0.1 localhost"; "::1 localhost"] + , [ + "127.0.0.1 localhost hostname hostname.domain" + ; "::1 localhost hostname hostname.domain" + ] + ) + ; ( [] + , ["127.0.0.1 hostname hostname.domain"; "::1 hostname hostname.domain"] + ) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) + +let test_remove_ipv4_localhost_from_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = HostsConfIPv4.leave "hostname" "domain" inp in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + , [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + ) + ; ( ["127.0.0.1 localhost hostname hostname.domain localhost.localdomain"] + , ["127.0.0.1 localhost localhost.localdomain"] + ) + ; (["127.0.0.1 hostname hostname.domain"], []) + ; ( ["192.168.0.1 some_host"; "127.0.0.1 localhost hostname"] + , ["192.168.0.1 some_host"; "127.0.0.1 localhost"] + ) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) + let tests = [ ("ADwinbind:extract_ou_config", ExtractOuConfig.tests) @@ -512,4 +607,13 @@ let tests = ; ( "ADwinbind:test_wbinfo_exception_of_stderr" , test_wbinfo_exception_of_stderr ) + ; ( "ADwinbind:test_add_ipv4_localhost_to_hosts" + , test_add_ipv4_localhost_to_hosts + ) + ; ( "ADwinbind:test_remove_ipv4_localhost_from_hosts" + , test_remove_ipv4_localhost_from_hosts + ) + ; ( "ADwinbind:test_add_ipv4_and_ipv6_localhost_to_hosts" + , test_add_ipv4_and_ipv6_localhost_to_hosts + ) ] diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index 6f51eea9cc5..efc6ac9f1a0 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -828,48 +828,39 @@ let config_winbind_daemon ~workgroup ~netbios_name ~domain = let conf_contents = match (workgroup, netbios_name, domain) with | Some wkgroup, Some netbios, Some dom -> - String.concat "\n" - [ - "# auto-generated by xapi" - ; "[global]" - ; "kerberos method = secrets and keytab" - ; Printf.sprintf "realm = %s" dom - ; "security = ADS" - ; "template shell = /bin/bash" - ; "winbind refresh tickets = yes" - ; "winbind enum groups = no" - ; "winbind enum users = no" - ; "winbind scan trusted domains = yes" - ; "winbind use krb5 enterprise principals = yes" - ; Printf.sprintf "winbind cache time = %d" - !Xapi_globs.winbind_cache_time - ; Printf.sprintf "machine password timeout = 0" - ; Printf.sprintf "kerberos encryption types = %s" - (Kerberos_encryption_types.Winbind.to_string - !Xapi_globs.winbind_kerberos_encryption_type - ) - ; Printf.sprintf "workgroup = %s" wkgroup - ; Printf.sprintf "netbios name = %s" netbios - ; "idmap config * : range = 3000000-3999999" - ; Printf.sprintf "idmap config %s: backend = rid" dom - ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom - ; Printf.sprintf "log level = %s" (debug_level ()) - ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback - ; "idmap config * : backend = tdb" - ; "" (* Empty line at the end *) - ] + [ + "# autogenerated by xapi" + ; "[global]" + ; "kerberos method = secrets and keytab" + ; Printf.sprintf "realm = %s" dom + ; "security = ADS" + ; "template shell = /bin/bash" + ; "winbind refresh tickets = yes" + ; "winbind enum groups = no" + ; "winbind enum users = no" + ; "winbind scan trusted domains = yes" + ; "winbind use krb5 enterprise principals = yes" + ; Printf.sprintf "winbind cache time = %d" + !Xapi_globs.winbind_cache_time + ; Printf.sprintf "machine password timeout = 0" + ; Printf.sprintf "kerberos encryption types = %s" + (Kerberos_encryption_types.Winbind.to_string + !Xapi_globs.winbind_kerberos_encryption_type + ) + ; Printf.sprintf "workgroup = %s" wkgroup + ; Printf.sprintf "netbios name = %s" netbios + ; "idmap config * : range = 3000000-3999999" + ; Printf.sprintf "idmap config %s: backend = rid" dom + ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom + ; Printf.sprintf "log level = %s" (debug_level ()) + ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback + ; "idmap config * : backend = tdb" + ; "" (* Empty line at the end *) + ] | _ -> - String.concat "\n" - [ - "# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *) - ] + ["# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *)] in - - let len = String.length conf_contents in - Unixext.atomic_write_to_file smb_config 0o0644 (fun fd -> - let (_ : int) = Unix.single_write_substring fd conf_contents 0 len in - Unix.fsync fd - ) + Helpers.ListFile.to_path smb_config conf_contents let clear_winbind_config () = (* Keep the winbind configuration if xapi config file specified explictly, @@ -1222,27 +1213,21 @@ module RotateMachinePassword = struct in let conf_contents = - String.concat "\n" - ([ - "# auto-generated by xapi" - ; "[libdefaults]" - ; Printf.sprintf "default_realm = %s" realm - ; "[realms]" - ; Printf.sprintf "%s={" realm - ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn - ; Printf.sprintf "kdc=%s" kdc_fqdn - ; "}" (* include winbind generated configure if exists *) - ] - @ include_item - @ [""] (* Empty line at the end *) - ) + [ + "# autogenerated by xapi" + ; "[libdefaults]" + ; Printf.sprintf "default_realm = %s" realm + ; "[realms]" + ; Printf.sprintf "%s={" realm + ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn + ; Printf.sprintf "kdc=%s" kdc_fqdn + ; "}" (* include winbind generated configure if exists *) + ] + @ include_item + @ [""] + (* Empty line at the end *) in - - let len = String.length conf_contents in - Unixext.atomic_write_to_file tmp_krb5_conf 0o0644 (fun fd -> - let (_ : int) = Unix.single_write_substring fd conf_contents 0 len in - Unix.fsync fd - ) + Helpers.ListFile.to_path tmp_krb5_conf conf_contents let clear_tmp_krb5_conf () = if !Xapi_globs.winbind_keep_configuration then @@ -1307,6 +1292,83 @@ module RotateMachinePassword = struct let stop_rotate () = Scheduler.remove_from_queue task_name end +module type LocalHostTag = sig + val local_ip : string +end + +module HostsConfTagIPv4 : LocalHostTag = struct let local_ip = "127.0.0.1" end + +module HostsConfTagIPv6 : LocalHostTag = struct let local_ip = "::1" end + +module type HostsConf = sig + (* add the domain info into conf*) + val join : name:string -> domain:string -> lines:string list -> string list + + (* remove the domain info from conf*) + val leave : name:string -> domain:string -> lines:string list -> string list +end + +module HostsConfFunc (T : LocalHostTag) : HostsConf = struct + let sep = ' ' + + let sep_str = String.make 1 sep + + type t = Add | Remove + + let interest line = String.starts_with ~prefix:T.local_ip line + + let handle op name domain line = + let line = String.lowercase_ascii line in + let name = String.lowercase_ascii name in + let domain = String.lowercase_ascii domain in + let fqdn = Printf.sprintf "%s.%s" name domain in + match interest line with + | false -> + line + | true -> + String.split_on_char sep line + |> List.filter (fun x -> x <> name && x <> fqdn) + |> (fun x -> match op with Add -> x @ [name; fqdn] | Remove -> x) + |> String.concat sep_str + + let leave ~name ~domain ~lines = + List.map (fun line -> handle Remove name domain line) lines + (* If no name for local ip left, just remove it *) + |> List.filter (fun x -> String.trim x <> T.local_ip) + + let join ~name ~domain ~lines = + List.map (fun line -> handle Add name domain line) lines |> fun x -> + match List.exists (fun l -> interest l) x with + | true -> + x + | false -> + (* Does not found and updated the conf, then add one *) + [ + Printf.sprintf "%s%s%s%s%s.%s" T.local_ip sep_str name sep_str name + domain + ] + @ x +end + +module HostsConfIPv4 = HostsConfFunc (HostsConfTagIPv4) +module HostsConfIPv6 = HostsConfFunc (HostsConfTagIPv6) + +module ConfigHosts = struct + let path = "/etc/hosts" + + let join ~name ~domain = + Helpers.ListFile.of_path path + |> HostsConfIPv4.join ~name ~domain + |> HostsConfIPv6.join ~name ~domain + |> Helpers.ListFile.to_path path + + let leave ~name ~domain = + Helpers.ListFile.of_path path + |> HostsConfIPv4.leave ~name ~domain + |> HostsConfIPv6.leave ~name ~domain + |> Helpers.ListFile.to_path path +end + let build_netbios_name ~config_params = let key = "netbios-name" in match List.assoc_opt key config_params with @@ -1628,18 +1690,21 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ~netbios_name:(Some netbios_name) ; ClosestKdc.trigger_update ~start:0. ; RotateMachinePassword.trigger_rotate ~start:0. ; + ConfigHosts.join ~domain:service_name ~name:netbios_name ; (* Trigger right now *) debug "Succeed to join domain %s" service_name with | Forkhelpers.Spawn_internal_error (_, stdout, _) -> error "Join domain: %s error: %s" service_name stdout ; clear_winbind_config () ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; (* The configure is kept for debug purpose with max level *) raise (Auth_service_error (stdout |> tag_from_err_msg, stdout)) | Xapi_systemctl.Systemctl_fail _ -> let msg = Printf.sprintf "Failed to start %s" Winbind.name in error "Start daemon error: %s" msg ; config_winbind_daemon ~domain:None ~workgroup:None ~netbios_name:None ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; raise (Auth_service_error (E_GENERIC, msg)) | e -> let msg = @@ -1650,6 +1715,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct in error "Enable extauth error: %s" msg ; clear_winbind_config () ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; raise (Auth_service_error (E_GENERIC, msg)) (* unit on_disable() @@ -1663,7 +1729,14 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct let@ __context = Context.with_tracing ~__context __FUNCTION__ in let user = List.assoc_opt "user" config_params in let pass = List.assoc_opt "pass" config_params in - let {service_name; _} = get_domain_info_from_db () in + let {service_name; workgroup; netbios_name; _} = + get_domain_info_from_db () + in + ( if Option.is_some netbios_name then + Option.get netbios_name |> fun name -> + ConfigHosts.leave ~domain:service_name ~name + ) ; + (* Clean extauth config *) persist_extauth_config ~domain:None ~user:None ~ou_conf:[] ~workgroup:None ~machine_pwd_last_change_time:None ~netbios_name:None ; @@ -1688,7 +1761,14 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct Winbind.start ~timeout:5. ~wait_until_success:true ; ClosestKdc.trigger_update ~start:ClosestKdc.startup_delay ; RotateMachinePassword.trigger_rotate ~start:5. ; - Winbind.check_ready_to_serve ~timeout:300. + Winbind.check_ready_to_serve ~timeout:300. ; + + let {service_name; workgroup; netbios_name; _} = + get_domain_info_from_db () + in + if Option.is_some netbios_name then + Option.get netbios_name |> fun name -> + ConfigHosts.join ~domain:service_name ~name (* unit on_xapi_exit() diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.mli b/ocaml/xapi/extauth_plugin_ADwinbind.mli index 0c9137d5f54..dab3963fa1a 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.mli +++ b/ocaml/xapi/extauth_plugin_ADwinbind.mli @@ -75,3 +75,15 @@ module Migrate_from_pbis : sig val parse_value_from_pbis : string -> string end + +module type HostsConf = sig + (* add the domain info into conf*) + val join : name:string -> domain:string -> lines:string list -> string list + + (* remove the domain info from conf*) + val leave : name:string -> domain:string -> lines:string list -> string list +end + +module HostsConfIPv4 : HostsConf + +module HostsConfIPv6 : HostsConf diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 2ef16112053..b5c810d76c3 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -2360,3 +2360,26 @@ module AuthenticationCache = struct None end end + +module ListFile = struct + (* Read/Write List to/from file, line by line *) + let of_path path = + let ic = open_in path in + finally + (fun () -> + let rec read_lines acc = + try + let line = input_line ic in + read_lines (acc @ [line]) + with End_of_file -> acc + in + read_lines [] + ) + (fun () -> close_in ic) + + let to_path ?(perm = 0o0644) path contents = + String.concat "\n" contents |> fun x -> + Unixext.atomic_write_to_file path perm @@ fun fd -> + Unixext.really_write_string fd x |> ignore ; + Unix.fsync fd +end From f352a5629651bf427d8883952d62faf377f04d4b Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 11:11:08 +0000 Subject: [PATCH 039/492] CP-54020: Split the mux policy from storage_mux This split is so that this part can be used by storage_migrate later on for its own multiplexing logic, to avoid dependency cycle. Signed-off-by: Vincent Liu --- ocaml/tests/test_vdi_cbt.ml | 2 +- ocaml/xapi/storage_access.ml | 4 +- ocaml/xapi/storage_mux.ml | 108 ++--------------------------- ocaml/xapi/storage_mux_reg.ml | 120 +++++++++++++++++++++++++++++++++ ocaml/xapi/storage_mux_reg.mli | 57 ++++++++++++++++ 5 files changed, 186 insertions(+), 105 deletions(-) create mode 100644 ocaml/xapi/storage_mux_reg.ml create mode 100644 ocaml/xapi/storage_mux_reg.mli diff --git a/ocaml/tests/test_vdi_cbt.ml b/ocaml/tests/test_vdi_cbt.ml index 3137e0485cb..3253f21311a 100644 --- a/ocaml/tests/test_vdi_cbt.ml +++ b/ocaml/tests/test_vdi_cbt.ml @@ -33,7 +33,7 @@ let register_smapiv2_server (module S : Storage_interface.Server_impl) sr_ref = } in - Storage_mux.register sr_ref rpc "" dummy_query_result + Storage_mux_reg.register sr_ref rpc "" dummy_query_result let make_smapiv2_storage_server ?vdi_enable_cbt ?vdi_disable_cbt ?vdi_list_changed_blocks ?vdi_data_destroy ?vdi_snapshot ?vdi_clone:_ () = diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index d38cab783b5..65fa54fe73b 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -264,7 +264,7 @@ let bind ~__context ~pbd = let service = make_service uuid ty in System_domains.register_service service queue_name ; let info = Client.Query.query dbg in - Storage_mux.register (Storage_interface.Sr.of_string sr_uuid) rpc uuid info ; + Storage_mux_reg.register (Storage_interface.Sr.of_string sr_uuid) rpc uuid info ; info with e -> error @@ -281,7 +281,7 @@ let unbind ~__context ~pbd = let ty = Db.SR.get_type ~__context ~self:sr in let sr = Db.SR.get_uuid ~__context ~self:sr in info "SR %s will nolonger be implemented by VM %s" sr (Ref.string_of driver) ; - Storage_mux.unregister (Storage_interface.Sr.of_string sr) ; + Storage_mux_reg.unregister (Storage_interface.Sr.of_string sr) ; let service = make_service uuid ty in System_domains.unregister_service service diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 7acba0c8823..e502666f4a2 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -17,113 +17,17 @@ module Unixext = Xapi_stdext_unix.Unixext module D = Debug.Make (struct let name = "mux" end) open D - -let with_dbg ~name ~dbg f = - Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f - -type processor = Rpc.call -> Rpc.response - -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute - open Storage_interface +open Storage_mux_reg -let s_of_sr = Sr.string_of - -let s_of_vdi = Vdi.string_of - -let s_of_vm = Vm.string_of - -type plugin = { - processor: processor - ; backend_domain: string - ; query_result: query_result - ; features: Smint.Feature.t list -} - -let plugins : (sr, plugin) Hashtbl.t = Hashtbl.create 10 - -let m = Mutex.create () - -let debug_printer rpc call = - (* debug "Rpc.call = %s" (Xmlrpc.string_of_call call); *) - let result = rpc call in - (* debug "Rpc.response = %s" (Xmlrpc.string_of_response result); *) - result - -let register sr rpc d info = - with_lock m (fun () -> - let features = - Smint.Feature.parse_capability_int64 info.Storage_interface.features - in - Hashtbl.replace plugins sr - { - processor= debug_printer rpc - ; backend_domain= d - ; query_result= info - ; features - } ; - debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) - ) - -let unregister sr = - with_lock m (fun () -> - Hashtbl.remove plugins sr ; - debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) - ) - -(* This function is entirely unused, but I am not sure if it should be - deleted or not *) -let query_result_of_sr sr = - with_lock m (fun () -> - Option.map (fun x -> x.query_result) (Hashtbl.find_opt plugins sr) - ) +let s_of_sr = Storage_interface.Sr.string_of -let sr_has_capability sr capability = - with_lock m (fun () -> - match Hashtbl.find_opt plugins sr with - | Some x -> - Smint.Feature.has_capability capability x.features - | None -> - false - ) +let s_of_vdi = Storage_interface.Vdi.string_of -(* This is the policy: *) -let of_sr sr = - with_lock m (fun () -> - match Hashtbl.find_opt plugins sr with - | Some x -> - x.processor - | None -> - error "No storage plugin for SR: %s (currently-registered = [ %s ])" - (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) ; - raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) - ) - -type 'a sm_result = SMSuccess of 'a | SMFailure of exn - -let multicast f = - Hashtbl.fold - (fun sr plugin acc -> - (sr, try SMSuccess (f sr plugin.processor) with e -> SMFailure e) :: acc - ) - plugins [] +let s_of_vm = Storage_interface.Vm.string_of -let success = function SMSuccess _ -> true | _ -> false - -let string_of_sm_result f = function - | SMSuccess x -> - Printf.sprintf "Success: %s" (f x) - | SMFailure e -> - Printf.sprintf "Failure: %s" (Printexc.to_string e) +let with_dbg ~name ~dbg f = + Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f let partition l = List.partition (fun (_, x) -> success x) l diff --git a/ocaml/xapi/storage_mux_reg.ml b/ocaml/xapi/storage_mux_reg.ml new file mode 100644 index 00000000000..c3b13494c33 --- /dev/null +++ b/ocaml/xapi/storage_mux_reg.ml @@ -0,0 +1,120 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** This module contains the code for registering storage plugins (SMAPIv1 and SMAPIv3) +and multiplexing between them according to the sr type *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D + +type processor = Rpc.call -> Rpc.response + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +open Storage_interface + +let s_of_sr = Storage_interface.Sr.string_of + +type plugin = { + processor: processor + ; backend_domain: string + ; query_result: query_result + ; features: Smint.Feature.t list +} + +let plugins : (sr, plugin) Hashtbl.t = Hashtbl.create 10 + +let m = Mutex.create () + +let debug_printer rpc call = + (* debug "Rpc.call = %s" (Xmlrpc.string_of_call call); *) + let result = rpc call in + (* debug "Rpc.response = %s" (Xmlrpc.string_of_response result); *) + result + +let register sr rpc d info = + with_lock m (fun () -> + let features = + Smint.Feature.parse_capability_int64 info.Storage_interface.features + in + Hashtbl.replace plugins sr + { + processor= debug_printer rpc + ; backend_domain= d + ; query_result= info + ; features + } ; + debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) + ) + +let unregister sr = + with_lock m (fun () -> + Hashtbl.remove plugins sr ; + debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) + ) + +(* This function is entirely unused, but I am not sure if it should be + deleted or not *) +let query_result_of_sr sr = + with_lock m (fun () -> + Option.map (fun x -> x.query_result) (Hashtbl.find_opt plugins sr) + ) + +let sr_has_capability sr capability = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + Smint.Feature.has_capability capability x.features + | None -> + false + ) + +(* This is the policy: *) +let of_sr sr = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + x.processor + | None -> + error "No storage plugin for SR: %s (currently-registered = [ %s ])" + (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) ; + raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) + ) + +type 'a sm_result = SMSuccess of 'a | SMFailure of exn + +let string_of_sm_result f = function + | SMSuccess x -> + Printf.sprintf "Success: %s" (f x) + | SMFailure e -> + Printf.sprintf "Failure: %s" (Printexc.to_string e) + +let success = function SMSuccess _ -> true | _ -> false + +let multicast f = + Hashtbl.fold + (fun sr plugin acc -> + (sr, try SMSuccess (f sr plugin.processor) with e -> SMFailure e) :: acc + ) + plugins [] diff --git a/ocaml/xapi/storage_mux_reg.mli b/ocaml/xapi/storage_mux_reg.mli new file mode 100644 index 00000000000..218cd5f96b3 --- /dev/null +++ b/ocaml/xapi/storage_mux_reg.mli @@ -0,0 +1,57 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type processor = Rpc.call -> Rpc.response + +val with_lock : Mutex.t -> (unit -> 'a) -> 'a + +type plugin = { + processor: processor + ; backend_domain: string + ; query_result: Storage_interface.query_result + ; features: Smint.Feature.t list +} + +val plugins : (Storage_interface.sr, plugin) Hashtbl.t + +val debug_printer : ('a -> 'b) -> 'a -> 'b + +val register : + Storage_interface.sr + -> (Rpc.call -> Rpc.response) + -> string + -> Storage_interface.query_result + -> unit + +val unregister : Storage_interface.sr -> unit + +val query_result_of_sr : + Storage_interface.sr -> Storage_interface.query_result option + +val sr_has_capability : Storage_interface.sr -> Smint.Feature.capability -> bool + +val of_sr : Storage_interface.sr -> processor + +val smapi_version_of_sr : + Storage_interface.sr -> Storage_interface.smapi_version + +type 'a sm_result = SMSuccess of 'a | SMFailure of exn + +val string_of_sm_result : ('a -> string) -> 'a sm_result -> string + +val success : 'a sm_result -> bool + +val multicast : + (Storage_interface.sr -> processor -> 'a) + -> (Storage_interface.sr * 'a sm_result) list From 4d9bc8caa70c3c0c5b346e7c73fa5274b2100ae2 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 11:22:38 +0000 Subject: [PATCH 040/492] CP-54020: Introduce `smapi_version` in query result This is then used later for storage_migrate to multiplex based on the SMAPI version. Signed-off-by: Vincent Liu --- ocaml/tests/test_sm_features.ml | 1 + ocaml/tests/test_vdi_cbt.ml | 1 + ocaml/xapi-idl/storage/storage_interface.ml | 4 ++++ ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/sm_exec.ml | 1 + ocaml/xapi/smint.ml | 2 ++ ocaml/xapi/storage_access.ml | 4 +++- ocaml/xapi/storage_mux.ml | 1 + ocaml/xapi/storage_mux_reg.ml | 14 ++++++++++++++ ocaml/xapi/storage_smapiv1.ml | 1 + ocaml/xapi/xapi_services.ml | 1 + 11 files changed, 30 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index d7a63008882..6b7ef99502d 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -249,6 +249,7 @@ module CreateSMObject = Generic.MakeStateful (struct ; features ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } let extract_output __context _ = diff --git a/ocaml/tests/test_vdi_cbt.ml b/ocaml/tests/test_vdi_cbt.ml index 3253f21311a..54ae411ac97 100644 --- a/ocaml/tests/test_vdi_cbt.ml +++ b/ocaml/tests/test_vdi_cbt.ml @@ -30,6 +30,7 @@ let register_smapiv2_server (module S : Storage_interface.Server_impl) sr_ref = ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } in diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 542312c6448..7dc5a8f82a9 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -404,6 +404,9 @@ let err = ) } +type smapi_version = SMAPIv1 | SMAPIv2 | SMAPIv3 +[@@deriving rpcty, show {with_path= false}] + type query_result = { driver: string ; name: string @@ -415,6 +418,7 @@ type query_result = { ; features: string list ; configuration: (string * string) list ; required_cluster_stack: string list + ; smapi_version: smapi_version } [@@deriving rpcty] diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 4b678fa72de..5910d65f28f 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -948,6 +948,7 @@ module QueryImpl (M : META) = struct ; configuration= response.Xapi_storage.Plugin.configuration ; required_cluster_stack= response.Xapi_storage.Plugin.required_cluster_stack + ; smapi_version= SMAPIv3 } in wrap th diff --git a/ocaml/xapi/sm_exec.ml b/ocaml/xapi/sm_exec.ml index 1da0c6c7e83..c4e2c46a1a9 100644 --- a/ocaml/xapi/sm_exec.ml +++ b/ocaml/xapi/sm_exec.ml @@ -582,6 +582,7 @@ let parse_sr_get_driver_info driver (xml : Xml.xml) = ; sr_driver_configuration= configuration ; sr_driver_text_features= text_features ; sr_driver_required_cluster_stack= [] + ; sr_smapi_version= SMAPIv1 } let sr_get_driver_info ~dbg driver = diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index a5809893c5f..e58340b5239 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -192,6 +192,7 @@ type sr_driver_info = { ; sr_driver_text_features: string list ; sr_driver_configuration: (string * string) list ; sr_driver_required_cluster_stack: string list + ; sr_smapi_version: Storage_interface.smapi_version } let query_result_of_sr_driver_info x = @@ -206,6 +207,7 @@ let query_result_of_sr_driver_info x = ; features= x.sr_driver_text_features ; configuration= x.sr_driver_configuration ; required_cluster_stack= x.sr_driver_required_cluster_stack + ; smapi_version= x.sr_smapi_version } type attach_info = { diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 65fa54fe73b..0aeed25125d 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -264,7 +264,9 @@ let bind ~__context ~pbd = let service = make_service uuid ty in System_domains.register_service service queue_name ; let info = Client.Query.query dbg in - Storage_mux_reg.register (Storage_interface.Sr.of_string sr_uuid) rpc uuid info ; + Storage_mux_reg.register + (Storage_interface.Sr.of_string sr_uuid) + rpc uuid info ; info with e -> error diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index e502666f4a2..e99ec3a3634 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -73,6 +73,7 @@ module Mux = struct ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } let diagnostics () ~dbg = diff --git a/ocaml/xapi/storage_mux_reg.ml b/ocaml/xapi/storage_mux_reg.ml index c3b13494c33..f7eff2ab43d 100644 --- a/ocaml/xapi/storage_mux_reg.ml +++ b/ocaml/xapi/storage_mux_reg.ml @@ -102,6 +102,20 @@ let of_sr sr = raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) ) +let smapi_version_of_sr sr = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + x.query_result.smapi_version + | None -> + error "No storage plugin for SR: %s (currently-registered = [ %s ])" + (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) ; + raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) + ) + type 'a sm_result = SMSuccess of 'a | SMFailure of exn let string_of_sm_result f = function diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index b71dea3d1c6..96da6ce4122 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -172,6 +172,7 @@ module SMAPIv1 : Server_impl = struct ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv1 } let diagnostics _context ~dbg:_ = diff --git a/ocaml/xapi/xapi_services.ml b/ocaml/xapi/xapi_services.ml index a413e4c3630..21e3b8d0c3b 100644 --- a/ocaml/xapi/xapi_services.ml +++ b/ocaml/xapi/xapi_services.ml @@ -254,6 +254,7 @@ let get_handler (req : Http.Request.t) s _ = ; features= List.map (fun x -> path [_services; x]) [_SM] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } in respond req (Storage_interface.(rpc_of query_result) q) s From a39009810871d9600cb6c8c7279d8e4d5abec825 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 13:28:35 +0000 Subject: [PATCH 041/492] CP-54020: Factor out the MIRROR module This will be used later on to define the interface a migration module should implement. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 118 +++++++++----------- ocaml/xapi-idl/storage/storage_skeleton.ml | 2 + ocaml/xapi/storage_mux.ml | 2 + ocaml/xapi/storage_smapiv1.ml | 2 + ocaml/xapi/storage_smapiv1_wrapper.ml | 2 + 5 files changed, 63 insertions(+), 63 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 7dc5a8f82a9..34856e0a57b 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1161,6 +1161,60 @@ module StorageAPI (R : RPC) = struct end end +module type MIRROR = sig + type context = unit + + val start : + context + -> dbg:debug_info + -> sr:sr + -> vdi:vdi + -> dp:dp + -> mirror_vm:vm + -> copy_vm:vm + -> url:string + -> dest:sr + -> verify_dest:bool + -> Task.id + + val stop : context -> dbg:debug_info -> id:Mirror.id -> unit + + val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t + + val receive_start : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> id:Mirror.id + -> similar:Mirror.similars + -> Mirror.mirror_receive_result + + val receive_start2 : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> id:Mirror.id + -> similar:Mirror.similars + -> vm:vm + -> Mirror.mirror_receive_result + + val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit + + val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list + + val import_activate : + context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> vm:vm -> sock_path + + val get_nbd_server : + context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> vm:vm -> sock_path +end + module type Server_impl = sig type context = unit @@ -1417,69 +1471,7 @@ module type Server_impl = sig -> verify_dest:bool -> Task.id - module MIRROR : sig - val start : - context - -> dbg:debug_info - -> sr:sr - -> vdi:vdi - -> dp:dp - -> mirror_vm:vm - -> copy_vm:vm - -> url:string - -> dest:sr - -> verify_dest:bool - -> Task.id - - val stop : context -> dbg:debug_info -> id:Mirror.id -> unit - - val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t - - val receive_start : - context - -> dbg:debug_info - -> sr:sr - -> vdi_info:vdi_info - -> id:Mirror.id - -> similar:Mirror.similars - -> Mirror.mirror_receive_result - - val receive_start2 : - context - -> dbg:debug_info - -> sr:sr - -> vdi_info:vdi_info - -> id:Mirror.id - -> similar:Mirror.similars - -> vm:vm - -> Mirror.mirror_receive_result - - val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit - - val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit - - val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit - - val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list - - val import_activate : - context - -> dbg:debug_info - -> dp:dp - -> sr:sr - -> vdi:vdi - -> vm:vm - -> sock_path - - val get_nbd_server : - context - -> dbg:debug_info - -> dp:dp - -> sr:sr - -> vdi:vdi - -> vm:vm - -> sock_path - end + module MIRROR : MIRROR end module Policy : sig diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index ab84ed7712e..4b5b23e6973 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -155,6 +155,8 @@ module DATA = struct let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = u "DATA.copy" module MIRROR = struct + type context = unit + (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and writes data synchronously. It returns the id of the VDI.*) let start ctx ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index e99ec3a3634..2c2ba86d2df 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -740,6 +740,8 @@ module Mux = struct with_dbg ~name:"DATA.copy" ~dbg @@ fun dbg -> Storage_migrate.copy ~dbg module MIRROR = struct + type context = unit + let start () ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = with_dbg ~name:"DATA.MIRROR.start" ~dbg @@ fun di -> diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 96da6ce4122..4373fdaae87 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1213,6 +1213,8 @@ module SMAPIv1 : Server_impl = struct assert false module MIRROR = struct + type context = unit + let start _context ~dbg:_ ~sr:_ ~vdi:_ ~dp:_ ~mirror_vm:_ ~copy_vm:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 8d6de8e8e84..f87bb9ffc4f 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1143,6 +1143,8 @@ functor Impl.DATA.copy context ~dbg ~sr ~vdi ~vm ~url ~dest module MIRROR = struct + type context = unit + let start context ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = info "DATA.MIRROR.start dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; From 6ac37d770a6aaa51a6e21cf6c5b2a0a19c3d6ea9 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 25 Mar 2025 03:38:27 +0000 Subject: [PATCH 042/492] CA-408500: Remove ListFile with Xapi_stdext_unix.Unixext Signed-off-by: Lin Liu --- ocaml/tests/test_extauth_plugin_ADwinbind.ml | 11 +- ocaml/xapi/extauth_plugin_ADwinbind.ml | 146 +++++++++---------- ocaml/xapi/helpers.ml | 23 --- 3 files changed, 81 insertions(+), 99 deletions(-) diff --git a/ocaml/tests/test_extauth_plugin_ADwinbind.ml b/ocaml/tests/test_extauth_plugin_ADwinbind.ml index 6babeda140c..5fe5bfc91cd 100644 --- a/ocaml/tests/test_extauth_plugin_ADwinbind.ml +++ b/ocaml/tests/test_extauth_plugin_ADwinbind.ml @@ -505,7 +505,9 @@ let test_add_ipv4_localhost_to_hosts = let msg = Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) in - let actual = HostsConfIPv4.join "hostname" "domain" inp in + let actual = + HostsConfIPv4.join ~name:"hostname" ~domain:"domain" ~lines:inp + in Alcotest.(check @@ list string) msg exp actual in let matrix = @@ -537,7 +539,8 @@ let test_add_ipv4_and_ipv6_localhost_to_hosts = Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) in let actual = - HostsConfIPv6.join "hostname" "domain" inp |> fun lines -> + HostsConfIPv6.join ~name:"hostname" ~domain:"domain" ~lines:inp + |> fun lines -> HostsConfIPv4.join ~name:"hostname" ~domain:"domain" ~lines in Alcotest.(check @@ list string) msg exp actual @@ -569,7 +572,9 @@ let test_remove_ipv4_localhost_from_hosts = let msg = Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) in - let actual = HostsConfIPv4.leave "hostname" "domain" inp in + let actual = + HostsConfIPv4.leave ~name:"hostname" ~domain:"domain" ~lines:inp + in Alcotest.(check @@ list string) msg exp actual in let matrix = diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index efc6ac9f1a0..f23f1f5447e 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -815,7 +815,6 @@ let query_domain_workgroup ~domain = with _ -> raise (Auth_service_error (E_LOOKUP, err_msg)) let config_winbind_daemon ~workgroup ~netbios_name ~domain = - let open Xapi_stdext_unix in let smb_config = "/etc/samba/smb.conf" in let allow_fallback = (*`allow kerberos auth fallback` depends on our internal samba patch, @@ -825,42 +824,41 @@ let config_winbind_daemon ~workgroup ~netbios_name ~domain = * upgrade to samba packages with this capacity *) if !Xapi_globs.winbind_allow_kerberos_auth_fallback then "yes" else "no" in - let conf_contents = - match (workgroup, netbios_name, domain) with - | Some wkgroup, Some netbios, Some dom -> - [ - "# autogenerated by xapi" - ; "[global]" - ; "kerberos method = secrets and keytab" - ; Printf.sprintf "realm = %s" dom - ; "security = ADS" - ; "template shell = /bin/bash" - ; "winbind refresh tickets = yes" - ; "winbind enum groups = no" - ; "winbind enum users = no" - ; "winbind scan trusted domains = yes" - ; "winbind use krb5 enterprise principals = yes" - ; Printf.sprintf "winbind cache time = %d" - !Xapi_globs.winbind_cache_time - ; Printf.sprintf "machine password timeout = 0" - ; Printf.sprintf "kerberos encryption types = %s" - (Kerberos_encryption_types.Winbind.to_string - !Xapi_globs.winbind_kerberos_encryption_type - ) - ; Printf.sprintf "workgroup = %s" wkgroup - ; Printf.sprintf "netbios name = %s" netbios - ; "idmap config * : range = 3000000-3999999" - ; Printf.sprintf "idmap config %s: backend = rid" dom - ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom - ; Printf.sprintf "log level = %s" (debug_level ()) - ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback - ; "idmap config * : backend = tdb" - ; "" (* Empty line at the end *) - ] - | _ -> - ["# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *)] - in - Helpers.ListFile.to_path smb_config conf_contents + ( match (workgroup, netbios_name, domain) with + | Some wkgroup, Some netbios, Some dom -> + [ + "# autogenerated by xapi" + ; "[global]" + ; "kerberos method = secrets and keytab" + ; Printf.sprintf "realm = %s" dom + ; "security = ADS" + ; "template shell = /bin/bash" + ; "winbind refresh tickets = yes" + ; "winbind enum groups = no" + ; "winbind enum users = no" + ; "winbind scan trusted domains = yes" + ; "winbind use krb5 enterprise principals = yes" + ; Printf.sprintf "winbind cache time = %d" !Xapi_globs.winbind_cache_time + ; Printf.sprintf "machine password timeout = 0" + ; Printf.sprintf "kerberos encryption types = %s" + (Kerberos_encryption_types.Winbind.to_string + !Xapi_globs.winbind_kerberos_encryption_type + ) + ; Printf.sprintf "workgroup = %s" wkgroup + ; Printf.sprintf "netbios name = %s" netbios + ; "idmap config * : range = 3000000-3999999" + ; Printf.sprintf "idmap config %s: backend = rid" dom + ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom + ; Printf.sprintf "log level = %s" (debug_level ()) + ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback + ; "idmap config * : backend = tdb" + ; "" (* Empty line at the end *) + ] + | _ -> + ["# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *)] + ) + |> String.concat "\n" + |> Xapi_stdext_unix.Unixext.write_string_to_file smb_config let clear_winbind_config () = (* Keep the winbind configuration if xapi config file specified explictly, @@ -1198,7 +1196,6 @@ module RotateMachinePassword = struct let generate_krb5_tmp_config ~domain ~kdc_fqdn = (* Configure which server to change the password * https://web.mit.edu/kerberos/krb5-devel/doc/admin/conf_files/krb5_conf.html *) - let open Xapi_stdext_unix in let realm = String.uppercase_ascii domain in let domain_netbios = Wbinfo.domain_name_of ~target_name_type:NetbiosName ~from_name:domain @@ -1212,22 +1209,21 @@ module RotateMachinePassword = struct [] in - let conf_contents = - [ - "# autogenerated by xapi" - ; "[libdefaults]" - ; Printf.sprintf "default_realm = %s" realm - ; "[realms]" - ; Printf.sprintf "%s={" realm - ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn - ; Printf.sprintf "kdc=%s" kdc_fqdn - ; "}" (* include winbind generated configure if exists *) - ] - @ include_item - @ [""] - (* Empty line at the end *) - in - Helpers.ListFile.to_path tmp_krb5_conf conf_contents + [ + "# autogenerated by xapi" + ; "[libdefaults]" + ; Printf.sprintf "default_realm = %s" realm + ; "[realms]" + ; Printf.sprintf "%s={" realm + ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn + ; Printf.sprintf "kdc=%s" kdc_fqdn + ; "}" (* include winbind generated configure if exists *) + ] + @ include_item + @ [""] + (* Empty line at the end *) + |> String.concat "\n" + |> Xapi_stdext_unix.Unixext.write_string_to_file tmp_krb5_conf let clear_tmp_krb5_conf () = if !Xapi_globs.winbind_keep_configuration then @@ -1354,19 +1350,23 @@ module HostsConfIPv4 = HostsConfFunc (HostsConfTagIPv4) module HostsConfIPv6 = HostsConfFunc (HostsConfTagIPv6) module ConfigHosts = struct + open Xapi_stdext_unix.Unixext + let path = "/etc/hosts" let join ~name ~domain = - Helpers.ListFile.of_path path - |> HostsConfIPv4.join ~name ~domain - |> HostsConfIPv6.join ~name ~domain - |> Helpers.ListFile.to_path path + read_lines ~path |> fun lines -> + HostsConfIPv4.join ~name ~domain ~lines |> fun lines -> + HostsConfIPv6.join ~name ~domain ~lines + |> String.concat "\n" + |> write_string_to_file path let leave ~name ~domain = - Helpers.ListFile.of_path path - |> HostsConfIPv4.leave ~name ~domain - |> HostsConfIPv6.leave ~name ~domain - |> Helpers.ListFile.to_path path + read_lines ~path |> fun lines -> + HostsConfIPv4.leave ~name ~domain ~lines |> fun lines -> + HostsConfIPv6.leave ~name ~domain ~lines + |> String.concat "\n" + |> write_string_to_file path end let build_netbios_name ~config_params = @@ -1729,12 +1729,12 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct let@ __context = Context.with_tracing ~__context __FUNCTION__ in let user = List.assoc_opt "user" config_params in let pass = List.assoc_opt "pass" config_params in - let {service_name; workgroup; netbios_name; _} = - get_domain_info_from_db () - in - ( if Option.is_some netbios_name then - Option.get netbios_name |> fun name -> + let {service_name; netbios_name; _} = get_domain_info_from_db () in + ( match netbios_name with + | Some name -> ConfigHosts.leave ~domain:service_name ~name + | _ -> + () ) ; (* Clean extauth config *) @@ -1763,12 +1763,12 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct RotateMachinePassword.trigger_rotate ~start:5. ; Winbind.check_ready_to_serve ~timeout:300. ; - let {service_name; workgroup; netbios_name; _} = - get_domain_info_from_db () - in - if Option.is_some netbios_name then - Option.get netbios_name |> fun name -> - ConfigHosts.join ~domain:service_name ~name + let {service_name; netbios_name; _} = get_domain_info_from_db () in + match netbios_name with + | Some name -> + ConfigHosts.join ~domain:service_name ~name + | _ -> + () (* unit on_xapi_exit() diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index b5c810d76c3..2ef16112053 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -2360,26 +2360,3 @@ module AuthenticationCache = struct None end end - -module ListFile = struct - (* Read/Write List to/from file, line by line *) - let of_path path = - let ic = open_in path in - finally - (fun () -> - let rec read_lines acc = - try - let line = input_line ic in - read_lines (acc @ [line]) - with End_of_file -> acc - in - read_lines [] - ) - (fun () -> close_in ic) - - let to_path ?(perm = 0o0644) path contents = - String.concat "\n" contents |> fun x -> - Unixext.atomic_write_to_file path perm @@ fun fd -> - Unixext.really_write_string fd x |> ignore ; - Unix.fsync fd -end From 4a1b6570f566b8b2a49ccf99ee554ac7083145eb Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 14:43:57 +0000 Subject: [PATCH 043/492] CP-54020: Factor out sxm state tracking logic Signed-off-by: Vincent Liu --- ocaml/tests/test_storage_migrate_state.ml | 16 +- ocaml/xapi/dune | 4 +- ocaml/xapi/storage_migrate.ml | 307 +------------------- ocaml/xapi/storage_migrate_helper.ml | 328 ++++++++++++++++++++++ ocaml/xapi/storage_migrate_helper.mli | 249 ++++++++++++++++ ocaml/xapi/xapi_vm_migrate.ml | 2 +- 6 files changed, 589 insertions(+), 317 deletions(-) create mode 100644 ocaml/xapi/storage_migrate_helper.ml create mode 100644 ocaml/xapi/storage_migrate_helper.mli diff --git a/ocaml/tests/test_storage_migrate_state.ml b/ocaml/tests/test_storage_migrate_state.ml index 42087887995..498d9995548 100644 --- a/ocaml/tests/test_storage_migrate_state.ml +++ b/ocaml/tests/test_storage_migrate_state.ml @@ -17,11 +17,11 @@ open Test_highlevel module StorageMigrateState = struct type state_t = unit - let create_default_state () = Storage_migrate.State.clear () + let create_default_state () = Storage_migrate_helper.State.clear () end let sample_send_state = - Storage_migrate.State.Send_state. + Storage_migrate_helper.State.Send_state. { url= "url" ; dest_sr= Storage_interface.Sr.of_string "dest_sr" @@ -45,7 +45,7 @@ let sample_send_state = let sample_receive_state = let open Storage_interface in - Storage_migrate.State.Receive_state. + Storage_migrate_helper.State.Receive_state. { sr= Sr.of_string "my_sr" ; dummy_vdi= Vdi.of_string "dummy_vdi" @@ -57,7 +57,7 @@ let sample_receive_state = } let sample_copy_state = - Storage_migrate.State.Copy_state. + Storage_migrate_helper.State.Copy_state. { base_dp= "base_dp" ; leaf_dp= "leaf_dp" @@ -70,7 +70,7 @@ let sample_copy_state = module MapOf = Generic.MakeStateful (struct module Io = struct - open Storage_migrate.State + open Storage_migrate_helper.State type input_t = (string * osend operation) option @@ -88,7 +88,7 @@ module MapOf = Generic.MakeStateful (struct end module State = StorageMigrateState - open Storage_migrate.State + open Storage_migrate_helper.State let load_input () (send, recv, copy) = Option.iter (fun (id, send) -> add id send) send ; @@ -116,7 +116,7 @@ module MapOf = Generic.MakeStateful (struct end) let test_clear () = - let open Storage_migrate.State in + let open Storage_migrate_helper.State in clear () ; add "foo" (Send_op sample_send_state) ; add "bar" (Recv_op sample_receive_state) ; @@ -130,5 +130,5 @@ let test_clear () = let test = [("clear", `Quick, test_clear)] let tests = - Storage_migrate.State.persist_root := Test_common.working_area ; + Storage_migrate_helper.State.persist_root := Test_common.working_area ; [("storage_migrate_state_map_of", MapOf.tests)] diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index b74a1ecc16d..fde7a267003 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -217,8 +217,8 @@ ((pps ppx_deriving.ord) Xapi_observer_components) ((pps ppx_deriving_rpc) Config_file_sync Extauth_plugin_ADwinbind Importexport Sparse_dd_wrapper - Storage_migrate Storage_mux Storage_smapiv1_wrapper Stream_vdi - System_domains Xapi_psr Xapi_services Xapi_udhcpd))) + Storage_migrate Storage_migrate_helper Storage_mux Storage_smapiv1_wrapper + Stream_vdi System_domains Xapi_psr Xapi_services Xapi_udhcpd))) ) (library diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 37ec703709a..cfc005d97fe 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -15,308 +15,13 @@ module D = Debug.Make (struct let name = "storage_migrate" end) open D - -(** As SXM is such a long running process, we dedicate this to log important - milestones during the SXM process *) -module SXM = Debug.Make (struct - let name = "SXM" -end) - module Listext = Xapi_stdext_std.Listext open Xapi_stdext_pervasives.Pervasiveext module Unixext = Xapi_stdext_unix.Unixext open Xmlrpc_client open Storage_interface open Storage_task - -module State = struct - module Receive_state = struct - type t = { - sr: Sr.t - ; dummy_vdi: Vdi.t - ; leaf_vdi: Vdi.t - ; leaf_dp: dp - ; parent_vdi: Vdi.t - ; remote_vdi: Vdi.t - ; mirror_vm: Vm.t - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Receive_state.t: %s" m) - end - - module Send_state = struct - type remote_info = { - dp: dp - ; vdi: Vdi.t - ; url: string - ; verify_dest: bool [@default false] - } - [@@deriving rpcty] - - type tapdev = Tapctl.tapdev - - let typ_of_tapdev = - Rpc.Types.( - Abstract - { - aname= "tapdev" - ; test_data= [] - ; rpc_of= Tapctl.rpc_of_tapdev - ; of_rpc= (fun x -> Ok (Tapctl.tapdev_of_rpc x)) - } - ) - - type handle = Scheduler.handle - - let typ_of_handle = - Rpc.Types.( - Abstract - { - aname= "handle" - ; test_data= [] - ; rpc_of= Scheduler.rpc_of_handle - ; of_rpc= (fun x -> Ok (Scheduler.handle_of_rpc x)) - } - ) - - type t = { - url: string - ; dest_sr: Sr.t - ; remote_info: remote_info option - ; local_dp: dp - ; tapdev: tapdev option - ; mutable failed: bool - ; mutable watchdog: handle option - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Send_state.t: %s" m) - end - - module Copy_state = struct - type t = { - base_dp: dp - ; leaf_dp: dp - ; remote_dp: dp - ; dest_sr: Sr.t - ; copy_vdi: Vdi.t - ; remote_url: string - ; verify_dest: bool [@default false] - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Copy_state.t: %s" m) - end - - let loaded = ref false - - let mutex = Mutex.create () - - type send_table = (string, Send_state.t) Hashtbl.t - - type recv_table = (string, Receive_state.t) Hashtbl.t - - type copy_table = (string, Copy_state.t) Hashtbl.t - - type osend - - type orecv - - type ocopy - - type _ operation = - | Send_op : Send_state.t -> osend operation - | Recv_op : Receive_state.t -> orecv operation - | Copy_op : Copy_state.t -> ocopy operation - - type _ table = - | Send_table : send_table -> osend table - | Recv_table : recv_table -> orecv table - | Copy_table : copy_table -> ocopy table - - let active_send : send_table = Hashtbl.create 10 - - let active_recv : recv_table = Hashtbl.create 10 - - let active_copy : copy_table = Hashtbl.create 10 - - let table_of_op : type a. a operation -> a table = function - | Send_op _ -> - Send_table active_send - | Recv_op _ -> - Recv_table active_recv - | Copy_op _ -> - Copy_table active_copy - - let persist_root = ref "/var/run/nonpersistent" - - let path_of_table : type a. a table -> string = function - | Send_table _ -> - Filename.concat !persist_root "storage_mirrors_send.json" - | Recv_table _ -> - Filename.concat !persist_root "storage_mirrors_recv.json" - | Copy_table _ -> - Filename.concat !persist_root "storage_mirrors_copy.json" - - let rpc_of_table : type a. a table -> Rpc.t = - let open Rpc_std_helpers in - function - | Send_table send_table -> - rpc_of_hashtbl ~rpc_of:Send_state.rpc_of_t send_table - | Recv_table recv_table -> - rpc_of_hashtbl ~rpc_of:Receive_state.rpc_of_t recv_table - | Copy_table copy_table -> - rpc_of_hashtbl ~rpc_of:Copy_state.rpc_of_t copy_table - - let to_string : type a. a table -> string = - fun table -> rpc_of_table table |> Jsonrpc.to_string - - let rpc_of_path path = Unixext.string_of_file path |> Jsonrpc.of_string - - let load_one : type a. a table -> unit = - fun table -> - let rpc = path_of_table table |> rpc_of_path in - let open Rpc_std_helpers in - match table with - | Send_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Send_state.t_of_rpc rpc) - | Recv_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Receive_state.t_of_rpc rpc) - | Copy_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Copy_state.t_of_rpc rpc) - - let load () = - ignore_exn (fun () -> load_one (Send_table active_send)) ; - ignore_exn (fun () -> load_one (Recv_table active_recv)) ; - ignore_exn (fun () -> load_one (Copy_table active_copy)) ; - loaded := true - - let save_one : type a. a table -> unit = - fun table -> - to_string table |> Unixext.write_string_to_file (path_of_table table) - - let save () = - Unixext.mkdir_rec !persist_root 0o700 ; - save_one (Send_table active_send) ; - save_one (Recv_table active_recv) ; - save_one (Copy_table active_copy) - - let access_table ~save_after f table = - Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun () -> - if not !loaded then load () ; - let result = f table in - if save_after then save () ; - result - ) - - let map_of () = - let contents_of table = - Hashtbl.fold (fun k v acc -> (k, v) :: acc) table [] - in - let send_ops = access_table ~save_after:false contents_of active_send in - let recv_ops = access_table ~save_after:false contents_of active_recv in - let copy_ops = access_table ~save_after:false contents_of active_copy in - (send_ops, recv_ops, copy_ops) - - let add : type a. string -> a operation -> unit = - fun id op -> - let add' : type a. string -> a operation -> a table -> unit = - fun id op table -> - match (table, op) with - | Send_table table, Send_op op -> - Hashtbl.replace table id op - | Recv_table table, Recv_op op -> - Hashtbl.replace table id op - | Copy_table table, Copy_op op -> - Hashtbl.replace table id op - in - access_table ~save_after:true - (fun table -> add' id op table) - (table_of_op op) - - let find id table = - access_table ~save_after:false - (fun table -> Hashtbl.find_opt table id) - table - - let remove id table = - access_table ~save_after:true (fun table -> Hashtbl.remove table id) table - - let clear () = - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_send ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_recv ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_copy - - let remove_local_mirror id = remove id active_send - - let remove_receive_mirror id = remove id active_recv - - let remove_copy id = remove id active_copy - - let find_active_local_mirror id = find id active_send - - let find_active_receive_mirror id = find id active_recv - - let find_active_copy id = find id active_copy - - let mirror_id_of (sr, vdi) = - Printf.sprintf "%s/%s" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - - let of_mirror_id id = - match String.split_on_char '/' id with - | sr :: rest -> - Storage_interface. - (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) - | _ -> - failwith "Bad id" - - let copy_id_of (sr, vdi) = - Printf.sprintf "copy/%s/%s" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - - let of_copy_id id = - match String.split_on_char '/' id with - | op :: sr :: rest when op = "copy" -> - Storage_interface. - (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) - | _ -> - failwith "Bad id" -end - -let vdi_info x = - match x with - | Some (Vdi_info v) -> - v - | _ -> - failwith "Runtime type error: expecting Vdi_info" +open Storage_migrate_helper module Local = StorageAPI (Idl.Exn.GenClient (struct let rpc call = @@ -443,16 +148,6 @@ let progress_callback start len t y = Storage_task.set_state t (Task.Pending new_progress) ; signal (Storage_task.id_of_handle t) -let remove_from_sm_config vdi_info key = - { - vdi_info with - sm_config= List.filter (fun (k, _) -> k <> key) vdi_info.sm_config - } - -let add_to_sm_config vdi_info key value = - let vdi_info = remove_from_sm_config vdi_info key in - {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} - (** This module [MigrateLocal] consists of the concrete implementations of the migration part of SMAPI. Functions inside this module are sender driven, which means they tend to be executed on the sender side. although there is not a hard rule diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml new file mode 100644 index 00000000000..d8182a28e5a --- /dev/null +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -0,0 +1,328 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** As SXM is such a long running process, we dedicate this to log important + milestones during the SXM process *) +module SXM = Debug.Make (struct + let name = "SXM" +end) + +module Listext = Xapi_stdext_std.Listext +module Unixext = Xapi_stdext_unix.Unixext +open Storage_interface +open Xapi_stdext_pervasives.Pervasiveext +open Xmlrpc_client + +module State = struct + module Receive_state = struct + type t = { + sr: Sr.t + ; dummy_vdi: Vdi.t + ; leaf_vdi: Vdi.t + ; leaf_dp: dp + ; parent_vdi: Vdi.t + ; remote_vdi: Vdi.t + ; mirror_vm: Vm.t + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith (Printf.sprintf "Failed to unmarshal Receive_state.t: %s" m) + end + + module Send_state = struct + type remote_info = { + dp: dp + ; vdi: Vdi.t + ; url: string + ; verify_dest: bool [@default false] + } + [@@deriving rpcty] + + type tapdev = Tapctl.tapdev + + let typ_of_tapdev = + Rpc.Types.( + Abstract + { + aname= "tapdev" + ; test_data= [] + ; rpc_of= Tapctl.rpc_of_tapdev + ; of_rpc= (fun x -> Ok (Tapctl.tapdev_of_rpc x)) + } + ) + + type handle = Scheduler.handle + + let typ_of_handle = + Rpc.Types.( + Abstract + { + aname= "handle" + ; test_data= [] + ; rpc_of= Scheduler.rpc_of_handle + ; of_rpc= (fun x -> Ok (Scheduler.handle_of_rpc x)) + } + ) + + type t = { + url: string + ; dest_sr: Sr.t + ; remote_info: remote_info option + ; local_dp: dp + ; tapdev: tapdev option + ; mutable failed: bool + ; mutable watchdog: handle option + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith (Printf.sprintf "Failed to unmarshal Send_state.t: %s" m) + end + + module Copy_state = struct + type t = { + base_dp: dp + ; leaf_dp: dp + ; remote_dp: dp + ; dest_sr: Sr.t + ; copy_vdi: Vdi.t + ; remote_url: string + ; verify_dest: bool [@default false] + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith (Printf.sprintf "Failed to unmarshal Copy_state.t: %s" m) + end + + let loaded = ref false + + let mutex = Mutex.create () + + type send_table = (string, Send_state.t) Hashtbl.t + + type recv_table = (string, Receive_state.t) Hashtbl.t + + type copy_table = (string, Copy_state.t) Hashtbl.t + + type osend + + type orecv + + type ocopy + + type _ operation = + | Send_op : Send_state.t -> osend operation + | Recv_op : Receive_state.t -> orecv operation + | Copy_op : Copy_state.t -> ocopy operation + + type _ table = + | Send_table : send_table -> osend table + | Recv_table : recv_table -> orecv table + | Copy_table : copy_table -> ocopy table + + let active_send : send_table = Hashtbl.create 10 + + let active_recv : recv_table = Hashtbl.create 10 + + let active_copy : copy_table = Hashtbl.create 10 + + let table_of_op : type a. a operation -> a table = function + | Send_op _ -> + Send_table active_send + | Recv_op _ -> + Recv_table active_recv + | Copy_op _ -> + Copy_table active_copy + + let persist_root = ref "/var/run/nonpersistent" + + let path_of_table : type a. a table -> string = function + | Send_table _ -> + Filename.concat !persist_root "storage_mirrors_send.json" + | Recv_table _ -> + Filename.concat !persist_root "storage_mirrors_recv.json" + | Copy_table _ -> + Filename.concat !persist_root "storage_mirrors_copy.json" + + let rpc_of_table : type a. a table -> Rpc.t = + let open Rpc_std_helpers in + function + | Send_table send_table -> + rpc_of_hashtbl ~rpc_of:Send_state.rpc_of_t send_table + | Recv_table recv_table -> + rpc_of_hashtbl ~rpc_of:Receive_state.rpc_of_t recv_table + | Copy_table copy_table -> + rpc_of_hashtbl ~rpc_of:Copy_state.rpc_of_t copy_table + + let to_string : type a. a table -> string = + fun table -> rpc_of_table table |> Jsonrpc.to_string + + let rpc_of_path path = Unixext.string_of_file path |> Jsonrpc.of_string + + let load_one : type a. a table -> unit = + fun table -> + let rpc = path_of_table table |> rpc_of_path in + let open Rpc_std_helpers in + match table with + | Send_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Send_state.t_of_rpc rpc) + | Recv_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Receive_state.t_of_rpc rpc) + | Copy_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Copy_state.t_of_rpc rpc) + + let load () = + ignore_exn (fun () -> load_one (Send_table active_send)) ; + ignore_exn (fun () -> load_one (Recv_table active_recv)) ; + ignore_exn (fun () -> load_one (Copy_table active_copy)) ; + loaded := true + + let save_one : type a. a table -> unit = + fun table -> + to_string table |> Unixext.write_string_to_file (path_of_table table) + + let save () = + Unixext.mkdir_rec !persist_root 0o700 ; + save_one (Send_table active_send) ; + save_one (Recv_table active_recv) ; + save_one (Copy_table active_copy) + + let access_table ~save_after f table = + Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun () -> + if not !loaded then load () ; + let result = f table in + if save_after then save () ; + result + ) + + let map_of () = + let contents_of table = + Hashtbl.fold (fun k v acc -> (k, v) :: acc) table [] + in + let send_ops = access_table ~save_after:false contents_of active_send in + let recv_ops = access_table ~save_after:false contents_of active_recv in + let copy_ops = access_table ~save_after:false contents_of active_copy in + (send_ops, recv_ops, copy_ops) + + let add : type a. string -> a operation -> unit = + fun id op -> + let add' : type a. string -> a operation -> a table -> unit = + fun id op table -> + match (table, op) with + | Send_table table, Send_op op -> + Hashtbl.replace table id op + | Recv_table table, Recv_op op -> + Hashtbl.replace table id op + | Copy_table table, Copy_op op -> + Hashtbl.replace table id op + in + access_table ~save_after:true + (fun table -> add' id op table) + (table_of_op op) + + let find id table = + access_table ~save_after:false + (fun table -> Hashtbl.find_opt table id) + table + + let remove id table = + access_table ~save_after:true (fun table -> Hashtbl.remove table id) table + + let clear () = + access_table ~save_after:true (fun table -> Hashtbl.clear table) active_send ; + access_table ~save_after:true (fun table -> Hashtbl.clear table) active_recv ; + access_table ~save_after:true (fun table -> Hashtbl.clear table) active_copy + + let remove_local_mirror id = remove id active_send + + let remove_receive_mirror id = remove id active_recv + + let remove_copy id = remove id active_copy + + let find_active_local_mirror id = find id active_send + + let find_active_receive_mirror id = find id active_recv + + let find_active_copy id = find id active_copy + + let mirror_id_of (sr, vdi) = + Printf.sprintf "%s/%s" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + + let of_mirror_id id = + match String.split_on_char '/' id with + | sr :: rest -> + Storage_interface. + (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) + | _ -> + failwith "Bad id" + + let copy_id_of (sr, vdi) = + Printf.sprintf "copy/%s/%s" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + + let of_copy_id id = + match String.split_on_char '/' id with + | op :: sr :: rest when op = "copy" -> + Storage_interface. + (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) + | _ -> + failwith "Bad id" +end + +let vdi_info x = + match x with + | Some (Vdi_info v) -> + v + | _ -> + failwith "Runtime type error: expecting Vdi_info" + +let remove_from_sm_config vdi_info key = + { + vdi_info with + sm_config= List.filter (fun (k, _) -> k <> key) vdi_info.sm_config + } + +let add_to_sm_config vdi_info key value = + let vdi_info = remove_from_sm_config vdi_info key in + {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} + +let with_http request f s = + try Http_client.rpc s request (fun response s -> f (response, s)) + with Unix.Unix_error (Unix.ECONNRESET, _, _) -> raise Connection_reset diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli new file mode 100644 index 00000000000..2355a4d3947 --- /dev/null +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -0,0 +1,249 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module SXM : Debug.DEBUG + +module State : sig + module Receive_state : sig + type t = { + sr: Storage_interface.sr + ; dummy_vdi: Storage_interface.vdi + ; leaf_vdi: Storage_interface.vdi + ; leaf_dp: string + ; parent_vdi: Storage_interface.vdi + ; remote_vdi: Storage_interface.vdi + ; mirror_vm: Storage_interface.vm + } + + val t_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_dummy_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_leaf_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_leaf_dp : (string, t) Rpc.Types.field + + val t_parent_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_remote_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_mirror_vm : (Storage_interface.vm, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + module Send_state : sig + type remote_info = { + dp: string + ; vdi: Storage_interface.vdi + ; url: string + ; verify_dest: bool + } + + val remote_info_dp : (string, remote_info) Rpc.Types.field + + val remote_info_vdi : (Storage_interface.vdi, remote_info) Rpc.Types.field + + val remote_info_url : (string, remote_info) Rpc.Types.field + + val remote_info_verify_dest : (bool, remote_info) Rpc.Types.field + + val typ_of_remote_info : remote_info Rpc.Types.typ + + val remote_info : remote_info Rpc.Types.def + + type tapdev = Tapctl.tapdev + + val typ_of_tapdev : Tapctl.tapdev Rpc.Types.typ + + type handle = Scheduler.handle + + val typ_of_handle : Scheduler.handle Rpc.Types.typ + + type t = { + url: string + ; dest_sr: Storage_interface.sr + ; remote_info: remote_info option + ; local_dp: string + ; tapdev: tapdev option + ; mutable failed: bool + ; mutable watchdog: handle option + } + + val t_url : (string, t) Rpc.Types.field + + val t_dest_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_remote_info : (remote_info option, t) Rpc.Types.field + + val t_local_dp : (string, t) Rpc.Types.field + + val t_tapdev : (tapdev option, t) Rpc.Types.field + + val t_failed : (bool, t) Rpc.Types.field + + val t_watchdog : (handle option, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + module Copy_state : sig + type t = { + base_dp: string + ; leaf_dp: string + ; remote_dp: string + ; dest_sr: Storage_interface.sr + ; copy_vdi: Storage_interface.vdi + ; remote_url: string + ; verify_dest: bool + } + + val t_base_dp : (string, t) Rpc.Types.field + + val t_leaf_dp : (string, t) Rpc.Types.field + + val t_remote_dp : (string, t) Rpc.Types.field + + val t_dest_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_copy_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_remote_url : (string, t) Rpc.Types.field + + val t_verify_dest : (bool, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + val loaded : bool ref + + val mutex : Mutex.t + + type send_table = (string, Send_state.t) Hashtbl.t + + type recv_table = (string, Receive_state.t) Hashtbl.t + + type copy_table = (string, Copy_state.t) Hashtbl.t + + type osend + + type orecv + + type ocopy + + type _ operation = + | Send_op : Send_state.t -> osend operation + | Recv_op : Receive_state.t -> orecv operation + | Copy_op : Copy_state.t -> ocopy operation + + type _ table = + | Send_table : send_table -> osend table + | Recv_table : recv_table -> orecv table + | Copy_table : copy_table -> ocopy table + + val active_send : send_table + + val active_recv : recv_table + + val active_copy : copy_table + + val table_of_op : 'a operation -> 'a table + + val persist_root : string ref + + val path_of_table : 'a table -> string + + val rpc_of_table : 'a table -> Rpc.t + + val to_string : 'a table -> string + + val rpc_of_path : string -> Rpc.t + + val load_one : 'a table -> unit + + val load : unit -> unit + + val save_one : 'a table -> unit + + val save : unit -> unit + + val access_table : save_after:bool -> ('a -> 'b) -> 'a -> 'b + + val map_of : + unit + -> (string * Send_state.t) list + * (string * Receive_state.t) list + * (string * Copy_state.t) list + + val add : string -> 'a operation -> unit + + val find : 'a -> ('a, 'b) Hashtbl.t -> 'b option + + val remove : 'a -> ('a, 'b) Hashtbl.t -> unit + + val clear : unit -> unit + + val remove_local_mirror : string -> unit + + val remove_receive_mirror : string -> unit + + val remove_copy : string -> unit + + val find_active_local_mirror : string -> Send_state.t option + + val find_active_receive_mirror : string -> Receive_state.t option + + val find_active_copy : string -> Copy_state.t option + + val mirror_id_of : Storage_interface.sr * Storage_interface.vdi -> string + + val of_mirror_id : string -> Storage_interface.sr * Storage_interface.vdi + + val copy_id_of : Storage_interface.sr * Storage_interface.vdi -> string + + val of_copy_id : string -> Storage_interface.sr * Storage_interface.vdi +end + +val vdi_info : + Storage_interface.async_result_t option -> Storage_interface.vdi_info + +val remove_from_sm_config : + Storage_interface.vdi_info -> string -> Storage_interface.vdi_info + +val add_to_sm_config : + Storage_interface.vdi_info -> string -> string -> Storage_interface.vdi_info + +val with_http : + Http.Request.t + -> (Http.Response.t * Unix.file_descr -> 'a) + -> Unix.file_descr + -> 'a diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index b0a7d17774d..b09adef7f9d 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1028,7 +1028,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far ) ; SMAPI.VDI.activate3 dbg new_dp vconf.sr vconf.location vconf.mirror_vm ; let id = - Storage_migrate.State.mirror_id_of (vconf.sr, vconf.location) + Storage_migrate_helper.State.mirror_id_of (vconf.sr, vconf.location) in debug "%s mirror_vm is %s copy_vm is %s" __FUNCTION__ (Vm.string_of vconf.mirror_vm) From 1044120faff4d7292435f4f820ff3d732f799a32 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 14:45:27 +0000 Subject: [PATCH 044/492] CP-54020: Factor out module creation logic Use first class modules to generate modules for the remote SMAPIv2 calls to avoid code duplication. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 65 +++++---------------------- ocaml/xapi/storage_migrate_helper.ml | 17 +++++++ ocaml/xapi/storage_migrate_helper.mli | 13 ++++++ 3 files changed, 42 insertions(+), 53 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index cfc005d97fe..8952f947993 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -23,13 +23,6 @@ open Storage_interface open Storage_task open Storage_migrate_helper -module Local = StorageAPI (Idl.Exn.GenClient (struct - let rpc call = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" - (Storage_utils.localhost_connection_args ()) - call -end)) - let tapdisk_of_attach_info (backend : Storage_interface.backend) = let _, blockdevices, _, nbds = Storage_interface.implementations_of_backend backend @@ -155,11 +148,7 @@ on what is executed on the sender side, this provides some heuristics. *) module MigrateLocal = struct (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) let copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~dest_vdi ~verify_dest = - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in + let (module Remote) = get_remote_backend url verify_dest in debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" (Storage_interface.Sr.string_of sr) (Storage_interface.Vdi.string_of vdi) @@ -321,11 +310,7 @@ module MigrateLocal = struct url (Storage_interface.Sr.string_of dest) verify_dest ; - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in + let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) try let vdis = Local.SR.scan dbg sr in @@ -430,12 +415,9 @@ module MigrateLocal = struct url (Storage_interface.Sr.string_of dest) verify_dest ; + let remote_url = Http.Url.of_string url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - (Storage_utils.connection_args_of_uri ~verify_dest url) - end)) in + let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) let vdis = Local.SR.scan dbg sr in let local_vdi = @@ -676,16 +658,10 @@ module MigrateLocal = struct | None -> debug "Snapshot VDI already cleaned up" ) ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:remote_info.State.Send_state.verify_dest - remote_info.State.Send_state.url + + let (module Remote) = + get_remote_backend remote_info.url remote_info.verify_dest in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - remote_url - end)) in try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () ) | None -> @@ -773,7 +749,7 @@ module MigrateLocal = struct ) send_ops ; List.iter - (fun (id, copy_state) -> + (fun (id, (copy_state : State.Copy_state.t)) -> debug "Copy in progress: %s" id ; List.iter log_and_ignore_exn [ @@ -784,15 +760,9 @@ module MigrateLocal = struct Local.DP.destroy dbg copy_state.State.Copy_state.base_dp true ) ] ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:copy_state.State.Copy_state.verify_dest - copy_state.State.Copy_state.remote_url + let (module Remote) = + get_remote_backend copy_state.remote_url copy_state.verify_dest in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in List.iter log_and_ignore_exn [ (fun () -> @@ -1025,14 +995,7 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = ~some:(fun ri -> ri.verify_dest) r.remote_info in - let remote_url = - Storage_utils.connection_args_of_uri ~verify_dest r.url - in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - remote_url - end)) in + let (module Remote) = get_remote_backend r.url verify_dest in debug "Calling receive_finalize2" ; log_and_ignore_exn (fun () -> Remote.DATA.MIRROR.receive_finalize2 "Mirror-cleanup" id @@ -1170,11 +1133,7 @@ let receive_cancel = MigrateRemote.receive_cancel * to SMAPI. *) let update_snapshot_info_src ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs ~verify_dest = - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in + let (module Remote) = get_remote_backend url verify_dest in let local_vdis = Local.SR.scan dbg sr in let find_vdi ~vdi ~vdi_info_list = try List.find (fun x -> x.vdi = vdi) vdi_info_list diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index d8182a28e5a..19660598620 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -326,3 +326,20 @@ let add_to_sm_config vdi_info key value = let with_http request f s = try Http_client.rpc s request (fun response s -> f (response, s)) with Unix.Unix_error (Unix.ECONNRESET, _, _) -> raise Connection_reset + +module Local = StorageAPI (Idl.Exn.GenClient (struct + let rpc call = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" + (Storage_utils.localhost_connection_args ()) + call +end)) + +module type SMAPIv2 = module type of Local + +let get_remote_backend url verify_dest = + let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url + end)) in + (module Remote : SMAPIv2) diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index 2355a4d3947..7af4f39ed39 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -247,3 +247,16 @@ val with_http : -> (Http.Response.t * Unix.file_descr -> 'a) -> Unix.file_descr -> 'a + +module type SMAPIv2 = sig + include module type of Storage_interface.StorageAPI (Idl.Exn.GenClient (struct + let rpc call = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" + (Storage_utils.localhost_connection_args ()) + call + end)) +end + +module Local : SMAPIv2 + +val get_remote_backend : string -> bool -> (module SMAPIv2) From 7f349f3c8c5748935db524072b65cd2d68dc7788 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 20 Mar 2025 15:47:28 +0000 Subject: [PATCH 045/492] Delete unused `query_result_of_sr` function Signed-off-by: Vincent Liu --- ocaml/xapi/storage_mux_reg.ml | 7 ------- ocaml/xapi/storage_mux_reg.mli | 3 --- 2 files changed, 10 deletions(-) diff --git a/ocaml/xapi/storage_mux_reg.ml b/ocaml/xapi/storage_mux_reg.ml index f7eff2ab43d..0bad2d28c96 100644 --- a/ocaml/xapi/storage_mux_reg.ml +++ b/ocaml/xapi/storage_mux_reg.ml @@ -71,13 +71,6 @@ let unregister sr = ) ) -(* This function is entirely unused, but I am not sure if it should be - deleted or not *) -let query_result_of_sr sr = - with_lock m (fun () -> - Option.map (fun x -> x.query_result) (Hashtbl.find_opt plugins sr) - ) - let sr_has_capability sr capability = with_lock m (fun () -> match Hashtbl.find_opt plugins sr with diff --git a/ocaml/xapi/storage_mux_reg.mli b/ocaml/xapi/storage_mux_reg.mli index 218cd5f96b3..623a6eb7c1f 100644 --- a/ocaml/xapi/storage_mux_reg.mli +++ b/ocaml/xapi/storage_mux_reg.mli @@ -36,9 +36,6 @@ val register : val unregister : Storage_interface.sr -> unit -val query_result_of_sr : - Storage_interface.sr -> Storage_interface.query_result option - val sr_has_capability : Storage_interface.sr -> Smint.Feature.capability -> bool val of_sr : Storage_interface.sr -> processor From 112ef69fe414717a9a2949986b144c4a3e91255b Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 21 Mar 2025 11:29:35 +0000 Subject: [PATCH 046/492] style: Some coding style improvements Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate_helper.ml | 30 ++++++++++++++------------- ocaml/xapi/storage_migrate_helper.mli | 4 ---- ocaml/xapi/storage_mux.ml | 2 +- ocaml/xapi/storage_mux_reg.ml | 12 +++++------ ocaml/xapi/storage_mux_reg.mli | 2 +- 5 files changed, 23 insertions(+), 27 deletions(-) diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index 19660598620..b7b1eb6c6f9 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -25,6 +25,8 @@ open Xapi_stdext_pervasives.Pervasiveext open Xmlrpc_client module State = struct + let failwith_fmt fmt = Printf.ksprintf failwith fmt + module Receive_state = struct type t = { sr: Sr.t @@ -44,7 +46,7 @@ module State = struct | Ok y -> y | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Receive_state.t: %s" m) + failwith_fmt "Failed to unmarshal Receive_state.t: %s" m end module Send_state = struct @@ -100,7 +102,7 @@ module State = struct | Ok y -> y | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Send_state.t: %s" m) + failwith_fmt "Failed to unmarshal Send_state.t: %s" m end module Copy_state = struct @@ -122,7 +124,7 @@ module State = struct | Ok y -> y | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Copy_state.t: %s" m) + failwith_fmt "Failed to unmarshal Copy_state.t: %s" m end let loaded = ref false @@ -205,12 +207,6 @@ module State = struct Hashtbl.iter (Hashtbl.replace table) (hashtbl_of_rpc ~of_rpc:Copy_state.t_of_rpc rpc) - let load () = - ignore_exn (fun () -> load_one (Send_table active_send)) ; - ignore_exn (fun () -> load_one (Recv_table active_recv)) ; - ignore_exn (fun () -> load_one (Copy_table active_copy)) ; - loaded := true - let save_one : type a. a table -> unit = fun table -> to_string table |> Unixext.write_string_to_file (path_of_table table) @@ -222,6 +218,12 @@ module State = struct save_one (Copy_table active_copy) let access_table ~save_after f table = + let load () = + ignore_exn (fun () -> load_one (Send_table active_send)) ; + ignore_exn (fun () -> load_one (Recv_table active_recv)) ; + ignore_exn (fun () -> load_one (Copy_table active_copy)) ; + loaded := true + in Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun () -> if not !loaded then load () ; let result = f table in @@ -263,9 +265,10 @@ module State = struct access_table ~save_after:true (fun table -> Hashtbl.remove table id) table let clear () = - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_send ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_recv ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_copy + let clear_one (type a) (tbl : (string, a) Hashtbl.t) : unit = + access_table ~save_after:true Hashtbl.clear tbl + in + clear_one active_send ; clear_one active_recv ; clear_one active_copy let remove_local_mirror id = remove id active_send @@ -306,8 +309,7 @@ module State = struct failwith "Bad id" end -let vdi_info x = - match x with +let vdi_info = function | Some (Vdi_info v) -> v | _ -> diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index 7af4f39ed39..29753436c78 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -188,10 +188,6 @@ module State : sig val rpc_of_path : string -> Rpc.t - val load_one : 'a table -> unit - - val load : unit -> unit - val save_one : 'a table -> unit val save : unit -> unit diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 2c2ba86d2df..9b071b86187 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -50,7 +50,7 @@ module Mux = struct List.fold_left (fun acc (sr, result) -> Printf.sprintf "For SR: %s" (s_of_sr sr) - :: string_of_sm_result (fun s -> s) result + :: s_of_sm_result (fun s -> s) result :: acc ) [] results diff --git a/ocaml/xapi/storage_mux_reg.ml b/ocaml/xapi/storage_mux_reg.ml index 0bad2d28c96..488fcd9aa89 100644 --- a/ocaml/xapi/storage_mux_reg.ml +++ b/ocaml/xapi/storage_mux_reg.ml @@ -17,8 +17,6 @@ and multiplexing between them according to the sr type *) module D = Debug.Make (struct let name = __MODULE__ end) -open D - type processor = Rpc.call -> Rpc.response let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute @@ -56,7 +54,7 @@ let register sr rpc d info = ; query_result= info ; features } ; - debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) + D.debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) (String.concat ", " (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) ) @@ -65,7 +63,7 @@ let register sr rpc d info = let unregister sr = with_lock m (fun () -> Hashtbl.remove plugins sr ; - debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) + D.debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) (String.concat ", " (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) ) @@ -87,7 +85,7 @@ let of_sr sr = | Some x -> x.processor | None -> - error "No storage plugin for SR: %s (currently-registered = [ %s ])" + D.error "No storage plugin for SR: %s (currently-registered = [ %s ])" (s_of_sr sr) (String.concat ", " (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) @@ -101,7 +99,7 @@ let smapi_version_of_sr sr = | Some x -> x.query_result.smapi_version | None -> - error "No storage plugin for SR: %s (currently-registered = [ %s ])" + D.error "No storage plugin for SR: %s (currently-registered = [ %s ])" (s_of_sr sr) (String.concat ", " (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) @@ -111,7 +109,7 @@ let smapi_version_of_sr sr = type 'a sm_result = SMSuccess of 'a | SMFailure of exn -let string_of_sm_result f = function +let s_of_sm_result f = function | SMSuccess x -> Printf.sprintf "Success: %s" (f x) | SMFailure e -> diff --git a/ocaml/xapi/storage_mux_reg.mli b/ocaml/xapi/storage_mux_reg.mli index 623a6eb7c1f..7d4eee95214 100644 --- a/ocaml/xapi/storage_mux_reg.mli +++ b/ocaml/xapi/storage_mux_reg.mli @@ -45,7 +45,7 @@ val smapi_version_of_sr : type 'a sm_result = SMSuccess of 'a | SMFailure of exn -val string_of_sm_result : ('a -> string) -> 'a sm_result -> string +val s_of_sm_result : ('a -> string) -> 'a sm_result -> string val success : 'a sm_result -> bool From 12e5680762f2770b95276e321666cebcdcd7cc61 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Tue, 25 Mar 2025 07:50:35 +0000 Subject: [PATCH 047/492] CP-53477 Update host/pool datamodel to support SSH status query and configure Add new host object fields: - ssh_enabled - ssh_enabled_timeout - ssh_expiry - console_idle_timeout Add new host/pool API to enable to set a temporary enabled SSH service timeout - set_ssh_enabled_timeout Add new host/pool API to enable to set console timeout - set_console_idle_timeout Signed-off-by: Lunfan Zhang --- ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_errors.ml | 3 +++ ocaml/idl/datamodel_host.ml | 41 +++++++++++++++++++++++++++++++ ocaml/idl/datamodel_pool.ml | 29 ++++++++++++++++++++++ ocaml/idl/schematest.ml | 2 +- ocaml/tests/common/test_common.ml | 3 ++- ocaml/xapi-consts/api_errors.ml | 3 +++ ocaml/xapi/message_forwarding.ml | 28 +++++++++++++++++++++ ocaml/xapi/xapi_host.ml | 8 +++++- ocaml/xapi/xapi_host.mli | 6 +++++ ocaml/xapi/xapi_pool.ml | 4 +++ ocaml/xapi/xapi_pool.mli | 6 +++++ 12 files changed, 131 insertions(+), 4 deletions(-) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 50bc585b7ac..a044c9a0f2d 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 786 +let schema_minor_vsn = 787 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 27bb8a7bf98..2aa0f803dec 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2043,6 +2043,9 @@ let _ = error Api_errors.host_driver_no_hardware ["driver variant"] ~doc:"No hardware present for this host driver variant" () ; + error Api_errors.set_console_idle_timeout_failed ["timeout"] + ~doc:"Failed to set console idle timeout." () ; + error Api_errors.tls_verification_not_enabled_in_pool [] ~doc: "TLS verification has not been enabled in the pool successfully, please \ diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 99f4ebcf316..81fa9ab0474 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -2368,6 +2368,29 @@ let disable_ssh = ~params:[(Ref _host, "self", "The host")] ~allowed_roles:_R_POOL_ADMIN () +let set_ssh_enabled_timeout = + call ~name:"set_ssh_enabled_timeout" ~lifecycle:[] + ~doc:"Set the SSH service enabled timeout for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; ( Int + , "value" + , "The SSH enabled timeout in seconds (0 means no timeout, max 2 days)" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_console_idle_timeout = + call ~name:"set_console_idle_timeout" ~lifecycle:[] + ~doc:"Set the console idle timeout for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; (Int, "value", "The idle console timeout in seconds") + ] + ~allowed_roles:_R_POOL_ADMIN () + let latest_synced_updates_applied_state = Enum ( "latest_synced_updates_applied_state" @@ -2527,6 +2550,8 @@ let t = ; emergency_clear_mandatory_guidance ; enable_ssh ; disable_ssh + ; set_ssh_enabled_timeout + ; set_console_idle_timeout ] ~contents: ([ @@ -2964,6 +2989,22 @@ let t = ~default_value:(Some (VString "")) "last_update_hash" "The SHA256 checksum of updateinfo of the most recently applied \ update on the host" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool true)) "ssh_enabled" + "True if SSH access is enabled for the host" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int + ~default_value:(Some (VInt 0L)) "ssh_enabled_timeout" + "The timeout in seconds after which SSH access will be \ + automatically disabled (0 means never), this setting will be \ + applied every time the SSH is enabled by XAPI" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:DateTime + ~default_value:(Some (VDateTime Date.epoch)) "ssh_expiry" + "The time in UTC after which the SSH access will be automatically \ + disabled" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int + ~default_value:(Some (VInt 0L)) "console_idle_timeout" + "The timeout in seconds after which idle console will be \ + automatically terminated (0 means never)" ] ) () diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index c35c6789f7f..dedcd2f4dc9 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1571,6 +1571,33 @@ let disable_ssh = ~params:[(Ref _pool, "self", "The pool")] ~allowed_roles:_R_POOL_ADMIN () +let set_ssh_enabled_timeout = + call ~name:"set_ssh_enabled_timeout" ~lifecycle:[] + ~doc:"Set the SSH enabled timeout for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Int + , "value" + , "The SSH enabled timeout in seconds. (0 means no timeout, max 2 days)" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_console_idle_timeout = + call ~name:"set_console_idle_timeout" ~lifecycle:[] + ~doc:"Set the console idle timeout for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Int + , "value" + , "The idle SSH/VNC session timeout in seconds. A value of 0 means no \ + timeout." + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + (** A pool class *) let t = create_obj ~in_db:true @@ -1667,6 +1694,8 @@ let t = ; get_guest_secureboot_readiness ; enable_ssh ; disable_ssh + ; set_ssh_enabled_timeout + ; set_console_idle_timeout ] ~contents: ([ diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 255e094e1dd..7d706fdca3a 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "ad67a64cd47cdea32085518c1fb38d27" +let last_known_schema_hash = "0cc42d0325bd7ea01a5024d63b835bfb" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7b5484a02ba..feab7169f5a 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -215,7 +215,8 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~last_software_update:(Xapi_host.get_servertime ~__context ~host:ref) ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] ~pending_guidances_full:[] - ~last_update_hash:"" ; + ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L + ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ; ref let make_pif ~__context ~network ~host ?(device = "eth0") diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 906e22bf259..42390c2b9fb 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1424,3 +1424,6 @@ let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = add_error "TLS_VERIFICATION_NOT_ENABLED_IN_POOL" + +let set_console_idle_timeout_failed = + add_error "SET_CONSOLE_IDLE_TIMEOUT_FAILED" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index dc77569e646..c9268e82d3b 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -1185,6 +1185,18 @@ functor let disable_ssh ~__context ~self = info "%s: pool = '%s'" __FUNCTION__ (pool_uuid ~__context self) ; Local.Pool.disable_ssh ~__context ~self + + let set_ssh_enabled_timeout ~__context ~self ~value = + info "Pool.set_ssh_enabled_timeout: pool='%s' value='%Ld'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_ssh_enabled_timeout ~__context ~self ~value + + let set_console_idle_timeout ~__context ~self ~value = + info "Pool.set_console_idle_timeout: pool='%s' value='%Ld'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_console_idle_timeout ~__context ~self ~value end module VM = struct @@ -4035,6 +4047,22 @@ functor let local_fn = Local.Host.disable_ssh ~self in let remote_fn = Client.Host.disable_ssh ~self in do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_ssh_enabled_timeout ~__context ~self ~value = + info "Host.set_ssh_enabled_timeout: host='%s' value='%Ld'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_ssh_enabled_timeout ~self ~value in + let remote_fn = Client.Host.set_ssh_enabled_timeout ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_console_idle_timeout ~__context ~self ~value = + info "Host.set_console_idle_timeout: host='%s' value='%Ld'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_console_idle_timeout ~self ~value in + let remote_fn = Client.Host.set_console_idle_timeout ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn end module Host_crashdump = struct diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index acd8a10936a..cfc73f80b2d 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1042,7 +1042,9 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] ~tls_verification_enabled ~last_software_update ~last_update_hash ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown - ~pending_guidances_recommended:[] ~pending_guidances_full:[] ; + ~pending_guidances_recommended:[] ~pending_guidances_full:[] + ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch + ~console_idle_timeout:0L ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; @@ -3131,3 +3133,7 @@ let disable_ssh ~__context ~self = (Api_errors.Server_error (Api_errors.disable_ssh_failed, [Ref.string_of self]) ) + +let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () + +let set_console_idle_timeout ~__context ~self:_ ~value:_ = () diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index e1dc46c91ac..34b0d74ce3c 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -567,3 +567,9 @@ val emergency_clear_mandatory_guidance : __context:Context.t -> unit val enable_ssh : __context:Context.t -> self:API.ref_host -> unit val disable_ssh : __context:Context.t -> self:API.ref_host -> unit + +val set_ssh_enabled_timeout : + __context:Context.t -> self:API.ref_host -> value:int64 -> unit + +val set_console_idle_timeout : + __context:Context.t -> self:API.ref_host -> value:int64 -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 68c41f91a42..434ab3b9dc5 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -4008,3 +4008,7 @@ end let enable_ssh = Ssh.enable let disable_ssh = Ssh.disable + +let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () + +let set_console_idle_timeout ~__context ~self:_ ~value:_ = () diff --git a/ocaml/xapi/xapi_pool.mli b/ocaml/xapi/xapi_pool.mli index 7d00d339805..b9c5b6fea3f 100644 --- a/ocaml/xapi/xapi_pool.mli +++ b/ocaml/xapi/xapi_pool.mli @@ -437,3 +437,9 @@ val put_bundle_handler : Http.Request.t -> Unix.file_descr -> 'a -> unit val enable_ssh : __context:Context.t -> self:API.ref_pool -> unit val disable_ssh : __context:Context.t -> self:API.ref_pool -> unit + +val set_ssh_enabled_timeout : + __context:Context.t -> self:API.ref_pool -> value:int64 -> unit + +val set_console_idle_timeout : + __context:Context.t -> self:API.ref_pool -> value:int64 -> unit From 71ce0082d7ee8c9b3837182baab4e0b14280de32 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 28 Mar 2025 11:02:37 +0000 Subject: [PATCH 048/492] CP-53472: Create parent for add_module spans This keeps them contained, allowing them to easily be hidden in Jaeger Signed-off-by: Steven Woods --- python3/packages/observer.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/python3/packages/observer.py b/python3/packages/observer.py index cf2ebf32226..df53f5f0f1f 100644 --- a/python3/packages/observer.py +++ b/python3/packages/observer.py @@ -266,7 +266,7 @@ def bugtool_filenamer(): tracers = list(map(create_tracer_from_config, configs)) debug("tracers=%s", tracers) - def span_of_tracers(wrapped=None, span_name_prefix=""): + def span_of_tracers(wrapped=None, span_name_prefix="", parent_context=None): """ Public decorator that creates a trace around a function. @@ -289,7 +289,7 @@ def span_of_tracers(wrapped=None, span_name_prefix=""): that the function is decorated properly on the second pass. """ if wrapped is None: # handle decorators with parameters - return functools.partial(span_of_tracers, span_name_prefix=span_name_prefix) + return functools.partial(span_of_tracers, span_name_prefix=span_name_prefix, parent_context=parent_context) @wrapt.decorator def instrument_function(wrapped, _, args, kwargs): @@ -352,11 +352,10 @@ def autoinstrument_class(aclass): traceback.format_exc(), ) - def autoinstrument_module(amodule): """Autoinstrument the classes and functions in a module.""" - with tracers[0].start_as_current_span(f"auto_instrumentation.add_module: {amodule}"): + with tracers[0].start_as_current_span(f"auto_instrumentation.add_module: {amodule}", context=parent_context): # Instrument the methods of the classes in the module for _, aclass in inspect.getmembers(amodule, inspect.isclass): try: @@ -373,14 +372,15 @@ def autoinstrument_module(amodule): return instrument_function(wrapped) - def _patch_module(module_name): + def _patch_module(module_name, parent_context=None): wrapt.importer.discover_post_import_hooks(module_name) wrapt.importer.when_imported(module_name)( - lambda hook: span_of_tracers(wrapped=hook) + lambda hook: span_of_tracers(wrapped=hook, parent_context=parent_context) ) - for m in module_names: - _patch_module(m) + def _patch_modules(parent_context): + for m in module_names: + _patch_module(m, parent_context=parent_context) # Create spans to track observer.py's setup duration t = tracers[0] @@ -388,6 +388,10 @@ def _patch_module(module_name): import_span = t.start_span("observer.py:imports", start_time=import_ts_start) import_span.end(end_time=import_ts_end) + # Set a parent span in the add_module spans' context so that they are kept together + with t.start_span("auto_instrumentation") as aspan: + _patch_modules(trace.set_span_in_context(aspan)) + return span_of_tracers, _patch_module From 5c387c4ceee6ef799da4101e95f25f71b7657d91 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 28 Mar 2025 18:08:40 +0000 Subject: [PATCH 049/492] libs: resources tests add logs dependency, necessary for future versions of logs Signed-off-by: Pau Ruiz Safont --- ocaml/libs/resources/test/dune | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/libs/resources/test/dune b/ocaml/libs/resources/test/dune index 2bc052f2e63..15a20f0bfa3 100644 --- a/ocaml/libs/resources/test/dune +++ b/ocaml/libs/resources/test/dune @@ -4,6 +4,7 @@ (action (run %{test} -e)) (libraries safe-resources + logs logs.fmt alcotest ) From f2f72dc94ffa376be7eee91c2f0d0a97b2fb11fc Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 28 Mar 2025 18:11:46 +0000 Subject: [PATCH 050/492] xapi-stdext-threads, test: use stable testing interface Alcotest changed how match_raises to actually be reasonable instead of failing when the mathing function returns true (note how is_oob expects a string with two spaces) This also highlights why matching strings in exception is a bad idea. Instead raise a non-stringy exception and use polymorphic compare. Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml | 4 +++- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli | 2 ++ .../libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml | 9 ++------- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 7293ae625e1..45bbd93622c 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -19,6 +19,8 @@ type 'a t = {default: 'a event; mutable size: int; mutable data: 'a event array} exception EmptyHeap +exception OutOfBounds of int + let create n default = if n <= 0 then invalid_arg "create" @@ -61,7 +63,7 @@ let maximum h = let remove h s = if h.size <= 0 then raise EmptyHeap ; if s < 0 || s >= h.size then - invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; + raise (OutOfBounds s) ; let n = h.size - 1 in let d = h.data in let x = d.(n) in diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli index 19f8bf1e33f..b542ef9d65d 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -18,6 +18,8 @@ type 'a t exception EmptyHeap +exception OutOfBounds of int + val create : int -> 'a -> 'a t (** [create n default] creates an empty Imperative priority queue. The queue initially is initialized to store [n] elements. diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index a9cc2611da8..aab499da74a 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -18,14 +18,9 @@ module Ipq = Xapi_stdext_threads_scheduler.Ipq let test_out_of_index () = let q = Ipq.create 10 0 in Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.elapsed ()} ; - let is_oob = function - | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> - true - | _ -> - false - in let oob_check n = - (Alcotest.match_raises "out of bound" is_oob @@ fun () -> Ipq.remove q n) ; + let oob = Ipq.OutOfBounds n in + (Alcotest.check_raises "out of bound" oob @@ fun () -> Ipq.remove q n) ; Alcotest.(check bool) "same value" false (Ipq.is_empty q) in oob_check 10 ; From 4ee32f2fa6ffde149eec128fd9e8cf1d054e5f21 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 31 Mar 2025 07:40:58 +0100 Subject: [PATCH 051/492] CA-408841 rrd: don't update rrds when ds_update is called with an empty datasource array Several assumptions in the ds_update function expect at least one element to be present in the array, and will raise Invalid_argument("index out of bounds") otherwise. This could be triggered by disabling all datasources for a particular plugin/owner combination, for example. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 203 +++++++++++++++++---------------- 1 file changed, 103 insertions(+), 100 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 126442db986..b4c827705c9 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -379,121 +379,124 @@ let process_ds_value ds value interval new_rrd = rate let ds_update rrd timestamp valuesandtransforms new_rrd = - (* Interval is the time between this and the last update - - Currently ds_update is called with datasources that belong to a single - plugin, correspondingly they all have the same timestamp. - Further refactoring is needed if timestamps per measurement are to be - introduced. *) - let first_ds_index, _ = valuesandtransforms.(0) in - let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in - let interval = timestamp -. last_updated in - (* Work around the clock going backwards *) - let interval = if interval < 0. then 5. else interval in - - (* start time (st) and age of the last processed pdp and the currently occupied one *) - let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in - let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in - - (* The number of pdps that should result from this update *) - let elapsed_pdp_st = - Int64.to_int ((occu_pdp_st --- proc_pdp_st) /// rrd.timestep) - in - - (* if we're due one or more PDPs, pre_int is the amount of the - current update interval that will be used in calculating them, and - post_int is the amount left over - this step. If a PDP isn't post is what's left over *) - let pre_int, post_int = - if elapsed_pdp_st > 0 then - let pre = interval -. occu_pdp_age in - (pre, occu_pdp_age) - else - (interval, 0.0) - in - - (* We're now done with the last_updated value, so update it *) - rrd.last_updated <- timestamp ; + (* CA-408841 - don't update the rrd at all if list of datasources is empty *) + if valuesandtransforms <> [||] then ( + (* Interval is the time between this and the last update + + Currently ds_update is called with datasources that belong to a single + plugin, correspondingly they all have the same timestamp. + Further refactoring is needed if timestamps per measurement are to be + introduced. *) + let first_ds_index, _ = valuesandtransforms.(0) in + let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in + let interval = timestamp -. last_updated in + (* Work around the clock going backwards *) + let interval = if interval < 0. then 5. else interval in + + (* start time (st) and age of the last processed pdp and the currently occupied one *) + let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in + let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in + + (* The number of pdps that should result from this update *) + let elapsed_pdp_st = + Int64.to_int ((occu_pdp_st --- proc_pdp_st) /// rrd.timestep) + in - (* Calculate the values we're going to store based on the input data and the type of the DS *) - let v2s = - Array.map - (fun (i, {value; _}) -> - let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in - rrd.rrd_dss.(i).ds_last_updated <- timestamp ; - (i, v) - ) - valuesandtransforms - in - (* Update the PDP accumulators up until the most recent PDP *) - Array.iter - (fun (i, value) -> - let ds = rrd.rrd_dss.(i) in - if Utils.isnan value then - ds.ds_unknown_sec <- pre_int + (* if we're due one or more PDPs, pre_int is the amount of the + current update interval that will be used in calculating them, and + post_int is the amount left over + this step. If a PDP isn't post is what's left over *) + let pre_int, post_int = + if elapsed_pdp_st > 0 then + let pre = interval -. occu_pdp_age in + (pre, occu_pdp_age) else - (* CA-404597 - Gauge and Absolute values should be passed as-is, - without being involved in time-based calculations at all. - This applies to calculations below as well *) - match ds.ds_ty with - | Gauge | Absolute -> - ds.ds_value <- value - | Derive -> - ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval) - ) - v2s ; + (interval, 0.0) + in + + (* We're now done with the last_updated value, so update it *) + rrd.last_updated <- timestamp ; - (* If we've passed a PDP point, we need to update the RRAs *) - if elapsed_pdp_st > 0 then ( - (* Calculate the PDPs for each DS *) - let pdps = + (* Calculate the values we're going to store based on the input data and the type of the DS *) + let v2s = Array.map - (fun (i, {transform; _}) -> - let ds = rrd.rrd_dss.(i) in - if interval > ds.ds_mrhb then - (i, nan) - else - let raw = - let proc_pdp_st = get_float_time last_updated rrd.timestep in - let occu_pdp_st = get_float_time timestamp rrd.timestep in - - match ds.ds_ty with - | Gauge | Absolute -> - ds.ds_value - | Derive -> - ds.ds_value - /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) - in - (* Apply the transform after the raw value has been calculated *) - let raw = apply_transform_function transform raw in - (* Make sure the values are not out of bounds after all the processing *) - if raw < ds.ds_min || raw > ds.ds_max then - (i, nan) - else - (i, raw) + (fun (i, {value; _}) -> + let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in + rrd.rrd_dss.(i).ds_last_updated <- timestamp ; + (i, v) ) valuesandtransforms in - - rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; - - (* Reset the PDP accumulators *) + (* Update the PDP accumulators up until the most recent PDP *) Array.iter (fun (i, value) -> let ds = rrd.rrd_dss.(i) in - if Utils.isnan value then ( - ds.ds_value <- 0.0 ; - ds.ds_unknown_sec <- post_int - ) else ( - ds.ds_unknown_sec <- 0.0 ; + if Utils.isnan value then + ds.ds_unknown_sec <- pre_int + else + (* CA-404597 - Gauge and Absolute values should be passed as-is, + without being involved in time-based calculations at all. + This applies to calculations below as well *) match ds.ds_ty with | Gauge | Absolute -> ds.ds_value <- value | Derive -> - ds.ds_value <- post_int *. value /. interval - ) + ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval) ) - v2s + v2s ; + + (* If we've passed a PDP point, we need to update the RRAs *) + if elapsed_pdp_st > 0 then ( + (* Calculate the PDPs for each DS *) + let pdps = + Array.map + (fun (i, {transform; _}) -> + let ds = rrd.rrd_dss.(i) in + if interval > ds.ds_mrhb then + (i, nan) + else + let raw = + let proc_pdp_st = get_float_time last_updated rrd.timestep in + let occu_pdp_st = get_float_time timestamp rrd.timestep in + + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value + | Derive -> + ds.ds_value + /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) + in + (* Apply the transform after the raw value has been calculated *) + let raw = apply_transform_function transform raw in + (* Make sure the values are not out of bounds after all the processing *) + if raw < ds.ds_min || raw > ds.ds_max then + (i, nan) + else + (i, raw) + ) + valuesandtransforms + in + + rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; + + (* Reset the PDP accumulators *) + Array.iter + (fun (i, value) -> + let ds = rrd.rrd_dss.(i) in + if Utils.isnan value then ( + ds.ds_value <- 0.0 ; + ds.ds_unknown_sec <- post_int + ) else ( + ds.ds_unknown_sec <- 0.0 ; + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value <- value + | Derive -> + ds.ds_value <- post_int *. value /. interval + ) + ) + v2s + ) ) (** Update the rrd with named values rather than just an ordered array From 7ca0ce2935907e5c283ddae0ff787ffaba488e7d Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 31 Mar 2025 14:02:34 +0100 Subject: [PATCH 052/492] Remove xapi-stdext-date It's has been replaced by clock Signed-off-by: Pau Ruiz Safont --- Makefile | 2 +- dune-project | 10 -- .../xapi-stdext/lib/xapi-stdext-date/date.ml | 35 ------ .../xapi-stdext/lib/xapi-stdext-date/date.mli | 115 ------------------ .../xapi-stdext/lib/xapi-stdext-date/dune | 6 - xapi-stdext-date.opam | 29 ----- 6 files changed, 1 insertion(+), 196 deletions(-) delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune delete mode 100644 xapi-stdext-date.opam diff --git a/Makefile b/Makefile index d6099331c60..7d0677277fa 100644 --- a/Makefile +++ b/Makefile @@ -156,7 +156,7 @@ DUNE_IU_PACKAGES1+=message-switch message-switch-cli message-switch-core message DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli DUNE_IU_PACKAGES1+=xapi-nbd varstored-guard xapi-log xapi-open-uri xapi-tracing xapi-tracing-export xapi-expiry-alerts cohttp-posix DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk -DUNE_IU_PACKAGES1+=xapi-stdext-date xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools +DUNE_IU_PACKAGES1+=xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools install-dune1: diff --git a/dune-project b/dune-project index 2d8cab13744..a3d6651fc45 100644 --- a/dune-project +++ b/dune-project @@ -669,16 +669,6 @@ This package provides an Lwt compatible interface to the library.") (name xapi-inventory) ) -(package - (name xapi-stdext-date) - (synopsis "Xapi's standard library extension, Dates") - (authors "Jonathan Ludlam") - (depends - (clock (= :version)) - ptime - ) -) - (package (name xapi-stdext-encodings) (synopsis "Xapi's standard library extension, Encodings") diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml deleted file mode 100644 index ef0f98ce13a..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml +++ /dev/null @@ -1,35 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -include Clock.Date - -let never = epoch - -let of_string = of_iso8601 - -let to_string = to_rfc3339 - -let of_float = of_unix_time - -let to_float = to_unix_time - -let rfc822_of_float = of_unix_time - -let rfc822_to_string = to_rfc822 - -let eq = equal - -type iso8601 = t - -type rfc822 = t diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli deleted file mode 100644 index 9af45ab6096..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli +++ /dev/null @@ -1,115 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -(** date-time with support for keeping timezone for ISO 8601 conversion *) -type t = Clock.Date.t - -(** Conversions *) - -val of_ptime : Ptime.t -> t -(** Convert ptime to time in UTC *) - -val to_ptime : t -> Ptime.t -(** Convert date/time to a ptime value: the number of seconds since 00:00:00 - UTC, 1 Jan 1970. Assumes the underlying {!t} is in UTC *) - -val of_unix_time : float -> t -(** Convert calendar time [x] (as returned by e.g. Unix.time), to time in UTC *) - -val to_unix_time : t -> float -(** Convert date/time to a unix timestamp: the number of seconds since - 00:00:00 UTC, 1 Jan 1970. Assumes the underlying {!t} is in UTC *) - -val to_rfc822 : t -> string -(** Convert date/time to email-formatted (RFC 822) string. *) - -val to_rfc3339 : t -> string -(** Convert date/time to an RFC-3339-formatted string. It also complies with - the ISO 8601 format *) - -val of_iso8601 : string -> t -(** Convert ISO 8601 formatted string to a date/time value. Does not accept a - timezone annotated datetime - i.e. string must be UTC, and end with a Z *) - -val epoch : t -(** 00:00:00 UTC, 1 Jan 1970, in UTC *) - -val now : unit -> t -(** Count the number of seconds passed since 00:00:00 UTC, 1 Jan 1970, in UTC *) - -val _localtime_string : Ptime.tz_offset_s option -> Ptime.t -> string -(** exposed for testing *) - -val localtime : unit -> t -(** Count the number of seconds passed since 00:00:00 UTC, 1 Jan 1970, in local - time *) - -(** Comparisons *) - -val equal : t -> t -> bool -(** [equal a b] returns whether [a] and [b] are equal *) - -val compare : t -> t -> int -(** [compare a b] returns -1 if [a] is earlier than [b], 1 if [a] is later than - [b] or the ordering of the timezone printer *) - -val is_earlier : than:t -> t -> bool -(** [is_earlier ~than a] returns whether the timestamp [a] happens before - [than] *) - -val is_later : than:t -> t -> bool -(** [is_later ~than a] returns whether the timestamp [a] happens after [than] - *) - -val diff : t -> t -> Ptime.Span.t -(** [diff a b] returns the span of time corresponding to [a - b] *) - -(** Deprecated bindings, these will be removed in a future release: *) - -val eq : t -> t -> bool -[@@deprecated "Use Date.equal"] -(** [eq a b] returns whether [a] and [b] are equal *) - -val rfc822_to_string : t -> string -[@@deprecated "Use Date.to_rfc822"] -(** Same as {!to_rfc822} *) - -val rfc822_of_float : float -> t -[@@deprecated "Use Date.of_unix_time"] -(** Same as {!of_unix_time} *) - -val of_float : float -> t -[@@deprecated "Use Date.of_unix_time"] -(** Same as {!of_unix_time} *) - -val to_float : t -> float -[@@deprecated "Use Date.to_unix_time"] -(** Same as {!to_unix_time} *) - -val to_string : t -> string -[@@deprecated "Use Date.to_rfc3339"] -(** Same as {!to_rfc3339} *) - -val of_string : string -> t -[@@deprecated "Use Date.of_iso8601"] -(** Same as {!of_iso8601} *) - -val never : t [@@deprecated "Use Date.epoch"] -(** Same as {!epoch} *) - -(** Deprecated alias for {!t} *) -type iso8601 = t [@@deprecated "Use Date.t"] - -(** Deprecated alias for {!t} *) -type rfc822 = t [@@deprecated "Use Date.t"] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune deleted file mode 100644 index 8566d86e12c..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune +++ /dev/null @@ -1,6 +0,0 @@ -(library - (name xapi_stdext_date) - (public_name xapi-stdext-date) - (modules :standard) - (libraries clock ptime) -) diff --git a/xapi-stdext-date.opam b/xapi-stdext-date.opam deleted file mode 100644 index 06021447900..00000000000 --- a/xapi-stdext-date.opam +++ /dev/null @@ -1,29 +0,0 @@ -# This file is generated by dune, edit dune-project instead -opam-version: "2.0" -synopsis: "Xapi's standard library extension, Dates" -maintainer: ["Xapi project maintainers"] -authors: ["Jonathan Ludlam"] -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api/issues" -depends: [ - "dune" {>= "3.15"} - "clock" {= version} - "ptime" - "odoc" {with-doc} -] -build: [ - ["dune" "subst"] {dev} - [ - "dune" - "build" - "-p" - name - "-j" - jobs - "@install" - "@runtest" {with-test} - "@doc" {with-doc} - ] -] -dev-repo: "git+https://github.com/xapi-project/xen-api.git" From db38cb035d917e2c3045b9cdf2b419179775f938 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 28 Mar 2025 17:38:55 +0800 Subject: [PATCH 053/492] Fixup Add "Changed" records for 2 APIs which were missed. Fix "param_release" for 3 added parameters. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_host.ml | 12 ++++++++++-- ocaml/idl/datamodel_pool.ml | 14 +++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 81fa9ab0474..c1472c8773a 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1297,14 +1297,22 @@ let create_params = ; param_doc= "The SHA256 checksum of updateinfo of the most recently applied update \ on the host" - ; param_release= numbered_release "24.39.0-next" + ; param_release= numbered_release "24.40.0" ; param_default= Some (VString "") } ] let create = call ~name:"create" ~in_oss_since:None - ~lifecycle:[(Published, rel_rio, "Create a new host record")] + ~lifecycle: + [ + (Published, rel_rio, "Create a new host record") + ; ( Changed + , "24.40.0" + , "Added --last_update_hash option to allow last_update_hash to be \ + kept for host joined a pool" + ) + ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") ~hide_from_docs:true ~allowed_roles:_R_POOL_OP () diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index dedcd2f4dc9..0e001036222 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1249,7 +1249,15 @@ let remove_repository = let sync_updates = call ~name:"sync_updates" - ~lifecycle:[(Published, "1.329.0", "")] + ~lifecycle: + [ + (Published, "1.329.0", "") + ; ( Changed + , "25.7.0" + , "Added --username --password options to allow syncing updates from a \ + remote_pool type repository" + ) + ] ~doc:"Sync with the enabled repository" ~versioned_params: [ @@ -1286,14 +1294,14 @@ let sync_updates = param_type= String ; param_name= "username" ; param_doc= "The username of the remote pool" - ; param_release= numbered_release "25.6.0-next" + ; param_release= numbered_release "25.7.0" ; param_default= Some (VString "") } ; { param_type= String ; param_name= "password" ; param_doc= "The password of the remote pool" - ; param_release= numbered_release "25.6.0-next" + ; param_release= numbered_release "25.7.0" ; param_default= Some (VString "") } ] From a875364a9dde4ba82b83e43de366a0833e31be33 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 28 Mar 2025 19:23:14 +0800 Subject: [PATCH 054/492] CP-53711: Copy SSH settings from pool coordinator in pool join During pool join, create a new host obj in the remote pool coordinator DB with the same SSH settings as pool coordinator. Also configure SSH service locally before xapi restart which will persist after xapi restart. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_host.ml | 41 +++++++++++++++++++++++++++++++ ocaml/tests/common/test_common.ml | 7 ++++-- ocaml/tests/test_host.ml | 2 ++ ocaml/xapi/dbsync_slave.ml | 3 ++- ocaml/xapi/xapi_host.ml | 8 +++--- ocaml/xapi/xapi_host.mli | 4 +++ ocaml/xapi/xapi_pool.ml | 35 +++++++++++++++++++++++++- 7 files changed, 92 insertions(+), 8 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index c1472c8773a..737d49f45d9 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1300,6 +1300,41 @@ let create_params = ; param_release= numbered_release "24.40.0" ; param_default= Some (VString "") } + ; { + param_type= Bool + ; param_name= "ssh_enabled" + ; param_doc= "True if SSH access is enabled for the host" + ; param_release= numbered_release "25.14.0-next" + ; param_default= Some (VBool true) + } + ; { + param_type= Int + ; param_name= "ssh_enabled_timeout" + ; param_doc= + "The timeout in seconds after which SSH access will be automatically \ + disabled (0 means never), this setting will be applied every time the \ + SSH is enabled by XAPI" + ; param_release= numbered_release "25.14.0-next" + ; param_default= Some (VInt 0L) + } + ; { + param_type= DateTime + ; param_name= "ssh_expiry" + ; param_doc= + "The time in UTC after which the SSH access will be automatically \ + disabled" + ; param_release= numbered_release "25.14.0-next" + ; param_default= Some (VDateTime Date.epoch) + } + ; { + param_type= Int + ; param_name= "console_idle_timeout" + ; param_doc= + "The timeout in seconds after which idle console will be automatically \ + terminated (0 means never)" + ; param_release= numbered_release "25.14.0-next" + ; param_default= Some (VInt 0L) + } ] let create = @@ -1312,6 +1347,12 @@ let create = , "Added --last_update_hash option to allow last_update_hash to be \ kept for host joined a pool" ) + ; ( Changed + , "25.14.0-next" + , "Added --ssh_enabled --ssh_enabled_timeout --ssh_expiry \ + --console_idle_timeout options to allow them to be configured for \ + new host" + ) ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index feab7169f5a..d59e1134411 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -170,13 +170,16 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) ?(chipset_info = []) ?(ssl_legacy = false) - ?(last_software_update = Date.epoch) ?(last_update_hash = "") () = + ?(last_software_update = Date.epoch) ?(last_update_hash = "") + ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) + ?(console_idle_timeout = 0L) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy ~last_software_update - ~last_update_hash + ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry + ~console_idle_timeout in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index edca58ac032..03f526d08d0 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -24,6 +24,8 @@ let add_host __context name = ~license_params:[] ~edition:"" ~license_server:[] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false ~last_software_update:Clock.Date.epoch ~last_update_hash:"" + ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch + ~console_idle_timeout:0L ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 366990e2692..fc9609db638 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -59,7 +59,8 @@ let create_localhost ~__context info = ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Date.epoch ~last_update_hash:"" + ~last_software_update:Date.epoch ~last_update_hash:"" ~ssh_enabled:true + ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch ~console_idle_timeout:0L in () diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index cfc73f80b2d..dfa0013d7ac 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -978,7 +978,8 @@ let is_host_alive ~__context ~host = let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info - ~ssl_legacy:_ ~last_software_update ~last_update_hash = + ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1042,9 +1043,8 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] ~tls_verification_enabled ~last_software_update ~last_update_hash ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown - ~pending_guidances_recommended:[] ~pending_guidances_full:[] - ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch - ~console_idle_timeout:0L ; + ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~ssh_enabled + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 34b0d74ce3c..b041722fac9 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -130,6 +130,10 @@ val create : -> ssl_legacy:bool -> last_software_update:API.datetime -> last_update_hash:string + -> ssh_enabled:bool + -> ssh_enabled_timeout:int64 + -> ssh_expiry:API.datetime + -> console_idle_timeout:int64 -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 434ab3b9dc5..832ae0df272 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -943,6 +943,38 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : create_or_get_sr_on_master __context rpc session_id (my_local_cache_sr, my_local_cache_sr_rec) in + let remote_coordinator = get_master ~rpc ~session_id in + let ssh_enabled = + Client.Host.get_ssh_enabled ~rpc ~session_id ~self:remote_coordinator + in + let ssh_enabled_timeout = + Client.Host.get_ssh_enabled_timeout ~rpc ~session_id + ~self:remote_coordinator + in + let console_idle_timeout = + Client.Host.get_console_idle_timeout ~rpc ~session_id + ~self:remote_coordinator + in + (* Configure SSH service on local host *) + Xapi_host.set_console_idle_timeout ~__context ~self:host_ref + ~value:console_idle_timeout ; + Xapi_host.set_ssh_enabled_timeout ~__context ~self:host_ref + ~value:ssh_enabled_timeout ; + ( match ssh_enabled with + | true -> + Xapi_host.enable_ssh ~__context ~self:host_ref + | false -> + Xapi_host.disable_ssh ~__context ~self:host_ref + ) ; + (* As ssh_expiry will be updated by host.enable_ssh and host.disable_ssh, + there is a corner case when the joiner's SSH state will not match SSH + service state in its new coordinator exactly: if the joiner joins when + SSH service has been enabled in the new coordinator, while not timed + out yet, the joiner will start SSH service with timeout + host.ssh_enabled_timeout, which means SSH service in the joiner will + be disabled later than in the new coordinator. *) + let ssh_expiry = Db.Host.get_ssh_expiry ~__context ~self:host_ref in + debug "Creating host object on master" ; let ref = Client.Host.create ~rpc ~session_id ~uuid:my_uuid @@ -962,7 +994,8 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~local_cache_sr ~chipset_info:host.API.host_chipset_info ~ssl_legacy:false ~last_software_update:host.API.host_last_software_update - ~last_update_hash:host.API.host_last_update_hash + ~last_update_hash:host.API.host_last_update_hash ~ssh_enabled + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout in (* Copy other-config into newly created host record: *) no_exn From caea30d7dfb878204ded95b6cb476057aaba2005 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Thu, 20 Mar 2025 11:31:00 +0000 Subject: [PATCH 055/492] CP-50836: Add VM_migrate_downtime and request_shutdown spans This simplifies making optimisations as it removes the need to look in the logs for these events. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 1 + ocaml/xenopsd/xc/xenops_server_xen.ml | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 350227aa028..4baf67bcd56 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -2988,6 +2988,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ] ) t ; + with_tracing ~task:t ~name:"VM_migrate_downtime_end" Fun.id ; Handshake.send s Handshake.Success ; debug "VM.receive_memory: Synchronisation point 4" with e -> diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index db54f18293d..8d8ff30181d 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2515,6 +2515,7 @@ module VM = struct @@ fun () -> pre_suspend_callback task ) ; + with_tracing ~task ~name:"VM_save_request_shutdown" @@ fun () -> if not ( with_tracing ~task @@ -2523,6 +2524,13 @@ module VM = struct ) then raise (Xenopsd_error Failed_to_acknowledge_suspend_request) ; + (* If this is for a migration, record the begin time *) + ( match data with + | FD _ -> + with_tracing ~task ~name:"VM_migrate_downtime_begin" Fun.id + | _ -> + () + ) ; if not ( with_tracing ~task From 8442197513e1d70a97e5a63c482d2772f79da2ad Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 2 Apr 2025 14:55:53 +0100 Subject: [PATCH 056/492] opam: move all opam files to the opam subdir This cleans up the root folder Signed-off-by: Pau Ruiz Safont --- dune-project | 1 + clock.opam => opam/clock.opam | 0 cohttp-posix.opam => opam/cohttp-posix.opam | 0 cohttp-posix.opam.template => opam/cohttp-posix.opam.template | 0 ezxenstore.opam => opam/ezxenstore.opam | 0 ezxenstore.opam.template => opam/ezxenstore.opam.template | 0 forkexec.opam => opam/forkexec.opam | 0 gzip.opam => opam/gzip.opam | 0 gzip.opam.template => opam/gzip.opam.template | 0 http-lib.opam => opam/http-lib.opam | 0 message-switch-cli.opam => opam/message-switch-cli.opam | 0 .../message-switch-cli.opam.template | 0 message-switch-core.opam => opam/message-switch-core.opam | 0 message-switch-lwt.opam => opam/message-switch-lwt.opam | 0 .../message-switch-lwt.opam.template | 0 message-switch-unix.opam => opam/message-switch-unix.opam | 0 message-switch.opam => opam/message-switch.opam | 0 .../message-switch.opam.template | 0 pciutil.opam => opam/pciutil.opam | 0 pciutil.opam.template => opam/pciutil.opam.template | 0 rrd-transport.opam => opam/rrd-transport.opam | 0 rrdd-plugin.opam => opam/rrdd-plugin.opam | 0 safe-resources.opam => opam/safe-resources.opam | 0 .../safe-resources.opam.template | 0 sexpr.opam => opam/sexpr.opam | 0 sexpr.opam.template => opam/sexpr.opam.template | 0 stunnel.opam => opam/stunnel.opam | 0 tgroup.opam => opam/tgroup.opam | 0 uuid.opam => opam/uuid.opam | 0 uuid.opam.template => opam/uuid.opam.template | 0 varstored-guard.opam => opam/varstored-guard.opam | 0 .../varstored-guard.opam.template | 0 vhd-format-lwt.opam => opam/vhd-format-lwt.opam | 0 .../vhd-format-lwt.opam.template | 0 vhd-format.opam => opam/vhd-format.opam | 0 vhd-format.opam.template => opam/vhd-format.opam.template | 0 vhd-tool.opam => opam/vhd-tool.opam | 0 xapi-cli-protocol.opam => opam/xapi-cli-protocol.opam | 0 .../xapi-cli-protocol.opam.template | 0 xapi-client.opam => opam/xapi-client.opam | 0 xapi-client.opam.template => opam/xapi-client.opam.template | 0 xapi-compression.opam => opam/xapi-compression.opam | 0 .../xapi-compression.opam.template | 0 xapi-consts.opam => opam/xapi-consts.opam | 0 xapi-consts.opam.template => opam/xapi-consts.opam.template | 0 xapi-datamodel.opam => opam/xapi-datamodel.opam | 0 .../xapi-datamodel.opam.template | 0 xapi-debug.opam => opam/xapi-debug.opam | 0 xapi-expiry-alerts.opam => opam/xapi-expiry-alerts.opam | 0 .../xapi-expiry-alerts.opam.template | 0 xapi-forkexecd.opam => opam/xapi-forkexecd.opam | 0 xapi-idl.opam => opam/xapi-idl.opam | 0 xapi-idl.opam.template => opam/xapi-idl.opam.template | 0 xapi-inventory.opam => opam/xapi-inventory.opam | 0 .../xapi-inventory.opam.template | 0 xapi-log.opam => opam/xapi-log.opam | 0 xapi-log.opam.template => opam/xapi-log.opam.template | 0 xapi-nbd.opam => opam/xapi-nbd.opam | 0 xapi-nbd.opam.template => opam/xapi-nbd.opam.template | 0 xapi-open-uri.opam => opam/xapi-open-uri.opam | 0 xapi-open-uri.opam.template => opam/xapi-open-uri.opam.template | 0 xapi-rrd.opam => opam/xapi-rrd.opam | 0 xapi-rrd.opam.template => opam/xapi-rrd.opam.template | 0 xapi-schema.opam => opam/xapi-schema.opam | 0 xapi-schema.opam.template => opam/xapi-schema.opam.template | 0 xapi-sdk.opam => opam/xapi-sdk.opam | 0 xapi-stdext-encodings.opam => opam/xapi-stdext-encodings.opam | 0 .../xapi-stdext-encodings.opam.template | 0 xapi-stdext-pervasives.opam => opam/xapi-stdext-pervasives.opam | 0 xapi-stdext-std.opam => opam/xapi-stdext-std.opam | 0 xapi-stdext-threads.opam => opam/xapi-stdext-threads.opam | 0 xapi-stdext-unix.opam => opam/xapi-stdext-unix.opam | 0 .../xapi-stdext-unix.opam.template | 0 xapi-stdext-zerocheck.opam => opam/xapi-stdext-zerocheck.opam | 0 xapi-storage-cli.opam => opam/xapi-storage-cli.opam | 0 .../xapi-storage-cli.opam.template | 0 xapi-storage-script.opam => opam/xapi-storage-script.opam | 0 .../xapi-storage-script.opam.template | 0 xapi-storage.opam => opam/xapi-storage.opam | 0 xapi-storage.opam.template => opam/xapi-storage.opam.template | 0 xapi-tools.opam => opam/xapi-tools.opam | 0 xapi-tools.opam.template => opam/xapi-tools.opam.template | 0 xapi-tracing-export.opam => opam/xapi-tracing-export.opam | 0 .../xapi-tracing-export.opam.template | 0 xapi-tracing.opam => opam/xapi-tracing.opam | 0 xapi-tracing.opam.template => opam/xapi-tracing.opam.template | 0 xapi-types.opam => opam/xapi-types.opam | 0 xapi-types.opam.template => opam/xapi-types.opam.template | 0 xapi.opam => opam/xapi.opam | 0 xapi.opam.template => opam/xapi.opam.template | 0 xe.opam => opam/xe.opam | 0 xe.opam.template => opam/xe.opam.template | 0 xen-api-client-lwt.opam => opam/xen-api-client-lwt.opam | 0 .../xen-api-client-lwt.opam.template | 0 xen-api-client.opam => opam/xen-api-client.opam | 0 xml-light2.opam => opam/xml-light2.opam | 0 xml-light2.opam.template => opam/xml-light2.opam.template | 0 zstd.opam => opam/zstd.opam | 0 zstd.opam.template => opam/zstd.opam.template | 0 99 files changed, 1 insertion(+) rename clock.opam => opam/clock.opam (100%) rename cohttp-posix.opam => opam/cohttp-posix.opam (100%) rename cohttp-posix.opam.template => opam/cohttp-posix.opam.template (100%) rename ezxenstore.opam => opam/ezxenstore.opam (100%) rename ezxenstore.opam.template => opam/ezxenstore.opam.template (100%) rename forkexec.opam => opam/forkexec.opam (100%) rename gzip.opam => opam/gzip.opam (100%) rename gzip.opam.template => opam/gzip.opam.template (100%) rename http-lib.opam => opam/http-lib.opam (100%) rename message-switch-cli.opam => opam/message-switch-cli.opam (100%) rename message-switch-cli.opam.template => opam/message-switch-cli.opam.template (100%) rename message-switch-core.opam => opam/message-switch-core.opam (100%) rename message-switch-lwt.opam => opam/message-switch-lwt.opam (100%) rename message-switch-lwt.opam.template => opam/message-switch-lwt.opam.template (100%) rename message-switch-unix.opam => opam/message-switch-unix.opam (100%) rename message-switch.opam => opam/message-switch.opam (100%) rename message-switch.opam.template => opam/message-switch.opam.template (100%) rename pciutil.opam => opam/pciutil.opam (100%) rename pciutil.opam.template => opam/pciutil.opam.template (100%) rename rrd-transport.opam => opam/rrd-transport.opam (100%) rename rrdd-plugin.opam => opam/rrdd-plugin.opam (100%) rename safe-resources.opam => opam/safe-resources.opam (100%) rename safe-resources.opam.template => opam/safe-resources.opam.template (100%) rename sexpr.opam => opam/sexpr.opam (100%) rename sexpr.opam.template => opam/sexpr.opam.template (100%) rename stunnel.opam => opam/stunnel.opam (100%) rename tgroup.opam => opam/tgroup.opam (100%) rename uuid.opam => opam/uuid.opam (100%) rename uuid.opam.template => opam/uuid.opam.template (100%) rename varstored-guard.opam => opam/varstored-guard.opam (100%) rename varstored-guard.opam.template => opam/varstored-guard.opam.template (100%) rename vhd-format-lwt.opam => opam/vhd-format-lwt.opam (100%) rename vhd-format-lwt.opam.template => opam/vhd-format-lwt.opam.template (100%) rename vhd-format.opam => opam/vhd-format.opam (100%) rename vhd-format.opam.template => opam/vhd-format.opam.template (100%) rename vhd-tool.opam => opam/vhd-tool.opam (100%) rename xapi-cli-protocol.opam => opam/xapi-cli-protocol.opam (100%) rename xapi-cli-protocol.opam.template => opam/xapi-cli-protocol.opam.template (100%) rename xapi-client.opam => opam/xapi-client.opam (100%) rename xapi-client.opam.template => opam/xapi-client.opam.template (100%) rename xapi-compression.opam => opam/xapi-compression.opam (100%) rename xapi-compression.opam.template => opam/xapi-compression.opam.template (100%) rename xapi-consts.opam => opam/xapi-consts.opam (100%) rename xapi-consts.opam.template => opam/xapi-consts.opam.template (100%) rename xapi-datamodel.opam => opam/xapi-datamodel.opam (100%) rename xapi-datamodel.opam.template => opam/xapi-datamodel.opam.template (100%) rename xapi-debug.opam => opam/xapi-debug.opam (100%) rename xapi-expiry-alerts.opam => opam/xapi-expiry-alerts.opam (100%) rename xapi-expiry-alerts.opam.template => opam/xapi-expiry-alerts.opam.template (100%) rename xapi-forkexecd.opam => opam/xapi-forkexecd.opam (100%) rename xapi-idl.opam => opam/xapi-idl.opam (100%) rename xapi-idl.opam.template => opam/xapi-idl.opam.template (100%) rename xapi-inventory.opam => opam/xapi-inventory.opam (100%) rename xapi-inventory.opam.template => opam/xapi-inventory.opam.template (100%) rename xapi-log.opam => opam/xapi-log.opam (100%) rename xapi-log.opam.template => opam/xapi-log.opam.template (100%) rename xapi-nbd.opam => opam/xapi-nbd.opam (100%) rename xapi-nbd.opam.template => opam/xapi-nbd.opam.template (100%) rename xapi-open-uri.opam => opam/xapi-open-uri.opam (100%) rename xapi-open-uri.opam.template => opam/xapi-open-uri.opam.template (100%) rename xapi-rrd.opam => opam/xapi-rrd.opam (100%) rename xapi-rrd.opam.template => opam/xapi-rrd.opam.template (100%) rename xapi-schema.opam => opam/xapi-schema.opam (100%) rename xapi-schema.opam.template => opam/xapi-schema.opam.template (100%) rename xapi-sdk.opam => opam/xapi-sdk.opam (100%) rename xapi-stdext-encodings.opam => opam/xapi-stdext-encodings.opam (100%) rename xapi-stdext-encodings.opam.template => opam/xapi-stdext-encodings.opam.template (100%) rename xapi-stdext-pervasives.opam => opam/xapi-stdext-pervasives.opam (100%) rename xapi-stdext-std.opam => opam/xapi-stdext-std.opam (100%) rename xapi-stdext-threads.opam => opam/xapi-stdext-threads.opam (100%) rename xapi-stdext-unix.opam => opam/xapi-stdext-unix.opam (100%) rename xapi-stdext-unix.opam.template => opam/xapi-stdext-unix.opam.template (100%) rename xapi-stdext-zerocheck.opam => opam/xapi-stdext-zerocheck.opam (100%) rename xapi-storage-cli.opam => opam/xapi-storage-cli.opam (100%) rename xapi-storage-cli.opam.template => opam/xapi-storage-cli.opam.template (100%) rename xapi-storage-script.opam => opam/xapi-storage-script.opam (100%) rename xapi-storage-script.opam.template => opam/xapi-storage-script.opam.template (100%) rename xapi-storage.opam => opam/xapi-storage.opam (100%) rename xapi-storage.opam.template => opam/xapi-storage.opam.template (100%) rename xapi-tools.opam => opam/xapi-tools.opam (100%) rename xapi-tools.opam.template => opam/xapi-tools.opam.template (100%) rename xapi-tracing-export.opam => opam/xapi-tracing-export.opam (100%) rename xapi-tracing-export.opam.template => opam/xapi-tracing-export.opam.template (100%) rename xapi-tracing.opam => opam/xapi-tracing.opam (100%) rename xapi-tracing.opam.template => opam/xapi-tracing.opam.template (100%) rename xapi-types.opam => opam/xapi-types.opam (100%) rename xapi-types.opam.template => opam/xapi-types.opam.template (100%) rename xapi.opam => opam/xapi.opam (100%) rename xapi.opam.template => opam/xapi.opam.template (100%) rename xe.opam => opam/xe.opam (100%) rename xe.opam.template => opam/xe.opam.template (100%) rename xen-api-client-lwt.opam => opam/xen-api-client-lwt.opam (100%) rename xen-api-client-lwt.opam.template => opam/xen-api-client-lwt.opam.template (100%) rename xen-api-client.opam => opam/xen-api-client.opam (100%) rename xml-light2.opam => opam/xml-light2.opam (100%) rename xml-light2.opam.template => opam/xml-light2.opam.template (100%) rename zstd.opam => opam/zstd.opam (100%) rename zstd.opam.template => opam/zstd.opam.template (100%) diff --git a/dune-project b/dune-project index a3d6651fc45..1de533b179d 100644 --- a/dune-project +++ b/dune-project @@ -3,6 +3,7 @@ (formatting (enabled_for ocaml)) (using menhir 2.0) (using directory-targets 0.1) +(opam_file_location inside_opam_directory) (cram enable) (implicit_transitive_deps false) diff --git a/clock.opam b/opam/clock.opam similarity index 100% rename from clock.opam rename to opam/clock.opam diff --git a/cohttp-posix.opam b/opam/cohttp-posix.opam similarity index 100% rename from cohttp-posix.opam rename to opam/cohttp-posix.opam diff --git a/cohttp-posix.opam.template b/opam/cohttp-posix.opam.template similarity index 100% rename from cohttp-posix.opam.template rename to opam/cohttp-posix.opam.template diff --git a/ezxenstore.opam b/opam/ezxenstore.opam similarity index 100% rename from ezxenstore.opam rename to opam/ezxenstore.opam diff --git a/ezxenstore.opam.template b/opam/ezxenstore.opam.template similarity index 100% rename from ezxenstore.opam.template rename to opam/ezxenstore.opam.template diff --git a/forkexec.opam b/opam/forkexec.opam similarity index 100% rename from forkexec.opam rename to opam/forkexec.opam diff --git a/gzip.opam b/opam/gzip.opam similarity index 100% rename from gzip.opam rename to opam/gzip.opam diff --git a/gzip.opam.template b/opam/gzip.opam.template similarity index 100% rename from gzip.opam.template rename to opam/gzip.opam.template diff --git a/http-lib.opam b/opam/http-lib.opam similarity index 100% rename from http-lib.opam rename to opam/http-lib.opam diff --git a/message-switch-cli.opam b/opam/message-switch-cli.opam similarity index 100% rename from message-switch-cli.opam rename to opam/message-switch-cli.opam diff --git a/message-switch-cli.opam.template b/opam/message-switch-cli.opam.template similarity index 100% rename from message-switch-cli.opam.template rename to opam/message-switch-cli.opam.template diff --git a/message-switch-core.opam b/opam/message-switch-core.opam similarity index 100% rename from message-switch-core.opam rename to opam/message-switch-core.opam diff --git a/message-switch-lwt.opam b/opam/message-switch-lwt.opam similarity index 100% rename from message-switch-lwt.opam rename to opam/message-switch-lwt.opam diff --git a/message-switch-lwt.opam.template b/opam/message-switch-lwt.opam.template similarity index 100% rename from message-switch-lwt.opam.template rename to opam/message-switch-lwt.opam.template diff --git a/message-switch-unix.opam b/opam/message-switch-unix.opam similarity index 100% rename from message-switch-unix.opam rename to opam/message-switch-unix.opam diff --git a/message-switch.opam b/opam/message-switch.opam similarity index 100% rename from message-switch.opam rename to opam/message-switch.opam diff --git a/message-switch.opam.template b/opam/message-switch.opam.template similarity index 100% rename from message-switch.opam.template rename to opam/message-switch.opam.template diff --git a/pciutil.opam b/opam/pciutil.opam similarity index 100% rename from pciutil.opam rename to opam/pciutil.opam diff --git a/pciutil.opam.template b/opam/pciutil.opam.template similarity index 100% rename from pciutil.opam.template rename to opam/pciutil.opam.template diff --git a/rrd-transport.opam b/opam/rrd-transport.opam similarity index 100% rename from rrd-transport.opam rename to opam/rrd-transport.opam diff --git a/rrdd-plugin.opam b/opam/rrdd-plugin.opam similarity index 100% rename from rrdd-plugin.opam rename to opam/rrdd-plugin.opam diff --git a/safe-resources.opam b/opam/safe-resources.opam similarity index 100% rename from safe-resources.opam rename to opam/safe-resources.opam diff --git a/safe-resources.opam.template b/opam/safe-resources.opam.template similarity index 100% rename from safe-resources.opam.template rename to opam/safe-resources.opam.template diff --git a/sexpr.opam b/opam/sexpr.opam similarity index 100% rename from sexpr.opam rename to opam/sexpr.opam diff --git a/sexpr.opam.template b/opam/sexpr.opam.template similarity index 100% rename from sexpr.opam.template rename to opam/sexpr.opam.template diff --git a/stunnel.opam b/opam/stunnel.opam similarity index 100% rename from stunnel.opam rename to opam/stunnel.opam diff --git a/tgroup.opam b/opam/tgroup.opam similarity index 100% rename from tgroup.opam rename to opam/tgroup.opam diff --git a/uuid.opam b/opam/uuid.opam similarity index 100% rename from uuid.opam rename to opam/uuid.opam diff --git a/uuid.opam.template b/opam/uuid.opam.template similarity index 100% rename from uuid.opam.template rename to opam/uuid.opam.template diff --git a/varstored-guard.opam b/opam/varstored-guard.opam similarity index 100% rename from varstored-guard.opam rename to opam/varstored-guard.opam diff --git a/varstored-guard.opam.template b/opam/varstored-guard.opam.template similarity index 100% rename from varstored-guard.opam.template rename to opam/varstored-guard.opam.template diff --git a/vhd-format-lwt.opam b/opam/vhd-format-lwt.opam similarity index 100% rename from vhd-format-lwt.opam rename to opam/vhd-format-lwt.opam diff --git a/vhd-format-lwt.opam.template b/opam/vhd-format-lwt.opam.template similarity index 100% rename from vhd-format-lwt.opam.template rename to opam/vhd-format-lwt.opam.template diff --git a/vhd-format.opam b/opam/vhd-format.opam similarity index 100% rename from vhd-format.opam rename to opam/vhd-format.opam diff --git a/vhd-format.opam.template b/opam/vhd-format.opam.template similarity index 100% rename from vhd-format.opam.template rename to opam/vhd-format.opam.template diff --git a/vhd-tool.opam b/opam/vhd-tool.opam similarity index 100% rename from vhd-tool.opam rename to opam/vhd-tool.opam diff --git a/xapi-cli-protocol.opam b/opam/xapi-cli-protocol.opam similarity index 100% rename from xapi-cli-protocol.opam rename to opam/xapi-cli-protocol.opam diff --git a/xapi-cli-protocol.opam.template b/opam/xapi-cli-protocol.opam.template similarity index 100% rename from xapi-cli-protocol.opam.template rename to opam/xapi-cli-protocol.opam.template diff --git a/xapi-client.opam b/opam/xapi-client.opam similarity index 100% rename from xapi-client.opam rename to opam/xapi-client.opam diff --git a/xapi-client.opam.template b/opam/xapi-client.opam.template similarity index 100% rename from xapi-client.opam.template rename to opam/xapi-client.opam.template diff --git a/xapi-compression.opam b/opam/xapi-compression.opam similarity index 100% rename from xapi-compression.opam rename to opam/xapi-compression.opam diff --git a/xapi-compression.opam.template b/opam/xapi-compression.opam.template similarity index 100% rename from xapi-compression.opam.template rename to opam/xapi-compression.opam.template diff --git a/xapi-consts.opam b/opam/xapi-consts.opam similarity index 100% rename from xapi-consts.opam rename to opam/xapi-consts.opam diff --git a/xapi-consts.opam.template b/opam/xapi-consts.opam.template similarity index 100% rename from xapi-consts.opam.template rename to opam/xapi-consts.opam.template diff --git a/xapi-datamodel.opam b/opam/xapi-datamodel.opam similarity index 100% rename from xapi-datamodel.opam rename to opam/xapi-datamodel.opam diff --git a/xapi-datamodel.opam.template b/opam/xapi-datamodel.opam.template similarity index 100% rename from xapi-datamodel.opam.template rename to opam/xapi-datamodel.opam.template diff --git a/xapi-debug.opam b/opam/xapi-debug.opam similarity index 100% rename from xapi-debug.opam rename to opam/xapi-debug.opam diff --git a/xapi-expiry-alerts.opam b/opam/xapi-expiry-alerts.opam similarity index 100% rename from xapi-expiry-alerts.opam rename to opam/xapi-expiry-alerts.opam diff --git a/xapi-expiry-alerts.opam.template b/opam/xapi-expiry-alerts.opam.template similarity index 100% rename from xapi-expiry-alerts.opam.template rename to opam/xapi-expiry-alerts.opam.template diff --git a/xapi-forkexecd.opam b/opam/xapi-forkexecd.opam similarity index 100% rename from xapi-forkexecd.opam rename to opam/xapi-forkexecd.opam diff --git a/xapi-idl.opam b/opam/xapi-idl.opam similarity index 100% rename from xapi-idl.opam rename to opam/xapi-idl.opam diff --git a/xapi-idl.opam.template b/opam/xapi-idl.opam.template similarity index 100% rename from xapi-idl.opam.template rename to opam/xapi-idl.opam.template diff --git a/xapi-inventory.opam b/opam/xapi-inventory.opam similarity index 100% rename from xapi-inventory.opam rename to opam/xapi-inventory.opam diff --git a/xapi-inventory.opam.template b/opam/xapi-inventory.opam.template similarity index 100% rename from xapi-inventory.opam.template rename to opam/xapi-inventory.opam.template diff --git a/xapi-log.opam b/opam/xapi-log.opam similarity index 100% rename from xapi-log.opam rename to opam/xapi-log.opam diff --git a/xapi-log.opam.template b/opam/xapi-log.opam.template similarity index 100% rename from xapi-log.opam.template rename to opam/xapi-log.opam.template diff --git a/xapi-nbd.opam b/opam/xapi-nbd.opam similarity index 100% rename from xapi-nbd.opam rename to opam/xapi-nbd.opam diff --git a/xapi-nbd.opam.template b/opam/xapi-nbd.opam.template similarity index 100% rename from xapi-nbd.opam.template rename to opam/xapi-nbd.opam.template diff --git a/xapi-open-uri.opam b/opam/xapi-open-uri.opam similarity index 100% rename from xapi-open-uri.opam rename to opam/xapi-open-uri.opam diff --git a/xapi-open-uri.opam.template b/opam/xapi-open-uri.opam.template similarity index 100% rename from xapi-open-uri.opam.template rename to opam/xapi-open-uri.opam.template diff --git a/xapi-rrd.opam b/opam/xapi-rrd.opam similarity index 100% rename from xapi-rrd.opam rename to opam/xapi-rrd.opam diff --git a/xapi-rrd.opam.template b/opam/xapi-rrd.opam.template similarity index 100% rename from xapi-rrd.opam.template rename to opam/xapi-rrd.opam.template diff --git a/xapi-schema.opam b/opam/xapi-schema.opam similarity index 100% rename from xapi-schema.opam rename to opam/xapi-schema.opam diff --git a/xapi-schema.opam.template b/opam/xapi-schema.opam.template similarity index 100% rename from xapi-schema.opam.template rename to opam/xapi-schema.opam.template diff --git a/xapi-sdk.opam b/opam/xapi-sdk.opam similarity index 100% rename from xapi-sdk.opam rename to opam/xapi-sdk.opam diff --git a/xapi-stdext-encodings.opam b/opam/xapi-stdext-encodings.opam similarity index 100% rename from xapi-stdext-encodings.opam rename to opam/xapi-stdext-encodings.opam diff --git a/xapi-stdext-encodings.opam.template b/opam/xapi-stdext-encodings.opam.template similarity index 100% rename from xapi-stdext-encodings.opam.template rename to opam/xapi-stdext-encodings.opam.template diff --git a/xapi-stdext-pervasives.opam b/opam/xapi-stdext-pervasives.opam similarity index 100% rename from xapi-stdext-pervasives.opam rename to opam/xapi-stdext-pervasives.opam diff --git a/xapi-stdext-std.opam b/opam/xapi-stdext-std.opam similarity index 100% rename from xapi-stdext-std.opam rename to opam/xapi-stdext-std.opam diff --git a/xapi-stdext-threads.opam b/opam/xapi-stdext-threads.opam similarity index 100% rename from xapi-stdext-threads.opam rename to opam/xapi-stdext-threads.opam diff --git a/xapi-stdext-unix.opam b/opam/xapi-stdext-unix.opam similarity index 100% rename from xapi-stdext-unix.opam rename to opam/xapi-stdext-unix.opam diff --git a/xapi-stdext-unix.opam.template b/opam/xapi-stdext-unix.opam.template similarity index 100% rename from xapi-stdext-unix.opam.template rename to opam/xapi-stdext-unix.opam.template diff --git a/xapi-stdext-zerocheck.opam b/opam/xapi-stdext-zerocheck.opam similarity index 100% rename from xapi-stdext-zerocheck.opam rename to opam/xapi-stdext-zerocheck.opam diff --git a/xapi-storage-cli.opam b/opam/xapi-storage-cli.opam similarity index 100% rename from xapi-storage-cli.opam rename to opam/xapi-storage-cli.opam diff --git a/xapi-storage-cli.opam.template b/opam/xapi-storage-cli.opam.template similarity index 100% rename from xapi-storage-cli.opam.template rename to opam/xapi-storage-cli.opam.template diff --git a/xapi-storage-script.opam b/opam/xapi-storage-script.opam similarity index 100% rename from xapi-storage-script.opam rename to opam/xapi-storage-script.opam diff --git a/xapi-storage-script.opam.template b/opam/xapi-storage-script.opam.template similarity index 100% rename from xapi-storage-script.opam.template rename to opam/xapi-storage-script.opam.template diff --git a/xapi-storage.opam b/opam/xapi-storage.opam similarity index 100% rename from xapi-storage.opam rename to opam/xapi-storage.opam diff --git a/xapi-storage.opam.template b/opam/xapi-storage.opam.template similarity index 100% rename from xapi-storage.opam.template rename to opam/xapi-storage.opam.template diff --git a/xapi-tools.opam b/opam/xapi-tools.opam similarity index 100% rename from xapi-tools.opam rename to opam/xapi-tools.opam diff --git a/xapi-tools.opam.template b/opam/xapi-tools.opam.template similarity index 100% rename from xapi-tools.opam.template rename to opam/xapi-tools.opam.template diff --git a/xapi-tracing-export.opam b/opam/xapi-tracing-export.opam similarity index 100% rename from xapi-tracing-export.opam rename to opam/xapi-tracing-export.opam diff --git a/xapi-tracing-export.opam.template b/opam/xapi-tracing-export.opam.template similarity index 100% rename from xapi-tracing-export.opam.template rename to opam/xapi-tracing-export.opam.template diff --git a/xapi-tracing.opam b/opam/xapi-tracing.opam similarity index 100% rename from xapi-tracing.opam rename to opam/xapi-tracing.opam diff --git a/xapi-tracing.opam.template b/opam/xapi-tracing.opam.template similarity index 100% rename from xapi-tracing.opam.template rename to opam/xapi-tracing.opam.template diff --git a/xapi-types.opam b/opam/xapi-types.opam similarity index 100% rename from xapi-types.opam rename to opam/xapi-types.opam diff --git a/xapi-types.opam.template b/opam/xapi-types.opam.template similarity index 100% rename from xapi-types.opam.template rename to opam/xapi-types.opam.template diff --git a/xapi.opam b/opam/xapi.opam similarity index 100% rename from xapi.opam rename to opam/xapi.opam diff --git a/xapi.opam.template b/opam/xapi.opam.template similarity index 100% rename from xapi.opam.template rename to opam/xapi.opam.template diff --git a/xe.opam b/opam/xe.opam similarity index 100% rename from xe.opam rename to opam/xe.opam diff --git a/xe.opam.template b/opam/xe.opam.template similarity index 100% rename from xe.opam.template rename to opam/xe.opam.template diff --git a/xen-api-client-lwt.opam b/opam/xen-api-client-lwt.opam similarity index 100% rename from xen-api-client-lwt.opam rename to opam/xen-api-client-lwt.opam diff --git a/xen-api-client-lwt.opam.template b/opam/xen-api-client-lwt.opam.template similarity index 100% rename from xen-api-client-lwt.opam.template rename to opam/xen-api-client-lwt.opam.template diff --git a/xen-api-client.opam b/opam/xen-api-client.opam similarity index 100% rename from xen-api-client.opam rename to opam/xen-api-client.opam diff --git a/xml-light2.opam b/opam/xml-light2.opam similarity index 100% rename from xml-light2.opam rename to opam/xml-light2.opam diff --git a/xml-light2.opam.template b/opam/xml-light2.opam.template similarity index 100% rename from xml-light2.opam.template rename to opam/xml-light2.opam.template diff --git a/zstd.opam b/opam/zstd.opam similarity index 100% rename from zstd.opam rename to opam/zstd.opam diff --git a/zstd.opam.template b/opam/zstd.opam.template similarity index 100% rename from zstd.opam.template rename to opam/zstd.opam.template From 08fc9cfd11dff35e5f0524007a8df23a2cd0dd50 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 2 Apr 2025 15:17:23 +0100 Subject: [PATCH 057/492] github: update docs workflow to use latest setup-ocaml Signed-off-by: Pau Ruiz Safont --- .github/workflows/docs.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 94c7c1a687e..08d381eeaae 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,15 +31,23 @@ jobs: - name: Update Ubuntu repositories run: sudo apt-get update + # We set DUNE_CACHE_STORAGE_MODE, it is required for dune cache to work inside opam for now, + # otherwise it gets EXDEV and considers it a cache miss - name: Use ocaml - uses: ocaml/setup-ocaml@v2 + uses: ocaml/setup-ocaml@v3 with: ocaml-compiler: ${{ steps.dotenv.outputs.ocaml_version_full }} opam-repositories: | xs-opam: ${{ steps.dotenv.outputs.repository }} + dune-cache: true + opam-pin: false + cache-prefix: v3-${{ steps.system-info.outputs.name }}-${{ steps.system-info.outputs.release }} + env: + DUNE_CACHE_STORAGE_MODE: copy - name: Install dependencies - run: opam pin list --short | xargs opam install --deps-only -v + shell: bash + run: opam install . --deps-only -v - name: Generate xapi-storage docs run: | From 586921f3a2dd278815da0b05510e3b51b27dd848 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 21 Mar 2025 10:51:15 +0000 Subject: [PATCH 058/492] maintenance: replace most 'maybe' functions Many years ago, 'maybe' used to be a synonym for Option.iter, Option.map and other functions. Use Option.x directly to avoid any misunderstnadings Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-client/event_helper.ml | 96 +++++++++++++++++++++---------- ocaml/xenopsd/xc/domain.ml | 6 +- 2 files changed, 68 insertions(+), 34 deletions(-) diff --git a/ocaml/xapi-client/event_helper.ml b/ocaml/xapi-client/event_helper.ml index 3ec6e7f9236..cbbeb978ba2 100644 --- a/ocaml/xapi-client/event_helper.ml +++ b/ocaml/xapi-client/event_helper.ml @@ -43,96 +43,132 @@ type event_record = | VMPP of [`VMPP] Ref.t * API.vMPP_t option | VMSS of [`VMSS] Ref.t * API.vMSS_t option -let maybe f x = match x with Some x -> Some (f x) | None -> None - let record_of_event ev = let rpc = ev.Event_types.snapshot in match ev.Event_types.ty with | "session" -> Session ( Ref.of_secret_string ev.Event_types.reference - , maybe API.session_t_of_rpc rpc + , Option.map API.session_t_of_rpc rpc ) | "task" -> - Task (Ref.of_string ev.Event_types.reference, maybe API.task_t_of_rpc rpc) + Task + ( Ref.of_string ev.Event_types.reference + , Option.map API.task_t_of_rpc rpc + ) | "event" -> Event - (Ref.of_string ev.Event_types.reference, maybe API.event_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.event_t_of_rpc rpc + ) | "vm" -> - VM (Ref.of_string ev.Event_types.reference, maybe API.vM_t_of_rpc rpc) + VM (Ref.of_string ev.Event_types.reference, Option.map API.vM_t_of_rpc rpc) | "vm_metrics" -> VM_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vM_metrics_t_of_rpc rpc + , Option.map API.vM_metrics_t_of_rpc rpc ) | "vm_guest_metrics" -> VM_guest_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vM_guest_metrics_t_of_rpc rpc + , Option.map API.vM_guest_metrics_t_of_rpc rpc ) | "host" -> - Host (Ref.of_string ev.Event_types.reference, maybe API.host_t_of_rpc rpc) + Host + ( Ref.of_string ev.Event_types.reference + , Option.map API.host_t_of_rpc rpc + ) | "host_metrics" -> Host_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.host_metrics_t_of_rpc rpc + , Option.map API.host_metrics_t_of_rpc rpc ) | "host_cpu" -> Host_cpu - (Ref.of_string ev.Event_types.reference, maybe API.host_cpu_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.host_cpu_t_of_rpc rpc + ) | "network" -> Network - (Ref.of_string ev.Event_types.reference, maybe API.network_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.network_t_of_rpc rpc + ) | "vif" -> - VIF (Ref.of_string ev.Event_types.reference, maybe API.vIF_t_of_rpc rpc) + VIF + (Ref.of_string ev.Event_types.reference, Option.map API.vIF_t_of_rpc rpc) | "vif_metrics" -> VIF_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vIF_metrics_t_of_rpc rpc + , Option.map API.vIF_metrics_t_of_rpc rpc ) | "pif" -> - PIF (Ref.of_string ev.Event_types.reference, maybe API.pIF_t_of_rpc rpc) + PIF + (Ref.of_string ev.Event_types.reference, Option.map API.pIF_t_of_rpc rpc) | "pif_metrics" -> PIF_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.pIF_metrics_t_of_rpc rpc + , Option.map API.pIF_metrics_t_of_rpc rpc ) | "sr" -> - SR (Ref.of_string ev.Event_types.reference, maybe API.sR_t_of_rpc rpc) + SR (Ref.of_string ev.Event_types.reference, Option.map API.sR_t_of_rpc rpc) | "vdi" -> - VDI (Ref.of_string ev.Event_types.reference, maybe API.vDI_t_of_rpc rpc) + VDI + (Ref.of_string ev.Event_types.reference, Option.map API.vDI_t_of_rpc rpc) | "vbd" -> - VBD (Ref.of_string ev.Event_types.reference, maybe API.vBD_t_of_rpc rpc) + VBD + (Ref.of_string ev.Event_types.reference, Option.map API.vBD_t_of_rpc rpc) | "vbd_metrics" -> VBD_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vBD_metrics_t_of_rpc rpc + , Option.map API.vBD_metrics_t_of_rpc rpc ) | "pbd" -> - PBD (Ref.of_string ev.Event_types.reference, maybe API.pBD_t_of_rpc rpc) + PBD + (Ref.of_string ev.Event_types.reference, Option.map API.pBD_t_of_rpc rpc) | "crashdump" -> Crashdump ( Ref.of_string ev.Event_types.reference - , maybe API.crashdump_t_of_rpc rpc + , Option.map API.crashdump_t_of_rpc rpc ) | "vtpm" -> - VTPM (Ref.of_string ev.Event_types.reference, maybe API.vTPM_t_of_rpc rpc) + VTPM + ( Ref.of_string ev.Event_types.reference + , Option.map API.vTPM_t_of_rpc rpc + ) | "console" -> Console - (Ref.of_string ev.Event_types.reference, maybe API.console_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.console_t_of_rpc rpc + ) | "user" -> - User (Ref.of_string ev.Event_types.reference, maybe API.user_t_of_rpc rpc) + User + ( Ref.of_string ev.Event_types.reference + , Option.map API.user_t_of_rpc rpc + ) | "pool" -> - Pool (Ref.of_string ev.Event_types.reference, maybe API.pool_t_of_rpc rpc) + Pool + ( Ref.of_string ev.Event_types.reference + , Option.map API.pool_t_of_rpc rpc + ) | "message" -> Message - (Ref.of_string ev.Event_types.reference, maybe API.message_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.message_t_of_rpc rpc + ) | "secret" -> Secret - (Ref.of_string ev.Event_types.reference, maybe API.secret_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.secret_t_of_rpc rpc + ) | "vmpp" -> - VMPP (Ref.of_string ev.Event_types.reference, maybe API.vMPP_t_of_rpc rpc) + VMPP + ( Ref.of_string ev.Event_types.reference + , Option.map API.vMPP_t_of_rpc rpc + ) | "vmss" -> - VMSS (Ref.of_string ev.Event_types.reference, maybe API.vMSS_t_of_rpc rpc) + VMSS + ( Ref.of_string ev.Event_types.reference + , Option.map API.vMSS_t_of_rpc rpc + ) | _ -> failwith "unknown event type" diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 07b1957db8c..a9022f26565 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -204,8 +204,6 @@ let assert_file_is_readable filename = error "Cannot read file %s" filename ; raise (Could_not_read_file filename) -let maybe f = function None -> () | Some x -> f x - (* Recursively iterate over a directory and all its children, calling fn for each *) let rec xenstore_iter t fn path = @@ -931,7 +929,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = error "VM = %s; domid = %d; %s" (Uuidx.to_string uuid) domid err_msg ; raise (Domain_build_pre_failed err_msg) in - maybe + Option.iter (fun mode -> log_reraise (Printf.sprintf "domain_set_timer_mode %d" mode) (fun () -> let xcext = Xenctrlext.get_handle () in @@ -1163,7 +1161,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid Memory.Linux.full_config static_max_mib video_mib target_mib vcpus shadow_multiplier in - maybe assert_file_is_readable pvinfo.ramdisk ; + Option.iter assert_file_is_readable pvinfo.ramdisk ; let store_port, console_port = build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid in From 3bf9f907bf867a04d686dd9b3b684574aedb314c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 4 Mar 2025 16:34:21 +0000 Subject: [PATCH 059/492] numa: add test binary that prints changes in free memory and domain lifetime Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/dune | 18 ++-- ocaml/xenopsd/xc/numa.ml | 176 ++++++++++++++++++++++++++++++++++++++ ocaml/xenopsd/xc/numa.mli | 0 3 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 ocaml/xenopsd/xc/numa.ml create mode 100644 ocaml/xenopsd/xc/numa.mli diff --git a/ocaml/xenopsd/xc/dune b/ocaml/xenopsd/xc/dune index f04f082d086..1bf73af404f 100644 --- a/ocaml/xenopsd/xc/dune +++ b/ocaml/xenopsd/xc/dune @@ -2,6 +2,7 @@ (name xenopsd_xc) (modes best) (modules :standard \ + numa xenops_xc_main memory_breakdown memory_summary @@ -68,13 +69,18 @@ ) (wrapped false) ) + +(executable + (name numa) + (modules numa) + (libraries fmt logs logs.fmt mtime mtime.clock threads.posix xenctrl xenopsd_xc) +) + (executable (name xenops_xc_main) (modes exe) (modules xenops_xc_main) - (libraries - ezxenstore.core uuid xapi-idl @@ -95,7 +101,7 @@ (libraries astring cmdliner - + ezxenstore.core uuid xapi-idl.memory @@ -112,13 +118,13 @@ (section sbin) (package xapi-tools) ) - + (executable (name memory_summary) (modes exe) (modules memory_summary) (libraries - + clock xapi-stdext-unix xapi_xenopsd @@ -143,7 +149,7 @@ (modules cancel_utils_test) (libraries cmdliner - + ezxenstore.core threads.posix xapi-idl.xen.interface diff --git a/ocaml/xenopsd/xc/numa.ml b/ocaml/xenopsd/xc/numa.ml new file mode 100644 index 00000000000..99f6473e9e4 --- /dev/null +++ b/ocaml/xenopsd/xc/numa.ml @@ -0,0 +1,176 @@ +(* Copyright (C) 2025 Cloud Software Group + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. +*) + +(* Monitoring loop that keeps track of per-numa-node memory changes, and prints + the change. Useful to see whether memory scrubbing is seen as used or free + memory by userspace *) +open! Xenctrlext + +let ( let@ ) f x = f x + +let stamp_tag : Mtime.span Logs.Tag.def = + Logs.Tag.def "stamp" ~doc:"Relative monotonic time stamp" Mtime.Span.pp + +let stamp c = Logs.Tag.(empty |> add stamp_tag (Mtime_clock.count c)) + +let xc = get_handle () + +let binary_prefixes = [""; "Ki"; "Mi"; "Gi"; "Ti"; "Pi"] + +let human_readable_bytes quantity = + let unit = "Bs" in + let print prefix q = Printf.sprintf "%Ld %s%s" q prefix unit in + let rec loop acc q = function + | [] -> + acc + | pre :: prefs -> + let quotient = Int64.div q 1024L in + let modulus = Int64.rem q 1024L in + let acc = + if Int64.equal modulus 0L then acc else print pre modulus :: acc + in + loop acc quotient prefs + in + if quantity = 0L then + print "" 0L + else + loop [] quantity binary_prefixes |> String.concat ", " + +let get_memory () = + let {memory; _} = numainfo xc in + memory + +let print_mem c mem = + for i = 0 to Array.length mem - 1 do + let {memfree; memsize} = mem.(i) in + let memfree = human_readable_bytes memfree in + let memsize = human_readable_bytes memsize in + Logs.app (fun m -> + m "\t%d: %s free out of %s" i memfree memsize ~tags:(stamp c) + ) + done + +let print_diff_mem before after = + if before > after then + Printf.sprintf "%s 🢆 " (Int64.sub before after |> human_readable_bytes) + else + Printf.sprintf "%s 🢅 " (Int64.sub after before |> human_readable_bytes) + +let diff c old cur = + let changed_yet = ref false in + for i = 0 to Int.min (Array.length old) (Array.length cur) - 1 do + let {memfree= a_free; _}, {memfree= b_free; _} = (old.(i), cur.(i)) in + if a_free <> b_free then ( + if not !changed_yet then + changed_yet := true ; + let free = human_readable_bytes b_free in + let updown = print_diff_mem a_free b_free in + Logs.app (fun m -> + m "\t%d: %s free (%s)" i free updown ~tags:(stamp (c ())) + ) + ) + done ; + !changed_yet + +let reporter ppf = + let report _src level ~over k msgf = + let k _ = over () ; k () in + let with_stamp h tags k ppf fmt = + let stamp = + match tags with + | None -> + None + | Some tags -> + Logs.Tag.find stamp_tag tags + in + let span_pp s = + match s with + | None -> + "0ns" + | Some s -> + Fmt.to_to_string Mtime.Span.pp s + in + Format.kfprintf k ppf + ("%a[%s] @[" ^^ fmt ^^ "@]@.") + Logs.pp_header (level, h) (span_pp stamp) + in + msgf @@ fun ?header ?tags fmt -> with_stamp header tags k ppf fmt + in + {Logs.report} + +let memory_changes () = + let max_time = Mtime.Span.(7 * s) in + + let memory = get_memory () in + let c = Mtime_clock.counter () in + print_mem c memory ; + let rec loop since_started since_changed previous = + let current = get_memory () in + + let since_started = ref since_started in + let timer () = + let last_changed = Mtime_clock.count since_changed in + if Mtime.Span.is_longer last_changed ~than:max_time then + since_started := Mtime_clock.counter () ; + !since_started + in + + let changed = diff timer previous current in + + let since_changed = + if changed then + Mtime_clock.counter () + else + !since_started + in + Unix.sleepf 0.01 ; + loop !since_started since_changed current + in + loop c c memory + +module DomainSet = Set.Make (Int) + +let get_domains xc = + Xenctrl.domain_getinfolist xc 0 + |> List.to_seq + |> Seq.map (function Xenctrl.{domid; _} -> domid) + |> DomainSet.of_seq + +let diff_domains c previous current = + let added = DomainSet.diff current previous in + let removed = DomainSet.diff previous current in + DomainSet.iter + (fun id -> Logs.app (fun m -> m "domain %d added" id ~tags:(stamp c))) + added ; + DomainSet.iter + (fun id -> Logs.app (fun m -> m "domain %d removed" id ~tags:(stamp c))) + removed + +let domain_changes xc = + let domains = get_domains xc in + let c = Mtime_clock.counter () in + let rec loop previous = + let current = get_domains xc in + diff_domains c previous current ; + Unix.sleepf 0.01 ; + loop current + in + loop domains + +let () = + Logs.set_reporter (reporter Format.std_formatter) ; + Logs.set_level (Some Logs.Info) ; + + ignore (Thread.create memory_changes () : Thread.t) ; + let@ xc = Xenctrl.with_intf in + domain_changes xc diff --git a/ocaml/xenopsd/xc/numa.mli b/ocaml/xenopsd/xc/numa.mli new file mode 100644 index 00000000000..e69de29bb2d From 6a915a3eb4eb78ab570b28aa41084e1955c9e228 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 10 Mar 2025 11:51:33 +0000 Subject: [PATCH 060/492] CP-53658: adapt claim_pages to new version with numa node parameter Now the numa node needs to be passed. A special value of -1n is used to signify that no node is meant to be used. Since this is arch-dependent, a Nativeint.t is used to encode the value. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/c_stubs/xenctrlext_stubs.c | 7 ++++--- ocaml/xenopsd/xc/xenctrlext.ml | 15 ++++++++++++++- ocaml/xenopsd/xc/xenctrlext.mli | 11 +++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c index 0e427548ed4..2cdcfe37b3d 100644 --- a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c +++ b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c @@ -672,16 +672,17 @@ CAMLprim value stub_xenforeignmemory_unmap(value fmem, value mapping) } CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, value domid_val, - value nr_pages_val) + value numa_node_val, value nr_pages_val) { - CAMLparam3(xch_val, domid_val, nr_pages_val); + CAMLparam4(xch_val, domid_val, numa_node_val, nr_pages_val); int retval, the_errno; xc_interface* xch = xch_of_val(xch_val); uint32_t domid = Int_val(domid_val); + unsigned int numa_node = Int_val(numa_node_val); unsigned long nr_pages = Long_val(nr_pages_val); caml_release_runtime_system(); - retval = xc_domain_claim_pages(xch, domid, nr_pages); + retval = xc_domain_claim_pages(xch, domid, numa_node, nr_pages); the_errno = errno; caml_acquire_runtime_system(); diff --git a/ocaml/xenopsd/xc/xenctrlext.ml b/ocaml/xenopsd/xc/xenctrlext.ml index 5cea490864a..a0e0c0ed311 100644 --- a/ocaml/xenopsd/xc/xenctrlext.ml +++ b/ocaml/xenopsd/xc/xenctrlext.ml @@ -109,5 +109,18 @@ external combine_cpu_policies : int64 array -> int64 array -> int64 array external policy_is_compatible : int64 array -> int64 array -> string option = "stub_xenctrlext_featuresets_are_compatible" -external domain_claim_pages : handle -> domid -> int -> unit +external stub_domain_claim_pages : handle -> domid -> int -> int -> unit = "stub_xenctrlext_domain_claim_pages" + +module NumaNode = struct + type t = int + + (** Defined as XC_NUMA_NO_NODE in xen.git/tools/include/xenguest.h, it's an + unsigned int (~0U) *) + let none = 0xFFFFFFFF + + let from = Fun.id +end + +let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages = + stub_domain_claim_pages handle domid numa_node nr_pages diff --git a/ocaml/xenopsd/xc/xenctrlext.mli b/ocaml/xenopsd/xc/xenctrlext.mli index 2a4632780ce..39c6eeff514 100644 --- a/ocaml/xenopsd/xc/xenctrlext.mli +++ b/ocaml/xenopsd/xc/xenctrlext.mli @@ -91,5 +91,12 @@ external combine_cpu_policies : int64 array -> int64 array -> int64 array external policy_is_compatible : int64 array -> int64 array -> string option = "stub_xenctrlext_featuresets_are_compatible" -external domain_claim_pages : handle -> domid -> int -> unit - = "stub_xenctrlext_domain_claim_pages" +module NumaNode : sig + type t + + val none : t + + val from : int -> t +end + +val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit From 33abbd28e18a20a1060eaa7654b0c3120be1b78f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 12 Mar 2025 10:15:38 +0000 Subject: [PATCH 061/492] xenctrl: Don't use numa_node in domain_claim_pages calls This binding is only available in Xen 4.21 (unreleased) Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/c_stubs/xenctrlext_stubs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c index 2cdcfe37b3d..d7f3fee8f5e 100644 --- a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c +++ b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c @@ -678,11 +678,11 @@ CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, value domid_val int retval, the_errno; xc_interface* xch = xch_of_val(xch_val); uint32_t domid = Int_val(domid_val); - unsigned int numa_node = Int_val(numa_node_val); + // unsigned int numa_node = Int_val(numa_node_val); unsigned long nr_pages = Long_val(nr_pages_val); caml_release_runtime_system(); - retval = xc_domain_claim_pages(xch, domid, numa_node, nr_pages); + retval = xc_domain_claim_pages(xch, domid, /*numa_node,*/ nr_pages); the_errno = errno; caml_acquire_runtime_system(); From 687ad1dc2cb63a2d6347e4d53e104c4dde7caf60 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 17 Mar 2025 15:34:33 +0000 Subject: [PATCH 062/492] xenopsd: log_reraise doesn't ignore the result Previously unit was returned every single time, but the result of the inner function will need to be used in the near future. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index a9022f26565..348c5de44bc 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -919,7 +919,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = let timer_mode = int_platform_flag "timer_mode" in let log_reraise call_str f = debug "VM = %s; domid = %d; %s" (Uuidx.to_string uuid) domid call_str ; - try ignore (f ()) + try f () with e -> let bt = Printexc.get_backtrace () in debug "Backtrace: %s" bt ; From 6f6b6386beb89787803d3bf15f564f30afa2a80f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 12 Mar 2025 16:06:03 +0000 Subject: [PATCH 063/492] CP-54065, xenopsd: use domain_claim_pages on a single node, if possible Xen currently supports to modes to claim memory for a domain: without any node in particular, or claim memory in a single NUMA node. When planning a domain, return the nodes that will host the domain, and how much memory. In the case where the domain fits in a single NUMA node, claim pages on that node, otherwise fall back to previous behaviour. The memory claims need to happen while the memory measurements hold valid, that is while no VMs are started, otherwise ENOMEM might be returned. Because the current mode is a best-effort, log when the claim does not work. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/softaffinity.ml | 11 +- ocaml/xenopsd/lib/softaffinity.mli | 6 +- ocaml/xenopsd/lib/topology.ml | 2 +- ocaml/xenopsd/lib/topology.mli | 5 +- ocaml/xenopsd/test/test_topology.ml | 14 +- ocaml/xenopsd/xc/domain.ml | 217 +++++++++++++++----------- ocaml/xenopsd/xc/xenctrlext.mli | 1 + ocaml/xenopsd/xc/xenops_server_xen.ml | 26 +-- 8 files changed, 165 insertions(+), 117 deletions(-) diff --git a/ocaml/xenopsd/lib/softaffinity.ml b/ocaml/xenopsd/lib/softaffinity.ml index 4e38640dcd1..1e7231506da 100644 --- a/ocaml/xenopsd/lib/softaffinity.ml +++ b/ocaml/xenopsd/lib/softaffinity.ml @@ -26,10 +26,9 @@ let plan host nodes ~vm = (Fmt.to_to_string NUMARequest.pp_dump requested) (Fmt.to_to_string NUMAResource.pp_dump allocated) ; let candidate = nodes.(nodeidx) in - ( NUMAResource.union allocated candidate - , node :: picked - , NUMARequest.shrink requested candidate - ) + (* This is where the memory allocated to the node can be calculated *) + let remaining_request = NUMARequest.shrink requested candidate in + (NUMAResource.union allocated candidate, node :: picked, remaining_request) in let plan_valid (avg, nodes) = let allocated, picked, remaining = @@ -72,8 +71,8 @@ let plan host nodes ~vm = | None -> debug "No allocations possible" ; None - | Some allocated -> + | Some (allocated, nodes) -> debug "Allocated resources: %s" (Fmt.to_to_string NUMAResource.pp_dump allocated) ; assert (NUMARequest.fits vm allocated) ; - Some allocated.NUMAResource.affinity + Some (allocated.NUMAResource.affinity, nodes) diff --git a/ocaml/xenopsd/lib/softaffinity.mli b/ocaml/xenopsd/lib/softaffinity.mli index 7bef2079f89..5b1f550af5b 100644 --- a/ocaml/xenopsd/lib/softaffinity.mli +++ b/ocaml/xenopsd/lib/softaffinity.mli @@ -14,7 +14,11 @@ open Topology -val plan : NUMA.t -> NUMAResource.t array -> vm:NUMARequest.t -> CPUSet.t option +val plan : + NUMA.t + -> NUMAResource.t array + -> vm:NUMARequest.t + -> (Topology.CPUSet.t * Topology.NUMA.node list) option (** [plan host nodes ~vm] returns the CPU soft affinity recommended for [vm], Such that the memory latency between the NUMA nodes of the vCPUs is small, and usage of NUMA nodes is balanced. diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index f706f542d5e..a2cd401a0cc 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -298,7 +298,7 @@ module NUMA = struct None else ( List.iter (fun (Node n) -> t.node_usage.(n) <- t.node_usage.(n) + 1) nodes ; - Some result + Some (result, nodes) ) let pp_dump_node = Fmt.(using (fun (Node x) -> x) int) diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index 478a7ac2b64..f1bd6f9f569 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -150,7 +150,10 @@ module NUMA : sig NUMA nodes > 16 it limits the length of the sequence to [n+65520], to avoid exponential blowup. *) - val choose : t -> (node list * NUMAResource.t) Seq.t -> NUMAResource.t option + val choose : + t + -> (node list * NUMAResource.t) Seq.t + -> (NUMAResource.t * node list) option (** [choose t resources] will choose one NUMA node deterministically, trying to keep the overall NUMA node usage balanced *) diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index e53640f5054..d9945ed8018 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -210,18 +210,20 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = match Softaffinity.plan h nodes ~vm with | None -> Alcotest.fail "No NUMA plan" - | Some plan -> - D.debug "NUMA allocation succeeded for VM %d: %s" i - (Fmt.to_to_string CPUSet.pp_dump plan) ; + | Some (cpu_plan, mem_plan) -> + D.debug + "NUMA allocation succeeded for VM %d: [CPUS: %s]; [nodes: %s]" i + (Fmt.to_to_string CPUSet.pp_dump cpu_plan) + (Fmt.to_to_string Fmt.(Dump.list NUMA.pp_dump_node) mem_plan) ; let usednodes = - plan + cpu_plan |> CPUSet.elements |> List.map (NUMA.node_of_cpu h) |> List.sort_uniq compare |> List.to_seq in let costs_numa_aware = - vm_access_costs h plans (vm_cores, usednodes, plan) + vm_access_costs h plans (vm_cores, usednodes, cpu_plan) in let costs_default = vm_access_costs h plans (vm_cores, NUMA.nodes h, NUMA.all_cpus h) @@ -229,7 +231,7 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = cost_not_worse ~default:costs_default costs_numa_aware ; ( costs_default :: costs_old , costs_numa_aware :: costs_new - , ((vm_cores, List.of_seq usednodes), plan) :: plans + , ((vm_cores, List.of_seq usednodes), cpu_plan) :: plans ) ) ([], [], []) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 348c5de44bc..6ea92ad2ae1 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -860,42 +860,62 @@ let numa_init () = let numa_placement domid ~vcpus ~memory = let open Xenctrlext in let open Topology in - let hint = - with_lock numa_mutex (fun () -> - let ( let* ) = Option.bind in - let xcext = get_handle () in - let* host = Lazy.force numa_hierarchy in - let numa_meminfo = (numainfo xcext).memory |> Array.to_list in - let nodes = - ListLabels.map2 - (NUMA.nodes host |> List.of_seq) - numa_meminfo - ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) - in - let vm = NUMARequest.make ~memory ~vcpus in - let nodea = - match !numa_resources with - | None -> - Array.of_list nodes - | Some a -> - Array.map2 NUMAResource.min_memory (Array.of_list nodes) a - in - numa_resources := Some nodea ; - Softaffinity.plan ~vm host nodea - ) - in - let xcext = get_handle () in - ( match hint with - | None -> - D.debug "NUMA-aware placement failed for domid %d" domid - | Some soft_affinity -> - let cpua = CPUSet.to_mask soft_affinity in - for i = 0 to vcpus - 1 do - Xenctrlext.vcpu_setaffinity_soft xcext domid i cpua - done - ) ; - let nr_pages = Int64.div memory 4096L |> Int64.to_int in - Xenctrlext.domain_claim_pages xcext domid nr_pages + with_lock numa_mutex (fun () -> + let ( let* ) = Option.bind in + let xcext = get_handle () in + let* host = Lazy.force numa_hierarchy in + let numa_meminfo = (numainfo xcext).memory |> Array.to_list in + let nodes = + ListLabels.map2 + (NUMA.nodes host |> List.of_seq) + numa_meminfo + ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) + in + let vm = NUMARequest.make ~memory ~vcpus in + let nodea = + match !numa_resources with + | None -> + Array.of_list nodes + | Some a -> + Array.map2 NUMAResource.min_memory (Array.of_list nodes) a + in + numa_resources := Some nodea ; + let memory_plan = + match Softaffinity.plan ~vm host nodea with + | None -> + D.debug "NUMA-aware placement failed for domid %d" domid ; + [] + | Some (cpu_affinity, mem_plan) -> + let cpus = CPUSet.to_mask cpu_affinity in + for i = 0 to vcpus - 1 do + Xenctrlext.vcpu_setaffinity_soft xcext domid i cpus + done ; + mem_plan + in + (* Xen only allows a single node when using memory claims, or none at all. *) + let* numa_node, node = + match memory_plan with + | [Node node] -> + Some (Xenctrlext.NumaNode.from node, node) + | [] | _ :: _ :: _ -> + D.debug + "%s: domain %d can't fit a single NUMA node, falling back to \ + default behaviour" + __FUNCTION__ domid ; + None + in + let nr_pages = Int64.div memory 4096L |> Int64.to_int in + try + Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; + Some (node, memory) + with Xenctrlext.Unix_error (errno, _) -> + D.info + "%s: unable to claim enough memory, domain %d won't be hosted in a \ + single NUMA node. (error %s)" + __FUNCTION__ domid + Unix.(error_message errno) ; + None + ) let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = let open Memory in @@ -949,42 +969,54 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = log_reraise (Printf.sprintf "shadow_allocation_set %d MiB" shadow_mib) (fun () -> Xenctrl.shadow_allocation_set xc domid shadow_mib ) ; - let () = + let node_placement = match !Xenops_server.numa_placement with | Any -> - () + None | Best_effort -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> - if has_hard_affinity then - D.debug "VM has hard affinity set, skipping NUMA optimization" - else + if has_hard_affinity then ( + D.debug "VM has hard affinity set, skipping NUMA optimization" ; + None + ) else numa_placement domid ~vcpus ~memory:(Int64.mul memory.xen_max_mib 1048576L) + |> Option.map fst ) in - create_channels ~xc uuid domid + let store_chan, console_chan = create_channels ~xc uuid domid in + (store_chan, console_chan, node_placement) + +let args_numa_placements numa_placement = + Option.fold ~none:[] + ~some:(fun node -> ["-mem_pnode"; Printf.sprintf "%d" node]) + numa_placement let xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory = + ~console_domid ~memory ~numa_placement = [ - "-domid" - ; string_of_int domid - ; "-store_port" - ; string_of_int store_port - ; "-store_domid" - ; string_of_int store_domid - ; "-console_port" - ; string_of_int console_port - ; "-console_domid" - ; string_of_int console_domid - ; "-mem_max_mib" - ; Int64.to_string memory.Memory.build_max_mib - ; "-mem_start_mib" - ; Int64.to_string memory.Memory.build_start_mib + [ + "-domid" + ; string_of_int domid + ; "-store_port" + ; string_of_int store_port + ; "-store_domid" + ; string_of_int store_domid + ; "-console_port" + ; string_of_int console_port + ; "-console_domid" + ; string_of_int console_domid + ; "-mem_max_mib" + ; Int64.to_string memory.Memory.build_max_mib + ; "-mem_start_mib" + ; Int64.to_string memory.Memory.build_start_mib + ] + ; args_numa_placements numa_placement ] + |> List.concat let xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~vgpus = + ~console_domid ~memory ~kernel ~vgpus ~numa_placement = ["-mode"; "hvm_build"; "-image"; kernel] @ (vgpus |> function | Xenops_interface.Vgpu.{implementation= Nvidia _; _} :: _ -> @@ -993,10 +1025,10 @@ let xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port [] ) @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest_args_pv ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~ramdisk = + ~console_domid ~memory ~kernel ~cmdline ~ramdisk ~numa_placement = [ "-mode" ; "linux_build" @@ -1012,10 +1044,10 @@ let xenguest_args_pv ~domid ~store_port ~store_domid ~console_port ; "0" ] @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~modules = + ~console_domid ~memory ~kernel ~cmdline ~modules ~numa_placement = let module_args = List.concat_map (fun (m, c) -> @@ -1037,7 +1069,7 @@ let xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port ] @ module_args @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest task xenguest_path domid uuid args = let line = @@ -1134,13 +1166,13 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid shadow_multiplier in maybe_ca_140252_workaround ~xc ~vcpus domid ; - let store_port, console_port = + let store_port, console_port, numa_placement = build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~vgpus + ~console_domid ~memory ~kernel ~vgpus ~numa_placement @ force_arg @ extras in @@ -1162,14 +1194,14 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid shadow_multiplier in Option.iter assert_file_is_readable pvinfo.ramdisk ; - let store_port, console_port = + let store_port, console_port, numa_placement = build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_pv ~domid ~store_port ~store_domid ~console_port ~console_domid ~memory ~kernel ~cmdline:pvinfo.cmdline - ~ramdisk:pvinfo.ramdisk + ~ramdisk:pvinfo.ramdisk ~numa_placement @ force_arg @ extras in @@ -1185,13 +1217,13 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid shadow_multiplier in maybe_ca_140252_workaround ~xc ~vcpus domid ; - let store_port, console_port = + let store_port, console_port, numa_placement = build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~modules + ~console_domid ~memory ~kernel ~cmdline ~modules ~numa_placement @ force_arg @ extras in @@ -1221,8 +1253,8 @@ let dm_flags = [] let with_emu_manager_restore (task : Xenops_task.task_handle) ~domain_type - ~(dm : Device.Profile.t) ~store_port ~console_port ~extras manager_path - domid _uuid main_fd vgpu_fd f = + ~(dm : Device.Profile.t) ~store_port ~console_port ~extras ~numa_placements + manager_path domid _uuid main_fd vgpu_fd f = let mode = match domain_type with `hvm | `pvh -> "hvm_restore" | `pv -> "restore" in @@ -1240,20 +1272,24 @@ let with_emu_manager_restore (task : Xenops_task.task_handle) ~domain_type let fds = [(fd_uuid, main_fd)] @ vgpu_args in let args = [ - "-mode" - ; mode - ; "-domid" - ; string_of_int domid - ; "-fd" - ; fd_uuid - ; "-store_port" - ; string_of_int store_port - ; "-console_port" - ; string_of_int console_port + [ + "-mode" + ; mode + ; "-domid" + ; string_of_int domid + ; "-fd" + ; fd_uuid + ; "-store_port" + ; string_of_int store_port + ; "-console_port" + ; string_of_int console_port + ] + ; args_numa_placements numa_placements + ; dm_flags dm + ; extras + ; vgpu_cmdline ] - @ dm_flags dm - @ extras - @ vgpu_cmdline + |> List.concat in Emu_manager.with_connection task manager_path args fds f @@ -1307,7 +1343,7 @@ let consume_qemu_record fd limit domid uuid = let restore_common (task : Xenops_task.task_handle) ~xc ~xs ~(dm : Device.Profile.t) ~domain_type ~store_port ~store_domid:_ ~console_port ~console_domid:_ ~no_incr_generationid:_ ~vcpus:_ ~extras - ~vtpm manager_path domid main_fd vgpu_fd = + ~vtpm ~numa_placements manager_path domid main_fd vgpu_fd = let module DD = Debug.Make (struct let name = "mig64" end) in let open DD in let uuid = get_uuid ~xc domid in @@ -1320,8 +1356,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs match with_conversion_script task "Emu_manager" hvm main_fd (fun pipe_r -> with_emu_manager_restore task ~domain_type ~dm ~store_port - ~console_port ~extras manager_path domid uuid pipe_r vgpu_fd - (fun cnx -> restore_libxc_record cnx domid uuid + ~console_port ~extras ~numa_placements manager_path domid uuid + pipe_r vgpu_fd (fun cnx -> restore_libxc_record cnx domid uuid ) ) with @@ -1360,7 +1396,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs [main_fd] in with_emu_manager_restore task ~domain_type ~dm ~store_port ~console_port - ~extras manager_path domid uuid main_fd vgpu_fd (fun cnx -> + ~extras ~numa_placements manager_path domid uuid main_fd vgpu_fd + (fun cnx -> (* Maintain a list of results returned by emu-manager that are expected by the reader threads. Contains the emu for which a result is wanted plus an event channel for waking up the reader once the @@ -1614,14 +1651,14 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid maybe_ca_140252_workaround ~xc ~vcpus domid ; (memory, vm_stuff, `pvh) in - let store_port, console_port = + let store_port, console_port, numa_placements = build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity:info.has_hard_affinity domid in let store_mfn, console_mfn = restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~store_domid ~console_port ~console_domid ~no_incr_generationid ~vcpus ~extras ~vtpm - manager_path domid fd vgpu_fd + ~numa_placements manager_path domid fd vgpu_fd in let local_stuff = console_keys console_port console_mfn in (* And finish domain's building *) diff --git a/ocaml/xenopsd/xc/xenctrlext.mli b/ocaml/xenopsd/xc/xenctrlext.mli index 39c6eeff514..559842fac75 100644 --- a/ocaml/xenopsd/xc/xenctrlext.mli +++ b/ocaml/xenopsd/xc/xenctrlext.mli @@ -100,3 +100,4 @@ module NumaNode : sig end val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit +(** Raises {Unix_error} if there's not enough memory to claim in the system *) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 6c6dd067ef7..3527cbeb63a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -51,6 +51,8 @@ let _xenguest = "xenguest" let _emu_manager = "emu-manager" +let ( // ) = Filename.concat + let run cmd args = debug "%s %s" cmd (String.concat " " args) ; fst (Forkhelpers.execute_command_get_output cmd args) @@ -58,20 +60,20 @@ let run cmd args = let choose_alternative kind default platformdata = debug "looking for %s in [ %s ]" kind (String.concat "; " (List.map (fun (k, v) -> k ^ " : " ^ v) platformdata)) ; - if List.mem_assoc kind platformdata then - let x = List.assoc kind platformdata in - let dir = Filename.concat !Xc_resources.alternatives kind in - let available = try Array.to_list (Sys.readdir dir) with _ -> [] in + let path_available x = + let dir = !Xc_resources.alternatives // kind in + let available = try Sys.readdir dir with _ -> [||] in (* If x has been put in the directory (by root) then it's safe to use *) - if List.mem x available then - Filename.concat dir x + if Array.mem x available then + Some (dir // x) else ( error "Invalid platform:%s=%s (check execute permissions of %s)" kind x - (Filename.concat dir x) ; - default + (dir // x) ; + None ) - else - default + in + Option.bind (List.assoc_opt kind platformdata) path_available + |> Option.value ~default (* We allow qemu-dm to be overriden via a platform flag *) let choose_qemu_dm x = @@ -2664,8 +2666,8 @@ module VM = struct in let manager_path = choose_emu_manager vm.Vm.platformdata in Domain.restore task ~xc ~xs ~dm:(dm_of ~vm) ~store_domid - ~console_domid ~no_incr_generationid (* XXX progress_callback *) - ~timeoffset ~extras build_info ~manager_path ~vtpm domid fd vgpu_fd + ~console_domid ~no_incr_generationid ~timeoffset ~extras build_info + ~manager_path ~vtpm domid fd vgpu_fd with e -> error "VM %s: restore failed: %s" vm.Vm.id (Printexc.to_string e) ; (* As of xen-unstable.hg 779c0ef9682 libxenguest will destroy From 9e6fb15bb069404a64836dc7c6603d41226cc6bb Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 26 Mar 2025 16:35:29 +0000 Subject: [PATCH 064/492] xenopsd/xc: Do not try to allocate pages to a particular NUMA node Neither xenguest nor emu-manager support passing the parameter just yet, so avoid passing the numa node to create the parameter. On top of that claiming memory conflicts with DMC, so it's better to keep previous behaviour of not claiming any pages before allocating. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 6ea92ad2ae1..19f28e41985 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -880,7 +880,7 @@ let numa_placement domid ~vcpus ~memory = Array.map2 NUMAResource.min_memory (Array.of_list nodes) a in numa_resources := Some nodea ; - let memory_plan = + let _ = match Softaffinity.plan ~vm host nodea with | None -> D.debug "NUMA-aware placement failed for domid %d" domid ; @@ -892,29 +892,10 @@ let numa_placement domid ~vcpus ~memory = done ; mem_plan in - (* Xen only allows a single node when using memory claims, or none at all. *) - let* numa_node, node = - match memory_plan with - | [Node node] -> - Some (Xenctrlext.NumaNode.from node, node) - | [] | _ :: _ :: _ -> - D.debug - "%s: domain %d can't fit a single NUMA node, falling back to \ - default behaviour" - __FUNCTION__ domid ; - None - in - let nr_pages = Int64.div memory 4096L |> Int64.to_int in - try - Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; - Some (node, memory) - with Xenctrlext.Unix_error (errno, _) -> - D.info - "%s: unable to claim enough memory, domain %d won't be hosted in a \ - single NUMA node. (error %s)" - __FUNCTION__ domid - Unix.(error_message errno) ; - None + (* Neither xenguest nor emu-manager allow allocating pages to a single + NUMA node, don't return any NUMA in any case. Claiming the memory + would be done here, but it conflicts with DMC. *) + None ) let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = From ac20de6fa8b93e8ff41ea95167c3644f81a692c2 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 21 Mar 2025 11:00:09 +0000 Subject: [PATCH 065/492] Add additional tracing to VBD plug/unplug Signed-off-by: Steven Woods --- ocaml/xenopsd/xc/xenops_server_xen.ml | 37 +++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 6c6dd067ef7..d41f3345d39 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3720,6 +3720,7 @@ module VBD = struct } in let dev = + with_tracing ~task ~name:"VBD_device_add" @@ fun () -> Xenops_task.with_subtask task (Printf.sprintf "Vbd.add %s" (id_of vbd)) (fun () -> @@ -3729,13 +3730,16 @@ module VBD = struct ) in (* We store away the disk so we can implement VBD.stat *) - Option.iter - (fun d -> - xs.Xs.write - (vdi_path_of_device ~xs dev) - (d |> rpc_of disk |> Jsonrpc.to_string) - ) - vbd.backend ; + with_tracing ~task ~name:"VBD_xs_write" (fun () -> + Option.iter + (fun d -> + xs.Xs.write + (vdi_path_of_device ~xs dev) + (d |> rpc_of disk |> Jsonrpc.to_string) + ) + vbd.backend + ) ; + with_tracing ~task ~name:"VBD_attach_qemu" @@ fun () -> (* NB now the frontend position has been resolved *) let open Device_common in let device_number = @@ -3851,6 +3855,7 @@ module VBD = struct (* this happens on normal shutdown too *) (* Case (1): success; Case (2): success; Case (3): an exception is thrown *) + with_tracing ~task ~name:"VBD_device_shutdown" @@ fun () -> Xenops_task.with_subtask task (Printf.sprintf "Vbd.clean_shutdown %s" (id_of vbd)) (fun () -> @@ -3863,14 +3868,17 @@ module VBD = struct the DP if the backend is of type VDI *) finally (fun () -> - Option.iter - (fun dev -> - Xenops_task.with_subtask task - (Printf.sprintf "Vbd.release %s" (id_of vbd)) - (fun () -> Device.Vbd.release task ~xc ~xs dev) - ) - dev ; + with_tracing ~task ~name:"VBD_device_release" (fun () -> + Option.iter + (fun dev -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.release %s" (id_of vbd)) + (fun () -> Device.Vbd.release task ~xc ~xs dev) + ) + dev + ) ; (* If we have a qemu frontend, detach this too. *) + with_tracing ~task ~name:"VBD_detach_qemu" @@ fun () -> let _ = DB.update vm (Option.map (fun vm_t -> @@ -3901,6 +3909,7 @@ module VBD = struct () ) (fun () -> + with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> match (domid, backend) with | Some x, None | Some x, Some (VDI _) -> Storage.dp_destroy task From 79c9219a4928139e82ad19a082f4b711b207c48e Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 14 Mar 2025 18:17:10 +0000 Subject: [PATCH 066/492] CP-53554: Split plug xenopsd atomic into attach/activate This consists of two parts, splitting the attach_and_activate into functionally equivalent attach and activate functions, and splitting the VBD_plug atomic itself's behaviour into two new atomics, VBD_attach and VBD_activate. If the new xenopsd_vbd_plug_unplug_legacy flag is true, the only difference will be that VBD_plug calls attach and activate sequentially instead of attach_and_activate. If xenopsd_vbd_plug_unplug_legacy is false, the VBD_attach and VBD_activate atomics will be used in place of VBD_plug in all places that it is used. This should still be functionally equivalent. The purpose of this change is to allow optimisations in this area as there are some situations where they do not need to be called at the same time. For example VBD_attach could be called outside of VM migrate downtime to reduce the overall downtime. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/storage.ml | 22 +- ocaml/xenopsd/lib/xenops_server.ml | 39 +- ocaml/xenopsd/lib/xenops_server_plugin.ml | 4 +- ocaml/xenopsd/lib/xenops_server_simulator.ml | 4 +- ocaml/xenopsd/lib/xenops_server_skeleton.ml | 4 +- ocaml/xenopsd/lib/xenopsd.ml | 5 + ocaml/xenopsd/xc/xenops_server_xen.ml | 435 +++++++++++-------- 7 files changed, 320 insertions(+), 193 deletions(-) diff --git a/ocaml/xenopsd/lib/storage.ml b/ocaml/xenopsd/lib/storage.ml index 72dd3b03322..a051b34f7ca 100644 --- a/ocaml/xenopsd/lib/storage.ml +++ b/ocaml/xenopsd/lib/storage.ml @@ -62,20 +62,20 @@ let vm_of_domid vmdomid = "Invalid domid, could not be converted to int, passing empty string." ; Storage_interface.Vm.of_string "" -let attach_and_activate ~task ~_vm ~vmdomid ~dp ~sr ~vdi ~read_write = +let attach ~task ~_vm ~vmdomid ~dp ~sr ~vdi ~read_write = + let dbg = get_dbg task in + Xenops_task.with_subtask task + (Printf.sprintf "VDI.attach3 %s" dp) + (transform_exception (fun () -> + Client.VDI.attach3 dbg dp sr vdi vmdomid read_write + ) + ) + +let activate ~task ~_vm ~vmdomid ~dp ~sr ~vdi = let dbg = get_dbg task in - let result = - Xenops_task.with_subtask task - (Printf.sprintf "VDI.attach3 %s" dp) - (transform_exception (fun () -> - Client.VDI.attach3 dbg dp sr vdi vmdomid read_write - ) - ) - in Xenops_task.with_subtask task (Printf.sprintf "VDI.activate3 %s" dp) - (transform_exception (fun () -> Client.VDI.activate3 dbg dp sr vdi vmdomid)) ; - result + (transform_exception (fun () -> Client.VDI.activate3 dbg dp sr vdi vmdomid)) let deactivate task dp sr vdi vmdomid = debug "Deactivating disk %s %s" (Sr.string_of sr) (Vdi.string_of vdi) ; diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 5325a8b29ba..60bd162f24c 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -37,6 +37,8 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let domain_shutdown_ack_timeout = ref 60. +let xenopsd_vbd_plug_unplug_legacy = ref true + type context = { transferred_fd: Unix.file_descr option (** some API calls take a file descriptor argument *) @@ -122,6 +124,8 @@ type atomic = | VM_hook_script_stable of (Vm.id * Xenops_hooks.script * string * Vm.id) | VM_hook_script of (Vm.id * Xenops_hooks.script * string) | VBD_plug of Vbd.id + | VBD_attach of Vbd.id + | VBD_activate of Vbd.id | VBD_epoch_begin of (Vbd.id * disk * bool) | VBD_epoch_end of (Vbd.id * disk) | VBD_set_qos of Vbd.id @@ -195,6 +199,10 @@ let rec name_of_atomic = function "VM_hook_script" | VBD_plug _ -> "VBD_plug" + | VBD_attach _ -> + "VBD_attach" + | VBD_activate _ -> + "VBD_activate" | VBD_epoch_begin _ -> "VBD_epoch_begin" | VBD_epoch_end _ -> @@ -1580,6 +1588,18 @@ let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) let map_or_empty f x = Option.value ~default:[] (Option.map f x) +(* Creates a Serial of 2 or more Atomics. If the number of Atomics could be + less than this, use serial or serial_concat *) +let serial_of name ~id at1 at2 ats = + Serial (id, Printf.sprintf "%s VM=%s" name id, at1 :: at2 :: ats) + +let vbd_plug vbd_id = + if !xenopsd_vbd_plug_unplug_legacy then + VBD_plug vbd_id + else + serial_of "VBD.attach_and_activate" ~id:(VBD_DB.vm_of vbd_id) + (VBD_attach vbd_id) (VBD_activate vbd_id) [] + let rec atomics_of_operation = function | VM_start (id, force) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in @@ -1604,7 +1624,7 @@ let rec atomics_of_operation = function [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] ) vbd.Vbd.backend - ; [VBD_plug vbd.Vbd.id] + ; [vbd_plug vbd.Vbd.id] ] ) in @@ -1692,7 +1712,7 @@ let rec atomics_of_operation = function let name_one = pf "VBD.activate_and_plug %s" typ in parallel_map name_multi ~id vbds (fun vbd -> serial name_one ~id - [VBD_set_active (vbd.Vbd.id, true); VBD_plug vbd.Vbd.id] + [VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id] ) in [ @@ -1825,7 +1845,7 @@ let rec atomics_of_operation = function ] |> List.concat | VBD_hotplug id -> - [VBD_set_active (id, true); VBD_plug id] + [VBD_set_active (id, true); vbd_plug id] | VBD_hotunplug (id, force) -> [VBD_unplug (id, force); VBD_set_active (id, false)] | VIF_hotplug id -> @@ -2017,7 +2037,16 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) Xenops_hooks.vm ~script ~reason ~id ~extra_args | VBD_plug id -> debug "VBD.plug %s" (VBD_DB.string_of_id id) ; - B.VBD.plug t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + B.VBD.attach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + B.VBD.activate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id + | VBD_attach id -> + debug "VBD.attach %s" (VBD_DB.string_of_id id) ; + B.VBD.attach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id + | VBD_activate id -> + debug "VBD.activate %s" (VBD_DB.string_of_id id) ; + B.VBD.activate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; VBD_DB.signal id | VBD_set_active (id, b) -> debug "VBD.set_active %s %b" (VBD_DB.string_of_id id) b ; @@ -2445,6 +2474,8 @@ and trigger_cleanup_after_failure_atom op t = match op with | VBD_eject id | VBD_plug id + | VBD_attach id + | VBD_activate id | VBD_set_active (id, _) | VBD_epoch_begin (id, _, _) | VBD_epoch_end (id, _) diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index fbeb78f3640..7b8fa8379e4 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -207,7 +207,9 @@ module type S = sig val epoch_end : Xenops_task.task_handle -> Vm.id -> disk -> unit - val plug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + val attach : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + + val activate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit val unplug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index c5123641978..16680682180 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -673,7 +673,9 @@ module VBD = struct let epoch_end _ (_vm : Vm.id) (_disk : disk) = () - let plug _ (vm : Vm.id) (vbd : Vbd.t) = with_lock m (add_vbd vm vbd) + let attach _ (vm : Vm.id) (vbd : Vbd.t) = with_lock m (add_vbd vm vbd) + + let activate _ (_vm : Vm.id) (_vbd : Vbd.t) = () let unplug _ vm vbd _ = with_lock m (remove_vbd vm vbd) diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index dc1b826f85e..688624e3f8a 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -145,7 +145,9 @@ module VBD = struct let epoch_end _ _ _ = () - let plug _ _ _ = unimplemented "VBD.plug" + let attach _ _ _ = unimplemented "VBD.attach" + + let activate _ _ _ = unimplemented "VBD.activate" let unplug _ _ _ _ = unimplemented "VBD.unplug" diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index e0a4f5949db..276192792d4 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -283,6 +283,11 @@ let options = , (fun () -> string_of_int !test_open) , "TESTING only: open N file descriptors" ) + ; ( "xenopsd-vbd-plug-unplug-legacy" + , Arg.Bool (fun x -> Xenops_server.xenopsd_vbd_plug_unplug_legacy := x) + , (fun () -> string_of_bool !Xenops_server.xenopsd_vbd_plug_unplug_legacy) + , "False if we want to split the plug atomic into attach/activate" + ) ] let path () = Filename.concat !sockets_path "xenopsd" diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index d41f3345d39..f4731a0cf25 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -185,6 +185,7 @@ module VmExtra = struct ; pv_drivers_detected: bool [@default false] ; xen_platform: (int * int) option (* (device_id, revision) for QEMU *) ; platformdata: (string * string) list [@default []] + ; attached_vdis: (Vbd.id * attached_vdi) list [@default []] } [@@deriving rpcty] @@ -409,18 +410,16 @@ module Storage = struct let vm_of_domid = vm_of_domid (* We need to deal with driver domains here: *) - let attach_and_activate ~xc:_ ~xs task vm dp sr vdi read_write = + let attach ~xc:_ ~xs task vm dp sr vdi read_write = let vmdomid = vm_of_domid (domid_of_uuid ~xs (uuid_of_string vm)) in - let result = - attach_and_activate ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi ~read_write - in + let result = attach ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi ~read_write in let backend = Xenops_task.with_subtask task (Printf.sprintf "Policy.get_backend_vm %s %s %s" vm (Sr.string_of sr) (Vdi.string_of vdi) ) (transform_exception (fun () -> - Client.Policy.get_backend_vm "attach_and_activate" vm sr vdi + Client.Policy.get_backend_vm "attach" vm sr vdi ) ) in @@ -430,6 +429,10 @@ module Storage = struct | Some domid -> {domid; attach_info= result} + let activate ~xc:_ ~xs task vm dp sr vdi = + let vmdomid = vm_of_domid (domid_of_uuid ~xs (uuid_of_string vm)) in + activate ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi + let deactivate = deactivate let dp_destroy = dp_destroy @@ -504,10 +507,11 @@ let with_disk ~xc ~xs task disk write f = (fun () -> let frontend_domid = this_domid ~xs in let frontend_vm = get_uuid ~xc frontend_domid |> Uuidx.to_string in - let vdi = - attach_and_activate ~xc ~xs task frontend_vm dp sr vdi write + let attached_vdi = attach ~xc ~xs task frontend_vm dp sr vdi write in + activate ~xc ~xs task frontend_vm dp sr vdi ; + let device = + create_vbd_frontend ~xc ~xs task frontend_domid attached_vdi in - let device = create_vbd_frontend ~xc ~xs task frontend_domid vdi in finally (fun () -> match device with @@ -3530,11 +3534,14 @@ module VBD = struct let vdi_attach_path vbd = Printf.sprintf "/xapi/%s/private/vdis/%s" (fst vbd.id) (snd vbd.id) - let attach_and_activate task xc xs frontend_domid vbd vdi = - let vdi = - match vdi with - | None -> - (* XXX: do something better with CDROMs *) + type attachment_status = Attached of attached_vdi | PathToAttach of string + + (* For vdis that are None or local, return Attached attached_vdi, otherwise return PathToAttach path *) + let attachment_status_of_vdi xs vdi = + match vdi with + | None -> + (* XXX: do something better with CDROMs *) + Attached { domid= this_domid ~xs ; attach_info= @@ -3547,7 +3554,8 @@ module VBD = struct ] } } - | Some (Local path) -> + | Some (Local path) -> + Attached { domid= this_domid ~xs ; attach_info= @@ -3560,17 +3568,34 @@ module VBD = struct ] } } - | Some (VDI path) -> + | Some (VDI path) -> + PathToAttach path + + let attach' task xc xs frontend_domid vbd vdi = + let vdi = + match attachment_status_of_vdi xs vdi with + | Attached attached_vdi -> + attached_vdi + | PathToAttach path -> let sr, vdi = Storage.get_disk_by_name task path in let dp = Storage.id_of (string_of_int frontend_domid) vbd.id in let vm = fst vbd.id in - Storage.attach_and_activate ~xc ~xs task vm dp sr vdi - (vbd.mode = ReadWrite) + Storage.attach ~xc ~xs task vm dp sr vdi (vbd.mode = ReadWrite) in xs.Xs.write (vdi_attach_path vbd) (vdi |> rpc_of attached_vdi |> Jsonrpc.to_string) ; vdi + let activate' task xc xs frontend_domid vbd vdi = + match attachment_status_of_vdi xs vdi with + | Attached _ -> + () + | PathToAttach path -> + let sr, vdi = Storage.get_disk_by_name task path in + let dp = Storage.id_of (string_of_int frontend_domid) vbd.id in + let vm = fst vbd.id in + Storage.activate ~xc ~xs task vm dp sr vdi + let frontend_domid_of_device device = device.Device_common.frontend.Device_common.domid @@ -3649,157 +3674,217 @@ module VBD = struct let vdi_path_of_device ~xs device = Device_common.backend_path_of_device ~xs device ^ "/vdi" - let plug task vm vbd = + let attach task vm vbd = (* If the vbd isn't listed as "active" then we don't automatically plug this - one in *) - if not (get_active vm vbd) then - debug "VBD %s.%s is not active: not plugging into VM" (fst vbd.Vbd.id) - (snd vbd.Vbd.id) - else - on_frontend - (fun xc xs frontend_domid domain_type -> - if vbd.backend = None && domain_type <> Vm.Domain_HVM then - info - "VM = %s; an empty CDROM drive on PV and PVinPVH guests is \ - simulated by unplugging the whole drive" - vm - else - let vdi = - attach_and_activate task xc xs frontend_domid vbd vbd.backend - in - let params, xenstore_data, extra_keys = - params_of_backend vdi.attach_info - in - let new_keys = - List.map (fun (k, v) -> ("sm-data/" ^ k, v)) xenstore_data - @ extra_keys - in - let extra_backend_keys = - List.fold_left - (fun acc (k, v) -> (k, v) :: List.remove_assoc k acc) - vbd.extra_backend_keys new_keys - in - let kind = device_kind_of ~xs vbd in - (* Remember the VBD id with the device *) - let vbd_id = (_device_id kind, id_of vbd) in - (* Remember the VDI with the device (for later deactivation) *) - let vdi_id = - (_vdi_id, vbd.backend |> rpc_of backend |> Jsonrpc.to_string) - in - let dp_id = - (_dp_id, Storage.id_of (string_of_int frontend_domid) vbd.Vbd.id) - in - let x = - { - Device.Vbd.mode= - ( match vbd.mode with - | ReadOnly -> - Device.Vbd.ReadOnly - | ReadWrite -> - Device.Vbd.ReadWrite - ) - ; device_number= vbd.position - ; phystype= Device.Vbd.Phys - ; params - ; dev_type= - ( match vbd.ty with - | CDROM -> - Device.Vbd.CDROM - | Disk -> - Device.Vbd.Disk - | Floppy -> - Device.Vbd.Floppy + one in *) + let attached_vdi = + if not (get_active vm vbd) then ( + debug "VBD %s.%s is not active: not plugging into VM" (fst vbd.Vbd.id) + (snd vbd.Vbd.id) ; + None + ) else + on_frontend + (fun xc xs frontend_domid domain_type -> + if vbd.backend = None && domain_type <> Vm.Domain_HVM then ( + info + "VM = %s; an empty CDROM drive on PV and PVinPVH guests is \ + simulated by unplugging the whole drive" + vm ; + None + ) else + Some (attach' task xc xs frontend_domid vbd vbd.backend) + ) + vm + in + match attached_vdi with + | None -> + () + | Some vdi -> + (* Record the attached_vdi so it can be used in activate *) + let _ = + DB.update_exn vm (fun vm_t -> + Some + VmExtra. + { + persistent= + { + vm_t.VmExtra.persistent with + attached_vdis= + (vbd.Vbd.id, vdi) + :: List.remove_assoc vbd.Vbd.id + vm_t.persistent.attached_vdis + } + } + ) + in + () + + let cleanup_attached_vdis vm vbd_id = + let _ = + DB.update_exn vm (fun vm_t -> + let remaining_vdis = + List.remove_assoc vbd_id vm_t.persistent.attached_vdis + in + Some + {persistent= {vm_t.persistent with attached_vdis= remaining_vdis}} + ) + in + () + + let activate task vm vbd = + let vmextra = DB.read_exn vm in + match List.assoc_opt vbd.id vmextra.persistent.attached_vdis with + | None -> + debug "No attached_vdi info, so not activating" + | Some vdi -> + finally + (fun () -> + on_frontend + (fun xc xs frontend_domid domain_type -> + activate' task xc xs frontend_domid vbd vbd.backend ; + let params, xenstore_data, extra_keys = + params_of_backend vdi.attach_info + in + let new_keys = + List.map (fun (k, v) -> ("sm-data/" ^ k, v)) xenstore_data + @ extra_keys + in + let extra_backend_keys = + List.fold_left + (fun acc (k, v) -> (k, v) :: List.remove_assoc k acc) + vbd.extra_backend_keys new_keys + in + let kind = device_kind_of ~xs vbd in + (* Remember the VBD id with the device *) + let vbd_id = (_device_id kind, id_of vbd) in + (* Remember the VDI with the device (for later deactivation) *) + let vdi_id = + (_vdi_id, vbd.backend |> rpc_of backend |> Jsonrpc.to_string) + in + let dp_id = + ( _dp_id + , Storage.id_of (string_of_int frontend_domid) vbd.Vbd.id ) - ; unpluggable= vbd.unpluggable - ; protocol= None - ; kind - ; extra_backend_keys - ; extra_private_keys= - dp_id :: vdi_id :: vbd_id :: vbd.extra_private_keys - ; backend_domid= vdi.domid - } - in - let dev = - with_tracing ~task ~name:"VBD_device_add" @@ fun () -> - Xenops_task.with_subtask task - (Printf.sprintf "Vbd.add %s" (id_of vbd)) - (fun () -> - Device.Vbd.add task ~xc ~xs - ~hvm:(domain_type = Vm.Domain_HVM) - x frontend_domid - ) - in - (* We store away the disk so we can implement VBD.stat *) - with_tracing ~task ~name:"VBD_xs_write" (fun () -> - Option.iter - (fun d -> - xs.Xs.write - (vdi_path_of_device ~xs dev) - (d |> rpc_of disk |> Jsonrpc.to_string) - ) - vbd.backend - ) ; - with_tracing ~task ~name:"VBD_attach_qemu" @@ fun () -> - (* NB now the frontend position has been resolved *) - let open Device_common in - let device_number = - dev.frontend.devid |> Device_number.of_xenstore_key - in - let qemu_domid = this_domid ~xs in - let qemu_frontend = - let maybe_create_vbd_frontend () = - let index = Device_number.disk device_number in - match vbd.Vbd.backend with - | None -> - Some (index, Empty) - | Some _ -> - Some (index, create_vbd_frontend ~xc ~xs task qemu_domid vdi) - in - match (device_number :> Device_number.bus_type * int * int) with - | Ide, n, _ when 0 <= n && n < 4 -> - maybe_create_vbd_frontend () - | Floppy, n, _ when 0 <= n && n < 2 -> - maybe_create_vbd_frontend () - | Ide, n, _ -> - D.warn - "qemu_frontend: Ide supports device numbers between 0 and \ - 3, but got: %i" - n ; - None - | Floppy, n, _ -> - D.warn - "qemu_frontend: Floppy supports device numbers between 0 \ - and 1, but got: %i" - n ; - None - | (Xen | Scsi), _, _ -> - None - in - (* Remember what we've just done *) - (* Dom0 doesn't have a vm_t - we don't need this currently, but when - we have storage driver domains, we will. Also this causes the - SMRT tests to fail, as they demand the loopback VBDs *) - Option.iter - (fun q -> - let _ = - DB.update_exn vm (fun vm_t -> - Some - VmExtra. - { - persistent= + in + let x = + { + Device.Vbd.mode= + ( match vbd.mode with + | ReadOnly -> + Device.Vbd.ReadOnly + | ReadWrite -> + Device.Vbd.ReadWrite + ) + ; device_number= vbd.position + ; phystype= Device.Vbd.Phys + ; params + ; dev_type= + ( match vbd.ty with + | CDROM -> + Device.Vbd.CDROM + | Disk -> + Device.Vbd.Disk + | Floppy -> + Device.Vbd.Floppy + ) + ; unpluggable= vbd.unpluggable + ; protocol= None + ; kind + ; extra_backend_keys + ; extra_private_keys= + dp_id :: vdi_id :: vbd_id :: vbd.extra_private_keys + ; backend_domid= vdi.domid + } + in + let dev = + with_tracing ~task ~name:"VBD_activate_add" @@ fun () -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.add %s" (id_of vbd)) + (fun () -> + Device.Vbd.add task ~xc ~xs + ~hvm:(domain_type = Vm.Domain_HVM) + x frontend_domid + ) + in + (* We store away the disk so we can implement VBD.stat *) + ( with_tracing ~task ~name:"VBD_activate_xs_write" @@ fun () -> + Option.iter + (fun d -> + xs.Xs.write + (vdi_path_of_device ~xs dev) + (d |> rpc_of disk |> Jsonrpc.to_string) + ) + vbd.backend + ) ; + with_tracing ~task ~name:"VBD_activate_qemu" @@ fun () -> + (* NB now the frontend position has been resolved *) + let open Device_common in + let device_number = + dev.frontend.devid |> Device_number.of_xenstore_key + in + let qemu_domid = this_domid ~xs in + let qemu_frontend = + let maybe_create_vbd_frontend () = + let index = Device_number.disk device_number in + match vbd.Vbd.backend with + | None -> + Some (index, Empty) + | Some _ -> + Some + ( index + , create_vbd_frontend ~xc ~xs task qemu_domid vdi + ) + in + match + (device_number :> Device_number.bus_type * int * int) + with + | Ide, n, _ when 0 <= n && n < 4 -> + maybe_create_vbd_frontend () + | Floppy, n, _ when 0 <= n && n < 2 -> + maybe_create_vbd_frontend () + | Ide, n, _ -> + D.warn + "qemu_frontend: Ide supports device numbers between 0 \ + and 3, but got: %i" + n ; + None + | Floppy, n, _ -> + D.warn + "qemu_frontend: Floppy supports device numbers between \ + 0 and 1, but got: %i" + n ; + None + | (Xen | Scsi), _, _ -> + None + in + (* Remember what we've just done *) + (* Dom0 doesn't have a vm_t - we don't need this currently, but when + we have storage driver domains, we will. Also this causes the + SMRT tests to fail, as they demand the loopback VBDs *) + Option.iter + (fun q -> + let _ = + DB.update_exn vm (fun vm_t -> + Some + VmExtra. { - vm_t.VmExtra.persistent with - qemu_vbds= - (vbd.Vbd.id, q) :: vm_t.persistent.qemu_vbds + persistent= + { + vm_t.VmExtra.persistent with + qemu_vbds= + (vbd.Vbd.id, q) + :: vm_t.persistent.qemu_vbds + } } - } + ) + in + () ) - in - () + qemu_frontend ) - qemu_frontend - ) - vm + vm + ) + (fun () -> cleanup_attached_vdis vm vbd.id) let unplug task vm vbd force = with_xc_and_xs (fun xc xs -> @@ -3869,13 +3954,13 @@ module VBD = struct finally (fun () -> with_tracing ~task ~name:"VBD_device_release" (fun () -> - Option.iter - (fun dev -> - Xenops_task.with_subtask task - (Printf.sprintf "Vbd.release %s" (id_of vbd)) - (fun () -> Device.Vbd.release task ~xc ~xs dev) - ) - dev + Option.iter + (fun dev -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.release %s" (id_of vbd)) + (fun () -> Device.Vbd.release task ~xc ~xs dev) + ) + dev ) ; (* If we have a qemu frontend, detach this too. *) with_tracing ~task ~name:"VBD_detach_qemu" @@ fun () -> @@ -3925,15 +4010,15 @@ module VBD = struct let insert task vm vbd d = on_frontend (fun xc xs frontend_domid domain_type -> - if domain_type <> Vm.Domain_HVM then - plug task vm {vbd with backend= Some d} - else + if domain_type <> Vm.Domain_HVM then ( + attach task vm {vbd with backend= Some d} ; + activate task vm {vbd with backend= Some d} + ) else let (device : Device_common.device) = device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd) in - let vdi = - attach_and_activate task xc xs frontend_domid vbd (Some d) - in + let vdi = attach' task xc xs frontend_domid vbd (Some d) in + activate' task xc xs frontend_domid vbd (Some d) ; let params, xenstore_data, _ = params_of_backend vdi.attach_info in let phystype = Device.Vbd.Phys in (* We store away the disk so we can implement VBD.stat *) From 1a46f33be7683f982200a1081a80c1cf3357dda1 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 21 Mar 2025 11:17:33 +0000 Subject: [PATCH 067/492] CP-53555: Split unplug atomic into deactivate/detach This consists of two parts, splitting the unplug function into functionally equivalent deactivate and detach functions, and splitting the VBD_unplug atomic itself's behaviour into two new atomics: VBD_deactivate and VBD_detach If the xenopsd_vbd_plug_unplug_legacy flag is true, the only difference will be that VBD_unplug calls deactivate and detach sequentially instead of unplug If xenopsd_vbd_plug_unplug_legacy is false, the VBD_deactivate and VBD_detach atomics will be used in place of VBD_unplug in all places that it is used. This should still be functionally equivalent. The purpose of this change is to allow optimisations in this area as there are some situations where they do not need to be called at the same time. For example we could skip detaching on reboot and only deactivate and activate again reducing reboot time. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 36 +++++++++- ocaml/xenopsd/lib/xenops_server_plugin.ml | 4 +- ocaml/xenopsd/lib/xenops_server_simulator.ml | 4 +- ocaml/xenopsd/lib/xenops_server_skeleton.ml | 4 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 71 ++++++++++++++++---- 5 files changed, 99 insertions(+), 20 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 60bd162f24c..bc55feec3ed 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -130,6 +130,8 @@ type atomic = | VBD_epoch_end of (Vbd.id * disk) | VBD_set_qos of Vbd.id | VBD_unplug of Vbd.id * bool + | VBD_deactivate of Vbd.id * bool + | VBD_detach of Vbd.id | VBD_insert of Vbd.id * disk | VBD_set_active of Vbd.id * bool | VM_remove of Vm.id @@ -211,6 +213,10 @@ let rec name_of_atomic = function "VBD_set_qos" | VBD_unplug _ -> "VBD_unplug" + | VBD_deactivate _ -> + "VBD_deactivate" + | VBD_detach _ -> + "VBD_detach" | VBD_insert _ -> "VBD_insert" | VBD_set_active _ -> @@ -1600,6 +1606,14 @@ let vbd_plug vbd_id = serial_of "VBD.attach_and_activate" ~id:(VBD_DB.vm_of vbd_id) (VBD_attach vbd_id) (VBD_activate vbd_id) [] +let vbd_unplug vbd_id force = + if !xenopsd_vbd_plug_unplug_legacy then + VBD_unplug (vbd_id, force) + else + serial_of "VBD.deactivate_and_detach" ~id:(VBD_DB.vm_of vbd_id) + (VBD_deactivate (vbd_id, force)) + (VBD_detach vbd_id) [] + let rec atomics_of_operation = function | VM_start (id, force) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in @@ -1688,7 +1702,7 @@ let rec atomics_of_operation = function ] ; parallel_concat "Devices.unplug" ~id [ - List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds + List.map (fun vbd -> vbd_unplug vbd.Vbd.id true) vbds ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis ] @@ -1847,7 +1861,7 @@ let rec atomics_of_operation = function | VBD_hotplug id -> [VBD_set_active (id, true); vbd_plug id] | VBD_hotunplug (id, force) -> - [VBD_unplug (id, force); VBD_set_active (id, false)] + [vbd_unplug id force; VBD_set_active (id, false)] | VIF_hotplug id -> [VIF_set_active (id, true); VIF_plug id] | VIF_hotunplug (id, force) -> @@ -2065,8 +2079,22 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) | VBD_unplug (id, force) -> debug "VBD.unplug %s" (VBD_DB.string_of_id id) ; finally - (fun () -> B.VBD.unplug t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force) + (fun () -> + B.VBD.deactivate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force ; + B.VBD.detach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) + ) + (fun () -> VBD_DB.signal id) + | VBD_deactivate (id, force) -> + debug "VBD.deactivate %s" (VBD_DB.string_of_id id) ; + finally + (fun () -> + B.VBD.deactivate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force + ) (fun () -> VBD_DB.signal id) + | VBD_detach id -> + debug "VBD.detach %s" (VBD_DB.string_of_id id) ; + B.VBD.detach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id | VBD_insert (id, disk) -> ( (* NB this is also used to "refresh" ie signal a qemu that it should re-open a device, useful for when a physical CDROM is inserted into the @@ -2481,6 +2509,8 @@ and trigger_cleanup_after_failure_atom op t = | VBD_epoch_end (id, _) | VBD_set_qos id | VBD_unplug (id, _) + | VBD_deactivate (id, _) + | VBD_detach id | VBD_insert (id, _) -> immediate_operation dbg (fst id) (VBD_check_state id) | VIF_plug id diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 7b8fa8379e4..1a52749a9f3 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -211,7 +211,9 @@ module type S = sig val activate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit - val unplug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit + val deactivate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit + + val detach : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit val insert : Xenops_task.task_handle -> Vm.id -> Vbd.t -> disk -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index 16680682180..f8c0afab8ab 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -677,7 +677,9 @@ module VBD = struct let activate _ (_vm : Vm.id) (_vbd : Vbd.t) = () - let unplug _ vm vbd _ = with_lock m (remove_vbd vm vbd) + let deactivate _ vm vbd _ = with_lock m (remove_vbd vm vbd) + + let detach _ _vm _vbd = () let insert _ _vm _vbd _disk = () diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 688624e3f8a..2055837c47c 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -149,7 +149,9 @@ module VBD = struct let activate _ _ _ = unimplemented "VBD.activate" - let unplug _ _ _ _ = unimplemented "VBD.unplug" + let deactivate _ _ _ _ = unimplemented "VBD.deactivate" + + let detach _ _ _ = unimplemented "VBD.detach" let insert _ _ _ _ = unimplemented "VBD.insert" diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index f4731a0cf25..90a38d821af 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3886,22 +3886,22 @@ module VBD = struct ) (fun () -> cleanup_attached_vdis vm vbd.id) - let unplug task vm vbd force = + let deactivate task vm vbd force = with_xc_and_xs (fun xc xs -> try (* On destroying the datapath - 1. if the device has already been shutdown and deactivated (as in - suspend) we must call DP.destroy here to avoid leaks + 1. if the device has already been shutdown and deactivated (as in + suspend) we must call DP.destroy here to avoid leaks - 2. if the device is successfully shutdown here then we must call - DP.destroy because no-one else will + 2. if the device is successfully shutdown here then we must call + DP.destroy because no-one else will - 3. if the device shutdown is rejected then we should leave the DP - alone and rely on the event thread calling us again later. *) + 3. if the device shutdown is rejected then we should leave the DP + alone and rely on the event thread calling us again later. *) let domid = domid_of_uuid ~xs (uuid_of_string vm) in (* If the device is gone then we don't need to shut it down but we do - need to free any storage resources. *) + need to free any storage resources. *) let dev = try Some (device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd)) @@ -3939,7 +3939,7 @@ module VBD = struct vm (id_of vbd) ; (* this happens on normal shutdown too *) (* Case (1): success; Case (2): success; Case (3): an exception is - thrown *) + thrown *) with_tracing ~task ~name:"VBD_device_shutdown" @@ fun () -> Xenops_task.with_subtask task (Printf.sprintf "Vbd.clean_shutdown %s" (id_of vbd)) @@ -3950,7 +3950,7 @@ module VBD = struct ) dev ; (* We now have a shutdown device but an active DP: we should destroy - the DP if the backend is of type VDI *) + the DP if the backend is of type VDI *) finally (fun () -> with_tracing ~task ~name:"VBD_device_release" (fun () -> @@ -3994,11 +3994,14 @@ module VBD = struct () ) (fun () -> - with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> + with_tracing ~task ~name:"VBD_deactivate" @@ fun () -> + let vmid = Storage.vm_of_domid domid in match (domid, backend) with - | Some x, None | Some x, Some (VDI _) -> - Storage.dp_destroy task - (Storage.id_of (string_of_int x) vbd.Vbd.id) + | Some x, Some (VDI path) -> + let sr, vdi = Storage.get_disk_by_name task path in + let dp = Storage.id_of (string_of_int x) vbd.id in + Storage.deactivate task dp sr vdi vmid + (* We don't need to detach Local or CDROM *) | _ -> () ) @@ -4007,6 +4010,46 @@ module VBD = struct raise (Xenopsd_error (Device_detach_rejected ("VBD", id_of vbd, s))) ) + let detach task vm vbd = + with_xc_and_xs (fun xc xs -> + let domid = domid_of_uuid ~xs (uuid_of_string vm) in + let dev = + try + Some (device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd)) + with + | Xenopsd_error (Does_not_exist (_, _)) -> + debug "VM = %s; VBD = %s; Ignoring missing domain" vm (id_of vbd) ; + None + | Xenopsd_error Device_not_connected -> + debug "VM = %s; VBD = %s; Ignoring missing device" vm (id_of vbd) ; + None + in + let backend = + match dev with + | None -> + None + | Some dv -> ( + match + Rpcmarshal.unmarshal typ_of_backend + (Device.Generic.get_private_key ~xs dv _vdi_id + |> Jsonrpc.of_string + ) + with + | Ok x -> + x + | Error (`Msg m) -> + internal_error "Failed to unmarshal VBD backend: %s" m + ) + in + with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> + match (domid, backend) with + | Some x, None | Some x, Some (VDI _) -> + Storage.dp_destroy task (Storage.id_of (string_of_int x) vbd.Vbd.id) + | _ -> + () + ) ; + cleanup_attached_vdis vm vbd.id + let insert task vm vbd d = on_frontend (fun xc xs frontend_domid domain_type -> From b02ef25e68dc27f655d713d47c927414836c0ce4 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 24 Mar 2025 16:48:28 +0000 Subject: [PATCH 068/492] Move two storage functions to DATA module `import_activate` and `get_nbd_server` is more like datapath functions so I am moving them into the `DATA` module whereas they were previoulsy in `DATA.MIRROR` module. Reserve `DATA.MIRROR` only for storage migrate functionalities that is implemented in the xapi layer but no in the storage layer. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 84 ++++++++------ ocaml/xapi-idl/storage/storage_skeleton.ml | 11 +- ocaml/xapi-storage-script/main.ml | 116 ++++++++++---------- ocaml/xapi/storage_migrate.ml | 4 +- ocaml/xapi/storage_mux.ml | 36 +++--- ocaml/xapi/storage_smapiv1.ml | 9 +- ocaml/xapi/storage_smapiv1_wrapper.ml | 86 +++++++-------- 7 files changed, 176 insertions(+), 170 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 34856e0a57b..f6e292b9f29 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1000,6 +1000,32 @@ module StorageAPI (R : RPC) = struct @-> returning result_p err ) + (** [import_activate dbg dp sr vdi vm] returns a server socket address to + which a fd can be passed via SCM_RIGHTS for mirroring purposes.*) + let import_activate = + declare "DATA.import_activate" [] + (dbg_p + @-> dp_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> returning sock_path_p err + ) + + (** [get_nbd_server dbg dp sr vdi vm] returns the address of a generic nbd + server that can be connected to. Depending on the backend, this will either + be a nbd server backed by tapdisk or qemu-dp. Note this is different + from [import_activate] as the returned server does not accept fds. *) + let get_nbd_server = + declare "DATA.get_nbd_server" [] + (dbg_p + @-> dp_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> returning sock_path_p err + ) + module MIRROR = struct let mirror_vm_p = Param.mk ~name:"mirror_vm" Vm.t @@ -1091,32 +1117,6 @@ module StorageAPI (R : RPC) = struct Param.mk ~name:"mirrors" TypeCombinators.(list (pair Mirror.(id, t))) in declare "DATA.MIRROR.list" [] (dbg_p @-> returning result_p err) - - (** [import_activate dbg dp sr vdi vm] returns a server socket address to - which a fd can be passed via SCM_RIGHTS for mirroring purposes.*) - let import_activate = - declare "DATA.MIRROR.import_activate" [] - (dbg_p - @-> dp_p - @-> sr_p - @-> vdi_p - @-> vm_p - @-> returning sock_path_p err - ) - - (** [get_nbd_server dbg dp sr vdi vm] returns the address of a generic nbd - server that can be connected to. Depending on the backend, this will either - be a nbd server backed by tapdisk or qemu-dp. Note this is different - from [import_activate] as the returned server does not accept fds. *) - let get_nbd_server = - declare "DATA.MIRROR.get_nbd_server" [] - (dbg_p - @-> dp_p - @-> sr_p - @-> vdi_p - @-> vm_p - @-> returning sock_path_p err - ) end end @@ -1207,12 +1207,6 @@ module type MIRROR = sig val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list - - val import_activate : - context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> vm:vm -> sock_path - - val get_nbd_server : - context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> vm:vm -> sock_path end module type Server_impl = sig @@ -1471,6 +1465,24 @@ module type Server_impl = sig -> verify_dest:bool -> Task.id + val import_activate : + context + -> dbg:debug_info + -> dp:dp + -> sr:sr + -> vdi:vdi + -> vm:vm + -> sock_path + + val get_nbd_server : + context + -> dbg:debug_info + -> dp:dp + -> sr:sr + -> vdi:vdi + -> vm:vm + -> sock_path + module MIRROR : MIRROR end @@ -1650,11 +1662,11 @@ module Server (Impl : Server_impl) () = struct Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~id ) ; S.DATA.MIRROR.list (fun dbg -> Impl.DATA.MIRROR.list () ~dbg) ; - S.DATA.MIRROR.import_activate (fun dbg dp sr vdi vm -> - Impl.DATA.MIRROR.import_activate () ~dbg ~dp ~sr ~vdi ~vm + S.DATA.import_activate (fun dbg dp sr vdi vm -> + Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; - S.DATA.MIRROR.get_nbd_server (fun dbg dp sr vdi vm -> - Impl.DATA.MIRROR.get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm + S.DATA.get_nbd_server (fun dbg dp sr vdi vm -> + Impl.DATA.get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm ) ; S.Policy.get_backend_vm (fun dbg vm sr vdi -> Impl.Policy.get_backend_vm () ~dbg ~vm ~sr ~vdi diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 4b5b23e6973..03f3741f513 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -154,6 +154,11 @@ let get_by_name ctx ~dbg ~name = u "get_by_name" module DATA = struct let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = u "DATA.copy" + let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = + u "DATA.MIRROR.import_activate" + + let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = u "DATA.MIRROR.get_nbd_server" + module MIRROR = struct type context = unit @@ -179,12 +184,6 @@ module DATA = struct let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" let list ctx ~dbg = u "DATA.MIRROR.list" - - let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = - u "DATA.MIRROR.import_activate" - - let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = - u "DATA.MIRROR.get_nbd_server" end end diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 5910d65f28f..0f1e768bc50 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1789,59 +1789,59 @@ end module DATAImpl (M : META) = struct module VDI = VDIImpl (M) - module MIRROR = struct - let data_import_activate_impl dbg _dp sr vdi' vm' = - wrap - @@ - let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in - Attached_SRs.find sr >>>= fun sr -> - (* Discover the URIs using Volume.stat *) - VDI.stat ~dbg ~sr ~vdi >>>= fun response -> - ( match - List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys - with - | None -> - return response - | Some temporary -> - VDI.stat ~dbg ~sr ~vdi:temporary + let data_import_activate_impl dbg _dp sr vdi' vm' = + wrap + @@ + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + Attached_SRs.find sr >>>= fun sr -> + (* Discover the URIs using Volume.stat *) + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, datapath, uri) -> + if Datapath_plugins.supports_feature datapath _vdi_mirror_in then + return_data_rpc (fun () -> + Datapath_client.import_activate (rpc ~dbg) dbg uri domain ) - >>>= fun response -> - choose_datapath response >>>= fun (rpc, datapath, uri) -> - if Datapath_plugins.supports_feature datapath _vdi_mirror_in then - return_data_rpc (fun () -> - Datapath_client.import_activate (rpc ~dbg) dbg uri domain - ) - else - fail (Storage_interface.Errors.Unimplemented _vdi_mirror_in) - - let get_nbd_server_impl dbg _dp sr vdi' vm' = - wrap - @@ - let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in - VDI.vdi_attach_common dbg sr vdi domain >>>= function - | response -> ( - let _, _, _, nbds = - Storage_interface.implementations_of_backend - { - Storage_interface.implementations= - List.map convert_implementation - response.Xapi_storage.Data.implementations - } - in - match nbds with - | ({uri} as nbd) :: _ -> - info (fun m -> - m "%s qemu-dp nbd server address is %s" __FUNCTION__ uri - ) - >>= fun () -> - let socket, _export = Storage_interface.parse_nbd_uri nbd in - return socket - | _ -> - fail (backend_error "No nbd server found" []) - ) - end + else + fail (Storage_interface.Errors.Unimplemented _vdi_mirror_in) + + let get_nbd_server_impl dbg _dp sr vdi' vm' = + wrap + @@ + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + VDI.vdi_attach_common dbg sr vdi domain >>>= function + | response -> ( + let _, _, _, nbds = + Storage_interface.implementations_of_backend + { + Storage_interface.implementations= + List.map convert_implementation + response.Xapi_storage.Data.implementations + } + in + match nbds with + | ({uri} as nbd) :: _ -> + info (fun m -> + m "%s qemu-dp nbd server address is %s" __FUNCTION__ uri + ) + >>= fun () -> + let socket, _export = Storage_interface.parse_nbd_uri nbd in + return socket + | _ -> + fail (backend_error "No nbd server found" []) + ) + + module MIRROR = struct end end (* Bind the implementations *) @@ -1905,14 +1905,12 @@ let bind ~volume_script_dir = S.DP.attach_info DP.dp_attach_info_impl ; let module DATA = DATAImpl (RuntimeMeta) in - S.DATA.MIRROR.get_nbd_server DATA.MIRROR.get_nbd_server_impl ; - S.DATA.MIRROR.import_activate DATA.MIRROR.data_import_activate_impl ; + S.DATA.get_nbd_server DATA.get_nbd_server_impl ; + S.DATA.import_activate DATA.data_import_activate_impl ; let u name _ = failwith ("Unimplemented: " ^ name) in S.get_by_name (u "get_by_name") ; S.VDI.get_by_name (u "VDI.get_by_name") ; - S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; - S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.UPDATES.get (u "UPDATES.get") ; S.SR.update_snapshot_info_dest (u "SR.update_snapshot_info_dest") ; S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; @@ -1923,19 +1921,21 @@ let bind ~volume_script_dir = S.VDI.similar_content (u "VDI.similar_content") ; S.DATA.copy (u "DATA.copy") ; S.DP.stat_vdi (u "DP.stat_vdi") ; + S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; + S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; + S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; + S.TASK.list (u "TASK.list") ; S.VDI.attach (u "VDI.attach") ; S.VDI.attach2 (u "VDI.attach2") ; S.VDI.activate (u "VDI.activate") ; S.DATA.MIRROR.stat (u "DATA.MIRROR.stat") ; - S.TASK.list (u "TASK.list") ; S.VDI.get_url (u "VDI.get_url") ; S.DATA.MIRROR.start (u "DATA.MIRROR.start") ; S.Policy.get_backend_vm (u "Policy.get_backend_vm") ; - S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.SR.update_snapshot_info_src (u "SR.update_snapshot_info_src") ; S.DATA.MIRROR.stop (u "DATA.MIRROR.stop") ; Rpc_lwt.server S.implementation diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 8952f947993..27f91f036b6 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -1013,7 +1013,7 @@ let nbd_handler req s ?(vm = "0") sr vdi dp = let vm = Vm.of_string vm in let path = Storage_utils.transform_storage_exn (fun () -> - Local.DATA.MIRROR.import_activate "nbd" dp sr vdi vm + Local.DATA.import_activate "nbd" dp sr vdi vm ) in Http_svr.headers s (Http.http_200_ok () @ ["Transfer-encoding: nbd"]) ; @@ -1043,7 +1043,7 @@ let nbd_proxy req s vm sr vdi dp = let vm = Vm.of_string vm in let path = Storage_utils.transform_storage_exn (fun () -> - Local.DATA.MIRROR.get_nbd_server "nbd" dp sr vdi vm + Local.DATA.get_nbd_server "nbd" dp sr vdi vm ) in debug "%s got nbd server path %s" __FUNCTION__ path ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 9b071b86187..2c6884d48b5 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -739,6 +739,24 @@ module Mux = struct let copy () ~dbg = with_dbg ~name:"DATA.copy" ~dbg @@ fun dbg -> Storage_migrate.copy ~dbg + let import_activate () ~dbg ~dp ~sr ~vdi ~vm = + with_dbg ~name:"DATA.import_activate" ~dbg @@ fun di -> + info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) (s_of_vm vm) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.import_activate (Debug_info.to_string di) dp sr vdi vm + + let get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm = + with_dbg ~name:"DATA.get_nbd_server" ~dbg @@ fun di -> + info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) (s_of_vm vm) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.get_nbd_server (Debug_info.to_string di) dp sr vdi vm + module MIRROR = struct type context = unit @@ -803,24 +821,6 @@ module Mux = struct with_dbg ~name:"DATA.MIRROR.receive_cancel" ~dbg @@ fun di -> info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; Storage_migrate.receive_cancel ~dbg:di.log ~id - - let import_activate () ~dbg ~dp ~sr ~vdi ~vm = - with_dbg ~name:"DATA.MIRROR.import_activate" ~dbg @@ fun di -> - info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp - (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.DATA.MIRROR.import_activate (Debug_info.to_string di) dp sr vdi vm - - let get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm = - with_dbg ~name:"DATA.MIRROR.get_nbd_server" ~dbg @@ fun di -> - info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp - (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.DATA.MIRROR.get_nbd_server (Debug_info.to_string di) dp sr vdi vm end end diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 4373fdaae87..d459c0ee2b0 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1212,6 +1212,10 @@ module SMAPIv1 : Server_impl = struct let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false + let import_activate _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false + + let get_nbd_server _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false + module MIRROR = struct type context = unit @@ -1237,11 +1241,6 @@ module SMAPIv1 : Server_impl = struct let receive_finalize2 _context ~dbg:_ ~id:_ = assert false let receive_cancel _context ~dbg:_ ~id:_ = assert false - - let import_activate _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = - assert false - - let get_nbd_server _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false end end diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index f87bb9ffc4f..7672a4b61e6 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1142,6 +1142,47 @@ functor (s_of_vdi vdi) url (s_of_sr dest) ; Impl.DATA.copy context ~dbg ~sr ~vdi ~vm ~url ~dest + (* tapdisk supports three kind of nbd servers, the old style nbdserver, + the new style nbd server and a real nbd server. The old and new style nbd servers + are "special" nbd servers that accept fds passed via SCM_RIGHTS and handle + connection based on that fd. The real nbd server is a "normal" nbd server + that accepts nbd connections from nbd clients, and it does not support fd + passing. *) + let get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style = + info "%s DATA.get_nbd_server dbg:%s dp:%s sr:%s vdi:%s vm:%s" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; + let attach_info = DP.attach_info context ~dbg:"nbd" ~sr ~vdi ~dp ~vm in + match Storage_migrate.tapdisk_of_attach_info attach_info with + | Some tapdev -> + let minor = Tapctl.get_minor tapdev in + let pid = Tapctl.get_tapdisk_pid tapdev in + let path = + match style with + | `newstyle -> + Printf.sprintf "/var/run/blktap-control/nbdserver-new%d.%d" + pid minor + | `oldstyle -> + Printf.sprintf "/var/run/blktap-control/nbdserver%d.%d" pid + minor + | `real -> + Printf.sprintf "/var/run/blktap-control/nbd%d.%d" pid minor + in + debug "%s nbd server path is %s" __FUNCTION__ path ; + path + | None -> + raise + (Storage_interface.Storage_error + (Backend_error + (Api_errors.internal_error, ["No tapdisk attach info found"]) + ) + ) + + let import_activate context ~dbg ~dp ~sr ~vdi ~vm = + get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`oldstyle + + let get_nbd_server context ~dbg ~dp ~sr ~vdi ~vm = + get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`real + module MIRROR = struct type context = unit @@ -1189,51 +1230,6 @@ functor let receive_cancel context ~dbg ~id = info "DATA.MIRROR.receive_cancel dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_cancel context ~dbg ~id - - (* tapdisk supports three kind of nbd servers, the old style nbdserver, - the new style nbd server and a real nbd server. The old and new style nbd servers - are "special" nbd servers that accept fds passed via SCM_RIGHTS and handle - connection based on that fd. The real nbd server is a "normal" nbd server - that accepts nbd connections from nbd clients, and it does not support fd - passing. *) - let get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style = - info "%s DATA.MIRROR.get_nbd_server dbg:%s dp:%s sr:%s vdi:%s vm:%s" - __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let attach_info = - DP.attach_info context ~dbg:"nbd" ~sr ~vdi ~dp ~vm - in - match Storage_migrate.tapdisk_of_attach_info attach_info with - | Some tapdev -> - let minor = Tapctl.get_minor tapdev in - let pid = Tapctl.get_tapdisk_pid tapdev in - let path = - match style with - | `newstyle -> - Printf.sprintf "/var/run/blktap-control/nbdserver-new%d.%d" - pid minor - | `oldstyle -> - Printf.sprintf "/var/run/blktap-control/nbdserver%d.%d" pid - minor - | `real -> - Printf.sprintf "/var/run/blktap-control/nbd%d.%d" pid minor - in - debug "%s nbd server path is %s" __FUNCTION__ path ; - path - | None -> - raise - (Storage_interface.Storage_error - (Backend_error - ( Api_errors.internal_error - , ["No tapdisk attach info found"] - ) - ) - ) - - let import_activate context ~dbg ~dp ~sr ~vdi ~vm = - get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`oldstyle - - let get_nbd_server context ~dbg ~dp ~sr ~vdi ~vm = - get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`real end end From 00e6954f76cc2b0e0314d42e399d593e9afda902 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 24 Mar 2025 17:08:18 +0000 Subject: [PATCH 069/492] CP-54072: Remove some of the unmuxed functions There are several functions in storage_interface and hence storage_mux, such as `start`, `stop`, `list`. These functions are currently not multiplexed but just called directly into storage_migrate. In fact, they are unlikely to be multiplexed because they use the `State` module in storage migrate, which is a in memory hashtable in xapi, not accessible by xapi-storage-script. So remove them from the storage interface, and callers of these functions can call them from storage_migrate directly rather than going through the storage interface. None of these are remote functions so no need to worry about backwards compatibility. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 61 ---------- ocaml/xapi-idl/storage/storage_skeleton.ml | 11 -- ocaml/xapi-storage-cli/dune | 1 + ocaml/xapi-storage-cli/main.ml | 10 +- ocaml/xapi-storage-script/main.ml | 4 - ocaml/xapi/storage_access.ml | 2 +- ocaml/xapi/storage_migrate.ml | 126 +++++++++++--------- ocaml/xapi/storage_mux.ml | 26 ---- ocaml/xapi/storage_smapiv1.ml | 10 -- ocaml/xapi/storage_smapiv1_wrapper.ml | 18 --- ocaml/xapi/xapi_vm_migrate.ml | 11 +- 11 files changed, 82 insertions(+), 198 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index f6e292b9f29..d3e1448aba8 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1027,36 +1027,8 @@ module StorageAPI (R : RPC) = struct ) module MIRROR = struct - let mirror_vm_p = Param.mk ~name:"mirror_vm" Vm.t - - let copy_vm_p = Param.mk ~name:"copy_vm" Vm.t - - (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and - writes data synchronously. It returns the id of the VDI.*) - let start = - declare "DATA.MIRROR.start" [] - (dbg_p - @-> sr_p - @-> vdi_p - @-> dp_p - @-> mirror_vm_p - @-> copy_vm_p - @-> url_p - @-> dest_p - @-> verify_dest_p - @-> returning task_id_p err - ) - let id_p = Param.mk ~name:"id" Mirror.id - (** [stop task sr vdi] stops mirroring local [vdi] *) - let stop = - declare "DATA.MIRROR.stop" [] (dbg_p @-> id_p @-> returning unit_p err) - - let stat = - let result_p = Param.mk ~name:"result" Mirror.t in - declare "DATA.MIRROR.stat" [] (dbg_p @-> id_p @-> returning result_p err) - (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_start during SXM. @@ -1111,12 +1083,6 @@ module StorageAPI (R : RPC) = struct let receive_cancel = declare "DATA.MIRROR.receive_cancel" [] (dbg_p @-> id_p @-> returning unit_p err) - - let list = - let result_p = - Param.mk ~name:"mirrors" TypeCombinators.(list (pair Mirror.(id, t))) - in - declare "DATA.MIRROR.list" [] (dbg_p @-> returning result_p err) end end @@ -1164,23 +1130,6 @@ end module type MIRROR = sig type context = unit - val start : - context - -> dbg:debug_info - -> sr:sr - -> vdi:vdi - -> dp:dp - -> mirror_vm:vm - -> copy_vm:vm - -> url:string - -> dest:sr - -> verify_dest:bool - -> Task.id - - val stop : context -> dbg:debug_info -> id:Mirror.id -> unit - - val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t - val receive_start : context -> dbg:debug_info @@ -1205,8 +1154,6 @@ module type MIRROR = sig val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit - - val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list end module type Server_impl = sig @@ -1639,13 +1586,6 @@ module Server (Impl : Server_impl) () = struct S.DATA.copy (fun dbg sr vdi vm url dest verify_dest -> Impl.DATA.copy () ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest ) ; - S.DATA.MIRROR.start - (fun dbg sr vdi dp mirror_vm copy_vm url dest verify_dest -> - Impl.DATA.MIRROR.start () ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url - ~dest ~verify_dest - ) ; - S.DATA.MIRROR.stop (fun dbg id -> Impl.DATA.MIRROR.stop () ~dbg ~id) ; - S.DATA.MIRROR.stat (fun dbg id -> Impl.DATA.MIRROR.stat () ~dbg ~id) ; S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; @@ -1661,7 +1601,6 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_finalize2 (fun dbg id -> Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~id ) ; - S.DATA.MIRROR.list (fun dbg -> Impl.DATA.MIRROR.list () ~dbg) ; S.DATA.import_activate (fun dbg dp sr vdi vm -> Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 03f3741f513..04a4bb1e85e 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -162,15 +162,6 @@ module DATA = struct module MIRROR = struct type context = unit - (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and - writes data synchronously. It returns the id of the VDI.*) - let start ctx ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = - u "DATA.MIRROR.start" - - let stop ctx ~dbg ~id = u "DATA.MIRROR.stop" - - let stat ctx ~dbg ~id = u "DATA.MIRROR.stat" - let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = u "DATA.MIRROR.receive_start" @@ -182,8 +173,6 @@ module DATA = struct let receive_finalize2 ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize2" let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" - - let list ctx ~dbg = u "DATA.MIRROR.list" end end diff --git a/ocaml/xapi-storage-cli/dune b/ocaml/xapi-storage-cli/dune index 624f2f727e1..c59c5c1fad4 100644 --- a/ocaml/xapi-storage-cli/dune +++ b/ocaml/xapi-storage-cli/dune @@ -5,6 +5,7 @@ xapi-idl xapi-idl.storage xapi-idl.storage.interface + xapi_internal re re.str rpclib.core diff --git a/ocaml/xapi-storage-cli/main.ml b/ocaml/xapi-storage-cli/main.ml index c64a4f6fcd9..536ea02608e 100644 --- a/ocaml/xapi-storage-cli/main.ml +++ b/ocaml/xapi-storage-cli/main.ml @@ -149,7 +149,7 @@ let string_of_file filename = let mirror_list common_opts = wrap common_opts (fun () -> - let list = Client.DATA.MIRROR.list dbg in + let list = Storage_migrate.list ~dbg in List.iter (fun (id, status) -> Printf.printf "%s" (string_of_mirror id status)) list @@ -323,9 +323,9 @@ let mirror_start common_opts sr vdi dp url dest verify_dest = let url = get_opt url "Need a URL" in let dest = get_opt dest "Need a destination SR" in let task = - Client.DATA.MIRROR.start dbg sr vdi dp mirror_vm copy_vm url - (Storage_interface.Sr.of_string dest) - verify_dest + Storage_migrate.start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url + ~dest:(Storage_interface.Sr.of_string dest) + ~verify_dest in Printf.printf "Task id: %s\n" task ) @@ -335,7 +335,7 @@ let mirror_stop common_opts id = wrap common_opts (fun () -> match id with | Some id -> - Client.DATA.MIRROR.stop dbg id + Storage_migrate.stop ~dbg ~id | None -> failwith "Need an ID" ) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 0f1e768bc50..29c321836c6 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1913,7 +1913,6 @@ let bind ~volume_script_dir = S.VDI.get_by_name (u "VDI.get_by_name") ; S.UPDATES.get (u "UPDATES.get") ; S.SR.update_snapshot_info_dest (u "SR.update_snapshot_info_dest") ; - S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; S.TASK.stat (u "TASK.stat") ; S.DP.diagnostics (u "DP.diagnostics") ; S.TASK.destroy (u "TASK.destroy") ; @@ -1932,12 +1931,9 @@ let bind ~volume_script_dir = S.VDI.attach (u "VDI.attach") ; S.VDI.attach2 (u "VDI.attach2") ; S.VDI.activate (u "VDI.activate") ; - S.DATA.MIRROR.stat (u "DATA.MIRROR.stat") ; S.VDI.get_url (u "VDI.get_url") ; - S.DATA.MIRROR.start (u "DATA.MIRROR.start") ; S.Policy.get_backend_vm (u "Policy.get_backend_vm") ; S.SR.update_snapshot_info_src (u "SR.update_snapshot_info_src") ; - S.DATA.MIRROR.stop (u "DATA.MIRROR.stop") ; Rpc_lwt.server S.implementation let process_smapiv2_requests server txt = diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 0aeed25125d..6eaee5a029e 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -439,7 +439,7 @@ let update_task ~__context id = let update_mirror ~__context id = try let dbg = Context.string_of_task __context in - let m = Client.DATA.MIRROR.stat dbg id in + let m = Storage_migrate.stat ~dbg ~id in if m.Mirror.failed then debug "Mirror %s has failed" id ; let task = get_mirror_task id in diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 27f91f036b6..023a984256e 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -401,6 +401,70 @@ module MigrateLocal = struct | e -> raise (Storage_error (Internal_error (Printexc.to_string e))) + let stop_internal ~dbg ~id = + (* Find the local VDI *) + let alm = State.find_active_local_mirror id in + match alm with + | Some alm -> + ( match alm.State.Send_state.remote_info with + | Some remote_info -> ( + let sr, vdi = State.of_mirror_id id in + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + ) + in + let local_vdi = add_to_sm_config local_vdi "mirror" "null" in + let local_vdi = remove_from_sm_config local_vdi "base_mirror" in + (* Disable mirroring on the local machine *) + let snapshot = Local.VDI.snapshot dbg sr local_vdi in + Local.VDI.destroy dbg sr snapshot.vdi ; + (* Destroy the snapshot, if it still exists *) + let snap = + try + Some + (List.find + (fun x -> + List.mem_assoc "base_mirror" x.sm_config + && List.assoc "base_mirror" x.sm_config = id + ) + vdis + ) + with _ -> None + in + ( match snap with + | Some s -> + debug "Found snapshot VDI: %s" + (Storage_interface.Vdi.string_of s.vdi) ; + Local.VDI.destroy dbg sr s.vdi + | None -> + debug "Snapshot VDI already cleaned up" + ) ; + + let (module Remote) = + get_remote_backend remote_info.url remote_info.verify_dest + in + try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () + ) + | None -> + () + ) ; + State.remove_local_mirror id + | None -> + raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) + + let stop ~dbg ~id = + try stop_internal ~dbg ~id with + | Storage_error (Backend_error (code, params)) + | Api_errors.Server_error (code, params) -> + raise (Storage_error (Backend_error (code, params))) + | e -> + raise e + let start ~task ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = SXM.info @@ -586,7 +650,7 @@ module MigrateLocal = struct in inner () ) ; - on_fail := (fun () -> Local.DATA.MIRROR.stop dbg mirror_id) :: !on_fail ; + on_fail := (fun () -> stop ~dbg ~id:mirror_id) :: !on_fail ; (* Copy the snapshot to the remote *) let new_parent = Storage_task.with_subtask task "copy" (fun () -> @@ -615,62 +679,6 @@ module MigrateLocal = struct perform_cleanup_actions !on_fail ; raise e - let stop ~dbg ~id = - (* Find the local VDI *) - let alm = State.find_active_local_mirror id in - match alm with - | Some alm -> - ( match alm.State.Send_state.remote_info with - | Some remote_info -> ( - let sr, vdi = State.of_mirror_id id in - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - let local_vdi = add_to_sm_config local_vdi "mirror" "null" in - let local_vdi = remove_from_sm_config local_vdi "base_mirror" in - (* Disable mirroring on the local machine *) - let snapshot = Local.VDI.snapshot dbg sr local_vdi in - Local.VDI.destroy dbg sr snapshot.vdi ; - (* Destroy the snapshot, if it still exists *) - let snap = - try - Some - (List.find - (fun x -> - List.mem_assoc "base_mirror" x.sm_config - && List.assoc "base_mirror" x.sm_config = id - ) - vdis - ) - with _ -> None - in - ( match snap with - | Some s -> - debug "Found snapshot VDI: %s" - (Storage_interface.Vdi.string_of s.vdi) ; - Local.VDI.destroy dbg sr s.vdi - | None -> - debug "Snapshot VDI already cleaned up" - ) ; - - let (module Remote) = - get_remote_backend remote_info.url remote_info.verify_dest - in - try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () - ) - | None -> - () - ) ; - State.remove_local_mirror id - | None -> - raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) - let stat ~dbg:_ ~id = let recv_opt = State.find_active_receive_mirror id in let send_opt = State.find_active_local_mirror id in @@ -1061,6 +1069,9 @@ let nbd_proxy req s vm sr vdi dp = ) (fun () -> Unix.close control_fd) +let with_dbg ~name ~dbg f = + Debug_info.with_dbg ~with_thread:true ~module_name:__MODULE__ ~name ~dbg f + let with_task_and_thread ~dbg f = let task = Storage_task.add tasks dbg.Debug_info.log (fun task -> @@ -1097,6 +1108,7 @@ let copy ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = ) let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = + with_dbg ~name:__FUNCTION__ ~dbg @@ fun dbg -> with_task_and_thread ~dbg (fun task -> MigrateLocal.start ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 2c6884d48b5..0a90d55e05f 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -760,32 +760,6 @@ module Mux = struct module MIRROR = struct type context = unit - let start () ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest - = - with_dbg ~name:"DATA.MIRROR.start" ~dbg @@ fun di -> - info - "%s dbg:%s sr: %s vdi: %s dp:%s mirror_vm: %s copy_vm: %s url: %s \ - dest sr: %s verify_dest: %B" - __FUNCTION__ dbg (s_of_sr sr) (s_of_vdi vdi) dp (s_of_vm mirror_vm) - (s_of_vm copy_vm) url (s_of_sr dest) verify_dest ; - Storage_migrate.start ~dbg:di ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url - ~dest ~verify_dest - - let stop () ~dbg ~id = - with_dbg ~name:"DATA.MIRROR.stop" ~dbg @@ fun di -> - info "%s dbg:%s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.stop ~dbg:di.log ~id - - let list () ~dbg = - with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> - info "%s dbg: %s" __FUNCTION__ dbg ; - Storage_migrate.list ~dbg:di.log - - let stat () ~dbg ~id = - with_dbg ~name:"DATA.MIRROR.stat" ~dbg @@ fun di -> - info "%s dbg: %s mirror_id: %s" __FUNCTION__ di.log id ; - Storage_migrate.stat ~dbg:di.log ~id - let receive_start () ~dbg ~sr ~vdi_info ~id ~similar = with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun di -> info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s" diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index d459c0ee2b0..a1beed1afe8 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1219,16 +1219,6 @@ module SMAPIv1 : Server_impl = struct module MIRROR = struct type context = unit - let start _context ~dbg:_ ~sr:_ ~vdi:_ ~dp:_ ~mirror_vm:_ ~copy_vm:_ - ~url:_ ~dest:_ ~verify_dest:_ = - assert false - - let stop _context ~dbg:_ ~id:_ = assert false - - let list _context ~dbg:_ = assert false - - let stat _context ~dbg:_ ~id:_ = assert false - let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 7672a4b61e6..dd8aad1d13f 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1186,24 +1186,6 @@ functor module MIRROR = struct type context = unit - let start context ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = - info "DATA.MIRROR.start dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg - (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; - Impl.DATA.MIRROR.start context ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm - ~url ~dest - - let stop context ~dbg ~id = - info "DATA.MIRROR.stop dbg:%s id:%s" dbg id ; - Impl.DATA.MIRROR.stop context ~dbg ~id - - let list context ~dbg = - info "DATA.MIRROR.active dbg:%s" dbg ; - Impl.DATA.MIRROR.list context ~dbg - - let stat context ~dbg ~id = - info "DATA.MIRROR.stat dbg:%s id:%s" dbg id ; - Impl.DATA.MIRROR.stat context ~dbg ~id - let receive_start context ~dbg ~sr ~vdi_info ~id ~similar = info "DATA.MIRROR.receive_start dbg:%s sr:%s id:%s similar:[%s]" dbg (s_of_sr sr) id diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index b09adef7f9d..1d4e7377b66 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1035,8 +1035,9 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far (Vm.string_of vconf.copy_vm) ; (* Layering violation!! *) ignore (Storage_access.register_mirror __context id) ; - SMAPI.DATA.MIRROR.start dbg vconf.sr vconf.location new_dp - vconf.mirror_vm vconf.copy_vm remote.sm_url dest_sr is_intra_pool + Storage_migrate.start ~dbg ~sr:vconf.sr ~vdi:vconf.location ~dp:new_dp + ~mirror_vm:vconf.mirror_vm ~copy_vm:vconf.copy_vm ~url:remote.sm_url + ~dest:dest_sr ~verify_dest:is_intra_pool in let mapfn x = let total = Int64.to_float total_size in @@ -1061,7 +1062,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far (None, vdi.vdi) ) else let mirrorid = task_result |> mirror_of_task dbg in - let m = SMAPI.DATA.MIRROR.stat dbg mirrorid in + let m = Storage_migrate.stat ~dbg ~id:mirrorid in (Some mirrorid, m.Mirror.dest_vdi) in so_far := Int64.add !so_far vconf.size ; @@ -1090,8 +1091,8 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far match mirror_id with | Some mid -> ignore (Storage_access.unregister_mirror mid) ; - let m = SMAPI.DATA.MIRROR.stat dbg mid in - (try SMAPI.DATA.MIRROR.stop dbg mid with _ -> ()) ; + let m = Storage_migrate.stat ~dbg ~id:mid in + (try Storage_migrate.stop ~dbg ~id:mid with _ -> ()) ; m.Mirror.failed | None -> false From 63d7ee3a75c61021e874c103da37c3e57ded5987 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 25 Mar 2025 16:52:03 +0000 Subject: [PATCH 070/492] CP-54072: Define new `MIRROR.send_start` function The `send_start` function is a subroutine inside the `Storage_migrate.start` function, which takes the mirror prepared by the `receive_start` and initiates mirroring to the remote VDI. This commit only defines the interface, which means this function is currently unused. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 74 +++++++++++++++++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 4 ++ ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_mux.ml | 7 ++ ocaml/xapi/storage_smapiv1.ml | 5 ++ ocaml/xapi/storage_smapiv1_wrapper.ml | 8 +++ 6 files changed, 99 insertions(+) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index d3e1448aba8..311c9f2dfdf 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1027,8 +1027,43 @@ module StorageAPI (R : RPC) = struct ) module MIRROR = struct + let mirror_vm_p = Param.mk ~name:"mirror_vm" Vm.t + + let copy_vm_p = Param.mk ~name:"copy_vm" Vm.t + + let live_vm_p = Param.mk ~name:"live_vm" Vm.t + let id_p = Param.mk ~name:"id" Mirror.id + (** [send_start dbg dp task src_sr vdi mirror_vm mirror_id local_vdi copy_vm + live_vm url remote_mirror dest_sr verify_dest] + takes the remote mirror [remote_mirror] prepared by the destination host + and initiates the mirroring of [vdi] from the source *) + let send_start = + let recv_result_p = + Param.mk ~name:"recv_result" Mirror.mirror_receive_result + in + let local_vdi_p = Param.mk ~name:"local_vdi" vdi_info in + let src_sr_p = Param.mk ~name:"src_sr" Sr.t in + let dest_sr_p = Param.mk ~name:"dest_sr" Sr.t in + declare "DATA.MIRROR.send_start" [] + (dbg_p + @-> dp_p + @-> task_id_p + @-> src_sr_p + @-> vdi_p + @-> mirror_vm_p + @-> id_p + @-> local_vdi_p + @-> copy_vm_p + @-> live_vm_p + @-> url_p + @-> recv_result_p + @-> dest_sr_p + @-> verify_dest_p + @-> returning unit_p err + ) + (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_start during SXM. @@ -1130,6 +1165,24 @@ end module type MIRROR = sig type context = unit + val send_start : + context + -> dbg:debug_info + -> task_id:Task.id + -> dp:dp + -> sr:sr + -> vdi:vdi + -> mirror_vm:vm + -> mirror_id:Mirror.id + -> local_vdi:vdi_info + -> copy_vm:vm + -> live_vm:vm + -> url:string + -> remote_mirror:Mirror.mirror_receive_result + -> dest_sr:sr + -> verify_dest:bool + -> unit + val receive_start : context -> dbg:debug_info @@ -1586,6 +1639,27 @@ module Server (Impl : Server_impl) () = struct S.DATA.copy (fun dbg sr vdi vm url dest verify_dest -> Impl.DATA.copy () ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest ) ; + S.DATA.MIRROR.send_start + (fun + dbg + task_id + dp + sr + vdi + mirror_vm + mirror_id + local_vdi + copy_vm + live_vm + url + remote_mirror + dest_sr + verify_dest + -> + Impl.DATA.MIRROR.send_start () ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm + ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr + ~verify_dest + ) ; S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 04a4bb1e85e..27197b06c7c 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -162,6 +162,10 @@ module DATA = struct module MIRROR = struct type context = unit + let send_start ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = + u "DATA.MIRROR.send_start" + let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = u "DATA.MIRROR.receive_start" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 29c321836c6..eb63f132e98 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1920,6 +1920,7 @@ let bind ~volume_script_dir = S.VDI.similar_content (u "VDI.similar_content") ; S.DATA.copy (u "DATA.copy") ; S.DP.stat_vdi (u "DP.stat_vdi") ; + S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 0a90d55e05f..6614177e3e3 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -760,6 +760,13 @@ module Mux = struct module MIRROR = struct type context = unit + let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + u "DATA.MIRROR.send_start" (* see storage_smapi{v1,v3}_migrate.ml *) + let receive_start () ~dbg ~sr ~vdi_info ~id ~similar = with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun di -> info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s" diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index a1beed1afe8..1616d1a65f9 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1219,6 +1219,11 @@ module SMAPIv1 : Server_impl = struct module MIRROR = struct type context = unit + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + assert false + let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index dd8aad1d13f..8a9b053f509 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1186,6 +1186,14 @@ functor module MIRROR = struct type context = unit + let u x = + raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + u "DATA.MIRROR.send_start" + let receive_start context ~dbg ~sr ~vdi_info ~id ~similar = info "DATA.MIRROR.receive_start dbg:%s sr:%s id:%s similar:[%s]" dbg (s_of_sr sr) id From 36d41aa24e582fece281b076b8b2bca807bd6e37 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 25 Mar 2025 17:35:59 +0000 Subject: [PATCH 071/492] CP-54072: Create template for storage_smapi{v1,v3}_migrate Just so that they type check, some of the functions are still unimplemented, and these functions are still unused. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_smapiv1_migrate.ml | 177 +++++++++++++++++++++++++ ocaml/xapi/storage_smapiv1_migrate.mli | 17 +++ ocaml/xapi/storage_smapiv3_migrate.ml | 39 ++++++ ocaml/xapi/storage_smapiv3_migrate.mli | 17 +++ 4 files changed, 250 insertions(+) create mode 100644 ocaml/xapi/storage_smapiv1_migrate.ml create mode 100644 ocaml/xapi/storage_smapiv1_migrate.mli create mode 100644 ocaml/xapi/storage_smapiv3_migrate.ml create mode 100644 ocaml/xapi/storage_smapiv3_migrate.mli diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml new file mode 100644 index 00000000000..83dd41d4972 --- /dev/null +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -0,0 +1,177 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = "storage_smapiv1_migrate" end) + +module Unixext = Xapi_stdext_unix.Unixext +open Storage_interface +open Storage_migrate_helper +module State = Storage_migrate_helper.State +module SXM = Storage_migrate_helper.SXM + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +module MIRROR : SMAPIv2_MIRROR = struct + type context = unit + + let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + + let send_start _ctx = u __FUNCTION__ + + let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = + let on_fail : (unit -> unit) list ref = ref [] in + let vdis = Local.SR.scan dbg sr in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in + let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in + try + let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in + let leaf = Local.VDI.create dbg sr vdi_info in + D.info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + (* dummy VDI is created so that the leaf VDI becomes a differencing disk, + useful for calling VDI.compose later on *) + let dummy = Local.VDI.snapshot dbg sr leaf in + on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; + D.debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ + (string_of_vdi_info dummy) ; + let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= vdi_info.virtual_size + ) + vdis + ) + with Not_found -> None + ) + ) + None similar + in + D.debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let parent = + match nearest with + | Some vdi -> + D.debug "Cloning VDI" ; + let vdi = add_to_sm_config vdi "base_mirror" id in + let vdi_clone = Local.VDI.clone dbg sr vdi in + D.debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> vdi_info.virtual_size then + let new_size = + Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size + in + D.debug "Resize local clone VDI to %Ld: result %Ld" + vdi_info.virtual_size new_size + ) ; + vdi_clone + | None -> + D.debug "Creating a blank remote VDI" ; + Local.VDI.create dbg sr vdi_info + in + D.debug "Parent disk content_id=%s" parent.content_id ; + State.add id + State.( + Recv_op + Receive_state. + { + sr + ; dummy_vdi= dummy.vdi + ; leaf_vdi= leaf.vdi + ; leaf_dp + ; parent_vdi= parent.vdi + ; remote_vdi= vdi_info.vdi + ; mirror_vm= vm + } + ) ; + let nearest_content_id = Option.map (fun x -> x.content_id) nearest in + Mirror.Vhd_mirror + { + Mirror.mirror_vdi= leaf + ; mirror_datapath= leaf_dp + ; copy_diffs_from= nearest_content_id + ; copy_diffs_to= parent.vdi + ; dummy_vdi= dummy.vdi + } + with e -> + List.iter + (fun op -> + try op () + with e -> + D.debug "Caught exception in on_fail: %s" (Printexc.to_string e) + ) + !on_fail ; + raise e + + let receive_start _ctx ~dbg ~sr ~vdi_info ~id ~similar = + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") + + let receive_start2 _ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm + + let receive_finalize _ctx ~dbg ~id = + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; + State.remove_receive_mirror id + + let receive_finalize2 _ctx ~dbg ~id = + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + SXM.info + "%s Mirror done. Compose on the dest sr %s parent %s and leaf %s" + __FUNCTION__ (Sr.string_of r.sr) + (Vdi.string_of r.parent_vdi) + (Vdi.string_of r.leaf_vdi) ; + Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; + (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so + there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) + D.log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; + Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + ) + recv_state ; + State.remove_receive_mirror id + + let receive_cancel _ctx ~dbg ~id = + let receive_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> + D.log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v) + ) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror id +end diff --git a/ocaml/xapi/storage_smapiv1_migrate.mli b/ocaml/xapi/storage_smapiv1_migrate.mli new file mode 100644 index 00000000000..d47b82cd86c --- /dev/null +++ b/ocaml/xapi/storage_smapiv1_migrate.mli @@ -0,0 +1,17 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +module MIRROR : SMAPIv2_MIRROR diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml new file mode 100644 index 00000000000..4cfcf1c831e --- /dev/null +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -0,0 +1,39 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = "storage_smapiv1_migrate" end) + +module Unixext = Xapi_stdext_unix.Unixext +module State = Storage_migrate_helper.State +module SXM = Storage_migrate_helper.SXM + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +module MIRROR : SMAPIv2_MIRROR = struct + type context = unit + + let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + + let send_start _ctx = u __FUNCTION__ + + let receive_start _ctx = u __FUNCTION__ + + let receive_start2 _ctx = u __FUNCTION__ + + let receive_finalize _ctx = u __FUNCTION__ + + let receive_finalize2 _ctx = u __FUNCTION__ + + let receive_cancel _ctx = u __FUNCTION__ +end diff --git a/ocaml/xapi/storage_smapiv3_migrate.mli b/ocaml/xapi/storage_smapiv3_migrate.mli new file mode 100644 index 00000000000..d47b82cd86c --- /dev/null +++ b/ocaml/xapi/storage_smapiv3_migrate.mli @@ -0,0 +1,17 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +module MIRROR : SMAPIv2_MIRROR From 7f96646d118bd3d4f98fdf01cf80a69af8d918df Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 4 Apr 2025 11:04:43 +0100 Subject: [PATCH 072/492] style: List.find/List.find_opt/s Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 28 ++++++++++++--------------- ocaml/xapi/storage_migrate_helper.ml | 4 ++-- ocaml/xapi/storage_migrate_helper.mli | 2 ++ 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 023a984256e..e9fa4f18481 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -411,12 +411,12 @@ module MigrateLocal = struct let sr, vdi = State.of_mirror_id id in let vdis = Local.SR.scan dbg sr in let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) + match List.find_opt (fun x -> x.vdi = vdi) vdis with + | None -> + failwith_fmt "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + | Some v -> + v in let local_vdi = add_to_sm_config local_vdi "mirror" "null" in let local_vdi = remove_from_sm_config local_vdi "base_mirror" in @@ -425,16 +425,12 @@ module MigrateLocal = struct Local.VDI.destroy dbg sr snapshot.vdi ; (* Destroy the snapshot, if it still exists *) let snap = - try - Some - (List.find - (fun x -> - List.mem_assoc "base_mirror" x.sm_config - && List.assoc "base_mirror" x.sm_config = id - ) - vdis - ) - with _ -> None + List.find_opt + (fun x -> + List.mem_assoc "base_mirror" x.sm_config + && List.assoc "base_mirror" x.sm_config = id + ) + vdis in ( match snap with | Some s -> diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index b7b1eb6c6f9..e924c208d8f 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -24,9 +24,9 @@ open Storage_interface open Xapi_stdext_pervasives.Pervasiveext open Xmlrpc_client -module State = struct - let failwith_fmt fmt = Printf.ksprintf failwith fmt +let failwith_fmt fmt = Printf.ksprintf failwith fmt +module State = struct module Receive_state = struct type t = { sr: Sr.t diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index 29753436c78..8ac0da552e2 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -14,6 +14,8 @@ module SXM : Debug.DEBUG +val failwith_fmt : ('a, unit, string, 'b) format4 -> 'a + module State : sig module Receive_state : sig type t = { From bedf26923c498c19a64160db150f8cc6edb21ee1 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 2 Apr 2025 15:53:08 +0100 Subject: [PATCH 073/492] xapi_vm_migrate: Avoid duplicate, overly-strict CBT check on VDIs There is already a call to `assert_can_migrate_vdis` present in `assert_can_migrate`, which checks that none of the VDIs that *are going to be moved* have CBT enabled. There is no need to additionally check that none of the VDIs *in general* have CBT enabled. Some clients, like XenOrchestra, will turn off CBT on VDIs and retry migration after getting the `VDI_CBT_ENABLED` error on live migration. Dropping this overly strict check allows not stripping CBT when VDI will not be moved (when it's on a shared SR). In addition, during rolling pool upgrades, disabling CBT is not allowed, hence the live migration operation wouldn't be able to continue. Avoiding the strict check fixes that as well. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_migrate.ml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index b0a7d17774d..fe4b2309c9e 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1778,14 +1778,6 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vms_vdis = List.filter_map (vdi_filter __context true) vbds in check_vdi_map ~__context vms_vdis vdi_map ; - (* Prevent SXM when the VM has a VDI on which changed block tracking is enabled *) - List.iter - (fun vconf -> - let vdi = vconf.vdi in - if Db.VDI.get_cbt_enabled ~__context ~self:vdi then - raise Api_errors.(Server_error (vdi_cbt_enabled, [Ref.string_of vdi])) - ) - vms_vdis ; (* operations required for migration *) let required_src_sr_operations = Smint.Feature.[Vdi_snapshot; Vdi_mirror] in let required_dst_sr_operations = @@ -1919,6 +1911,9 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options ) ) ; (* check_vdi_map above has already verified that all VDIs are in the vdi_map *) + (* Previously there was also a check that none of the VDIs have CBT enabled. + This is unnecessary, we only need to check that none of the VDIs that + *will be moved* have CBT enabled. *) assert_can_migrate_vdis ~__context ~vdi_map let assert_can_migrate_sender ~__context ~vm ~dest ~live:_ ~vdi_map:_ ~vif_map:_ From 31c2b3c5d46e52ed6af9b8263b2b42bd32650fbc Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 7 Apr 2025 10:44:47 +0800 Subject: [PATCH 074/492] Fixup Useless here, the local DB will be dropped soon as the joinner will switch to the remote DB of the new coordinator. And latest_synced_updates_applied will be set to `unknown in host.create in remote DB as default value. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_pool.ml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 832ae0df272..af8a46e7c5f 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1688,8 +1688,6 @@ let join_common ~__context ~master_address ~master_username ~master_password "Unable to set the write the new pool certificates to the disk : %s" (ExnHelper.string_of_exn e) ) ; - Db.Host.set_latest_synced_updates_applied ~__context ~self:me - ~value:`unknown ; (* this is where we try and sync up as much state as we can with the master. This is "best effort" rather than critical; if we fail part way through this then we carry From 2653f74ac32456f0be346ddd34d46aa11bfa6e74 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Mon, 7 Apr 2025 13:31:32 +0800 Subject: [PATCH 075/492] Update datamodel lifecycle VM_guest_metrics.services Signed-off-by: Changlei Li --- ocaml/idl/datamodel_lifecycle.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 10d2c76821e..9f4ce426aa0 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -110,7 +110,7 @@ let prototyped_of_field = function | "host", "last_software_update" -> Some "22.20.0" | "VM_guest_metrics", "services" -> - Some "25.10.0-next" + Some "25.14.0-next" | "VM_guest_metrics", "netbios_name" -> Some "24.28.0" | "VM", "groups" -> From 600e16694bddd20aae670316abed1a4b33ac608d Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 7 Apr 2025 17:17:40 +0800 Subject: [PATCH 076/492] CP-53802: Restore SSH service to default state in pool eject After being ejected from a pool, a new host obj will be created with default settings in DB. This commit configures SSH service in the ejected host to default state during pool eject. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_pool.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 832ae0df272..4b26769aec2 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -2045,6 +2045,19 @@ let eject_self ~__context ~host = control_domains_to_destroy with _ -> () ) ; + ( try + (* Restore console idle timeout *) + Xapi_host.set_console_idle_timeout ~__context ~self:host + ~value:0L ; + (* Restore SSH service to default state *) + Xapi_host.set_ssh_enabled_timeout ~__context ~self:host + ~value:0L ; + Xapi_host.enable_ssh ~__context ~self:host + with e -> + warn "Caught %s while restoring ssh service. Ignoring" + (Printexc.to_string e) + ) ; + debug "Pool.eject: setting our role to be master" ; Xapi_pool_transition.set_role Pool_role.Master ; debug "Pool.eject: forgetting pool secret" ; From 0bc2246f8f088ec4b6d6b6e2b19aa0c6c372f0e9 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 7 Apr 2025 17:22:57 +0800 Subject: [PATCH 077/492] Add default value of SSH settings in constants.ml Signed-off-by: Gang Ji --- ocaml/idl/datamodel_host.ml | 18 ++++++++++-------- ocaml/xapi-consts/constants.ml | 6 ++++++ ocaml/xapi/dbsync_slave.ml | 7 +++++-- ocaml/xapi/xapi_pool.ml | 10 +++++++--- 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 737d49f45d9..e51b59eb573 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1305,7 +1305,7 @@ let create_params = ; param_name= "ssh_enabled" ; param_doc= "True if SSH access is enabled for the host" ; param_release= numbered_release "25.14.0-next" - ; param_default= Some (VBool true) + ; param_default= Some (VBool Constants.default_ssh_enabled) } ; { param_type= Int @@ -1315,7 +1315,7 @@ let create_params = disabled (0 means never), this setting will be applied every time the \ SSH is enabled by XAPI" ; param_release= numbered_release "25.14.0-next" - ; param_default= Some (VInt 0L) + ; param_default= Some (VInt Constants.default_ssh_enabled_timeout) } ; { param_type= DateTime @@ -1333,7 +1333,7 @@ let create_params = "The timeout in seconds after which idle console will be automatically \ terminated (0 means never)" ; param_release= numbered_release "25.14.0-next" - ; param_default= Some (VInt 0L) + ; param_default= Some (VInt Constants.default_console_idle_timeout) } ] @@ -2436,7 +2436,7 @@ let set_console_idle_timeout = ~params: [ (Ref _host, "self", "The host") - ; (Int, "value", "The idle console timeout in seconds") + ; (Int, "value", "The console idle timeout in seconds") ] ~allowed_roles:_R_POOL_ADMIN () @@ -3039,10 +3039,11 @@ let t = "The SHA256 checksum of updateinfo of the most recently applied \ update on the host" ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Bool - ~default_value:(Some (VBool true)) "ssh_enabled" - "True if SSH access is enabled for the host" + ~default_value:(Some (VBool Constants.default_ssh_enabled)) + "ssh_enabled" "True if SSH access is enabled for the host" ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int - ~default_value:(Some (VInt 0L)) "ssh_enabled_timeout" + ~default_value:(Some (VInt Constants.default_ssh_enabled_timeout)) + "ssh_enabled_timeout" "The timeout in seconds after which SSH access will be \ automatically disabled (0 means never), this setting will be \ applied every time the SSH is enabled by XAPI" @@ -3051,7 +3052,8 @@ let t = "The time in UTC after which the SSH access will be automatically \ disabled" ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int - ~default_value:(Some (VInt 0L)) "console_idle_timeout" + ~default_value:(Some (VInt Constants.default_console_idle_timeout)) + "console_idle_timeout" "The timeout in seconds after which idle console will be \ automatically terminated (0 means never)" ] diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 3072a459c00..185f9669a7c 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -422,3 +422,9 @@ let observer_components_all = let tgroups_enabled = ref false let when_tgroups_enabled f = if !tgroups_enabled then f () else () + +let default_ssh_enabled = true + +let default_ssh_enabled_timeout = 0L + +let default_console_idle_timeout = 0L diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index fc9609db638..0d3115ff11e 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -59,8 +59,11 @@ let create_localhost ~__context info = ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Date.epoch ~last_update_hash:"" ~ssh_enabled:true - ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch ~console_idle_timeout:0L + ~last_software_update:Date.epoch ~last_update_hash:"" + ~ssh_enabled:Constants.default_ssh_enabled + ~ssh_enabled_timeout:Constants.default_ssh_enabled_timeout + ~ssh_expiry:Date.epoch + ~console_idle_timeout:Constants.default_console_idle_timeout in () diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 4b26769aec2..4a6d4163da3 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -2048,11 +2048,15 @@ let eject_self ~__context ~host = ( try (* Restore console idle timeout *) Xapi_host.set_console_idle_timeout ~__context ~self:host - ~value:0L ; + ~value:Constants.default_console_idle_timeout ; (* Restore SSH service to default state *) Xapi_host.set_ssh_enabled_timeout ~__context ~self:host - ~value:0L ; - Xapi_host.enable_ssh ~__context ~self:host + ~value:Constants.default_ssh_enabled_timeout ; + match Constants.default_ssh_enabled with + | true -> + Xapi_host.enable_ssh ~__context ~self:host + | false -> + Xapi_host.disable_ssh ~__context ~self:host with e -> warn "Caught %s while restoring ssh service. Ignoring" (Printexc.to_string e) From 5a533f568e73c1568010607d238980766d2eb729 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 7 Apr 2025 13:55:58 +0800 Subject: [PATCH 078/492] CP-53711: Apply SSH settings in joiner before update_non_vm_metadata Signed-off-by: Gang Ji --- ocaml/xapi/xapi_pool.ml | 75 ++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index af8a46e7c5f..28c981a70bd 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -943,38 +943,6 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : create_or_get_sr_on_master __context rpc session_id (my_local_cache_sr, my_local_cache_sr_rec) in - let remote_coordinator = get_master ~rpc ~session_id in - let ssh_enabled = - Client.Host.get_ssh_enabled ~rpc ~session_id ~self:remote_coordinator - in - let ssh_enabled_timeout = - Client.Host.get_ssh_enabled_timeout ~rpc ~session_id - ~self:remote_coordinator - in - let console_idle_timeout = - Client.Host.get_console_idle_timeout ~rpc ~session_id - ~self:remote_coordinator - in - (* Configure SSH service on local host *) - Xapi_host.set_console_idle_timeout ~__context ~self:host_ref - ~value:console_idle_timeout ; - Xapi_host.set_ssh_enabled_timeout ~__context ~self:host_ref - ~value:ssh_enabled_timeout ; - ( match ssh_enabled with - | true -> - Xapi_host.enable_ssh ~__context ~self:host_ref - | false -> - Xapi_host.disable_ssh ~__context ~self:host_ref - ) ; - (* As ssh_expiry will be updated by host.enable_ssh and host.disable_ssh, - there is a corner case when the joiner's SSH state will not match SSH - service state in its new coordinator exactly: if the joiner joins when - SSH service has been enabled in the new coordinator, while not timed - out yet, the joiner will start SSH service with timeout - host.ssh_enabled_timeout, which means SSH service in the joiner will - be disabled later than in the new coordinator. *) - let ssh_expiry = Db.Host.get_ssh_expiry ~__context ~self:host_ref in - debug "Creating host object on master" ; let ref = Client.Host.create ~rpc ~session_id ~uuid:my_uuid @@ -994,8 +962,11 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~local_cache_sr ~chipset_info:host.API.host_chipset_info ~ssl_legacy:false ~last_software_update:host.API.host_last_software_update - ~last_update_hash:host.API.host_last_update_hash ~ssh_enabled - ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout + ~last_update_hash:host.API.host_last_update_hash + ~ssh_enabled:host.API.host_ssh_enabled + ~ssh_enabled_timeout:host.API.host_ssh_enabled_timeout + ~ssh_expiry:host.API.host_ssh_expiry + ~console_idle_timeout:host.API.host_console_idle_timeout in (* Copy other-config into newly created host record: *) no_exn @@ -1588,6 +1559,7 @@ let join_common ~__context ~master_address ~master_username ~master_password ) in + let remote_coordinator = get_master ~rpc ~session_id in (* If management is on a VLAN, then get the Pool master management network bridge before we logout the session *) let pool_master_bridge, mgmt_pif = @@ -1598,7 +1570,7 @@ let join_common ~__context ~master_address ~master_username ~master_password if Db.PIF.get_VLAN_master_of ~__context ~self:my_pif <> Ref.null then let pif = Client.Host.get_management_interface ~rpc ~session_id - ~host:(get_master ~rpc ~session_id) + ~host:remote_coordinator in let network = Client.PIF.get_network ~rpc ~session_id ~self:pif in (Some (Client.Network.get_bridge ~rpc ~session_id ~self:network), my_pif) @@ -1688,6 +1660,39 @@ let join_common ~__context ~master_address ~master_username ~master_password "Unable to set the write the new pool certificates to the disk : %s" (ExnHelper.string_of_exn e) ) ; + ( try + let ssh_enabled_timeout = + Client.Host.get_ssh_enabled_timeout ~rpc ~session_id + ~self:remote_coordinator + in + let console_idle_timeout = + Client.Host.get_console_idle_timeout ~rpc ~session_id + ~self:remote_coordinator + in + Xapi_host.set_console_idle_timeout ~__context ~self:me + ~value:console_idle_timeout ; + Xapi_host.set_ssh_enabled_timeout ~__context ~self:me + ~value:ssh_enabled_timeout ; + let ssh_enabled = + Client.Host.get_ssh_enabled ~rpc ~session_id + ~self:remote_coordinator + in + (* As ssh_expiry will be updated by host.enable_ssh and host.disable_ssh, + there is a corner case when the joiner's SSH state will not match SSH + service state in its new coordinator exactly: if the joiner joins when + SSH service has been enabled in the new coordinator, while not timed + out yet, the joiner will start SSH service with timeout + host.ssh_enabled_timeout, which means SSH service in the joiner will + be disabled later than in the new coordinator. *) + match ssh_enabled with + | true -> + Xapi_host.enable_ssh ~__context ~self:me + | false -> + Xapi_host.disable_ssh ~__context ~self:me + with e -> + error "Unable to configure SSH service on local host: %s" + (ExnHelper.string_of_exn e) + ) ; (* this is where we try and sync up as much state as we can with the master. This is "best effort" rather than critical; if we fail part way through this then we carry From f67dcf7db5afa7489e6f8bc8d5ca653eab8ef38f Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Tue, 1 Apr 2025 06:36:48 +0000 Subject: [PATCH 079/492] CP-53723 Implement Console timeout configure API for Dom0 SSH control Implemented XAPI APIs: - `host.set_console_idle_timeout` - `pool.set_console_idle_timeout` These APIs allow XAPI to configure timeout for idle console sessions. Signed-off-by: Lunfan Zhang --- ocaml/idl/datamodel_errors.ml | 3 +++ ocaml/xapi-consts/api_errors.ml | 3 +++ ocaml/xapi/xapi_globs.ml | 2 ++ ocaml/xapi/xapi_host.ml | 34 ++++++++++++++++++++++++++++++++- ocaml/xapi/xapi_pool.ml | 9 ++++++++- 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 2aa0f803dec..19ca6ee7123 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2040,6 +2040,9 @@ let _ = error Api_errors.disable_ssh_partially_failed ["hosts"] ~doc:"Some of hosts failed to disable SSH access." () ; + error Api_errors.set_console_timeout_partially_failed ["hosts"] + ~doc:"Some hosts failed to set console timeout." () ; + error Api_errors.host_driver_no_hardware ["driver variant"] ~doc:"No hardware present for this host driver variant" () ; diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 42390c2b9fb..535b65da9bb 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1420,6 +1420,9 @@ let enable_ssh_partially_failed = add_error "ENABLE_SSH_PARTIALLY_FAILED" let disable_ssh_partially_failed = add_error "DISABLE_SSH_PARTIALLY_FAILED" +let set_console_timeout_partially_failed = + add_error "SET_CONSOLE_TIMEOUT_PARTIALLY_FAILED" + let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 89665a13494..1dcdb658a3d 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1287,6 +1287,8 @@ let gpumon_stop_timeout = ref 10.0 let reboot_required_hfxs = ref "/run/reboot-required.hfxs" +let console_timeout_profile_path = ref "/etc/profile.d/console_timeout.sh" + (* Fingerprint of default patch key *) let citrix_patch_key = "NERDNTUzMDMwRUMwNDFFNDI4N0M4OEVCRUFEMzlGOTJEOEE5REUyNg==" diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index cfc73f80b2d..cf325ba4368 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3136,4 +3136,36 @@ let disable_ssh ~__context ~self = let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () -let set_console_idle_timeout ~__context ~self:_ ~value:_ = () +let set_console_idle_timeout ~__context ~self ~value = + let assert_timeout_valid timeout = + if timeout < 0L then + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["console_timeout"; Int64.to_string timeout] + ) + ) + in + + assert_timeout_valid value ; + try + let content = + match value with + | 0L -> + "# Console timeout is disabled\n" + | timeout -> + Printf.sprintf "# Console timeout configuration\nexport TMOUT=%Ld\n" + timeout + in + + Unixext.atomic_write_to_file !Xapi_globs.console_timeout_profile_path 0o0644 + (fun fd -> + Unix.write fd (Bytes.of_string content) 0 (String.length content) + |> ignore + ) ; + + Db.Host.set_console_idle_timeout ~__context ~self ~value + with e -> + error "Failed to configure console timeout: %s" (Printexc.to_string e) ; + Helpers.internal_error "Failed to set console timeout: %Ld: %s" value + (Printexc.to_string e) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 434ab3b9dc5..6cf8ab732f3 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -4003,6 +4003,13 @@ module Ssh = struct let disable ~__context ~self:_ = operate ~__context ~action:Client.Host.disable_ssh ~error:Api_errors.disable_ssh_partially_failed + + let set_console_timeout ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_console_idle_timeout ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_console_timeout_partially_failed end let enable_ssh = Ssh.enable @@ -4011,4 +4018,4 @@ let disable_ssh = Ssh.disable let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () -let set_console_idle_timeout ~__context ~self:_ ~value:_ = () +let set_console_idle_timeout = Ssh.set_console_timeout From a3bc84d282d7d2fc1eb0290326ce2f97ab6f6c06 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Tue, 1 Apr 2025 06:44:52 +0000 Subject: [PATCH 080/492] CP-53478: Implement SSH enabeld timeout API for Dom0 SSH control Implemented XAPI APIs: - `host.set_ssh_enabled_timeout` - `pool.set_ssh_enabled_timeout` These APIs allow XAPI to configure timeout for SSH service. `host.enable_ssh` now also supports enabling the SSH service with a ssh_enabled_timeout Signed-off-by: Lunfan Zhang --- ocaml/idl/datamodel_errors.ml | 3 + ocaml/xapi-consts/api_errors.ml | 3 + ocaml/xapi/xapi_globs.ml | 4 ++ ocaml/xapi/xapi_host.ml | 110 +++++++++++++++++++++++++++----- ocaml/xapi/xapi_pool.ml | 9 ++- 5 files changed, 111 insertions(+), 18 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 19ca6ee7123..039c5c313f3 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2040,6 +2040,9 @@ let _ = error Api_errors.disable_ssh_partially_failed ["hosts"] ~doc:"Some of hosts failed to disable SSH access." () ; + error Api_errors.set_ssh_timeout_partially_failed ["hosts"] + ~doc:"Some hosts failed to set SSH timeout." () ; + error Api_errors.set_console_timeout_partially_failed ["hosts"] ~doc:"Some hosts failed to set console timeout." () ; diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 535b65da9bb..274d7d351fd 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1420,6 +1420,9 @@ let enable_ssh_partially_failed = add_error "ENABLE_SSH_PARTIALLY_FAILED" let disable_ssh_partially_failed = add_error "DISABLE_SSH_PARTIALLY_FAILED" +let set_ssh_timeout_partially_failed = + add_error "SET_SSH_TIMEOUT_PARTIALLY_FAILED" + let set_console_timeout_partially_failed = add_error "SET_CONSOLE_TIMEOUT_PARTIALLY_FAILED" diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 1dcdb658a3d..58c5af94226 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1289,6 +1289,10 @@ let reboot_required_hfxs = ref "/run/reboot-required.hfxs" let console_timeout_profile_path = ref "/etc/profile.d/console_timeout.sh" +let job_for_disable_ssh = ref "Disable SSH" + +let ssh_service = ref "sshd" + (* Fingerprint of default patch key *) let citrix_patch_key = "NERDNTUzMDMwRUMwNDFFNDI4N0M4OEVCRUFEMzlGOTJEOEE5REUyNg==" diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index cf325ba4368..f10df7b3707 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3114,27 +3114,103 @@ let emergency_clear_mandatory_guidance ~__context = ) ; Db.Host.set_pending_guidances ~__context ~self ~value:[] -let enable_ssh ~__context ~self = +let disable_ssh_internal ~__context ~self = try - Xapi_systemctl.enable ~wait_until_success:false "sshd" ; - Xapi_systemctl.start ~wait_until_success:false "sshd" - with _ -> - raise - (Api_errors.Server_error - (Api_errors.enable_ssh_failed, [Ref.string_of self]) - ) + debug "Disabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; + Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.stop ~wait_until_success:false !Xapi_globs.ssh_service ; + Db.Host.set_ssh_enabled ~__context ~self ~value:false + with e -> + error "Failed to disable SSH for host %s: %s" (Ref.string_of self) + (Printexc.to_string e) ; + Helpers.internal_error "Failed to disable SSH: %s" (Printexc.to_string e) + +let schedule_disable_ssh_job ~__context ~self ~timeout = + let host_uuid = Helpers.get_localhost_uuid () in + let expiry_time = + match + Ptime.add_span (Ptime_clock.now ()) + (Ptime.Span.of_int_s (Int64.to_int timeout)) + with + | None -> + error "Invalid SSH timeout: %Ld" timeout ; + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["ssh_enabled_timeout"; Int64.to_string timeout] + ) + ) + | Some t -> + Ptime.to_float_s t |> Date.of_unix_time + in -let disable_ssh ~__context ~self = + debug "Scheduling SSH disable job for host %s with timeout %Ld seconds" + host_uuid timeout ; + + (* Remove any existing job first *) + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + !Xapi_globs.job_for_disable_ssh + Xapi_stdext_threads_scheduler.Scheduler.OneShot (Int64.to_float timeout) + (fun () -> disable_ssh_internal ~__context ~self + ) ; + + Db.Host.set_ssh_expiry ~__context ~self ~value:expiry_time + +let enable_ssh ~__context ~self = try - Xapi_systemctl.disable ~wait_until_success:false "sshd" ; - Xapi_systemctl.stop ~wait_until_success:false "sshd" - with _ -> - raise - (Api_errors.Server_error - (Api_errors.disable_ssh_failed, [Ref.string_of self]) - ) + debug "Enabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; + + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.start ~wait_until_success:false !Xapi_globs.ssh_service ; + + let timeout = Db.Host.get_ssh_enabled_timeout ~__context ~self in + ( match timeout with + | 0L -> + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh + | t -> + schedule_disable_ssh_job ~__context ~self ~timeout:t + ) ; -let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () + Db.Host.set_ssh_enabled ~__context ~self ~value:true + with e -> + error "Failed to enable SSH on host %s: %s" (Ref.string_of self) + (Printexc.to_string e) ; + Helpers.internal_error "Failed to enable SSH: %s" (Printexc.to_string e) + +let disable_ssh ~__context ~self = + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + disable_ssh_internal ~__context ~self ; + Db.Host.set_ssh_expiry ~__context ~self ~value:(Date.now ()) + +let set_ssh_enabled_timeout ~__context ~self ~value = + let validate_timeout value = + (* the max timeout is two days: 172800L = 2*24*60*60 *) + if value < 0L || value > 172800L then + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["ssh_enabled_timeout"; Int64.to_string value] + ) + ) + in + validate_timeout value ; + debug "Setting SSH timeout for host %s to %Ld seconds" + (Db.Host.get_uuid ~__context ~self) + value ; + Db.Host.set_ssh_enabled_timeout ~__context ~self ~value ; + if Db.Host.get_ssh_enabled ~__context ~self then + match value with + | 0L -> + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch + | t -> + schedule_disable_ssh_job ~__context ~self ~timeout:t let set_console_idle_timeout ~__context ~self ~value = let assert_timeout_valid timeout = diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 6cf8ab732f3..7d5e2ce2dce 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -4004,6 +4004,13 @@ module Ssh = struct operate ~__context ~action:Client.Host.disable_ssh ~error:Api_errors.disable_ssh_partially_failed + let set_enabled_timeout ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_ssh_enabled_timeout ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_ssh_timeout_partially_failed + let set_console_timeout ~__context ~self:_ ~value = operate ~__context ~action:(fun ~rpc ~session_id ~self -> @@ -4016,6 +4023,6 @@ let enable_ssh = Ssh.enable let disable_ssh = Ssh.disable -let set_ssh_enabled_timeout ~__context ~self:_ ~value:_ = () +let set_ssh_enabled_timeout = Ssh.set_enabled_timeout let set_console_idle_timeout = Ssh.set_console_timeout From cb9277d85007128dce88f1be94ed48e087ffeeba Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Tue, 1 Apr 2025 06:49:19 +0000 Subject: [PATCH 081/492] CP-53725 Create SSH-related xe CLI for Dom0 SSH control Updated `records.ml` file to support `host-param-set/get/list` and `pool-param-set/get/list` for SSH-related fields. Signed-off-by: Lunfan Zhang --- ocaml/xapi-cli-server/records.ml | 89 ++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index 56e97fbda03..8598cb05bb9 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -20,6 +20,8 @@ let nullref = Ref.string_of Ref.null let nid = "" +let inconsistent = "" + let unknown_time = "" let string_of_float f = Printf.sprintf "%.3f" f @@ -204,6 +206,37 @@ let get_pbds_host rpc session_id pbds = let get_sr_host rpc session_id record = get_pbds_host rpc session_id record.API.sR_PBDs +(** Get consistent field from all hosts, or return a default value if the field + is not the same on all hosts. *) +let get_consistent_field_or_default ~rpc ~session_id ~getter ~transform ~default + = + match Client.Host.get_all ~rpc ~session_id with + | [] -> + default + | hosts -> ( + let result = + List.fold_left + (fun acc host -> + match acc with + | `Inconsistent -> + `Inconsistent + | `NotSet -> + `Value (getter ~rpc ~session_id ~self:host |> transform) + | `Value v -> + let current = getter ~rpc ~session_id ~self:host |> transform in + if v = current then `Value v else `Inconsistent + ) + `NotSet hosts + in + match result with + | `Value v -> + v + | `Inconsistent -> + default + | `NotSet -> + default + ) + let bond_record rpc session_id bond = let _ref = ref bond in let empty_record = @@ -1506,6 +1539,42 @@ let pool_record rpc session_id pool = ) ~get_map:(fun () -> (x ()).API.pool_license_server) () + ; make_field ~name:"ssh-enabled" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_enabled ~transform:string_of_bool + ~default:inconsistent + ) + () + ; make_field ~name:"ssh-enabled-timeout" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_enabled_timeout + ~transform:Int64.to_string ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_ssh_enabled_timeout ~rpc ~session_id ~self:pool + ~value:(safe_i64_of_string "ssh-enabled-timeout" value) + ) + () + ; make_field ~name:"ssh-expiry" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_expiry ~transform:Date.to_rfc3339 + ~default:inconsistent + ) + () + ; make_field ~name:"console-idle-timeout" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_console_idle_timeout + ~transform:Int64.to_string ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_console_idle_timeout ~rpc ~session_id ~self:pool + ~value:(safe_i64_of_string "console-idle-timeout" value) + ) + () ] } @@ -3265,6 +3334,26 @@ let host_record rpc session_id host = ; make_field ~name:"last-update-hash" ~get:(fun () -> (x ()).API.host_last_update_hash) () + ; make_field ~name:"ssh-enabled" + ~get:(fun () -> string_of_bool (x ()).API.host_ssh_enabled) + () + ; make_field ~name:"ssh-enabled-timeout" + ~get:(fun () -> Int64.to_string (x ()).API.host_ssh_enabled_timeout) + ~set:(fun value -> + Client.Host.set_ssh_enabled_timeout ~rpc ~session_id ~self:host + ~value:(safe_i64_of_string "ssh-enabled-timeout" value) + ) + () + ; make_field ~name:"ssh-expiry" + ~get:(fun () -> Date.to_rfc3339 (x ()).API.host_ssh_expiry) + () + ; make_field ~name:"console-idle-timeout" + ~get:(fun () -> Int64.to_string (x ()).API.host_console_idle_timeout) + ~set:(fun value -> + Client.Host.set_console_idle_timeout ~rpc ~session_id ~self:host + ~value:(safe_i64_of_string "console-idle-timeout" value) + ) + () ] } From c3ec05063314a32773691d6808dfaf48ad3ed4af Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 7 Apr 2025 14:55:07 +0100 Subject: [PATCH 082/492] CA-408048 add library to represent version strings We want to handle version strings reliably and not re-doscovering their problems over and over. Add a simple library together with tests. Signed-off-by: Christian Lindig --- ocaml/xapi-aux/dune | 9 ++++ ocaml/xapi-aux/version.ml | 85 +++++++++++++++++++++++++++++++++ ocaml/xapi-aux/version.mli | 76 +++++++++++++++++++++++++++++ ocaml/xapi-aux/version_test.ml | 25 ++++++++++ ocaml/xapi-aux/version_test.mli | 1 + 5 files changed, 196 insertions(+) create mode 100644 ocaml/xapi-aux/version.ml create mode 100644 ocaml/xapi-aux/version.mli create mode 100644 ocaml/xapi-aux/version_test.ml create mode 100644 ocaml/xapi-aux/version_test.mli diff --git a/ocaml/xapi-aux/dune b/ocaml/xapi-aux/dune index d334769d655..60e27cf5b3b 100644 --- a/ocaml/xapi-aux/dune +++ b/ocaml/xapi-aux/dune @@ -1,6 +1,7 @@ (library (name xapi_aux) (modes best) + (modules :standard \ version_test) (libraries astring clock @@ -21,3 +22,11 @@ (wrapped false) ) +; to run this test: dune exec ./version_test.exe +(tests + (names version_test) + (modes (best exe)) + (modules version_test) + (package xapi) + (libraries + xapi_aux alcotest)) diff --git a/ocaml/xapi-aux/version.ml b/ocaml/xapi-aux/version.ml new file mode 100644 index 00000000000..18e7cedc8e1 --- /dev/null +++ b/ocaml/xapi-aux/version.ml @@ -0,0 +1,85 @@ +(* + Copyright (c) Cloud Software Group, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + *) + +(* Simple abstraction for version information that enforces a simple + format and predicatable semantics *) + +exception Format of string + +(** in decreasing oder of sginificance *) +type t = int list + +let of_string str = + let int str = Scanf.sscanf str "%u%!" Fun.id in + try String.split_on_char '.' str |> List.map int with _ -> raise (Format str) + +let to_string t = + let str int = Printf.sprintf "%d" int in + t |> List.map str |> String.concat "." + +(** Total order over versions; 1.2.3 is equal to 1.2.3.0 *) +let rec compare v1 v2 = + match (v1, v2) with + | [], [] -> + 0 + | 0 :: xs, [] -> + compare xs [] + | _, [] -> + 1 + | [], 0 :: ys -> + compare [] ys + | [], _ -> + -1 + | x :: xs, y :: ys when x = y -> + compare xs ys + | x :: _, y :: _ when x < y -> + -1 + | _ -> + 1 + +let ne x y = compare x y <> 0 + +let eq x y = compare x y = 0 + +let le x y = compare x y <= 0 + +let ge x y = compare x y >= 0 + +let gt x y = compare x y > 0 + +let lt x y = compare x y < 0 + +let is_valid str = + try + ignore (of_string str) ; + true + with Format _ -> false + +module String = struct + let wrap f v1 v2 = f (of_string v1) (of_string v2) + + let compare = wrap compare + + let ne = wrap ne + + let eq = wrap eq + + let le = wrap le + + let ge = wrap ge + + let gt = wrap gt + + let lt = wrap lt +end diff --git a/ocaml/xapi-aux/version.mli b/ocaml/xapi-aux/version.mli new file mode 100644 index 00000000000..b0aed1d6369 --- /dev/null +++ b/ocaml/xapi-aux/version.mli @@ -0,0 +1,76 @@ +(* + Copyright (c) Cloud Software Group, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + *) + +(** a version, derived from a string representation - see below *) +type t + +(** A version string violates the supported syntax *) +exception Format of string + +val of_string : string -> t +(** Parse a version; may raise [Format]. A version is a sequence of + unsigned integers separated by a dot; for axample "1.2.3" is a legal + version. Must have at least one component. Examples: + - 3 + - 3.10 + - 3.10.4 + - 3.10.4.0.0 + - 3.10.4.0.1 + - 0 + - 0.2 + *) + +val to_string : t -> string +(** represent a version as a string *) + +val compare : t -> t -> int +(** Total order over versions; yields one of -1, 0, 1 as by convention. + - 1.2.3 = 1.2.3.0 + - 1.10.2 > 1.9.1 + - 0.1.0.0 = 0.1 + *) + +(* version equality relations *) +val eq : t -> t -> bool + +val ge : t -> t -> bool + +val gt : t -> t -> bool + +val le : t -> t -> bool + +val lt : t -> t -> bool + +val ne : t -> t -> bool + +(* Validate the format of a version string *) +val is_valid : string -> bool + +(* Operations over version strings for convenience. Each function may + raise [Format] *) +module String : sig + val compare : string -> string -> int + + val ne : string -> string -> bool + + val eq : string -> string -> bool + + val le : string -> string -> bool + + val ge : string -> string -> bool + + val gt : string -> string -> bool + + val lt : string -> string -> bool +end diff --git a/ocaml/xapi-aux/version_test.ml b/ocaml/xapi-aux/version_test.ml new file mode 100644 index 00000000000..4e4351b5392 --- /dev/null +++ b/ocaml/xapi-aux/version_test.ml @@ -0,0 +1,25 @@ +open Alcotest + +let format () = + check bool __LOC__ true (Version.is_valid "3") ; + check bool __LOC__ true (Version.is_valid "0") ; + check bool __LOC__ true (Version.is_valid "3.1") ; + check bool __LOC__ true (Version.is_valid "3.1.4") ; + check bool __LOC__ true (Version.is_valid "3.14") ; + check bool __LOC__ false (Version.is_valid "") ; + check bool __LOC__ false (Version.is_valid "3a") ; + check bool __LOC__ false (Version.is_valid "3.1.4.") ; + check bool __LOC__ false (Version.is_valid "3.1.4.a") ; + check bool __LOC__ false (Version.is_valid "3.1.4a") ; + check bool __LOC__ false (Version.is_valid "3.1:4") ; + check bool __LOC__ false (Version.is_valid "-3.1.4") + +let order () = + check bool __LOC__ true (Version.String.eq "3" "3.0.0") ; + check bool __LOC__ true (Version.String.le "3" "3.0.1") ; + check bool __LOC__ true (Version.String.le "3.1" "3.10") ; + check bool __LOC__ true (Version.String.eq "0" "0.0.0") + +let tests = [test_case "format" `Quick format; test_case "order" `Quick order] + +let () = run __MODULE__ [(__MODULE__, tests)] diff --git a/ocaml/xapi-aux/version_test.mli b/ocaml/xapi-aux/version_test.mli new file mode 100644 index 00000000000..e790aeb70f0 --- /dev/null +++ b/ocaml/xapi-aux/version_test.mli @@ -0,0 +1 @@ +(* empty *) From efc5b308b4ddf52e390943d017ee7ce1afefffb6 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Apr 2025 13:52:33 +0100 Subject: [PATCH 083/492] CA-408048 remove SM plugins from DB if unavailable An SM plugin might become unavailable; we have to remove its record on xapi startup. The canonical case is an upgrade from XS8 to XS9. Signed-off-by: Christian Lindig --- ocaml/xapi/storage_access.ml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index d38cab783b5..0269ca0bcc3 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -109,10 +109,16 @@ exception Message_switch_failure (** Synchronise the SM table with the SMAPIv1 plugins on the disk and the SMAPIv2 plugins mentioned in the configuration file whitelist. *) let on_xapi_start ~__context = + (* An SM is either implemented as a plugin - for which we check its + presence, or via an API *) + let is_available (_rf, rc) = + Sys.file_exists rc.API.sM_driver_filename + || Version.String.ge rc.sM_required_api_version "5.0" + in let existing = - List.map - (fun (rf, rc) -> (rc.API.sM_type, (rf, rc))) - (Db.SM.get_all_records ~__context) + Db.SM.get_all_records ~__context + |> List.filter is_available + |> List.map (fun (rf, rc) -> (rc.API.sM_type, (rf, rc))) in let explicitly_configured_drivers = List.filter_map From a038a1c4a1805099480aa0b75e825e5c9ecaca00 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 7 Apr 2025 11:46:31 +0100 Subject: [PATCH 084/492] CA-401023: Remove smapi observer config if smapi is set as experimental If smapi observer was previously enabled and then added to observer_experimental_components, when the toolstack is restarted the smapi observer will no longer exist. The observer will therefore not be "destroyed" by normal means so we must remove the config manually. This is important for xapi-storage-script as it cannot read the DB and so relies on this config file to know whether to trace SMAPIv3 or not. Signed-off-by: Steven Woods --- ocaml/xapi/xapi_observer.ml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 2c1fcd81312..404c4496f29 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -571,6 +571,14 @@ let initialise ~__context = |> observed_components_of |> List.iter (initialise_observer_component ~__context) ) ; + (* If SMApi is now experimental, manually remove the config as there is no observer to do it *) + if + Xapi_globs.( + StringSet.mem (to_string SMApi) !observer_experimental_components + ) + then + Xapi_stdext_unix.Unixext.rm_rec (dir_name_of_component SMApi) ; + Tracing_export.set_service_name "xapi" let set_hosts ~__context ~self ~value = From ffda08e55f595e868b9e5617b9462e8b5db24867 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 8 Apr 2025 10:34:51 +0100 Subject: [PATCH 085/492] xapi_message: Implement proper expression handling in get_all_records_where Messages are not stored in the database, so their handling is done through Custom_actions. The custom implementation of get_all_records_where, in particular, used to ignore the query expression altogether (as if expr = true). Expand the existing handler to evaluate the query and match against the messages retrieved and parsed from disk. An alternative approach would have instead special cased filtering code in the database itself, but in my attempts this was awkward and duplicated quite a lot of code. With this it is possible to filter messages properly now: ``` >>> session.xenapi.message.get_all_records_where('field "name" = "VM_STARTED"') >>> session.xenapi.message.get_all_records_where('field "name" = "VM_STARTED" and field "obj_uuid" = "3c61111f-1d67-7db4-95a5-f0287aff57bf"') ``` Also update the documentation to match new behaviour. Closes #6340 Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel.ml | 6 ++--- ocaml/xapi/xapi_message.ml | 50 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index c905c5354b9..a2bfaf4d4fb 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -8976,7 +8976,7 @@ module Message = struct let get_all_records_where = call ~name:"get_all_records_where" ~lifecycle:[(Published, rel_orlando, "")] - ~params:[(String, "expr", "The expression to match (not currently used)")] + ~params:[(String, "expr", "The expression to match")] ~flags:[`Session] ~result:(Map (Ref _message, Record _message), "The messages") ~allowed_roles:_R_READ_ONLY () @@ -10543,12 +10543,12 @@ let all_system = (** These are the pairs of (object, field) which are bound together in the database schema. - + It is assumed that, for any entry (p, p'), neither p nor p' appears in any other entry. It may be the case that p = p', which is the only instance where some object-field pair may appear more than once. - + This is implicitly assumed by other code which treats this list - and its symmetric closure - as an association list without duplicate keys. *) diff --git a/ocaml/xapi/xapi_message.ml b/ocaml/xapi/xapi_message.ml index 90f75943f6a..408ba7acf07 100644 --- a/ocaml/xapi/xapi_message.ml +++ b/ocaml/xapi/xapi_message.ml @@ -730,8 +730,54 @@ let get_record ~__context ~self = let get_all_records ~__context = get_real message_dir (fun _ -> true) 0.0 -let get_all_records_where ~__context ~expr:_ = - get_real message_dir (fun _ -> true) 0.0 +let get_all_records_where ~__context ~expr = + let open Xapi_database in + let expr = Db_filter.expr_of_string expr in + let eval_val msg expr = + match expr with + | Db_filter_types.Literal x -> + x + | Db_filter_types.Field x -> ( + match x with + | "name" -> + msg.API.message_name + | "uuid" -> + msg.API.message_uuid + | "priority" -> + Int64.to_string msg.API.message_priority + | "cls" -> + Record_util.cls_to_string msg.API.message_cls + | "obj_uuid" -> + msg.API.message_obj_uuid + | "timestamp" -> + Date.to_rfc3339 msg.API.message_timestamp + | "body" -> + msg.API.message_body + | any_other_key -> + raise (Db_exn.DBCache_NotFound ("missing field", any_other_key, "")) + ) + in + let eval_expr (lookup_val : API.message_t -> Db_filter_types._val -> string) + (msg : API.message_t) = + let lookup_val = lookup_val msg in + let compare _a _b = lookup_val _a = lookup_val _b in + let rec f = function + | Db_filter_types.True -> + true + | Db_filter_types.False -> + false + | Db_filter_types.Not x -> + not (f x) + | Db_filter_types.And (a, b) -> + f a && f b + | Db_filter_types.Eq (_a, _b) -> + compare _a _b + | Db_filter_types.Or (a, b) -> + f a || f b + in + f expr + in + get_real message_dir (eval_expr eval_val) 0.0 let repopulate_cache () = with_lock in_memory_cache_mutex (fun () -> From fbefb1c9b040d4ab715e47942209fa06fa95de3d Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 19 Mar 2025 17:22:27 +0000 Subject: [PATCH 086/492] CP-54026: option to control VM-internal shutdown behaviour under HA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new datamodel field in the pool class: bool pool.ha_reboot_vm_on_internal_shutdown The field is read/write, with the setter restricted to the pool-operator role. The default of this new field is `true` to reflect the current behaviour. This field controls what happens when HA is enabled and a protected VM is shut down internally (e.g. by pressing the shutdown button in Windows): - `true`: the VM is automatically restarted. - `false`: the VM is not restarted and left halted – consistent with the behaviour of shutting the VM down through the API. CLI: xe pool-param-set uuid=... ha-reboot-vm-on-internal-shutdown=false Whether an HA-protected VM is automatically (re)started depends on the field `VM.ha_always_run`, which is managed by xapi. This field is set to `true` when a protected VM is started, and to `false` when it is shut down through the API, which prevents the HA monitor thread from restarting it again. Setting the new pool-field to `false` does the same thing is such a VM is shut down from inside when handling the event in the xenopsd-events thread. Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_pool.ml | 6 ++++++ ocaml/idl/schematest.ml | 2 +- ocaml/tests/common/test_common.ml | 6 ++++-- ocaml/xapi-cli-server/records.ml | 9 +++++++++ ocaml/xapi/dbsync_master.ml | 2 +- ocaml/xapi/xapi_xenops.ml | 14 ++++++++++++++ 7 files changed, 36 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index a044c9a0f2d..8a87d7eb524 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 787 +let schema_minor_vsn = 788 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index c35c6789f7f..cce63a58e16 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -2191,6 +2191,12 @@ let t = ~ty:(Map (String, String)) ~default_value:(Some (VMap [])) "license_server" "Licensing data shared within the whole pool" + ; field ~writer_roles:_R_POOL_OP ~qualifier:RW ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool true)) + "ha_reboot_vm_on_internal_shutdown" + "Indicates whether an HA-protected VM that is shut down from \ + inside (not through the API) should be automatically rebooted \ + when HA is enabled" ] ) () diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 435e2da373f..36b99cd3c62 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "7756b4bea0be3985c1c8f6708f04d442" +let last_known_schema_hash = "e10b420b0863116ee188eea9e63b1349" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7b5484a02ba..cba53ad17c4 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -300,7 +300,8 @@ let make_pool ~__context ~master ?(name_label = "") ?(name_description = "") ?(telemetry_next_collection = API.Date.epoch) ?(last_update_sync = API.Date.epoch) ?(update_sync_frequency = `daily) ?(update_sync_day = 0L) ?(update_sync_enabled = false) - ?(recommendations = []) ?(license_server = []) () = + ?(recommendations = []) ?(license_server = []) + ?(ha_reboot_vm_on_internal_shutdown = true) () = let pool_ref = Ref.make () in Db.Pool.create ~__context ~ref:pool_ref ~uuid:(make_uuid ()) ~name_label ~name_description ~master ~default_SR ~suspend_image_SR ~crash_dump_SR @@ -320,7 +321,8 @@ let make_pool ~__context ~master ?(name_label = "") ?(name_description = "") ~local_auth_max_threads:8L ~ext_auth_max_threads:8L ~ext_auth_cache_enabled:false ~ext_auth_cache_size:50L ~ext_auth_cache_expiry:300L ~update_sync_frequency ~update_sync_day - ~update_sync_enabled ~recommendations ~license_server ; + ~update_sync_enabled ~recommendations ~license_server + ~ha_reboot_vm_on_internal_shutdown ; pool_ref let default_sm_features = diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index 620d0f0eacc..9b5cd3180df 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -1244,6 +1244,15 @@ let pool_record rpc session_id pool = ; make_field ~name:"ha-overcommitted" ~get:(fun () -> string_of_bool (x ()).API.pool_ha_overcommitted) () + ; make_field ~name:"ha-reboot-vm-on-internal-shutdown" + ~get:(fun () -> + string_of_bool (x ()).API.pool_ha_reboot_vm_on_internal_shutdown + ) + ~set:(fun x -> + Client.Pool.set_ha_reboot_vm_on_internal_shutdown ~rpc ~session_id + ~self:pool ~value:(bool_of_string x) + ) + () ; make_field ~name:"blobs" ~get:(fun () -> get_uuid_map_from_ref_map (x ()).API.pool_blobs) () diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index efd5be874a0..f8316b81993 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -54,7 +54,7 @@ let create_pool_record ~__context = ~update_sync_day:0L ~update_sync_enabled:false ~local_auth_max_threads:8L ~ext_auth_max_threads:1L ~ext_auth_cache_enabled:false ~ext_auth_cache_size:50L ~ext_auth_cache_expiry:300L ~recommendations:[] - ~license_server:[] + ~license_server:[] ~ha_reboot_vm_on_internal_shutdown:true let set_master_ip ~__context = let ip = diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 77039e5fe3b..1a7350c2e9d 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2033,6 +2033,20 @@ let update_vm ~__context id = "Will update VM.allowed_operations because power_state has \ changed." ; should_update_allowed_operations := true ; + (* Update ha_always_run before the power_state (if needed), to avoid racing + with the HA monitor thread. *) + let pool = Helpers.get_pool ~__context in + if + power_state = `Halted + && not + (Db.Pool.get_ha_reboot_vm_on_internal_shutdown ~__context + ~self:pool + ) + then ( + Db.VM.set_ha_always_run ~__context ~self ~value:false ; + debug "Setting ha_always_run on vm=%s as false after shutdown" + (Ref.string_of self) + ) ; debug "xenopsd event: Updating VM %s power_state <- %s" id (Record_util.vm_power_state_to_string power_state) ; (* This will mark VBDs, VIFs as detached and clear resident_on From 9ecacc882f658debfa4e977a3ad9fe9f6f1c97eb Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Tue, 8 Apr 2025 13:21:02 +0000 Subject: [PATCH 087/492] Update datamodel_lifecycle Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_lifecycle.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 9f4ce426aa0..24543829da6 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -110,7 +110,7 @@ let prototyped_of_field = function | "host", "last_software_update" -> Some "22.20.0" | "VM_guest_metrics", "services" -> - Some "25.14.0-next" + Some "25.15.0" | "VM_guest_metrics", "netbios_name" -> Some "24.28.0" | "VM", "groups" -> @@ -123,6 +123,8 @@ let prototyped_of_field = function Some "23.18.0" | "VM", "actions__after_softreboot" -> Some "23.1.0" + | "pool", "ha_reboot_vm_on_internal_shutdown" -> + Some "25.15.0-next" | "pool", "license_server" -> Some "25.6.0" | "pool", "recommendations" -> From e87367b54e48bb6ac08814bb9cda20f5c02c0eea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 8 Apr 2025 16:27:50 +0100 Subject: [PATCH 088/492] [maintenance]: reformat ocaml/xapi/dune MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used `dune format-dune-file dune >x && mv x dune`. Can also be done via LSP. No functional change. Signed-off-by: Edwin Török --- ocaml/xapi/dune | 578 ++++++++++++++++++++++++++---------------------- 1 file changed, 312 insertions(+), 266 deletions(-) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index fde7a267003..906ea8bd3b0 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -1,288 +1,334 @@ (rule - (target server.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} server --gen-debug --filter-internal --filter closed))) -) + (target server.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} server --gen-debug --filter-internal --filter closed)))) (rule - (target db_actions.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} db --filter nothing))) -) + (target db_actions.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} db --filter nothing)))) (rule - (target custom_actions.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} actions --filter-internal --filter closed))) -) + (target custom_actions.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} actions --filter-internal --filter closed)))) (rule - (target rbac_static.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} rbac --filter-internal --filter closed))) -) + (target rbac_static.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} rbac --filter-internal --filter closed)))) (rule - (target rbac_static.csv) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} rbac --gen-debug --filter-internal --filter closed))) -) + (target rbac_static.csv) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} rbac --gen-debug --filter-internal --filter closed)))) (install - (package xapi-debug) - (section share_root) - (files rbac_static.csv) -) - + (package xapi-debug) + (section share_root) + (files rbac_static.csv)) (library - (name xapi_internal_minimal) - (modules context custom_actions xapi_globs server_helpers session_check rbac rbac_audit db_actions taskHelper eventgen locking_helpers exnHelper rbac_static xapi_role xapi_extensions db) - (modes best) - (wrapped false) - (libraries - http_lib - httpsvr - ipaddr - xapi-types - xapi_database - mtime - tracing - tracing_propagator - uuid - rpclib.core - threads.posix - fmt - clock - astring - stunnel - sexplib0 - sexplib - sexpr - tgroup - forkexec - xapi-idl - xapi_aux - xapi-stdext-std - xapi-stdext-pervasives - xapi-backtrace - xapi-datamodel - xapi-consts - xapi_version - xapi-stdext-threads - xapi-stdext-unix - rpclib.xml - xapi-log) -) + (name xapi_internal_minimal) + (modules + context + custom_actions + xapi_globs + server_helpers + session_check + rbac + rbac_audit + db_actions + taskHelper + eventgen + locking_helpers + exnHelper + rbac_static + xapi_role + xapi_extensions + db) + (modes best) + (wrapped false) + (libraries + http_lib + httpsvr + ipaddr + xapi-types + xapi_database + mtime + tracing + tracing_propagator + uuid + rpclib.core + threads.posix + fmt + clock + astring + stunnel + sexplib0 + sexplib + sexpr + tgroup + forkexec + xapi-idl + xapi_aux + xapi-stdext-std + xapi-stdext-pervasives + xapi-backtrace + xapi-datamodel + xapi-consts + xapi_version + xapi-stdext-threads + xapi-stdext-unix + rpclib.xml + xapi-log)) (library - (name xapi_internal) - (wrapped false) - (modes best) - (modules (:standard \ - xapi_main server api_server xapi custom_actions context xapi_globs server_helpers session_check rbac rbac_audit rbac_static db_actions taskHelper eventgen locking_helpers exnHelper xapi_role xapi_extensions db)) - (libraries - angstrom - astring - cstruct - base64 - clock - cohttp - cohttp_posix - domain-name - ezxenstore.core - fmt - forkexec - gencertlib - gzip - hex - http_lib - httpsvr - ipaddr - ipaddr.unix - magic-mime - message-switch-core - message-switch-unix - mirage-crypto - mirage-crypto-rng - mirage-crypto-rng.unix - mtime - mtime.clock.os - pam - pciutil - pci - psq - ptime - ptime.clock.os - rpclib.core - rpclib.json - rpclib.xml - re - result - rresult - rrd-transport.lib - rrd-transport.file - rrdd-plugin.base - rrdd-plugin.local - sexplib - sexplib0 - sexpr - sha - stunnel - tapctl - tar - tar-unix - tgroup - threads.posix - tracing - tracing_propagator - unixpwd - uri - uuid - uuidm - vhd_lib - x509 - xapi_aux - xapi-backtrace - (re_export xapi-consts) - xapi-consts.xapi_version - xapi-client - xapi-cli-protocol - xapi_cli_server - xapi_database - xapi-datamodel - xapi-idl - xapi-idl.cluster - xapi-idl.rrd - xapi-idl.rrd.interface - xapi-idl.rrd.interface.types - xapi-idl.storage - xapi-idl.storage.interface - xapi-idl.storage.interface.types - xapi-idl.xen - xapi-idl.xen.interface - xapi-idl.xen.interface.types - xapi-idl.network - xapi-idl.v6 - xapi-idl.memory - xapi-idl.gpumon - xapi-idl.updates - (re_export xapi_internal_minimal) - xapi-inventory - xapi-log - xapi-open-uri - xapi-rrd - (re_export xapi-types) - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-threads.scheduler - xapi-stdext-unix - xapi-stdext-zerocheck - xapi-tracing - xapi-tracing-export - xapi_version - xapi_xenopsd - xenstore_transport.unix - xml-light2 - xmlm - xxhash - yojson - zstd - xapi_host_driver_helpers - ) - (preprocess (per_module - ((pps ppx_sexp_conv) Cert_distrib) - ((pps ppx_deriving.ord) Xapi_observer_components) + (name xapi_internal) + (wrapped false) + (modes best) + (modules + (:standard + \ + xapi_main + server + api_server + xapi + custom_actions + context + xapi_globs + server_helpers + session_check + rbac + rbac_audit + rbac_static + db_actions + taskHelper + eventgen + locking_helpers + exnHelper + xapi_role + xapi_extensions + db)) + (libraries + angstrom + astring + cstruct + base64 + clock + cohttp + cohttp_posix + domain-name + ezxenstore.core + fmt + forkexec + gencertlib + gzip + hex + http_lib + httpsvr + ipaddr + ipaddr.unix + magic-mime + message-switch-core + message-switch-unix + mirage-crypto + mirage-crypto-rng + mirage-crypto-rng.unix + mtime + mtime.clock.os + pam + pciutil + pci + psq + ptime + ptime.clock.os + rpclib.core + rpclib.json + rpclib.xml + re + result + rresult + rrd-transport.lib + rrd-transport.file + rrdd-plugin.base + rrdd-plugin.local + sexplib + sexplib0 + sexpr + sha + stunnel + tapctl + tar + tar-unix + tgroup + threads.posix + tracing + tracing_propagator + unixpwd + uri + uuid + uuidm + vhd_lib + x509 + xapi_aux + xapi-backtrace + (re_export xapi-consts) + xapi-consts.xapi_version + xapi-client + xapi-cli-protocol + xapi_cli_server + xapi_database + xapi-datamodel + xapi-idl + xapi-idl.cluster + xapi-idl.rrd + xapi-idl.rrd.interface + xapi-idl.rrd.interface.types + xapi-idl.storage + xapi-idl.storage.interface + xapi-idl.storage.interface.types + xapi-idl.xen + xapi-idl.xen.interface + xapi-idl.xen.interface.types + xapi-idl.network + xapi-idl.v6 + xapi-idl.memory + xapi-idl.gpumon + xapi-idl.updates + (re_export xapi_internal_minimal) + xapi-inventory + xapi-log + xapi-open-uri + xapi-rrd + (re_export xapi-types) + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-threads.scheduler + xapi-stdext-unix + xapi-stdext-zerocheck + xapi-tracing + xapi-tracing-export + xapi_version + xapi_xenopsd + xenstore_transport.unix + xml-light2 + xmlm + xxhash + yojson + zstd + xapi_host_driver_helpers) + (preprocess + (per_module + ((pps ppx_sexp_conv) + Cert_distrib) + ((pps ppx_deriving.ord) + Xapi_observer_components) ((pps ppx_deriving_rpc) - Config_file_sync Extauth_plugin_ADwinbind Importexport Sparse_dd_wrapper - Storage_migrate Storage_migrate_helper Storage_mux Storage_smapiv1_wrapper - Stream_vdi System_domains Xapi_psr Xapi_services Xapi_udhcpd))) -) + Config_file_sync + Extauth_plugin_ADwinbind + Importexport + Sparse_dd_wrapper + Storage_migrate + Storage_migrate_helper + Storage_mux + Storage_smapiv1_wrapper + Stream_vdi + System_domains + Xapi_psr + Xapi_services + Xapi_udhcpd)))) (library - (name xapi_internal_server_only) - (modes best) - (modules server) - (libraries xapi_database xapi_internal_minimal http_lib rpclib.core xapi-types xapi-log xapi-stdext-encodings xapi-consts xapi-backtrace clock rpclib.json) - (wrapped false) -) + (name xapi_internal_server_only) + (modes best) + (modules server) + (libraries + xapi_database + xapi_internal_minimal + http_lib + rpclib.core + xapi-types + xapi-log + xapi-stdext-encodings + xapi-consts + xapi-backtrace + clock + rpclib.json) + (wrapped false)) (library - (name xapi_internal_server) - (modes best) - (wrapped false) - (modules api_server xapi) - (libraries - clock - forkexec - http_lib - httpsvr - rpclib.core - rpclib.json - rpclib.xml - stunnel - tgroup - threads.posix - tracing - tracing_propagator - xapi-backtrace - xapi-client - xapi-consts - xapi-datamodel - xapi_internal_minimal - xapi-idl - xapi-inventory - (re_export xapi_internal_server_only) - xapi-log - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-threads.scheduler - xapi-stdext-unix - xapi-types - xapi_aux - xapi-consts.xapi_version - xapi_cli_server - xapi_database - xapi_internal) -) + (name xapi_internal_server) + (modes best) + (wrapped false) + (modules api_server xapi) + (libraries + clock + forkexec + http_lib + httpsvr + rpclib.core + rpclib.json + rpclib.xml + stunnel + tgroup + threads.posix + tracing + tracing_propagator + xapi-backtrace + xapi-client + xapi-consts + xapi-datamodel + xapi_internal_minimal + xapi-idl + xapi-inventory + (re_export xapi_internal_server_only) + xapi-log + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-threads.scheduler + xapi-stdext-unix + xapi-types + xapi_aux + xapi-consts.xapi_version + xapi_cli_server + xapi_database + xapi_internal)) (executable - (modes exe) - (name xapi_main) - (public_name xapi) - (package xapi) - (modules xapi_main) - (libraries - xapi_internal - xapi_internal_server - xapi_internal_minimal - xapi-idl - xapi-log - xapi-stdext-unix - ) -) - + (modes exe) + (name xapi_main) + (public_name xapi) + (package xapi) + (modules xapi_main) + (libraries + xapi_internal + xapi_internal_server + xapi_internal_minimal + xapi-idl + xapi-log + xapi-stdext-unix)) From 0832ae5171113c3792e5bd327673e400f971c3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 18 Mar 2025 14:08:07 +0000 Subject: [PATCH 089/492] CP-53951: Drop SSL and Lwt dependency from XAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XAPI dependended on vhd_lib, which depends on ssl and lwt. XAPI is not yet ready for Lwt (and mixing Lwt with blocking code is not a good idea). The library was only used to gain access to 1 variant used for parameter passing inside XAPI. Replace this with a polymorphic variant, and drop the dependency. Also allows to drop a match case that was dead code (proto was only ever set to NBD). Fixes: 044dc15ee4 ("CP-45016: Add support for specifying nbd export in sparse_dd") Signed-off-by: Edwin Török --- ocaml/xapi/check-no-lwtssl.sh | 17 +++++++++++++++++ ocaml/xapi/dune | 9 ++++++++- ocaml/xapi/sparse_dd_wrapper.ml | 4 +--- ocaml/xapi/storage_migrate.ml | 2 +- 4 files changed, 27 insertions(+), 5 deletions(-) create mode 100755 ocaml/xapi/check-no-lwtssl.sh diff --git a/ocaml/xapi/check-no-lwtssl.sh b/ocaml/xapi/check-no-lwtssl.sh new file mode 100755 index 00000000000..6b8e9a94a02 --- /dev/null +++ b/ocaml/xapi/check-no-lwtssl.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +SSL=libssl +CRYPTO=libcrypto +# This is an OCaml library, but when it is linked +# we also configure it to link libev, so look for that +LWT=libev +DEPS="${SSL}|${CRYPTO}|${LWT}" + +ldd "$1" | grep -q -E "${DEPS}" 2>&1 +if [ $? -eq 1 ]; then + echo -e "\n\033[32;1m[OK]\033[0m $1 does not depend on ${DEPS}"; + exit 0 +else + echo -e "\n\033[31;1m[ERROR]\033[0m $1 depends on ${DEPS}"; + exit 1 +fi diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 906ea8bd3b0..85f4bf030af 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -189,7 +189,6 @@ uri uuid uuidm - vhd_lib x509 xapi_aux xapi-backtrace @@ -332,3 +331,11 @@ xapi-idl xapi-log xapi-stdext-unix)) + +(rule + (alias runtest) + (package xapi) + (deps + (:x xapi_main.exe)) + (action + (run ./check-no-lwtssl.sh %{x}))) diff --git a/ocaml/xapi/sparse_dd_wrapper.ml b/ocaml/xapi/sparse_dd_wrapper.ml index 0195fc38884..4085933d330 100644 --- a/ocaml/xapi/sparse_dd_wrapper.ml +++ b/ocaml/xapi/sparse_dd_wrapper.ml @@ -92,10 +92,8 @@ let dd_internal progress_cb base prezeroed verify_cert ?(proto = None) infile match proto with | None -> [] - | Some (StreamCommon.Nbd export) -> + | Some (`NBD export) -> ["-dest-proto"; "nbd"; "-nbd-export"; export] - | Some p -> - ["-dest-proto"; StreamCommon.string_of_protocol p] in let verify_args = match verify_cert with diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 8952f947993..4dd2cb39b31 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -256,7 +256,7 @@ module MigrateLocal = struct None | uri :: _ -> let _socket, export = Storage_interface.parse_nbd_uri uri in - Some (StreamCommon.Nbd export) + Some (`NBD export) in Remote.VDI.activate3 dbg remote_dp dest dest_vdi vm ; with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp ~vm From 4e465250d37fe40f249e417fc4d8318fd45dabc1 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 8 Apr 2025 16:28:38 +0100 Subject: [PATCH 090/492] quicktest: Add a test verifying Message.get_all_records_where filtering Also adjust the number of List.hd expected by the quality gate. Signed-off-by: Andrii Sultanov --- ocaml/quicktest/quicktest.ml | 1 + ocaml/quicktest/quicktest_message.ml | 73 ++++++++++++++++++++++++++++ quality-gate.sh | 2 +- 3 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 ocaml/quicktest/quicktest_message.ml diff --git a/ocaml/quicktest/quicktest.ml b/ocaml/quicktest/quicktest.ml index f4f8309ec34..337b1ae2b3e 100644 --- a/ocaml/quicktest/quicktest.ml +++ b/ocaml/quicktest/quicktest.ml @@ -25,6 +25,7 @@ let () = let suite = [ ("Quicktest_example", Quicktest_example.tests ()) + ; ("Quicktest_message", Quicktest_message.tests ()) ; ("xenstore", Quicktest_xenstore.tests ()) ; ("cbt", Quicktest_cbt.tests ()) ; ("event", Quicktest_event.tests ()) diff --git a/ocaml/quicktest/quicktest_message.ml b/ocaml/quicktest/quicktest_message.ml new file mode 100644 index 00000000000..55e8dc67492 --- /dev/null +++ b/ocaml/quicktest/quicktest_message.ml @@ -0,0 +1,73 @@ +module Message = Client.Client.Message + +let rpc = Quicktest_args.rpc + +module Testable = struct + let ref () = + let fmt = Fmt.of_to_string Ref.string_of in + Alcotest.testable fmt ( = ) +end + +let get_all_records_where_test rpc session_id () = + let with_api fn = fn ~rpc ~session_id in + let create_message = with_api Message.create in + + let vm = List.hd (with_api Client.Client.VM.get_all) in + let vm_uuid = with_api Client.Client.VM.get_uuid ~self:vm in + + let host = List.hd (with_api Client.Client.Host.get_all) in + let host_uuid = with_api Client.Client.Host.get_uuid ~self:host in + + (* Create several different messages *) + let messages = + [ + create_message ~name:"VM_EXPLODED" ~priority:100L ~cls:`VM + ~obj_uuid:vm_uuid ~body:"body" + ; create_message ~name:"HOST_EXPLODED" ~priority:100L ~cls:`Host + ~obj_uuid:host_uuid ~body:"body" + ; create_message ~name:"VM_DROWNED" ~priority:50L ~cls:`VM ~obj_uuid:vm_uuid + ~body:"body" + ] + in + + let get_messages_where = with_api Message.get_all_records_where in + + let queries = + [ + (* query | is message expected in the list [bool ; bool ; bool] *) + ({| field "name" = "VM_EXPLODED" |}, [true; false; false]) + ; ({| field "priority" = "100" |}, [true; true; false]) + ; ({| field "cls" = "VM" |}, [true; false; true]) + ; ({| field "cls" = "VM" and field "priority" = "50"|}, [false; false; true]) + ; ({| field "cls" = "VDI"|}, [false; false; false]) + ; ({| field "class" = "VM"|}, [false; false; false]) + ; ( {| field "cls" = "VM" or field "name" = "HOST_EXPLODED"|} + , [true; true; true] + ) + ] + in + let lists = + List.map + (fun (expr, expected) -> (expr, get_messages_where ~expr, expected)) + queries + in + + (* Check that filtering returns correct messages *) + let message_test msg list expected = + let list = List.map fst list in + List.iter2 + (fun msg_ref expected -> + Alcotest.check' Alcotest.bool ~msg ~expected + ~actual:(List.mem msg_ref list) + ) + messages expected + in + List.iter (fun (msg, list, expected) -> message_test msg list expected) lists + +let tests () = + let open Qt_filter in + [ + [("Message.get_all_records_where test", `Quick, get_all_records_where_test)] + |> conn + ] + |> List.concat diff --git a/quality-gate.sh b/quality-gate.sh index e59b8e40ccb..605d5142a38 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=277 + N=279 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From be3b059e29d4486718675afe0641c592415a82a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 9 Apr 2025 15:14:23 +0100 Subject: [PATCH 091/492] CI: allow XAPI linking with Lwt for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It sneaks in via xenstore_transport and tar.unix, fixing that requires patching those 2 libraries first. For now partially revert this test: only test for linking with OpenSSL, not Lwt. Fixes: 0832ae517 ("CP-53951: Drop SSL and Lwt dependency from XAPI") Signed-off-by: Edwin Török --- ocaml/xapi/check-no-lwtssl.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocaml/xapi/check-no-lwtssl.sh b/ocaml/xapi/check-no-lwtssl.sh index 6b8e9a94a02..c7065d0dbc0 100755 --- a/ocaml/xapi/check-no-lwtssl.sh +++ b/ocaml/xapi/check-no-lwtssl.sh @@ -2,10 +2,7 @@ SSL=libssl CRYPTO=libcrypto -# This is an OCaml library, but when it is linked -# we also configure it to link libev, so look for that -LWT=libev -DEPS="${SSL}|${CRYPTO}|${LWT}" +DEPS="${SSL}|${CRYPTO}" ldd "$1" | grep -q -E "${DEPS}" 2>&1 if [ $? -eq 1 ]; then From 24905957c1aadf337a75db31be34688b4dc66a7c Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 9 Apr 2025 17:15:44 +0100 Subject: [PATCH 092/492] style: Use List.assoc_opt Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index e9fa4f18481..2a35eae710c 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -426,10 +426,7 @@ module MigrateLocal = struct (* Destroy the snapshot, if it still exists *) let snap = List.find_opt - (fun x -> - List.mem_assoc "base_mirror" x.sm_config - && List.assoc "base_mirror" x.sm_config = id - ) + (fun x -> List.assoc_opt "base_mirror" x.sm_config = Some id) vdis in ( match snap with From c927979fbf20b975752185febd8238cb86b335e0 Mon Sep 17 00:00:00 2001 From: Guillaume Date: Tue, 8 Apr 2025 18:22:21 +0200 Subject: [PATCH 093/492] Check that there are no changes during SR.scan Currently, we are only checking that no VDIs have been removed during the SR scan performed by the SM plugin. However, there are situations where a VDI has been added, and if this VDI is not present in the list obtained from SR.scan, it will be forgotten. The checks only prevent this in the case where the VDI was added during the scan. There is still a TOCTOU situation if the issue happens after the scan, and there is room for that. Signed-off-by: Guillaume --- ocaml/xapi/xapi_sr.ml | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index d09490b9521..4a0684147af 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -757,6 +757,11 @@ let update_vdis ~__context ~sr db_vdis vdi_infos = (* Perform a scan of this locally-attached SR *) let scan ~__context ~sr = + let module RefSet = Set.Make (struct + type t = [`VDI] Ref.t + + let compare = Ref.compare + end) in let open Storage_access in let task = Context.get_task_id __context in let module C = Storage_interface.StorageAPI (Idl.Exn.GenClient (struct @@ -781,9 +786,21 @@ let scan ~__context ~sr = (* It is sufficient to just compare the refs in two db_vdis, as this is what update_vdis uses to determine what to delete *) let vdis_ref_equal db_vdi1 db_vdi2 = - Listext.List.set_difference (List.map fst db_vdi1) - (List.map fst db_vdi2) - = [] + let refs1 = RefSet.of_list (List.map fst db_vdi1) in + let refs2 = RefSet.of_list (List.map fst db_vdi2) in + if RefSet.equal refs1 refs2 then + true + else + let log_diff label a b = + RefSet.diff a b + |> RefSet.elements + |> List.map Ref.string_of + |> String.concat " " + |> debug "%s: VDIs %s during scan: %s" __FUNCTION__ label + in + log_diff "removed" refs1 refs2 ; + log_diff "added" refs2 refs1 ; + false in let db_vdis_before = find_vdis () in let vs, sr_info = @@ -793,21 +810,13 @@ let scan ~__context ~sr = let db_vdis_after = find_vdis () in if limit > 0 && not (vdis_ref_equal db_vdis_before db_vdis_after) then ( - debug - "%s detected db change while scanning, before scan vdis %s, \ - after scan vdis %s, retry limit left %d" - __FUNCTION__ - (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_before - |> String.concat "," - ) - (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_after - |> String.concat "," - ) - limit ; + debug "%s detected db change while scanning, retry limit left %d" + __FUNCTION__ limit ; (scan_rec [@tailcall]) (limit - 1) ) else if limit = 0 then Helpers.internal_error "SR.scan retry limit exceeded" else ( + debug "%s no change detected, updating VDIs" __FUNCTION__ ; update_vdis ~__context ~sr db_vdis_after vs ; let virtual_allocation = List.fold_left From cbeb0fca09ba6975c7f9551ea4def8146c2a0d21 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Wed, 2 Apr 2025 10:06:54 +0000 Subject: [PATCH 094/492] CA-408843: XSI-1852: Set encryption type of machine account According to https://techcommunity.microsoft.com/blog/coreinfrastructureandsecurityblog/decrypting-the-selection-of-supported-kerberos-encryption-types/1628797 msDS-SupportedEncryptionTypes of machine account help to decide Service Ticket encryption type Some customer IT teams have strict encryption types limitation in their domains This commit add winbind_set_machine_account_kerberos_encryption_type and default to false. When enabled, xapi set the machine account encryption types consistent with the samba client Signed-off-by: Lin Liu --- ocaml/xapi-aux/kerberos_encryption_types.ml | 32 ++++++++++++++++++++ ocaml/xapi-aux/kerberos_encryption_types.mli | 2 ++ ocaml/xapi/extauth_plugin_ADwinbind.ml | 32 +++++++++++++++++++- ocaml/xapi/xapi_globs.ml | 10 ++++++ 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-aux/kerberos_encryption_types.ml b/ocaml/xapi-aux/kerberos_encryption_types.ml index fd2f67399f7..8bb63004677 100644 --- a/ocaml/xapi-aux/kerberos_encryption_types.ml +++ b/ocaml/xapi-aux/kerberos_encryption_types.ml @@ -20,6 +20,24 @@ module Winbind = struct type t = Strong | Legacy | All + (* + * [X] 0x00000001 DES-CBC-CRC + * [X] 0x00000002 DES-CBC-MD5 + * [X] 0x00000004 RC4-HMAC + * [X] 0x00000008 AES128-CTS-HMAC-SHA1-96 + * [X] 0x00000010 AES256-CTS-HMAC-SHA1-96 + * *) + + let des_cbc_crc = 0x1 + + let des_cbc_md5 = 0x2 + + let rc4_hmac = 0x4 + + let aes128_cts_hmac_sha1_96 = 0x8 + + let aes256_cts_hmac_sha1_96 = 0x10 + let to_string = function | Strong -> "strong" @@ -28,6 +46,20 @@ module Winbind = struct | All -> "all" + let ( +++ ) x y = x lor y + + let to_encoding = function + | Strong -> + aes128_cts_hmac_sha1_96 +++ aes256_cts_hmac_sha1_96 + | Legacy -> + rc4_hmac + | All -> + des_cbc_crc + +++ des_cbc_md5 + +++ rc4_hmac + +++ aes128_cts_hmac_sha1_96 + +++ aes256_cts_hmac_sha1_96 + let of_string = function | "all" -> Some All diff --git a/ocaml/xapi-aux/kerberos_encryption_types.mli b/ocaml/xapi-aux/kerberos_encryption_types.mli index 833b6d7bed2..5ef9f833a0e 100644 --- a/ocaml/xapi-aux/kerberos_encryption_types.mli +++ b/ocaml/xapi-aux/kerberos_encryption_types.mli @@ -17,5 +17,7 @@ module Winbind : sig val to_string : t -> string + val to_encoding : t -> int + val of_string : string -> t option end diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index f23f1f5447e..b3458478e3e 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -1085,6 +1085,35 @@ module Winbind = struct netbios_name ) else hostname + + let set_machine_account_encryption_type netbios_name = + match !Xapi_globs.winbind_set_machine_account_kerberos_encryption_type with + | true -> ( + let args = + [ + "ads" + ; "enctypes" + ; "set" + ; "--machine-pass" + ; "-d" + ; debug_level () + ; Printf.sprintf "%s$" netbios_name + ; Printf.sprintf "%d" + (Kerberos_encryption_types.Winbind.to_encoding + !Xapi_globs.winbind_kerberos_encryption_type + ) + ] + in + try + Helpers.call_script + ~timeout:Mtime.Span.(5 * s) + !Xapi_globs.net_cmd args + |> ignore + with _ -> + warn "Failed to set machine account encryption type, ignoring" + ) + | false -> + debug "Skip setting machine account encryption type to DC" end module ClosestKdc = struct @@ -1688,10 +1717,11 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ~ou_conf ~workgroup:(Some workgroup) ~machine_pwd_last_change_time:(Some machine_pwd_last_change_time) ~netbios_name:(Some netbios_name) ; + (* Trigger right now *) ClosestKdc.trigger_update ~start:0. ; RotateMachinePassword.trigger_rotate ~start:0. ; ConfigHosts.join ~domain:service_name ~name:netbios_name ; - (* Trigger right now *) + Winbind.set_machine_account_encryption_type netbios_name ; debug "Succeed to join domain %s" service_name with | Forkhelpers.Spawn_internal_error (_, stdout, _) -> diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 89665a13494..f86ff967b43 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1004,6 +1004,8 @@ let winbind_update_closest_kdc_interval = ref (3600. *. 22.) let winbind_kerberos_encryption_type = ref Kerberos_encryption_types.Winbind.All +let winbind_set_machine_account_kerberos_encryption_type = ref false + let winbind_allow_kerberos_auth_fallback = ref false let winbind_keep_configuration = ref false @@ -1546,6 +1548,14 @@ let other_options = , "Encryption types to use when operating as Kerberos client \ [strong|legacy|all]" ) + ; ( "winbind_set_machine_account_kerberos_encryption_type" + , Arg.Set winbind_set_machine_account_kerberos_encryption_type + , (fun () -> + string_of_bool !winbind_set_machine_account_kerberos_encryption_type + ) + , "Whether set machine account encryption type \ + (msDS-SupportedEncryptionTypes) on domain controller" + ) ; ( "winbind_allow_kerberos_auth_fallback" , Arg.Set winbind_allow_kerberos_auth_fallback , (fun () -> string_of_bool !winbind_allow_kerberos_auth_fallback) From 02c02a954af9a29fb690a5a9ec4478bbc42b8633 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 10 Apr 2025 11:29:50 +0100 Subject: [PATCH 095/492] xe-reset-networking: Avoid truncating IPv6 addresses Otherwise, for a pool.conf like `slave:2a01:240:ab08:5:13::201`, `address` would just be `2a01`. Note that such `address` isn't actually used anywhere (only the user-specified address is written to the config, and that isn't truncated), so this is just a safety measure in case of future changes. Signed-off-by: Andrii Sultanov --- python3/bin/xe-reset-networking | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python3/bin/xe-reset-networking b/python3/bin/xe-reset-networking index 58091d09120..81b3c57286f 100755 --- a/python3/bin/xe-reset-networking +++ b/python3/bin/xe-reset-networking @@ -87,7 +87,7 @@ if __name__ == "__main__": f = open(pool_conf, 'r') try: l = f.readline() - ls = l.split(':') + ls = l.split(':', maxsplit=1) if ls[0].strip() == 'master': master = True address = 'localhost' From e2071f082c1f7d2cc179f5f574cf44c9f70a880a Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 10 Apr 2025 11:59:31 +0100 Subject: [PATCH 096/492] networkd: simplify parsing of config Use a helper method to parse lists in values, and use options better to ignore empty values. Signed-off-by: Pau Ruiz Safont --- ocaml/networkd/lib/network_config.ml | 60 +++++++++------------------- 1 file changed, 19 insertions(+), 41 deletions(-) diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index b306b580b32..ba4930c3068 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -34,6 +34,11 @@ let bridge_naming_convention (device : string) = else "br" ^ device +let get_list_from ~sep ~key args = + List.assoc_opt key args + |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) + |> Option.value ~default:[] + let read_management_conf () = try let management_conf = @@ -42,35 +47,23 @@ let read_management_conf () = in let args = Astring.String.cuts ~empty:false ~sep:"\n" (String.trim management_conf) - in - let args = - List.map - (fun s -> - match Astring.String.cuts ~sep:"=" s with - | [k; v] -> - (k, Astring.String.trim ~drop:(( = ) '\'') v) - | _ -> - ("", "") - ) - args + |> List.filter_map (fun s -> + match Astring.String.cut ~sep:"=" s with + | Some (_, "") | None -> + None + | Some (k, v) -> + Some (k, Astring.String.trim ~drop:(( = ) '\'') v) + ) in debug "Firstboot file management.conf has: %s" (String.concat "; " (List.map (fun (k, v) -> k ^ "=" ^ v) args)) ; let vlan = List.assoc_opt "VLAN" args in - let bond_mode = - Option.value ~default:"" (List.assoc_opt "BOND_MODE" args) - in - let bond_members = - match List.assoc_opt "BOND_MEMBERS" args with - | None -> - [] - | Some x -> - String.split_on_char ',' x - in + let bond_mode = List.assoc_opt "BOND_MODE" args in + let bond_members = get_list_from ~sep:"," ~key:"BOND_MEMBERS" args in let device = (* Take 1st member of bond *) match (bond_mode, bond_members) with - | "", _ | _, [] -> ( + | None, _ | _, [] -> ( match List.assoc_opt "LABEL" args with | Some x -> x @@ -111,28 +104,13 @@ let read_management_conf () = let ip = List.assoc "IP" args |> Unix.inet_addr_of_string in let prefixlen = List.assoc "NETMASK" args |> netmask_to_prefixlen in let gateway = - if List.mem_assoc "GATEWAY" args then - Some (List.assoc "GATEWAY" args |> Unix.inet_addr_of_string) - else - None + Option.map Unix.inet_addr_of_string (List.assoc_opt "GATEWAY" args) in let nameservers = - if List.mem_assoc "DNS" args && List.assoc "DNS" args <> "" then - List.map Unix.inet_addr_of_string - (Astring.String.cuts ~empty:false ~sep:"," - (List.assoc "DNS" args) - ) - else - [] - in - let domains = - if List.mem_assoc "DOMAIN" args && List.assoc "DOMAIN" args <> "" - then - Astring.String.cuts ~empty:false ~sep:" " - (List.assoc "DOMAIN" args) - else - [] + get_list_from ~sep:"," ~key:"DNS" args + |> List.map Unix.inet_addr_of_string in + let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in let dns = (nameservers, domains) in (Static4 [(ip, prefixlen)], gateway, dns) | "dhcp" -> From 77ebbe7dcd31c28b82266e7540d2b34d65e15ccf Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 10 Apr 2025 13:24:55 +0100 Subject: [PATCH 097/492] networkd: read IPv6 entries in the firstboot management config file Other parts of xapi and various scripts have already been updated to account for IPv6 entries in management.conf (like xe-reset-networking, xapi_pool.ml, etc.) Add the IPv6 alternatives to IPv4 config entries also to read_management_conf, otherwise networkd will not properly account for IPv6 settings or error out in case some IPv4 entries are skipped Signed-off-by: Andrii Sultanov --- ocaml/networkd/lib/network_config.ml | 78 +++++++++++++++++++++------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index ba4930c3068..56eef61ce3d 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -39,6 +39,46 @@ let get_list_from ~sep ~key args = |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) |> Option.value ~default:[] +let parse_ipv4_config args = function + | Some "static" -> + let ip = List.assoc "IP" args |> Unix.inet_addr_of_string in + let prefixlen = List.assoc "NETMASK" args |> netmask_to_prefixlen in + let gateway = + Option.map Unix.inet_addr_of_string (List.assoc_opt "GATEWAY" args) + in + (Static4 [(ip, prefixlen)], gateway) + | Some "dhcp" -> + (DHCP4, None) + | _ -> + (None4, None) + +let parse_ipv6_config args = function + | Some "static" -> + let ipv6_arg = List.assoc "IPv6" args in + let ip, prefixlen = + Scanf.sscanf ipv6_arg "%s@/%d" (fun ip prefixlen -> + let ip = ip |> Unix.inet_addr_of_string in + (ip, prefixlen) + ) + in + let gateway = + Option.map Unix.inet_addr_of_string (List.assoc_opt "IPv6_GATEWAY" args) + in + (Static6 [(ip, prefixlen)], gateway) + | Some "dhcp" -> + (DHCP6, None) + | Some "autoconf" -> + (Autoconf6, None) + | _ -> + (None6, None) + +let parse_dns_config args = + let nameservers = + get_list_from ~sep:"," ~key:"DNS" args |> List.map Unix.inet_addr_of_string + in + let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in + (nameservers, domains) + let read_management_conf () = try let management_conf = @@ -98,30 +138,28 @@ let read_management_conf () = bridge in let mac = Network_utils.Ip.get_mac device in - let ipv4_conf, ipv4_gateway, dns = - match List.assoc "MODE" args with - | "static" -> - let ip = List.assoc "IP" args |> Unix.inet_addr_of_string in - let prefixlen = List.assoc "NETMASK" args |> netmask_to_prefixlen in - let gateway = - Option.map Unix.inet_addr_of_string (List.assoc_opt "GATEWAY" args) - in - let nameservers = - get_list_from ~sep:"," ~key:"DNS" args - |> List.map Unix.inet_addr_of_string - in - let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in - let dns = (nameservers, domains) in - (Static4 [(ip, prefixlen)], gateway, dns) - | "dhcp" -> - (DHCP4, None, ([], [])) - | _ -> - (None4, None, ([], [])) + let dns = parse_dns_config args in + let (ipv4_conf, ipv4_gateway), (ipv6_conf, ipv6_gateway) = + match (List.assoc_opt "MODE" args, List.assoc_opt "MODEV6" args) with + | None, None -> + error "%s: at least one of 'MODE', 'MODEV6' needs to be specified" + __FUNCTION__ ; + raise Read_error + | v4, v6 -> + (parse_ipv4_config args v4, parse_ipv6_config args v6) in let phy_interface = {default_interface with persistent_i= true} in let bridge_interface = - {default_interface with ipv4_conf; ipv4_gateway; persistent_i= true; dns} + { + default_interface with + ipv4_conf + ; ipv4_gateway + ; ipv6_conf + ; ipv6_gateway + ; persistent_i= true + ; dns + } in let interface_config, bridge_config = let primary_bridge_conf = From f2b848dedb44c93cd607efd654043993e429b0d5 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 30 Jan 2025 14:15:14 +0000 Subject: [PATCH 098/492] CP-52745: Add `ThreadLocalStorage` in `Threadext` Adds a `ThreadLocalStorage` module under `Threadext`. Currently, it uses `Ambient_local.Thread_local` as the underlying implementation. Following the thread classification PRs, https://github.com/xapi-project/xen-api/pull/6154, this will enable accessing the current thread group for each thread. The current data structure contains the following: 1. ocaml_tid 2. thread_name 3. time_running 4. tepoch 5. tgroup Field 1: `ocaml_tid` is equivalent to `Thread.self () |> Thread.id.` Field 2: `thread_name` is to associet threads with human readable string. Fields 3-5: - `time_runinng` - the amount of time the thread has been running in the current OCaml runtime timeslice, - `tepoch` - the current timeslice the thread has been scheduled for, - `tgroup` - current thread classification. Fields 3-5 are what is expected to be used for thread scheduling when xapi is under load. This can be extended in the future to contain information about tracing, such as `traceparent` and `baggage`. Signed-off-by: Gabriel Buica --- dune-project | 2 + ocaml/libs/tgroup/tgroup.ml | 5 + ocaml/libs/tgroup/tgroup.mli | 12 ++- .../xapi-stdext/lib/xapi-stdext-threads/dune | 28 +++++- .../lib/xapi-stdext-threads/threadext.ml | 38 ++++++++ .../lib/xapi-stdext-threads/threadext.mli | 24 +++++ .../lib/xapi-stdext-threads/threadext_test.ml | 95 ++++++++++++++++++- opam/xapi-stdext-threads.opam | 2 + 8 files changed, 198 insertions(+), 8 deletions(-) diff --git a/dune-project b/dune-project index 1de533b179d..3c6b4af663e 100644 --- a/dune-project +++ b/dune-project @@ -711,12 +711,14 @@ This package provides an Lwt compatible interface to the library.") (synopsis "Xapi's standard library extension, Threads") (authors "Jonathan Ludlam") (depends + ambient-context base-threads base-unix (alcotest :with-test) (clock (= :version)) (fmt :with-test) mtime + tgroup (xapi-log (= :version)) (xapi-stdext-pervasives (= :version)) (xapi-stdext-unix (= :version)) diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index 171b78ee2b2..071a9dfe0d2 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -253,6 +253,11 @@ module Group = struct External.name // External.Unauthenticated.name let to_string g = match g with Group group -> to_cgroup group + + let authenticated_root = + of_creator (Creator.make ~identity:Identity.root_identity ()) + + let unauthenticated = Group External_Unauthenticated end module Cgroup = struct diff --git a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli index b9316967ae3..d89ef542ffd 100644 --- a/ocaml/libs/tgroup/tgroup.mli +++ b/ocaml/libs/tgroup/tgroup.mli @@ -37,7 +37,7 @@ module Group : sig val of_string : string -> t (** [of_string s] creates an originator from a string [s]. - + e.g create an originator based on a http header. *) val to_string : t -> string @@ -76,6 +76,14 @@ module Group : sig val to_string : t -> string (** [to_string g] returns the string representation of the group [g].*) + + val authenticated_root : t + (** [authenticated_root] represents the main classification of internal xapi + threads. *) + + val unauthenticated : t + (** [unauthenticated] represents the classification of xapi threads for + unauthenticated users. *) end (** [Cgroup] module encapsulates different function for managing the cgroups @@ -87,7 +95,7 @@ module Cgroup : sig val dir_of : Group.t -> t option (** [dir_of group] returns the full path of the cgroup directory corresponding to the group [group] as [Some dir]. - + Returns [None] if [init dir] has not been called. *) val init : string -> unit diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 5d61f52cfc4..0dc52b78cd8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,29 +1,47 @@ (library (public_name xapi-stdext-threads) - (name xapi_stdext_threads) + (name xapi_stdext_threads) (modules :standard \ ipq scheduler threadext_test ipq_test scheduler_test) (libraries + ambient-context.thread_local mtime mtime.clock.os threads.posix unix + tgroup xapi-stdext-unix xapi-stdext-pervasives) (foreign_stubs (language c) - (names delay_stubs)) + (names delay_stubs) + ) ) (library (public_name xapi-stdext-threads.scheduler) (name xapi_stdext_threads_scheduler) (modules ipq scheduler) - (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads clock) -) + (libraries + mtime + mtime.clock.os + threads.posix + unix + xapi-log + xapi-stdext-threads + clock) + ) (tests (names threadext_test ipq_test scheduler_test) (package xapi-stdext-threads) (modules threadext_test ipq_test scheduler_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix xapi_stdext_threads_scheduler) + (libraries + xapi_stdext_threads + alcotest + mtime.clock.os + mtime + fmt + tgroup + threads.posix + xapi_stdext_threads_scheduler) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index b954a159ddb..c8d85d8b6c5 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -97,3 +97,41 @@ let wait_timed_write fd timeout = true | _ -> assert false + +module ThreadRuntimeContext = struct + type t = { + ocaml_tid: int + ; thread_name: string + ; mutable time_running: int + ; mutable tepoch: int + ; tgroup: Tgroup.Group.t + } + + (*The documentation for Ambient_context_thread_local isn't really clear is + this context. thread_local_storage is a global variable shared by all + threads. It is a map with keys, the thread IDs and values the above + defined data structure.*) + let thread_local_storage = Ambient_context_thread_local.Thread_local.create () + + let create ?(thread_name = "") () = + let ocaml_tid = Thread.self () |> Thread.id in + let time_running = 0 in + let tepoch = 0 in + let tgroup = Tgroup.Group.authenticated_root in + let tls = {thread_name; tgroup; ocaml_tid; time_running; tepoch} in + let () = + Ambient_context_thread_local.Thread_local.set thread_local_storage tls + in + tls + + let get () = + Ambient_context_thread_local.Thread_local.get_or_create ~create + thread_local_storage + + let update f context = + f context + |> Ambient_context_thread_local.Thread_local.set thread_local_storage + + let remove () = + Ambient_context_thread_local.Thread_local.remove thread_local_storage +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index a1af35ccbeb..7967e3fa573 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -43,3 +43,27 @@ end val wait_timed_read : Unix.file_descr -> float -> bool val wait_timed_write : Unix.file_descr -> float -> bool + +module ThreadRuntimeContext : sig + type t = { + ocaml_tid: int + ; thread_name: string + ; mutable time_running: int + ; mutable tepoch: int + ; tgroup: Tgroup.Group.t + } + + val create : ?thread_name:string -> unit -> t + (** [create ()] creates and returns an initial thread local strorage for the + current thread. *) + + val get : unit -> t + (** [get ()] returns the current thread local storage. *) + + val update : (t -> t) -> t -> unit + (** [update fn thread_ctx] updates the thread local storage based on + the supplied arguments. *) + + val remove : unit -> unit + (** [remove ()] removes the thread local storage of the current thread. *) +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index b93df9f47a8..2182430b182 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -74,4 +74,97 @@ let tests = ; ("other_thread", `Quick, other_thread) ] -let () = Alcotest.run "Threadext" [("Delay", tests)] +let test_create_ambient_storage () = + let open Xapi_stdext_threads.Threadext in + let _ : Thread.t = + Thread.create + (fun () -> + let storage = ThreadRuntimeContext.create () in + let storage_tid = storage.ocaml_tid in + let ocaml_tid = Thread.self () |> Thread.id in + Alcotest.(check int) + "Ocaml thread id matches the thread id stored" ocaml_tid storage_tid + ) + () + in + () + +let test_thread_storage_update_and_get () = + let open Xapi_stdext_threads.Threadext in + let _ : Thread.t = + Thread.create + (fun () -> + let context : ThreadRuntimeContext.t = ThreadRuntimeContext.create () in + + let expected_name = "thread_1" in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= expected_name}) + context ; + let storage = ThreadRuntimeContext.get () in + Alcotest.(check string) + "Check if correct value is set in storage" expected_name + storage.thread_name + ) + () + in + () + +let test_storage_locality () = + let open Xapi_stdext_threads.Threadext in + let r1 = ref None in + let r2 = ref None in + + let thread1_expected_name = "thread_1" in + let thread2_expected_name = "thread_2" in + + let thread1 = + Thread.create + (fun () -> + let context = ThreadRuntimeContext.create () in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= thread1_expected_name}) + context ; + Thread.delay 1. ; + r1 := Some (ThreadRuntimeContext.get ()) + ) + () + in + let thread2 = + Thread.create + (fun () -> + let context = ThreadRuntimeContext.create () in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= thread2_expected_name}) + context ; + + r2 := Some (ThreadRuntimeContext.get ()) + ) + () + in + Thread.join thread1 ; + Thread.join thread2 ; + Alcotest.(check bool) + "Check thread local storage is set for thread1" true (Option.is_some !r1) ; + Alcotest.(check bool) + "Check thread local storage is set for thread2" true (Option.is_some !r2) ; + let thread1_name = + let r1 = Option.get !r1 in + r1.thread_name + in + let thread2_name = + let r2 = Option.get !r2 in + r2.thread_name + in + Alcotest.(check string) "Thread1 name" thread1_expected_name thread1_name ; + Alcotest.(check string) "Thread2 name" thread2_expected_name thread2_name + +let tls_tests = + [ + ("create storage", `Quick, test_create_ambient_storage) + ; ("storage update and get", `Quick, test_thread_storage_update_and_get) + ; ("thread local storage", `Quick, test_storage_locality) + ] + +let () = + Alcotest.run "Threadext" + [("Delay", tests); ("ThreadRuntimeContext", tls_tests)] diff --git a/opam/xapi-stdext-threads.opam b/opam/xapi-stdext-threads.opam index a61529e7e09..55653e588c9 100644 --- a/opam/xapi-stdext-threads.opam +++ b/opam/xapi-stdext-threads.opam @@ -8,12 +8,14 @@ homepage: "https://xapi-project.github.io/" bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ "dune" {>= "3.15"} + "ambient-context" "base-threads" "base-unix" "alcotest" {with-test} "clock" {= version} "fmt" {with-test} "mtime" + "tgroup" "xapi-log" {= version} "xapi-stdext-pervasives" {= version} "xapi-stdext-unix" {= version} From fd1e91d35aa6ccd93734d651fd3dc010c67c60a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 11 Apr 2025 10:35:38 +0100 Subject: [PATCH 099/492] CA-409488: prevent Xenctrl exceptions from escaping on VM boot/shutdown races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that this hostload metric is completely wrong, and we should probably drop it. But for now prevent it from introducing gaps in the entire host CPU graph: if an exception escapes here, then nothing else gets reported, because the host CPU, VM CPU and hostload metrics all get returned as a single list. An exception escaping from any prevents all the others from working too. Fixes: 9aa7dfcca ("CP-43574: Add host load data source") Signed-off-by: Edwin Török --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 7a0db5ec5d7..998ab77768b 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -188,12 +188,15 @@ let dss_hostload xc domains = let load = List.fold_left (fun acc (dom, _, domid) -> - sum 0 dom.Xenctrl.max_vcpu_id (fun id -> - let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in - if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then - 1 - else - 0 + ( try + sum 0 dom.Xenctrl.max_vcpu_id (fun id -> + let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in + if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then + 1 + else + 0 + ) + with _ -> 0 ) + acc ) From 5ba4c8fd6198f7e7adb4e5d40acf51aecdc1ca04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 11 Apr 2025 10:37:48 +0100 Subject: [PATCH 100/492] CA-409489: prevent running out of pages with lots of domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this plugin was part of xcp-rrdd the shared memory protocol wasn't used for transfering data (it was transferred by direct function call). The shared memory protocol has a limitation that you need to declare a maximum size from the beginning, or carefully grow the file itself dynamically. We support max 1024 VMs/host, and if each one needs 5 pages, that'd be 20Mib/host, so always use that as the minimum number of pages. Fixes: b3ea09222 ("IH-615: Move CPU-related data-source collection into a separate RRDD plugin") Signed-off-by: Edwin Török --- ocaml/xapi-idl/rrd/rrd_interface.ml | 2 ++ ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 4 ++-- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 10 ++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-idl/rrd/rrd_interface.ml b/ocaml/xapi-idl/rrd/rrd_interface.ml index bee3c646d34..1cfa1e39a2f 100644 --- a/ocaml/xapi-idl/rrd/rrd_interface.ml +++ b/ocaml/xapi-idl/rrd/rrd_interface.ml @@ -29,6 +29,8 @@ let default_sockets_dir = "/var/lib/xcp" let daemon_name = "xcp-rrdd" +let max_supported_vms = 1024 + let default_path = ref (Filename.concat default_sockets_dir daemon_name) let forwarded_path = diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index e2b6f741fc0..046641b6fc3 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -225,11 +225,11 @@ let dss_mem_host xc = (** estimate the space needed to serialize all the dss_mem_vms in a host. the json-like serialization for the 3 dss in dss_mem_vms takes 622 bytes. these bytes plus some overhead make 1024 bytes an upper bound. *) -let max_supported_vms = 1024 let bytes_per_mem_vm = 1024 -let mem_vm_writer_pages = ((max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 +let mem_vm_writer_pages = + ((Rrd_interface.max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 let res_error fmt = Printf.ksprintf Result.error fmt diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 998ab77768b..e3b86db975b 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -233,14 +233,20 @@ let generate_cpu_ds_list xc () = let _, domains, _ = Xenctrl_lib.domain_snapshot xc in dss_pcpus xc @ dss_vcpus xc domains @ dss_loadavg () @ dss_hostload xc domains +(* 32 vCPUS ~8659 bytes, so 64 vCPUs should fit in 5 *) +let cpu_pages_per_vm = 5 + let _ = Xenctrl.with_intf (fun xc -> let _, domains, _ = Xenctrl_lib.domain_snapshot xc in Process.initialise () ; (* Share one page per PCPU and dom each *) let physinfo = Xenctrl.physinfo xc in - let shared_page_count = physinfo.Xenctrl.nr_cpus + List.length domains in - (* TODO: Can run out of pages if a lot of domains are added at runtime *) + let shared_page_count = + physinfo.Xenctrl.nr_cpus + + Int.max Rrd_interface.max_supported_vms (List.length domains) + * cpu_pages_per_vm + in Process.main_loop ~neg_shift:0.5 ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 ~dss_f:(generate_cpu_ds_list xc) From 5f84b06c5ad18475234e93141992c15d6d4295f9 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Apr 2025 14:13:46 +0100 Subject: [PATCH 101/492] xapi: Log exceptions in check_network_reset Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 8560f8947d5..f7ac9b546d3 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -760,9 +760,9 @@ let check_network_reset () = ) ; (* Remove trigger file *) Unix.unlink Xapi_globs.network_reset_trigger - with _ -> () - -(* TODO: catch specific exception for missing fields in reset_file and inform user *) + with e -> + D.error "%s: exception: %s (%s)" __FUNCTION__ (Printexc.to_string e) + (Printexc.get_backtrace ()) (** Make sure our license is set correctly *) let handle_licensing () = From 01870e439d4a3ef121f4742976d4c94102520b11 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Apr 2025 15:31:48 +0100 Subject: [PATCH 102/492] rrd_file_writer: protect against resource leak Make sure we always close the file descriptor on the error paths. Signed-off-by: Christian Lindig --- .../lib/transport/file/rrd_file_writer.ml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml b/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml index 9f8fa196df4..92cff3e4966 100644 --- a/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml +++ b/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml @@ -14,6 +14,8 @@ type local_id = {path: string; shared_page_count: int} +let finally f finally = Fun.protect ~finally f + module File = struct let page_size = 4096 @@ -29,14 +31,17 @@ module File = struct let init {path; shared_page_count} = let size = shared_page_count * page_size in let fd = Unix.openfile path [Unix.O_RDWR; Unix.O_CREAT] 0o600 in - let mapping = - Bigarray.( - array1_of_genarray @@ Unix.map_file fd char c_layout true [|size|] + finally + (fun () -> + let mapping = + Bigarray.( + array1_of_genarray @@ Unix.map_file fd char c_layout true [|size|] + ) + in + let cstruct = Cstruct.of_bigarray mapping in + (path, cstruct) ) - in - Unix.close fd ; - let cstruct = Cstruct.of_bigarray mapping in - (path, cstruct) + (fun () -> Unix.close fd) let cleanup _ path _ = Unix.unlink path From a9d3fc48facb41ec41f497722fbc67f7fdb081de Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Apr 2025 16:23:18 +0100 Subject: [PATCH 103/492] Raise log level for rrd thread monitor Signed-off-by: Christian Lindig --- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index e2b6f741fc0..3fe57962e57 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -543,7 +543,7 @@ let monitor_write_loop writers = ) ; Thread.delay !Rrdd_shared.timeslice with e -> - debug + warn "Monitor/write thread caught an exception. Pausing for 10s, \ then restarting: %s" (Printexc.to_string e) ; From a0bf2b612843223a7291ee70c32f2e10aca57201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 11 Apr 2025 16:49:44 +0100 Subject: [PATCH 104/492] Makefile: install tgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is now a dependency of Threadext, which causes linking to fail if this isn't installed. Only a problem when RPM is used, not when OPAM is used. Signed-off-by: Edwin Török --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7d0677277fa..dde13fc24a6 100644 --- a/Makefile +++ b/Makefile @@ -155,7 +155,7 @@ DUNE_IU_PACKAGES1+=gzip http-lib pciutil sexpr stunnel uuid xml-light2 zstd xapi DUNE_IU_PACKAGES1+=message-switch message-switch-cli message-switch-core message-switch-lwt DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli DUNE_IU_PACKAGES1+=xapi-nbd varstored-guard xapi-log xapi-open-uri xapi-tracing xapi-tracing-export xapi-expiry-alerts cohttp-posix -DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk +DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk tgroup DUNE_IU_PACKAGES1+=xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools From 7e8836ff304a8f01967cc58b3ddb3ca971242658 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 11 Apr 2025 18:50:10 +0800 Subject: [PATCH 105/492] CA-408230: Enable destroy op for HA statefile VDI after HA is disabled The destroy operation is missing in the allowed operations for HA statefile VDI after HA is disable. It is because when update_allowed_operations for the VDI, it is found that ha_disable_in_progress is true. Xapi_ha.disable_internal will also be called when HA is not enabled successfully, in that case, ha_enable_in_progress is true. This fix removes `ha_enable or `ha_disable from pool's current operations to allow update_allowed_operations to enable destory operation for the VDI. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_ha.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index b452aaa8221..fd99a8b7712 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -1745,6 +1745,11 @@ let disable_internal __context = ) errors ) ; + (* CA-408230: mark current operation, `ha_enable or `ha_disable, as done, + as otherwise it will fail to update_allowed_operations for metadata_vdis + and statefile_vdis *) + let task_id = Ref.string_of (Context.get_task_id __context) in + Db.Pool.remove_from_current_operations ~__context ~self:pool ~key:task_id ; (* Update the allowed operations on the statefile VDIs for tidiness *) List.iter (fun vdi -> Xapi_vdi.update_allowed_operations ~__context ~self:vdi) From 451bb35561a41b2817ead0ad4df334b5ee0184d2 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 14 Apr 2025 15:28:40 +0100 Subject: [PATCH 106/492] CP-52131/CP-53474: Reorder operations during pci_add Reorder the operations in _pci_add: 1) So that Xen can verify calls to grant ioport and iomem permissions, reorder the calls so that the device is assigned to the domain before granting permissions for the resources. When Secure Boot is enabled, Xen will enforce that ioport/iomem permissions can be granted to a domain only when the corresponding device is assigned to that domain. 2) Add the device to QEMU after assigning to the domain. Rather than accessing the PCI config space through dom0 sysfs which is blocked when Secure Boot is enabled, QEMU in XS9 has been updated to use a hypercall to access the PCI config space of a device assigned to a domain. Therefore, add the device to QEMU after assigning it to the domain rather than before so that the config space accesses it performs during the QMP call succeed. Signed-off-by: Ross Lagerwall --- ocaml/xenopsd/xc/device.ml | 56 +++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 22514697509..f7b11e18d25 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -1215,30 +1215,6 @@ module PCI = struct |> String.trim |> int_of_string in - if hvm && qmp_add then - if Service.Qemu.is_running ~xs domid then - let id = - Printf.sprintf "pci-pt-%02x_%02x.%01x" host.bus host.dev host.fn - in - let _qmp_result = - qmp_send_cmd domid - (Qmp.Device_add - { - driver= "xen-pci-passthrough" - ; device= - Qmp.Device.PCI - { - id - ; devfn - ; hostaddr= string_of_address host - ; permissive= false - } - } - ) - in - () - else - raise (Domain_not_running (host, domid)) ; let addresses = sysfs_pci_dev ^ string_of_address host ^ "/resource" |> Unixext.string_of_file @@ -1264,15 +1240,39 @@ module PCI = struct in Xenctrl.domain_iomem_permission xc domid scan_start scan_size true in - List.iteri apply_io_permission addresses ; let xcext = Xenctrlext.get_handle () in + ignore (quarantine host) ; + Xenctrlext.assign_device xcext domid (encode_bdf host) + _xen_domctl_dev_rdm_relaxed ; + List.iteri apply_io_permission addresses ; ( if irq > 0 then Xenctrlext.physdev_map_pirq xcext domid irq |> fun x -> Xenctrl.domain_irq_permission xc domid x true ) ; - ignore (quarantine host) ; - Xenctrlext.assign_device xcext domid (encode_bdf host) - _xen_domctl_dev_rdm_relaxed + if hvm && qmp_add then + if Service.Qemu.is_running ~xs domid then + let id = + Printf.sprintf "pci-pt-%02x_%02x.%01x" host.bus host.dev host.fn + in + let _qmp_result = + qmp_send_cmd domid + (Qmp.Device_add + { + driver= "xen-pci-passthrough" + ; device= + Qmp.Device.PCI + { + id + ; devfn + ; hostaddr= string_of_address host + ; permissive= false + } + } + ) + in + () + else + raise (Domain_not_running (host, domid)) let add ~xc ~xs ~hvm pcidevs domid = let host_addr {host; guest= _; _} = host in From 55ed95de9894572f23f5b08ca01088164836350e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 29 Apr 2024 16:24:41 +0100 Subject: [PATCH 107/492] xapi-aux: Add function to return all management ip addresses This will be important to have a dual-stack mode Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-aux/networking_info.ml | 80 +++++++++++++++++++++++------- ocaml/xapi-aux/networking_info.mli | 24 ++++++--- ocaml/xapi/xapi_host.ml | 18 +++---- 3 files changed, 89 insertions(+), 33 deletions(-) diff --git a/ocaml/xapi-aux/networking_info.ml b/ocaml/xapi-aux/networking_info.ml index 2717338e5da..52de3fb12f6 100644 --- a/ocaml/xapi-aux/networking_info.ml +++ b/ocaml/xapi-aux/networking_info.ml @@ -17,7 +17,22 @@ module L = Debug.Make (struct let name = __MODULE__ end) let get_hostname () = try Unix.gethostname () with _ -> "" -exception Unexpected_address_type of string +type management_ip_error = + | Interface_missing + | Unexpected_address_type of string + | IP_missing + | Other of exn + +let management_ip_error_to_string = function + | Interface_missing -> + "Management interface is missing" + | IP_missing -> + "Management IP is missing" + | Unexpected_address_type s -> + Printf.sprintf + "Unexpected address type. Expected 'ipv4' or 'ipv6', got %s" s + | Other e -> + Printexc.to_string e (* Try to get all FQDNs, avoid localhost *) let dns_names () = @@ -46,32 +61,63 @@ let ipaddr_to_cstruct = function | Ipaddr.V6 addr -> Cstruct.of_string (Ipaddr.V6.to_octets addr) -let list_head lst = List.nth_opt lst 0 - -let get_management_ip_addr ~dbg = +let get_management_ip_addrs ~dbg = let iface = Inventory.lookup Inventory._management_interface in try if iface = "" || (not @@ Net.Interface.exists dbg iface) then - None + Error Interface_missing else - let addrs = + let ( let* ) = Result.bind in + let* addrs = + let ipv4 = Net.Interface.get_ipv4_addr dbg iface in + let ipv6 = Net.Interface.get_ipv6_addr dbg iface in match String.lowercase_ascii (Inventory.lookup Inventory._management_address_type ~default:"ipv4") with | "ipv4" -> - Net.Interface.get_ipv4_addr dbg iface + Ok (ipv4, ipv6) | "ipv6" -> - Net.Interface.get_ipv6_addr dbg iface + Ok (ipv6, ipv4) | s -> - let msg = Printf.sprintf "Expected 'ipv4' or 'ipv6', got %s" s in - L.error "%s: %s" __FUNCTION__ msg ; - raise (Unexpected_address_type msg) + Error (Unexpected_address_type s) in - addrs - |> List.map (fun (addr, _) -> Ipaddr_unix.of_inet_addr addr) (* Filter out link-local addresses *) - |> List.filter (fun addr -> Ipaddr.scope addr <> Ipaddr.Link) - |> List.map (fun ip -> (Ipaddr.to_string ip, ipaddr_to_cstruct ip)) - |> list_head - with _ -> None + let no_local (addr, _) = + let addr = Ipaddr_unix.of_inet_addr addr in + if Ipaddr.scope addr <> Ipaddr.Link then + Some addr + else + None + in + Ok + ( List.filter_map no_local (fst addrs) + , List.filter_map no_local (snd addrs) + ) + with e -> Error (Other e) + +let get_management_ip_addr ~dbg = + match get_management_ip_addrs ~dbg with + | Ok (preferred, _) -> + List.nth_opt preferred 0 + |> Option.map (fun addr -> (Ipaddr.to_string addr, ipaddr_to_cstruct addr)) + | Error _ -> + None + +let get_host_certificate_subjects ~dbg = + let ( let* ) = Result.bind in + let* ips, preferred_ip = + match get_management_ip_addrs ~dbg with + | Error e -> + Error e + | Ok (preferred, others) -> + let ips = List.(rev_append (rev preferred) others) in + Option.fold ~none:(Error IP_missing) + ~some:(fun ip -> Ok (List.map ipaddr_to_cstruct ips, ip)) + (List.nth_opt ips 0) + in + let dns_names = dns_names () in + let name = + match dns_names with [] -> Ipaddr.to_string preferred_ip | dns :: _ -> dns + in + Ok (name, dns_names, ips) diff --git a/ocaml/xapi-aux/networking_info.mli b/ocaml/xapi-aux/networking_info.mli index 8774e7aba3d..d2ba929e431 100644 --- a/ocaml/xapi-aux/networking_info.mli +++ b/ocaml/xapi-aux/networking_info.mli @@ -14,15 +14,27 @@ val get_hostname : unit -> string (** [get_hostname ()] returns the hostname as returned by Unix.gethostname. If there is an error "" is returned. *) -exception Unexpected_address_type of string +type management_ip_error = + | Interface_missing + | Unexpected_address_type of string + | IP_missing + | Other of exn + +val management_ip_error_to_string : management_ip_error -> string +(** [management_ip_error err] returns a string representation of [err], useful + only for logging. *) val dns_names : unit -> string list (** [dns_names ()] returns a list of the hostnames that the host may have. Ignores empty names as well as "localhost" *) val get_management_ip_addr : dbg:string -> (string * Cstruct.t) option -(** [get_management_ip_addr ~dbg] returns the IP of the management network. - If the system does not have management address None is return. - [Unexpected_address_type] is raised if there is an unexpected address is - stored. The address is return in two formats: human-readable string and - its bytes representation. *) +(** [get_management_ip_addr ~dbg] returns the preferred IP of the management + network, or None. The address is returned in two formats: a human-readable + string and its bytes representation. *) + +val get_host_certificate_subjects : + dbg:string + -> (string * string list * Cstruct.t list, management_ip_error) Result.t +(** [get_host_certificate_subjects ~dbg] returns the main, dns names and ip + addresses that identify the host in secure connections. *) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index acd8a10936a..e2cece5cb5c 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1583,19 +1583,17 @@ let install_server_certificate ~__context ~host ~certificate ~private_key replace_host_certificate ~__context ~type':`host ~host write_cert_fs let _new_host_cert ~dbg ~path : X509.Certificate.t = - let ip_as_string, ip = - match Networking_info.get_management_ip_addr ~dbg with - | None -> + let name, dns_names, ips = + match Networking_info.get_host_certificate_subjects ~dbg with + | Error cause -> + let msg = Networking_info.management_ip_error_to_string cause in Helpers.internal_error ~log_err:true ~err_fun:D.error - "%s: failed to get management IP" __LOC__ - | Some ip -> - ip + "%s: failed to generate certificate subjects because %s" __LOC__ msg + | Ok (name, dns_names, ips) -> + (name, dns_names, ips) in - let dns_names = Networking_info.dns_names () in - let cn = match dns_names with [] -> ip_as_string | dns :: _ -> dns in - let ips = [ip] in let valid_for_days = !Xapi_globs.cert_expiration_days in - Gencertlib.Selfcert.host ~name:cn ~dns_names ~ips ~valid_for_days path + Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path !Xapi_globs.server_cert_group_id let reset_server_certificate ~__context ~host = From ad6f9253a56da143ba579d27af6d6a41a1ea2ed6 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 29 Apr 2024 16:42:36 +0100 Subject: [PATCH 108/492] gencert: Allow adding more than one IP as the certificate subject This is done for host certificates only Signed-off-by: Pau Ruiz Safont --- ocaml/gencert/gencert.ml | 30 ++++++++++++++---------------- ocaml/xapi-aux/networking_info.mli | 4 ---- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/ocaml/gencert/gencert.ml b/ocaml/gencert/gencert.ml index 0d3284379ff..f507879b2ca 100644 --- a/ocaml/gencert/gencert.ml +++ b/ocaml/gencert/gencert.ml @@ -47,22 +47,20 @@ let main ~dbg ~path ~cert_gid ~sni () = init_inventory () ; let generator path = match sni with - | SNI.Default -> - let name, ip = - match Networking_info.get_management_ip_addr ~dbg with - | None -> - D.error "gencert.ml: cannot get management ip address!" ; - exit 1 - | Some x -> - x - in - let dns_names = Networking_info.dns_names () in - let ips = [ip] in - let (_ : X509.Certificate.t) = - Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path - cert_gid - in - () + | SNI.Default -> ( + match Networking_info.get_host_certificate_subjects ~dbg with + | Error cause -> + let msg = Networking_info.management_ip_error_to_string cause in + D.error + "gencert.ml: failed to generate certificate subjects because %s" msg ; + exit 1 + | Ok (name, dns_names, ips) -> + let _ : X509.Certificate.t = + Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path + cert_gid + in + () + ) | SNI.Xapi_pool -> let uuid = Inventory.lookup Inventory._installation_uuid in let (_ : X509.Certificate.t) = diff --git a/ocaml/xapi-aux/networking_info.mli b/ocaml/xapi-aux/networking_info.mli index d2ba929e431..ced93d30dd5 100644 --- a/ocaml/xapi-aux/networking_info.mli +++ b/ocaml/xapi-aux/networking_info.mli @@ -24,10 +24,6 @@ val management_ip_error_to_string : management_ip_error -> string (** [management_ip_error err] returns a string representation of [err], useful only for logging. *) -val dns_names : unit -> string list -(** [dns_names ()] returns a list of the hostnames that the host may have. - Ignores empty names as well as "localhost" *) - val get_management_ip_addr : dbg:string -> (string * Cstruct.t) option (** [get_management_ip_addr ~dbg] returns the preferred IP of the management network, or None. The address is returned in two formats: a human-readable From d94a4bf2412f8d8d81b48cfdc07f3b325565d3d1 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 26 Apr 2024 15:53:18 +0100 Subject: [PATCH 109/492] network_server: add gateway and dns options to DHCP6 Signed-off-by: Pau Ruiz Safont --- ocaml/networkd/bin/network_server.ml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 8cc5e9ea908..59c76e319f3 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -412,12 +412,23 @@ module Interface = struct Ip.set_ipv6_link_local_addr name ) | DHCP6 -> + let gateway = + Option.fold ~none:[] + ~some:(fun n -> [`gateway n]) + !config.gateway_interface + in + let dns = + Option.fold ~none:[] + ~some:(fun n -> [`dns n]) + !config.dns_interface + in if Dhclient.is_running ~ipv6:true name then ignore (Dhclient.stop ~ipv6:true name) ; Sysctl.set_ipv6_autoconf name false ; Ip.flush_ip_addr ~ipv6:true name ; Ip.set_ipv6_link_local_addr name ; - ignore (Dhclient.ensure_running ~ipv6:true name []) + let options = gateway @ dns in + ignore (Dhclient.ensure_running ~ipv6:true name options) | Autoconf6 -> if Dhclient.is_running ~ipv6:true name then ignore (Dhclient.stop ~ipv6:true name) ; From e7c66dea2e5981420d1d4290d6d558d624b80439 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 14 Apr 2025 17:10:55 +0100 Subject: [PATCH 110/492] opam: update xapi-storage-cli metadata Now it's generated by dune and includes the dependency on xapi-client and xapi-types. Signed-off-by: Pau Ruiz Safont --- dune-project | 11 ++++++++ opam/xapi-storage-cli.opam | 44 +++++++++++++++++------------ opam/xapi-storage-cli.opam.template | 26 ----------------- 3 files changed, 37 insertions(+), 44 deletions(-) delete mode 100644 opam/xapi-storage-cli.opam.template diff --git a/dune-project b/dune-project index 3c6b4af663e..5ad3b3a0ff3 100644 --- a/dune-project +++ b/dune-project @@ -146,6 +146,17 @@ (package (name xapi-storage-cli) + (depends + cmdliner + re + rpclib + ppx_deriving_rpc + (xapi-client (= :version)) + (xapi-idl (= :version)) + (xapi-types (= :version)) + ) + (synopsis "A CLI for xapi storage services") + (description "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs.") ) (package diff --git a/opam/xapi-storage-cli.opam b/opam/xapi-storage-cli.opam index c58a06832eb..c2e092eb9f6 100644 --- a/opam/xapi-storage-cli.opam +++ b/opam/xapi-storage-cli.opam @@ -1,28 +1,36 @@ # This file is generated by dune, edit dune-project instead - opam-version: "2.0" -name: "xapi-storage-cli" -maintainer: "xen-api@lists.xen.org" -authors: [ "xen-api@lists.xen.org" ] +synopsis: "A CLI for xapi storage services" +description: + "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs." +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -homepage: "https://github.com/xapi-project/xen-api" +homepage: "https://xapi-project.github.io/" bug-reports: "https://github.com/xapi-project/xen-api/issues" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [[ "dune" "build" "-p" name "-j" jobs ]] depends: [ - "ocaml" "dune" {>= "3.15"} - "base-threads" + "cmdliner" "re" "rpclib" "ppx_deriving_rpc" - "xapi-idl" - "cmdliner" + "xapi-client" {= version} + "xapi-idl" {= version} + "xapi-types" {= version} + "odoc" {with-doc} ] -synopsis: "A CLI for xapi storage services" -description: """ -The CLI allows you to directly manipulate virtual disk images, without -them being attached to VMs.""" -url { - src: "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" diff --git a/opam/xapi-storage-cli.opam.template b/opam/xapi-storage-cli.opam.template deleted file mode 100644 index 3ffbe86d8a3..00000000000 --- a/opam/xapi-storage-cli.opam.template +++ /dev/null @@ -1,26 +0,0 @@ -opam-version: "2.0" -name: "xapi-storage-cli" -maintainer: "xen-api@lists.xen.org" -authors: [ "xen-api@lists.xen.org" ] -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -homepage: "https://github.com/xapi-project/xen-api" -bug-reports: "https://github.com/xapi-project/xen-api/issues" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [[ "dune" "build" "-p" name "-j" jobs ]] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "base-threads" - "re" - "rpclib" - "ppx_deriving_rpc" - "xapi-idl" - "cmdliner" -] -synopsis: "A CLI for xapi storage services" -description: """ -The CLI allows you to directly manipulate virtual disk images, without -them being attached to VMs.""" -url { - src: "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} From 365cd742568d18f89d79777d19e1103c4dc01d26 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 15 Apr 2025 13:25:50 +0800 Subject: [PATCH 111/492] Remove the unused error set_console_idle_timeout_failed The error set_console_idle_timeout_failed was added in feature branch while it is not used anywhere. The error used in set_console_idle_timeout now is invalid_value. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_errors.ml | 3 --- ocaml/xapi-consts/api_errors.ml | 3 --- 2 files changed, 6 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 039c5c313f3..d7d24c6d76a 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2049,9 +2049,6 @@ let _ = error Api_errors.host_driver_no_hardware ["driver variant"] ~doc:"No hardware present for this host driver variant" () ; - error Api_errors.set_console_idle_timeout_failed ["timeout"] - ~doc:"Failed to set console idle timeout." () ; - error Api_errors.tls_verification_not_enabled_in_pool [] ~doc: "TLS verification has not been enabled in the pool successfully, please \ diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 274d7d351fd..adfb96e4b86 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1430,6 +1430,3 @@ let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = add_error "TLS_VERIFICATION_NOT_ENABLED_IN_POOL" - -let set_console_idle_timeout_failed = - add_error "SET_CONSOLE_IDLE_TIMEOUT_FAILED" From 26ed87da3826e2135775cab5b579555df67124c2 Mon Sep 17 00:00:00 2001 From: Zeroday BYTE Date: Wed, 16 Apr 2025 07:18:50 +0700 Subject: [PATCH 112/492] Update pom.xml Signed-off-by: Zeroday BYTE --- ocaml/sdk-gen/java/autogen/xen-api/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml index c3a6cabdfda..5dc18e7ec61 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml +++ b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml @@ -74,12 +74,12 @@ vcc-releases VCC Release Repository - http://oss.sonatype.org/content/repositories/java-net-releases/ + https://oss.sonatype.org/content/repositories/java-net-releases/ vcc-snapshots VCC Snapshot Repository - http://oss.sonatype.org/content/repositories/java-net-snapshots/ + https://oss.sonatype.org/content/repositories/java-net-snapshots/ From f19f381ab89796e253804fb40e9e9dc648e1eecb Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 11 Apr 2025 18:33:41 +0800 Subject: [PATCH 113/492] CA-408230: Check destroy op valid for HA metadata VDI with type redo_log As xha_metadata_vdi is of type `redo_log. This is why the destroy operation was not missing for the HA metadata VDI after HA is disabled. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_vdi.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 3713f189040..cecf8296f8e 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -345,12 +345,12 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) then Error (Api_errors.ha_is_enabled, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] && Xapi_pool_helpers.ha_enable_in_progress ~__context then Error (Api_errors.ha_enable_in_progress, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] && Xapi_pool_helpers.ha_disable_in_progress ~__context then Error (Api_errors.ha_disable_in_progress, []) From 34b308c88110bcb889eeaa2c209650ac4039d9b4 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Fri, 11 Apr 2025 08:05:40 +0000 Subject: [PATCH 114/492] CP-54138: Sync SSH status during XAPI startup - Ensure host.enabled_ssh reflects the actual SSH service state on startup, in case it was manually changed by the user. - Reschedule the "disable SSH" job if: - host.ssh_enabled_timeout is set to a positive value, and - host.ssh_expiry is in the future. - Disable the SSH if: - host.ssh_enabled_timeout is set to a positive value, and - host.ssh_expiry is in the past. Signed-off-by: Lunfan Zhang --- ocaml/xapi/dbsync_slave.ml | 5 +++++ ocaml/xapi/xapi_globs.ml | 2 ++ ocaml/xapi/xapi_host.mli | 3 +++ ocaml/xapi/xapi_periodic_scheduler_init.ml | 22 ++++++++++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 0d3115ff11e..51ef2665d15 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -380,5 +380,10 @@ let update_env __context sync_keys = Create_misc.create_chipset_info ~__context info ) ; switched_sync Xapi_globs.sync_gpus (fun () -> Xapi_pgpu.update_gpus ~__context) ; + switched_sync Xapi_globs.sync_ssh_status (fun () -> + let ssh_service = !Xapi_globs.ssh_service in + let status = Fe_systemctl.is_active ~service:ssh_service in + Db.Host.set_ssh_enabled ~__context ~self:localhost ~value:status + ) ; remove_pending_guidances ~__context diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 58c5af94226..b5c1a821e2f 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -368,6 +368,8 @@ let sync_bios_strings = "sync_bios_strings" let sync_chipset_info = "sync_chipset_info" +let sync_ssh_status = "sync_ssh_status" + let sync_pci_devices = "sync_pci_devices" let sync_gpus = "sync_gpus" diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index b041722fac9..a3d7504b4a4 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -577,3 +577,6 @@ val set_ssh_enabled_timeout : val set_console_idle_timeout : __context:Context.t -> self:API.ref_host -> value:int64 -> unit + +val schedule_disable_ssh_job : + __context:Context.t -> self:API.ref_host -> timeout:int64 -> unit diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index 1bd13d5f6d6..f394a9ad999 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -13,6 +13,8 @@ *) (** Periodic scheduler for background tasks. *) +module Date = Clock.Date + module D = Debug.Make (struct let name = "backgroundscheduler" end) open D @@ -73,6 +75,25 @@ let register ~__context = (fun __context -> Xapi_subject.update_all_subjects ~__context ) in + let sync_ssh_status ~__context = + let self = Helpers.get_localhost ~__context in + let timeout = Db.Host.get_ssh_enabled_timeout ~__context ~self in + + if timeout > 0L then + let expiry_time = + Db.Host.get_ssh_expiry ~__context ~self + |> Date.to_unix_time + |> Int64.of_float + in + let current_time = Unix.time () |> Int64.of_float in + + if Int64.compare expiry_time current_time > 0 then + let remaining = Int64.sub expiry_time current_time in + Xapi_host.schedule_disable_ssh_job ~__context ~self ~timeout:remaining + (* handle the case where XAPI is not active when the SSH timeout expires *) + else if Fe_systemctl.is_active ~service:!Xapi_globs.ssh_service then + Xapi_host.disable_ssh ~__context ~self + in let update_all_subjects_delay = 10.0 in (* initial delay = 10 seconds *) if master then @@ -133,6 +154,7 @@ let register ~__context = "Check stunnel cache expiry" (Xapi_stdext_threads_scheduler.Scheduler.Periodic stunnel_period) stunnel_period Stunnel_cache.gc ; + sync_ssh_status ~__context ; if master && Db.Pool.get_update_sync_enabled ~__context From 1ac5b3656dfa188ff84700542ecc713ce93b63d2 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 15 Apr 2025 14:19:27 +0100 Subject: [PATCH 115/492] xapi_guest_agent: Update xenstore keys for Windows PV drivers versions Windows PV drivers do not store their version information into "drivers/{xeneventchn,xenvbd,xennet}" xenstore keys since 2015, see: * PV drivers commit 784af16810d705ba2bc02bab6ac93b24119f886c (Publish distribution information to xenstore) https://xenbits.xen.org/gitweb/?p=pvdrivers/win/xenbus.git;a=commit;h=784af16810d705ba2bc02bab6ac93b24119f886c * Xen commit 71e64e163b2dae7d08f7d77ee942749663f484d5 (docs: Introduce xenstore paths for PV driver information) https://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=71e64e163b2dae7d08f7d77ee942749663f484d5 Instead it is stored like this: drivers/0 = "XenServer XENBUS 9.1.9.105 " drivers/1 = "XenServer XENVBD 9.1.8.79 " drivers/2 = "XenServer XENVIF 9.1.12.101 " drivers/3 = "XenServer XENIFACE 9.1.10.87 " drivers/4 = "XenServer XENNET 9.1.7.65 " Modify xapi_guest_agent to list such entries under "drivers/" and present version information for each driver. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_guest_agent.ml | 46 ++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index 066c57cb6f4..edb56d64995 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -33,12 +33,6 @@ end) NB each key is annotated with whether it appears in windows and/or linux *) let pv_drivers_version = [ - ("drivers/xenevtchn", "xenevtchn") - ; (* windows *) - ("drivers/xenvbd", "xenvbd") - ; (* windows *) - ("drivers/xennet", "xennet") - ; (* windows *) ("attr/PVAddons/MajorVersion", "major") ; (* linux + windows *) ("attr/PVAddons/MinorVersion", "minor") @@ -294,7 +288,45 @@ let get_initial_guest_metrics (lookup : string -> string option) | None -> [] in - let pv_drivers_version = to_map pv_drivers_version + (* enumerate all driver versions from xenstore, which are stored like + drivers/0 = "XenServer XENBUS 9.1.9.105 " + drivers/1 = "XenServer XENVBD 9.1.8.79 " + drivers/2 = "XenServer XENVIF 9.1.12.101 " + drivers/3 = "XenServer XENIFACE 9.1.10.87 " + drivers/4 = "XenServer XENNET 9.1.7.65 " + + (see the format specified in xenstore-paths) + *) + let get_windows_driver_versions () = + (* Only look into directories that are numbers (indices) *) + let filter_dirs subdirs = + List.filter_map + (fun x -> + match int_of_string_opt x with + | Some _ -> + Some ("drivers/" ^ x, x) + | None -> + None + ) + subdirs + in + let versions = list "drivers" |> filter_dirs |> to_map in + List.filter_map + (fun (_, version_string) -> + try + Scanf.sscanf version_string "%s@ %s@ %s@ %s@\n" + (fun vendor driver_name version attr -> + Some + ( String.lowercase_ascii driver_name + , String.concat " " [vendor; version; attr] + ) + ) + with _ -> None + ) + versions + in + let pv_drivers_version = + to_map pv_drivers_version @ get_windows_driver_versions () and os_version = to_map os_version and netbios_name = match to_map dns_domain with From c5803e27dea7e1170cb64a2fb20afc030dffc192 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 17 Apr 2025 13:30:10 +0100 Subject: [PATCH 116/492] tests: Add Windows PV driver version parsing test to test_guest_agent Signed-off-by: Andrii Sultanov --- ocaml/tests/test_guest_agent.ml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/ocaml/tests/test_guest_agent.ml b/ocaml/tests/test_guest_agent.ml index 6b74aa55544..6bc0f227c7b 100644 --- a/ocaml/tests/test_guest_agent.ml +++ b/ocaml/tests/test_guest_agent.ml @@ -285,6 +285,7 @@ module Initial_guest_metrics = Generic.MakeStateless (struct Xapi_guest_agent.get_initial_guest_metrics (lookup tree) (list tree) in guest_metrics.Xapi_guest_agent.networks + @ guest_metrics.Xapi_guest_agent.pv_drivers_version let tests = `QuickAndAutoDocumented @@ -465,6 +466,33 @@ module Initial_guest_metrics = Generic.MakeStateless (struct ] , [] ) + ; (* windows pv driver versions parsing *) + ( [ + ("drivers/0", "XenServer XENBUS 9.1.9.105 ") + ; ("drivers/1", "XenServer XENVBD 9.1.8.79 ") + ; ("drivers/2", "XenServer XENVIF 9.1.12.101 ") + ; ("drivers/3", "XenServer XENIFACE 9.1.10.87 ") + ; ("drivers/4", "XenServer XENNET 9.1.7.65 ") + ] + , [ + ("micro", "-1") + ; ("xennet", "XenServer 9.1.7.65 ") + ; ("xeniface", "XenServer 9.1.10.87 ") + ; ("xenvif", "XenServer 9.1.12.101 ") + ; ("xenvbd", "XenServer 9.1.8.79 ") + ; ("xenbus", "XenServer 9.1.9.105 ") + ] + ) + ; ( [ + ("drivers/0", "XenServer XENBUS 9.1.9.105 (DEBUG) (MOREDEBUG)") + ; ("drivers/2", "XCP_ng XENVIF 9.1.12.101 ") + ] + , [ + ("micro", "-1") + ; ("xenvif", "XCP_ng 9.1.12.101 ") + ; ("xenbus", "XenServer 9.1.9.105 (DEBUG) (MOREDEBUG)") + ] + ) ] end) From d3bf7368b6e0894919e4552edff52fda976469ac Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 2 Apr 2025 16:59:44 +0100 Subject: [PATCH 117/492] Explicitly define failures for SXM We define different failures for SXM, for different stages of the SXM: - preparation failure: raised when preparing the environment for SXM, for example, when the dest host creates VDIs for data mirroring (SMAPIv1 and v3); - mirror_fd_failure: happens when passing fds to tapdisks for mirroring (SMAPIv1 only); - mirror_snapshot_failure: raised when taking a snapshot as the base image before copying it over to the destination (SMAPIv1 only); - mirror_copy_failure: raised when copying of the base image fails; - mirror_failure: raised when there is any issues that causes the mirror to crash during SXM (SMAPIv1 and v3) Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 311c9f2dfdf..b98047bd610 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -362,6 +362,19 @@ module Errors = struct | No_storage_plugin_for_sr of string | Content_ids_do_not_match of (string * string) | Missing_configuration_parameter of string + (* raised when preparing the environment for SXM, for example, when the dest + host creates VDIs for data mirroring (SMAPIv1 and v3) *) + | Migration_preparation_failure of string + (* happens when passing fds to tapdisks for mirroring (SMAPIv1 only) *) + | Migration_mirror_fd_failure of string + (* raised when taking a snapshot as the base image before copying it over to + the destination (SMAPIv1 only) *) + | Migration_mirror_snapshot_failure of string + (* mirror_copy_failure: raised when copying of the base image fails (SMAPIv1 only) *) + | Migration_mirror_copy_failure of string + (* mirror_failure: raised when there is any issues that causes the mirror to crash + during SXM (SMAPIv3 only, v1 uses more specific errors as above) *) + | Migration_mirror_failure of string | Internal_error of string | Unknown_error [@@default Unknown_error] [@@deriving rpcty] From 50a40dfe316a7d115fdb7841970dc130d6ce8c89 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 2 Apr 2025 17:29:09 +0100 Subject: [PATCH 118/492] Define the utility function for choosing the backend Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 706f73891a0..abc40cc7d2c 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -23,6 +23,21 @@ open Storage_interface open Storage_task open Storage_migrate_helper +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +let s_of_sr = Storage_interface.Sr.string_of + +let choose_backend dbg sr = + debug "%s dbg: %s choosing backend for sr :%s" __FUNCTION__ dbg (s_of_sr sr) ; + match Storage_mux_reg.smapi_version_of_sr sr with + | SMAPIv1 -> + (module Storage_smapiv1_migrate.MIRROR : SMAPIv2_MIRROR) + | SMAPIv3 -> + (module Storage_smapiv3_migrate.MIRROR : SMAPIv2_MIRROR) + | SMAPIv2 -> + (* this should never happen *) + failwith "unsupported SMAPI version smapiv2" + let tapdisk_of_attach_info (backend : Storage_interface.backend) = let _, blockdevices, _, nbds = Storage_interface.implementations_of_backend backend From beca5774fc39f4b63c2fc816a71abc731d9198fe Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 3 Apr 2025 16:50:30 +0100 Subject: [PATCH 119/492] Move the copying function to storage_smapiv1_migrate Although copying is not exclusively used by SMAPIv1, e.g. DATA.copy maps to Storage_migrate.copy which maps to Storage_smapiv1_migrate.copy. And this is used when a VDI does not need to be migrated live (attached read-only). It's a bit easier in terms of dependency to leave them in storage_smapiv1_migrate.ml And some related helper functions such as `progress_callback`. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 378 +------------------------ ocaml/xapi/storage_migrate_helper.ml | 28 ++ ocaml/xapi/storage_migrate_helper.mli | 4 + ocaml/xapi/storage_smapiv1_migrate.ml | 379 ++++++++++++++++++++++++++ ocaml/xapi/storage_smapiv1_wrapper.ml | 2 +- 5 files changed, 414 insertions(+), 377 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index abc40cc7d2c..9ee1cf9daec 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -18,7 +18,6 @@ open D module Listext = Xapi_stdext_std.Listext open Xapi_stdext_pervasives.Pervasiveext module Unixext = Xapi_stdext_unix.Unixext -open Xmlrpc_client open Storage_interface open Storage_task open Storage_migrate_helper @@ -38,384 +37,11 @@ let choose_backend dbg sr = (* this should never happen *) failwith "unsupported SMAPI version smapiv2" -let tapdisk_of_attach_info (backend : Storage_interface.backend) = - let _, blockdevices, _, nbds = - Storage_interface.implementations_of_backend backend - in - match (blockdevices, nbds) with - | blockdevice :: _, _ -> ( - let path = blockdevice.Storage_interface.path in - try - match Tapctl.of_device (Tapctl.create ()) path with - | tapdev, _, _ -> - Some tapdev - with - | Tapctl.Not_blktap -> - debug "Device %s is not controlled by blktap" path ; - None - | Tapctl.Not_a_device -> - debug "%s is not a device" path ; - None - | _ -> - debug "Device %s has an unknown driver" path ; - None - ) - | _, nbd :: _ -> ( - try - let path, _ = Storage_interface.parse_nbd_uri nbd in - let filename = Unix.realpath path |> Filename.basename in - Scanf.sscanf filename "nbd%d.%d" (fun pid minor -> - Some (Tapctl.tapdev_of ~pid ~minor) - ) - with _ -> - debug "No tapdisk found for NBD backend: %s" nbd.Storage_interface.uri ; - None - ) - | _ -> - debug "No tapdisk found for backend: %s" - (Storage_interface.(rpc_of backend) backend |> Rpc.to_string) ; - None - -let with_activated_disk ~dbg ~sr ~vdi ~dp ~vm f = - let attached_vdi = - Option.map - (fun vdi -> - let backend = Local.VDI.attach3 dbg dp sr vdi vm false in - (vdi, backend) - ) - vdi - in - finally - (fun () -> - let path_and_nbd = - Option.map - (fun (vdi, backend) -> - let _xendisks, blockdevs, files, nbds = - Storage_interface.implementations_of_backend backend - in - match (files, blockdevs, nbds) with - | {path} :: _, _, _ | _, {path} :: _, _ -> - Local.VDI.activate3 dbg dp sr vdi vm ; - (path, false) - | _, _, nbd :: _ -> - Local.VDI.activate3 dbg dp sr vdi vm ; - let unix_socket_path, export_name = - Storage_interface.parse_nbd_uri nbd - in - ( Attach_helpers.NbdClient.start_nbd_client ~unix_socket_path - ~export_name - , true - ) - | [], [], [] -> - raise - (Storage_interface.Storage_error - (Backend_error - ( Api_errors.internal_error - , [ - "No File, BlockDevice or Nbd implementation in \ - Datapath.attach response: " - ^ (Storage_interface.(rpc_of backend) backend - |> Jsonrpc.to_string - ) - ] - ) - ) - ) - ) - attached_vdi - in - finally - (fun () -> f (Option.map (function path, _ -> path) path_and_nbd)) - (fun () -> - Option.iter - (function - | path, true -> - Attach_helpers.NbdClient.stop_nbd_client ~nbd_device:path - | _ -> - () - ) - path_and_nbd ; - Option.iter (fun vdi -> Local.VDI.deactivate dbg dp sr vdi vm) vdi - ) - ) - (fun () -> - Option.iter - (fun (vdi, _) -> Local.VDI.detach dbg dp sr vdi vm) - attached_vdi - ) - -let perform_cleanup_actions = - List.iter (fun f -> - try f () - with e -> - error "Caught %s while performing cleanup actions" (Printexc.to_string e) - ) - -let progress_callback start len t y = - let new_progress = start +. (y *. len) in - Storage_task.set_state t (Task.Pending new_progress) ; - signal (Storage_task.id_of_handle t) - (** This module [MigrateLocal] consists of the concrete implementations of the migration part of SMAPI. Functions inside this module are sender driven, which means they tend to be executed on the sender side. although there is not a hard rule on what is executed on the sender side, this provides some heuristics. *) module MigrateLocal = struct - (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) - let copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~dest_vdi ~verify_dest = - let (module Remote) = get_remote_backend url verify_dest in - debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - verify_dest ; - (* Check the remote SR exists *) - let srs = Remote.SR.list dbg in - if not (List.mem dest srs) then - failwith - (Printf.sprintf "Remote SR %s not found" - (Storage_interface.Sr.string_of dest) - ) ; - let vdis = Remote.SR.scan dbg dest in - let remote_vdi = - try List.find (fun x -> x.vdi = dest_vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Remote VDI %s not found" - (Storage_interface.Vdi.string_of dest_vdi) - ) - in - let dest_content_id = remote_vdi.content_id in - (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - debug "copy local content_id=%s" local_vdi.content_id ; - debug "copy remote content_id=%s" dest_content_id ; - if local_vdi.virtual_size > remote_vdi.virtual_size then ( - (* This should never happen provided the higher-level logic is working properly *) - error "copy local virtual_size=%Ld > remote virtual_size = %Ld" - local_vdi.virtual_size remote_vdi.virtual_size ; - failwith "local VDI is larger than the remote VDI" - ) ; - let on_fail : (unit -> unit) list ref = ref [] in - let base_vdi = - try - let x = - (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi - in - debug - "local VDI has content_id = %s; we will perform an incremental copy" - dest_content_id ; - Some x - with _ -> - debug "no local VDI has content_id = %s; we will perform a full copy" - dest_content_id ; - None - in - try - let remote_dp = Uuidx.(to_string (make ())) in - let base_dp = Uuidx.(to_string (make ())) in - let leaf_dp = Uuidx.(to_string (make ())) in - let dest_vdi_url = - let url' = Http.Url.of_string url in - Http.Url.set_uri url' - (Printf.sprintf "%s/nbdproxy/%s/%s/%s/%s" (Http.Url.get_uri url') - (Storage_interface.Vm.string_of vm) - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - remote_dp - ) - |> Http.Url.to_string - in - debug "%s copy remote NBD URL = %s" __FUNCTION__ dest_vdi_url ; - let id = State.copy_id_of (sr, vdi) in - debug "Persisting state for copy (id=%s)" id ; - State.add id - State.( - Copy_op - Copy_state. - { - base_dp - ; leaf_dp - ; remote_dp - ; dest_sr= dest - ; copy_vdi= remote_vdi.vdi - ; remote_url= url - ; verify_dest - } - ) ; - SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - finally - (fun () -> - debug "activating RW datapath %s on remote" remote_dp ; - let backend = - Remote.VDI.attach3 dbg remote_dp dest dest_vdi vm true - in - let _, _, _, nbds = - Storage_interface.implementations_of_backend backend - in - let proto = - match nbds with - | [] -> - None - | uri :: _ -> - let _socket, export = Storage_interface.parse_nbd_uri uri in - Some (`NBD export) - in - Remote.VDI.activate3 dbg remote_dp dest dest_vdi vm ; - with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp ~vm - (fun base_path -> - with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp ~vm - (fun src -> - let verify_cert = - if verify_dest then Stunnel_client.pool () else None - in - let dd = - Sparse_dd_wrapper.start - ~progress_cb:(progress_callback 0.05 0.9 task) - ~verify_cert ~proto ?base:base_path true (Option.get src) - dest_vdi_url remote_vdi.virtual_size - in - Storage_task.with_cancel task - (fun () -> Sparse_dd_wrapper.cancel dd) - (fun () -> - try Sparse_dd_wrapper.wait dd - with Sparse_dd_wrapper.Cancelled -> - Storage_task.raise_cancelled task - ) - ) - ) - ) - (fun () -> - Remote.DP.destroy dbg remote_dp false ; - State.remove_copy id - ) ; - SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - debug "setting remote content_id <- %s" local_vdi.content_id ; - Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; - (* PR-1255: XXX: this is useful because we don't have content_ids by default *) - debug "setting local content_id <- %s" local_vdi.content_id ; - Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; - Some (Vdi_info remote_vdi) - with e -> - error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; - perform_cleanup_actions !on_fail ; - raise e - - (** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will - find the nearest vdi on the [dest] sr, and if there is no such vdi, it will - create one. *) - let copy_into_sr ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = - debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - verify_dest ; - let (module Remote) = get_remote_backend url verify_dest in - (* Find the local VDI *) - try - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith (Printf.sprintf "Local VDI not found") - in - try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in - debug "Similar VDIs = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let remote_vdis = Remote.SR.scan dbg dest in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let remote_vdis = - List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis - in - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= local_vdi.virtual_size - ) - remote_vdis - ) - with Not_found -> None - ) - ) - None similars - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let remote_base = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi_clone = Remote.VDI.clone dbg dest vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> local_vdi.virtual_size then - let new_size = - Remote.VDI.resize dbg dest vdi_clone.vdi - local_vdi.virtual_size - in - debug "Resize remote clone VDI to %Ld: result %Ld" - local_vdi.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Remote.VDI.create dbg dest {local_vdi with sm_config= []} - in - let remote_copy = - copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest - ~dest_vdi:remote_base.vdi ~verify_dest - |> vdi_info - in - let snapshot = Remote.VDI.snapshot dbg dest remote_copy in - Remote.VDI.destroy dbg dest remote_copy.vdi ; - Some (Vdi_info snapshot) - with e -> - error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; - raise (Storage_error (Internal_error (Printexc.to_string e))) - with - | Storage_error (Backend_error (code, params)) - | Api_errors.Server_error (code, params) -> - raise (Storage_error (Backend_error (code, params))) - | e -> - raise (Storage_error (Internal_error (Printexc.to_string e))) - let stop_internal ~dbg ~id = (* Find the local VDI *) let alm = State.find_active_local_mirror id in @@ -1111,8 +737,8 @@ let with_task_and_thread ~dbg f = let copy ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - MigrateLocal.copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~vm ~url - ~dest ~verify_dest + Storage_smapiv1_migrate.Copy.copy_into_sr ~task ~dbg:dbg.Debug_info.log + ~sr ~vdi ~vm ~url ~dest ~verify_dest ) let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index e924c208d8f..28a2bd42455 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -345,3 +345,31 @@ let get_remote_backend url verify_dest = Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url end)) in (module Remote : SMAPIv2) + +(** [similar_vdis dbg sr vdi] returns a list of content_ids of vdis + which are similar to the input [vdi] in [sr] *) +let similar_vdis ~dbg ~sr ~vdi = + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = + List.filter_map + (function + | {content_id; _} when content_id = "" -> + None + | {content_id; _} -> + Some content_id + ) + similar_vdis + in + + D.debug "%s Similar VDIs to = [ %s ]" __FUNCTION__ + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf "(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis + ) + ) ; + similars diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index 8ac0da552e2..b869c9daac5 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -14,6 +14,8 @@ module SXM : Debug.DEBUG +open Storage_interface + val failwith_fmt : ('a, unit, string, 'b) format4 -> 'a module State : sig @@ -258,3 +260,5 @@ end module Local : SMAPIv2 val get_remote_backend : string -> bool -> (module SMAPIv2) + +val similar_vdis : dbg:string -> sr:sr -> vdi:vdi -> uuid list diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 83dd41d4972..2833bf24b0f 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -15,13 +15,392 @@ module D = Debug.Make (struct let name = "storage_smapiv1_migrate" end) module Unixext = Xapi_stdext_unix.Unixext +open Xapi_stdext_pervasives.Pervasiveext open Storage_interface +open Xmlrpc_client open Storage_migrate_helper +open Storage_task module State = Storage_migrate_helper.State module SXM = Storage_migrate_helper.SXM module type SMAPIv2_MIRROR = Storage_interface.MIRROR +let with_activated_disk ~dbg ~sr ~vdi ~dp ~vm f = + let attached_vdi = + Option.map + (fun vdi -> + let backend = Local.VDI.attach3 dbg dp sr vdi vm false in + (vdi, backend) + ) + vdi + in + finally + (fun () -> + let path_and_nbd = + Option.map + (fun (vdi, backend) -> + let _xendisks, blockdevs, files, nbds = + Storage_interface.implementations_of_backend backend + in + match (files, blockdevs, nbds) with + | {path} :: _, _, _ | _, {path} :: _, _ -> + Local.VDI.activate3 dbg dp sr vdi vm ; + (path, false) + | _, _, nbd :: _ -> + Local.VDI.activate3 dbg dp sr vdi vm ; + let unix_socket_path, export_name = + Storage_interface.parse_nbd_uri nbd + in + ( Attach_helpers.NbdClient.start_nbd_client ~unix_socket_path + ~export_name + , true + ) + | [], [], [] -> + raise + (Storage_interface.Storage_error + (Backend_error + ( Api_errors.internal_error + , [ + "No File, BlockDevice or Nbd implementation in \ + Datapath.attach response: " + ^ (Storage_interface.(rpc_of backend) backend + |> Jsonrpc.to_string + ) + ] + ) + ) + ) + ) + attached_vdi + in + finally + (fun () -> f (Option.map (function path, _ -> path) path_and_nbd)) + (fun () -> + Option.iter + (function + | path, true -> + Attach_helpers.NbdClient.stop_nbd_client ~nbd_device:path + | _ -> + () + ) + path_and_nbd ; + Option.iter (fun vdi -> Local.VDI.deactivate dbg dp sr vdi vm) vdi + ) + ) + (fun () -> + Option.iter + (fun (vdi, _) -> Local.VDI.detach dbg dp sr vdi vm) + attached_vdi + ) + +let tapdisk_of_attach_info (backend : Storage_interface.backend) = + let _, blockdevices, _, nbds = + Storage_interface.implementations_of_backend backend + in + match (blockdevices, nbds) with + | blockdevice :: _, _ -> ( + let path = blockdevice.Storage_interface.path in + try + match Tapctl.of_device (Tapctl.create ()) path with + | tapdev, _, _ -> + Some tapdev + with + | Tapctl.Not_blktap -> + D.debug "Device %s is not controlled by blktap" path ; + None + | Tapctl.Not_a_device -> + D.debug "%s is not a device" path ; + None + | _ -> + D.debug "Device %s has an unknown driver" path ; + None + ) + | _, nbd :: _ -> ( + try + let path, _ = Storage_interface.parse_nbd_uri nbd in + let filename = Unix.realpath path |> Filename.basename in + Scanf.sscanf filename "nbd%d.%d" (fun pid minor -> + Some (Tapctl.tapdev_of ~pid ~minor) + ) + with _ -> + D.debug "No tapdisk found for NBD backend: %s" nbd.Storage_interface.uri ; + None + ) + | _ -> + D.debug "No tapdisk found for backend: %s" + (Storage_interface.(rpc_of backend) backend |> Rpc.to_string) ; + None + +let progress_callback start len t y = + let new_progress = start +. (y *. len) in + Storage_task.set_state t (Task.Pending new_progress) ; + signal (Storage_task.id_of_handle t) + +let perform_cleanup_actions = + List.iter (fun f -> + try f () + with e -> + D.error "Caught %s while performing cleanup actions" + (Printexc.to_string e) + ) + +module Copy = struct + (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) + let copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~dest_vdi ~verify_dest = + let (module Remote) = get_remote_backend url verify_dest in + D.debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + verify_dest ; + (* Check the remote SR exists *) + let srs = Remote.SR.list dbg in + if not (List.mem dest srs) then + failwith + (Printf.sprintf "Remote SR %s not found" + (Storage_interface.Sr.string_of dest) + ) ; + let vdis = Remote.SR.scan dbg dest in + let remote_vdi = + try List.find (fun x -> x.vdi = dest_vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Remote VDI %s not found" + (Storage_interface.Vdi.string_of dest_vdi) + ) + in + let dest_content_id = remote_vdi.content_id in + (* Find the local VDI *) + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + ) + in + D.debug "copy local content_id=%s" local_vdi.content_id ; + D.debug "copy remote content_id=%s" dest_content_id ; + if local_vdi.virtual_size > remote_vdi.virtual_size then ( + (* This should never happen provided the higher-level logic is working properly *) + D.error "copy local virtual_size=%Ld > remote virtual_size = %Ld" + local_vdi.virtual_size remote_vdi.virtual_size ; + failwith "local VDI is larger than the remote VDI" + ) ; + let on_fail : (unit -> unit) list ref = ref [] in + let base_vdi = + try + let x = + (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi + in + D.debug + "local VDI has content_id = %s; we will perform an incremental copy" + dest_content_id ; + Some x + with _ -> + D.debug "no local VDI has content_id = %s; we will perform a full copy" + dest_content_id ; + None + in + try + let remote_dp = Uuidx.(to_string (make ())) in + let base_dp = Uuidx.(to_string (make ())) in + let leaf_dp = Uuidx.(to_string (make ())) in + let dest_vdi_url = + let url' = Http.Url.of_string url in + Http.Url.set_uri url' + (Printf.sprintf "%s/nbdproxy/%s/%s/%s/%s" (Http.Url.get_uri url') + (Storage_interface.Vm.string_of vm) + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + remote_dp + ) + |> Http.Url.to_string + in + D.debug "%s copy remote NBD URL = %s" __FUNCTION__ dest_vdi_url ; + let id = State.copy_id_of (sr, vdi) in + D.debug "Persisting state for copy (id=%s)" id ; + State.add id + State.( + Copy_op + Copy_state. + { + base_dp + ; leaf_dp + ; remote_dp + ; dest_sr= dest + ; copy_vdi= remote_vdi.vdi + ; remote_url= url + ; verify_dest + } + ) ; + SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + finally + (fun () -> + D.debug "activating RW datapath %s on remote" remote_dp ; + let backend = + Remote.VDI.attach3 dbg remote_dp dest dest_vdi vm true + in + let _, _, _, nbds = + Storage_interface.implementations_of_backend backend + in + let proto = + match nbds with + | [] -> + None + | uri :: _ -> + let _socket, export = Storage_interface.parse_nbd_uri uri in + Some (`NBD export) + in + Remote.VDI.activate3 dbg remote_dp dest dest_vdi vm ; + with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp ~vm + (fun base_path -> + with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp ~vm + (fun src -> + let verify_cert = + if verify_dest then Stunnel_client.pool () else None + in + let dd = + Sparse_dd_wrapper.start + ~progress_cb:(progress_callback 0.05 0.9 task) + ~verify_cert ~proto ?base:base_path true (Option.get src) + dest_vdi_url remote_vdi.virtual_size + in + Storage_task.with_cancel task + (fun () -> Sparse_dd_wrapper.cancel dd) + (fun () -> + try Sparse_dd_wrapper.wait dd + with Sparse_dd_wrapper.Cancelled -> + Storage_task.raise_cancelled task + ) + ) + ) + ) + (fun () -> + Remote.DP.destroy dbg remote_dp false ; + State.remove_copy id + ) ; + SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + D.debug "setting remote content_id <- %s" local_vdi.content_id ; + Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; + (* PR-1255: XXX: this is useful because we don't have content_ids by default *) + D.debug "setting local content_id <- %s" local_vdi.content_id ; + Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; + Some (Vdi_info remote_vdi) + with e -> + D.error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; + perform_cleanup_actions !on_fail ; + raise e + + (** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will + find the nearest vdi on the [dest] sr, and if there is no such vdi, it will + create one. *) + let copy_into_sr ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = + D.debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + verify_dest ; + let (module Remote) = get_remote_backend url verify_dest in + (* Find the local VDI *) + try + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> failwith (Printf.sprintf "Local VDI not found") + in + try + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in + D.debug "Similar VDIs = [ %s ]" + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf "(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis + ) + ) ; + let remote_vdis = Remote.SR.scan dbg dest in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let remote_vdis = + List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis + in + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= local_vdi.virtual_size + ) + remote_vdis + ) + with Not_found -> None + ) + ) + None similars + in + D.debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let remote_base = + match nearest with + | Some vdi -> + D.debug "Cloning VDI" ; + let vdi_clone = Remote.VDI.clone dbg dest vdi in + D.debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> local_vdi.virtual_size then + let new_size = + Remote.VDI.resize dbg dest vdi_clone.vdi + local_vdi.virtual_size + in + D.debug "Resize remote clone VDI to %Ld: result %Ld" + local_vdi.virtual_size new_size + ) ; + vdi_clone + | None -> + D.debug "Creating a blank remote VDI" ; + Remote.VDI.create dbg dest {local_vdi with sm_config= []} + in + let remote_copy = + copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest + ~dest_vdi:remote_base.vdi ~verify_dest + |> vdi_info + in + let snapshot = Remote.VDI.snapshot dbg dest remote_copy in + Remote.VDI.destroy dbg dest remote_copy.vdi ; + Some (Vdi_info snapshot) + with e -> + D.error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; + raise (Storage_error (Internal_error (Printexc.to_string e))) + with + | Storage_error (Backend_error (code, params)) + | Api_errors.Server_error (code, params) -> + raise (Storage_error (Backend_error (code, params))) + | e -> + raise (Storage_error (Internal_error (Printexc.to_string e))) +end + module MIRROR : SMAPIv2_MIRROR = struct type context = unit diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 8a9b053f509..569f4f33bb0 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1152,7 +1152,7 @@ functor info "%s DATA.get_nbd_server dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; let attach_info = DP.attach_info context ~dbg:"nbd" ~sr ~vdi ~dp ~vm in - match Storage_migrate.tapdisk_of_attach_info attach_info with + match Storage_smapiv1_migrate.tapdisk_of_attach_info attach_info with | Some tapdev -> let minor = Tapctl.get_minor tapdev in let pid = Tapctl.get_tapdisk_pid tapdev in From e10381b58fda5a66035180c613cd08dd0d865b26 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 3 Apr 2025 17:04:10 +0100 Subject: [PATCH 120/492] Move MigrateRemote before MigrateLocal Because the latter needs to use the former. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 296 +++++++++++++++++----------------- 1 file changed, 148 insertions(+), 148 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 9ee1cf9daec..386463fe26b 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -37,6 +37,154 @@ let choose_backend dbg sr = (* this should never happen *) failwith "unsupported SMAPI version smapiv2" +(** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions +tend to be executed on the receiver side. *) +module MigrateRemote = struct + let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = + let on_fail : (unit -> unit) list ref = ref [] in + let vdis = Local.SR.scan dbg sr in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in + let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in + try + let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in + let leaf = Local.VDI.create dbg sr vdi_info in + info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + (* dummy VDI is created so that the leaf VDI becomes a differencing disk, + useful for calling VDI.compose later on *) + let dummy = Local.VDI.snapshot dbg sr leaf in + on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; + debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ + (string_of_vdi_info dummy) ; + let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= vdi_info.virtual_size + ) + vdis + ) + with Not_found -> None + ) + ) + None similar + in + debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let parent = + match nearest with + | Some vdi -> + debug "Cloning VDI" ; + let vdi = add_to_sm_config vdi "base_mirror" id in + let vdi_clone = Local.VDI.clone dbg sr vdi in + debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> vdi_info.virtual_size then + let new_size = + Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size + in + debug "Resize local clone VDI to %Ld: result %Ld" + vdi_info.virtual_size new_size + ) ; + vdi_clone + | None -> + debug "Creating a blank remote VDI" ; + Local.VDI.create dbg sr vdi_info + in + debug "Parent disk content_id=%s" parent.content_id ; + State.add id + State.( + Recv_op + Receive_state. + { + sr + ; dummy_vdi= dummy.vdi + ; leaf_vdi= leaf.vdi + ; leaf_dp + ; parent_vdi= parent.vdi + ; remote_vdi= vdi_info.vdi + ; mirror_vm= vm + } + ) ; + let nearest_content_id = Option.map (fun x -> x.content_id) nearest in + Mirror.Vhd_mirror + { + Mirror.mirror_vdi= leaf + ; mirror_datapath= leaf_dp + ; copy_diffs_from= nearest_content_id + ; copy_diffs_to= parent.vdi + ; dummy_vdi= dummy.vdi + } + with e -> + List.iter + (fun op -> + try op () + with e -> + debug "Caught exception in on_fail: %s" (Printexc.to_string e) + ) + !on_fail ; + raise e + + let receive_start ~dbg ~sr ~vdi_info ~id ~similar = + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") + + let receive_start2 ~dbg ~sr ~vdi_info ~id ~similar ~vm = + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm + + let receive_finalize ~dbg ~id = + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; + State.remove_receive_mirror id + + let receive_finalize2 ~dbg ~id = + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + SXM.info + "%s Mirror done. Compose on the dest sr %s parent %s and leaf %s" + __FUNCTION__ (Sr.string_of r.sr) + (Vdi.string_of r.parent_vdi) + (Vdi.string_of r.leaf_vdi) ; + Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; + (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so + there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) + log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; + Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + ) + recv_state ; + State.remove_receive_mirror id + + let receive_cancel ~dbg ~id = + let receive_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror id +end + (** This module [MigrateLocal] consists of the concrete implementations of the migration part of SMAPI. Functions inside this module are sender driven, which means they tend to be executed on the sender side. although there is not a hard rule @@ -426,154 +574,6 @@ module MigrateLocal = struct State.clear () end -(** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions -tend to be executed on the receiver side. *) -module MigrateRemote = struct - let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = - let on_fail : (unit -> unit) list ref = ref [] in - let vdis = Local.SR.scan dbg sr in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in - let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in - try - let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in - let leaf = Local.VDI.create dbg sr vdi_info in - info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; - (* dummy VDI is created so that the leaf VDI becomes a differencing disk, - useful for calling VDI.compose later on *) - let dummy = Local.VDI.snapshot dbg sr leaf in - on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; - debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ - (string_of_vdi_info dummy) ; - let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in - Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= vdi_info.virtual_size - ) - vdis - ) - with Not_found -> None - ) - ) - None similar - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let parent = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi = add_to_sm_config vdi "base_mirror" id in - let vdi_clone = Local.VDI.clone dbg sr vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> vdi_info.virtual_size then - let new_size = - Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size - in - debug "Resize local clone VDI to %Ld: result %Ld" - vdi_info.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Local.VDI.create dbg sr vdi_info - in - debug "Parent disk content_id=%s" parent.content_id ; - State.add id - State.( - Recv_op - Receive_state. - { - sr - ; dummy_vdi= dummy.vdi - ; leaf_vdi= leaf.vdi - ; leaf_dp - ; parent_vdi= parent.vdi - ; remote_vdi= vdi_info.vdi - ; mirror_vm= vm - } - ) ; - let nearest_content_id = Option.map (fun x -> x.content_id) nearest in - Mirror.Vhd_mirror - { - Mirror.mirror_vdi= leaf - ; mirror_datapath= leaf_dp - ; copy_diffs_from= nearest_content_id - ; copy_diffs_to= parent.vdi - ; dummy_vdi= dummy.vdi - } - with e -> - List.iter - (fun op -> - try op () - with e -> - debug "Caught exception in on_fail: %s" (Printexc.to_string e) - ) - !on_fail ; - raise e - - let receive_start ~dbg ~sr ~vdi_info ~id ~similar = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") - - let receive_start2 ~dbg ~sr ~vdi_info ~id ~similar ~vm = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm - - let receive_finalize ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; - State.remove_receive_mirror id - - let receive_finalize2 ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - SXM.info - "%s Mirror done. Compose on the dest sr %s parent %s and leaf %s" - __FUNCTION__ (Sr.string_of r.sr) - (Vdi.string_of r.parent_vdi) - (Vdi.string_of r.leaf_vdi) ; - Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; - Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; - (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so - there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) - log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; - Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" - ) - recv_state ; - State.remove_receive_mirror id - - let receive_cancel ~dbg ~id = - let receive_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; - List.iter - (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) - [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] - ) - receive_state ; - State.remove_receive_mirror id -end - exception Timeout of Mtime.Span.t let reqs_outstanding_timeout = Mtime.Span.(150 * s) From 10dea896aa41fd54aa52572961b55bb129693464 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 4 Apr 2025 15:42:12 +0100 Subject: [PATCH 121/492] Move `find_local_vdi` utility function Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 6 +----- ocaml/xapi/storage_migrate_helper.ml | 9 +++++++++ ocaml/xapi/storage_migrate_helper.mli | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 386463fe26b..2b2d50c788d 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -265,11 +265,7 @@ module MigrateLocal = struct let remote_url = Http.Url.of_string url in let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith "Local VDI not found" - in + let local_vdi = find_local_vdi ~dbg ~sr ~vdi in let mirror_id = State.mirror_id_of (sr, local_vdi.vdi) in debug "%s: Adding to active local mirrors before sending: id=%s" __FUNCTION__ mirror_id ; diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index 28a2bd42455..66c23d9a04e 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -346,6 +346,15 @@ let get_remote_backend url verify_dest = end)) in (module Remote : SMAPIv2) +let find_local_vdi ~dbg ~sr ~vdi = + (* Find the local VDI *) + let vdis, _ = Local.SR.scan2 dbg sr in + match List.find_opt (fun x -> x.vdi = vdi) vdis with + | None -> + failwith "Local VDI not found" + | Some v -> + v + (** [similar_vdis dbg sr vdi] returns a list of content_ids of vdis which are similar to the input [vdi] in [sr] *) let similar_vdis ~dbg ~sr ~vdi = diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index b869c9daac5..972faf57ce6 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -261,4 +261,6 @@ module Local : SMAPIv2 val get_remote_backend : string -> bool -> (module SMAPIv2) +val find_local_vdi : dbg:string -> sr:sr -> vdi:vdi -> vdi_info + val similar_vdis : dbg:string -> sr:sr -> vdi:vdi -> uuid list From bf931c54b3b6fee810a924f3698548ba87741e35 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 4 Apr 2025 15:54:46 +0100 Subject: [PATCH 122/492] Split Storage_migrate.start The original Storage_migrate.start function is quite fat and does lots of things. For better readability and maintainability, split it into multiple functions that represent different stages of the SXM: - prepare: makes a remote call to the dest host and asks it to prepare for the VDIs for receiving data - mirror_pass_fds: passes the fd to tapdisk to establish connection between two tapdisk processes (sending & receiving) - mirror_snapshot: takes a snapshot of the VDI to be mirrored, because the tapdisk mirroring only mirrors data written after the mirror is initiated - mirror_copy: copy the snapshot from the source to destination As these operations are all specific to SMAPIv1, move them to `storage_smapiv1_migrate` as well. Also restructure the way clean up is done. As there are multiple points at which the migration might fail, previously a `on_fail` list ref is constructed and populated as we go. Now that we have explicitly defined the different errors during the migration, we could handle these errors accordingly. The way to reason about cleanups is: we have three possible clean up actions: - receive_cancel: which will undo the preparation work done on the dest host. This is added once preparation is done; - destroy snapshot: which will destroy the snapshot. This is called after the snapshot is created; - stop: which will disable mirroring, delete snapshot and also do what receive_cancel does, which is basically clean up everything. And this will be called after the mirror has been started. Note many of these functions are best-effort, for example, the stop function won't delete a snapshot if it does not exist, so it's generally safe to call them. We put these clean up functions into the appropriate places, see the actual arrangement of these functions in the code, they should be self-explanatory. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 186 +++++-------------------- ocaml/xapi/storage_smapiv1_migrate.ml | 155 +++++++++++++++++++++ ocaml/xapi/storage_smapiv1_migrate.mli | 71 ++++++++++ 3 files changed, 263 insertions(+), 149 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 2b2d50c788d..7083087990b 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -247,6 +247,20 @@ module MigrateLocal = struct | e -> raise e + let prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url + ~verify_dest = + try + let (module Remote) = get_remote_backend url verify_dest in + let similars = similar_vdis ~dbg ~sr ~vdi in + + Remote.DATA.MIRROR.receive_start2 dbg dest local_vdi mirror_id similars + mirror_vm + with e -> + error "%s Caught error %s while preparing for SXM" __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_error (Migration_preparation_failure (Printexc.to_string e))) + let start ~task ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = SXM.info @@ -262,7 +276,6 @@ module MigrateLocal = struct (Storage_interface.Sr.string_of dest) verify_dest ; - let remote_url = Http.Url.of_string url in let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) let local_vdi = find_local_vdi ~dbg ~sr ~vdi in @@ -285,164 +298,30 @@ module MigrateLocal = struct State.add mirror_id (State.Send_op alm) ; debug "%s Added mirror %s to active local mirrors" __FUNCTION__ mirror_id ; (* A list of cleanup actions to perform if the operation should fail. *) - let on_fail : (unit -> unit) list ref = ref [] in try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = - List.filter - (fun x -> x <> "") - (List.map (fun vdi -> vdi.content_id) similar_vdis) + let (Vhd_mirror remote_mirror) = + prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url + ~verify_dest in - debug "Similar VDIs to = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let (Mirror.Vhd_mirror result) = - Remote.DATA.MIRROR.receive_start2 dbg dest local_vdi mirror_id similars - mirror_vm - in - (* Enable mirroring on the local machine *) - let mirror_dp = result.Mirror.mirror_datapath in - let uri = - Printf.sprintf "/services/SM/nbd/%s/%s/%s/%s" - (Storage_interface.Vm.string_of mirror_vm) - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) - mirror_dp - in - debug "%s: uri of http request for mirroring is %s" __FUNCTION__ uri ; - let dest_url = Http.Url.set_uri remote_url uri in - let request = - Http.Request.make - ~query:(Http.Url.get_query_params dest_url) - ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri - in - let verify_cert = if verify_dest then Stunnel_client.pool () else None in - let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in - debug "Searching for data path: %s" dp ; - let attach_info = Local.DP.attach_info dbg sr vdi dp mirror_vm in - on_fail := - (fun () -> Remote.DATA.MIRROR.receive_cancel dbg mirror_id) :: !on_fail ; let tapdev = - match tapdisk_of_attach_info attach_info with - | Some tapdev -> - let pid = Tapctl.get_tapdisk_pid tapdev in - let path = - Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid - in - with_transport ~stunnel_wait_disconnect:false transport - (with_http request (fun (_response, s) -> - let control_fd = - Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 - in - finally - (fun () -> - Unix.connect control_fd (Unix.ADDR_UNIX path) ; - let msg = dp in - let len = String.length msg in - let written = - Unixext.send_fd_substring control_fd msg 0 len [] s - in - if written <> len then ( - error "Failed to transfer fd to %s" path ; - failwith "Internal error transferring fd to tapdisk" - ) - ) - (fun () -> Unix.close control_fd) - ) - ) ; - tapdev - | None -> - failwith "Not attached" - in - debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; - let alm = - State.Send_state. - { - url - ; dest_sr= dest - ; remote_info= - Some - { - dp= mirror_dp - ; vdi= result.Mirror.mirror_vdi.vdi - ; url - ; verify_dest - } - ; local_dp= dp - ; tapdev= Some tapdev - ; failed= false - ; watchdog= None - } + Storage_smapiv1_migrate.mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm + ~mirror_id ~url ~dest_sr:dest ~verify_dest ~remote_mirror in - - State.add mirror_id (State.Send_op alm) ; - debug "%s Updated mirror_id %s in the active local mirror" __FUNCTION__ - mirror_id ; - - SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ - (string_of_vdi_info local_vdi) ; - let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in - let local_vdi = add_to_sm_config local_vdi "base_mirror" mirror_id in let snapshot = - try Local.VDI.snapshot dbg sr local_vdi with - | Storage_interface.Storage_error (Backend_error (code, _)) - when code = "SR_BACKEND_FAILURE_44" -> - raise - (Api_errors.Server_error - ( Api_errors.sr_source_space_insufficient - , [Storage_interface.Sr.string_of sr] - ) - ) - | e -> - raise e + Storage_smapiv1_migrate.mirror_snapshot ~dbg ~sr ~dp ~mirror_id + ~local_vdi in - SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" - __FUNCTION__ - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of local_vdi.vdi) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; - (let rec inner () = - let alm_opt = State.find_active_local_mirror mirror_id in - match alm_opt with - | Some alm -> - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( - error "Tapdisk mirroring has failed" ; - Updates.add (Dynamic.Mirror mirror_id) updates - ) ; - alm.State.Send_state.watchdog <- - Some - (Scheduler.one_shot scheduler (Scheduler.Delta 5) - "tapdisk_watchdog" inner - ) - | None -> - () - in - inner () - ) ; - on_fail := (fun () -> stop ~dbg ~id:mirror_id) :: !on_fail ; - (* Copy the snapshot to the remote *) + Storage_smapiv1_migrate.mirror_checker mirror_id tapdev ; let new_parent = - Storage_task.with_subtask task "copy" (fun () -> - copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~vm:copy_vm ~url - ~dest ~dest_vdi:result.Mirror.copy_diffs_to ~verify_dest - ) - |> vdi_info + Storage_smapiv1_migrate.mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm + ~url ~dest_sr:dest ~remote_mirror ~verify_dest in debug "Local VDI %s = remote VDI %s" (Storage_interface.Vdi.string_of snapshot.vdi) (Storage_interface.Vdi.string_of new_parent.vdi) ; debug "Local VDI %s now mirrored to remote VDI: %s" (Storage_interface.Vdi.string_of local_vdi.vdi) - (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) ; + (Storage_interface.Vdi.string_of remote_mirror.Mirror.mirror_vdi.vdi) ; debug "Destroying snapshot on src" ; Local.VDI.destroy dbg sr snapshot.vdi ; Some (Mirror_id mirror_id) @@ -450,11 +329,20 @@ module MigrateLocal = struct | Storage_error (Sr_not_attached sr_uuid) -> error " Caught exception %s:%s. Performing cleanup." Api_errors.sr_not_attached sr_uuid ; - perform_cleanup_actions !on_fail ; raise (Api_errors.Server_error (Api_errors.sr_not_attached, [sr_uuid])) + | ( Storage_error (Migration_mirror_fd_failure reason) + | Storage_error (Migration_mirror_snapshot_failure reason) ) as e -> + error "%s: Caught %s: during storage migration preparation" __FUNCTION__ + reason ; + MigrateRemote.receive_cancel ~dbg ~id:mirror_id ; + raise e + | Storage_error (Migration_mirror_copy_failure reason) as e -> + error "%s: Caught %s: during storage migration copy" __FUNCTION__ reason ; + stop ~dbg ~id:mirror_id ; + raise e | e -> - error "Caught %s: performing cleanup actions" (Api_errors.to_string e) ; - perform_cleanup_actions !on_fail ; + error "Caught %s during SXM: " (Api_errors.to_string e) ; + stop ~dbg ~id:mirror_id ; raise e let stat ~dbg:_ ~id = diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 2833bf24b0f..b816c791f2e 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -401,6 +401,161 @@ module Copy = struct raise (Storage_error (Internal_error (Printexc.to_string e))) end +let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr + ~verify_dest ~(remote_mirror : Mirror.mirror_receive_result_vhd_t) = + let remote_vdi = remote_mirror.mirror_vdi.vdi in + let mirror_dp = remote_mirror.mirror_datapath in + + let uri = + Printf.sprintf "/services/SM/nbd/%s/%s/%s/%s" + (Storage_interface.Vm.string_of mirror_vm) + (Storage_interface.Sr.string_of dest_sr) + (Storage_interface.Vdi.string_of remote_vdi) + mirror_dp + in + D.debug "%s: uri of http request for mirroring is %s" __FUNCTION__ uri ; + let dest_url = Http.Url.set_uri (Http.Url.of_string url) uri in + D.debug "%s url of http request for mirroring is %s" __FUNCTION__ + (Http.Url.to_string dest_url) ; + let request = + Http.Request.make + ~query:(Http.Url.get_query_params dest_url) + ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri + in + let verify_cert = if verify_dest then Stunnel_client.pool () else None in + let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in + D.debug "Searching for data path: %s" dp ; + let attach_info = Local.DP.attach_info dbg sr vdi dp mirror_vm in + + let tapdev = + match tapdisk_of_attach_info attach_info with + | Some tapdev -> + let pid = Tapctl.get_tapdisk_pid tapdev in + let path = Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid in + with_transport ~stunnel_wait_disconnect:false transport + (with_http request (fun (_response, s) -> + (* Enable mirroring on the local machine *) + let control_fd = Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 in + finally + (fun () -> + Unix.connect control_fd (Unix.ADDR_UNIX path) ; + let msg = dp in + let len = String.length msg in + let written = + Unixext.send_fd_substring control_fd msg 0 len [] s + in + if written <> len then ( + D.error "Failed to transfer fd to %s" path ; + failwith "Internal error transferring fd to tapdisk" + ) + ) + (fun () -> Unix.close control_fd) + ) + ) ; + tapdev + | None -> + D.error "%s: vdi %s not attached" __FUNCTION__ (Vdi.string_of vdi) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_fd_failure "VDI Not Attached") + ) + | exception e -> + D.error "%s Caught exception %s:. Performing cleanup." __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_fd_failure (Printexc.to_string e)) + ) + in + D.debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; + let alm = + State.Send_state. + { + url + ; dest_sr + ; remote_info= + Some + { + dp= remote_mirror.mirror_datapath + ; vdi= remote_mirror.mirror_vdi.vdi + ; url + ; verify_dest + } + ; local_dp= dp + ; tapdev= Some tapdev + ; failed= false + ; watchdog= None + } + in + State.add mirror_id (State.Send_op alm) ; + D.debug "%s Updated mirror_id %s in the active local mirror" __FUNCTION__ + mirror_id ; + tapdev + +let mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi = + SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ + (string_of_vdi_info local_vdi) ; + let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in + let local_vdi = add_to_sm_config local_vdi "base_mirror" mirror_id in + let snapshot = + try Local.VDI.snapshot dbg sr local_vdi with + | Storage_interface.Storage_error (Backend_error (code, _)) + when code = "SR_BACKEND_FAILURE_44" -> + raise + (Storage_interface.Storage_error + (Migration_mirror_snapshot_failure + (Printf.sprintf "%s:%s" Api_errors.sr_source_space_insufficient + (Storage_interface.Sr.string_of sr) + ) + ) + ) + | e -> + raise + (Storage_interface.Storage_error + (Migration_mirror_snapshot_failure (Printexc.to_string e)) + ) + in + + SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" + __FUNCTION__ + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of local_vdi.vdi) ; + + snapshot + +let mirror_checker mirror_id tapdev = + let rec inner () = + let alm_opt = State.find_active_local_mirror mirror_id in + match alm_opt with + | Some alm -> + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( + D.error "Tapdisk mirroring has failed" ; + Updates.add (Dynamic.Mirror mirror_id) updates + ) ; + alm.State.Send_state.watchdog <- + Some + (Scheduler.one_shot scheduler (Scheduler.Delta 5) "tapdisk_watchdog" + inner + ) + | None -> + () + in + inner () + +let mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm ~url ~dest_sr ~remote_mirror + ~verify_dest = + (* Copy the snapshot to the remote *) + try + Storage_task.with_subtask task "copy" (fun () -> + Copy.copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~vm:copy_vm ~url + ~dest:dest_sr ~dest_vdi:remote_mirror.Mirror.copy_diffs_to + ~verify_dest + ) + |> vdi_info + with e -> + raise (Storage_error (Migration_mirror_copy_failure (Printexc.to_string e))) + module MIRROR : SMAPIv2_MIRROR = struct type context = unit diff --git a/ocaml/xapi/storage_smapiv1_migrate.mli b/ocaml/xapi/storage_smapiv1_migrate.mli index d47b82cd86c..4c40e2ab999 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.mli +++ b/ocaml/xapi/storage_smapiv1_migrate.mli @@ -14,4 +14,75 @@ module type SMAPIv2_MIRROR = Storage_interface.MIRROR +val with_activated_disk : + dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi option + -> dp:string + -> vm:Storage_interface.vm + -> (string option -> 'a) + -> 'a + +val tapdisk_of_attach_info : Storage_interface.backend -> Tapctl.tapdev option + +module Copy : sig + val copy_into_vdi : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> vm:Storage_interface.vm + -> url:string + -> dest:Storage_interface.sr + -> dest_vdi:Storage_interface.vdi + -> verify_dest:bool + -> Storage_interface.async_result_t option + + val copy_into_sr : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> vm:Storage_interface.vm + -> url:string + -> dest:Storage_interface.sr + -> verify_dest:bool + -> Storage_interface.async_result_t option +end + +val mirror_pass_fds : + dbg:string + -> dp:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> mirror_vm:Storage_interface.vm + -> mirror_id:string + -> url:string + -> dest_sr:Storage_interface.sr + -> verify_dest:bool + -> remote_mirror:Storage_interface.Mirror.mirror_receive_result_vhd_t + -> Tapctl.tapdev + +val mirror_snapshot : + dbg:string + -> sr:Storage_interface.sr + -> dp:string + -> mirror_id:string + -> local_vdi:Storage_interface.vdi_info + -> Storage_interface.vdi_info + +val mirror_checker : string -> Tapctl.tapdev -> unit + +val mirror_copy : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> snapshot:Storage_interface.vdi_info + -> copy_vm:Storage_interface.vm + -> url:string + -> dest_sr:Storage_interface.sr + -> remote_mirror:Storage_interface.Mirror.mirror_receive_result_vhd_t + -> verify_dest:bool + -> Storage_interface.vdi_info + module MIRROR : SMAPIv2_MIRROR From 4230c0c38a97f6a9a7afed1cfc3548749ea7e178 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 4 Apr 2025 16:07:50 +0100 Subject: [PATCH 123/492] Remove duplicate Storage_migrate.stop impl Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 7083087990b..ef7c6fe67f9 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -633,13 +633,7 @@ let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = ) (* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) -let stop ~dbg ~id = - try MigrateLocal.stop ~dbg ~id with - | Storage_error (Backend_error (code, params)) - | Api_errors.Server_error (code, params) -> - raise (Storage_error (Backend_error (code, params))) - | e -> - raise e +let stop = MigrateLocal.stop let list = MigrateLocal.list From d1464390ff7ffb3b698292ccc8ae039aae139faf Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 4 Apr 2025 16:25:54 +0100 Subject: [PATCH 124/492] Implement send_start for SMAPIv1 Now that the split is done, we can implement the `send_start` function defined earlier for SMAPIv1, by combining all the different stages into one function and invoke it from `Storage_migrate.start`. At this point the refacotring for SMAPIv1 should be finished, and there should still be no functional change. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 37 ++++++++------------------- ocaml/xapi/storage_smapiv1_migrate.ml | 33 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index ef7c6fe67f9..ae3344d788b 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -261,8 +261,8 @@ module MigrateLocal = struct raise (Storage_error (Migration_preparation_failure (Printexc.to_string e))) - let start ~task ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest - = + let start ~task_id ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest + ~verify_dest = SXM.info "%s sr:%s vdi:%s dp: %s mirror_vm: %s copy_vm: %s url:%s dest:%s \ verify_dest:%B" @@ -298,32 +298,15 @@ module MigrateLocal = struct State.add mirror_id (State.Send_op alm) ; debug "%s Added mirror %s to active local mirrors" __FUNCTION__ mirror_id ; (* A list of cleanup actions to perform if the operation should fail. *) + let (module Migrate_Backend) = choose_backend dbg sr in try - let (Vhd_mirror remote_mirror) = + let remote_mirror = prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url ~verify_dest in - let tapdev = - Storage_smapiv1_migrate.mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm - ~mirror_id ~url ~dest_sr:dest ~verify_dest ~remote_mirror - in - let snapshot = - Storage_smapiv1_migrate.mirror_snapshot ~dbg ~sr ~dp ~mirror_id - ~local_vdi - in - Storage_smapiv1_migrate.mirror_checker mirror_id tapdev ; - let new_parent = - Storage_smapiv1_migrate.mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm - ~url ~dest_sr:dest ~remote_mirror ~verify_dest - in - debug "Local VDI %s = remote VDI %s" - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of new_parent.vdi) ; - debug "Local VDI %s now mirrored to remote VDI: %s" - (Storage_interface.Vdi.string_of local_vdi.vdi) - (Storage_interface.Vdi.string_of remote_mirror.Mirror.mirror_vdi.vdi) ; - debug "Destroying snapshot on src" ; - Local.VDI.destroy dbg sr snapshot.vdi ; + Migrate_Backend.send_start () ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm + ~mirror_id ~local_vdi ~copy_vm ~live_vm:(Vm.of_string "0") ~url + ~remote_mirror ~dest_sr:dest ~verify_dest ; Some (Mirror_id mirror_id) with | Storage_error (Sr_not_attached sr_uuid) -> @@ -628,8 +611,10 @@ let copy ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = with_dbg ~name:__FUNCTION__ ~dbg @@ fun dbg -> with_task_and_thread ~dbg (fun task -> - MigrateLocal.start ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm - ~copy_vm ~url ~dest ~verify_dest + MigrateLocal.start + ~task_id:(Storage_task.id_of_handle task) + ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest + ~verify_dest ) (* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index b816c791f2e..b38231dad5b 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -556,12 +556,41 @@ let mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm ~url ~dest_sr ~remote_mirror with e -> raise (Storage_error (Migration_mirror_copy_failure (Printexc.to_string e))) +let mirror_cleanup ~dbg ~sr ~snapshot = + D.debug "Destroying snapshot on src" ; + Local.VDI.destroy dbg sr snapshot.vdi + module MIRROR : SMAPIv2_MIRROR = struct type context = unit - let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi ~copy_vm ~live_vm:_ ~url ~remote_mirror ~dest_sr ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + match remote_mirror with + | Mirror.Vhd_mirror mirror_res -> + let tapdev = + mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr + ~verify_dest ~remote_mirror:mirror_res + in + + let snapshot = mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi in + + mirror_checker mirror_id tapdev ; + let task = Storage_task.(handle_of_id tasks) task_id in + let new_parent = + mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm ~url ~dest_sr + ~remote_mirror:mirror_res ~verify_dest + in - let send_start _ctx = u __FUNCTION__ + D.debug "Local VDI %s = remote VDI %s" + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of new_parent.vdi) ; + D.debug "Local VDI %s now mirrored to remote VDI: %s" + (Storage_interface.Vdi.string_of local_vdi.vdi) + (Storage_interface.Vdi.string_of mirror_res.Mirror.mirror_vdi.vdi) ; + mirror_cleanup ~dbg ~sr ~snapshot let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = let on_fail : (unit -> unit) list ref = ref [] in From e037eeea546b9153b630053eac47e487eece6ece Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Sun, 13 Apr 2025 16:23:15 +0100 Subject: [PATCH 125/492] doc: Add sxm mux design Signed-off-by: Vincent Liu --- doc/content/xapi/storage/sxm.md | 112 ++++++++++++++++-- doc/content/xapi/storage/sxm_mux_inbound.svg | 4 + doc/content/xapi/storage/sxm_mux_outbound.svg | 4 + 3 files changed, 110 insertions(+), 10 deletions(-) create mode 100644 doc/content/xapi/storage/sxm_mux_inbound.svg create mode 100644 doc/content/xapi/storage/sxm_mux_outbound.svg diff --git a/doc/content/xapi/storage/sxm.md b/doc/content/xapi/storage/sxm.md index 6c44e432d22..b3a97db4045 100644 --- a/doc/content/xapi/storage/sxm.md +++ b/doc/content/xapi/storage/sxm.md @@ -2,9 +2,101 @@ Title: Storage migration --- +- [Overview](#overview) +- [SXM Multiplexing](#sxm-multiplexing) + - [Motivation](#motivation) + - [But we have storage\_mux.ml](#but-we-have-storage_muxml) + - [Thought experiments on an alternative design](#thought-experiments-on-an-alternative-design) + - [Design](#design) +- [SMAPIv1 Migration](#smapiv1-migration) + - [Receiving SXM](#receiving-sxm) + - [Xapi code](#xapi-code) + - [Storage code](#storage-code) + - [Copying a VDI](#copying-a-vdi) + - [Mirroring a VDI](#mirroring-a-vdi) + - [Code walkthrough](#code-walkthrough) + - [DATA.copy](#datacopy) + - [DATA.copy\_into](#datacopy_into) + - [DATA.MIRROR.start](#datamirrorstart) + + ## Overview -{{}} + +## SXM Multiplexing + +This section is about the design idea behind the additional layer of mutiplexing specifically +for Storage Xen Motion (SXM) from SRs using SMAPIv3. It is recommended that you have read the +[introduction doc](_index.md) for the storage layer first to understand how storage +multiplexing is done between SMAPIv2 and SMAPI{v1, v3} before reading this. + + +### Motivation + +The existing SXM code was designed to work only with SMAPIv1 SRs, and therefore +does not take into account the dramatic difference in the ways SXM is done between +SMAPIv1 and SMAPIv3. The exact difference will be covered later on in this doc, for this section +it is sufficient to assume that they have two ways of doing migration. Therefore, +we need different code paths for migration from SMAPIv1 and SMAPIv3. + +#### But we have storage_mux.ml + +Indeed, storage_mux.ml is responsible for multiplexing and forwarding requests to +the correct storage backend, based on the SR type that the caller specifies. And +in fact, for inbound SXM to SMAPIv3 (i.e. migrating into a SMAPIv3 SR, GFS2 for example), +storage_mux is doing the heavy lifting of multiplexing between different storage +backends. Every time a `Remote.` call is invoked, this will go through the SMAPIv2 +layer to the remote host and get multiplexed on the destination host, based on +whether we are migrating into a SMAPIv1 or SMAPIv3 SR (see the diagram below). +And the inbound SXM is implemented +by implementing the existing SMAPIv2 -> SMAPIv3 calls (see `import_activate` for example) +which may not have been implemented before. + +![mux for inbound](sxm_mux_inbound.svg) + +While this works fine for inbound SXM, it does not work for outbound SXM. A typical SXM +consists of four combinations, the source sr type (v1/v3) and the destiantion sr +type (v1/v3), any of the four combinations is possible. We have already covered the +destination multiplexing (v1/v3) by utilising storage_mux, and at this point we +have run out of multiplexer for multiplexing on the source. In other words, we +can only mutiplex once for each SMAPIv2 call, and we can either use that chance for +either the source or the destination, and we have already used it for the latter. + + +#### Thought experiments on an alternative design + +To make it even more concrete, let us consider an example: the mirroring logic in +SXM is different based on the source SR type of the SXM call. You might imagine +defining a function like `MIRROR.start v3_sr v1_sr` that will be multiplexed +by the storage_mux based on the source SR type, and forwarded to storage_smapiv3_migrate, +or even just xapi-storage-script, which is indeed quite possible. +Now at this point we have already done the multiplexing, but we still wish to +multiplex operations on destination SRs, for example, we might want to attach a +VDI belonging to a SMAPIv1 SR on the remote host. But as we have already done the +multiplexing and is now inside xapi-storage-script, we have lost any chance of doing +any further multiplexing :( + +### Design + +The idea of this new design is to introduce an additional multiplexing layer that +is specific for multiplexing calls based on the source SR type. For example, in +the diagram below the `send_start src_sr dest_sr` will take both the src SR and the +destination SR as parameters, and suppose the mirroring logic is different for different +types of source SRs (i.e. SMAPIv1 or SMAPIv3), the storage migration code will +necessarily choose the right code path based on the source SR type. And this is +exactly what is done in this additional multiplexing layer. The respective logic +for doing {v1,v3}-specifi mirroring, for example, will stay in storage_smapi{v1,v3}_migrate.ml + +![mux for outbound](sxm_mux_outbound.svg) + +Note that later on storage_smapi{v1,v3}_migrate.ml will still have the flexibility +to call remote SMAPIv2 functions, such as `Remote.VDI.attach dest_sr vdi`, and +it will be handled just as before. + + +## SMAPIv1 Migration + +```mermaid sequenceDiagram participant local_tapdisk as local tapdisk participant local_smapiv2 as local SMAPIv2 @@ -129,7 +221,7 @@ opt post_detach_hook end Note over xapi: memory image migration by xenopsd Note over xapi: destroy the VM record -{{< /mermaid >}} +``` ### Receiving SXM @@ -162,7 +254,7 @@ the receiving end of storage motion: This is how xapi coordinates storage migration. We'll do it as a code walkthrough through the two layers: xapi and storage-in-xapi (SMAPIv2). -## Xapi code +### Xapi code The entry point is in [xapi_vm_migration.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/xapi_vm_migrate.ml#L786) @@ -1056,7 +1148,7 @@ We also try to remove the VM record from the destination if we managed to send i Finally we check for mirror failure in the task - this is set by the events thread watching for events from the storage layer, in [storage_access.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/storage_access.ml#L1169-L1207) -## Storage code +### Storage code The part of the code that is conceptually in the storage layer, but physically in xapi, is located in [storage_migrate.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/storage_migrate.ml). There are logically a few separate parts to this file: @@ -1069,7 +1161,7 @@ The part of the code that is conceptually in the storage layer, but physically i Let's start by considering the way the storage APIs are intended to be used. -### Copying a VDI +#### Copying a VDI `DATA.copy` takes several parameters: @@ -1119,7 +1211,7 @@ The implementation uses the `url` parameter to make SMAPIv2 calls to the destina The implementation tries to minimize the amount of data copied by looking for related VDIs on the destination SR. See below for more details. -### Mirroring a VDI +#### Mirroring a VDI `DATA.MIRROR.start` takes a similar set of parameters to that of copy: @@ -1156,11 +1248,11 @@ Note that state is a list since the initial phase of the operation requires both Additionally the mirror can be cancelled using the `MIRROR.stop` API call. -### Code walkthrough +#### Code walkthrough let's go through the implementation of `copy`: -#### DATA.copy +##### DATA.copy ```ocaml let copy ~task ~dbg ~sr ~vdi ~dp ~url ~dest = @@ -1296,7 +1388,7 @@ Finally we snapshot the remote VDI to ensure we've got a VDI of type 'snapshot' The exception handler does nothing - so we leak remote VDIs if the exception happens after we've done our cloning :-( -#### DATA.copy_into +##### DATA.copy_into Let's now look at the data-copying part. This is common code shared between `VDI.copy`, `VDI.copy_into` and `MIRROR.start` and hence has some duplication of the calls made above. @@ -1467,7 +1559,7 @@ The last thing we do is to set the local and remote content_id. The local set_co Here we perform the list of cleanup operations. Theoretically. It seems we don't ever actually set this to anything, so this is dead code. -#### DATA.MIRROR.start +##### DATA.MIRROR.start ```ocaml let start' ~task ~dbg ~sr ~vdi ~dp ~url ~dest = diff --git a/doc/content/xapi/storage/sxm_mux_inbound.svg b/doc/content/xapi/storage/sxm_mux_inbound.svg new file mode 100644 index 00000000000..c38bc36ae5f --- /dev/null +++ b/doc/content/xapi/storage/sxm_mux_inbound.svg @@ -0,0 +1,4 @@ + + + +
Storage_migrate.start
Host A
Host B
v1
v3
storage_mux server
Remote.VDI.attach
storage_mux server
rpc
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm_mux_outbound.svg b/doc/content/xapi/storage/sxm_mux_outbound.svg new file mode 100644 index 00000000000..915cc7550e3 --- /dev/null +++ b/doc/content/xapi/storage/sxm_mux_outbound.svg @@ -0,0 +1,4 @@ + + + +
Storage_migrate.start
Host A
Host B
storage_mux server
Remote.VDI.attach
vdi dst_sr
MIRROR.send_start
src_sr dst_sr
storage_smapiv1_migrate.
send_start
storage_smapiv3_migrate.
send_start
RPC to host B
....
new multiplexing layer
mux based on src_sr
\ No newline at end of file From 4e34d8d6895054904f37e5a33b19febfc5571965 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Sun, 13 Apr 2025 19:33:53 +0100 Subject: [PATCH 126/492] doc: Add an overview of SXM Signed-off-by: Vincent Liu --- doc/content/xapi/storage/sxm.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/content/xapi/storage/sxm.md b/doc/content/xapi/storage/sxm.md index b3a97db4045..945d715096a 100644 --- a/doc/content/xapi/storage/sxm.md +++ b/doc/content/xapi/storage/sxm.md @@ -22,6 +22,26 @@ Title: Storage migration ## Overview +The core idea of storage migration is surprisingly simple: We have VDIs attached to a VM, +and we wish to migrate these VDIs from one SR to another. This necessarily requires +us to copy the data stored in these VDIs over to the new SR, which can be a long-running +process if there are gigabytes or even terabytes of them. We wish to minimise the +down time of this process to allow the VM to keep running as much as possible. + +At a very high level, the SXM process generally only consists of two stages: preparation +and mirroring. The preparation is about getting the receiving host ready for the +mirroring operation, while the mirroring itself can be further divided into two +more operations: 1. sending new writes to both sides; 2.copying existing data from +source to destination. The exact detail of how to set up a mirror differs significantly +between SMAPIv1 and SMAPIv3, but both of them will have to perform the above two +operations. +Once the mirroring is established, it is a matter of checking the status of the +mirroring and carry on with the follwoing VM migration. + +The reality is more complex than what we had hoped for. For example, in SMAPIv1, +the mirror establishment is quite an involved process and is itself divided into +several stages, which will be discussed in more detail later on. + ## SXM Multiplexing From 615de98347840a10bdf0729df7c4278ea0ad069d Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Sun, 13 Apr 2025 19:34:10 +0100 Subject: [PATCH 127/492] doc: Add error handling of SXM Signed-off-by: Vincent Liu --- doc/content/xapi/storage/sxm.md | 104 +++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) diff --git a/doc/content/xapi/storage/sxm.md b/doc/content/xapi/storage/sxm.md index 945d715096a..8b7971bed79 100644 --- a/doc/content/xapi/storage/sxm.md +++ b/doc/content/xapi/storage/sxm.md @@ -8,7 +8,14 @@ Title: Storage migration - [But we have storage\_mux.ml](#but-we-have-storage_muxml) - [Thought experiments on an alternative design](#thought-experiments-on-an-alternative-design) - [Design](#design) -- [SMAPIv1 Migration](#smapiv1-migration) +- [SMAPIv1 migration](#smapiv1-migration) +- [SMAPIv3 migration](#smapiv3-migration) +- [Error Handling](#error-handling) + - [Preparation (SMAPIv1 and SMAPIv3)](#preparation-smapiv1-and-smapiv3) + - [Snapshot and mirror failure (SMAPIv1)](#snapshot-and-mirror-failure-smapiv1) + - [Mirror failure (SMAPIv3)](#mirror-failure-smapiv3) + - [Copy failure (SMAPIv1)](#copy-failure-smapiv1) +- [SMAPIv1 Migration implementation detail](#smapiv1-migration-implementation-detail) - [Receiving SXM](#receiving-sxm) - [Xapi code](#xapi-code) - [Storage code](#storage-code) @@ -113,8 +120,100 @@ Note that later on storage_smapi{v1,v3}_migrate.ml will still have the flexibili to call remote SMAPIv2 functions, such as `Remote.VDI.attach dest_sr vdi`, and it will be handled just as before. +## SMAPIv1 migration -## SMAPIv1 Migration +At a high level, mirror establishment for SMAPIv1 works as follows: + +1. Take a snapshot of a VDI that is attached to VM1. This gives us an immutable +copy of the current state of the VDI, with all the data until the point we took +the snapshot. This is illustrated in the diagram as a VDI and its snapshot connecting +to a shared parent, which stores the shared content for the snapshot and the writable +VDI from which we took the snapshot (snapshot) +2. Mirror the writable VDI to the server hosts: this means that all writes that goes to the +client VDI will also be written to the mirrored VDI on the remote host (mirror) +3. Copy the immutable snapshot from our local host to the remote (copy) +4. Compose the mirror and the snapshot to form a single VDI +5. Destroy the snapshot on the local host (cleanup) + + +more detail to come... + +## SMAPIv3 migration + +More detail to come... + +## Error Handling + +Storage migration is a long-running process, and is prone to failures in each +step. Hence it is important specifying what errors could be raised at each step +and their significance. This is beneficial both for the user and for triaging. + +There are two general cleanup functions in SXM: `MIRROR.receive_cancel` and +`MIRROR.stop`. The former is for cleaning up whatever has been created by `MIRROR.receive_start` +on the destination host (such as VDIs for receiving mirrored data). The latter is +a more comprehensive function that attempts to "undo" all the side effects that +was done during the SXM, and also calls `receive_cancel` as part of its operations. + +Currently error handling was done by building up a list of cleanup functions in +the `on_fail` list ref as the function executes. For example, if the `receive_start` +has been completed successfully, add `receive_cancel` to the list of cleanup functions. +And whenever an exception is encountered, just execute whatever has been added +to the `on_fail` list ref. This is convenient, but does entangle all the error +handling logic with the core SXM logic itself, making the code rather than hard +to understand and maintain. + +The idea to fix this is to introduce explicit "stages" during the SXM and define +explicitly what error handling should be done if it fails at a certain stage. This +helps separate the error handling logic into the `with` part of a `try with` block, +which is where they are supposed to be. Since we need to accommodate the existing +SMAPIv1 migration (which has more stages than SMAPIv3), the following stages are +introduced: preparation (v1,v3), snapshot(v1), mirror(v1, v3), copy(v1). Note that +each stage also roughly corresponds to a helper function that is called within `MIRROR.start`, +which is the wrapper function that initiates storage migration. And each helper +functions themselves would also have error handling logic within themselves as +needed (e.g. see `Storage_smapiv1_migrate.receive_start) to deal with exceptions +that happen within each helper functions. + +### Preparation (SMAPIv1 and SMAPIv3) + +The preparation stage generally corresponds to what is done in `receive_start`, and +this function itself will handle exceptions when there are partial failures within +the function itself, such as an exception after the receiving VDI is created. +It will use the old-style `on_fail` function but only with a limited scope. + +There is nothing to be done at a higher level (i.e within `MIRROR.start` which +calls `receive_start`) if preparation has failed. + +### Snapshot and mirror failure (SMAPIv1) + +For SMAPIv1, the mirror is done in a bit cumbersome way. The end goal is to establish +connections between two tapdisk processes on the source and destination hosts. +To achieve this goal, xapi will do two main jobs: 1. create a connection between two +hosts and pass the connection to tapdisk; 2. create a snapshot as a starting point +of the mirroring process. + +Therefore handling of failures at these two stages are similar: clean up what was +done in the preparation stage by calling `receive_cancel`, and that is almost it. +Again, we will leave whatever is needed for partial failure handling within those +functions themselves and only clean up at a stage-level in `storage_migrate.ml` + +Note that `receive_cancel` is a multiplexed function for SMAPIv1 and SMAPIv3, which +means different clean up logic will be executed depending on what type of SR we +are migrating from. + +### Mirror failure (SMAPIv3) + +To be filled... + +### Copy failure (SMAPIv1) + +The final step of storage migration for SMAPIv1 is to copy the snapshot from the +source to the destination. At this stage, most of the side effectful work has been +done, so we do need to call `MIRROR.stop` to clean things up if we experience an +failure during copying. + + +## SMAPIv1 Migration implementation detail ```mermaid sequenceDiagram @@ -1877,3 +1976,4 @@ let pre_deactivate_hook ~dbg ~dp ~sr ~vdi = s.failed <- true ) ``` + From bfea6f33240ada42189f8d527332f3f1c8ec8de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 15 Apr 2025 23:52:00 +0100 Subject: [PATCH 128/492] CA-409628: Add backtrace logging test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual location is currently missing from the backtrace, which is the bug CA-409628. Signed-off-by: Edwin Török --- ocaml/libs/log/test/dune | 6 ++++++ ocaml/libs/log/test/log_test.ml | 17 +++++++++++++++++ ocaml/libs/log/test/log_test.mli | 0 ocaml/libs/log/test/log_test.t | 4 ++++ 4 files changed, 27 insertions(+) create mode 100644 ocaml/libs/log/test/dune create mode 100644 ocaml/libs/log/test/log_test.ml create mode 100644 ocaml/libs/log/test/log_test.mli create mode 100644 ocaml/libs/log/test/log_test.t diff --git a/ocaml/libs/log/test/dune b/ocaml/libs/log/test/dune new file mode 100644 index 00000000000..ddfbf07bcc9 --- /dev/null +++ b/ocaml/libs/log/test/dune @@ -0,0 +1,6 @@ +(executable + (name log_test) + (libraries log xapi-stdext-threads threads.posix xapi-backtrace)) + +(cram + (deps log_test.exe)) diff --git a/ocaml/libs/log/test/log_test.ml b/ocaml/libs/log/test/log_test.ml new file mode 100644 index 00000000000..2d09c5f0457 --- /dev/null +++ b/ocaml/libs/log/test/log_test.ml @@ -0,0 +1,17 @@ +module D = Debug.Make (struct let name = Filename.basename __FILE__ end) + +let m = Mutex.create () + +let a = [||] + +let buggy () = a.(1) <- 0 + +let () = + Printexc.record_backtrace true ; + Debug.log_to_stdout () ; + () + |> Debug.with_thread_associated "main" @@ fun () -> + try Xapi_stdext_threads.Threadext.Mutex.execute m buggy + with e -> + D.log_backtrace () ; + D.warn "Got exception: %s" (Printexc.to_string e) diff --git a/ocaml/libs/log/test/log_test.mli b/ocaml/libs/log/test/log_test.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t new file mode 100644 index 00000000000..80d6abc0249 --- /dev/null +++ b/ocaml/libs/log/test/log_test.t @@ -0,0 +1,4 @@ + $ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' + [|debug||0 |main|log_test.ml] Raised at Xapi_stdext_pervasives__Pervasiveext.finally in file \"ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml\", line 39, characters 6-15\nCalled from Dune__exe__Log_test.(fun) in file \"ocaml/libs/log/test/log_test.ml\", line 14, characters 9-60\n + [| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") + From 524791ccb2c6b8e613c947f624788d24c10d0aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 15 Apr 2025 23:52:00 +0100 Subject: [PATCH 129/492] CA-409628: Do not lose the original backtrace in log_backtrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 2 log_backtrace functions in the Debug module, one of them prints the full backtrace, the other one prints the backtrace just from the last time Backtrace.is_important got called, i.e. it drops the *important* part of the backtrace. Delete the buggy function and replace it with a function that always takes an exception, so that we can look up any stashed backtrace. There are good reasons why Backtrace.is_important wipes the current backtrace after it stashes it away (destroying the important part): * if it didn't, then every time Backtrace.is_important got called we'd get the same backtrace prefix appeneded multiple times * for cross-process/language backtraces to work you need to use Backtrace.get to retrieve the stashed backtrace when printing, and not use the OCaml one directly from Printexc. But that means you need to be disciplined in how you deal with backtraces: * catching and reraising the same exception with the same backtrace using Printexc.rawbacktrace is fine * the first statement in an exception handler needs to be either Backtrace.is_important, or Debug.log_backtrace (which calls is_important internally) Using Printexc.get_backtrace/Printexc.get_rawbacktrace for printing purposes has to be avoided if we ever called Backtrace.is_important. The updated unit test shows that we properly print line 8 as the source of the exception now. This fixes backtraces from xcp-rrdd. Signed-off-by: Edwin Török --- ocaml/libs/http-lib/dune | 190 ++++++++++---------- ocaml/libs/http-lib/http_svr.ml | 6 +- ocaml/libs/http-lib/server_io.ml | 3 +- ocaml/libs/log/debug.ml | 9 +- ocaml/libs/log/debug.mli | 10 +- ocaml/libs/log/test/log_test.ml | 2 +- ocaml/libs/log/test/log_test.t | 7 +- ocaml/xapi-cli-server/cli_frontend.ml | 2 +- ocaml/xapi-guard/lib/server_interface.ml | 2 +- ocaml/xapi-guard/src/main.ml | 4 +- ocaml/xapi-guard/test/xapi_guard_test.ml | 2 +- ocaml/xapi-idl/gpumon/gpumon_interface.ml | 2 +- ocaml/xapi-idl/memory/memory_interface.ml | 2 +- ocaml/xapi-idl/network/network_interface.ml | 2 +- ocaml/xapi-idl/v6/v6_interface.ml | 2 +- ocaml/xapi/monitor_master.ml | 2 +- ocaml/xapi/xapi_ha.ml | 4 +- ocaml/xapi/xapi_observer_components.ml | 4 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 5 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 4 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml | 3 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 3 +- ocaml/xenopsd/xc/mem_stats.ml | 2 +- 24 files changed, 144 insertions(+), 130 deletions(-) diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 887aaf7dbc7..184fd5b2869 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -1,109 +1,103 @@ (library - (name http_lib) - (public_name http-lib) - (modes best) - (wrapped false) - (modules (:standard \ http_svr http_proxy server_io http_test radix_tree_test test_client test_server)) - (preprocess (per_module ((pps ppx_deriving_rpc) Http))) - (libraries - astring - base64 - fmt - ipaddr - mtime - mtime.clock.os - rpclib.core - rpclib.json - rpclib.xml - safe_resources - sha - stunnel - threads.posix - uuid - uri - xapi-backtrace - xapi-consts.xapi_version - xapi-idl.updates - xapi-log - clock - xapi-stdext-pervasives - xapi-stdext-threads - xapi-stdext-unix - xml-light2 - ) -) + (name http_lib) + (public_name http-lib) + (modes best) + (wrapped false) + (modules + (:standard + \ + http_svr + http_proxy + server_io + http_test + radix_tree_test + test_client + test_server)) + (preprocess + (per_module + ((pps ppx_deriving_rpc) + Http))) + (libraries + astring + base64 + fmt + ipaddr + mtime + mtime.clock.os + rpclib.core + rpclib.json + rpclib.xml + safe_resources + sha + stunnel + threads.posix + uuid + uri + xapi-backtrace + xapi-consts.xapi_version + xapi-idl.updates + xapi-log + clock + xapi-stdext-pervasives + xapi-stdext-threads + xapi-stdext-unix + xml-light2)) (library - (name httpsvr) - (wrapped false) - (modes best) - (modules http_svr http_proxy server_io) - (libraries - astring - http_lib - ipaddr - polly - tgroup - threads.posix - tracing - tracing_propagator - uri - xapi-log - xapi-stdext-pervasives - xapi-stdext-threads - xapi-stdext-unix - ) -) + (name httpsvr) + (wrapped false) + (modes best) + (modules http_svr http_proxy server_io) + (libraries + astring + http_lib + ipaddr + polly + tgroup + threads.posix + tracing + tracing_propagator + uri + xapi-backtrace + xapi-log + xapi-stdext-pervasives + xapi-stdext-threads + xapi-stdext-unix)) (tests - (names http_test radix_tree_test) - (package http-lib) - (modes (best exe)) - (modules http_test radix_tree_test) - (libraries - alcotest - - fmt - http_lib - ) -) + (names http_test radix_tree_test) + (package http-lib) + (modes + (best exe)) + (modules http_test radix_tree_test) + (libraries alcotest fmt http_lib)) (executable - (modes exe) - (name test_client) - (modules test_client) - (libraries - - http_lib - safe-resources - stunnel - threads.posix - xapi-backtrace - xapi-log - xapi-stdext-pervasives - xapi-stdext-unix - ) -) + (modes exe) + (name test_client) + (modules test_client) + (libraries + http_lib + safe-resources + stunnel + threads.posix + xapi-backtrace + xapi-log + xapi-stdext-pervasives + xapi-stdext-unix)) (executable - (modes exe) - (name test_server) - (modules test_server) - (libraries - - http_lib - httpsvr - safe-resources - threads.posix - xapi-stdext-threads - xapi-stdext-unix - ) -) + (modes exe) + (name test_server) + (modules test_server) + (libraries + http_lib + httpsvr + safe-resources + threads.posix + xapi-stdext-threads + xapi-stdext-unix)) (cram - (package xapi) - (deps - test_client.exe - test_server.exe - ) -) + (package xapi) + (deps test_client.exe test_server.exe)) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index d84ba6ad627..8de5874ebc1 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -189,8 +189,9 @@ let response_request_header_fields_too_large s = response_error_html s "431" "Request Header Fields Too Large" [] body let response_internal_error ?req ?extra exc s = + Backtrace.is_important exc ; E.error "Responding with 500 Internal Error due to %s" (Printexc.to_string exc) ; - E.log_backtrace () ; + E.log_backtrace exc ; let version = Option.map get_return_version req in let extra = Option.fold ~none:"" @@ -497,9 +498,10 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = (Unix.error_message a) b c ) | exc -> + Backtrace.is_important exc ; response_internal_error exc fd ~extra:(escape (Printexc.to_string exc)) ; - log_backtrace () + log_backtrace exc ) ; (None, None) diff --git a/ocaml/libs/http-lib/server_io.ml b/ocaml/libs/http-lib/server_io.ml index c821a27c024..61cce796afc 100644 --- a/ocaml/libs/http-lib/server_io.ml +++ b/ocaml/libs/http-lib/server_io.ml @@ -53,12 +53,13 @@ let establish_server ?(signal_fds = []) forker handler sock = let s, caller = Unix.accept ~cloexec:true sock in try ignore (forker handler s caller) with exc -> + Backtrace.is_important exc ; (* NB provided 'forker' is configured to make a background thread then the only way we can get here is if Thread.create fails. This means we haven't executed any code which could close the fd therefore we should do it ourselves. *) debug "Got exception in server_io.ml: %s" (Printexc.to_string exc) ; - log_backtrace () ; + log_backtrace exc ; Unix.close s ; Thread.delay 30.0 ) diff --git a/ocaml/libs/log/debug.ml b/ocaml/libs/log/debug.ml index 2f73cd47aca..db88c60a7e1 100644 --- a/ocaml/libs/log/debug.ml +++ b/ocaml/libs/log/debug.ml @@ -307,7 +307,7 @@ module type DEBUG = sig val audit : ?raw:bool -> ('a, unit, string, string) format4 -> 'a - val log_backtrace : unit -> unit + val log_backtrace : exn -> unit val log_and_ignore_exn : (unit -> unit) -> unit end @@ -344,9 +344,10 @@ functor ) fmt - let log_backtrace () = - let backtrace = Printexc.get_backtrace () in - debug "%s" (String.escaped backtrace) + let log_backtrace exn = + let level = Syslog.Debug in + if not (is_disabled Brand.name level) then + log_backtrace_internal ~level exn () let log_and_ignore_exn f = try f () diff --git a/ocaml/libs/log/debug.mli b/ocaml/libs/log/debug.mli index 4ba72886ce6..e552d56cc4a 100644 --- a/ocaml/libs/log/debug.mli +++ b/ocaml/libs/log/debug.mli @@ -76,7 +76,15 @@ module type DEBUG = sig val audit : ?raw:bool -> ('a, unit, string, string) format4 -> 'a (** Audit function *) - val log_backtrace : unit -> unit + val log_backtrace : exn -> unit + (** [log_backtrace exn] logs the backtrace associated with [exn]. + Either this or {!Backtrace.is_important} must be the first statement in an exception handler, + otherwise the backtrace may be overwritten (e.g. by formatting functions that internally raise and catch exceptions). + + This has to be used instead of getting a new backtrace from Printexc if [Backtrace.is_important] was ever called, + because that function stashes away the backtrace and then overwrites the current backtrace (to avoid duplicate frames in the stacktrace, + when Backtrace.get is used). + *) val log_and_ignore_exn : (unit -> unit) -> unit end diff --git a/ocaml/libs/log/test/log_test.ml b/ocaml/libs/log/test/log_test.ml index 2d09c5f0457..53d5cf0ddeb 100644 --- a/ocaml/libs/log/test/log_test.ml +++ b/ocaml/libs/log/test/log_test.ml @@ -13,5 +13,5 @@ let () = |> Debug.with_thread_associated "main" @@ fun () -> try Xapi_stdext_threads.Threadext.Mutex.execute m buggy with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "Got exception: %s" (Printexc.to_string e) diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t index 80d6abc0249..2d7b5fa1414 100644 --- a/ocaml/libs/log/test/log_test.t +++ b/ocaml/libs/log/test/log_test.t @@ -1,4 +1,9 @@ $ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' - [|debug||0 |main|log_test.ml] Raised at Xapi_stdext_pervasives__Pervasiveext.finally in file \"ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml\", line 39, characters 6-15\nCalled from Dune__exe__Log_test.(fun) in file \"ocaml/libs/log/test/log_test.ml\", line 14, characters 9-60\n + [|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") + [|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 + [|error||0 |main|backtrace] 2/4 log_test.exe Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 24 + [|error||0 |main|backtrace] 3/4 log_test.exe Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 39 + [|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 14 + [|error||0 |main|backtrace] [| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 1de1ac2a0a9..c67e6f4b3ab 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -4052,7 +4052,7 @@ let rio_help printer minimal cmd = in printer (Cli_printer.PTable [recs]) | None -> - D.log_backtrace () ; + D.log_backtrace Not_found ; error "Responding with Unknown command %s" cmd ; printer (Cli_printer.PList ["Unknown command '" ^ cmd ^ "'"]) in diff --git a/ocaml/xapi-guard/lib/server_interface.ml b/ocaml/xapi-guard/lib/server_interface.ml index 8a64a576897..6e81ce77f2d 100644 --- a/ocaml/xapi-guard/lib/server_interface.ml +++ b/ocaml/xapi-guard/lib/server_interface.ml @@ -58,7 +58,7 @@ let with_xapi ~cache ?(timeout = 120.) f = let serve_forever_lwt path callback = let conn_closed _ = () in let on_exn e = - log_backtrace () ; + log_backtrace e ; warn "Exception: %s" (Printexc.to_string e) in let stop, do_stop = Lwt.task () in diff --git a/ocaml/xapi-guard/src/main.ml b/ocaml/xapi-guard/src/main.ml index 67e0b7f1d0b..c7bbe3a1157 100644 --- a/ocaml/xapi-guard/src/main.ml +++ b/ocaml/xapi-guard/src/main.ml @@ -299,7 +299,7 @@ let make_message_switch_server () = (* best effort resume *) let* () = Lwt.catch (resume ~vtpm_read_write ~uefi_read_write) (fun e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "Resume failed: %s" (Printexc.to_string e) ; Lwt.return_unit ) @@ -321,7 +321,7 @@ let main log_level = let old_hook = !Lwt.async_exception_hook in (Lwt.async_exception_hook := fun exn -> - D.log_backtrace () ; + D.log_backtrace exn ; D.error "Lwt caught async exception: %s" (Printexc.to_string exn) ; old_hook exn ) ; diff --git a/ocaml/xapi-guard/test/xapi_guard_test.ml b/ocaml/xapi-guard/test/xapi_guard_test.ml index 280d9f4d627..475a96d3a27 100644 --- a/ocaml/xapi-guard/test/xapi_guard_test.ml +++ b/ocaml/xapi-guard/test/xapi_guard_test.ml @@ -68,7 +68,7 @@ let () = let old_hook = !Lwt.async_exception_hook in Lwt.async_exception_hook := fun exn -> - D.log_backtrace () ; + D.log_backtrace exn ; D.error "Lwt caught async exception: %s" (Printexc.to_string exn) ; old_hook exn diff --git a/ocaml/xapi-idl/gpumon/gpumon_interface.ml b/ocaml/xapi-idl/gpumon/gpumon_interface.ml index 465acf91f6d..cd873ada755 100644 --- a/ocaml/xapi-idl/gpumon/gpumon_interface.ml +++ b/ocaml/xapi-idl/gpumon/gpumon_interface.ml @@ -87,8 +87,8 @@ let gpu_err = def= gpu_errors ; raiser= (fun e -> - log_backtrace () ; let exn = Gpumon_error e in + log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/memory/memory_interface.ml b/ocaml/xapi-idl/memory/memory_interface.ml index 7f31f13eeb0..02d3a962d22 100644 --- a/ocaml/xapi-idl/memory/memory_interface.ml +++ b/ocaml/xapi-idl/memory/memory_interface.ml @@ -87,8 +87,8 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = MemoryError e in + log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index 6b27e31f5bf..f7dde90003a 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -294,8 +294,8 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = Network_error e in + log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/v6/v6_interface.ml b/ocaml/xapi-idl/v6/v6_interface.ml index 74b8201dbc7..46935590a32 100644 --- a/ocaml/xapi-idl/v6/v6_interface.ml +++ b/ocaml/xapi-idl/v6/v6_interface.ml @@ -111,8 +111,8 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = V6_error e in + log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi/monitor_master.ml b/ocaml/xapi/monitor_master.ml index b506d030205..18a2c9edf7e 100644 --- a/ocaml/xapi/monitor_master.ml +++ b/ocaml/xapi/monitor_master.ml @@ -173,7 +173,7 @@ let update_pifs ~__context host pifs = |> List.concat_map vifs_on_local_bridge |> List.iter set_carrier with e -> - log_backtrace () ; + log_backtrace e ; error "Failed to update VIF carrier flags for PIF: %s" (ExnHelper.string_of_exn e) ) ; diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index b452aaa8221..488ccaf0c31 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -527,7 +527,7 @@ module Monitor = struct Xapi_ha_vm_failover.restart_auto_run_vms ~__context liveset_refs to_tolerate with e -> - log_backtrace () ; + log_backtrace e ; error "Caught unexpected exception when executing restart plan: \ %s" @@ -832,7 +832,7 @@ module Monitor = struct ) ) with e -> - log_backtrace () ; + log_backtrace e ; debug "Exception in HA monitor thread: %s" (ExnHelper.string_of_exn e) ; Thread.delay !Xapi_globs.ha_monitor_interval diff --git a/ocaml/xapi/xapi_observer_components.ml b/ocaml/xapi/xapi_observer_components.ml index e7803189151..18dd8bdd25d 100644 --- a/ocaml/xapi/xapi_observer_components.ml +++ b/ocaml/xapi/xapi_observer_components.ml @@ -91,13 +91,13 @@ let is_component_enabled ~component = ) observers with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "is_component_enabled(%s) inner got exception: %s" (to_string component) (Printexc.to_string e) ; false ) with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "is_component_enabled(%s) got exception: %s" (to_string component) (Printexc.to_string e) ; false diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 0d5ac4d4201..6fa7d58aefe 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -299,7 +299,7 @@ let update_rrds uuid_domids paused_vms plugins_dss = reset_missing_data sr_rrdi.rrd missing_updates ; Hashtbl.replace sr_rrds sr_uuid sr_rrdi - with _ -> log_backtrace () + with e -> log_backtrace e in let process_host plugins_dss available_dss = let host_rrdi = !host_rrd in diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 3a883a56986..a813ec036c9 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -300,7 +300,7 @@ let migrate_rrd (session_id : string option) (remote_address : string) Some x | None -> debug "VM %s RRDs not found on migrate! Continuing anyway..." vm_uuid ; - log_backtrace () ; + log_backtrace Not_found ; None ) |> Option.iter (fun rrdi -> @@ -696,9 +696,10 @@ module Plugin = struct incr_skip_count uid plugin ; (* increase skip count *) let log e = + Backtrace.is_important e ; info "Failed to process plugin metrics file: %s (%s)" (P.string_of_uid ~uid) (Printexc.to_string e) ; - log_backtrace () + log_backtrace e in let open Rrd_protocol in match e with diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 883f9844cb5..8800ed56836 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -140,7 +140,7 @@ let send_rrd ?(session_id : string option) let open Xmlrpc_client in with_transport transport (with_http request (fun (_response, fd) -> - try Rrd_unix.to_fd ~internal:true rrd fd with _ -> log_backtrace () + try Rrd_unix.to_fd ~internal:true rrd fd with e -> log_backtrace e ) ) ; debug "Sending RRD complete." @@ -171,7 +171,7 @@ let archive_rrd_internal ?(transport = None) ~uuid ~rrd () = Xapi_stdext_unix.Unixext.unlink_safe base_filename ) else debug "No local storage: not persisting RRDs" - with _ -> log_backtrace () + with e -> log_backtrace e ) | Some transport -> (* Stream it to the master to store, or maybe to a host in the migrate diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml index 09ffd401a09..c0404e17b1b 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml @@ -238,5 +238,6 @@ let print_snapshot () = print_stats () ) with e -> + Backtrace.is_important e ; debug "Caught: %s" (Printexc.to_string e) ; - log_backtrace () + log_backtrace e diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 6321cb85374..448dc98f9cb 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -543,11 +543,12 @@ let monitor_write_loop writers = ) ; Thread.delay !Rrdd_shared.timeslice with e -> + Backtrace.is_important e ; warn "Monitor/write thread caught an exception. Pausing for 10s, \ then restarting: %s" (Printexc.to_string e) ; - log_backtrace () ; + log_backtrace e ; Thread.delay 10. done ) diff --git a/ocaml/xenopsd/xc/mem_stats.ml b/ocaml/xenopsd/xc/mem_stats.ml index 8daca47aff6..12353e56c1c 100644 --- a/ocaml/xenopsd/xc/mem_stats.ml +++ b/ocaml/xenopsd/xc/mem_stats.ml @@ -325,7 +325,7 @@ let generate_stats_exn () = let generate_stats () = try generate_stats_exn () with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.debug "Failed to generate stats: %s" (Printexc.to_string e) ; [] From d5c00dba6c10a247fa17005bdcdacc3f3f01b6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 17 Apr 2025 16:32:14 +0100 Subject: [PATCH 130/492] CA-409628: remove leftover log_backtrace from find->find_opt conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These shouldn't be necessary anymore, since no exception is raised. Signed-off-by: Edwin Török --- ocaml/xapi-cli-server/cli_frontend.ml | 1 - ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 1 - 2 files changed, 2 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index c67e6f4b3ab..389b880a268 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -4052,7 +4052,6 @@ let rio_help printer minimal cmd = in printer (Cli_printer.PTable [recs]) | None -> - D.log_backtrace Not_found ; error "Responding with Unknown command %s" cmd ; printer (Cli_printer.PList ["Unknown command '" ^ cmd ^ "'"]) in diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index a813ec036c9..546568d5cdf 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -300,7 +300,6 @@ let migrate_rrd (session_id : string option) (remote_address : string) Some x | None -> debug "VM %s RRDs not found on migrate! Continuing anyway..." vm_uuid ; - log_backtrace Not_found ; None ) |> Option.iter (fun rrdi -> From 639a6344be2ba418b6b50adb22ae4c46e01302a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 17 Apr 2025 16:34:17 +0100 Subject: [PATCH 131/492] CA-409628: remove unnecessary log_backtrace from API boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should only log_backtrace if we are the final handler. The exception is raised here, so the caller will have a chance to log it. This was also inconsistent: some *_interface logged the backtrace, and others didn't. In theory there is a chance that the caller is buggy and doesn't log the correct backtrace. But if we simplify the places that call the Backtrace module, we'll have fewer chances of that going wrong. Signed-off-by: Edwin Török --- ocaml/xapi-idl/gpumon/gpumon_interface.ml | 1 - ocaml/xapi-idl/memory/memory_interface.ml | 1 - ocaml/xapi-idl/network/network_interface.ml | 1 - ocaml/xapi-idl/v6/v6_interface.ml | 1 - 4 files changed, 4 deletions(-) diff --git a/ocaml/xapi-idl/gpumon/gpumon_interface.ml b/ocaml/xapi-idl/gpumon/gpumon_interface.ml index cd873ada755..ab02b38260c 100644 --- a/ocaml/xapi-idl/gpumon/gpumon_interface.ml +++ b/ocaml/xapi-idl/gpumon/gpumon_interface.ml @@ -88,7 +88,6 @@ let gpu_err = ; raiser= (fun e -> let exn = Gpumon_error e in - log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/memory/memory_interface.ml b/ocaml/xapi-idl/memory/memory_interface.ml index 02d3a962d22..f8e39496ef5 100644 --- a/ocaml/xapi-idl/memory/memory_interface.ml +++ b/ocaml/xapi-idl/memory/memory_interface.ml @@ -88,7 +88,6 @@ let err = ; raiser= (fun e -> let exn = MemoryError e in - log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index f7dde90003a..2f3368fc131 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -295,7 +295,6 @@ let err = ; raiser= (fun e -> let exn = Network_error e in - log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) diff --git a/ocaml/xapi-idl/v6/v6_interface.ml b/ocaml/xapi-idl/v6/v6_interface.ml index 46935590a32..ba42aa259ec 100644 --- a/ocaml/xapi-idl/v6/v6_interface.ml +++ b/ocaml/xapi-idl/v6/v6_interface.ml @@ -112,7 +112,6 @@ let err = ; raiser= (fun e -> let exn = V6_error e in - log_backtrace exn ; error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn ) From eb5604b0dc0b0929bfe32df19b054e6adb6ea8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 17 Apr 2025 16:39:56 +0100 Subject: [PATCH 132/492] CA-409628: remove duplicate exception backtrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit response_internal_error already calls Backtrace.is_important in the correct place, and logs the exception. There is no need to do that a 2nd time in the caller. Signed-off-by: Edwin Török --- ocaml/libs/http-lib/http_svr.ml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 8de5874ebc1..5327e9469cc 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -498,10 +498,8 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = (Unix.error_message a) b c ) | exc -> - Backtrace.is_important exc ; response_internal_error exc fd - ~extra:(escape (Printexc.to_string exc)) ; - log_backtrace exc + ~extra:(escape (Printexc.to_string exc)) ) ; (None, None) From 8a2dba33dbc50f43e5424a63409c03e4c364c50f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 17 Apr 2025 16:44:00 +0100 Subject: [PATCH 133/492] CA-409628: add missing Backtrace.is_important MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This shows how brittle the current Backtrace API is, this was missing from a lot of places. We have some better alternatives (`with_backtraces`, or a `try_with` function) that'd guarantee that `important` is always called in the right place, but that would be a more invasive change, which will be done in a followup commit. Signed-off-by: Edwin Török --- ocaml/gencert/selfcert.ml | 1 + ocaml/libs/http-lib/http_svr.ml | 1 + ocaml/libs/log/debug.ml | 4 ++++ ocaml/xapi/cancel_tasks.ml | 1 + ocaml/xapi/storage_access.ml | 1 + ocaml/xapi/xapi_extensions.ml | 1 + ocaml/xapi/xapi_pool.ml | 1 + ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- 8 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ocaml/gencert/selfcert.ml b/ocaml/gencert/selfcert.ml index 3b022bcb19f..3d840d34c2a 100644 --- a/ocaml/gencert/selfcert.ml +++ b/ocaml/gencert/selfcert.ml @@ -109,6 +109,7 @@ let generate_pub_priv_key length = let stdout, _stderr = call_openssl args in Ok stdout with e -> + Backtrace.is_important e ; let msg = "generating RSA key failed" in D.error "selfcert.ml: %s" msg ; Debug.log_backtrace e (Backtrace.get e) ; diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 5327e9469cc..4db3df81d2a 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -474,6 +474,7 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = in (Some r, proxy) with e -> + Backtrace.is_important e ; D.warn "%s (%s)" (Printexc.to_string e) __LOC__ ; best_effort (fun () -> match e with diff --git a/ocaml/libs/log/debug.ml b/ocaml/libs/log/debug.ml index db88c60a7e1..ac0478e281b 100644 --- a/ocaml/libs/log/debug.ml +++ b/ocaml/libs/log/debug.ml @@ -258,6 +258,10 @@ let with_thread_associated ?client ?(quiet = false) desc f x = (* This function is a top-level exception handler typically used on fresh threads. This is the last chance to do something with the backtrace *) if not quiet then ( + (* It would seem that a Backtrace.is_important would be missing here. + But in fact it has actually been called in [let result] above, + so calling it again is not necessary. + *) output_log "backtrace" Syslog.Err "error" (Printf.sprintf "%s failed with exception %s" desc (Printexc.to_string exn) diff --git a/ocaml/xapi/cancel_tasks.ml b/ocaml/xapi/cancel_tasks.ml index 690cd1026b1..3ec7594f378 100644 --- a/ocaml/xapi/cancel_tasks.ml +++ b/ocaml/xapi/cancel_tasks.ml @@ -21,6 +21,7 @@ open D let safe_wrapper n f x = try f x with e -> + Backtrace.is_important e ; debug "Caught exception while cancelling tasks (%s): %s" n (ExnHelper.string_of_exn e) ; Debug.log_backtrace e (Backtrace.get e) diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 9de67a007d5..6f2b540dac4 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -162,6 +162,7 @@ let on_xapi_start ~__context = | Message_switch_failure -> [] (* no more logging *) | e -> + Backtrace.is_important e ; error "Unexpected error querying the message switch: %s" (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; diff --git a/ocaml/xapi/xapi_extensions.ml b/ocaml/xapi/xapi_extensions.ml index 301a0a5e686..e19dc88933d 100644 --- a/ocaml/xapi/xapi_extensions.ml +++ b/ocaml/xapi/xapi_extensions.ml @@ -79,6 +79,7 @@ let call_extension rpc = | Api_errors.Server_error (code, params) -> API.response_of_failure code params | e -> + Backtrace.is_important e ; error "Unexpected exception calling extension %s: %s" rpc.Rpc.name (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 68c41f91a42..b2d6da1122f 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1358,6 +1358,7 @@ let create_or_get_secret_on_master __context rpc session_id (_secret_ref, secret let protect_exn f x = try Some (f x) with e -> + Backtrace.is_important e ; debug "Ignoring exception: %s" (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; None diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 546568d5cdf..6e11a2da31c 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -692,10 +692,10 @@ module Plugin = struct (* reset skip counts *) payload with e -> ( + Backtrace.is_important e ; incr_skip_count uid plugin ; (* increase skip count *) let log e = - Backtrace.is_important e ; info "Failed to process plugin metrics file: %s (%s)" (P.string_of_uid ~uid) (Printexc.to_string e) ; log_backtrace e From 571f36ae5dd495c9232b0be24f0aabe4010e1419 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Apr 2025 14:03:31 +0100 Subject: [PATCH 134/492] Update cluster-stack-version lifecycle This was not set properly when it was first introduced in xapi-24.15.0 Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_cluster.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_cluster.ml b/ocaml/idl/datamodel_cluster.ml index dba9b76c73b..eefdbf7c6bf 100644 --- a/ocaml/idl/datamodel_cluster.ml +++ b/ocaml/idl/datamodel_cluster.ml @@ -204,7 +204,7 @@ let t = (Some (VString Constants.default_smapiv3_cluster_stack)) "Simply the string 'corosync'. No other cluster stacks are \ currently supported" - ; field ~qualifier:StaticRO ~lifecycle ~ty:Int "cluster_stack_version" + ; field ~qualifier:StaticRO ~lifecycle:[] ~ty:Int "cluster_stack_version" ~default_value:(Some (VInt 2L)) "Version of cluster stack, not writable via the API. Defaulting to \ 2 for backwards compatibility when upgrading from a cluster \ diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 24543829da6..4ec63bb4cfb 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -83,6 +83,8 @@ let prototyped_of_field = function Some "24.3.0" | "Cluster", "is_quorate" -> Some "24.3.0" + | "Cluster", "cluster_stack_version" -> + Some "24.15.0" | "VTPM", "contents" -> Some "22.26.0" | "VTPM", "is_protected" -> From b18a1c911b181e5229efa27d49a9eb681c5f7bba Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Apr 2025 14:04:47 +0100 Subject: [PATCH 135/492] Update datamodel lifecycle Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_lifecycle.ml | 2 +- ocaml/idl/schematest.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 4ec63bb4cfb..90d5db8de7e 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -126,7 +126,7 @@ let prototyped_of_field = function | "VM", "actions__after_softreboot" -> Some "23.1.0" | "pool", "ha_reboot_vm_on_internal_shutdown" -> - Some "25.15.0-next" + Some "25.16.0" | "pool", "license_server" -> Some "25.6.0" | "pool", "recommendations" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 36b99cd3c62..a9f7dbfc776 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "e10b420b0863116ee188eea9e63b1349" +let last_known_schema_hash = "0cf3458af211661024fca9c1d0ab34ab" let current_schema_hash : string = let open Datamodel_types in From 46a66c36edc8d711f93f8a6591f020f3a6ae190c Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 19 Mar 2025 15:11:25 +0000 Subject: [PATCH 136/492] CP-54034: Expose `expected_votes` in Cluster object The `expected_votes` field in corosync represents the number of hosts that is expected by the cluster stack. In the context of corosync, this is the same as the number of hosts as in the corosync.conf file*. This is a useful field to expose to the user so that they can see how many nodes actually are expected. We also have `Cluster_host` object, which represents xapi's view of what nodes should be in the cluster, but that might not be identical to corosync's view, especially when a host is disabled, but is still left in the list of Cluster_host objects. Although one could argue that we could infer this `expected_votes` field from the number of enabled Cluster_hosts, it might still be useful to get this information directly from corosync. *: there are ways in corosync to make one host cast multiple votes, but that feature is not used. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_cluster.ml | 3 +++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/schematest.ml | 2 +- ocaml/tests/common/test_common.ml | 5 +++-- ocaml/tests/test_cluster.ml | 1 + ocaml/tests/test_cluster_host.ml | 2 +- ocaml/xapi-cli-server/records.ml | 3 +++ ocaml/xapi-idl/cluster/cluster_interface.ml | 4 ++++ ocaml/xapi/xapi_cluster.ml | 2 +- ocaml/xapi/xapi_clustering.ml | 2 ++ 11 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ocaml/idl/datamodel_cluster.ml b/ocaml/idl/datamodel_cluster.ml index eefdbf7c6bf..c3556fee726 100644 --- a/ocaml/idl/datamodel_cluster.ml +++ b/ocaml/idl/datamodel_cluster.ml @@ -219,6 +219,9 @@ let t = ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int "live_hosts" ~default_value:(Some (VInt 0L)) "Current number of live hosts, according to the cluster stack" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int "expected_hosts" + ~default_value:(Some (VInt 0L)) + "Total number of hosts expected by the cluster stack" ] @ allowed_and_current_operations cluster_operation @ [ diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 8a87d7eb524..819b7c61141 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 788 +let schema_minor_vsn = 789 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 90d5db8de7e..b8a5a528a54 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -77,6 +77,8 @@ let prototyped_of_field = function Some "24.3.0" | "Cluster_host", "live" -> Some "24.3.0" + | "Cluster", "expected_hosts" -> + Some "25.16.0-next" | "Cluster", "live_hosts" -> Some "24.3.0" | "Cluster", "quorum" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index a9f7dbfc776..06feb367452 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "0cf3458af211661024fca9c1d0ab34ab" +let last_known_schema_hash = "2f80cd8fbfd0eedab4dfe345565bcb64" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index cba53ad17c4..3ff9bea2380 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -626,12 +626,13 @@ let make_cluster_and_cluster_host ~__context ?(ref = Ref.make ()) ?(token_timeout = Constants.default_token_timeout_s) ?(token_timeout_coefficient = Constants.default_token_timeout_coefficient_s) ?(cluster_config = []) ?(other_config = []) ?(host = Ref.null) - ?(is_quorate = false) ?(quorum = 0L) ?(live_hosts = 0L) () = + ?(is_quorate = false) ?(quorum = 0L) ?(live_hosts = 0L) + ?(expected_hosts = 0L) () = Db.Cluster.create ~__context ~ref ~uuid ~cluster_token ~pending_forget:[] ~cluster_stack ~cluster_stack_version ~allowed_operations ~current_operations ~pool_auto_join ~token_timeout ~token_timeout_coefficient ~cluster_config ~other_config ~is_quorate ~quorum - ~live_hosts ; + ~live_hosts ~expected_hosts ; let cluster_host_ref = make_cluster_host ~__context ~cluster:ref ~host ~pIF () in diff --git a/ocaml/tests/test_cluster.ml b/ocaml/tests/test_cluster.ml index b42621a300f..d24e36fe72c 100644 --- a/ocaml/tests/test_cluster.ml +++ b/ocaml/tests/test_cluster.ml @@ -69,6 +69,7 @@ let test_clusterd_rpc ~__context call = ; num_times_booted= 1 ; is_quorate= true ; total_votes= 1 + ; expected_votes= 1 ; quorum= 1 ; quorum_members= Some [me] ; is_running= true diff --git a/ocaml/tests/test_cluster_host.ml b/ocaml/tests/test_cluster_host.ml index 0887f72ade5..17673d6c0fd 100644 --- a/ocaml/tests/test_cluster_host.ml +++ b/ocaml/tests/test_cluster_host.ml @@ -24,7 +24,7 @@ let create_cluster ~__context pool_auto_join = ~token_timeout_coefficient:Constants.default_token_timeout_coefficient_s ~allowed_operations:[] ~current_operations:[] ~pool_auto_join ~cluster_config:[] ~other_config:[] ~pending_forget:[] ~is_quorate:false - ~quorum:0L ~live_hosts:0L ; + ~quorum:0L ~live_hosts:0L ~expected_hosts:0L ; cluster_ref let check_cluster_option = diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index 9b5cd3180df..da839d1e3f4 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -5132,6 +5132,9 @@ let cluster_record rpc session_id cluster = ; make_field ~name:"live-hosts" ~get:(fun () -> Int64.to_string (x ()).API.cluster_live_hosts) () + ; make_field ~name:"expected-hosts" + ~get:(fun () -> Int64.to_string (x ()).API.cluster_expected_hosts) + () ] } diff --git a/ocaml/xapi-idl/cluster/cluster_interface.ml b/ocaml/xapi-idl/cluster/cluster_interface.ml index e83e69363f2..a39fc0a2ae9 100644 --- a/ocaml/xapi-idl/cluster/cluster_interface.ml +++ b/ocaml/xapi-idl/cluster/cluster_interface.ml @@ -159,6 +159,9 @@ type optional_path = string option [@@deriving rpcty] type quorum_info = { is_quorate: bool ; total_votes: int + (* number of nodes that the cluster stack thinks are currently in the cluster *) + ; expected_votes: int + (* number of nodes that the cluster stack is expecting to be in the cluster *) ; quorum: int (** number of nodes required to form a quorum *) ; quorum_members: all_members option } @@ -179,6 +182,7 @@ type diagnostics = { ; is_quorate: bool ; is_running: bool ; total_votes: int + ; expected_votes: int ; quorum: int ; quorum_members: all_members option ; startup_finished: bool diff --git a/ocaml/xapi/xapi_cluster.ml b/ocaml/xapi/xapi_cluster.ml index 1968e5f0774..33f9573b1e1 100644 --- a/ocaml/xapi/xapi_cluster.ml +++ b/ocaml/xapi/xapi_cluster.ml @@ -96,7 +96,7 @@ let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout ~pending_forget:[] ~pool_auto_join ~token_timeout ~token_timeout_coefficient ~current_operations:[] ~allowed_operations:[] ~cluster_config:[] ~other_config:[] - ~is_quorate:false ~quorum:0L ~live_hosts:0L ; + ~is_quorate:false ~quorum:0L ~live_hosts:0L ~expected_hosts:0L ; Db.Cluster_host.create ~__context ~ref:cluster_host_ref ~uuid:cluster_host_uuid ~cluster:cluster_ref ~host ~enabled:true ~pIF ~current_operations:[] ~allowed_operations:[] ~other_config:[] diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index efaac876d69..c17b5eaf394 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -516,6 +516,8 @@ module Watcher = struct Db.Cluster.set_quorum ~__context ~self:cluster ~value:(Int64.of_int diag.quorum) ; Db.Cluster.set_live_hosts ~__context ~self:cluster + ~value:(Int64.of_int diag.total_votes) ; + Db.Cluster.set_expected_hosts ~__context ~self:cluster ~value:(Int64.of_int diag.total_votes) | Error (InternalError message) | Error (Unix_error message) -> warn "%s Cannot query diagnostics due to %s, not performing update" From f0d21aec01e5b3baf872478875c7c82252b60046 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 17 Apr 2025 11:03:12 +0100 Subject: [PATCH 137/492] Move update_snapshot_info_dest to storage_mux This function updates the snapshot related db fields after the storage migration. There is no need to leave this in the storage layer as xapi-storage-script will not be able to access xapi db. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_mux.ml | 91 ++++++++++++++++++++++++++++++++--- ocaml/xapi/storage_smapiv1.ml | 69 ++------------------------ 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 6614177e3e3..f47a3982625 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -330,11 +330,54 @@ module Mux = struct Storage_migrate.update_snapshot_info_src ~dbg:(Debug_info.to_string di) ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs + exception No_VDI + + (* Find a VDI given a storage-layer SR and VDI *) + let find_vdi ~__context sr vdi = + let sr = s_of_sr sr in + let vdi = s_of_vdi vdi in + let open Xapi_database.Db_filter_types in + let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in + match + Db.VDI.get_records_where ~__context + ~expr: + (And + ( Eq (Field "location", Literal vdi) + , Eq (Field "SR", Literal (Ref.string_of sr)) + ) + ) + with + | x :: _ -> + x + | _ -> + raise No_VDI + + let set_is_a_snapshot _context ~dbg ~sr ~vdi ~is_a_snapshot = + Server_helpers.exec_with_new_task "VDI.set_is_a_snapshot" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:is_a_snapshot + ) + + let set_snapshot_time _context ~dbg ~sr ~vdi ~snapshot_time = + let module Date = Clock.Date in + Server_helpers.exec_with_new_task "VDI.set_snapshot_time" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + let snapshot_time = Date.of_iso8601 snapshot_time in + Db.VDI.set_snapshot_time ~__context ~self:vdi ~value:snapshot_time + ) + + let set_snapshot_of _context ~dbg ~sr ~vdi ~snapshot_of = + Server_helpers.exec_with_new_task "VDI.set_snapshot_of" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + let snapshot_of, _ = find_vdi ~__context sr snapshot_of in + Db.VDI.set_snapshot_of ~__context ~self:vdi ~value:snapshot_of + ) + let update_snapshot_info_dest () ~dbg ~sr ~vdi ~src_vdi ~snapshot_pairs = - with_dbg ~name:"SR.update_snapshot_info_dest" ~dbg @@ fun di -> - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in + with_dbg ~name:"SR.update_snapshot_info_dest" ~dbg @@ fun _di -> info "SR.update_snapshot_info_dest dbg:%s sr:%s vdi:%s ~src_vdi:%s \ snapshot_pairs:%s" @@ -348,8 +391,44 @@ module Mux = struct |> String.concat "; " |> Printf.sprintf "[%s]" ) ; - C.SR.update_snapshot_info_dest (Debug_info.to_string di) sr vdi src_vdi - snapshot_pairs + Server_helpers.exec_with_new_task "SR.update_snapshot_info_dest" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let local_vdis = scan () ~dbg ~sr in + let find_sm_vdi ~vdi ~vdi_info_list = + try List.find (fun x -> x.vdi = vdi) vdi_info_list + with Not_found -> + raise (Storage_error (Vdi_does_not_exist (s_of_vdi vdi))) + in + let assert_content_ids_match ~vdi_info1 ~vdi_info2 = + if vdi_info1.content_id <> vdi_info2.content_id then + raise + (Storage_error + (Content_ids_do_not_match + (s_of_vdi vdi_info1.vdi, s_of_vdi vdi_info2.vdi) + ) + ) + in + (* For each (local snapshot vdi, source snapshot vdi) pair: + * - Check that the content_ids are the same + * - Copy snapshot_time from the source VDI to the local VDI + * - Set the local VDI's snapshot_of to vdi + * - Set is_a_snapshot = true for the local snapshot *) + List.iter + (fun (local_snapshot, src_snapshot_info) -> + let local_snapshot_info = + find_sm_vdi ~vdi:local_snapshot ~vdi_info_list:local_vdis + in + assert_content_ids_match ~vdi_info1:local_snapshot_info + ~vdi_info2:src_snapshot_info ; + set_snapshot_time __context ~dbg ~sr ~vdi:local_snapshot + ~snapshot_time:src_snapshot_info.snapshot_time ; + set_snapshot_of __context ~dbg ~sr ~vdi:local_snapshot + ~snapshot_of:vdi ; + set_is_a_snapshot __context ~dbg ~sr ~vdi:local_snapshot + ~is_a_snapshot:true + ) + snapshot_pairs + ) end module VDI = struct diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 1616d1a65f9..3f9003aad25 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -132,32 +132,6 @@ module SMAPIv1 : Server_impl = struct let vdi_rec = Db.VDI.get_record ~__context ~self in vdi_info_of_vdi_rec __context vdi_rec - (* For SMAPIv1, is_a_snapshot, snapshot_time and snapshot_of are stored in - * xapi's database. For SMAPIv2 they should be implemented by the storage - * backend. *) - let set_is_a_snapshot _context ~dbg ~sr ~vdi ~is_a_snapshot = - Server_helpers.exec_with_new_task "VDI.set_is_a_snapshot" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:is_a_snapshot - ) - - let set_snapshot_time _context ~dbg ~sr ~vdi ~snapshot_time = - Server_helpers.exec_with_new_task "VDI.set_snapshot_time" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - let snapshot_time = Date.of_iso8601 snapshot_time in - Db.VDI.set_snapshot_time ~__context ~self:vdi ~value:snapshot_time - ) - - let set_snapshot_of _context ~dbg ~sr ~vdi ~snapshot_of = - Server_helpers.exec_with_new_task "VDI.set_snapshot_of" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - let snapshot_of, _ = find_vdi ~__context sr snapshot_of in - Db.VDI.set_snapshot_of ~__context ~self:vdi ~value:snapshot_of - ) - module Query = struct let query _context ~dbg:_ = { @@ -433,46 +407,9 @@ module SMAPIv1 : Server_impl = struct ~dest_vdi:_ ~snapshot_pairs:_ = assert false - let update_snapshot_info_dest _context ~dbg ~sr ~vdi ~src_vdi:_ - ~snapshot_pairs = - Server_helpers.exec_with_new_task "SR.update_snapshot_info_dest" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let local_vdis = scan __context ~dbg ~sr in - let find_sm_vdi ~vdi ~vdi_info_list = - try List.find (fun x -> x.vdi = vdi) vdi_info_list - with Not_found -> - raise (Storage_error (Vdi_does_not_exist (s_of_vdi vdi))) - in - let assert_content_ids_match ~vdi_info1 ~vdi_info2 = - if vdi_info1.content_id <> vdi_info2.content_id then - raise - (Storage_error - (Content_ids_do_not_match - (s_of_vdi vdi_info1.vdi, s_of_vdi vdi_info2.vdi) - ) - ) - in - (* For each (local snapshot vdi, source snapshot vdi) pair: - * - Check that the content_ids are the same - * - Copy snapshot_time from the source VDI to the local VDI - * - Set the local VDI's snapshot_of to vdi - * - Set is_a_snapshot = true for the local snapshot *) - List.iter - (fun (local_snapshot, src_snapshot_info) -> - let local_snapshot_info = - find_sm_vdi ~vdi:local_snapshot ~vdi_info_list:local_vdis - in - assert_content_ids_match ~vdi_info1:local_snapshot_info - ~vdi_info2:src_snapshot_info ; - set_snapshot_time __context ~dbg ~sr ~vdi:local_snapshot - ~snapshot_time:src_snapshot_info.snapshot_time ; - set_snapshot_of __context ~dbg ~sr ~vdi:local_snapshot - ~snapshot_of:vdi ; - set_is_a_snapshot __context ~dbg ~sr ~vdi:local_snapshot - ~is_a_snapshot:true - ) - snapshot_pairs - ) + let update_snapshot_info_dest _context ~dbg:_ ~sr:_ ~vdi:_ ~src_vdi:_ + ~snapshot_pairs:_ = + assert false end module VDI = struct From 0a2135845ce29f44aa556e70dbd3a8c9af6f92b8 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 23 Apr 2025 15:34:25 +0100 Subject: [PATCH 138/492] Refactor Storage_smapiv1.find_vdi Move this to storage_utils.ml since it is used by storage_smapiv1.ml and storage_mux.ml Signed-off-by: Vincent Liu --- ocaml/xapi/storage_mux.ml | 23 +---------------------- ocaml/xapi/storage_smapiv1.ml | 23 +---------------------- ocaml/xapi/storage_smapiv1.mli | 3 --- ocaml/xapi/storage_utils.ml | 25 +++++++++++++++++++++++++ ocaml/xapi/storage_utils.mli | 9 +++++++++ ocaml/xapi/xapi_services.ml | 2 +- 6 files changed, 37 insertions(+), 48 deletions(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index f47a3982625..768c9b7e872 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -19,6 +19,7 @@ module D = Debug.Make (struct let name = "mux" end) open D open Storage_interface open Storage_mux_reg +open Storage_utils let s_of_sr = Storage_interface.Sr.string_of @@ -330,28 +331,6 @@ module Mux = struct Storage_migrate.update_snapshot_info_src ~dbg:(Debug_info.to_string di) ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs - exception No_VDI - - (* Find a VDI given a storage-layer SR and VDI *) - let find_vdi ~__context sr vdi = - let sr = s_of_sr sr in - let vdi = s_of_vdi vdi in - let open Xapi_database.Db_filter_types in - let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in - match - Db.VDI.get_records_where ~__context - ~expr: - (And - ( Eq (Field "location", Literal vdi) - , Eq (Field "SR", Literal (Ref.string_of sr)) - ) - ) - with - | x :: _ -> - x - | _ -> - raise No_VDI - let set_is_a_snapshot _context ~dbg ~sr ~vdi ~is_a_snapshot = Server_helpers.exec_with_new_task "VDI.set_is_a_snapshot" ~subtask_of:(Ref.of_string dbg) (fun __context -> diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 3f9003aad25..ab6f05f57d3 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -18,8 +18,7 @@ open D module Date = Clock.Date module XenAPI = Client.Client open Storage_interface - -exception No_VDI +open Storage_utils let s_of_vdi = Vdi.string_of @@ -30,26 +29,6 @@ let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute let with_dbg ~name ~dbg f = Debug_info.with_dbg ~module_name:"SMAPIv1" ~name ~dbg f -(* Find a VDI given a storage-layer SR and VDI *) -let find_vdi ~__context sr vdi = - let sr = s_of_sr sr in - let vdi = s_of_vdi vdi in - let open Xapi_database.Db_filter_types in - let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in - match - Db.VDI.get_records_where ~__context - ~expr: - (And - ( Eq (Field "location", Literal vdi) - , Eq (Field "SR", Literal (Ref.string_of sr)) - ) - ) - with - | x :: _ -> - x - | _ -> - raise No_VDI - (* Find a VDI reference given a name *) let find_content ~__context ?sr name = (* PR-1255: the backend should do this for us *) diff --git a/ocaml/xapi/storage_smapiv1.mli b/ocaml/xapi/storage_smapiv1.mli index 69a0a22aa9f..f991e6f82c3 100644 --- a/ocaml/xapi/storage_smapiv1.mli +++ b/ocaml/xapi/storage_smapiv1.mli @@ -20,7 +20,4 @@ val vdi_read_write : (Sr.t * Vdi.t, bool) Hashtbl.t val vdi_info_of_vdi_rec : Context.t -> API.vDI_t -> Storage_interface.vdi_info -val find_vdi : __context:Context.t -> Sr.t -> Vdi.t -> [`VDI] Ref.t * API.vDI_t -(** Find a VDI given a storage-layer SR and VDI *) - module SMAPIv1 : Server_impl diff --git a/ocaml/xapi/storage_utils.ml b/ocaml/xapi/storage_utils.ml index dd7d6b6e63d..8c2398619ff 100644 --- a/ocaml/xapi/storage_utils.ml +++ b/ocaml/xapi/storage_utils.ml @@ -14,6 +14,10 @@ open Storage_interface +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + let string_of_vdi_type vdi_type = Rpc.string_of_rpc (API.rpc_of_vdi_type vdi_type) @@ -173,3 +177,24 @@ let transform_storage_exn f = (Api_errors.Server_error (Api_errors.internal_error, [Printexc.to_string e]) ) + +exception No_VDI + +let find_vdi ~__context sr vdi = + let sr = s_of_sr sr in + let vdi = s_of_vdi vdi in + let open Xapi_database.Db_filter_types in + let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in + match + Db.VDI.get_records_where ~__context + ~expr: + (And + ( Eq (Field "location", Literal vdi) + , Eq (Field "SR", Literal (Ref.string_of sr)) + ) + ) + with + | x :: _ -> + x + | _ -> + raise No_VDI diff --git a/ocaml/xapi/storage_utils.mli b/ocaml/xapi/storage_utils.mli index 50e3a80e7f8..d0a98704c8b 100644 --- a/ocaml/xapi/storage_utils.mli +++ b/ocaml/xapi/storage_utils.mli @@ -64,3 +64,12 @@ val rpc : val transform_storage_exn : (unit -> 'a) -> 'a (** [transform_storage_exn f] runs [f], rethrowing any storage error as a nice XenAPI error *) + +exception No_VDI + +val find_vdi : + __context:Context.t + -> Storage_interface.sr + -> Storage_interface.vdi + -> [`VDI] Ref.t * API.vDI_t +(** Find a VDI given a storage-layer SR and VDI *) diff --git a/ocaml/xapi/xapi_services.ml b/ocaml/xapi/xapi_services.ml index 21e3b8d0c3b..1612c5050f8 100644 --- a/ocaml/xapi/xapi_services.ml +++ b/ocaml/xapi/xapi_services.ml @@ -196,7 +196,7 @@ let put_handler (req : Http.Request.t) s _ = http_proxy_to_plugin req s name | [""; services; "SM"; "data"; sr; vdi] when services = _services -> let vdi, _ = - Storage_smapiv1.find_vdi ~__context + Storage_utils.find_vdi ~__context (Storage_interface.Sr.of_string sr) (Storage_interface.Vdi.of_string vdi) in From 7471d40666cafd8933bd3d2607adac91ab9427d7 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 17 Apr 2025 15:21:36 +0100 Subject: [PATCH 139/492] Use the new scan2 Signed-off-by: Vincent Liu --- ocaml/xapi/storage_mux.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 768c9b7e872..ca66169363d 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -372,7 +372,7 @@ module Mux = struct ) ; Server_helpers.exec_with_new_task "SR.update_snapshot_info_dest" ~subtask_of:(Ref.of_string dbg) (fun __context -> - let local_vdis = scan () ~dbg ~sr in + let local_vdis, _ = scan2 () ~dbg ~sr in let find_sm_vdi ~vdi ~vdi_info_list = try List.find (fun x -> x.vdi = vdi) vdi_info_list with Not_found -> From ad259561f823fbd1ec10420206817f59528458f1 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 17 Apr 2025 15:22:14 +0100 Subject: [PATCH 140/492] Refactor Storage_migrate.find_vdi Extract common logic on finding vdi_info given vdi, and also add a parameter to specify where to find the VDI (locally or remotely). Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 2 +- ocaml/xapi/storage_migrate_helper.ml | 9 ++++---- ocaml/xapi/storage_migrate_helper.mli | 3 ++- ocaml/xapi/storage_smapiv1_migrate.ml | 31 +++++++-------------------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index ae3344d788b..395e1daf1d5 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -278,7 +278,7 @@ module MigrateLocal = struct let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) - let local_vdi = find_local_vdi ~dbg ~sr ~vdi in + let local_vdi, _ = find_vdi ~dbg ~sr ~vdi (module Local) in let mirror_id = State.mirror_id_of (sr, local_vdi.vdi) in debug "%s: Adding to active local mirrors before sending: id=%s" __FUNCTION__ mirror_id ; diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index 66c23d9a04e..f60d2d4742b 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -346,14 +346,13 @@ let get_remote_backend url verify_dest = end)) in (module Remote : SMAPIv2) -let find_local_vdi ~dbg ~sr ~vdi = - (* Find the local VDI *) - let vdis, _ = Local.SR.scan2 dbg sr in +let find_vdi ~dbg ~sr ~vdi (module SMAPIv2 : SMAPIv2) = + let vdis, _ = SMAPIv2.SR.scan2 dbg sr in match List.find_opt (fun x -> x.vdi = vdi) vdis with | None -> - failwith "Local VDI not found" + failwith_fmt "VDI %s not found" (Storage_interface.Vdi.string_of vdi) | Some v -> - v + (v, vdis) (** [similar_vdis dbg sr vdi] returns a list of content_ids of vdis which are similar to the input [vdi] in [sr] *) diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index 972faf57ce6..b3c445300a0 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -261,6 +261,7 @@ module Local : SMAPIv2 val get_remote_backend : string -> bool -> (module SMAPIv2) -val find_local_vdi : dbg:string -> sr:sr -> vdi:vdi -> vdi_info +val find_vdi : + dbg:string -> sr:sr -> vdi:vdi -> (module SMAPIv2) -> vdi_info * vdi_info list val similar_vdis : dbg:string -> sr:sr -> vdi:vdi -> uuid list diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index b38231dad5b..a93e506ff08 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -162,26 +162,11 @@ module Copy = struct (Printf.sprintf "Remote SR %s not found" (Storage_interface.Sr.string_of dest) ) ; - let vdis = Remote.SR.scan dbg dest in - let remote_vdi = - try List.find (fun x -> x.vdi = dest_vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Remote VDI %s not found" - (Storage_interface.Vdi.string_of dest_vdi) - ) - in + + let remote_vdi, _ = find_vdi ~dbg ~sr:dest ~vdi:dest_vdi (module Remote) in let dest_content_id = remote_vdi.content_id in (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in + let local_vdi, vdis = find_vdi ~dbg ~sr ~vdi (module Local) in D.debug "copy local content_id=%s" local_vdi.content_id ; D.debug "copy remote content_id=%s" dest_content_id ; if local_vdi.virtual_size > remote_vdi.virtual_size then ( @@ -293,6 +278,10 @@ module Copy = struct (* PR-1255: XXX: this is useful because we don't have content_ids by default *) D.debug "setting local content_id <- %s" local_vdi.content_id ; Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; + (* Re-find the VDI to get the updated content_id info *) + let remote_vdi, _ = + find_vdi ~dbg ~sr:dest ~vdi:dest_vdi (module Remote) + in Some (Vdi_info remote_vdi) with e -> D.error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; @@ -312,11 +301,7 @@ module Copy = struct let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) try - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith (Printf.sprintf "Local VDI not found") - in + let local_vdi, _ = find_vdi ~dbg ~sr ~vdi (module Local) in try let similar_vdis = Local.VDI.similar_content dbg sr vdi in let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in From 82c509fbe2b29bef1c5b7fa81ce458a49999c132 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 14 Apr 2025 16:57:26 +0100 Subject: [PATCH 141/492] Add new interface for mirror operation in SMAPIv3 These will be used when mirroring a VDI that is on a SMAPIv3 SR. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 34 +++++++++++++++++++-- ocaml/xapi/storage_smapiv1_migrate.ml | 8 +++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index b98047bd610..b4df7cf4855 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -291,12 +291,42 @@ module Mirror = struct } [@@deriving rpcty] - type mirror_receive_result = Vhd_mirror of mirror_receive_result_vhd_t + type mirror_receive_result_smapiv3_t = { + mirror_vdi: vdi_info + ; mirror_datapath: dp + ; nbd_export: string + } + [@@deriving rpcty] + + (* The variant of the mirror receive result depends on the SMAPI version being used, + rather than the VDI image type. We call the new variant SMAPIv3_mirror to reflect + this, but keep the old one Vhd_mirror for backwards compatability reasons. *) + type mirror_receive_result = + | Vhd_mirror of mirror_receive_result_vhd_t + | SMAPIv3_mirror of mirror_receive_result_smapiv3_t [@@deriving rpcty] type similars = content_id list [@@deriving rpcty] + + type copy_operation_v1 = string [@@deriving rpcty, show {with_path= false}] + + type mirror_operation_v1 = string [@@deriving rpcty, show {with_path= false}] + + (* SMAPIv3 mirror operation *) + type operation = + | CopyV1 of copy_operation_v1 + | MirrorV1 of mirror_operation_v1 + [@@deriving rpcty, show {with_path= false}] + + (* status of SMAPIv3 mirror *) + type status = {failed: bool; complete: bool; progress: float option} + [@@deriving rpcty] end +type operation = Mirror.operation + +type status = Mirror.status + type async_result_t = Vdi_info of vdi_info | Mirror_id of Mirror.id [@@deriving rpcty] @@ -373,7 +403,7 @@ module Errors = struct (* mirror_copy_failure: raised when copying of the base image fails (SMAPIv1 only) *) | Migration_mirror_copy_failure of string (* mirror_failure: raised when there is any issues that causes the mirror to crash - during SXM (SMAPIv3 only, v1 uses more specific errors as above) *) + during SXM (SMAPIv1 and SMAPIv3 *) | Migration_mirror_failure of string | Internal_error of string | Unknown_error diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index a93e506ff08..c5c82785215 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -554,6 +554,14 @@ module MIRROR : SMAPIv2_MIRROR = struct Storage_migrate_helper.get_remote_backend url verify_dest in match remote_mirror with + | Mirror.SMAPIv3_mirror _ -> + (* this should never happen *) + raise + (Storage_error + (Migration_mirror_failure + "Incorrect remote mirror format for SMAPIv1" + ) + ) | Mirror.Vhd_mirror mirror_res -> let tapdev = mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr From 1fe6389dc85d374fa87bb736e1f7717e2f7c7a24 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 11:28:54 +0100 Subject: [PATCH 142/492] Add more states for SXM Adding a few more states that will be tracked by the send_state and receive_state. These will be used later on for SMAPIv3 outbound migration. We do need to give them a default value in case the deserialisation of the on-disk copy does not have these values. These are not used yet, so no functional change. Signed-off-by: Vincent Liu --- ocaml/tests/test_storage_migrate_state.ml | 5 +++++ ocaml/xapi-storage-cli/main.ml | 5 ++++- ocaml/xapi/storage_migrate.ml | 12 ++++++++---- ocaml/xapi/storage_migrate_helper.ml | 7 +++++++ ocaml/xapi/storage_migrate_helper.mli | 5 +++++ ocaml/xapi/storage_smapiv1_migrate.ml | 16 +++++++++++----- ocaml/xapi/storage_smapiv1_migrate.mli | 1 + ocaml/xapi/xapi_vm_migrate.ml | 23 +++++++++++++++++++---- 8 files changed, 60 insertions(+), 14 deletions(-) diff --git a/ocaml/tests/test_storage_migrate_state.ml b/ocaml/tests/test_storage_migrate_state.ml index 498d9995548..ea059ae07e2 100644 --- a/ocaml/tests/test_storage_migrate_state.ml +++ b/ocaml/tests/test_storage_migrate_state.ml @@ -41,6 +41,9 @@ let sample_send_state = ) ; failed= false ; watchdog= None + ; live_vm= Storage_interface.Vm.of_string "0" + ; mirror_key= None + ; vdi= Storage_interface.Vdi.of_string "" } let sample_receive_state = @@ -54,6 +57,8 @@ let sample_receive_state = ; parent_vdi= Vdi.of_string "parent_vdi" ; remote_vdi= Vdi.of_string "remote_vdi" ; mirror_vm= Vm.of_string "mirror_vm" + ; url= "" + ; verify_dest= false } let sample_copy_state = diff --git a/ocaml/xapi-storage-cli/main.ml b/ocaml/xapi-storage-cli/main.ml index 536ea02608e..6a607f50986 100644 --- a/ocaml/xapi-storage-cli/main.ml +++ b/ocaml/xapi-storage-cli/main.ml @@ -315,6 +315,8 @@ let mirror_vm = Vm.of_string "SXM_mirror" let copy_vm = Vm.of_string "SXM_copy" +let live_vm = Vm.of_string "live_vm" + let mirror_start common_opts sr vdi dp url dest verify_dest = on_vdi' (fun sr vdi -> @@ -323,7 +325,8 @@ let mirror_start common_opts sr vdi dp url dest verify_dest = let url = get_opt url "Need a URL" in let dest = get_opt dest "Need a destination SR" in let task = - Storage_migrate.start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url + Storage_migrate.start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm + ~url ~dest:(Storage_interface.Sr.of_string dest) ~verify_dest in diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 395e1daf1d5..4e2093f1f59 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -261,7 +261,7 @@ module MigrateLocal = struct raise (Storage_error (Migration_preparation_failure (Printexc.to_string e))) - let start ~task_id ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest + let start ~task_id ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url ~dest ~verify_dest = SXM.info "%s sr:%s vdi:%s dp: %s mirror_vm: %s copy_vm: %s url:%s dest:%s \ @@ -292,6 +292,9 @@ module MigrateLocal = struct ; tapdev= None ; failed= false ; watchdog= None + ; live_vm + ; vdi + ; mirror_key= None } in @@ -608,13 +611,14 @@ let copy ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = ~sr ~vdi ~vm ~url ~dest ~verify_dest ) -let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest = +let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url ~dest ~verify_dest + = with_dbg ~name:__FUNCTION__ ~dbg @@ fun dbg -> with_task_and_thread ~dbg (fun task -> MigrateLocal.start ~task_id:(Storage_task.id_of_handle task) - ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest - ~verify_dest + ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url + ~dest ~verify_dest ) (* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml index f60d2d4742b..f4c5d46c39c 100644 --- a/ocaml/xapi/storage_migrate_helper.ml +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -36,6 +36,8 @@ module State = struct ; parent_vdi: Vdi.t ; remote_vdi: Vdi.t ; mirror_vm: Vm.t + ; url: string [@default ""] + ; verify_dest: bool [@default false] } [@@deriving rpcty] @@ -92,6 +94,11 @@ module State = struct ; tapdev: tapdev option ; mutable failed: bool ; mutable watchdog: handle option + ; vdi: Vdi.t [@default Vdi.of_string ""] (* source vdi *) + ; live_vm: Vm.t + [@default Vm.of_string "0"] + (* vm to which the source vdi is attached *) + ; mirror_key: Mirror.operation option [@default None] } [@@deriving rpcty] diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli index b3c445300a0..0f3a6ee8e11 100644 --- a/ocaml/xapi/storage_migrate_helper.mli +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -28,6 +28,8 @@ module State : sig ; parent_vdi: Storage_interface.vdi ; remote_vdi: Storage_interface.vdi ; mirror_vm: Storage_interface.vm + ; url: string + ; verify_dest: bool } val t_sr : (Storage_interface.sr, t) Rpc.Types.field @@ -89,6 +91,9 @@ module State : sig ; tapdev: tapdev option ; mutable failed: bool ; mutable watchdog: handle option + ; vdi: Vdi.t [@default Vdi.of_string ""] + ; live_vm: Vm.t [@default Vm.of_string "0"] + ; mirror_key: Mirror.operation option [@default None] } val t_url : (string, t) Rpc.Types.field diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index c5c82785215..70bd14dc3dc 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -386,8 +386,9 @@ module Copy = struct raise (Storage_error (Internal_error (Printexc.to_string e))) end -let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr - ~verify_dest ~(remote_mirror : Mirror.mirror_receive_result_vhd_t) = +let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url + ~dest_sr ~verify_dest ~(remote_mirror : Mirror.mirror_receive_result_vhd_t) + = let remote_vdi = remote_mirror.mirror_vdi.vdi in let mirror_dp = remote_mirror.mirror_datapath in @@ -470,6 +471,9 @@ let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr ; tapdev= Some tapdev ; failed= false ; watchdog= None + ; vdi + ; live_vm + ; mirror_key= None } in State.add mirror_id (State.Send_op alm) ; @@ -549,7 +553,7 @@ module MIRROR : SMAPIv2_MIRROR = struct type context = unit let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id - ~local_vdi ~copy_vm ~live_vm:_ ~url ~remote_mirror ~dest_sr ~verify_dest = + ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in @@ -564,8 +568,8 @@ module MIRROR : SMAPIv2_MIRROR = struct ) | Mirror.Vhd_mirror mirror_res -> let tapdev = - mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~url ~dest_sr - ~verify_dest ~remote_mirror:mirror_res + mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url + ~dest_sr ~verify_dest ~remote_mirror:mirror_res in let snapshot = mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi in @@ -663,6 +667,8 @@ module MIRROR : SMAPIv2_MIRROR = struct ; parent_vdi= parent.vdi ; remote_vdi= vdi_info.vdi ; mirror_vm= vm + ; url= "" + ; verify_dest= false } ) ; let nearest_content_id = Option.map (fun x -> x.content_id) nearest in diff --git a/ocaml/xapi/storage_smapiv1_migrate.mli b/ocaml/xapi/storage_smapiv1_migrate.mli index 4c40e2ab999..a1021858e46 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.mli +++ b/ocaml/xapi/storage_smapiv1_migrate.mli @@ -56,6 +56,7 @@ val mirror_pass_fds : -> sr:Storage_interface.sr -> vdi:Storage_interface.vdi -> mirror_vm:Storage_interface.vm + -> live_vm:Storage_interface.vm -> mirror_id:string -> url:string -> dest_sr:Storage_interface.sr diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 60c344d4c65..c12dc0648d1 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1030,14 +1030,29 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far let id = Storage_migrate_helper.State.mirror_id_of (vconf.sr, vconf.location) in - debug "%s mirror_vm is %s copy_vm is %s" __FUNCTION__ + let live_vm = + match Db.VDI.get_VBDs ~__context ~self:vconf.vdi with + | [] -> + Storage_migrate_helper.failwith_fmt + "VDI %s does not have a corresponding VBD" + (Ref.string_of vconf.vdi) + | vbd_ref :: _ -> + (* XX Is it possible that this VDI might be used as multiple VBDs attached to different VMs? *) + let vm_ref = Db.VBD.get_VM ~__context ~self:vbd_ref in + let domid = + Db.VM.get_domid ~__context ~self:vm_ref |> Int64.to_string + in + Vm.of_string domid + in + debug "%s mirror_vm is %s copy_vm is %s live_vm is %s" __FUNCTION__ (Vm.string_of vconf.mirror_vm) - (Vm.string_of vconf.copy_vm) ; + (Vm.string_of vconf.copy_vm) + (Vm.string_of live_vm) ; (* Layering violation!! *) ignore (Storage_access.register_mirror __context id) ; Storage_migrate.start ~dbg ~sr:vconf.sr ~vdi:vconf.location ~dp:new_dp - ~mirror_vm:vconf.mirror_vm ~copy_vm:vconf.copy_vm ~url:remote.sm_url - ~dest:dest_sr ~verify_dest:is_intra_pool + ~mirror_vm:vconf.mirror_vm ~copy_vm:vconf.copy_vm ~live_vm + ~url:remote.sm_url ~dest:dest_sr ~verify_dest:is_intra_pool in let mapfn x = let total = Int64.to_float total_size in From 59236c62308f8734768660eb73167a8bdaeea02b Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 12:23:31 +0100 Subject: [PATCH 143/492] Remove receive_start(2) from storage_migrate These functions will not be exposed in storage_migrate.ml any more. `receive_start` is kept in the mux for backwards compatability reasons, and will go straight to the SMAPIv1 implemenetation. `receive_start2` calls will not be directed to the storage_mux any more, instead it will be multiplexed by the new layer in storage_migrate. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 109 ---------------------------------- ocaml/xapi/storage_mux.ml | 23 +++---- 2 files changed, 9 insertions(+), 123 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 4e2093f1f59..05e3266ab8d 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -40,111 +40,6 @@ let choose_backend dbg sr = (** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions tend to be executed on the receiver side. *) module MigrateRemote = struct - let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = - let on_fail : (unit -> unit) list ref = ref [] in - let vdis = Local.SR.scan dbg sr in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in - let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in - try - let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in - let leaf = Local.VDI.create dbg sr vdi_info in - info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; - (* dummy VDI is created so that the leaf VDI becomes a differencing disk, - useful for calling VDI.compose later on *) - let dummy = Local.VDI.snapshot dbg sr leaf in - on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; - debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ - (string_of_vdi_info dummy) ; - let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in - Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= vdi_info.virtual_size - ) - vdis - ) - with Not_found -> None - ) - ) - None similar - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let parent = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi = add_to_sm_config vdi "base_mirror" id in - let vdi_clone = Local.VDI.clone dbg sr vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> vdi_info.virtual_size then - let new_size = - Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size - in - debug "Resize local clone VDI to %Ld: result %Ld" - vdi_info.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Local.VDI.create dbg sr vdi_info - in - debug "Parent disk content_id=%s" parent.content_id ; - State.add id - State.( - Recv_op - Receive_state. - { - sr - ; dummy_vdi= dummy.vdi - ; leaf_vdi= leaf.vdi - ; leaf_dp - ; parent_vdi= parent.vdi - ; remote_vdi= vdi_info.vdi - ; mirror_vm= vm - } - ) ; - let nearest_content_id = Option.map (fun x -> x.content_id) nearest in - Mirror.Vhd_mirror - { - Mirror.mirror_vdi= leaf - ; mirror_datapath= leaf_dp - ; copy_diffs_from= nearest_content_id - ; copy_diffs_to= parent.vdi - ; dummy_vdi= dummy.vdi - } - with e -> - List.iter - (fun op -> - try op () - with e -> - debug "Caught exception in on_fail: %s" (Printexc.to_string e) - ) - !on_fail ; - raise e - - let receive_start ~dbg ~sr ~vdi_info ~id ~similar = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") - - let receive_start2 ~dbg ~sr ~vdi_info ~id ~similar ~vm = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm - let receive_finalize ~dbg ~id = let recv_state = State.find_active_receive_mirror id in let open State.Receive_state in @@ -630,10 +525,6 @@ let killall = MigrateLocal.killall let stat = MigrateLocal.stat -let receive_start = MigrateRemote.receive_start - -let receive_start2 = MigrateRemote.receive_start2 - let receive_finalize = MigrateRemote.receive_finalize let receive_finalize2 = MigrateRemote.receive_finalize2 diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index ca66169363d..06e67955e15 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -826,25 +826,20 @@ module Mux = struct u "DATA.MIRROR.send_start" (* see storage_smapi{v1,v3}_migrate.ml *) let receive_start () ~dbg ~sr ~vdi_info ~id ~similar = - with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun di -> + with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun _di -> info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s" __FUNCTION__ dbg (s_of_sr sr) (string_of_vdi_info vdi_info) id (String.concat ";" similar) ; - Storage_migrate.receive_start ~dbg:di.log ~sr ~vdi_info ~id ~similar - - let receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm = - with_dbg ~name:"DATA.MIRROR.receive_start2" ~dbg @@ fun di -> - info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s vm: %s" - __FUNCTION__ dbg (s_of_sr sr) - (string_of_vdi_info vdi_info) - id - (String.concat ";" similar) - (s_of_vm vm) ; - info "%s dbg:%s" __FUNCTION__ dbg ; - Storage_migrate.receive_start2 ~dbg:di.log ~sr ~vdi_info ~id ~similar - ~vm + (* This goes straight to storage_smapiv1_migrate for backwards compatability + reasons, new code should not call receive_start any more *) + Storage_smapiv1_migrate.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id + ~similar + + (** see storage_smapiv{1,3}_migrate.receive_start2 *) + let receive_start2 () ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ ~vm:_ = + u __FUNCTION__ let receive_finalize () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_finalize" ~dbg @@ fun di -> From 8da184b9105fa6acb11d6b21810a1daa31daaec6 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 14:47:07 +0100 Subject: [PATCH 144/492] Change how receive_start2 is called Previously `MIRROR.receive_start2` is called as a remote function, i.e. `Remote.DATA.MIRROR.receive_start2` and it will be forwarded to the destination host and multiplexes based on the destination SR type. This is inconvenient as what `receive_start2` should do is more dependent on what the source SR type is. For example, if the source SR is using SMAPIv1, then `receive_start2` needs to tell the destination host to create snapshots VDIs, while this is not necessary if the source SR type is of SMAPIv3. Hence instead of calling `Remote.receive_start2`, call each individual functions inside `receive_start2` remotely, such as `Remote.VDI.create`, and these SMAPIv2 functions will still be multiplexed on the destiantion side, based on the destination SR type. And this is indeed what we want, imagine a case where we are migrating v1 -> v3, what we want is still create a snapshot VDI, but on the v3 SR. This does mean that the state tracking, such as `State.add`, which was previously tracked by the destination host, now needs to be tracked by the source host. And this will affect a number of other `receive_` functions such as `receive_finalize2` and `receive_cancel2`, which are updated accordingly. For backwards compatability reasons, we still need to preserve `receive_start` which might still be called from an older host trying to do a v1 -> v1 migration. And this is done by making sure that the SMAPIv2 done inside `receive_start` are all local, while the `receive_start` call itself is remote. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 57 +++++++++++++-- ocaml/xapi-idl/storage/storage_skeleton.ml | 9 ++- ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_migrate.ml | 81 ++++++++------------- ocaml/xapi/storage_mux.ml | 16 ++-- ocaml/xapi/storage_smapiv1.ml | 11 ++- ocaml/xapi/storage_smapiv1_migrate.ml | 65 +++++++++++------ ocaml/xapi/storage_smapiv1_wrapper.ml | 21 +++--- ocaml/xapi/storage_smapiv3_migrate.ml | 2 + 9 files changed, 160 insertions(+), 103 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index b4df7cf4855..28e3752d8c6 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1136,6 +1136,8 @@ module StorageAPI (R : RPC) = struct @-> id_p @-> similar_p @-> vm_p + @-> url_p + @-> verify_dest_p @-> returning result err ) @@ -1154,13 +1156,29 @@ module StorageAPI (R : RPC) = struct should be used in conjunction with [receive_start2] *) let receive_finalize2 = declare "DATA.MIRROR.receive_finalize2" [] - (dbg_p @-> id_p @-> returning unit_p err) + (dbg_p + @-> id_p + @-> sr_p + @-> url_p + @-> verify_dest_p + @-> returning unit_p err + ) (** [receive_cancel dbg id] is called in the case of migration failure to - do the clean up.*) + do the clean up. + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_cancel + during SXM. Use the receive_cancel2 function instead. + *) let receive_cancel = declare "DATA.MIRROR.receive_cancel" [] (dbg_p @-> id_p @-> returning unit_p err) + + (** [receive_cancel2 dbg mirror_id url verify_dest] cleans up the side effects + done by [receive_start2] on the destination host when the migration fails. *) + let receive_cancel2 = + declare "DATA.MIRROR.receive_cancel2" [] + (dbg_p @-> id_p @-> url_p @-> verify_dest_p @-> returning unit_p err) end end @@ -1240,16 +1258,33 @@ module type MIRROR = sig -> dbg:debug_info -> sr:sr -> vdi_info:vdi_info - -> id:Mirror.id + -> mirror_id:Mirror.id -> similar:Mirror.similars -> vm:vm + -> url:string + -> verify_dest:bool -> Mirror.mirror_receive_result val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit - val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit + val receive_finalize2 : + context + -> dbg:debug_info + -> mirror_id:Mirror.id + -> sr:sr + -> url:string + -> verify_dest:bool + -> unit val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_cancel2 : + context + -> dbg:debug_info + -> mirror_id:Mirror.id + -> url:string + -> verify_dest:bool + -> unit end module type Server_impl = sig @@ -1706,17 +1741,23 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; - S.DATA.MIRROR.receive_start2 (fun dbg sr vdi_info id similar vm -> - Impl.DATA.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm + S.DATA.MIRROR.receive_start2 + (fun dbg sr vdi_info mirror_id similar vm url verify_dest -> + Impl.DATA.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~mirror_id + ~similar ~vm ~url ~verify_dest ) ; S.DATA.MIRROR.receive_cancel (fun dbg id -> Impl.DATA.MIRROR.receive_cancel () ~dbg ~id ) ; + S.DATA.MIRROR.receive_cancel2 (fun dbg mirror_id url verify_dest -> + Impl.DATA.MIRROR.receive_cancel2 () ~dbg ~mirror_id ~url ~verify_dest + ) ; S.DATA.MIRROR.receive_finalize (fun dbg id -> Impl.DATA.MIRROR.receive_finalize () ~dbg ~id ) ; - S.DATA.MIRROR.receive_finalize2 (fun dbg id -> - Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~id + S.DATA.MIRROR.receive_finalize2 (fun dbg mirror_id sr url verify_dest -> + Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~mirror_id ~sr ~url + ~verify_dest ) ; S.DATA.import_activate (fun dbg dp sr vdi vm -> Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 27197b06c7c..01f66eebb21 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -169,14 +169,19 @@ module DATA = struct let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = u "DATA.MIRROR.receive_start" - let receive_start2 ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = + let receive_start2 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + ~verify_dest = u "DATA.MIRROR.receive_start2" let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" - let receive_finalize2 ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize2" + let receive_finalize2 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + u "DATA.MIRROR.receive_finalize2" let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" + + let receive_cancel2 ctx ~dbg ~mirror_id ~url ~verify_dest = + u "DATA.MIRROR.receive_cancel2" end end diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index eb63f132e98..0800223c3f4 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1926,6 +1926,7 @@ let bind ~volume_script_dir = S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; + S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; S.TASK.list (u "TASK.list") ; diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 05e3266ab8d..54144ce5a2b 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -40,44 +40,29 @@ let choose_backend dbg sr = (** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions tend to be executed on the receiver side. *) module MigrateRemote = struct - let receive_finalize ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; - State.remove_receive_mirror id - - let receive_finalize2 ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - SXM.info - "%s Mirror done. Compose on the dest sr %s parent %s and leaf %s" - __FUNCTION__ (Sr.string_of r.sr) - (Vdi.string_of r.parent_vdi) - (Vdi.string_of r.leaf_vdi) ; - Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; - Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; - (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so - there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) - log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; - Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" - ) - recv_state ; - State.remove_receive_mirror id + (** [receive_finalize2 dbg mirror_id sr url verify_dest] takes an [sr] parameter + which is the source sr and multiplexes based on the type of that *) + let receive_finalize2 ~dbg ~mirror_id ~sr ~url ~verify_dest = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.receive_finalize2 () ~dbg ~mirror_id ~sr ~url ~verify_dest - let receive_cancel ~dbg ~id = - let receive_state = State.find_active_receive_mirror id in + let receive_cancel2 ~dbg ~mirror_id ~url ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let receive_state = State.find_active_receive_mirror mirror_id in let open State.Receive_state in Option.iter (fun r -> - log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; List.iter - (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) + (fun v -> + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr v) + ) [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] ) receive_state ; - State.remove_receive_mirror id + State.remove_receive_mirror mirror_id end (** This module [MigrateLocal] consists of the concrete implementations of the @@ -121,11 +106,10 @@ module MigrateLocal = struct | None -> debug "Snapshot VDI already cleaned up" ) ; - - let (module Remote) = - get_remote_backend remote_info.url remote_info.verify_dest - in - try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () + try + MigrateRemote.receive_cancel2 ~dbg ~mirror_id:id + ~url:remote_info.url ~verify_dest:remote_info.verify_dest + with _ -> () ) | None -> () @@ -145,11 +129,10 @@ module MigrateLocal = struct let prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url ~verify_dest = try - let (module Remote) = get_remote_backend url verify_dest in + let (module Migrate_Backend) = choose_backend dbg sr in let similars = similar_vdis ~dbg ~sr ~vdi in - - Remote.DATA.MIRROR.receive_start2 dbg dest local_vdi mirror_id similars - mirror_vm + Migrate_Backend.receive_start2 () ~dbg ~sr:dest ~vdi_info:local_vdi + ~mirror_id ~similar:similars ~vm:mirror_vm ~url ~verify_dest with e -> error "%s Caught error %s while preparing for SXM" __FUNCTION__ (Printexc.to_string e) ; @@ -215,7 +198,7 @@ module MigrateLocal = struct | Storage_error (Migration_mirror_snapshot_failure reason) ) as e -> error "%s: Caught %s: during storage migration preparation" __FUNCTION__ reason ; - MigrateRemote.receive_cancel ~dbg ~id:mirror_id ; + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~url ~verify_dest ; raise e | Storage_error (Migration_mirror_copy_failure reason) as e -> error "%s: Caught %s: during storage migration copy" __FUNCTION__ reason ; @@ -331,9 +314,12 @@ module MigrateLocal = struct ) copy_ops ; List.iter - (fun (id, _recv_state) -> - debug "Receive in progress: %s" id ; - log_and_ignore_exn (fun () -> Local.DATA.MIRROR.receive_cancel dbg id) + (fun (mirror_id, (recv_state : State.Receive_state.t)) -> + debug "Receive in progress: %s" mirror_id ; + log_and_ignore_exn (fun () -> + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~url:recv_state.url + ~verify_dest:recv_state.verify_dest + ) ) recv_ops ; State.clear () @@ -405,7 +391,8 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = let (module Remote) = get_remote_backend r.url verify_dest in debug "Calling receive_finalize2" ; log_and_ignore_exn (fun () -> - Remote.DATA.MIRROR.receive_finalize2 "Mirror-cleanup" id + MigrateRemote.receive_finalize2 ~dbg:"Mirror-cleanup" ~mirror_id:id + ~sr ~url:r.url ~verify_dest ) ; debug "Finished calling receive_finalize2" ; State.remove_local_mirror id ; @@ -525,12 +512,6 @@ let killall = MigrateLocal.killall let stat = MigrateLocal.stat -let receive_finalize = MigrateRemote.receive_finalize - -let receive_finalize2 = MigrateRemote.receive_finalize2 - -let receive_cancel = MigrateRemote.receive_cancel - (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in * the SMAPIv1 section of storage_migrate.ml. It needs to access the setters * for snapshot_of, snapshot_time and is_a_snapshot, which we don't want to add diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 06e67955e15..c9a387f6269 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -838,23 +838,25 @@ module Mux = struct ~similar (** see storage_smapiv{1,3}_migrate.receive_start2 *) - let receive_start2 () ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ ~vm:_ = + let receive_start2 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ + ~vm:_ = u __FUNCTION__ let receive_finalize () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_finalize" ~dbg @@ fun di -> info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_finalize ~dbg:di.log ~id + Storage_smapiv1_migrate.MIRROR.receive_finalize () ~dbg:di.log ~id - let receive_finalize2 () ~dbg ~id = - with_dbg ~name:"DATA.MIRROR.receive_finalize2" ~dbg @@ fun di -> - info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_finalize2 ~dbg:di.log ~id + let receive_finalize2 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = + u __FUNCTION__ let receive_cancel () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_cancel" ~dbg @@ fun di -> info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_cancel ~dbg:di.log ~id + Storage_smapiv1_migrate.MIRROR.receive_cancel () ~dbg:di.log ~id + + let receive_cancel2 () ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + u __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index ab6f05f57d3..708e35c0a96 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1143,15 +1143,20 @@ module SMAPIv1 : Server_impl = struct let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false - let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ - ~vm:_ = + let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + ~similar:_ ~vm:_ ~url:_ ~verify_dest:_ = assert false let receive_finalize _context ~dbg:_ ~id:_ = assert false - let receive_finalize2 _context ~dbg:_ ~id:_ = assert false + let receive_finalize2 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + ~verify_dest:_ = + assert false let receive_cancel _context ~dbg:_ ~id:_ = assert false + + let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + assert false end end diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 70bd14dc3dc..d6156a7fad0 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -552,6 +552,8 @@ let mirror_cleanup ~dbg ~sr ~snapshot = module MIRROR : SMAPIv2_MIRROR = struct type context = unit + let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = let (module Remote) = @@ -589,25 +591,26 @@ module MIRROR : SMAPIv2_MIRROR = struct (Storage_interface.Vdi.string_of mirror_res.Mirror.mirror_vdi.vdi) ; mirror_cleanup ~dbg ~sr ~snapshot - let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = + let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm + (module SMAPI : SMAPIv2) = let on_fail : (unit -> unit) list ref = ref [] in - let vdis = Local.SR.scan dbg sr in + let vdis = SMAPI.SR.scan dbg sr in (* We drop cbt_metadata VDIs that do not have any actual data *) let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in - let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in + let leaf_dp = SMAPI.DP.create dbg Uuidx.(to_string (make ())) in try let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in - let leaf = Local.VDI.create dbg sr vdi_info in + let leaf = SMAPI.VDI.create dbg sr vdi_info in D.info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + on_fail := (fun () -> SMAPI.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; (* dummy VDI is created so that the leaf VDI becomes a differencing disk, useful for calling VDI.compose later on *) - let dummy = Local.VDI.snapshot dbg sr leaf in - on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; + let dummy = SMAPI.VDI.snapshot dbg sr leaf in + on_fail := (fun () -> SMAPI.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; D.debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ (string_of_vdi_info dummy) ; - let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in - Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let _ : backend = SMAPI.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + SMAPI.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; let nearest = List.fold_left (fun acc content_id -> @@ -640,21 +643,26 @@ module MIRROR : SMAPIv2_MIRROR = struct | Some vdi -> D.debug "Cloning VDI" ; let vdi = add_to_sm_config vdi "base_mirror" id in - let vdi_clone = Local.VDI.clone dbg sr vdi in + let vdi_clone = SMAPI.VDI.clone dbg sr vdi in D.debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; ( if vdi_clone.virtual_size <> vdi_info.virtual_size then let new_size = - Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size + SMAPI.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size in - D.debug "Resize local clone VDI to %Ld: result %Ld" + D.debug "Resize clone VDI to %Ld: result %Ld" vdi_info.virtual_size new_size ) ; vdi_clone | None -> D.debug "Creating a blank remote VDI" ; - Local.VDI.create dbg sr vdi_info + SMAPI.VDI.create dbg sr vdi_info in D.debug "Parent disk content_id=%s" parent.content_id ; + (* The state tracking here does not need to be changed, however, it will be + stored in memory on different hosts. If receive_start is called, by an older + host, this State.add is run on the destination host. On the other hand, if + receive_start2 is called, this will be stored in memory on the source host. + receive_finalize2 and receive_cancel2 handles this similarly. *) State.add id State.( Recv_op @@ -692,9 +700,15 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_start _ctx ~dbg ~sr ~vdi_info ~id ~similar = receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") + (module Local) - let receive_start2 _ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm + let receive_start2 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + receive_start_common ~dbg ~sr ~vdi_info ~id:mirror_id ~similar ~vm + (module Remote) let receive_finalize _ctx ~dbg ~id = let recv_state = State.find_active_receive_mirror id in @@ -702,8 +716,11 @@ module MIRROR : SMAPIv2_MIRROR = struct Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; State.remove_receive_mirror id - let receive_finalize2 _ctx ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in + let receive_finalize2 _ctx ~dbg ~mirror_id ~sr:_ ~url ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let recv_state = State.find_active_receive_mirror mirror_id in let open State.Receive_state in Option.iter (fun r -> @@ -712,15 +729,15 @@ module MIRROR : SMAPIv2_MIRROR = struct __FUNCTION__ (Sr.string_of r.sr) (Vdi.string_of r.parent_vdi) (Vdi.string_of r.leaf_vdi) ; - Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; - Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; + Remote.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + Remote.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) - D.log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; - Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr r.dummy_vdi) ; + Remote.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" ) recv_state ; - State.remove_receive_mirror id + State.remove_receive_mirror mirror_id let receive_cancel _ctx ~dbg ~id = let receive_state = State.find_active_receive_mirror id in @@ -736,4 +753,8 @@ module MIRROR : SMAPIv2_MIRROR = struct ) receive_state ; State.remove_receive_mirror id + + let receive_cancel2 _ctx ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + (* see Storage_migrate.receive_cancel2 *) + u __FUNCTION__ end diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 569f4f33bb0..7d418fb9091 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1200,26 +1200,25 @@ functor (String.concat "," similar) ; Impl.DATA.MIRROR.receive_start context ~dbg ~sr ~vdi_info ~id ~similar - let receive_start2 context ~dbg ~sr ~vdi_info ~id ~similar ~vm = - info - "DATA.MIRROR.receive_start2 dbg:%s sr:%s id:%s similar:[%s] vm:%s" - dbg (s_of_sr sr) id - (String.concat "," similar) - (s_of_vm vm) ; - Impl.DATA.MIRROR.receive_start2 context ~dbg ~sr ~vdi_info ~id - ~similar ~vm + let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + ~similar:_ ~vm:_ = + u __FUNCTION__ let receive_finalize context ~dbg ~id = info "DATA.MIRROR.receive_finalize dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_finalize context ~dbg ~id - let receive_finalize2 context ~dbg ~id = - info "DATA.MIRROR.receive_finalize2 dbg:%s id:%s" dbg id ; - Impl.DATA.MIRROR.receive_finalize2 context ~dbg ~id + let receive_finalize2 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + ~verify_dest:_ = + (* see storage_smapiv{1,3}_migrate *) + u __FUNCTION__ let receive_cancel context ~dbg ~id = info "DATA.MIRROR.receive_cancel dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_cancel context ~dbg ~id + + let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + u __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 4cfcf1c831e..72d9f2bde9d 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -36,4 +36,6 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_finalize2 _ctx = u __FUNCTION__ let receive_cancel _ctx = u __FUNCTION__ + + let receive_cancel2 _ctx = u __FUNCTION__ end From 6a1d8afdccc74c1c99c69d8bda584702ef7337ce Mon Sep 17 00:00:00 2001 From: Guillaume Date: Mon, 3 Feb 2025 10:48:23 +0100 Subject: [PATCH 145/492] Add qcow2 as supported format by xcp-rrdd-iostat Since we are now supporting qcow file `tap-ctl list` can return strings like: - "1564848 0 0 qcow2 /var/run/sr-mount/..." Without this patch the type "qcow2" is unknown and xcp-rrdd-iostat generates an error like: returned a line that could not be parsed. Ignoring This patch fixes the issue. Signed-off-by: Guillaume --- ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml index 0f547015304..6141090eae7 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml @@ -332,7 +332,10 @@ let refresh_phypath_to_sr_vdi () = let exec_tap_ctl_list () : ((string * string) * int) list = let tap_ctl = "/usr/sbin/tap-ctl list" in let extract_vdis pid minor _state kind phypath = - if not (kind = "vhd" || kind = "aio") then raise (Failure "Unknown type") ; + if not (kind = "vhd" || kind = "aio" || kind = "qcow2") then ( + D.warn {|"%s" is not a known type.|} kind ; + raise (Failure "Unknown type") + ) ; (* Look up SR and VDI uuids from the physical path *) if not (Hashtbl.mem phypath_to_sr_vdi phypath) then refresh_phypath_to_sr_vdi () ; From 3453e78a7ad06157ca1dcc874e17c8df8a032fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 31 Jan 2025 23:02:46 +0000 Subject: [PATCH 146/492] CA-404946: NBD: set timeout to 90s to match iSCSI default device/timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- python3/libexec/nbd_client_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python3/libexec/nbd_client_manager.py b/python3/libexec/nbd_client_manager.py index 3d0920a3845..a7e6a87a53b 100644 --- a/python3/libexec/nbd_client_manager.py +++ b/python3/libexec/nbd_client_manager.py @@ -208,7 +208,7 @@ def connect_nbd(path, exportname): path, nbd_device, "-timeout", - "60", + "90", "-name", exportname, ] From f715b2863c92b9b6ea57b39924071a692e4fa8d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 31 Jan 2025 23:04:15 +0000 Subject: [PATCH 147/492] CA-404946: NBD: use persistent connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On IO and timeout errors do not remove the device, but try to reconnect. Signed-off-by: Edwin Török --- python3/libexec/nbd_client_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python3/libexec/nbd_client_manager.py b/python3/libexec/nbd_client_manager.py index a7e6a87a53b..99dd85c6cc9 100644 --- a/python3/libexec/nbd_client_manager.py +++ b/python3/libexec/nbd_client_manager.py @@ -209,6 +209,7 @@ def connect_nbd(path, exportname): nbd_device, "-timeout", "90", + "-persist", "-name", exportname, ] From 997d5dbd776876019016b0f742b6170bdbd59bb4 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 28 Apr 2025 10:24:05 +0100 Subject: [PATCH 148/492] Update datamodel_lifecycle Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_lifecycle.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index b8a5a528a54..9aaa87e5fec 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -78,7 +78,7 @@ let prototyped_of_field = function | "Cluster_host", "live" -> Some "24.3.0" | "Cluster", "expected_hosts" -> - Some "25.16.0-next" + Some "25.17.0" | "Cluster", "live_hosts" -> Some "24.3.0" | "Cluster", "quorum" -> From 3c6422aee0010c5c8ba6a7e84cb988afb7b42035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 28 Apr 2025 14:03:16 +0100 Subject: [PATCH 149/492] [maintenance]: reformat dune files in sdk-gen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` for i in **/dune; do dune format-dune-file $i >x && mv x $i; done ``` Signed-off-by: Edwin Török --- ocaml/sdk-gen/c/autogen/dune | 32 ++++------ ocaml/sdk-gen/c/dune | 46 ++++++-------- ocaml/sdk-gen/common/dune | 18 ++---- ocaml/sdk-gen/csharp/autogen/dune | 13 ++-- ocaml/sdk-gen/csharp/dune | 83 ++++++++++-------------- ocaml/sdk-gen/dune | 28 ++++---- ocaml/sdk-gen/go/autogen/dune | 32 ++++------ ocaml/sdk-gen/go/dune | 92 ++++++++++++--------------- ocaml/sdk-gen/java/autogen/dune | 9 +-- ocaml/sdk-gen/java/dune | 64 +++++++++---------- ocaml/sdk-gen/powershell/autogen/dune | 30 ++++----- ocaml/sdk-gen/powershell/dune | 55 +++++++--------- 12 files changed, 211 insertions(+), 291 deletions(-) diff --git a/ocaml/sdk-gen/c/autogen/dune b/ocaml/sdk-gen/c/autogen/dune index 78b81f38e4c..ff89723f136 100644 --- a/ocaml/sdk-gen/c/autogen/dune +++ b/ocaml/sdk-gen/c/autogen/dune @@ -1,26 +1,20 @@ (rule - (targets COPYING) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets COPYING) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (targets README) - (deps - ../README.dist - ) - (action (copy %{deps} %{targets})) -) + (targets README) + (deps ../README.dist) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - COPYING - README - (source_tree .) - ) -) + (name generate) + (deps + COPYING + README + (source_tree .))) (data_only_dirs src include) diff --git a/ocaml/sdk-gen/c/dune b/ocaml/sdk-gen/c/dune index adbea6905fa..e2470f9f712 100644 --- a/ocaml/sdk-gen/c/dune +++ b/ocaml/sdk-gen/c/dune @@ -1,34 +1,26 @@ (executable - (modes exe) - (name gen_c_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-datamodel - ) -) + (modes exe) + (name gen_c_binding) + (libraries astring CommonFunctions mustache xapi-datamodel)) (rule - (alias generate) - (package xapi-sdk) - (targets (dir autogen-out)) - (deps - (:x gen_c_binding.exe) - (source_tree templates) - (source_tree autogen) - ) - (action (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - )) -) + (alias generate) + (package xapi-sdk) + (targets + (dir autogen-out)) + (deps + (:x gen_c_binding.exe) + (source_tree templates) + (source_tree autogen)) + (action + (concurrent + (bash "cp -r autogen/ autogen-out/") + (run %{x})))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as c)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as c))) diff --git a/ocaml/sdk-gen/common/dune b/ocaml/sdk-gen/common/dune index ea0011e71ce..1475ba4da8d 100644 --- a/ocaml/sdk-gen/common/dune +++ b/ocaml/sdk-gen/common/dune @@ -1,14 +1,6 @@ (library - (name CommonFunctions) - (modes best) - (wrapped false) - (libraries - astring - xapi-datamodel - mustache - xapi-stdext-std - xapi-stdext-unix - ) - (modules_without_implementation license) -) - + (name CommonFunctions) + (modes best) + (wrapped false) + (libraries astring xapi-datamodel mustache xapi-stdext-std xapi-stdext-unix) + (modules_without_implementation license)) diff --git a/ocaml/sdk-gen/csharp/autogen/dune b/ocaml/sdk-gen/csharp/autogen/dune index 2a9744e4ae6..bd393d9a6e6 100644 --- a/ocaml/sdk-gen/csharp/autogen/dune +++ b/ocaml/sdk-gen/csharp/autogen/dune @@ -1,11 +1,8 @@ (rule - (alias generate) - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (alias generate) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (data_only_dirs src) - diff --git a/ocaml/sdk-gen/csharp/dune b/ocaml/sdk-gen/csharp/dune index 07e2fd42950..66999fc95a3 100644 --- a/ocaml/sdk-gen/csharp/dune +++ b/ocaml/sdk-gen/csharp/dune @@ -1,60 +1,41 @@ (executable - (modes exe) - (name gen_csharp_binding) - (modules Gen_csharp_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-consts - xapi-datamodel - ) -) + (modes exe) + (name gen_csharp_binding) + (modules Gen_csharp_binding) + (libraries astring CommonFunctions mustache xapi-consts xapi-datamodel)) (executable - (modes exe) - (name friendly_error_names) - (modules Friendly_error_names) - (libraries - CommonFunctions - - mustache - xapi-datamodel - xmllight2 - str - ) -) + (modes exe) + (name friendly_error_names) + (modules Friendly_error_names) + (libraries CommonFunctions mustache xapi-datamodel xmllight2 str)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_csharp_binding.exe) - (source_tree templates) - (:sh ../windows-line-endings.sh) - (source_tree autogen) - (:x2 friendly_error_names.exe) - FriendlyErrorNames.resx - (:y XE_SR_ERRORCODES.xml) - (source_tree templates) - ) - (action - (progn - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - (run %{x2} -s %{y}) - ) - (bash "rm autogen-out/.gitignore") - (bash "%{sh} autogen-out/") - )) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + (:x gen_csharp_binding.exe) + (source_tree templates) + (:sh ../windows-line-endings.sh) + (source_tree autogen) + (:x2 friendly_error_names.exe) + FriendlyErrorNames.resx + (:y XE_SR_ERRORCODES.xml) + (source_tree templates)) + (action + (progn + (concurrent + (bash "cp -r autogen/ autogen-out/") + (run %{x}) + (run %{x2} -s %{y})) + (bash "rm autogen-out/.gitignore") + (bash "%{sh} autogen-out/")))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as csharp)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as csharp))) diff --git a/ocaml/sdk-gen/dune b/ocaml/sdk-gen/dune index 76bdaaab2ca..6c4a09913d5 100644 --- a/ocaml/sdk-gen/dune +++ b/ocaml/sdk-gen/dune @@ -1,18 +1,16 @@ (data_only_dirs component-test) (alias - (name sdkgen) - (package xapi-sdk) - (deps - c/gen_c_binding.exe - csharp/gen_csharp_binding.exe - java/main.exe - powershell/gen_powershell_binding.exe - go/gen_go_binding.exe - (alias_rec c/generate) - (alias_rec csharp/generate) - (alias_rec java/generate) - (alias_rec powershell/generate) - (alias_rec go/generate) - ) -) + (name sdkgen) + (package xapi-sdk) + (deps + c/gen_c_binding.exe + csharp/gen_csharp_binding.exe + java/main.exe + powershell/gen_powershell_binding.exe + go/gen_go_binding.exe + (alias_rec c/generate) + (alias_rec csharp/generate) + (alias_rec java/generate) + (alias_rec powershell/generate) + (alias_rec go/generate))) diff --git a/ocaml/sdk-gen/go/autogen/dune b/ocaml/sdk-gen/go/autogen/dune index 98bbd45a418..05b35e921a1 100644 --- a/ocaml/sdk-gen/go/autogen/dune +++ b/ocaml/sdk-gen/go/autogen/dune @@ -1,26 +1,20 @@ (rule - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (targets README) - (deps - ../README.md - ) - (action (copy %{deps} %{targets})) -) + (targets README) + (deps ../README.md) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - LICENSE - README - (source_tree .) - ) -) + (name generate) + (deps + LICENSE + README + (source_tree .))) (data_only_dirs src) diff --git a/ocaml/sdk-gen/go/dune b/ocaml/sdk-gen/go/dune index a126ee856bd..b45a87a34c7 100644 --- a/ocaml/sdk-gen/go/dune +++ b/ocaml/sdk-gen/go/dune @@ -1,60 +1,52 @@ (executable - (modes exe) - (name gen_go_binding) - (modules gen_go_binding) - (libraries - CommonFunctions - mustache - xapi-datamodel - xapi-stdext-unix - gen_go_helper - ) -) + (modes exe) + (name gen_go_binding) + (modules gen_go_binding) + (libraries + CommonFunctions + mustache + xapi-datamodel + xapi-stdext-unix + gen_go_helper)) (library - (name gen_go_helper) - (modules gen_go_helper) - (modes best) - (libraries - CommonFunctions - astring - (re_export mustache) - (re_export xapi-consts) - (re_export xapi-datamodel) - xapi-stdext-std - ) -) + (name gen_go_helper) + (modules gen_go_helper) + (modes best) + (libraries + CommonFunctions + astring + (re_export mustache) + (re_export xapi-consts) + (re_export xapi-datamodel) + xapi-stdext-std)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_go_binding.exe) - (source_tree templates) - (source_tree autogen) - ) - (action - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x} --destdir autogen-out) - ) - ) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + (:x gen_go_binding.exe) + (source_tree templates) + (source_tree autogen)) + (action + (concurrent + (bash "cp -r autogen/ autogen-out/") + (run %{x} --destdir autogen-out)))) (test - (name test_gen_go) - (package xapi-sdk) - (modules test_gen_go) - (libraries CommonFunctions alcotest fmt xapi-test-utils gen_go_helper) - (deps - (source_tree test_data) - (source_tree templates) - ) -) + (name test_gen_go) + (package xapi-sdk) + (modules test_gen_go) + (libraries CommonFunctions alcotest fmt xapi-test-utils gen_go_helper) + (deps + (source_tree test_data) + (source_tree templates))) (data_only_dirs test_data templates) + (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as go)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as go))) diff --git a/ocaml/sdk-gen/java/autogen/dune b/ocaml/sdk-gen/java/autogen/dune index da324f0b9d0..e14eba6a578 100644 --- a/ocaml/sdk-gen/java/autogen/dune +++ b/ocaml/sdk-gen/java/autogen/dune @@ -1,9 +1,6 @@ (alias - (name generate) - (deps - (source_tree .) - ) -) + (name generate) + (deps + (source_tree .))) (data_only_dirs xen-api) - diff --git a/ocaml/sdk-gen/java/dune b/ocaml/sdk-gen/java/dune index 07167296b84..36c17dee3df 100644 --- a/ocaml/sdk-gen/java/dune +++ b/ocaml/sdk-gen/java/dune @@ -1,44 +1,38 @@ (executable - (modes exe) - (name main) - (libraries - astring - CommonFunctions - - mustache - str - xapi-datamodel - xapi-stdext-unix - ) -) + (modes exe) + (name main) + (libraries + astring + CommonFunctions + mustache + str + xapi-datamodel + xapi-stdext-unix)) (rule - (targets LICENSE) - (deps - ../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - LICENSE - (:x main.exe) - (source_tree templates) - (source_tree autogen) - ) - (action (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - )) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + LICENSE + (:x main.exe) + (source_tree templates) + (source_tree autogen)) + (action + (concurrent + (bash "cp -r autogen/ autogen-out/") + (run %{x})))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as java)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as java))) diff --git a/ocaml/sdk-gen/powershell/autogen/dune b/ocaml/sdk-gen/powershell/autogen/dune index c4c2a5f8633..4cfb2b8c487 100644 --- a/ocaml/sdk-gen/powershell/autogen/dune +++ b/ocaml/sdk-gen/powershell/autogen/dune @@ -1,22 +1,20 @@ (rule - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - LICENSE - (source_tree .) - ) -) + (name generate) + (deps + LICENSE + (source_tree .))) (data_only_dirs src) + (install - (package xapi-sdk) - (section doc) - (files (glob_files_rec (autogen/* with_prefix powershell))) -) + (package xapi-sdk) + (section doc) + (files + (glob_files_rec + (autogen/* with_prefix powershell)))) diff --git a/ocaml/sdk-gen/powershell/dune b/ocaml/sdk-gen/powershell/dune index 7eb4d3e56d6..e56d50b4523 100644 --- a/ocaml/sdk-gen/powershell/dune +++ b/ocaml/sdk-gen/powershell/dune @@ -1,38 +1,29 @@ (executable - (modes exe) - (name gen_powershell_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-datamodel - ) -) + (modes exe) + (name gen_powershell_binding) + (libraries astring CommonFunctions mustache xapi-datamodel)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_powershell_binding.exe) - (source_tree templates) - (:sh ../windows-line-endings.sh) - (source_tree autogen) - ) - (action - (progn - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - ) - (bash "rm autogen-out/.gitignore") - (bash "%{sh} autogen-out/") - )) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + (:x gen_powershell_binding.exe) + (source_tree templates) + (:sh ../windows-line-endings.sh) + (source_tree autogen)) + (action + (progn + (concurrent + (bash "cp -r autogen/ autogen-out/") + (run %{x})) + (bash "rm autogen-out/.gitignore") + (bash "%{sh} autogen-out/")))) (data_only_dirs templates) + (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as powershell)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as powershell))) From 2f2638175e8a1b1d8755e657f16c826c3c1191ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 28 Apr 2025 14:04:44 +0100 Subject: [PATCH 150/492] build: avoid race condition on install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had various builds fail with: ``` cp: cannot create directory 'autogen-out/include': File exists ``` This is likely a race condition between `cp -r` and the other command both trying to create that directory (and `cp -r` not handling the race condition correctly due to a TOCTOU race). Work this around by running the 2 command in sequence: then even if they both try to create the 'include' directory there won't be any race. Signed-off-by: Edwin Török --- ocaml/sdk-gen/c/dune | 2 +- ocaml/sdk-gen/csharp/dune | 2 +- ocaml/sdk-gen/go/dune | 2 +- ocaml/sdk-gen/java/dune | 2 +- ocaml/sdk-gen/powershell/dune | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/sdk-gen/c/dune b/ocaml/sdk-gen/c/dune index e2470f9f712..ef7e42abbd5 100644 --- a/ocaml/sdk-gen/c/dune +++ b/ocaml/sdk-gen/c/dune @@ -13,7 +13,7 @@ (source_tree templates) (source_tree autogen)) (action - (concurrent + (progn (bash "cp -r autogen/ autogen-out/") (run %{x})))) diff --git a/ocaml/sdk-gen/csharp/dune b/ocaml/sdk-gen/csharp/dune index 66999fc95a3..25f35763c4b 100644 --- a/ocaml/sdk-gen/csharp/dune +++ b/ocaml/sdk-gen/csharp/dune @@ -25,7 +25,7 @@ (source_tree templates)) (action (progn - (concurrent + (progn (bash "cp -r autogen/ autogen-out/") (run %{x}) (run %{x2} -s %{y})) diff --git a/ocaml/sdk-gen/go/dune b/ocaml/sdk-gen/go/dune index b45a87a34c7..64717b85c6d 100644 --- a/ocaml/sdk-gen/go/dune +++ b/ocaml/sdk-gen/go/dune @@ -30,7 +30,7 @@ (source_tree templates) (source_tree autogen)) (action - (concurrent + (progn (bash "cp -r autogen/ autogen-out/") (run %{x} --destdir autogen-out)))) diff --git a/ocaml/sdk-gen/java/dune b/ocaml/sdk-gen/java/dune index 36c17dee3df..31fd56640a6 100644 --- a/ocaml/sdk-gen/java/dune +++ b/ocaml/sdk-gen/java/dune @@ -25,7 +25,7 @@ (source_tree templates) (source_tree autogen)) (action - (concurrent + (progn (bash "cp -r autogen/ autogen-out/") (run %{x})))) diff --git a/ocaml/sdk-gen/powershell/dune b/ocaml/sdk-gen/powershell/dune index e56d50b4523..6fdee3e0fcf 100644 --- a/ocaml/sdk-gen/powershell/dune +++ b/ocaml/sdk-gen/powershell/dune @@ -14,7 +14,7 @@ (source_tree autogen)) (action (progn - (concurrent + (progn (bash "cp -r autogen/ autogen-out/") (run %{x})) (bash "rm autogen-out/.gitignore") From f5591810f41944b42fb8b47d43857e16074aca77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Mar 2025 09:25:56 +0000 Subject: [PATCH 151/492] [maintenance]: drop sexprpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unused. Avoids having to maintain the function when the code around it changes. Signed-off-by: Edwin Török --- ocaml/libs/sexpr/dune | 24 ++++++------------------ ocaml/libs/sexpr/sExpr.ml | 15 --------------- ocaml/libs/sexpr/sExpr.mli | 2 -- ocaml/libs/sexpr/sexprpp.ml | 30 ------------------------------ quality-gate.sh | 2 +- 5 files changed, 7 insertions(+), 66 deletions(-) delete mode 100644 ocaml/libs/sexpr/sexprpp.ml diff --git a/ocaml/libs/sexpr/dune b/ocaml/libs/sexpr/dune index 77653c2abcc..6490da85beb 100644 --- a/ocaml/libs/sexpr/dune +++ b/ocaml/libs/sexpr/dune @@ -1,22 +1,10 @@ -(menhir (modules sExprParser)) +(menhir + (modules sExprParser)) (ocamllex sExprLexer) (library - (name sexpr) - (public_name sexpr) - (wrapped false) - (modules (:standard \ sexprpp)) - (libraries - astring - ) -) - -(executable - (modes exe) - (name sexprpp) - (modules sexprpp) - (libraries - sexpr - ) -) + (name sexpr) + (public_name sexpr) + (wrapped false) + (libraries astring)) diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index 488142898c2..4529b2a54b4 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -86,18 +86,3 @@ let string_of sexpr = Buffer.add_string buf "\'" in __string_of_rec sexpr ; Buffer.contents buf - -let rec output_fmt ff = function - | Node list -> - let rec aux ?(first = true) = function - | [] -> - () - | h :: t when first -> - output_fmt ff h ; aux ~first:false t - | h :: t -> - Format.fprintf ff "@;<1 2>%a" output_fmt h ; - aux ~first t - in - Format.fprintf ff "@[(" ; aux list ; Format.fprintf ff ")@]" - | Symbol s | String s -> - Format.fprintf ff "\"%s\"" (escape s) diff --git a/ocaml/libs/sexpr/sExpr.mli b/ocaml/libs/sexpr/sExpr.mli index e7ab5c68a1a..7bf1c61812b 100644 --- a/ocaml/libs/sexpr/sExpr.mli +++ b/ocaml/libs/sexpr/sExpr.mli @@ -16,5 +16,3 @@ type t = Node of t list | Symbol of string | String of string val mkstring : string -> t val string_of : t -> string - -val output_fmt : Format.formatter -> t -> unit diff --git a/ocaml/libs/sexpr/sexprpp.ml b/ocaml/libs/sexpr/sexprpp.ml deleted file mode 100644 index 109ee577169..00000000000 --- a/ocaml/libs/sexpr/sexprpp.ml +++ /dev/null @@ -1,30 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -let lexer = Lexing.from_channel stdin - -let _ = - match Sys.argv with - | [|_; "-nofmt"|] -> - let start_time = Sys.time () in - let sexpr = SExprParser.expr SExprLexer.token lexer in - let parse_time = Sys.time () in - let s = SExpr.string_of sexpr in - let print_time = Sys.time () in - Printf.fprintf stderr "Parse time: %f\nPrint time: %f\n%!" - (parse_time -. start_time) (print_time -. parse_time) ; - print_endline s - | _ -> - let sexpr = SExprParser.expr SExprLexer.token lexer in - let ff = Format.formatter_of_out_channel stdout in - SExpr.output_fmt ff sexpr ; Format.fprintf ff "@." diff --git a/quality-gate.sh b/quality-gate.sh index 605d5142a38..1128e8de98e 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=497 + N=496 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From 5d47e32da8b0074256deae1a6598a446ee62ab66 Mon Sep 17 00:00:00 2001 From: Lucas Pottier <78223918+LuKP17@users.noreply.github.com> Date: Wed, 30 Apr 2025 15:32:55 +0200 Subject: [PATCH 152/492] XAPI website link updated in README XAPI website deadlink updated Signed-off-by: Lucas Pottier <78223918+LuKP17@users.noreply.github.com> --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 1b9243c6ded..b41ab950d87 100644 --- a/README.markdown +++ b/README.markdown @@ -11,7 +11,7 @@ Xen API is written mostly in [OCaml](http://caml.inria.fr/ocaml/) 4.07. Xapi is the main component produced by the Linux Foundation's -[Xapi Project](http://xenproject.org/developers/teams/xapi.html). +[Xapi Project](https://xenproject.org/projects/xapi/). Build and Install ----------------- From 8c447c0c26d309c24c826ef8c087f5ec4530184a Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 1 May 2025 14:19:00 +0100 Subject: [PATCH 153/492] xapi-log/test: Package the cram test in xapi-log This should fix the current xs-opam failure with xapi master. Fixes: bfea6f33240a ("CA-409628: Add backtrace logging test") Signed-off-by: Andrii Sultanov --- ocaml/libs/log/test/dune | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/libs/log/test/dune b/ocaml/libs/log/test/dune index ddfbf07bcc9..299a6155eac 100644 --- a/ocaml/libs/log/test/dune +++ b/ocaml/libs/log/test/dune @@ -3,4 +3,5 @@ (libraries log xapi-stdext-threads threads.posix xapi-backtrace)) (cram + (package xapi-log) (deps log_test.exe)) From 862732ddc0c7f46c980c9a86b3b0e618a9026624 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Fri, 25 Apr 2025 07:03:39 +0100 Subject: [PATCH 154/492] CA-409710: Modify the default backup parameters The current default count of metadata backup files is 25. This number is too many. For the customer in XSI-1873, the backup disk is 500 MB, and each backup file is about 35 MB. So it can't keep 25 backup files. A quick solution is to modify the backup parameters, including: 1. Reduce the default count of metadata backup files to 12. 2. Increase the backup VDI size to 1 GiB so it can keep more backup files. Signed-off-by: Bengang Yuan --- scripts/xe-backup-metadata | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/xe-backup-metadata b/scripts/xe-backup-metadata index 19f0cf0e4a9..88980776b9b 100755 --- a/scripts/xe-backup-metadata +++ b/scripts/xe-backup-metadata @@ -24,7 +24,7 @@ if [ "${master_uuid}" != "${INSTALLATION_UUID}" ]; then exit 1 fi -history_kept=25 +history_kept=12 metadata_version=1 debug=/bin/true @@ -129,7 +129,7 @@ if [ -z "${vdi_uuid}" ]; then echo -n "Creating new backup VDI: " label="Pool Metadata Backup" # the label must match what xapi_vdi.ml is using for backup VDIs - vdi_uuid=$(${XE} vdi-create virtual-size=500MiB sr-uuid="${sr_uuid}" type=user name-label="${label}") + vdi_uuid=$(${XE} vdi-create virtual-size=1GiB sr-uuid="${sr_uuid}" type=user name-label="${label}") init_fs=1 if [ $? -ne 0 ]; then echo failed From 091160444b6941e95718dbdf8b8045e547932781 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 24 Apr 2025 12:46:37 +0100 Subject: [PATCH 155/492] xenopsd: Don't balloon down memory on same-host migration When the VM (and its memory) isn't actually going to be moved anywhere (like in VDI migration to another SR), there's no point in ballooning down, it's actually likely to make VDI migration take longer if swap is engaged. Instead change the ballooning target to memory_actual and wait for any ballooning to be stopped. If no ballooning could have been happening in the first place (dynamic_min = dynamic_max = static_max), then don't do any ballooning manipulations at all. Signed-off-by: Andrii Sultanov --- ocaml/xapi-idl/xen/xenops_interface.ml | 6 ++++ ocaml/xapi/xapi_vm_migrate.ml | 14 +++++---- ocaml/xenopsd/lib/xenops_server.ml | 41 +++++++++++++++++--------- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 68ef01b29c9..4c9da479a78 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -718,6 +718,11 @@ module XenopsAPI (R : RPC) = struct ~description:["when true, verify remote server certificate"] Types.bool in + let localhost_migration = + Param.mk ~name:"localhost_migration" + ~description:["when true, localhost migration is being performed"] + Types.bool + in declare "VM.migrate" [] (debug_info_p @-> vm_id_p @@ -727,6 +732,7 @@ module XenopsAPI (R : RPC) = struct @-> xenops_url @-> compress @-> verify_dest + @-> localhost_migration @-> returning task_id_p err ) diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 60c344d4c65..d767bc5629f 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -244,7 +244,7 @@ let assert_licensed_storage_motion ~__context = let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map ~xenops_url ~compress - ~verify_cert = + ~verify_cert ~localhost_migration = let open Xapi_xenops_queue in let module Client = (val make_client queue_name : XENOPS) in let dbg = Context.string_of_task_and_tracing __context in @@ -254,7 +254,7 @@ let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid progress := "Client.VM.migrate" ; let t1 = Client.VM.migrate dbg vm_uuid xenops_vdi_map xenops_vif_map - xenops_vgpu_map xenops_url compress verify_dest + xenops_vgpu_map xenops_url compress verify_dest localhost_migration in progress := "sync_with_task" ; ignore (Xapi_xenops.sync_with_task __context queue_name t1) @@ -281,7 +281,7 @@ let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid (Printexc.to_string e) !progress try_no max ; migrate_with_retries ~__context ~queue_name ~max ~try_no:(try_no + 1) ~dbg ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map - ~xenops_url ~compress ~verify_cert + ~xenops_url ~compress ~verify_cert ~localhost_migration (* Something else went wrong *) | e -> debug @@ -374,7 +374,8 @@ let pool_migrate ~__context ~vm ~host ~options = Pool_features.assert_enabled ~__context ~f:Features.Xen_motion ; let dbg = Context.string_of_task __context in let localhost = Helpers.get_localhost ~__context in - if host = localhost then + let localhost_migration = host = localhost in + if localhost_migration then info "This is a localhost migration" ; let open Xapi_xenops_queue in let queue_name = queue_of_vm ~__context ~self:vm in @@ -431,7 +432,7 @@ let pool_migrate ~__context ~vm ~host ~options = let verify_cert = Stunnel_client.pool () in migrate_with_retry ~__context ~queue_name ~dbg ~vm_uuid ~xenops_vdi_map:[] ~xenops_vif_map:[] ~xenops_vgpu_map - ~xenops_url ~compress ~verify_cert ; + ~xenops_url ~compress ~verify_cert ~localhost_migration ; (* Delete all record of this VM locally (including caches) *) Xapi_xenops.Xenopsd_metadata.delete ~__context vm_uuid ) @@ -1586,7 +1587,8 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map let dbg = Context.string_of_task __context in migrate_with_retry ~__context ~queue_name ~dbg ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map - ~xenops_url:remote.xenops_url ~compress ~verify_cert ; + ~xenops_url:remote.xenops_url ~compress ~verify_cert + ~localhost_migration:is_same_host ; Xapi_xenops.Xenopsd_metadata.delete ~__context vm_uuid ) with diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 7d3a145acdb..5d2dcb9390b 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -297,6 +297,7 @@ type vm_migrate_op = { ; vmm_tmp_dest_id: Vm.id ; vmm_compress: bool ; vmm_verify_dest: bool + ; vmm_localhost_migration: bool } [@@deriving rpcty] @@ -2628,19 +2629,30 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ~path:(Uri.path_unencoded url ^ snippet ^ id_str) ~query:(Uri.query url) () in - (* CA-78365: set the memory dynamic range to a single value to stop - ballooning. *) - let atomic = - VM_set_memory_dynamic_range - (id, vm.Vm.memory_dynamic_min, vm.Vm.memory_dynamic_min) - in - let (_ : unit) = - perform_atomic ~progress_callback:(fun _ -> ()) atomic t - in - (* Waiting here is not essential but adds a degree of safety and - reducess unnecessary memory copying. *) - ( try B.VM.wait_ballooning t vm - with Xenopsd_error Ballooning_timeout_before_migration -> () + (* CA-78365: set the memory dynamic range to a single value + to stop ballooning, if ballooning is enabled at all *) + ( if vm.memory_dynamic_min <> vm.memory_dynamic_max then + (* There's no need to balloon down when doing localhost migration - + we're not copying any memory in the first place. This would + likely increase VDI migration time as swap would be engaged. + Instead change the ballooning target to the current state *) + let new_balloon_target = + if vmm.vmm_localhost_migration then + (B.VM.get_state vm).memory_actual + else + vm.memory_dynamic_min + in + let atomic = + VM_set_memory_dynamic_range + (id, new_balloon_target, new_balloon_target) + in + let (_ : unit) = + perform_atomic ~progress_callback:(fun _ -> ()) atomic t + in + (* Waiting here is not essential but adds a degree of safety and + reducess unnecessary memory copying. *) + try B.VM.wait_ballooning t vm + with Xenopsd_error Ballooning_timeout_before_migration -> () ) ; (* Find out the VM's current memory_limit: this will be used to allocate memory on the receiver *) @@ -3597,7 +3609,7 @@ module VM = struct let s3resume _ dbg id = queue_operation dbg id (Atomic (VM_s3resume id)) let migrate _context dbg id vmm_vdi_map vmm_vif_map vmm_vgpu_pci_map vmm_url - (compress : bool) (verify_dest : bool) = + (compress : bool) (localhost_migration : bool) (verify_dest : bool) = let tmp_uuid_of uuid ~kind = Printf.sprintf "%s00000000000%c" (String.sub uuid 0 24) (match kind with `dest -> '1' | `src -> '0') @@ -3614,6 +3626,7 @@ module VM = struct ; vmm_tmp_dest_id= tmp_uuid_of id ~kind:`dest ; vmm_compress= compress ; vmm_verify_dest= verify_dest + ; vmm_localhost_migration= localhost_migration } ) From 6f9a8db46584529fa067f90744e43609fde47e5e Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Wed, 30 Apr 2025 14:41:35 +0100 Subject: [PATCH 156/492] CA-410001: Check rrdi.rrd to avoid ds duplicate CA-391651 replaced the function `rrd_add_ds` with an unsafe function `rrd_add_ds_unsafe` in `rrdd_monitor.ml`. Although it has checked if the new ds exists in `rrdi.dss`, if a ds exists in `rrdi.rrd` but not in `rrdi.dss`, it leads the ds duplicates twice in `rrdi.rrd` (E.g. when rrdd plugin starts, it loads local rrdd backup file into `rrdi.rrd` but leaves `rrdi.dss` empty). Solution: Filter out `new_enabled_dss` based on `rrdi.rrd` instead of `rrdi.dss`. Signed-off-by: Bengang Yuan --- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 6fa7d58aefe..5872fb5b6c1 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -51,12 +51,19 @@ let merge_new_dss rrdi dss = !Rrdd_shared.enable_all_dss || ds.ds_default in let default_dss = StringMap.filter should_enable_ds dss in + let ds_names = + Array.fold_left + (fun (acc : StringSet.t) (e : Rrd.ds) : StringSet.t -> + StringSet.add e.ds_name acc + ) + StringSet.empty rrdi.rrd.rrd_dss + in (* NOTE: Only add enabled dss to the live rrd, ignoring non-default ones. This is because non-default ones are added to the RRD when they are enabled. *) let new_enabled_dss = StringMap.filter - (fun ds_name _ -> not (StringMap.mem ds_name rrdi.dss)) + (fun ds_name _ -> not (StringSet.mem ds_name ds_names)) default_dss in (* fold on Map is not tail-recursive, but the depth of the stack should be From d875cfa450adffd4368228f30cfed8c56982427b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 6 May 2025 14:20:41 +0100 Subject: [PATCH 157/492] xapi_xenops: Avoid a race during suspend As described in [#6451](https://github.com/xapi-project/xen-api/issues/6451), a xapi event could prevent update_vm from pulling the latest Xenopsd metadata, overwriting it with stale information. In case of suspend, this would make the snapshot unresumable, raising an assert in xenopsd due to incongruities in memory values. Instead pull the xenopsd metadata right before updating DB.power_state in Xapi_vm_lifecycle.force_state_reset_keep_current_operations, eliminating the window for the race. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_xenops.ml | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 1a7350c2e9d..ce98dcd3a9d 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2049,18 +2049,10 @@ let update_vm ~__context id = ) ; debug "xenopsd event: Updating VM %s power_state <- %s" id (Record_util.vm_power_state_to_string power_state) ; - (* This will mark VBDs, VIFs as detached and clear resident_on - if the VM has permanently shutdown. current-operations - should not be reset as there maybe a checkpoint is ongoing*) - Xapi_vm_lifecycle.force_state_reset_keep_current_operations - ~__context ~self ~value:power_state ; - if power_state = `Running then create_guest_metrics_if_needed () ; - if power_state = `Suspended || power_state = `Halted then ( - Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; - Storage_access.reset ~__context ~vm:self - ) ; - if power_state = `Halted then - Xenopsd_metadata.delete ~__context id ; + + (* NOTE: Pull xenopsd metadata as soon as possible so that + nothing comes inbetween the power state change and the + Xenopsd_metadata.pull and overwrites it. *) ( if power_state = `Suspended then let md = Xenopsd_metadata.pull ~__context id in match md.Metadata.domains with @@ -2071,8 +2063,22 @@ let update_vm ~__context id = debug "VM %s last_booted_record set to %s" (Ref.string_of self) x ) ; - if power_state = `Halted then + + (* This will mark VBDs, VIFs as detached and clear resident_on + if the VM has permanently shutdown. current-operations + should not be reset as there maybe a checkpoint is ongoing*) + Xapi_vm_lifecycle.force_state_reset_keep_current_operations + ~__context ~self ~value:power_state ; + if power_state = `Running then + create_guest_metrics_if_needed () ; + if power_state = `Suspended || power_state = `Halted then ( + Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; + Storage_access.reset ~__context ~vm:self + ) ; + if power_state = `Halted then ( + Xenopsd_metadata.delete ~__context id ; !trigger_xenapi_reregister () + ) with e -> error "Caught %s: while updating VM %s power_state" (Printexc.to_string e) id From 5b06182ea887a4d2506db1be499b6fe8219e75cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 28 Apr 2025 17:22:36 +0100 Subject: [PATCH 158/492] CP-54828: import latest benchmark CLI from ocaml-rpc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Better progress indication, CLI tunables, and the ability to dump raw data to a directory, that can be used by `ministat`. Example usage: ``` mkdir /tmp/pool dune exec ./bench_pool_field.exe --profile=release -- -d /tmp/pool --quota 20 ... dune exec ./bench_pool_field.exe --profile=release -- -d /tmp/pool --quota 20 for i in 'Db.Pool.get_all_records' 'Rpc.t -> pool_t' 'pool_t -> Rpc.t'; do ~/git/ministat/ministat -s "/tmp/pool/${OLD}/${i}.dat" "/tmp/pool/${NEW}/$i.dat" -c 99.5; done ``` Signed-off-by: Edwin Török --- ocaml/tests/bench/bechamel_simple_cli.ml | 168 +++++++++++++++--- ocaml/tests/bench/bench_cached_reads.ml | 3 +- ocaml/tests/bench/bench_throttle2.ml | 31 ++-- ocaml/tests/bench/bench_tracing.ml | 25 ++- ocaml/tests/bench/bench_uuid.ml | 9 +- .../bench/bench_vdi_allowed_operations.ml | 9 +- ocaml/tests/bench/dune | 2 + 7 files changed, 185 insertions(+), 62 deletions(-) diff --git a/ocaml/tests/bench/bechamel_simple_cli.ml b/ocaml/tests/bench/bechamel_simple_cli.ml index e40399cf04d..bcbd574f7f0 100644 --- a/ocaml/tests/bench/bechamel_simple_cli.ml +++ b/ocaml/tests/bench/bechamel_simple_cli.ml @@ -1,3 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + open Bechamel open Toolkit @@ -83,11 +97,19 @@ let thread_workload ~before ~run ~after = a few times. Bechamel has both an iteration count and time limit, so this won't be a problem for slower benchmarks. *) -let limit = 10_000_000 +let default_limit = 10_000_000 -let benchmark ~instances tests = - let cfg = Benchmark.cfg ~limit ~quota:(Time.second 10.0) () in - Benchmark.all cfg instances tests +let benchmark ~instances cfg tests = + let n = List.length tests in + tests + |> List.to_seq + |> Seq.mapi (fun i test -> + let name = Test.Elt.name test in + Format.eprintf "Running benchmark %u/%u %s ...@?" (i + 1) n name ; + let results = Benchmark.run cfg instances test in + Format.eprintf "@." ; (name, results) + ) + |> Hashtbl.of_seq let analyze ~instances raw_results = let ols ~bootstrap = @@ -108,14 +130,13 @@ open Notty_unix let img (window, results) = Bechamel_notty.Multiple.image_of_ols_results ~rect:window ~predictor:Measure.run results - |> eol let not_workload measure = not (Measure.label measure = skip_label) -let run_and_print instances tests = - let results, _ = +let run_and_print cfg instances tests = + let results, raw_results = tests - |> benchmark ~instances + |> benchmark ~instances cfg |> analyze ~instances:(List.filter not_workload instances) in let window = @@ -127,27 +148,132 @@ let run_and_print instances tests = in img (window, results) |> eol |> output_image ; results - |> Hashtbl.iter @@ fun label results -> - if label = Measure.label Instance.monotonic_clock then - let units = Bechamel_notty.Unit.unit_of_label label in - results - |> Hashtbl.iter @@ fun name ols -> - Format.printf "%s (%s):@, %a@." name units Analyze.OLS.pp ols + |> Hashtbl.iter (fun label results -> + if label = Measure.label Instance.monotonic_clock then + let units = Bechamel_notty.Unit.unit_of_label label in + results + |> Hashtbl.iter @@ fun name ols -> + Format.printf "%s (%s):@, %a@." name units Analyze.OLS.pp ols + ) ; + (results, raw_results) -let cli ?(always = []) ?(workloads = []) tests = +let cli ~always ~workloads cfg tests store = let instances = always @ Instance.[monotonic_clock; minor_allocated; major_allocated] @ always in List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) instances ; - Format.printf "@,Running benchmarks (no workloads)@." ; - run_and_print instances tests ; - + Format.eprintf "@,Running benchmarks (no workloads)@." ; + let _, raw_results = run_and_print cfg instances tests in if workloads <> [] then ( - Format.printf "@,Running benchmarks (workloads)@." ; + Format.eprintf "@,Running benchmarks (workloads)@." ; List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) workloads ; (* workloads come first, so that we unpause them in time *) let instances = workloads @ instances @ workloads in - run_and_print instances tests - ) + let _, _ = run_and_print cfg instances tests in + () + ) ; + store + |> Option.iter @@ fun dir -> + let epoch = Unix.gettimeofday () in + raw_results + |> Hashtbl.iter @@ fun label results -> + let label = String.map (function '/' -> '_' | c -> c) label in + let dir = Filename.concat dir (Float.to_string epoch) in + let () = + try Unix.mkdir dir 0o700 + with Unix.Unix_error (Unix.EEXIST, _, _) -> () + in + + let file = Filename.concat dir (label ^ ".dat") in + Out_channel.with_open_text file @@ fun out -> + let label = Measure.label Instance.monotonic_clock in + results.Benchmark.lr + |> Array.iter @@ fun measurement -> + let repeat = Measurement_raw.run measurement in + let avg = Measurement_raw.get ~label measurement /. repeat in + (* ministat wants to compare individual measurements, but all we have is a sum. *) + Printf.fprintf out "%.16g\n" avg + +open Cmdliner + +let cli ?(always = []) ?(workloads = []) tests = + let tests = List.concat_map Test.elements tests in + let cmd = + let test_names = tests |> List.map (fun t -> (Test.Elt.name t, t)) in + let filtered = + let doc = + Printf.sprintf "Choose the benchmarks to run. $(docv) must be %s" + Arg.(doc_alts_enum test_names) + in + Arg.( + value + & pos_all (enum test_names) tests + & info [] ~absent:"all" ~doc ~docv:"BENCHMARK" + ) + and cfg = + let open Term.Syntax in + let+ limit = + Arg.( + value + & opt int default_limit + & info ["limit"] ~doc:"Maximum number of samples" ~docv:"SAMPLES" + ) + and+ quota = + Arg.( + value + & opt float 10.0 (* 1s is too short to reach high batch sizes *) + & info ["quota"] ~doc:"Maximum time per benchmark" ~docv:"SECONDS" + ) + and+ kde = + Arg.( + value + & opt (some int) None + & info ["kde"] ~doc:"Additional samples for Kernel Density Estimation" + ~docv:"SAMPLES" + ) + and+ stabilize = + Arg.( + value + & opt bool false + & info ["stabilize"] ~doc:"Stabilize the GC between measurements" + (* this actually makes measurements more noisy, not less + (although there'll be the ocasional outlier). + When stabilization is disabled we can instead get more measurements within the same amount of time, + which ultimately increases accuracy. + core_bench also has this disabled by default + *) + ) + and+ compaction = + Arg.( + value + & opt bool false + (* avoid large differences between runs (since we no longer stabilize the GC) *) + & info ["compaction"] ~doc:"Enable GC compaction" + ) + and+ start = + Arg.( + value + & opt int 5 (* small batches can have higher overhead: skip them *) + & info ["start"] ~doc:"Starting iteration count" ~docv:"COUNT" + ) + in + Benchmark.cfg ~limit + ~quota:Time.(second quota) + ~kde ~stabilize ~compaction ~start () + and store = + Arg.( + value + & opt (some dir) None + & info ["output-dir"; "d"] + ~doc: + "directory to save the raw results to. The output can be used by \ + ministat" + ~docv:"DIRECTORY" + ) + in + let info = Cmd.info "benchmark" ~doc:"Run benchmarks" in + Cmd.v info Term.(const (cli ~always ~workloads) $ cfg $ filtered $ store) + in + exit (Cmd.eval cmd) diff --git a/ocaml/tests/bench/bench_cached_reads.ml b/ocaml/tests/bench/bench_cached_reads.ml index e81a8991cb4..bcba2ed6cf3 100644 --- a/ocaml/tests/bench/bench_cached_reads.ml +++ b/ocaml/tests/bench/bench_cached_reads.ml @@ -8,7 +8,6 @@ let mutex_workload = Bechamel_simple_cli.thread_workload ~before:ignore ~after:ignore ~run let benchmarks = - Test.make_grouped ~name:"Cached reads" - [Test.make ~name:"Pool_role.is_master" (Staged.stage Pool_role.is_master)] + [Test.make ~name:"Pool_role.is_master" (Staged.stage Pool_role.is_master)] let () = Bechamel_simple_cli.cli ~workloads:[mutex_workload] benchmarks diff --git a/ocaml/tests/bench/bench_throttle2.ml b/ocaml/tests/bench/bench_throttle2.ml index 50582eff4cc..b4f61173420 100644 --- a/ocaml/tests/bench/bench_throttle2.ml +++ b/ocaml/tests/bench/bench_throttle2.ml @@ -66,21 +66,20 @@ let run_tasks'' n (__context, tasks) = Thread.join t let benchmarks = - Test.make_grouped ~name:"Task latency" - [ - Test.make_indexed_with_resource ~name:"task complete+wait latency" - ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks - ~free:free_tasks (fun n -> Staged.stage (run_tasks n) - ) - ; Test.make_indexed_with_resource ~name:"task complete+wait all latency" - ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks - ~free:free_tasks (fun n -> Staged.stage (run_tasks' n) - ) - ; Test.make_indexed_with_resource - ~name:"task complete+wait all latency (thread)" ~args:[1; 10; 100] - Test.multiple ~allocate:allocate_tasks ~free:free_tasks (fun n -> - Staged.stage (run_tasks'' n) - ) - ] + [ + Test.make_indexed_with_resource ~name:"task complete+wait latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks ~free:free_tasks + (fun n -> Staged.stage (run_tasks n) + ) + ; Test.make_indexed_with_resource ~name:"task complete+wait all latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks ~free:free_tasks + (fun n -> Staged.stage (run_tasks' n) + ) + ; Test.make_indexed_with_resource + ~name:"task complete+wait all latency (thread)" ~args:[1; 10; 100] + Test.multiple ~allocate:allocate_tasks ~free:free_tasks (fun n -> + Staged.stage (run_tasks'' n) + ) + ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/bench_tracing.ml b/ocaml/tests/bench/bench_tracing.ml index eebe6e6aef2..ff8d872ee64 100644 --- a/ocaml/tests/bench/bench_tracing.ml +++ b/ocaml/tests/bench/bench_tracing.ml @@ -64,24 +64,23 @@ let test_tracing_on ?(overflow = false) ~name f = allocate () and free t = if overflow then ( - Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; - Tracing.Spans.set_max_traces Bechamel_simple_cli.limit + Tracing.Spans.set_max_spans Bechamel_simple_cli.default_limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.default_limit ) ; free t in Test.make_with_resource ~name ~allocate ~free Test.uniq f let benchmarks = - Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; - Tracing.Spans.set_max_traces Bechamel_simple_cli.limit ; - Test.make_grouped ~name:"tracing" - [ - Test.make ~name:"overhead(off)" (Staged.stage trace_test_off) - ; test_tracing_on ~name:"overhead(on, no span)" (Staged.stage trace_test_off) - ; test_tracing_on ~name:"overhead(on, create span)" - (Staged.stage trace_test_span) - ; test_tracing_on ~overflow:true ~name:"max span overflow" - (Staged.stage trace_test_span) - ] + Tracing.Spans.set_max_spans Bechamel_simple_cli.default_limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.default_limit ; + [ + Test.make ~name:"overhead(off)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, no span)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, create span)" + (Staged.stage trace_test_span) + ; test_tracing_on ~overflow:true ~name:"max span overflow" + (Staged.stage trace_test_span) + ] let () = Bechamel_simple_cli.cli ~always:[export_thread] ~workloads benchmarks diff --git a/ocaml/tests/bench/bench_uuid.ml b/ocaml/tests/bench/bench_uuid.ml index f13118e48db..53e817211a6 100644 --- a/ocaml/tests/bench/bench_uuid.ml +++ b/ocaml/tests/bench/bench_uuid.ml @@ -1,10 +1,9 @@ open Bechamel let benchmarks = - Test.make_grouped ~name:"uuidx creation" - [ - Test.make ~name:"Uuidx.make_uuid_urnd" (Staged.stage Uuidx.make_uuid_urnd) - ; Test.make ~name:"Uuidx.make" (Staged.stage Uuidx.make) - ] + [ + Test.make ~name:"Uuidx.make_uuid_urnd" (Staged.stage Uuidx.make_uuid_urnd) + ; Test.make ~name:"Uuidx.make" (Staged.stage Uuidx.make) + ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/bench_vdi_allowed_operations.ml b/ocaml/tests/bench/bench_vdi_allowed_operations.ml index 9400490fde5..5b13084370a 100644 --- a/ocaml/tests/bench/bench_vdi_allowed_operations.ml +++ b/ocaml/tests/bench/bench_vdi_allowed_operations.ml @@ -50,10 +50,9 @@ let test_vdi_update_allowed_operations (__context, vm_disks) = Xapi_vdi.update_allowed_operations ~__context ~self:vdi_ref let benchmarks = - Test.make_grouped ~name:"update_allowed_operations" - [ - Test.make_with_resource ~name:"VDI" ~allocate ~free:ignore Test.uniq - (Staged.stage test_vdi_update_allowed_operations) - ] + [ + Test.make_with_resource ~name:"VDI" ~allocate ~free:ignore Test.uniq + (Staged.stage test_vdi_update_allowed_operations) + ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index 61f92787759..460a9d276f0 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -6,9 +6,11 @@ bench_cached_reads bench_vdi_allowed_operations) (libraries + dune-build-info tracing bechamel bechamel-notty + cmdliner notty.unix tracing_export threads.posix From 89b512e1008220a5e152274098c1dce33875d198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 28 Apr 2025 11:00:23 +0100 Subject: [PATCH 159/492] CP-54828: benchmark for pool field serialization/deserialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/tests/bench/bench_pool_field.ml | 78 +++++++++++++++++++++++++++ ocaml/tests/bench/dune | 9 +++- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 ocaml/tests/bench/bench_pool_field.ml diff --git a/ocaml/tests/bench/bench_pool_field.ml b/ocaml/tests/bench/bench_pool_field.ml new file mode 100644 index 00000000000..e2239407983 --- /dev/null +++ b/ocaml/tests/bench/bench_pool_field.ml @@ -0,0 +1,78 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Bechamel + +let () = + Suite_init.harness_init () ; + Printexc.record_backtrace true ; + Debug.set_level Syslog.Emerg + +let date = "20250102T03:04:05Z" + +let json_dict = + [ + ("fingerprint_sha256", String.make 64 'd') + ; ("not_before", date) + ; ("not_after", date) + ; ("subject", String.make 100 'x') + ; ("san", String.make 50 'y') + ] + +let json_str = + Rpc.Dict (List.map (fun (k, v) -> (k, Rpc.rpc_of_string v)) json_dict) + |> Jsonrpc.to_string + +let __context = Test_common.make_test_database () + +let () = + let host = Test_common.make_host ~__context () in + let pool = Test_common.make_pool ~__context ~master:host () in + Db.Pool.set_license_server ~__context ~self:pool + ~value:[("jsontest", json_str)] + +let get_all () : API.pool_t list = + Db.Pool.get_all_records ~__context |> List.map snd + +let all = get_all () + +let serialize () : Rpc.t list = all |> List.map API.rpc_of_pool_t + +let serialized = serialize () + +let deserialize () : API.pool_t list = serialized |> List.map API.pool_t_of_rpc + +let str_sexpr_json = SExpr.(string_of (String json_str)) + +let sexpr_of_json_string () = SExpr.(string_of (String json_str)) + +let str_of_sexpr_json () = SExpr.mkstring str_sexpr_json + +let date_of_iso8601 () = Clock.Date.of_iso8601 date + +let local_session_hook () = + Xapi_local_session.local_session_hook ~__context ~session_id:Ref.null + +let benchmarks = + [ + Test.make ~name:"local_session_hook" (Staged.stage local_session_hook) + ; Test.make ~name:"Date.of_iso8601" (Staged.stage date_of_iso8601) + ; Test.make ~name:"sexpr_of_json_string" (Staged.stage sexpr_of_json_string) + ; Test.make ~name:"str_of_sexp_json" (Staged.stage str_of_sexpr_json) + ; Test.make ~name:"Db.Pool.get_all_records" (Staged.stage get_all) + ; Test.make ~name:"pool_t -> Rpc.t" (Staged.stage serialize) + ; Test.make ~name:"Rpc.t -> pool_t" (Staged.stage deserialize) + ] + +let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index 460a9d276f0..bf053a1ef18 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -4,20 +4,27 @@ bench_uuid bench_throttle2 bench_cached_reads - bench_vdi_allowed_operations) + bench_vdi_allowed_operations + bench_pool_field) (libraries dune-build-info tracing bechamel bechamel-notty + clock cmdliner notty.unix tracing_export threads.posix + rpclib.core + rpclib.json + sexpr fmt notty uuid xapi_aux tests_common log + xapi_database + xapi_datamodel xapi_internal)) From 68c761aee8057f58a42d6113dbb4dd2aa6baefe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Mar 2025 09:25:56 +0000 Subject: [PATCH 160/492] CP-54827: sexpr: speed up escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write directly to the buffer, instead of writing to a buffer, returning a string, and writing to a buffer again. Escaping shows up in performance profiles, because `trusted_on_first_use` field in the pool contains a JSON, which contains \" characters that need escaping. Reduces memory allocations from 324.4308 mnw/run to 196.3633 mnw/run. Slight change in performance: ``` Db.Pool.get_all_records: N Min Max Median Avg Stddev x 384 81858.452 513398.88 87109.044 88314.45 21963.255 + 389 78006.031 559052.12 83431.429 84780.535 24315.561 Difference at 95.0% confidence -3533.91 +/- 3267.84 -4.00151% +/- 3.63493% (Student's t, pooled s = 23176.9) ``` Signed-off-by: Edwin Török --- ocaml/libs/sexpr/sExpr.ml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index 4529b2a54b4..85710ef69d0 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -32,10 +32,9 @@ let is_escape_char = function '\\' | '"' | '\'' -> true | _ -> false * - Astring.String.Ascii.escape_string * - Astring.String.Ascii.unescape * that have guaranteed invariants and optimised performances *) -let escape s = +let escape_buf escaped s = let open Astring in - if String.exists is_escape_char s then ( - let escaped = Buffer.create (String.length s + 10) in + if String.exists is_escape_char s then String.iter (fun c -> match c with @@ -48,10 +47,9 @@ let escape s = | _ -> Buffer.add_char escaped c ) - s ; - Buffer.contents escaped - ) else - s + s + else + Buffer.add_string escaped s let unescape s = if String.contains s '\\' then ( @@ -82,7 +80,7 @@ let string_of sexpr = Buffer.add_char buf ')' | Symbol s | String s -> Buffer.add_string buf "\'" ; - Buffer.add_string buf (escape s) ; + escape_buf buf s ; Buffer.add_string buf "\'" in __string_of_rec sexpr ; Buffer.contents buf From 3a44ad5d0a8a0d9e820e4ca7d42e8be933d523ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Mar 2025 09:25:56 +0000 Subject: [PATCH 161/492] CP-54827: sexpr: avoid escaping " MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The non-standard S-expression serializer wraps all strings in `'`. The lexer also ignores every non-escaped character after a `'`, until it sees the next `'` or escape char. So it should be safe to avoid escaping `"`. Unescaping is unchanged: any character can be escaped with `\`, and it returns it unchanged after removing the escape char, so this preserves backwards compatibility when loading an old database. Escaping shows up in performance profiles, because `trusted_on_first_use` field in the pool contains a JSON, which contains \" characters that needed escaping. With this change it won't anymore. `ministat` confirms that there is an improvement: ``` sexpr_of_json_string: N Min Max Median Avg Stddev x 806 1438.7128 63663.127 1490.3661 1594.5095 2192.0548 + 850 911.23529 48528.173 967.25054 1037.7674 1632.6855 Difference at 95.0% confidence -556.742 +/- 185.531 -34.9162% +/- 9.28192% (Student's t, pooled s = 1925.34) str_of_sexp_json: N Min Max Median Avg Stddev x 792 1622.9135 49591.388 1719.5377 1786.3412 1702.6472 + 893 605.37329 3354.8035 626.51812 636.34457 107.24734 Difference at 95.0% confidence -1150 +/- 111.92 -64.3772% +/- 2.26583% (Student's t, pooled s = 1169.88) ``` Signed-off-by: Edwin Török --- ocaml/libs/sexpr/sExpr.ml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index 85710ef69d0..3637ac6abf5 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -23,7 +23,7 @@ let unescape_buf buf s = if Astring.String.fold_left aux false s then Buffer.add_char buf '\\' -let is_escape_char = function '\\' | '"' | '\'' -> true | _ -> false +let is_escape_char = function '\\' | '\'' -> true | _ -> false (* XXX: This escapes "'c'" and "\'c\'" to "\\'c\\'". * They are both unescaped as "'c'". They have been ported @@ -40,8 +40,6 @@ let escape_buf escaped s = match c with | '\\' -> Buffer.add_string escaped "\\\\" - | '"' -> - Buffer.add_string escaped "\\\"" | '\'' -> Buffer.add_string escaped "\\\'" | _ -> From 5f52ded63f2ed0cb903f0f16216dff5602c4731f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Mar 2025 09:25:56 +0000 Subject: [PATCH 162/492] CP-54827: date: speed up of_iso8601 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For backwards compatibility the serialized form looks like 20250319T04:16:24Z, which had to go through several transformations before it was parsed: 2 sscanf, 2 sprintf, and then the Ptime parser. Since we're parsing the format with sscanf anyway, add a fastpath that builds a Ptime.t directly without going through reformatting the string and reparsing it. This speeds up API replies that contain dates, like pool which contains 'telemetry_next_collection' as a date. `ministat` confirms: ``` Date.of_iso8601: N Min Max Median Avg Stddev x 786 1703.462 98255.061 1796.5826 2031.7502 3858.1277 + 905 525.1954 73923.347 558.17972 711.02195 2732.7547 Difference at 95.0% confidence -1320.73 +/- 315.725 -65.0045% +/- 10.3523% (Student's t, pooled s = 3303.82) Db.Pool.get_all_records: N Min Max Median Avg Stddev x 390 76966.273 498179.5 82995.374 84536.667 21266.749 + 401 69709.657 546133 74811.568 76379.246 23782.821 Difference at 95.0% confidence -8157.42 +/- 3147.12 -9.64957% +/- 3.57009% (Student's t, pooled s = 22577.4) Rpc.t -> pool_t : N Min Max Median Avg Stddev x 554 16945.375 267477.23 17620.482 18195.16 10648.914 + 594 11432.375 251226.67 12011.373 12493.367 9824.9986 Difference at 95.0% confidence -5701.79 +/- 1184.38 -31.3369% +/- 5.53746% (Student's t, pooled s = 10230.9) str_of_sexp_json: N Min Max Median Avg Stddev x 893 605.37329 3354.8035 626.51812 636.34457 107.24734 + 911 510.23412 3298.9936 523.73684 532.25916 98.007142 Difference at 95.0% confidence -104.085 +/- 9.47756 -16.3568% +/- 1.36325% (Student's t, pooled s = 102.685) ``` Signed-off-by: Edwin Török --- ocaml/libs/clock/date.ml | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/ocaml/libs/clock/date.ml b/ocaml/libs/clock/date.ml index c668b0c1fb3..2dab4a95443 100644 --- a/ocaml/libs/clock/date.ml +++ b/ocaml/libs/clock/date.ml @@ -64,12 +64,24 @@ let best_effort_iso8601_to_rfc3339 x = x let of_iso8601 x = - let rfc3339 = best_effort_iso8601_to_rfc3339 x in - match Ptime.of_rfc3339 rfc3339 |> Ptime.rfc3339_error_to_msg with - | Error _ -> - invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) - | Ok (t, tz, _) -> - {t; tz} + if String.length x > 5 && x.[4] <> '-' && x.[String.length x - 1] = 'Z' then + (* dates in the DB look like "20250319T04:16:24Z", so decoding that should be the fastpath *) + Scanf.sscanf x "%04i%02i%02iT%02i:%02i:%02iZ" (fun y mon d hh mm ss -> + let tz = 0 in + let date = (y, mon, d) and time = ((hh, mm, ss), tz) in + match Ptime.of_date_time (date, time) with + | Some t -> + {t; tz= Some tz} + | None -> + invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) + ) + else + let rfc3339 = best_effort_iso8601_to_rfc3339 x in + match Ptime.of_rfc3339 rfc3339 |> Ptime.rfc3339_error_to_msg with + | Error _ -> + invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) + | Ok (t, tz, _) -> + {t; tz} let print_tz tz_s = match tz_s with From f642b347a243b107dba6bc99809ca0c0571dc718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 13 Mar 2025 11:25:14 +0000 Subject: [PATCH 163/492] CP-54826: Mutex.execute: avoid costly backtrace formatting in finally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We called the wrong finally here. In general we mark the backtrace in finally as important (which involves formatting it, this could be optimized separately). However Mutex.unlock doesn't raise (in well-behaved code, unless you double unlock), so we can use Fun.protect instead. Hashtbl.find in xapi_local_session.ml raises every time in the RBAC checks, this change avoids the costly backtrace formatting (which was discarded by a try/with later anyway). Signed-off-by: Edwin Török --- ocaml/libs/log/test/log_test.t | 4 ++-- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t index 2d7b5fa1414..b51ea26fca0 100644 --- a/ocaml/libs/log/test/log_test.t +++ b/ocaml/libs/log/test/log_test.t @@ -1,8 +1,8 @@ $ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' [|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") [|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 - [|error||0 |main|backtrace] 2/4 log_test.exe Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 24 - [|error||0 |main|backtrace] 3/4 log_test.exe Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 39 + [|error||0 |main|backtrace] 2/4 log_test.exe Called from file fun.ml, line 33 + [|error||0 |main|backtrace] 3/4 log_test.exe Called from file fun.ml, line 38 [|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 14 [|error||0 |main|backtrace] [| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index c8d85d8b6c5..251b35473a8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -19,8 +19,8 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally module Mutex = struct (** execute the function f with the mutex hold *) let execute lock f = - Mutex.lock lock ; - finally f (fun () -> Mutex.unlock lock) + let finally () = Mutex.unlock lock in + Mutex.lock lock ; Fun.protect ~finally f end module Semaphore = struct From 63f493851e0c15bf1ece98520774c740290995cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 28 Apr 2025 11:00:23 +0100 Subject: [PATCH 164/492] CP-54828: avoid raising exceptions all the time in the RBAC code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use Hashtbl.mem instead of catching the exception from Hashtbl.find. `ministat` confirms: ``` local_session_hook : N Min Max Median Avg Stddev x 1057 117.49169 15160.913 123.5201 142.19665 464.12468 + 1166 38.851635 14365.19 41.619012 57.978805 423.21452 Difference at 95.0% confidence -84.2178 +/- 36.8873 -59.2263% +/- 19.5015% (Student's t, pooled s = 443.137) ``` Although there are also some unexplained, but reproducible slowdowns in code that isn't touched by this commit at all: ``` str_of_sexp_json : N Min Max Median Avg Stddev x 911 510.16149 3075.0192 523.44043 531.48578 97.085991 + 892 605.73462 2951.0948 632.27053 652.10806 95.96889 Difference at 95.0% confidence 120.622 +/- 8.91245 22.6953% +/- 1.88103% (Student's t, pooled s = 96.5349) ``` Perhaps this is due to code layout changes? Signed-off-by: Edwin Török --- ocaml/xapi/xapi_local_session.ml | 7 ++----- ocaml/xapi/xapi_local_session.mli | 2 -- quality-gate.sh | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi_local_session.ml b/ocaml/xapi/xapi_local_session.ml index e356ae87256..709275077b0 100644 --- a/ocaml/xapi/xapi_local_session.ml +++ b/ocaml/xapi/xapi_local_session.ml @@ -31,12 +31,9 @@ let create ~__context ~pool = with_lock m (fun () -> Hashtbl.replace table r session) ; r -let get_record ~__context ~self = with_lock m (fun () -> Hashtbl.find table self) +let has_record ~__context ~self = with_lock m (fun () -> Hashtbl.mem table self) let destroy ~__context ~self = with_lock m (fun () -> Hashtbl.remove table self) let local_session_hook ~__context ~session_id = - try - ignore (get_record ~__context ~self:session_id) ; - true - with _ -> false + has_record ~__context ~self:session_id diff --git a/ocaml/xapi/xapi_local_session.mli b/ocaml/xapi/xapi_local_session.mli index ca8c1810018..8e7c4d31bc9 100644 --- a/ocaml/xapi/xapi_local_session.mli +++ b/ocaml/xapi/xapi_local_session.mli @@ -19,8 +19,6 @@ val get_all : __context:Context.t -> API.ref_session list val create : __context:Context.t -> pool:bool -> API.ref_session -val get_record : __context:Context.t -> self:API.ref_session -> t - val destroy : __context:Context.t -> self:API.ref_session -> unit val local_session_hook : diff --git a/quality-gate.sh b/quality-gate.sh index 1128e8de98e..f3ab4da2045 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -110,7 +110,7 @@ unixgetenv () { } hashtblfind () { - N=35 + N=34 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) From 931048256c495fc3366f126b3f4202fee741f0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 1 May 2024 17:47:30 +0100 Subject: [PATCH 165/492] CP-54828: cache marshaled row values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_record was marshaling the maps all the time, and some of these maps can be quite big. Cache the marshaled form in the row itself. This is a trade-off between memory usage and performance (caching increases global memory usage, but avoids repeated allocations every time get_record is called, which may decrease memory usage/GC pressure under load). Eventually we may be able to drop the marshaled form, but we need to change the db_rpc remote protocol for that (it currently cannot marshal/unmarshal values on its own, because it doesn't have access to the type information). `ministat` confirms: ``` Db.Pool.get_all_records : N Min Max Median Avg Stddev x 400 71008.769 489829.75 75874.584 76957.497 20852.389 + 871 51638.867 469264.11 54400.946 55511.462 19845.992 Difference at 95.0% confidence -21446 +/- 2387.53 -27.8674% +/- 2.84131% (Student's t, pooled s = 20167.8) sexpr_of_json_string : N Min Max Median Avg Stddev x 862 819.74359 44758.053 851.53894 915.0725 1496.4717 + 1688 970.16146 51923.24 1024.1987 1097.3771 1748.4532 Difference at 95.0% confidence 182.305 +/- 136.827 19.9224% +/- 15.8188% (Student's t, pooled s = 1667.57) str_of_sexp_json : N Min Max Median Avg Stddev x 1896 605.00587 2932.3931 627.51199 640.73142 89.48524 + 1812 537.56839 3105.1991 550.25275 558.89761 93.633252 Difference at 95.0% confidence -81.8338 +/- 5.89411 -12.7719% +/- 0.864486% (Student's t, pooled s = 91.5357) ``` Slight increase in 'sexpr_of_json_string'. Signed-off-by: Edwin Török --- ocaml/database/database_test.ml | 5 ++--- ocaml/database/db_backend.ml | 2 +- ocaml/database/db_cache_impl.ml | 13 ++++++++----- ocaml/database/db_cache_types.ml | 20 ++++++++++++++++++-- ocaml/database/db_cache_types.mli | 6 ++++++ ocaml/database/db_xml.ml | 2 +- 6 files changed, 36 insertions(+), 12 deletions(-) diff --git a/ocaml/database/database_test.ml b/ocaml/database/database_test.ml index dc176488f3b..4c82291eb6a 100644 --- a/ocaml/database/database_test.ml +++ b/ocaml/database/database_test.ml @@ -269,9 +269,8 @@ functor let row = Db_cache_types.Table.find r table in let s = Db_cache_types.Row.fold_over_recent g - (fun k _ v acc -> - Printf.sprintf "%s %s=%s" acc k - (Schema.Value.marshal v) + (fun k _ (_, cached) acc -> + Printf.sprintf "%s %s=%s" acc k cached ) row "" in diff --git a/ocaml/database/db_backend.ml b/ocaml/database/db_backend.ml index 92954540c33..935704d4840 100644 --- a/ocaml/database/db_backend.ml +++ b/ocaml/database/db_backend.ml @@ -38,7 +38,7 @@ let blow_away_non_persistent_fields (schema : Schema.t) db = (* Generate a new row given a table schema *) let row schema row : Row.t * int64 = Row.fold - (fun name {Stat.created; modified; _} v (acc, max_upd) -> + (fun name {Stat.created; modified; _} (v, _) (acc, max_upd) -> try let col = Schema.Table.find name schema in let empty = col.Schema.Column.empty in diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 7bbf062bd02..79967e934e8 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -116,11 +116,10 @@ let read_record_internal db tblname objref = else None in - let map_fvlist v = Schema.Value.marshal v in (* Unfortunately the interface distinguishes between Set(Ref _) types and ordinary fields *) Row.fold - (fun k _ d (accum_fvlist, accum_setref) -> + (fun k _ (d, cached) (accum_fvlist, accum_setref) -> let accum_setref = match map_setref_opt k d with | Some v -> @@ -128,7 +127,7 @@ let read_record_internal db tblname objref = | None -> accum_setref in - let accum_fvlist = (k, map_fvlist d) :: accum_fvlist in + let accum_fvlist = (k, cached) :: accum_fvlist in (accum_fvlist, accum_setref) ) row ([], []) @@ -146,7 +145,8 @@ let delete_row_locked t tblname objref = Database.notify (PreDelete (tblname, objref)) db ; update_database t (remove_row tblname objref) ; Database.notify - (Delete (tblname, objref, Row.fold (fun k _ v acc -> (k, v) :: acc) row []) + (Delete + (tblname, objref, Row.fold (fun k _ (v, _) acc -> (k, v) :: acc) row []) ) (get_database t) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) @@ -182,7 +182,10 @@ let create_row_locked t tblname kvs' new_objref = update_database t (add_row tblname new_objref row) ; Database.notify (Create - (tblname, new_objref, Row.fold (fun k _ v acc -> (k, v) :: acc) row []) + ( tblname + , new_objref + , Row.fold (fun k _ (v, _) acc -> (k, v) :: acc) row [] + ) ) (get_database t) diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index be73b91958f..86e5916a550 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -136,10 +136,17 @@ functor end module Row = struct - include Make (Schema.Value) + module CachedValue = struct + type t = Schema.Value.t * string + + let v v = (v, Schema.Value.marshal v) + end + + include Make (CachedValue) let add gen key v = add gen key + @@ CachedValue.v @@ match v with | Schema.Value.String x -> @@ -153,8 +160,17 @@ module Row = struct type value = Schema.Value.t + let iter f t = iter (fun k (v, _) -> f k v) t + + let touch generation key default row = + touch generation key (CachedValue.v default) row + + let update gen key default f row = + let f (v, _) = f v |> CachedValue.v in + update gen key (CachedValue.v default) f row + let find key t = - try find key t + try find key t |> fst with Not_found -> raise (DBCache_NotFound ("missing field", key, "")) let add_defaults g (schema : Schema.Table.t) t = diff --git a/ocaml/database/db_cache_types.mli b/ocaml/database/db_cache_types.mli index 2ffe79c411b..e29f6127211 100644 --- a/ocaml/database/db_cache_types.mli +++ b/ocaml/database/db_cache_types.mli @@ -82,6 +82,12 @@ end module Row : sig include MAP with type value = Schema.Value.t + val fold : (string -> Stat.t -> value * string -> 'b -> 'b) -> t -> 'b -> 'b + (** [fold f t initial] folds [f key stats value acc] over the items in [t] *) + + val fold_over_recent : + Time.t -> (string -> Stat.t -> value * string -> 'b -> 'b) -> t -> 'b -> 'b + val add_defaults : Time.t -> Schema.Table.t -> t -> t (** [add_defaults now schema t]: returns a row which is [t] extended to contain all the columns specified in the schema, with default values set if not already diff --git a/ocaml/database/db_xml.ml b/ocaml/database/db_xml.ml index 1795cdef3bd..1cb653f9b2b 100644 --- a/ocaml/database/db_xml.ml +++ b/ocaml/database/db_xml.ml @@ -67,7 +67,7 @@ module To = struct make_tag "row" (List.rev (Row.fold - (fun k _ v acc -> + (fun k _ (v, _) acc -> (k, Xml_spaces.protect (Schema.Value.marshal v)) :: acc ) row preamble From caaad26cc74b4a2916eccb237d2e6e217e7824b6 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 18 Apr 2025 18:11:37 +0800 Subject: [PATCH 166/492] CA-403867: Block pool join if IP not configured on cluster network Refix with some review comments addressed. To join a host into a pool with cluster enabled, the host must have one and only one IP configured on the joining cluster network. If not, after the host joinied the pool, GFS2 SR cannot be plugged on the joined host because an IP is required in the cluster network. Pool join in this scenario has been blocked in XenCenter, here we will block it inside xapi. Signed-off-by: Gang Ji --- ocaml/idl/datamodel_errors.ml | 8 ++++ ocaml/xapi-consts/api_errors.ml | 6 +++ ocaml/xapi/xapi_pool.ml | 84 +++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 27bb8a7bf98..72faab08065 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -897,6 +897,14 @@ let _ = the pool coordinator. Make sure the sm are of the same versions and try \ again." () ; + error Api_errors.pool_joining_pool_cannot_enable_clustering_on_vlan_network + ["vlan"] ~doc:"The remote pool cannot enable clustering on vlan network" () ; + error Api_errors.pool_joining_host_must_have_only_one_IP_on_clustering_network + [] + ~doc: + "The host joining the pool must have one and only one IP on the \ + clustering network" + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 906e22bf259..6a25fbe48c8 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -757,6 +757,12 @@ let pool_joining_host_ca_certificates_conflict = let pool_joining_sm_features_incompatible = add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" +let pool_joining_pool_cannot_enable_clustering_on_vlan_network = + add_error "POOL_JOINING_POOL_CANNOT_ENABLE_CLUSTERING_ON_VLAN_NETWORK" + +let pool_joining_host_must_have_only_one_IP_on_clustering_network = + add_error "POOL_JOINING_HOST_MUST_HAVE_ONLY_ONE_IP_ON_CLUSTERING_NETWORK" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index b2d6da1122f..c310028fcf3 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -112,6 +112,89 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let one_ip_configured_on_joining_cluster_network () = + let one_ip_configured_on_joining_cluster_network' cluster_host = + match Client.Cluster_host.get_PIF ~rpc ~session_id ~self:cluster_host with + | pif when pif = Ref.null -> + () + | pif -> ( + match Client.PIF.get_VLAN ~rpc ~session_id ~self:pif with + | vlan when vlan > 0L -> + error "Cannot join pool whose clustering is enabled on VLAN network" ; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_pool_cannot_enable_clustering_on_vlan_network + , [Int64.to_string vlan] + ) + ) + | 0L | _ -> ( + let clustering_bridges_in_pool = + ( match + Client.PIF.get_bond_master_of ~rpc ~session_id ~self:pif + with + | [] -> + [pif] + | bonds -> + List.concat_map + (fun bond -> + Client.Bond.get_slaves ~rpc ~session_id ~self:bond + ) + bonds + ) + |> List.map (fun self -> + Client.PIF.get_network ~rpc ~session_id ~self + ) + |> List.map (fun self -> + Client.Network.get_bridge ~rpc ~session_id ~self + ) + in + match + Db.Host.get_PIFs ~__context + ~self:(Helpers.get_localhost ~__context) + |> List.filter (fun p -> + List.exists + (fun b -> + let network = Db.PIF.get_network ~__context ~self:p in + Db.Network.get_bridge ~__context ~self:network = b + ) + clustering_bridges_in_pool + && Db.PIF.get_IP ~__context ~self:p <> "" + ) + with + | [_] -> + () + | _ -> + error + "Cannot join pool as the joining host needs to have one (and \ + only one) IP address on the network that will be used for \ + clustering." ; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_host_must_have_only_one_IP_on_clustering_network + , [] + ) + ) + ) + ) + in + match Client.Cluster_host.get_all ~rpc ~session_id with + | [] -> + () + | ch :: _ -> ( + let cluster = + Client.Cluster_host.get_cluster ~rpc ~session_id ~self:ch + in + match + Client.Cluster.get_pool_auto_join ~rpc ~session_id ~self:cluster + with + | false -> + () + | true -> + one_ip_configured_on_joining_cluster_network' ch + ) + in (* CA-26975: Pool edition MUST match *) let assert_restrictions_match () = let my_edition = @@ -888,6 +971,7 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_management_interface_exists () ; ha_is_not_enable_on_me () ; clustering_is_not_enabled_on_me () ; + one_ip_configured_on_joining_cluster_network () ; ha_is_not_enable_on_the_distant_pool () ; assert_not_joining_myself () ; assert_i_know_of_no_other_hosts () ; From 61f79d24e58d2fcbb4f9b02925ccf702e0271dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 29 Apr 2025 19:54:02 +0100 Subject: [PATCH 167/492] [maintenance]: bool is a keyword in newer C versions, cannot be a parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c index 28fd7f9af89..27b2f632d08 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c @@ -39,11 +39,11 @@ #include "blkgetsize.h" /* Set the TCP_NODELAY flag on a Unix.file_descr */ -CAMLprim value stub_unixext_set_tcp_nodelay (value fd, value bool) +CAMLprim value stub_unixext_set_tcp_nodelay (value fd, value nodelay) { - CAMLparam2 (fd, bool); + CAMLparam2 (fd, nodelay); int c_fd = Int_val(fd); - int opt = (Bool_val(bool)) ? 1 : 0; + int opt = (Bool_val(nodelay)) ? 1 : 0; if (setsockopt(c_fd, IPPROTO_TCP, TCP_NODELAY, (void *)&opt, sizeof(opt)) != 0){ uerror("setsockopt", Nothing); } From 34e29628da7a0d9355aa3a0e9dad020bf5bd0783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 15:50:48 +0100 Subject: [PATCH 168/492] CP-307947: [maintenance]: reformat dune file in database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used 'dune format-dune-file dune >dune && mv dune.tmp dune' No functional change. Signed-off-by: Edwin Török --- ocaml/database/dune | 230 ++++++++++++++++++++++---------------------- 1 file changed, 114 insertions(+), 116 deletions(-) diff --git a/ocaml/database/dune b/ocaml/database/dune index 1b67e2146d9..bc8fb767497 100644 --- a/ocaml/database/dune +++ b/ocaml/database/dune @@ -1,134 +1,132 @@ (ocamllex db_filter_lex) -(menhir (modules db_filter_parse)) +(menhir + (modules db_filter_parse)) (library - (name xapi_schema) - (public_name xapi-schema) - (modules - db_names db_exn schema string_marshall_helper string_unmarshall_helper - test_schemas) - (libraries - sexpr - xapi-log - xapi-stdext-encodings - ) - (wrapped false) - (preprocess (per_module ((pps ppx_sexp_conv) Schema))) -) + (name xapi_schema) + (public_name xapi-schema) + (modules + db_names + db_exn + schema + string_marshall_helper + string_unmarshall_helper + test_schemas) + (libraries sexpr xapi-log xapi-stdext-encodings) + (wrapped false) + (preprocess + (per_module + ((pps ppx_sexp_conv) + Schema)))) (library - (name xapi_database) - (modes best) - (modules - (:standard \ database_server_main db_cache_test db_names db_exn - block_device_io string_marshall_helper string_unmarshall_helper schema - test_schemas unit_test_marshall unit_test_sql)) - (libraries - forkexec - gzip - mtime - mtime.clock.os - clock - rpclib.core - rpclib.json - safe-resources - stunnel - threads.posix - http_lib - httpsvr - uuid - xapi-backtrace - xapi-datamodel - xapi-log - (re_export xapi-schema) - xapi-idl.updates - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-unix - xapi_timeslice - xml-light2 - xmlm - ) - (preprocess - (per_module - ((pps ppx_deriving_rpc) - Db_cache_types Db_filter_types Db_rpc_common_v2 Db_secret_string))) -) + (name xapi_database) + (modes best) + (modules + (:standard + \ + database_server_main + db_cache_test + db_names + db_exn + block_device_io + string_marshall_helper + string_unmarshall_helper + schema + test_schemas + unit_test_marshall + unit_test_sql)) + (libraries + forkexec + gzip + mtime + mtime.clock.os + clock + rpclib.core + rpclib.json + safe-resources + stunnel + threads.posix + http_lib + httpsvr + uuid + xapi-backtrace + xapi-datamodel + xapi-log + (re_export xapi-schema) + xapi-idl.updates + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-unix + xapi_timeslice + xml-light2 + xmlm) + (preprocess + (per_module + ((pps ppx_deriving_rpc) + Db_cache_types + Db_filter_types + Db_rpc_common_v2 + Db_secret_string)))) (executable - (modes exe) - (name block_device_io) - (modules block_device_io) - (libraries - - xapi_database - xapi-log - xapi-stdext-pervasives - xapi-stdext-unix - uuid - ) -) + (modes exe) + (name block_device_io) + (modules block_device_io) + (libraries + xapi_database + xapi-log + xapi-stdext-pervasives + xapi-stdext-unix + uuid)) (install - (package xapi) - (files (block_device_io.exe as block_device_io)) - (section libexec_root) -) + (package xapi) + (files + (block_device_io.exe as block_device_io)) + (section libexec_root)) (executable - (name database_server_main) - (modes exe) - (modules database_server_main) - (libraries - - http_lib - httpsvr - threads.posix - xapi_database - xapi-stdext-threads - xapi-stdext-unix - ) -) + (name database_server_main) + (modes exe) + (modules database_server_main) + (libraries + http_lib + httpsvr + threads.posix + xapi_database + xapi-stdext-threads + xapi-stdext-unix)) (tests - (names unit_test_marshall db_cache_test) - (modes exe) - (package xapi) - (modules db_cache_test unit_test_marshall) - (libraries - alcotest - http_lib - rpclib.xml - sexplib - sexplib0 - xapi_database - xml-light2 - ) -) + (names unit_test_marshall db_cache_test) + (modes exe) + (package xapi) + (modules db_cache_test unit_test_marshall) + (libraries + alcotest + http_lib + rpclib.xml + sexplib + sexplib0 + xapi_database + xml-light2)) (test - (name unit_test_sql) - (modes exe) - (package xapi) - (modules unit_test_sql) - (deps - sql_msg_example.txt - ) - (libraries - alcotest - xapi_database - xml-light2 - ) -) + (name unit_test_sql) + (modes exe) + (package xapi) + (modules unit_test_sql) + (deps sql_msg_example.txt) + (libraries alcotest xapi_database xml-light2)) (rule - (alias runtest) - (deps - (:x database_server_main.exe) - ) - (package xapi) - (action (run %{x} --master db.xml --test)) -) + (alias runtest) + (deps + (:x database_server_main.exe)) + (package xapi) + (action + (run %{x} --master db.xml --test))) From 93928943793d13915f844a2b2b4a147b8c2b23cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 15:19:14 +0100 Subject: [PATCH 169/492] CP-307947: [maintenance]: add .mli files to all database modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-lsp-server's 'Action->Infer interface' was used for this, and then removed internal/unused entries. Having an mli file is useful: * can find dead code more easily if it is clear which functions are internal to a module and which aren't * the impact of refactoring changes is more obvious (did we have to change an mli at all?) * makes it easier to understand what a module does There are some .ml files which only contain types, these are instead renamed to .mli and dune's `modules_without_implementation` feature is used. Executables get an empty .mli (recent versions of dune already do the equivalent of this, but this makes it more obvious). Drop code from .ml files that now show up as unused. Signed-off-by: Edwin Török --- ocaml/database/block_device_io.mli | 1 + ocaml/database/block_device_io_errors.mli | 19 ++ ocaml/database/database_server_main.mli | 1 + ocaml/database/db_action_helper.ml | 11 -- ocaml/database/db_action_helper.mli | 18 ++ ocaml/database/db_cache.mli | 17 ++ ocaml/database/db_cache_test.mli | 1 + ocaml/database/db_connections.mli | 29 +++ ocaml/database/db_exn.mli | 39 ++++ ocaml/database/db_filter.ml | 27 --- ocaml/database/db_filter.mli | 25 +++ ocaml/database/db_filter_lex.mli | 15 ++ ocaml/database/db_filter_types.mli | 31 ++++ ocaml/database/db_globs.mli | 67 +++++++ .../{db_interface.ml => db_interface.mli} | 0 ocaml/database/db_names.mli | 85 +++++++++ ocaml/database/db_ref.mli | 24 +++ ocaml/database/db_rpc_common_v1.mli | 175 ++++++++++++++++++ ocaml/database/db_rpc_common_v2.mli | 70 +++++++ ocaml/database/db_upgrade.mli | 16 ++ ocaml/database/db_xml.ml | 2 - ocaml/database/db_xml.mli | 27 +++ ocaml/database/dune | 1 + ocaml/database/generation.mli | 25 +++ ocaml/database/master_connection.ml | 2 - ocaml/database/master_connection.mli | 43 +++++ ocaml/database/parse_db_conf.ml | 3 - ocaml/database/parse_db_conf.mli | 44 +++++ ocaml/database/schema.mli | 142 ++++++++++++++ ocaml/database/static_vdis_list.mli | 23 +++ ocaml/database/string_marshall_helper.mli | 19 ++ ocaml/database/string_unmarshall_helper.mli | 19 ++ ocaml/database/test_schemas.mli | 17 ++ ocaml/database/unit_test_marshall.mli | 1 + ocaml/database/unit_test_sql.mli | 1 + ocaml/database/xml_spaces.mli | 17 ++ quality-gate.sh | 2 +- 37 files changed, 1013 insertions(+), 46 deletions(-) create mode 100644 ocaml/database/block_device_io.mli create mode 100644 ocaml/database/block_device_io_errors.mli create mode 100644 ocaml/database/database_server_main.mli create mode 100644 ocaml/database/db_action_helper.mli create mode 100644 ocaml/database/db_cache.mli create mode 100644 ocaml/database/db_cache_test.mli create mode 100644 ocaml/database/db_connections.mli create mode 100644 ocaml/database/db_exn.mli create mode 100644 ocaml/database/db_filter.mli create mode 100644 ocaml/database/db_filter_lex.mli create mode 100644 ocaml/database/db_filter_types.mli create mode 100644 ocaml/database/db_globs.mli rename ocaml/database/{db_interface.ml => db_interface.mli} (100%) create mode 100644 ocaml/database/db_names.mli create mode 100644 ocaml/database/db_ref.mli create mode 100644 ocaml/database/db_rpc_common_v1.mli create mode 100644 ocaml/database/db_rpc_common_v2.mli create mode 100644 ocaml/database/db_upgrade.mli create mode 100644 ocaml/database/db_xml.mli create mode 100644 ocaml/database/generation.mli create mode 100644 ocaml/database/master_connection.mli create mode 100644 ocaml/database/parse_db_conf.mli create mode 100644 ocaml/database/schema.mli create mode 100644 ocaml/database/static_vdis_list.mli create mode 100644 ocaml/database/string_marshall_helper.mli create mode 100644 ocaml/database/string_unmarshall_helper.mli create mode 100644 ocaml/database/test_schemas.mli create mode 100644 ocaml/database/unit_test_marshall.mli create mode 100644 ocaml/database/unit_test_sql.mli create mode 100644 ocaml/database/xml_spaces.mli diff --git a/ocaml/database/block_device_io.mli b/ocaml/database/block_device_io.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/block_device_io.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/block_device_io_errors.mli b/ocaml/database/block_device_io_errors.mli new file mode 100644 index 00000000000..260c8b701ef --- /dev/null +++ b/ocaml/database/block_device_io_errors.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val timeout_error_msg : string + +val not_enough_space_error_msg : string + +val not_initialised_error_msg : string diff --git a/ocaml/database/database_server_main.mli b/ocaml/database/database_server_main.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/database_server_main.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/db_action_helper.ml b/ocaml/database/db_action_helper.ml index a553846e3d7..87ff4884933 100644 --- a/ocaml/database/db_action_helper.ml +++ b/ocaml/database/db_action_helper.ml @@ -20,16 +20,5 @@ let __callback : let events_register f = __callback := Some f -let events_unregister () = __callback := None - let events_notify ?snapshot ty op ref = match !__callback with None -> () | Some f -> f ?snapshot ty op ref - -(* -exception Db_set_or_map_parse_fail of string - -let parse_sexpr s : SExpr.t list = - match SExpr_TS.of_string s with - | SExpr.Node xs -> xs - | _ -> raise (Db_set_or_map_parse_fail s) -*) diff --git a/ocaml/database/db_action_helper.mli b/ocaml/database/db_action_helper.mli new file mode 100644 index 00000000000..81fb7eb480d --- /dev/null +++ b/ocaml/database/db_action_helper.mli @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val events_register : + (?snapshot:Rpc.t -> string -> string -> string -> unit) -> unit + +val events_notify : ?snapshot:Rpc.t -> string -> string -> string -> unit diff --git a/ocaml/database/db_cache.mli b/ocaml/database/db_cache.mli new file mode 100644 index 00000000000..0198ddb36b3 --- /dev/null +++ b/ocaml/database/db_cache.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val get : Db_ref.t -> (module Db_interface.DB_ACCESS) + +val apply_delta_to_cache : Redo_log.t -> Db_ref.t -> unit diff --git a/ocaml/database/db_cache_test.mli b/ocaml/database/db_cache_test.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/db_cache_test.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/db_connections.mli b/ocaml/database/db_connections.mli new file mode 100644 index 00000000000..81ec405a581 --- /dev/null +++ b/ocaml/database/db_connections.mli @@ -0,0 +1,29 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val get_dbs_and_gen_counts : unit -> (int64 * Parse_db_conf.db_connection) list + +val choose : + Parse_db_conf.db_connection list -> Parse_db_conf.db_connection option + +val preferred_write_db : unit -> Parse_db_conf.db_connection + +val exit_on_next_flush : bool ref + +val inc_db_flush_thread_refcount : unit -> unit + +val flush_dirty_and_maybe_exit : + Parse_db_conf.db_connection -> int option -> bool + +val flush : Parse_db_conf.db_connection -> Db_cache_types.Database.t -> unit diff --git a/ocaml/database/db_exn.mli b/ocaml/database/db_exn.mli new file mode 100644 index 00000000000..53b686e1f4c --- /dev/null +++ b/ocaml/database/db_exn.mli @@ -0,0 +1,39 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** class * field * uuid * key *) +exception Duplicate_key of string * string * string * string + +(** message * class * key *) +exception DBCache_NotFound of string * string * string + +(** class * field * key *) +exception Uniqueness_constraint_violation of string * string * string + +(** class * field * value *) +exception Integrity_violation of string * string * string + +(** class * _ * uuid *) +exception Read_missing_uuid of string * string * string + +(** class * _ * uuid *) +exception Too_many_values of string * string * string + +exception Remote_db_server_returned_unknown_exception + +exception Remote_db_server_returned_bad_message + +exception Empty_key_in_map + +exception Invalid_value diff --git a/ocaml/database/db_filter.ml b/ocaml/database/db_filter.ml index 25a171c8384..915162ae8db 100644 --- a/ocaml/database/db_filter.ml +++ b/ocaml/database/db_filter.ml @@ -18,33 +18,6 @@ open Db_filter_types -let string_of_val = function - | Field x -> - "Field " ^ x - | Literal x -> - "Literal " ^ x - -let rec string_of_expr = - let binexpr name a b = - Printf.sprintf "%s (%s, %s)" name (string_of_expr a) (string_of_expr b) - in - let binval name a b = - Printf.sprintf "%s (%s, %s)" name (string_of_val a) (string_of_val b) - in - function - | True -> - "True" - | False -> - "False" - | Not x -> - Printf.sprintf "Not ( %s )" (string_of_expr x) - | And (a, b) -> - binexpr "And" a b - | Or (a, b) -> - binexpr "Or" a b - | Eq (a, b) -> - binval "Eq" a b - exception XML_unmarshall_error let val_of_xml xml = diff --git a/ocaml/database/db_filter.mli b/ocaml/database/db_filter.mli new file mode 100644 index 00000000000..392974c470e --- /dev/null +++ b/ocaml/database/db_filter.mli @@ -0,0 +1,25 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception XML_unmarshall_error + +exception Expression_error of (string * exn) + +val expr_of_xml : XMLRPC.xmlrpc -> Db_filter_types.expr + +val expr_of_string : string -> Db_filter_types.expr + +val xml_of_expr : Db_filter_types.expr -> XMLRPC.xmlrpc + +val eval_expr : (Db_filter_types._val -> string) -> Db_filter_types.expr -> bool diff --git a/ocaml/database/db_filter_lex.mli b/ocaml/database/db_filter_lex.mli new file mode 100644 index 00000000000..63834965084 --- /dev/null +++ b/ocaml/database/db_filter_lex.mli @@ -0,0 +1,15 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val lexer : Lexing.lexbuf -> Db_filter_parse.token diff --git a/ocaml/database/db_filter_types.mli b/ocaml/database/db_filter_types.mli new file mode 100644 index 00000000000..1584d7b3497 --- /dev/null +++ b/ocaml/database/db_filter_types.mli @@ -0,0 +1,31 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type _val = Field of string | Literal of string + +val rpc_of__val : _val -> Rpc.t + +val _val_of_rpc : Rpc.t -> _val + +type expr = + | True + | False + | Not of expr + | Eq of _val * _val + | And of expr * expr + | Or of expr * expr + +val rpc_of_expr : expr -> Rpc.t + +val expr_of_rpc : Rpc.t -> expr diff --git a/ocaml/database/db_globs.mli b/ocaml/database/db_globs.mli new file mode 100644 index 00000000000..d51d569907d --- /dev/null +++ b/ocaml/database/db_globs.mli @@ -0,0 +1,67 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val redo_log_block_device_io : string ref + +val redo_log_connect_delay : float ref + +val redo_log_max_block_time_empty : float ref + +val redo_log_max_block_time_read : float ref + +val redo_log_max_block_time_writedelta : float ref + +val redo_log_max_block_time_writedb : float ref + +val redo_log_initial_backoff_delay : int + +val redo_log_exponentiation_base : int + +val redo_log_maximum_backoff_delay : int + +val redo_log_max_dying_processes : int + +val redo_log_comms_socket_stem : string + +val redo_log_max_startup_time : float ref + +val redo_log_length_of_half : int + +val ha_metadata_db : string + +val gen_metadata_db : string + +val static_vdis_dir : string ref + +val http_limit_max_rpc_size : int + +val idempotent_map : bool ref + +val permanent_master_failure_retry_interval : float ref + +val master_connection_reset_timeout : float ref + +val master_connection_retry_timeout : float ref + +val master_connection_default_timeout : float ref + +val pool_secret : Db_secret_string.t ref + +val restart_fn : (unit -> unit) ref + +val https_port : int ref + +val snapshot_db : string + +val db_conf_path : string ref diff --git a/ocaml/database/db_interface.ml b/ocaml/database/db_interface.mli similarity index 100% rename from ocaml/database/db_interface.ml rename to ocaml/database/db_interface.mli diff --git a/ocaml/database/db_names.mli b/ocaml/database/db_names.mli new file mode 100644 index 00000000000..b1bb79d751c --- /dev/null +++ b/ocaml/database/db_names.mli @@ -0,0 +1,85 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val uuid : string + +val ref : string + +val suspend_VDI : string + +val vm : string + +val console : string + +val name_label : string + +val power_state : string + +val allowed_operations : string + +val current_operations : string + +val memory_dynamic_max : string + +val memory_dynamic_min : string + +val memory_static_max : string + +val memory_static_min : string + +val memory_target : string + +val is_a_template : string + +val is_default_template : string + +val is_a_snapshot : string + +val is_control_domain : string + +val platform : string + +val other_config : string + +val metrics : string + +val guest_metrics : string + +val parent : string + +val snapshot_of : string + +val snapshot_time : string + +val transportable_snapshot_id : string + +val resident_on : string + +val scheduled_to_be_resident_on : string + +val domid : string + +val ha_always_run : string + +val host : string + +val pool : string + +val master : string + +val bios_strings : string + +val protection_policy : string + +val snapshot_schedule : string diff --git a/ocaml/database/db_ref.mli b/ocaml/database/db_ref.mli new file mode 100644 index 00000000000..705d7eaafe9 --- /dev/null +++ b/ocaml/database/db_ref.mli @@ -0,0 +1,24 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type t = In_memory of Db_cache_types.Database.t ref ref | Remote + +exception Database_not_in_memory + +val in_memory : Db_cache_types.Database.t ref ref -> t + +val get_database : t -> Db_cache_types.Database.t + +val update_database : + t -> (Db_cache_types.Database.t -> Db_cache_types.Database.t) -> unit diff --git a/ocaml/database/db_rpc_common_v1.mli b/ocaml/database/db_rpc_common_v1.mli new file mode 100644 index 00000000000..baba04f45d9 --- /dev/null +++ b/ocaml/database/db_rpc_common_v1.mli @@ -0,0 +1,175 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception DB_remote_marshall_error + +val marshall_4strings : string * string * string * string -> XMLRPC.xmlrpc + +val unmarshall_4strings : XMLRPC.xmlrpc -> string * string * string * string + +val marshall_3strings : string * string * string -> XMLRPC.xmlrpc + +val unmarshall_3strings : XMLRPC.xmlrpc -> string * string * string + +val marshall_get_table_from_ref_args : string -> XMLRPC.xmlrpc + +val unmarshall_get_table_from_ref_args : XMLRPC.xmlrpc -> string + +val marshall_get_table_from_ref_response : string option -> XMLRPC.xmlrpc + +val unmarshall_get_table_from_ref_response : XMLRPC.xmlrpc -> string option + +val marshall_is_valid_ref_args : string -> XMLRPC.xmlrpc + +val unmarshall_is_valid_ref_args : XMLRPC.xmlrpc -> string + +val marshall_is_valid_ref_response : bool -> XMLRPC.xmlrpc + +val unmarshall_is_valid_ref_response : XMLRPC.xmlrpc -> bool + +val marshall_read_refs_args : string -> XMLRPC.xmlrpc + +val unmarshall_read_refs_args : XMLRPC.xmlrpc -> string + +val marshall_read_refs_response : string list -> XMLRPC.xmlrpc + +val unmarshall_read_refs_response : XMLRPC.xmlrpc -> string list + +val marshall_read_field_where_args : + Db_cache_types.where_record -> XMLRPC.xmlrpc + +val unmarshall_read_field_where_args : + XMLRPC.xmlrpc -> Db_cache_types.where_record + +val marshall_read_field_where_response : string list -> XMLRPC.xmlrpc + +val unmarshall_read_field_where_response : XMLRPC.xmlrpc -> string list + +val marshall_db_get_by_uuid_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_uuid_args : XMLRPC.xmlrpc -> string * string + +val marshall_db_get_by_uuid_response : string -> XMLRPC.xmlrpc + +val marshall_db_get_by_uuid_opt_response : string option -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_uuid_response : XMLRPC.xmlrpc -> string + +val unmarshall_db_get_by_uuid_opt_response : XMLRPC.xmlrpc -> string option + +val marshall_db_get_by_name_label_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_name_label_args : XMLRPC.xmlrpc -> string * string + +val marshall_db_get_by_name_label_response : string list -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_name_label_response : XMLRPC.xmlrpc -> string list + +val marshall_create_row_args : + string * (string * string) list * string -> XMLRPC.xmlrpc + +val unmarshall_create_row_args : + XMLRPC.xmlrpc -> string * (string * string) list * string + +val marshall_create_row_response : unit -> XMLRPC.xmlrpc + +val unmarshall_create_row_response : XMLRPC.xmlrpc -> unit + +val marshall_delete_row_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_delete_row_args : XMLRPC.xmlrpc -> string * string + +val marshall_delete_row_response : unit -> XMLRPC.xmlrpc + +val unmarshall_delete_row_response : XMLRPC.xmlrpc -> unit + +val marshall_write_field_args : + string * string * string * string -> XMLRPC.xmlrpc + +val unmarshall_write_field_args : + XMLRPC.xmlrpc -> string * string * string * string + +val marshall_write_field_response : unit -> XMLRPC.xmlrpc + +val unmarshall_write_field_response : XMLRPC.xmlrpc -> unit + +val marshall_read_field_args : string * string * string -> XMLRPC.xmlrpc + +val unmarshall_read_field_args : XMLRPC.xmlrpc -> string * string * string + +val marshall_read_field_response : string -> XMLRPC.xmlrpc + +val unmarshall_read_field_response : XMLRPC.xmlrpc -> string + +val marshall_find_refs_with_filter_args : + string * Db_filter_types.expr -> XMLRPC.xmlrpc + +val unmarshall_find_refs_with_filter_args : + XMLRPC.xmlrpc -> string * Db_filter_types.expr + +val marshall_find_refs_with_filter_response : string list -> XMLRPC.xmlrpc + +val unmarshall_find_refs_with_filter_response : XMLRPC.xmlrpc -> string list + +val marshall_process_structured_field_args : + (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + -> XMLRPC.xmlrpc + +val unmarshall_process_structured_field_args : + XMLRPC.xmlrpc + -> (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + +val marshall_process_structured_field_response : unit -> XMLRPC.xmlrpc + +val unmarshall_process_structured_field_response : XMLRPC.xmlrpc -> unit + +val marshall_read_record_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_read_record_args : XMLRPC.xmlrpc -> string * string + +val marshall_read_record_response : + (string * string) list * (string * string list) list -> XMLRPC.xmlrpc + +val unmarshall_read_record_response : + XMLRPC.xmlrpc -> (string * string) list * (string * string list) list + +val marshall_read_records_where_args : + string * Db_filter_types.expr -> XMLRPC.xmlrpc + +val unmarshall_read_records_where_args : + XMLRPC.xmlrpc -> string * Db_filter_types.expr + +val marshall_read_records_where_response : + (string * ((string * string) list * (string * string list) list)) list + -> XMLRPC.xmlrpc + +val unmarshall_read_records_where_response : + XMLRPC.xmlrpc + -> (string * ((string * string) list * (string * string list) list)) list + +val marshall_stringstringlist : (string * string) list -> Xml.xml + +val unmarshall_stringstringlist : Xml.xml -> (string * string) list + +val marshall_structured_op : Db_cache_types.structured_op_t -> Xml.xml + +val unmarshall_structured_op : Xml.xml -> Db_cache_types.structured_op_t diff --git a/ocaml/database/db_rpc_common_v2.mli b/ocaml/database/db_rpc_common_v2.mli new file mode 100644 index 00000000000..3555e696096 --- /dev/null +++ b/ocaml/database/db_rpc_common_v2.mli @@ -0,0 +1,70 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Request : sig + type t = + | Get_table_from_ref of string + | Is_valid_ref of string + | Read_refs of string + | Find_refs_with_filter of string * Db_filter_types.expr + | Read_field_where of Db_cache_types.where_record + | Db_get_by_uuid of string * string + | Db_get_by_uuid_opt of string * string + | Db_get_by_name_label of string * string + | Create_row of string * (string * string) list * string + | Delete_row of string * string + | Write_field of string * string * string * string + | Read_field of string * string * string + | Read_record of string * string + | Read_records_where of string * Db_filter_types.expr + | Process_structured_field of + (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + + val t_of_rpc : Rpc.t -> t + + val rpc_of_t : t -> Rpc.t +end + +module Response : sig + type t = + | Get_table_from_ref of string option + | Is_valid_ref of bool + | Read_refs of string list + | Find_refs_with_filter of string list + | Read_field_where of string list + | Db_get_by_uuid of string + | Db_get_by_uuid_opt of string option + | Db_get_by_name_label of string list + | Create_row of unit + | Delete_row of unit + | Write_field of unit + | Read_field of string + | Read_record of (string * string) list * (string * string list) list + | Read_records_where of + (string * ((string * string) list * (string * string list) list)) list + | Process_structured_field of unit + | Dbcache_notfound of string * string * string + | Duplicate_key_of of string * string * string * string + | Uniqueness_constraint_violation of string * string * string + | Read_missing_uuid of string * string * string + | Too_many_values of string * string * string + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t +end diff --git a/ocaml/database/db_upgrade.mli b/ocaml/database/db_upgrade.mli new file mode 100644 index 00000000000..90eb5bf6912 --- /dev/null +++ b/ocaml/database/db_upgrade.mli @@ -0,0 +1,16 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val generic_database_upgrade : + Db_cache_types.Database.t -> Db_cache_types.Database.t diff --git a/ocaml/database/db_xml.ml b/ocaml/database/db_xml.ml index 1cb653f9b2b..642746e5e11 100644 --- a/ocaml/database/db_xml.ml +++ b/ocaml/database/db_xml.ml @@ -39,8 +39,6 @@ module To = struct Xmlm.output output `El_end (* Write out a string *) - let string (output : Xmlm.output) (key : string) (x : string) = - pair output key x (* Write out an int *) let int (output : Xmlm.output) (key : string) (x : int) = diff --git a/ocaml/database/db_xml.mli b/ocaml/database/db_xml.mli new file mode 100644 index 00000000000..24a969c95cb --- /dev/null +++ b/ocaml/database/db_xml.mli @@ -0,0 +1,27 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception Unmarshall_error of string + +module To : sig + val fd : Unix.file_descr -> Db_cache_types.Database.t -> unit + + val file : string -> Db_cache_types.Database.t -> unit +end + +module From : sig + val file : Schema.t -> string -> Db_cache_types.Database.t + + val channel : Schema.t -> in_channel -> Db_cache_types.Database.t +end diff --git a/ocaml/database/dune b/ocaml/database/dune index bc8fb767497..7422d6dc900 100644 --- a/ocaml/database/dune +++ b/ocaml/database/dune @@ -37,6 +37,7 @@ test_schemas unit_test_marshall unit_test_sql)) + (modules_without_implementation db_interface) (libraries forkexec gzip diff --git a/ocaml/database/generation.mli b/ocaml/database/generation.mli new file mode 100644 index 00000000000..4a5dd6c90ed --- /dev/null +++ b/ocaml/database/generation.mli @@ -0,0 +1,25 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type t = int64 + +val of_string : string -> t + +val to_string : int64 -> string + +val add_int : int64 -> int -> int64 + +val null_generation : int64 + +val suffix : string diff --git a/ocaml/database/master_connection.ml b/ocaml/database/master_connection.ml index d7faff1cd62..09fde7dceef 100644 --- a/ocaml/database/master_connection.ml +++ b/ocaml/database/master_connection.ml @@ -20,8 +20,6 @@ open Safe_resources -type db_record = (string * string) list * (string * string list) list - module D = Debug.Make (struct let name = "master_connection" end) open D diff --git a/ocaml/database/master_connection.mli b/ocaml/database/master_connection.mli new file mode 100644 index 00000000000..eca6c22d025 --- /dev/null +++ b/ocaml/database/master_connection.mli @@ -0,0 +1,43 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val delay : Scheduler.PipeDelay.t + +exception Uninitialised + +val is_slave : (unit -> bool) ref + +val get_master_address : (unit -> string) ref + +val master_rpc_path : string ref + +exception Cannot_connect_to_master + +val force_connection_reset : unit -> unit + +val start_master_connection_watchdog : unit -> unit + +exception Goto_handler + +val on_database_connection_established : (unit -> unit) ref + +val open_secure_connection : unit -> unit + +val connection_timeout : float ref + +val restart_on_connection_timeout : bool ref + +exception Content_length_required + +val execute_remote_fn : string -> Db_interface.response diff --git a/ocaml/database/parse_db_conf.ml b/ocaml/database/parse_db_conf.ml index 8eb55ee2afe..67aa5c70d80 100644 --- a/ocaml/database/parse_db_conf.ml +++ b/ocaml/database/parse_db_conf.ml @@ -62,9 +62,6 @@ let generation_read dbconn = try Generation.of_string (Unixext.string_of_file gencount_fname) with _ -> 0L -(* The db conf used for bootstrap purposes, e.g. mounting the 'real' db on shared storage *) -let db_snapshot_dbconn = {dummy_conf with path= Db_globs.snapshot_db} - let from_mode v = match v with Write_limit -> "write_limit" | No_limit -> "no_limit" diff --git a/ocaml/database/parse_db_conf.mli b/ocaml/database/parse_db_conf.mli new file mode 100644 index 00000000000..95004fdb61f --- /dev/null +++ b/ocaml/database/parse_db_conf.mli @@ -0,0 +1,44 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type db_connection_mode = Write_limit | No_limit + +type db_connection = { + path: string + ; mode: db_connection_mode + ; compress: bool + ; write_limit_period: int + ; write_limit_write_cycles: int + ; is_on_remote_storage: bool + ; other_parameters: (string * string) list + ; mutable last_generation_count: Generation.t +} + +val dummy_conf : db_connection + +val make : string -> db_connection + +val generation_filename : db_connection -> string + +val generation_read : db_connection -> Generation.t + +val write_db_conf : db_connection list -> unit + +exception Cannot_parse_database_config_file + +exception Cannot_have_multiple_dbs_in_sr + +val parse_db_conf : string -> db_connection list + +val get_db_conf : string -> db_connection list diff --git a/ocaml/database/schema.mli b/ocaml/database/schema.mli new file mode 100644 index 00000000000..78991544214 --- /dev/null +++ b/ocaml/database/schema.mli @@ -0,0 +1,142 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Type : sig + type t = String | Set | Pairs [@@deriving sexp_of] + + exception Error of t * t +end + +module Value : sig + type t = + | String of string + | Set of string list + | Pairs of (string * string) list + [@@deriving sexp_of] + + val marshal : t -> string + + val unmarshal : Type.t -> string -> t + + module Unsafe_cast : sig + val string : t -> string + + val set : t -> string list + + val pairs : t -> (string * string) list + end +end + +module Column : sig + type t = { + name: string + ; persistent: bool + ; empty: Value.t + ; default: Value.t option + ; ty: Type.t + ; issetref: bool + } + [@@deriving sexp_of] + + val name_of : t -> string +end + +val tabulate : 'a list -> key_fn:('a -> 'b) -> ('b, 'a) Hashtbl.t + +val values_of_table : ('a, 'b) Hashtbl.t -> 'b list + +module Table : sig + type t' = {name: string; columns: Column.t list; persistent: bool} + [@@deriving sexp_of] + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type t = { + name: string + ; columns: (string, Column.t) Hashtbl.t + ; persistent: bool + } + [@@deriving sexp_of] + + val t'_of_t : t -> t' + + val t_of_t' : t' -> t + + val find : string -> t -> Column.t + + val create : name:string -> columns:Column.t list -> persistent:bool -> t + + val name_of : t -> string +end + +type relationship = OneToMany of string * string * string * string + +val sexp_of_relationship : relationship -> Sexplib0.Sexp.t + +module Database : sig + type t' = {tables: Table.t list} + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type t = {tables: (string, Table.t) Hashtbl.t} + + val t_of_t' : t' -> t + + val t'_of_t : t -> t' + + val sexp_of_t : t -> Sexplib0.Sexp.t + + val find : string -> t -> Table.t + + val of_tables : Table.t list -> t +end + +type foreign = (string * string * string) list + +val sexp_of_foreign : foreign -> Sexplib0.Sexp.t + +module ForeignMap : sig + include Map.S with type key = string + + type t' = (key * foreign) list + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type m = foreign t [@@deriving sexp_of] +end + +type t = { + major_vsn: int + ; minor_vsn: int + ; database: Database.t + ; one_to_many: ForeignMap.m + ; many_to_many: ForeignMap.m +} +[@@deriving sexp_of] + +val database : t -> Database.t + +val table : string -> t -> Table.t + +val empty : t + +val is_table_persistent : t -> string -> bool + +val is_field_persistent : t -> string -> string -> bool + +val table_names : t -> string list + +val one_to_many : ForeignMap.key -> t -> foreign + +val many_to_many : ForeignMap.key -> t -> foreign diff --git a/ocaml/database/static_vdis_list.mli b/ocaml/database/static_vdis_list.mli new file mode 100644 index 00000000000..4e59f5b75c1 --- /dev/null +++ b/ocaml/database/static_vdis_list.mli @@ -0,0 +1,23 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type vdi = { + uuid: string + ; reason: string + ; delete_next_boot: bool + ; currently_attached: bool + ; path: string option +} + +val list : unit -> vdi list diff --git a/ocaml/database/string_marshall_helper.mli b/ocaml/database/string_marshall_helper.mli new file mode 100644 index 00000000000..2fc57ff97b7 --- /dev/null +++ b/ocaml/database/string_marshall_helper.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val ensure_utf8_xml : string -> string + +val set : ('a -> string) -> 'a list -> string + +val map : ('a -> string) -> ('b -> string) -> ('a * 'b) list -> string diff --git a/ocaml/database/string_unmarshall_helper.mli b/ocaml/database/string_unmarshall_helper.mli new file mode 100644 index 00000000000..3362c9659f0 --- /dev/null +++ b/ocaml/database/string_unmarshall_helper.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception Failure of string + +val set : (string -> 'a) -> string -> 'a list + +val map : (string -> 'a) -> (string -> 'b) -> string -> ('a * 'b) list diff --git a/ocaml/database/test_schemas.mli b/ocaml/database/test_schemas.mli new file mode 100644 index 00000000000..fa4cb6ebac4 --- /dev/null +++ b/ocaml/database/test_schemas.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val schema : Schema.t + +val many_to_many : Schema.t diff --git a/ocaml/database/unit_test_marshall.mli b/ocaml/database/unit_test_marshall.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/unit_test_marshall.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/unit_test_sql.mli b/ocaml/database/unit_test_sql.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/unit_test_sql.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/xml_spaces.mli b/ocaml/database/xml_spaces.mli new file mode 100644 index 00000000000..4ec7f9016d7 --- /dev/null +++ b/ocaml/database/xml_spaces.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val protect : string -> string + +val unprotect : string -> string diff --git a/quality-gate.sh b/quality-gate.sh index f3ab4da2045..a1f57a67525 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=496 + N=467 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From ddeff67d4cf4b51da691139a7c71d72f7328a0ce Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 9 May 2025 13:58:31 +0100 Subject: [PATCH 170/492] CA-409949 CA-408048 remove unavailable SM types at startup On XS9 a previiously available SM type might become unavailable. Make sure we remove it on upgrade. The previius fix in CA-408048 did not work. Signed-off-by: Christian Lindig --- ocaml/xapi/storage_access.ml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 6f2b540dac4..128640b24df 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -111,13 +111,12 @@ exception Message_switch_failure let on_xapi_start ~__context = (* An SM is either implemented as a plugin - for which we check its presence, or via an API *) - let is_available (_rf, rc) = + let is_available rc = Sys.file_exists rc.API.sM_driver_filename || Version.String.ge rc.sM_required_api_version "5.0" in let existing = Db.SM.get_all_records ~__context - |> List.filter is_available |> List.map (fun (rf, rc) -> (rc.API.sM_type, (rf, rc))) in let explicitly_configured_drivers = @@ -172,6 +171,9 @@ let on_xapi_start ~__context = in (* Add all the running SMAPIv2 drivers *) let to_keep = to_keep @ running_smapiv2_drivers in + let unavailable = + List.filter (fun (_, (_, rc)) -> not (is_available rc)) existing + in (* Delete all records which aren't configured or in-use *) List.iter (fun ty -> @@ -181,7 +183,12 @@ let on_xapi_start ~__context = let self, _ = List.assoc ty existing in try Db.SM.destroy ~__context ~self with _ -> () ) - (Listext.List.set_difference (List.map fst existing) to_keep) ; + (List.concat + [ + Listext.List.set_difference (List.map fst existing) to_keep + ; List.map fst unavailable + ] + ) ; (* Synchronize SMAPIv1 plugins *) From 136e72b3ba86c33d02a1c8add28866dae178d3e6 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 24 Apr 2025 17:36:36 +0100 Subject: [PATCH 171/492] Mux mirror failure check for SXM The current logic in storage_migrate.ml for mirror failure check is specific to tapdisk, hence multiplex it. `pre_deactivate_hook` also has something similar to check for mirror failure, so do something similar there. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 23 +++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 6 ++ ocaml/xapi-storage-script/main.ml | 2 + ocaml/xapi/storage_migrate.ml | 73 +++------------------ ocaml/xapi/storage_mux.ml | 6 ++ ocaml/xapi/storage_smapiv1.ml | 4 ++ ocaml/xapi/storage_smapiv1_migrate.ml | 68 +++++++++++++++++++ ocaml/xapi/storage_smapiv1_wrapper.ml | 6 ++ ocaml/xapi/storage_smapiv3_migrate.ml | 4 ++ 9 files changed, 127 insertions(+), 65 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 28e3752d8c6..55b27bf1a01 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1179,6 +1179,17 @@ module StorageAPI (R : RPC) = struct let receive_cancel2 = declare "DATA.MIRROR.receive_cancel2" [] (dbg_p @-> id_p @-> url_p @-> verify_dest_p @-> returning unit_p err) + + let pre_deactivate_hook = + declare "DATA.MIRROR.pre_deactivate_hook" [] + (dbg_p @-> dp_p @-> sr_p @-> vdi_p @-> returning unit_p err) + + let has_mirror_failed = + let mirror_failed_p = + Param.mk ~name:"mirror_failed_p" ~description:[] Types.bool + in + declare "DATA.MIRROR.has_mirror_failed" [] + (dbg_p @-> id_p @-> sr_p @-> returning mirror_failed_p err) end end @@ -1285,6 +1296,12 @@ module type MIRROR = sig -> url:string -> verify_dest:bool -> unit + + val pre_deactivate_hook : + context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> unit + + val has_mirror_failed : + context -> dbg:debug_info -> mirror_id:Mirror.id -> sr:Sr.t -> bool end module type Server_impl = sig @@ -1759,6 +1776,12 @@ module Server (Impl : Server_impl) () = struct Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~mirror_id ~sr ~url ~verify_dest ) ; + S.DATA.MIRROR.pre_deactivate_hook (fun dbg dp sr vdi -> + Impl.DATA.MIRROR.pre_deactivate_hook () ~dbg ~dp ~sr ~vdi + ) ; + S.DATA.MIRROR.has_mirror_failed (fun dbg mirror_id sr -> + Impl.DATA.MIRROR.has_mirror_failed () ~dbg ~mirror_id ~sr + ) ; S.DATA.import_activate (fun dbg dp sr vdi vm -> Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 01f66eebb21..f01f6e94cb0 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -182,6 +182,12 @@ module DATA = struct let receive_cancel2 ctx ~dbg ~mirror_id ~url ~verify_dest = u "DATA.MIRROR.receive_cancel2" + + let pre_deactivate_hook ctx ~dbg ~dp ~sr ~vdi = + u "DATA.MIRROR.pre_deactivate_hook" + + let has_mirror_failed ctx ~dbg ~mirror_id ~sr = + u "DATA.MIRROR.has_mirror_failed" end end diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 0800223c3f4..98ace6581d2 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1927,6 +1927,8 @@ let bind ~volume_script_dir = S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; + S.DATA.MIRROR.pre_deactivate_hook (u "DATA.MIRROR.pre_deactivate_hook") ; + S.DATA.MIRROR.has_mirror_failed (u "DATA.MIRROR.has_mirror_failed") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; S.TASK.list (u "TASK.list") ; diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 54144ce5a2b..b799e497d3d 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -209,28 +209,20 @@ module MigrateLocal = struct stop ~dbg ~id:mirror_id ; raise e - let stat ~dbg:_ ~id = + let stat ~dbg ~id = let recv_opt = State.find_active_receive_mirror id in let send_opt = State.find_active_local_mirror id in let copy_opt = State.find_active_copy id in + let sr, _vdi = State.of_mirror_id id in let open State in let failed = match send_opt with | Some send_state -> + let (module Migrate_Backend) = choose_backend dbg sr in let failed = - match send_state.Send_state.tapdev with - | Some tapdev -> ( - try - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - stats.Tapctl.Stats.nbd_mirror_failed = 1 - with _ -> - debug "Using cached copy of failure status" ; - send_state.Send_state.failed - ) - | None -> - false + Migrate_Backend.has_mirror_failed () ~dbg ~mirror_id:id ~sr in - send_state.Send_state.failed <- failed ; + send_state.failed <- failed ; failed | None -> false @@ -325,58 +317,9 @@ module MigrateLocal = struct State.clear () end -exception Timeout of Mtime.Span.t - -let reqs_outstanding_timeout = Mtime.Span.(150 * s) - -let pp_time () = Fmt.str "%a" Mtime.Span.pp - -(* Tapdisk should time out after 2 mins. We can wait a little longer *) - -let pre_deactivate_hook ~dbg:_ ~dp:_ ~sr ~vdi = - let open State.Send_state in - let id = State.mirror_id_of (sr, vdi) in - let start = Mtime_clock.counter () in - State.find_active_local_mirror id - |> Option.iter (fun s -> - (* We used to pause here and then check the nbd_mirror_failed key. Now, we poll - until the number of outstanding requests has gone to zero, then check the - status. This avoids confusing the backend (CA-128460) *) - try - match s.tapdev with - | None -> - () - | Some tapdev -> - let open Tapctl in - let ctx = create () in - let rec wait () = - let elapsed = Mtime_clock.count start in - if Mtime.Span.compare elapsed reqs_outstanding_timeout > 0 then - raise (Timeout elapsed) ; - let st = stats ctx tapdev in - if st.Stats.reqs_outstanding > 0 then ( - Thread.delay 1.0 ; wait () - ) else - (st, elapsed) - in - let st, elapsed = wait () in - debug "Got final stats after waiting %a" pp_time elapsed ; - if st.Stats.nbd_mirror_failed = 1 then ( - error "tapdisk reports mirroring failed" ; - s.failed <- true - ) - with - | Timeout elapsed -> - error - "Timeout out after %a waiting for tapdisk to complete all \ - outstanding requests" - pp_time elapsed ; - s.failed <- true - | e -> - error "Caught exception while finally checking mirror state: %s" - (Printexc.to_string e) ; - s.failed <- true - ) +let pre_deactivate_hook ~dbg ~dp ~sr ~vdi = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.pre_deactivate_hook () ~dbg ~dp ~sr ~vdi let post_deactivate_hook ~sr ~vdi ~dp:_ = let open State.Send_state in diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index c9a387f6269..26c7b8db6be 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -857,6 +857,12 @@ module Mux = struct let receive_cancel2 () ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = u __FUNCTION__ + + let pre_deactivate_hook _ctx ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = + u "DATA.MIRROR.pre_deactivate_hook" + + let has_mirror_failed _ctx ~dbg:_ ~mirror_id:_ ~sr:_ = + u "DATA.MIRROR.has_mirror_failed" end end diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 708e35c0a96..3e3b8d61dff 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1157,6 +1157,10 @@ module SMAPIv1 : Server_impl = struct let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = assert false + + let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = assert false + + let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = assert false end end diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index d6156a7fad0..98342156b15 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -757,4 +757,72 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_cancel2 _ctx ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = (* see Storage_migrate.receive_cancel2 *) u __FUNCTION__ + + exception Timeout of Mtime.Span.t + + let reqs_outstanding_timeout = Mtime.Span.(150 * s) + + let pp_time () = Fmt.str "%a" Mtime.Span.pp + + (* Tapdisk should time out after 2 mins. We can wait a little longer *) + + let pre_deactivate_hook _ctx ~dbg:_ ~dp:_ ~sr ~vdi = + let open State.Send_state in + let id = State.mirror_id_of (sr, vdi) in + let start = Mtime_clock.counter () in + State.find_active_local_mirror id + |> Option.iter (fun s -> + (* We used to pause here and then check the nbd_mirror_failed key. Now, we poll + until the number of outstanding requests has gone to zero, then check the + status. This avoids confusing the backend (CA-128460) *) + try + match s.tapdev with + | None -> + () + | Some tapdev -> + let open Tapctl in + let ctx = create () in + let rec wait () = + let elapsed = Mtime_clock.count start in + if Mtime.Span.compare elapsed reqs_outstanding_timeout > 0 + then + raise (Timeout elapsed) ; + let st = stats ctx tapdev in + if st.Stats.reqs_outstanding > 0 then ( + Thread.delay 1.0 ; wait () + ) else + (st, elapsed) + in + let st, elapsed = wait () in + D.debug "Got final stats after waiting %a" pp_time elapsed ; + if st.Stats.nbd_mirror_failed = 1 then ( + D.error "tapdisk reports mirroring failed" ; + s.failed <- true + ) + with + | Timeout elapsed -> + D.error + "Timeout out after %a waiting for tapdisk to complete all \ + outstanding requests" + pp_time elapsed ; + s.failed <- true + | e -> + D.error + "Caught exception while finally checking mirror state: %s" + (Printexc.to_string e) ; + s.failed <- true + ) + + let has_mirror_failed _ctx ~dbg:_ ~mirror_id ~sr:_ = + match State.find_active_local_mirror mirror_id with + | Some {tapdev= Some tapdev; failed; _} -> ( + try + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + stats.Tapctl.Stats.nbd_mirror_failed = 1 + with _ -> + D.debug "Using cached copy of failure status" ; + failed + ) + | _ -> + false end diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 7d418fb9091..4f35e9fcdcd 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1219,6 +1219,12 @@ functor let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = u __FUNCTION__ + + let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = + u __FUNCTION__ + + let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = + u __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 72d9f2bde9d..ebda6b71b0e 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -38,4 +38,8 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_cancel _ctx = u __FUNCTION__ let receive_cancel2 _ctx = u __FUNCTION__ + + let has_mirror_failed _ctx = u __FUNCTION__ + + let pre_deactivate_hook _ctx = u __FUNCTION__ end From b8ce5a26ae9b15b443f2a1dc7b835970d127ae63 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 25 Apr 2025 13:52:57 +0100 Subject: [PATCH 172/492] Bring back DATA.MIRROR.list to storage_mux Previously this was deleted in commit 1fe6389 as it was not multiplexed, but looks like we still need to keep it in storage_mux because sm-cli needs to make rpc calls to storage_mux when trying to list all the mirrors to make it work properly, due to the fact that the sr plugins are stored in the address space of the xapi process. There are other invocations in sm-cli such as `Storage_migrate.start` which may have similar problems. But I have left them alone as I don't any reasonable way of calling them from the cli. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 9 +++++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 2 ++ ocaml/xapi-storage-cli/main.ml | 2 +- ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_mux.ml | 5 +++++ ocaml/xapi/storage_smapiv1.ml | 2 ++ ocaml/xapi/storage_smapiv1_migrate.ml | 2 ++ ocaml/xapi/storage_smapiv1_wrapper.ml | 2 ++ ocaml/xapi/storage_smapiv3_migrate.ml | 2 ++ 9 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 55b27bf1a01..c4730fa8241 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1190,6 +1190,12 @@ module StorageAPI (R : RPC) = struct in declare "DATA.MIRROR.has_mirror_failed" [] (dbg_p @-> id_p @-> sr_p @-> returning mirror_failed_p err) + + let list = + let result_p = + Param.mk ~name:"mirrors" TypeCombinators.(list (pair Mirror.(id, t))) + in + declare "DATA.MIRROR.list" [] (dbg_p @-> returning result_p err) end end @@ -1302,6 +1308,8 @@ module type MIRROR = sig val has_mirror_failed : context -> dbg:debug_info -> mirror_id:Mirror.id -> sr:Sr.t -> bool + + val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list end module type Server_impl = sig @@ -1782,6 +1790,7 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.has_mirror_failed (fun dbg mirror_id sr -> Impl.DATA.MIRROR.has_mirror_failed () ~dbg ~mirror_id ~sr ) ; + S.DATA.MIRROR.list (fun dbg -> Impl.DATA.MIRROR.list () ~dbg) ; S.DATA.import_activate (fun dbg dp sr vdi vm -> Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index f01f6e94cb0..84b1216952e 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -188,6 +188,8 @@ module DATA = struct let has_mirror_failed ctx ~dbg ~mirror_id ~sr = u "DATA.MIRROR.has_mirror_failed" + + let list ctx ~dbg = u "DATA.MIRROR.list" end end diff --git a/ocaml/xapi-storage-cli/main.ml b/ocaml/xapi-storage-cli/main.ml index 6a607f50986..f581d6b6b48 100644 --- a/ocaml/xapi-storage-cli/main.ml +++ b/ocaml/xapi-storage-cli/main.ml @@ -149,7 +149,7 @@ let string_of_file filename = let mirror_list common_opts = wrap common_opts (fun () -> - let list = Storage_migrate.list ~dbg in + let list = Client.DATA.MIRROR.list dbg in List.iter (fun (id, status) -> Printf.printf "%s" (string_of_mirror id status)) list diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 98ace6581d2..39646f9cf11 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1929,6 +1929,7 @@ let bind ~volume_script_dir = S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; S.DATA.MIRROR.pre_deactivate_hook (u "DATA.MIRROR.pre_deactivate_hook") ; S.DATA.MIRROR.has_mirror_failed (u "DATA.MIRROR.has_mirror_failed") ; + S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; S.TASK.list (u "TASK.list") ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 26c7b8db6be..68ba7a9413b 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -863,6 +863,11 @@ module Mux = struct let has_mirror_failed _ctx ~dbg:_ ~mirror_id:_ ~sr:_ = u "DATA.MIRROR.has_mirror_failed" + + let list () ~dbg = + with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> + info "%s dbg: %s" __FUNCTION__ dbg ; + Storage_migrate.list ~dbg:di.log end end diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 3e3b8d61dff..cf6f8299ea9 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1161,6 +1161,8 @@ module SMAPIv1 : Server_impl = struct let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = assert false let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = assert false + + let list _context ~dbg:_ = assert false end end diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 98342156b15..162eba6a772 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -825,4 +825,6 @@ module MIRROR : SMAPIv2_MIRROR = struct ) | _ -> false + + let list _ctx = u __FUNCTION__ end diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 4f35e9fcdcd..9fbbd6e677a 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1225,6 +1225,8 @@ functor let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = u __FUNCTION__ + + let list _context ~dbg:_ = u __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index ebda6b71b0e..22dbda4fb18 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -42,4 +42,6 @@ module MIRROR : SMAPIv2_MIRROR = struct let has_mirror_failed _ctx = u __FUNCTION__ let pre_deactivate_hook _ctx = u __FUNCTION__ + + let list _ctx = u __FUNCTION__ end From 83fbfdc02849dcd77a56e34d938ed1451b0ab8e0 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 30 Apr 2025 16:08:45 +0100 Subject: [PATCH 173/492] Bring back DATA.MIRROR.stat to storage_mux Do the same as DATA.MIRROR.list Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 7 +++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 2 ++ ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_access.ml | 2 +- ocaml/xapi/storage_mux.ml | 5 +++++ ocaml/xapi/storage_smapiv1.ml | 2 ++ ocaml/xapi/storage_smapiv1_migrate.ml | 2 ++ ocaml/xapi/storage_smapiv1_wrapper.ml | 2 ++ ocaml/xapi/storage_smapiv3_migrate.ml | 2 ++ ocaml/xapi/xapi_vm_migrate.ml | 4 ++-- 10 files changed, 26 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index c4730fa8241..42cfd4fe722 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1196,6 +1196,10 @@ module StorageAPI (R : RPC) = struct Param.mk ~name:"mirrors" TypeCombinators.(list (pair Mirror.(id, t))) in declare "DATA.MIRROR.list" [] (dbg_p @-> returning result_p err) + + let stat = + let result_p = Param.mk ~name:"result" Mirror.t in + declare "DATA.MIRROR.stat" [] (dbg_p @-> id_p @-> returning result_p err) end end @@ -1310,6 +1314,8 @@ module type MIRROR = sig context -> dbg:debug_info -> mirror_id:Mirror.id -> sr:Sr.t -> bool val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list + + val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t end module type Server_impl = sig @@ -1791,6 +1797,7 @@ module Server (Impl : Server_impl) () = struct Impl.DATA.MIRROR.has_mirror_failed () ~dbg ~mirror_id ~sr ) ; S.DATA.MIRROR.list (fun dbg -> Impl.DATA.MIRROR.list () ~dbg) ; + S.DATA.MIRROR.stat (fun dbg id -> Impl.DATA.MIRROR.stat () ~dbg ~id) ; S.DATA.import_activate (fun dbg dp sr vdi vm -> Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 84b1216952e..2c72cd94fef 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -190,6 +190,8 @@ module DATA = struct u "DATA.MIRROR.has_mirror_failed" let list ctx ~dbg = u "DATA.MIRROR.list" + + let stat ctx ~dbg ~id = u "DATA.MIRROR.stat" end end diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 39646f9cf11..d370040f79b 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1930,6 +1930,7 @@ let bind ~volume_script_dir = S.DATA.MIRROR.pre_deactivate_hook (u "DATA.MIRROR.pre_deactivate_hook") ; S.DATA.MIRROR.has_mirror_failed (u "DATA.MIRROR.has_mirror_failed") ; S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; + S.DATA.MIRROR.stat (u "DATA.MIRROR.stat") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; S.TASK.list (u "TASK.list") ; diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 6f2b540dac4..ed0b7d77e2a 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -446,7 +446,7 @@ let update_task ~__context id = let update_mirror ~__context id = try let dbg = Context.string_of_task __context in - let m = Storage_migrate.stat ~dbg ~id in + let m = Client.DATA.MIRROR.stat dbg id in if m.Mirror.failed then debug "Mirror %s has failed" id ; let task = get_mirror_task id in diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 68ba7a9413b..1788b19e157 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -868,6 +868,11 @@ module Mux = struct with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> info "%s dbg: %s" __FUNCTION__ dbg ; Storage_migrate.list ~dbg:di.log + + let stat () ~dbg ~id = + with_dbg ~name:"DATA.MIRROR.stat" ~dbg @@ fun di -> + info "%s dbg: %s mirror_id: %s" __FUNCTION__ di.log id ; + Storage_migrate.stat ~dbg:di.log ~id end end diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index cf6f8299ea9..792cae733b3 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1163,6 +1163,8 @@ module SMAPIv1 : Server_impl = struct let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = assert false let list _context ~dbg:_ = assert false + + let stat _context ~dbg:_ ~id:_ = assert false end end diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 162eba6a772..91aa4741f04 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -827,4 +827,6 @@ module MIRROR : SMAPIv2_MIRROR = struct false let list _ctx = u __FUNCTION__ + + let stat _ctx = u __FUNCTION__ end diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 9fbbd6e677a..245380f6b48 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1227,6 +1227,8 @@ functor u __FUNCTION__ let list _context ~dbg:_ = u __FUNCTION__ + + let stat _context ~dbg:_ ~id:_ = u __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 22dbda4fb18..41f3b4a0ff9 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -44,4 +44,6 @@ module MIRROR : SMAPIv2_MIRROR = struct let pre_deactivate_hook _ctx = u __FUNCTION__ let list _ctx = u __FUNCTION__ + + let stat _ctx = u __FUNCTION__ end diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 0dbb8265aff..e28242cadf1 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1078,7 +1078,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far (None, vdi.vdi) ) else let mirrorid = task_result |> mirror_of_task dbg in - let m = Storage_migrate.stat ~dbg ~id:mirrorid in + let m = SMAPI.DATA.MIRROR.stat dbg mirrorid in (Some mirrorid, m.Mirror.dest_vdi) in so_far := Int64.add !so_far vconf.size ; @@ -1107,7 +1107,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far match mirror_id with | Some mid -> ignore (Storage_access.unregister_mirror mid) ; - let m = Storage_migrate.stat ~dbg ~id:mid in + let m = SMAPI.DATA.MIRROR.stat dbg mid in (try Storage_migrate.stop ~dbg ~id:mid with _ -> ()) ; m.Mirror.failed | None -> From bb994e0562d954a206a498e96818cfa928ddbc84 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 7 May 2025 17:28:15 +0100 Subject: [PATCH 174/492] Add more debugging to storage_smapiv1_migrate Signed-off-by: Vincent Liu --- ocaml/xapi/storage_smapiv1_migrate.ml | 45 +++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 91aa4741f04..696bffd789d 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -25,6 +25,12 @@ module SXM = Storage_migrate_helper.SXM module type SMAPIv2_MIRROR = Storage_interface.MIRROR +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + +let s_of_vm = Storage_interface.Vm.string_of + let with_activated_disk ~dbg ~sr ~vdi ~dp ~vm f = let attached_vdi = Option.map @@ -389,6 +395,11 @@ end let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url ~dest_sr ~verify_dest ~(remote_mirror : Mirror.mirror_receive_result_vhd_t) = + D.debug + "%s dbg:%s dp:%s sr:%s vdi:%s mirror_vm:%s live_vm:%s mirror_id:%s url:%s \ + dest_sr:%s verify_dest:%B" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + (s_of_vm live_vm) mirror_id url (s_of_sr dest_sr) verify_dest ; let remote_vdi = remote_mirror.mirror_vdi.vdi in let mirror_dp = remote_mirror.mirror_datapath in @@ -482,6 +493,9 @@ let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url tapdev let mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi = + D.debug "%s dbg:%s sr:%s dp:%s mirror_id:%s local_vdi:%s" __FUNCTION__ dbg + (s_of_sr sr) dp mirror_id + (string_of_vdi_info local_vdi) ; SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ (string_of_vdi_info local_vdi) ; let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in @@ -556,6 +570,11 @@ module MIRROR : SMAPIv2_MIRROR = struct let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = + D.debug + "%s dbg: %s dp: %s sr: %s vdi:%s mirror_vm:%s mirror_id: %s live_vm: %s \ + url:%s dest_sr:%s verify_dest:%B" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + mirror_id (s_of_vm live_vm) url (s_of_sr dest_sr) verify_dest ; let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in @@ -699,11 +718,18 @@ module MIRROR : SMAPIv2_MIRROR = struct raise e let receive_start _ctx ~dbg ~sr ~vdi_info ~id ~similar = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s" __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + id ; receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") (module Local) let receive_start2 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" + __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + mirror_id (s_of_vm vm) url verify_dest ; let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in @@ -711,12 +737,15 @@ module MIRROR : SMAPIv2_MIRROR = struct (module Remote) let receive_finalize _ctx ~dbg ~id = + D.debug "%s dbg:%s id: %s" __FUNCTION__ dbg id ; let recv_state = State.find_active_receive_mirror id in let open State.Receive_state in Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; State.remove_receive_mirror id - let receive_finalize2 _ctx ~dbg ~mirror_id ~sr:_ ~url ~verify_dest = + let receive_finalize2 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg + mirror_id (s_of_sr sr) url verify_dest ; let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in @@ -740,6 +769,7 @@ module MIRROR : SMAPIv2_MIRROR = struct State.remove_receive_mirror mirror_id let receive_cancel _ctx ~dbg ~id = + D.debug "%s dbg:%s mirror_id:%s" __FUNCTION__ dbg id ; let receive_state = State.find_active_receive_mirror id in let open State.Receive_state in Option.iter @@ -766,7 +796,9 @@ module MIRROR : SMAPIv2_MIRROR = struct (* Tapdisk should time out after 2 mins. We can wait a little longer *) - let pre_deactivate_hook _ctx ~dbg:_ ~dp:_ ~sr ~vdi = + let pre_deactivate_hook _ctx ~dbg ~dp ~sr ~vdi = + D.debug "%s dbg:%s dp:%s sr:%s vdi:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) ; let open State.Send_state in let id = State.mirror_id_of (sr, vdi) in let start = Mtime_clock.counter () in @@ -803,13 +835,14 @@ module MIRROR : SMAPIv2_MIRROR = struct | Timeout elapsed -> D.error "Timeout out after %a waiting for tapdisk to complete all \ - outstanding requests" - pp_time elapsed ; + outstanding requests while migrating vdi %s of domain %s" + pp_time elapsed (s_of_vdi vdi) (s_of_vm s.live_vm) ; s.failed <- true | e -> D.error - "Caught exception while finally checking mirror state: %s" - (Printexc.to_string e) ; + "Caught exception while finally checking mirror state: %s \ + when migrating vdi %s of domain %s" + (Printexc.to_string e) (s_of_vdi vdi) (s_of_vm s.live_vm) ; s.failed <- true ) From aea9ab515263c79974ffab4f3206f91e06335dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 175/492] CP-307958: Benchmark for Db_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change Signed-off-by: Edwin Török --- ocaml/tests/bench/bench_pool_field.ml | 22 ++++++++++++++++++++++ ocaml/tests/bench/dune | 3 ++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/bench/bench_pool_field.ml b/ocaml/tests/bench/bench_pool_field.ml index e2239407983..d30ab4d3735 100644 --- a/ocaml/tests/bench/bench_pool_field.ml +++ b/ocaml/tests/bench/bench_pool_field.ml @@ -64,6 +64,24 @@ let date_of_iso8601 () = Clock.Date.of_iso8601 date let local_session_hook () = Xapi_local_session.local_session_hook ~__context ~session_id:Ref.null +let atomic = Atomic.make 0 + +let atomic_inc () = Atomic.incr atomic + +let mutex = Mutex.create () + +let locked_ref = ref 0 + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +let inc_locked () = incr locked_ref + +let inc_with_mutex () = with_lock mutex inc_locked + +let noop () = Sys.opaque_identity () + +let db_lock_uncontended () : unit = Xapi_database.Db_lock.with_lock noop + let benchmarks = [ Test.make ~name:"local_session_hook" (Staged.stage local_session_hook) @@ -73,6 +91,10 @@ let benchmarks = ; Test.make ~name:"Db.Pool.get_all_records" (Staged.stage get_all) ; Test.make ~name:"pool_t -> Rpc.t" (Staged.stage serialize) ; Test.make ~name:"Rpc.t -> pool_t" (Staged.stage deserialize) + ; Test.make ~name:"Atomic.incr" (Staged.stage atomic_inc) + ; Test.make ~name:"Mutex+incr" (Staged.stage inc_with_mutex) + ; Test.make ~name:"Db_lock.with_lock uncontended" + (Staged.stage db_lock_uncontended) ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index bf053a1ef18..fe0af458c14 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -27,4 +27,5 @@ log xapi_database xapi_datamodel - xapi_internal)) + xapi_internal + xapi-stdext-threads)) From 3da9ef1106a040bca6902dfb5ea1a8ddb9de122a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 176/492] CP-307958: Benchmark for rpc_of_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change Signed-off-by: Edwin Török --- ocaml/tests/bench/bench_pool_field.ml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ocaml/tests/bench/bench_pool_field.ml b/ocaml/tests/bench/bench_pool_field.ml index d30ab4d3735..ee1886e9986 100644 --- a/ocaml/tests/bench/bench_pool_field.ml +++ b/ocaml/tests/bench/bench_pool_field.ml @@ -82,6 +82,19 @@ let noop () = Sys.opaque_identity () let db_lock_uncontended () : unit = Xapi_database.Db_lock.with_lock noop +let event = + let open Event_types in + { + id= "id" + ; ts= "1000" + ; ty= "test" + ; op= `add + ; reference= "test" + ; snapshot= Some (Rpc.Dict []) + } + +let test_rpc_of_event () = Event_types.rpc_of_event event + let benchmarks = [ Test.make ~name:"local_session_hook" (Staged.stage local_session_hook) @@ -95,6 +108,7 @@ let benchmarks = ; Test.make ~name:"Mutex+incr" (Staged.stage inc_with_mutex) ; Test.make ~name:"Db_lock.with_lock uncontended" (Staged.stage db_lock_uncontended) + ; Test.make ~name:"rpc_of_event" (Staged.stage test_rpc_of_event) ] let () = Bechamel_simple_cli.cli benchmarks From d215b36c68d9070225f434f8de389a6b23836a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 177/492] CP-307958: Generate event RPC more directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use an attribute to change the name of the fields instead of serializing with one name, and then mapping to the other. `ministat` confirms an improvement on `rpc_of_event`: ``` N Min Max Median Avg Stddev x 922 440.79126 46094.975 467.83727 563.79259 1676.0843 + 1131 51.346821 33407.866 58.715393 115.98006 1105.4178 Difference at 95.0% confidence -447.813 +/- 120.966 -79.4286% +/- 13.1488% (Student's t, pooled s = 1390.95) ``` Signed-off-by: Edwin Török --- ocaml/xapi-types/event_types.ml | 34 ++++++--------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/ocaml/xapi-types/event_types.ml b/ocaml/xapi-types/event_types.ml index 83c82b0bc8d..46ea2d310df 100644 --- a/ocaml/xapi-types/event_types.ml +++ b/ocaml/xapi-types/event_types.ml @@ -20,37 +20,15 @@ let rpc_of_op = API.rpc_of_event_operation let op_of_rpc = API.event_operation_of_rpc type event = { - id: string - ; ts: string - ; ty: string - ; op: op - ; reference: string - ; snapshot: Rpc.t option + id: string [@key "id"] + ; ts: string [@key "timestamp"] + ; ty: string [@key "class"] + ; op: op [@key "operation"] + ; reference: string [@key "ref"] + ; snapshot: Rpc.t option [@key "snapshot"] } [@@deriving rpc] -let ev_struct_remap = - [ - ("id", "id") - ; ("ts", "timestamp") - ; ("ty", "class") - ; ("op", "operation") - ; ("reference", "ref") - ; ("snapshot", "snapshot") - ] - -let remap map str = - match str with - | Rpc.Dict d -> - Rpc.Dict (List.map (fun (k, v) -> (List.assoc k map, v)) d) - | _ -> - str - -let rpc_of_event ev = remap ev_struct_remap (rpc_of_event ev) - -let event_of_rpc rpc = - event_of_rpc (remap (List.map (fun (k, v) -> (v, k)) ev_struct_remap) rpc) - type events = event list [@@deriving rpc] type token = string [@@deriving rpc] From 8b030f1fb3045b682e84b6d7554dcdee2da54b51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 178/492] CP-307958: Replace mutexes with Atomic where possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atomic has lower overhead, and can be used for simple situations (incrementing an integer, setting a boolean). ``` Mutex+incr (ns): { monotonic-clock per run = 32.272091 (confidence: 32.365531 to 32.170138); r² = Some 0.999441 } Atomic.incr (ns): { monotonic-clock per run = 2.938678 (confidence: 2.944486 to 2.933688); r² = Some 0.999857 } ``` No functional change Signed-off-by: Edwin Török --- ocaml/database/db_connections.ml | 16 +++---------- ocaml/database/db_remote_cache_access_v1.ml | 7 ++---- ocaml/database/redo_log.ml | 26 +++++++-------------- 3 files changed, 14 insertions(+), 35 deletions(-) diff --git a/ocaml/database/db_connections.ml b/ocaml/database/db_connections.ml index 9b390967fce..18152a18c4e 100644 --- a/ocaml/database/db_connections.ml +++ b/ocaml/database/db_connections.ml @@ -62,22 +62,12 @@ let preferred_write_db () = List.hd (Db_conn_store.read_db_connections ()) let exit_on_next_flush = ref false (* db flushing thread refcount: the last thread out of the door does the exit(0) when flush_on_exit is true *) -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +let db_flush_thread_refcount = Atomic.make 0 -let db_flush_thread_refcount_m = Mutex.create () - -let db_flush_thread_refcount = ref 0 - -let inc_db_flush_thread_refcount () = - with_lock db_flush_thread_refcount_m (fun () -> - db_flush_thread_refcount := !db_flush_thread_refcount + 1 - ) +let inc_db_flush_thread_refcount () = Atomic.incr db_flush_thread_refcount let dec_and_read_db_flush_thread_refcount () = - with_lock db_flush_thread_refcount_m (fun () -> - db_flush_thread_refcount := !db_flush_thread_refcount - 1 ; - !db_flush_thread_refcount - ) + Atomic.fetch_and_add db_flush_thread_refcount (-1) let pre_exit_hook () = (* We're about to exit. Close the active redo logs. *) diff --git a/ocaml/database/db_remote_cache_access_v1.ml b/ocaml/database/db_remote_cache_access_v1.ml index 1499fa3fc13..fe0db8cad25 100644 --- a/ocaml/database/db_remote_cache_access_v1.ml +++ b/ocaml/database/db_remote_cache_access_v1.ml @@ -6,9 +6,7 @@ module DBCacheRemoteListener = struct exception DBCacheListenerUnknownMessageName of string - let ctr_mutex = Mutex.create () - - let calls_processed = ref 0 + let calls_processed = Atomic.make 0 let success xml = let resp = XMLRPC.To.array [XMLRPC.To.string "success"; xml] in @@ -34,8 +32,7 @@ module DBCacheRemoteListener = struct Note that, although the messages still contain the pool_secret for historical reasons, access has already been applied by the RBAC code in Xapi_http.add_handler. *) let process_xmlrpc xml = - let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute in - with_lock ctr_mutex (fun () -> calls_processed := !calls_processed + 1) ; + Atomic.incr calls_processed ; let fn_name, args = match XMLRPC.From.array (fun x -> x) xml with | [fn_name; _; args] -> diff --git a/ocaml/database/redo_log.ml b/ocaml/database/redo_log.ml index 429646dcce7..8c2c95928d7 100644 --- a/ocaml/database/redo_log.ml +++ b/ocaml/database/redo_log.ml @@ -77,8 +77,7 @@ type redo_log_conf = { ; backoff_delay: int ref ; sock: Unix.file_descr option ref ; pid: (Forkhelpers.pidty * string * string) option ref - ; dying_processes_mutex: Mutex.t - ; num_dying_processes: int ref + ; num_dying_processes: int Atomic.t ; mutex: Mutex.t (** exclusive access to this configuration *) } @@ -585,14 +584,10 @@ let shutdown log = (Thread.create (fun () -> D.debug "Waiting for I/O process with pid %d to die..." ipid ; - with_lock log.dying_processes_mutex (fun () -> - log.num_dying_processes := !(log.num_dying_processes) + 1 - ) ; + Atomic.incr log.num_dying_processes ; ignore (Forkhelpers.waitpid p) ; D.debug "Finished waiting for process with pid %d" ipid ; - with_lock log.dying_processes_mutex (fun () -> - log.num_dying_processes := !(log.num_dying_processes) - 1 - ) + Atomic.decr log.num_dying_processes ) () ) ; @@ -633,13 +628,11 @@ let startup log = () (* We're already started *) | None -> ( (* Don't start if there are already some processes hanging around *) - with_lock log.dying_processes_mutex (fun () -> - if - !(log.num_dying_processes) - >= Db_globs.redo_log_max_dying_processes - then - raise TooManyProcesses - ) ; + if + Atomic.get log.num_dying_processes + >= Db_globs.redo_log_max_dying_processes + then + raise TooManyProcesses ; match !(log.device) with | None -> D.info "Could not find block device" ; @@ -793,8 +786,7 @@ let create ~name ~state_change_callback ~read_only = ; backoff_delay= ref Db_globs.redo_log_initial_backoff_delay ; sock= ref None ; pid= ref None - ; dying_processes_mutex= Mutex.create () - ; num_dying_processes= ref 0 + ; num_dying_processes= Atomic.make 0 ; mutex= Mutex.create () } in From 40e57b0bc12ea93434b1b00fcb6e6229f35293dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 179/492] CP-307958: remove condition from Db_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No measurable performance change, but this makes it clearer for deadlock detection algorithms that we are waiting to acquire a lock. It'll also make it clearer for OCaml 5's thread sanitizer that values are modified with a lock held. Signed-off-by: Edwin Török --- ocaml/database/db_lock.ml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ocaml/database/db_lock.ml b/ocaml/database/db_lock.ml index e893050f58c..648ca94dc26 100644 --- a/ocaml/database/db_lock.ml +++ b/ocaml/database/db_lock.ml @@ -59,9 +59,7 @@ module ReentrantLock : REENTRANT_LOCK = struct type t = { holder: tid option Atomic.t (* The holder of the lock *) ; mutable holds: int (* How many holds the holder has on the lock *) - ; lock: Mutex.t (* Barrier to signal waiting threads *) - ; condition: Condition.t - (* Waiting threads are signalled via this condition to reattempt to acquire the lock *) + ; lock: Mutex.t (* Mutex held by the holder thread *) ; statistics: statistics (* Bookkeeping of time taken to acquire lock *) } @@ -73,7 +71,6 @@ module ReentrantLock : REENTRANT_LOCK = struct holder= Atomic.make None ; holds= 0 ; lock= Mutex.create () - ; condition= Condition.create () ; statistics= create_statistics () } @@ -94,9 +91,7 @@ module ReentrantLock : REENTRANT_LOCK = struct let intended = Some me in let counter = Mtime_clock.counter () in Mutex.lock l.lock ; - while not (Atomic.compare_and_set l.holder None intended) do - Condition.wait l.condition l.lock - done ; + Atomic.set l.holder intended ; lock_acquired () ; let stats = l.statistics in let delta = Clock.Timer.span_to_s (Mtime_clock.count counter) in @@ -104,7 +99,7 @@ module ReentrantLock : REENTRANT_LOCK = struct stats.min_time <- Float.min delta stats.min_time ; stats.max_time <- Float.max delta stats.max_time ; stats.acquires <- stats.acquires + 1 ; - Mutex.unlock l.lock ; + (* do not unlock, it will be done when holds reaches 0 instead *) l.holds <- 1 let unlock l = @@ -114,10 +109,8 @@ module ReentrantLock : REENTRANT_LOCK = struct l.holds <- l.holds - 1 ; if l.holds = 0 then ( let () = Atomic.set l.holder None in - Mutex.lock l.lock ; - Condition.signal l.condition ; - Mutex.unlock l.lock ; - lock_released () + (* the lock is held (acquired in [lock]), we only need to unlock *) + Mutex.unlock l.lock ; lock_released () ) | _ -> failwith From 10b4f497248b99add09bf8284f3bd250cac0773b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 180/492] CP-307958: Benchmark for Db writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change Signed-off-by: Edwin Török --- ocaml/tests/bench/bench_pool_field.ml | 36 ++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/ocaml/tests/bench/bench_pool_field.ml b/ocaml/tests/bench/bench_pool_field.ml index ee1886e9986..bd34693a92f 100644 --- a/ocaml/tests/bench/bench_pool_field.ml +++ b/ocaml/tests/bench/bench_pool_field.ml @@ -17,7 +17,8 @@ open Bechamel let () = Suite_init.harness_init () ; Printexc.record_backtrace true ; - Debug.set_level Syslog.Emerg + Debug.set_level Syslog.Emerg ; + Xapi_event.register_hooks () let date = "20250102T03:04:05Z" @@ -36,11 +37,21 @@ let json_str = let __context = Test_common.make_test_database () +let host = Test_common.make_host ~__context () + +let pool = Test_common.make_pool ~__context ~master:host () + let () = - let host = Test_common.make_host ~__context () in - let pool = Test_common.make_pool ~__context ~master:host () in Db.Pool.set_license_server ~__context ~self:pool - ~value:[("jsontest", json_str)] + ~value:[("jsontest", json_str)] ; + let open Xapi_database in + Db_ref.update_database + (Context.database_of __context) + (Db_cache_types.Database.register_callback "redo_log" + Redo_log.database_callback + ) + +let vm = Test_common.make_vm ~__context ~name_label:"test" () let get_all () : API.pool_t list = Db.Pool.get_all_records ~__context |> List.map snd @@ -95,6 +106,20 @@ let event = let test_rpc_of_event () = Event_types.rpc_of_event event +let counter = Atomic.make 0 + +let test_set_vm_nvram () : unit = + let c = Atomic.fetch_and_add counter 1 mod 0x7F in + (* use different value each iteration, otherwise it becomes a noop *) + Db.VM.set_NVRAM ~__context ~self:vm + ~value:[("test", String.make 32768 (Char.chr @@ c))] + +let test_db_pool_write () = + let c = Atomic.fetch_and_add counter 1 mod 0x7F in + Db.Pool.set_tags ~__context ~self:pool ~value:[String.make 16 (Char.chr @@ c)] + +let test_db_pool_read () = Db.Pool.get_tags ~__context ~self:pool + let benchmarks = [ Test.make ~name:"local_session_hook" (Staged.stage local_session_hook) @@ -109,6 +134,9 @@ let benchmarks = ; Test.make ~name:"Db_lock.with_lock uncontended" (Staged.stage db_lock_uncontended) ; Test.make ~name:"rpc_of_event" (Staged.stage test_rpc_of_event) + ; Test.make ~name:"Db.Pool.set_tags" (Staged.stage test_db_pool_write) + ; Test.make ~name:"Db.Pool.get_tags" (Staged.stage test_db_pool_read) + ; Test.make ~name:"Db.VM.set_NVRAM" (Staged.stage test_set_vm_nvram) ] let () = Bechamel_simple_cli.cli benchmarks From fbb711750ac8768d6715d5c6e4b3e2c784b988b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 181/492] CP-307958: Use Atomic.t for DB ref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double ref is not needed, replace it with an Atomic.t No functional change. Signed-off-by: Edwin Török --- ocaml/database/db_backend.ml | 6 +++--- ocaml/database/db_ref.ml | 8 ++++---- ocaml/database/db_ref.mli | 4 ++-- ocaml/xapi/pool_db_backup.ml | 2 +- ocaml/xapi/xapi_session.ml | 2 +- ocaml/xapi/xapi_vdi_helpers.ml | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ocaml/database/db_backend.ml b/ocaml/database/db_backend.ml index 935704d4840..d6bc320cb0a 100644 --- a/ocaml/database/db_backend.ml +++ b/ocaml/database/db_backend.ml @@ -21,11 +21,11 @@ let db_FLUSH_TIMER = 2.0 (* --------------------- Util functions on db datastructures *) -let master_database = ref (Db_cache_types.Database.make Schema.empty) +let master_database = Atomic.make (Db_cache_types.Database.make Schema.empty) -let __test_set_master_database db = master_database := db +let __test_set_master_database db = Atomic.set master_database db -let make () = Db_ref.in_memory (ref master_database) +let make () = Db_ref.in_memory master_database (* !!! Right now this is called at cache population time. It would probably be preferable to call it on flush time instead, so we don't waste writes storing non-persistent field values on disk.. At the moment there's not much to worry about, since there are diff --git a/ocaml/database/db_ref.ml b/ocaml/database/db_ref.ml index c1819e5aa22..100fea3701c 100644 --- a/ocaml/database/db_ref.ml +++ b/ocaml/database/db_ref.ml @@ -12,15 +12,15 @@ * GNU Lesser General Public License for more details. *) -type t = In_memory of Db_cache_types.Database.t ref ref | Remote +type t = In_memory of Db_cache_types.Database.t Atomic.t | Remote exception Database_not_in_memory -let in_memory (rf : Db_cache_types.Database.t ref ref) = In_memory rf +let in_memory (rf : Db_cache_types.Database.t Atomic.t) = In_memory rf let get_database = function | In_memory x -> - !(!x) + Atomic.get x | Remote -> raise Database_not_in_memory @@ -28,6 +28,6 @@ let update_database t f = match t with | In_memory x -> let d : Db_cache_types.Database.t = f (get_database t) in - !x := d + Atomic.set x d | Remote -> raise Database_not_in_memory diff --git a/ocaml/database/db_ref.mli b/ocaml/database/db_ref.mli index 705d7eaafe9..93ab8655868 100644 --- a/ocaml/database/db_ref.mli +++ b/ocaml/database/db_ref.mli @@ -12,11 +12,11 @@ * GNU Lesser General Public License for more details. *) -type t = In_memory of Db_cache_types.Database.t ref ref | Remote +type t = In_memory of Db_cache_types.Database.t Atomic.t | Remote exception Database_not_in_memory -val in_memory : Db_cache_types.Database.t ref ref -> t +val in_memory : Db_cache_types.Database.t Atomic.t -> t val get_database : t -> Db_cache_types.Database.t diff --git a/ocaml/xapi/pool_db_backup.ml b/ocaml/xapi/pool_db_backup.ml index 2a0ab1eae21..f82e3340c12 100644 --- a/ocaml/xapi/pool_db_backup.ml +++ b/ocaml/xapi/pool_db_backup.ml @@ -192,7 +192,7 @@ let restore_from_xml __context dry_run (xml_filename : string) = (Db_xml.From.file (Datamodel_schema.of_datamodel ()) xml_filename) in version_check db ; - let db_ref = Db_ref.in_memory (ref (ref db)) in + let db_ref = Db_ref.in_memory (Atomic.make db) in let new_context = Context.make ~database:db_ref "restore_db" in prepare_database_for_restore ~old_context:__context ~new_context ; (* write manifest and unmarshalled db directly to db_temporary_restore_path, so its ready for us on restart *) diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index f7fcfdac7e9..1c28416dfe8 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -1569,5 +1569,5 @@ let create_from_db_file ~__context ~filename = Xapi_database.Db_xml.From.file (Datamodel_schema.of_datamodel ()) filename |> Xapi_database.Db_upgrade.generic_database_upgrade in - let db_ref = Some (Xapi_database.Db_ref.in_memory (ref (ref db))) in + let db_ref = Some (Xapi_database.Db_ref.in_memory (Atomic.make db)) in create_readonly_session ~__context ~uname:"db-from-file" ~db_ref diff --git a/ocaml/xapi/xapi_vdi_helpers.ml b/ocaml/xapi/xapi_vdi_helpers.ml index 3cc2d4a7f5f..84db627c719 100644 --- a/ocaml/xapi/xapi_vdi_helpers.ml +++ b/ocaml/xapi/xapi_vdi_helpers.ml @@ -184,7 +184,7 @@ let database_ref_of_vdi ~__context ~vdi = debug "Enabling redo_log with device reason [%s]" device ; Redo_log.enable_block_existing log device ; let db = Database.make (Datamodel_schema.of_datamodel ()) in - let db_ref = Xapi_database.Db_ref.in_memory (ref (ref db)) in + let db_ref = Xapi_database.Db_ref.in_memory (Atomic.make db) in Redo_log_usage.read_from_redo_log log Xapi_globs.foreign_metadata_db db_ref ; Redo_log.delete log ; (* Upgrade database to the local schema. *) From f1a993ecd531c42cfcb2ab102226e708d18e8b8e Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Mon, 21 Apr 2025 08:45:30 +0000 Subject: [PATCH 182/492] CP-54332 Update host/pool datamodel to support SSH auto mode Add new host object fields: - ssh_auto_mode Add new host/pool API to enable to set auto mode - set_ssh_auto_mode Signed-off-by: Lunfan Zhang --- ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_errors.ml | 3 +++ ocaml/idl/datamodel_host.ml | 31 +++++++++++++++++++++++++++++-- ocaml/idl/datamodel_pool.ml | 16 ++++++++++++++++ ocaml/idl/schematest.ml | 2 +- ocaml/tests/common/test_common.ml | 6 +++--- ocaml/tests/test_host.ml | 2 +- ocaml/xapi-consts/api_errors.ml | 3 +++ ocaml/xapi-consts/constants.ml | 2 ++ ocaml/xapi/dbsync_slave.ml | 1 + ocaml/xapi/message_forwarding.ml | 14 ++++++++++++++ ocaml/xapi/xapi_host.ml | 6 ++++-- ocaml/xapi/xapi_host.mli | 4 ++++ ocaml/xapi/xapi_pool.ml | 3 +++ ocaml/xapi/xapi_pool.mli | 3 +++ 15 files changed, 88 insertions(+), 10 deletions(-) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 8a87d7eb524..819b7c61141 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 788 +let schema_minor_vsn = 789 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index d7d24c6d76a..69286e11f1f 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2046,6 +2046,9 @@ let _ = error Api_errors.set_console_timeout_partially_failed ["hosts"] ~doc:"Some hosts failed to set console timeout." () ; + error Api_errors.set_ssh_auto_mode_partially_failed ["hosts"] + ~doc:"Some hosts failed to set SSH auto mode." () ; + error Api_errors.host_driver_no_hardware ["driver variant"] ~doc:"No hardware present for this host driver variant" () ; diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index e51b59eb573..1dcd39aad40 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1335,6 +1335,13 @@ let create_params = ; param_release= numbered_release "25.14.0-next" ; param_default= Some (VInt Constants.default_console_idle_timeout) } + ; { + param_type= Bool + ; param_name= "ssh_auto_mode" + ; param_doc= "True if SSH auto mode is enabled for the host" + ; param_release= numbered_release "25.14.0-next" + ; param_default= Some (VBool Constants.default_ssh_auto_mode) + } ] let create = @@ -1350,8 +1357,8 @@ let create = ; ( Changed , "25.14.0-next" , "Added --ssh_enabled --ssh_enabled_timeout --ssh_expiry \ - --console_idle_timeout options to allow them to be configured for \ - new host" + --console_idle_timeout --ssh_auto_mode options to allow them to be \ + configured for new host" ) ] ~versioned_params:create_params ~doc:"Create a new host record" @@ -2440,6 +2447,21 @@ let set_console_idle_timeout = ] ~allowed_roles:_R_POOL_ADMIN () +let set_ssh_auto_mode = + call ~name:"set_ssh_auto_mode" ~lifecycle:[] + ~doc:"Set the SSH auto mode for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; ( Bool + , "value" + , "The SSH auto mode for the host,when set to true, SSH to normally be \ + disabled and SSH to be enabled only in case of emergency e.g., xapi \ + is down" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + let latest_synced_updates_applied_state = Enum ( "latest_synced_updates_applied_state" @@ -2601,6 +2623,7 @@ let t = ; disable_ssh ; set_ssh_enabled_timeout ; set_console_idle_timeout + ; set_ssh_auto_mode ] ~contents: ([ @@ -3056,6 +3079,10 @@ let t = "console_idle_timeout" "The timeout in seconds after which idle console will be \ automatically terminated (0 means never)" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool Constants.default_ssh_auto_mode)) + "ssh_auto_mode" + "Reflects whether SSH auto mode is enabled for the host" ] ) () diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index 97b42e12876..1874512c14d 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1606,6 +1606,21 @@ let set_console_idle_timeout = ] ~allowed_roles:_R_POOL_ADMIN () +let set_ssh_auto_mode = + call ~name:"set_ssh_auto_mode" ~lifecycle:[] + ~doc:"Set the SSH auto mode for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Bool + , "value" + , "The SSH auto mode for all hosts in the pool,when set to true, SSH \ + to normally be disabled and SSH to be enabled only in case of \ + emergency e.g., xapi is down" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + (** A pool class *) let t = create_obj ~in_db:true @@ -1704,6 +1719,7 @@ let t = ; disable_ssh ; set_ssh_enabled_timeout ; set_console_idle_timeout + ; set_ssh_auto_mode ] ~contents: ([ diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 0938dd78e83..f94dd14ae72 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "8bf2b9ab509301baf138820cf34608d3" +let last_known_schema_hash = "7c52d11789dea3ab3167c5d0e3e7fa89" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index b61de22ad8d..f5b8a270518 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -172,14 +172,14 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(local_cache_sr = Ref.null) ?(chipset_info = []) ?(ssl_legacy = false) ?(last_software_update = Date.epoch) ?(last_update_hash = "") ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) - ?(console_idle_timeout = 0L) () = + ?(console_idle_timeout = 0L) ?(ssh_auto_mode = false) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry - ~console_idle_timeout + ~console_idle_timeout ~ssh_auto_mode in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host @@ -219,7 +219,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L - ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ; + ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ; ref let make_pif ~__context ~network ~host ?(device = "eth0") diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index 03f526d08d0..1f814b5adf7 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -25,7 +25,7 @@ let add_host __context name = ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false ~last_software_update:Clock.Date.epoch ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch - ~console_idle_timeout:0L + ~console_idle_timeout:0L ~ssh_auto_mode:false ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index adfb96e4b86..96f9352f449 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1426,6 +1426,9 @@ let set_ssh_timeout_partially_failed = let set_console_timeout_partially_failed = add_error "SET_CONSOLE_TIMEOUT_PARTIALLY_FAILED" +let set_ssh_auto_mode_partially_failed = + add_error "SET_SSH_AUTO_MODE_PARTIALLY_FAILED" + let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 185f9669a7c..7af0ccf649b 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -428,3 +428,5 @@ let default_ssh_enabled = true let default_ssh_enabled_timeout = 0L let default_console_idle_timeout = 0L + +let default_ssh_auto_mode = false diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 51ef2665d15..900e8a1ac04 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -64,6 +64,7 @@ let create_localhost ~__context info = ~ssh_enabled_timeout:Constants.default_ssh_enabled_timeout ~ssh_expiry:Date.epoch ~console_idle_timeout:Constants.default_console_idle_timeout + ~ssh_auto_mode:Constants.default_ssh_auto_mode in () diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index c9268e82d3b..924cb3f1244 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -1197,6 +1197,12 @@ functor (pool_uuid ~__context self) value ; Local.Pool.set_console_idle_timeout ~__context ~self ~value + + let set_ssh_auto_mode ~__context ~self ~value = + info "Pool.set_ssh_auto_mode: pool='%s' value='%b'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_ssh_auto_mode ~__context ~self ~value end module VM = struct @@ -4063,6 +4069,14 @@ functor let local_fn = Local.Host.set_console_idle_timeout ~self ~value in let remote_fn = Client.Host.set_console_idle_timeout ~self ~value in do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_ssh_auto_mode ~__context ~self ~value = + info "Host.set_ssh_auto_mode: host='%s' value='%b'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_ssh_auto_mode ~self ~value in + let remote_fn = Client.Host.set_ssh_auto_mode ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn end module Host_crashdump = struct diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index dfccd2ebc73..d4da2d9903a 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -979,7 +979,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled - ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout = + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1044,7 +1044,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~tls_verification_enabled ~last_software_update ~last_update_hash ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~ssh_enabled - ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ; + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; @@ -3243,3 +3243,5 @@ let set_console_idle_timeout ~__context ~self ~value = error "Failed to configure console timeout: %s" (Printexc.to_string e) ; Helpers.internal_error "Failed to set console timeout: %Ld: %s" value (Printexc.to_string e) + +let set_ssh_auto_mode ~__context ~self:_ ~value:_ = () diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index a3d7504b4a4..4b0c53c14bf 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -134,6 +134,7 @@ val create : -> ssh_enabled_timeout:int64 -> ssh_expiry:API.datetime -> console_idle_timeout:int64 + -> ssh_auto_mode:bool -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit @@ -580,3 +581,6 @@ val set_console_idle_timeout : val schedule_disable_ssh_job : __context:Context.t -> self:API.ref_host -> timeout:int64 -> unit + +val set_ssh_auto_mode : + __context:Context.t -> self:API.ref_host -> value:bool -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 96fff99e34b..7640f881bae 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -967,6 +967,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~ssh_enabled_timeout:host.API.host_ssh_enabled_timeout ~ssh_expiry:host.API.host_ssh_expiry ~console_idle_timeout:host.API.host_console_idle_timeout + ~ssh_auto_mode:host.API.host_ssh_auto_mode in (* Copy other-config into newly created host record: *) no_exn @@ -4079,3 +4080,5 @@ let disable_ssh = Ssh.disable let set_ssh_enabled_timeout = Ssh.set_enabled_timeout let set_console_idle_timeout = Ssh.set_console_timeout + +let set_ssh_auto_mode ~__context ~self:_ ~value:_ = () diff --git a/ocaml/xapi/xapi_pool.mli b/ocaml/xapi/xapi_pool.mli index b9c5b6fea3f..dc87e90a18e 100644 --- a/ocaml/xapi/xapi_pool.mli +++ b/ocaml/xapi/xapi_pool.mli @@ -443,3 +443,6 @@ val set_ssh_enabled_timeout : val set_console_idle_timeout : __context:Context.t -> self:API.ref_pool -> value:int64 -> unit + +val set_ssh_auto_mode : + __context:Context.t -> self:API.ref_pool -> value:bool -> unit From 377e9234b2e24cb06c3a98a48c2d2cd91f9ad201 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 12 May 2025 14:47:42 +0100 Subject: [PATCH 183/492] Change all receive_start2 to receive_start3 As a new SMAPIv2 call, need to reserve receive_start2 for older versions of xapi (25.3.0 -- 25.17.0) Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 16 ++++++++-------- ocaml/xapi-idl/storage/storage_skeleton.ml | 4 ++-- ocaml/xapi-storage-script/main.ml | 2 +- ocaml/xapi/storage_migrate.ml | 2 +- ocaml/xapi/storage_mux.ml | 4 ++-- ocaml/xapi/storage_smapiv1.ml | 2 +- ocaml/xapi/storage_smapiv1_migrate.ml | 4 ++-- ocaml/xapi/storage_smapiv1_wrapper.ml | 2 +- ocaml/xapi/storage_smapiv3_migrate.ml | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 42cfd4fe722..b8761fc8d6a 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1110,7 +1110,7 @@ module StorageAPI (R : RPC) = struct (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_start during SXM. - Use the receive_start2 function instead. + Use the receive_start3 function instead. *) let receive_start = let similar_p = Param.mk ~name:"similar" Mirror.similars in @@ -1126,10 +1126,10 @@ module StorageAPI (R : RPC) = struct (** Called on the receiving end to prepare for receipt of the storage. This function should be used in conjunction with [receive_finalize2]*) - let receive_start2 = + let receive_start3 = let similar_p = Param.mk ~name:"similar" Mirror.similars in let result = Param.mk ~name:"result" Mirror.mirror_receive_result in - declare "DATA.MIRROR.receive_start2" [] + declare "DATA.MIRROR.receive_start3" [] (dbg_p @-> sr_p @-> VDI.vdi_info_p @@ -1153,7 +1153,7 @@ module StorageAPI (R : RPC) = struct (** [receive_finalize2 dbg id] will stop the mirroring process and compose the snapshot VDI with the mirror VDI. It also cleans up the storage resources used by mirroring. It is called after the the source VM is paused. This fucntion - should be used in conjunction with [receive_start2] *) + should be used in conjunction with [receive_start3] *) let receive_finalize2 = declare "DATA.MIRROR.receive_finalize2" [] (dbg_p @@ -1175,7 +1175,7 @@ module StorageAPI (R : RPC) = struct (dbg_p @-> id_p @-> returning unit_p err) (** [receive_cancel2 dbg mirror_id url verify_dest] cleans up the side effects - done by [receive_start2] on the destination host when the migration fails. *) + done by [receive_start3] on the destination host when the migration fails. *) let receive_cancel2 = declare "DATA.MIRROR.receive_cancel2" [] (dbg_p @-> id_p @-> url_p @-> verify_dest_p @-> returning unit_p err) @@ -1274,7 +1274,7 @@ module type MIRROR = sig -> similar:Mirror.similars -> Mirror.mirror_receive_result - val receive_start2 : + val receive_start3 : context -> dbg:debug_info -> sr:sr @@ -1772,9 +1772,9 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; - S.DATA.MIRROR.receive_start2 + S.DATA.MIRROR.receive_start3 (fun dbg sr vdi_info mirror_id similar vm url verify_dest -> - Impl.DATA.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~mirror_id + Impl.DATA.MIRROR.receive_start3 () ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest ) ; S.DATA.MIRROR.receive_cancel (fun dbg id -> diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 2c72cd94fef..947cc2d1710 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -169,9 +169,9 @@ module DATA = struct let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = u "DATA.MIRROR.receive_start" - let receive_start2 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + let receive_start3 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = - u "DATA.MIRROR.receive_start2" + u "DATA.MIRROR.receive_start3" let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index d370040f79b..36fc733dc7f 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1922,7 +1922,7 @@ let bind ~volume_script_dir = S.DP.stat_vdi (u "DP.stat_vdi") ; S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; - S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; + S.DATA.MIRROR.receive_start3 (u "DATA.MIRROR.receive_start3") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index b799e497d3d..a386f03ec3c 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -131,7 +131,7 @@ module MigrateLocal = struct try let (module Migrate_Backend) = choose_backend dbg sr in let similars = similar_vdis ~dbg ~sr ~vdi in - Migrate_Backend.receive_start2 () ~dbg ~sr:dest ~vdi_info:local_vdi + Migrate_Backend.receive_start3 () ~dbg ~sr:dest ~vdi_info:local_vdi ~mirror_id ~similar:similars ~vm:mirror_vm ~url ~verify_dest with e -> error "%s Caught error %s while preparing for SXM" __FUNCTION__ diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 1788b19e157..e39a55ebd6b 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -837,8 +837,8 @@ module Mux = struct Storage_smapiv1_migrate.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar - (** see storage_smapiv{1,3}_migrate.receive_start2 *) - let receive_start2 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ + (** see storage_smapiv{1,3}_migrate.receive_start3 *) + let receive_start3 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = u __FUNCTION__ diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 792cae733b3..f88bf5c816a 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1143,7 +1143,7 @@ module SMAPIv1 : Server_impl = struct let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false - let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ ~url:_ ~verify_dest:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 696bffd789d..fe92a413d55 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -680,7 +680,7 @@ module MIRROR : SMAPIv2_MIRROR = struct (* The state tracking here does not need to be changed, however, it will be stored in memory on different hosts. If receive_start is called, by an older host, this State.add is run on the destination host. On the other hand, if - receive_start2 is called, this will be stored in memory on the source host. + receive_start3 is called, this will be stored in memory on the source host. receive_finalize2 and receive_cancel2 handles this similarly. *) State.add id State.( @@ -724,7 +724,7 @@ module MIRROR : SMAPIv2_MIRROR = struct receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") (module Local) - let receive_start2 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" __FUNCTION__ dbg (s_of_sr sr) diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 245380f6b48..502dc7e947c 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1200,7 +1200,7 @@ functor (String.concat "," similar) ; Impl.DATA.MIRROR.receive_start context ~dbg ~sr ~vdi_info ~id ~similar - let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = u __FUNCTION__ diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 41f3b4a0ff9..2e4599ccbf3 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -29,7 +29,7 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_start _ctx = u __FUNCTION__ - let receive_start2 _ctx = u __FUNCTION__ + let receive_start3 _ctx = u __FUNCTION__ let receive_finalize _ctx = u __FUNCTION__ From 84b096ac7babf77cad4d04da08e394c6502119cc Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 12 May 2025 15:00:43 +0100 Subject: [PATCH 184/492] CA-408492: Add back receive_start2 for backwards compat This function was introduced during the development of inbound SXM for SMAPIv3, but was used in SMAPIv1 migrations as well as SMAPIv1 -> SMAPIv3 migration, so add this back to SMAPIv2 for backwards compatibility. Migrations from xapi-(25.3.0 -- 25.17.0) would otherwise be broken if they were migrating to the latest version of xapi. Yangtze, however, is not affected. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 31 +++++++++++++++++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 3 ++ ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_mux.ml | 14 ++++++++++ ocaml/xapi/storage_smapiv1.ml | 4 +++ ocaml/xapi/storage_smapiv1_migrate.ml | 6 ++++ ocaml/xapi/storage_smapiv1_wrapper.ml | 10 +++++++ ocaml/xapi/storage_smapiv3_migrate.ml | 2 ++ 8 files changed, 71 insertions(+) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index b8761fc8d6a..c5564090c7f 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1124,6 +1124,24 @@ module StorageAPI (R : RPC) = struct @-> returning result err ) + (** Called on the receiving end + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_start2 during SXM. + Use the receive_start3 function instead. + *) + let receive_start2 = + let similar_p = Param.mk ~name:"similar" Mirror.similars in + let result = Param.mk ~name:"result" Mirror.mirror_receive_result in + declare "DATA.MIRROR.receive_start2" [] + (dbg_p + @-> sr_p + @-> VDI.vdi_info_p + @-> id_p + @-> similar_p + @-> vm_p + @-> returning result err + ) + (** Called on the receiving end to prepare for receipt of the storage. This function should be used in conjunction with [receive_finalize2]*) let receive_start3 = @@ -1274,6 +1292,16 @@ module type MIRROR = sig -> similar:Mirror.similars -> Mirror.mirror_receive_result + val receive_start2 : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> id:Mirror.id + -> similar:Mirror.similars + -> vm:vm + -> Mirror.mirror_receive_result + val receive_start3 : context -> dbg:debug_info @@ -1772,6 +1800,9 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; + S.DATA.MIRROR.receive_start2 (fun dbg sr vdi_info id similar vm -> + Impl.DATA.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm + ) ; S.DATA.MIRROR.receive_start3 (fun dbg sr vdi_info mirror_id similar vm url verify_dest -> Impl.DATA.MIRROR.receive_start3 () ~dbg ~sr ~vdi_info ~mirror_id diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 947cc2d1710..f135fa92a0f 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -169,6 +169,9 @@ module DATA = struct let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = u "DATA.MIRROR.receive_start" + let receive_start2 ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = + u "DATA.MIRROR.receive_start2" + let receive_start3 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = u "DATA.MIRROR.receive_start3" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 36fc733dc7f..cd23b616da5 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1922,6 +1922,7 @@ let bind ~volume_script_dir = S.DP.stat_vdi (u "DP.stat_vdi") ; S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; + S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.DATA.MIRROR.receive_start3 (u "DATA.MIRROR.receive_start3") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index e39a55ebd6b..e9cf7bf57d0 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -837,6 +837,20 @@ module Mux = struct Storage_smapiv1_migrate.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar + let receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm = + with_dbg ~name:"DATA.MIRROR.receive_start2" ~dbg @@ fun _di -> + info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s vm: %s" + __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + id + (String.concat ";" similar) + (s_of_vm vm) ; + info "%s dbg:%s" __FUNCTION__ dbg ; + (* This goes straight to storage_smapiv1_migrate for backwards compatability + reasons, new code should not call receive_start any more *) + Storage_smapiv1_migrate.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~id + ~similar ~vm + (** see storage_smapiv{1,3}_migrate.receive_start3 *) let receive_start3 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index f88bf5c816a..abcb778522e 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1143,6 +1143,10 @@ module SMAPIv1 : Server_impl = struct let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false + let receive_start2 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ + ~vm:_ = + assert false + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ ~url:_ ~verify_dest:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index fe92a413d55..004bad89f56 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -724,6 +724,12 @@ module MIRROR : SMAPIv2_MIRROR = struct receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") (module Local) + let receive_start2 _ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s" __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + id ; + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm (module Local) + let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 502dc7e947c..bc2a749e030 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1200,8 +1200,18 @@ functor (String.concat "," similar) ; Impl.DATA.MIRROR.receive_start context ~dbg ~sr ~vdi_info ~id ~similar + let receive_start2 context ~dbg ~sr ~vdi_info ~id ~similar ~vm = + info + "DATA.MIRROR.receive_start2 dbg:%s sr:%s id:%s similar:[%s] vm:%s" + dbg (s_of_sr sr) id + (String.concat "," similar) + (s_of_vm vm) ; + Impl.DATA.MIRROR.receive_start2 context ~dbg ~sr ~vdi_info ~id + ~similar ~vm + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = + (* See Storage_smapiv1_migrate.receive_start3 *) u __FUNCTION__ let receive_finalize context ~dbg ~id = diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 2e4599ccbf3..f10d1d4bd39 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -29,6 +29,8 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_start _ctx = u __FUNCTION__ + let receive_start2 _ctx = u __FUNCTION__ + let receive_start3 _ctx = u __FUNCTION__ let receive_finalize _ctx = u __FUNCTION__ From 1b57d09ebc7483ea6b7748edb9259c1ba5b2c7aa Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 12 May 2025 21:46:26 +0100 Subject: [PATCH 185/492] Change all receive_finalize2 to receive_finalize3 Similar to receive_start2, we need to keep receive_finalize2 as well for backwards compatibility Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 16 ++++++++-------- ocaml/xapi-idl/storage/storage_skeleton.ml | 4 ++-- ocaml/xapi-storage-script/main.ml | 2 +- ocaml/xapi/storage_migrate.ml | 12 ++++++------ ocaml/xapi/storage_mux.ml | 2 +- ocaml/xapi/storage_smapiv1.ml | 2 +- ocaml/xapi/storage_smapiv1_migrate.ml | 4 ++-- ocaml/xapi/storage_smapiv1_wrapper.ml | 2 +- ocaml/xapi/storage_smapiv3_migrate.ml | 2 +- 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index c5564090c7f..c55d78569d7 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1143,7 +1143,7 @@ module StorageAPI (R : RPC) = struct ) (** Called on the receiving end to prepare for receipt of the storage. This - function should be used in conjunction with [receive_finalize2]*) + function should be used in conjunction with [receive_finalize3]*) let receive_start3 = let similar_p = Param.mk ~name:"similar" Mirror.similars in let result = Param.mk ~name:"result" Mirror.mirror_receive_result in @@ -1162,18 +1162,18 @@ module StorageAPI (R : RPC) = struct (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize - during SXM. Use the receive_finalize2 function instead. + during SXM. Use the receive_finalize3 function instead. *) let receive_finalize = declare "DATA.MIRROR.receive_finalize" [] (dbg_p @-> id_p @-> returning unit_p err) - (** [receive_finalize2 dbg id] will stop the mirroring process and compose + (** [receive_finalize3 dbg id] will stop the mirroring process and compose the snapshot VDI with the mirror VDI. It also cleans up the storage resources used by mirroring. It is called after the the source VM is paused. This fucntion should be used in conjunction with [receive_start3] *) - let receive_finalize2 = - declare "DATA.MIRROR.receive_finalize2" [] + let receive_finalize3 = + declare "DATA.MIRROR.receive_finalize3" [] (dbg_p @-> id_p @-> sr_p @@ -1316,7 +1316,7 @@ module type MIRROR = sig val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit - val receive_finalize2 : + val receive_finalize3 : context -> dbg:debug_info -> mirror_id:Mirror.id @@ -1817,8 +1817,8 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_finalize (fun dbg id -> Impl.DATA.MIRROR.receive_finalize () ~dbg ~id ) ; - S.DATA.MIRROR.receive_finalize2 (fun dbg mirror_id sr url verify_dest -> - Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~mirror_id ~sr ~url + S.DATA.MIRROR.receive_finalize3 (fun dbg mirror_id sr url verify_dest -> + Impl.DATA.MIRROR.receive_finalize3 () ~dbg ~mirror_id ~sr ~url ~verify_dest ) ; S.DATA.MIRROR.pre_deactivate_hook (fun dbg dp sr vdi -> diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index f135fa92a0f..5b2f2bedbc1 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -178,8 +178,8 @@ module DATA = struct let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" - let receive_finalize2 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = - u "DATA.MIRROR.receive_finalize2" + let receive_finalize3 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + u "DATA.MIRROR.receive_finalize3" let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index cd23b616da5..79dce1d897a 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1925,7 +1925,7 @@ let bind ~volume_script_dir = S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.DATA.MIRROR.receive_start3 (u "DATA.MIRROR.receive_start3") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; - S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; + S.DATA.MIRROR.receive_finalize3 (u "DATA.MIRROR.receive_finalize3") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; S.DATA.MIRROR.pre_deactivate_hook (u "DATA.MIRROR.pre_deactivate_hook") ; diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index a386f03ec3c..85f36e31fab 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -40,11 +40,11 @@ let choose_backend dbg sr = (** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions tend to be executed on the receiver side. *) module MigrateRemote = struct - (** [receive_finalize2 dbg mirror_id sr url verify_dest] takes an [sr] parameter + (** [receive_finalize3 dbg mirror_id sr url verify_dest] takes an [sr] parameter which is the source sr and multiplexes based on the type of that *) - let receive_finalize2 ~dbg ~mirror_id ~sr ~url ~verify_dest = + let receive_finalize3 ~dbg ~mirror_id ~sr ~url ~verify_dest = let (module Migrate_Backend) = choose_backend dbg sr in - Migrate_Backend.receive_finalize2 () ~dbg ~mirror_id ~sr ~url ~verify_dest + Migrate_Backend.receive_finalize3 () ~dbg ~mirror_id ~sr ~url ~verify_dest let receive_cancel2 ~dbg ~mirror_id ~url ~verify_dest = let (module Remote) = @@ -332,12 +332,12 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = r.remote_info in let (module Remote) = get_remote_backend r.url verify_dest in - debug "Calling receive_finalize2" ; + debug "Calling receive_finalize3" ; log_and_ignore_exn (fun () -> - MigrateRemote.receive_finalize2 ~dbg:"Mirror-cleanup" ~mirror_id:id + MigrateRemote.receive_finalize3 ~dbg:"Mirror-cleanup" ~mirror_id:id ~sr ~url:r.url ~verify_dest ) ; - debug "Finished calling receive_finalize2" ; + debug "Finished calling receive_finalize3" ; State.remove_local_mirror id ; debug "Removed active local mirror: %s" id ; Option.iter (fun id -> Scheduler.cancel scheduler id) r.watchdog diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index e9cf7bf57d0..d62517093ba 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -861,7 +861,7 @@ module Mux = struct info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; Storage_smapiv1_migrate.MIRROR.receive_finalize () ~dbg:di.log ~id - let receive_finalize2 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = + let receive_finalize3 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = u __FUNCTION__ let receive_cancel () ~dbg ~id = diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index abcb778522e..78b7505c12c 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1153,7 +1153,7 @@ module SMAPIv1 : Server_impl = struct let receive_finalize _context ~dbg:_ ~id:_ = assert false - let receive_finalize2 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 004bad89f56..226a93c78f1 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -681,7 +681,7 @@ module MIRROR : SMAPIv2_MIRROR = struct stored in memory on different hosts. If receive_start is called, by an older host, this State.add is run on the destination host. On the other hand, if receive_start3 is called, this will be stored in memory on the source host. - receive_finalize2 and receive_cancel2 handles this similarly. *) + receive_finalize3 and receive_cancel2 handles this similarly. *) State.add id State.( Recv_op @@ -749,7 +749,7 @@ module MIRROR : SMAPIv2_MIRROR = struct Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; State.remove_receive_mirror id - let receive_finalize2 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg mirror_id (s_of_sr sr) url verify_dest ; let (module Remote) = diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index bc2a749e030..09b514ee7da 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1218,7 +1218,7 @@ functor info "DATA.MIRROR.receive_finalize dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_finalize context ~dbg ~id - let receive_finalize2 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = (* see storage_smapiv{1,3}_migrate *) u __FUNCTION__ diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index f10d1d4bd39..799f7872f1c 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -35,7 +35,7 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_finalize _ctx = u __FUNCTION__ - let receive_finalize2 _ctx = u __FUNCTION__ + let receive_finalize3 _ctx = u __FUNCTION__ let receive_cancel _ctx = u __FUNCTION__ From 96f8ba9023e2b94adc5c0c22b9c7a28611cc544e Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 12 May 2025 21:57:17 +0100 Subject: [PATCH 186/492] CA-408492: Add back receive_finalize2 Similar to receive_start2 Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 14 +++++++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 2 ++ ocaml/xapi-storage-script/main.ml | 1 + ocaml/xapi/storage_mux.ml | 5 ++++ ocaml/xapi/storage_smapiv1.ml | 2 ++ ocaml/xapi/storage_smapiv1_migrate.ml | 27 +++++++++++++-------- ocaml/xapi/storage_smapiv1_wrapper.ml | 4 +++ ocaml/xapi/storage_smapiv3_migrate.ml | 2 ++ 8 files changed, 47 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index c55d78569d7..a3da3d906d0 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1168,6 +1168,15 @@ module StorageAPI (R : RPC) = struct declare "DATA.MIRROR.receive_finalize" [] (dbg_p @-> id_p @-> returning unit_p err) + (** Called on the receiving end + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize + during SXM. Use the receive_finalize3 function instead. + *) + let receive_finalize2 = + declare "DATA.MIRROR.receive_finalize2" [] + (dbg_p @-> id_p @-> returning unit_p err) + (** [receive_finalize3 dbg id] will stop the mirroring process and compose the snapshot VDI with the mirror VDI. It also cleans up the storage resources used by mirroring. It is called after the the source VM is paused. This fucntion @@ -1316,6 +1325,8 @@ module type MIRROR = sig val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit + val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit + val receive_finalize3 : context -> dbg:debug_info @@ -1817,6 +1828,9 @@ module Server (Impl : Server_impl) () = struct S.DATA.MIRROR.receive_finalize (fun dbg id -> Impl.DATA.MIRROR.receive_finalize () ~dbg ~id ) ; + S.DATA.MIRROR.receive_finalize2 (fun dbg id -> + Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~id + ) ; S.DATA.MIRROR.receive_finalize3 (fun dbg mirror_id sr url verify_dest -> Impl.DATA.MIRROR.receive_finalize3 () ~dbg ~mirror_id ~sr ~url ~verify_dest diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 5b2f2bedbc1..edaf4bc9812 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -178,6 +178,8 @@ module DATA = struct let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" + let receive_finalize2 ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize2" + let receive_finalize3 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = u "DATA.MIRROR.receive_finalize3" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 79dce1d897a..0d76c09601f 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1925,6 +1925,7 @@ let bind ~volume_script_dir = S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.DATA.MIRROR.receive_start3 (u "DATA.MIRROR.receive_start3") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; + S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; S.DATA.MIRROR.receive_finalize3 (u "DATA.MIRROR.receive_finalize3") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index d62517093ba..a523000c7b4 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -861,6 +861,11 @@ module Mux = struct info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; Storage_smapiv1_migrate.MIRROR.receive_finalize () ~dbg:di.log ~id + let receive_finalize2 () ~dbg ~id = + with_dbg ~name:"DATA.MIRROR.receive_finalize2" ~dbg @@ fun di -> + info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; + Storage_smapiv1_migrate.MIRROR.receive_finalize2 () ~dbg:di.log ~id + let receive_finalize3 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = u __FUNCTION__ diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 78b7505c12c..7eef88b46e3 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1153,6 +1153,8 @@ module SMAPIv1 : Server_impl = struct let receive_finalize _context ~dbg:_ ~id:_ = assert false + let receive_finalize2 _context ~dbg:_ ~id:_ = assert false + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 226a93c78f1..8a605fd59be 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -749,12 +749,7 @@ module MIRROR : SMAPIv2_MIRROR = struct Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; State.remove_receive_mirror id - let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = - D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg - mirror_id (s_of_sr sr) url verify_dest ; - let (module Remote) = - Storage_migrate_helper.get_remote_backend url verify_dest - in + let receive_finalize_common ~dbg ~mirror_id (module SMAPI : SMAPIv2) = let recv_state = State.find_active_receive_mirror mirror_id in let open State.Receive_state in Option.iter @@ -764,16 +759,28 @@ module MIRROR : SMAPIv2_MIRROR = struct __FUNCTION__ (Sr.string_of r.sr) (Vdi.string_of r.parent_vdi) (Vdi.string_of r.leaf_vdi) ; - Remote.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; - Remote.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; + SMAPI.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + SMAPI.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) - D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr r.dummy_vdi) ; - Remote.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + D.log_and_ignore_exn (fun () -> SMAPI.VDI.destroy dbg r.sr r.dummy_vdi) ; + SMAPI.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" ) recv_state ; State.remove_receive_mirror mirror_id + let receive_finalize2 _ctx ~dbg ~id = + D.debug "%s dbg:%s id: %s" __FUNCTION__ dbg id ; + receive_finalize_common ~dbg ~mirror_id:id (module Local) + + let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg + mirror_id (s_of_sr sr) url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + receive_finalize_common ~dbg ~mirror_id (module Remote) + let receive_cancel _ctx ~dbg ~id = D.debug "%s dbg:%s mirror_id:%s" __FUNCTION__ dbg id ; let receive_state = State.find_active_receive_mirror id in diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 09b514ee7da..397cee17d6a 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1218,6 +1218,10 @@ functor info "DATA.MIRROR.receive_finalize dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_finalize context ~dbg ~id + let receive_finalize2 context ~dbg ~id = + info "DATA.MIRROR.receive_finalize2 dbg:%s id:%s" dbg id ; + Impl.DATA.MIRROR.receive_finalize2 context ~dbg ~id + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = (* see storage_smapiv{1,3}_migrate *) diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 799f7872f1c..5ef3eeaac6c 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -35,6 +35,8 @@ module MIRROR : SMAPIv2_MIRROR = struct let receive_finalize _ctx = u __FUNCTION__ + let receive_finalize2 _ctx = u __FUNCTION__ + let receive_finalize3 _ctx = u __FUNCTION__ let receive_cancel _ctx = u __FUNCTION__ From 1450e2e843cc50c2fc6349f373b0bf0036575954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 187/492] [prepare]: make StringPool share safer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we can call this from more generic code, without worrying whether locks are held. Skip adding to the stringpool when locks aren't held and we can't obtain an atomic lock either. No functional change Signed-off-by: Edwin Török --- ocaml/database/db_cache_types.ml | 41 ++++++++++++++++++++++++------- ocaml/database/db_cache_types.mli | 11 +++++++++ 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index 86e5916a550..161d8dc813e 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -24,11 +24,33 @@ module HashedString = struct let hash = Hashtbl.hash end -module StringPool = Weak.Make (HashedString) - -let share = - let pool = StringPool.create 2048 in - StringPool.merge pool +module Share : sig + val merge : string -> string + (** [merge str] merges [str] into the stringpool. + It returns a string equal to [str]. + + This function is thread-safe, it skips adding the string to the pool + when called concurrently. + For best results call this while holding another lock. + *) +end = struct + module StringPool = Weak.Make (HashedString) + + let pool = StringPool.create 2048 + + let merge_running = Atomic.make 0 + + let merge str = + let str = + if Atomic.fetch_and_add merge_running 1 = 0 then + StringPool.merge pool str + else + (* no point in using a mutex here, just fall back to not sharing, + which is quicker. *) + str + in + Atomic.decr merge_running ; str +end module Stat = struct type t = {created: Time.t; modified: Time.t; deleted: Time.t} @@ -45,7 +67,7 @@ module StringMap = struct let compare = String.compare end) - let add key v t = add (share key) v t + let add key v t = add (Share.merge key) v t end module type VAL = sig @@ -150,11 +172,12 @@ module Row = struct @@ match v with | Schema.Value.String x -> - Schema.Value.String (share x) + Schema.Value.String (Share.merge x) | Schema.Value.Pairs ps -> - Schema.Value.Pairs (List.map (fun (x, y) -> (share x, share y)) ps) + Schema.Value.Pairs + (List.map (fun (x, y) -> (Share.merge x, Share.merge y)) ps) | Schema.Value.Set xs -> - Schema.Value.Set (List.map share xs) + Schema.Value.Set (List.map Share.merge xs) type t = map_t diff --git a/ocaml/database/db_cache_types.mli b/ocaml/database/db_cache_types.mli index e29f6127211..dc9ceaaea88 100644 --- a/ocaml/database/db_cache_types.mli +++ b/ocaml/database/db_cache_types.mli @@ -79,6 +79,17 @@ module type MAP = sig On exit there will be a binding of [key] whose modification time is [now] *) end +module Share : sig + val merge : string -> string + (** [merge str] merges [str] into the stringpool. + It returns a string equal to [str]. + + This function is thread-safe, it skips adding the string to the pool + when called concurrently. + For best results call this while holding another lock. + *) +end + module Row : sig include MAP with type value = Schema.Value.t From 9fe0eba5e633c15a031d0c5ae03a81a8d08f865f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 May 2025 21:34:18 +0100 Subject: [PATCH 188/492] [prepare]: call Share.merge around ensure_utf8_xml calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This needs to be called around the same places that `ensure_utf8_xml` and `UTF8_XML.is_valid` gets called: those are the places where new strings can enter the system. Now that calling Share.merge is thread-safe also call it when writing new fields, not just when creating the row. For now the DB lock is still held when this function is called, but future refactoring might move code around. For maps and sets we only call this when new keys get added (except in create_row, where we need to process all elements) Signed-off-by: Edwin Török --- ocaml/database/db_cache_impl.ml | 32 ++++++++++++++++++++++---------- ocaml/database/db_cache_types.ml | 13 +------------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 79967e934e8..5d56033d487 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -73,13 +73,15 @@ let ensure_utf8_xml string = let write_field_locked t tblname objref fldname newval = let current_val = get_field tblname objref fldname (get_database t) in if current_val <> newval then ( - ( match newval with - | Schema.Value.String s -> - if not (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then - raise Invalid_value - | _ -> - () - ) ; + let newval = + match newval with + | Schema.Value.String s -> + if not (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then + raise Invalid_value ; + Schema.Value.String (Share.merge s) + | _ -> + newval + in update_database t (set_field tblname objref fldname newval) ; Database.notify (WriteField (tblname, objref, fldname, current_val, newval)) @@ -163,7 +165,17 @@ let create_row_locked t tblname kvs' new_objref = (fun (key, value) -> let value = ensure_utf8_xml value in let column = Schema.Table.find key schema in - (key, Schema.Value.unmarshal column.Schema.Column.ty value) + let newval = + match Schema.Value.unmarshal column.Schema.Column.ty value with + | Schema.Value.String x -> + Schema.Value.String (Share.merge x) + | Schema.Value.Pairs ps -> + Schema.Value.Pairs + (List.map (fun (x, y) -> (Share.merge x, Share.merge y)) ps) + | Schema.Value.Set xs -> + Schema.Value.Set (List.map Share.merge xs) + in + (key, newval) ) kvs' in @@ -303,8 +315,8 @@ let read_records_where t tbl expr = let process_structured_field_locked t (key, value) tblname fld objref proc_fn_selector = (* Ensure that both keys and values are valid for UTF-8-encoded XML. *) - let key = ensure_utf8_xml key in - let value = ensure_utf8_xml value in + let key = ensure_utf8_xml key |> Share.merge in + let value = ensure_utf8_xml value |> Share.merge in try let tbl = TableSet.find tblname (Database.tableset (get_database t)) in let row = Table.find objref tbl in diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index 161d8dc813e..6ad8e40b7bb 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -166,18 +166,7 @@ module Row = struct include Make (CachedValue) - let add gen key v = - add gen key - @@ CachedValue.v - @@ - match v with - | Schema.Value.String x -> - Schema.Value.String (Share.merge x) - | Schema.Value.Pairs ps -> - Schema.Value.Pairs - (List.map (fun (x, y) -> (Share.merge x, Share.merge y)) ps) - | Schema.Value.Set xs -> - Schema.Value.Set (List.map Share.merge xs) + let add gen key v = add gen key @@ CachedValue.v v type t = map_t From 003ea4c7dbcaf337d4ac34e84d779829c9dd74ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 2 May 2025 10:07:36 +0100 Subject: [PATCH 189/492] CP-3097933: [prepare]: introduce type constructor in Value.t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will make it easier to introduce UTF8 validation here. No functional change Signed-off-by: Edwin Török --- ocaml/database/database_test.ml | 12 +++++++----- ocaml/database/db_cache_impl.ml | 2 +- ocaml/database/db_cache_test.ml | 12 +++++++----- ocaml/database/db_cache_types.ml | 8 +++++--- ocaml/database/schema.ml | 6 ++++++ ocaml/database/schema.mli | 6 ++++++ ocaml/database/test_schemas.ml | 18 ++++++++++-------- ocaml/idl/datamodel_schema.ml | 2 +- ocaml/idl/datamodel_values.ml | 32 ++++++++++++++++---------------- 9 files changed, 59 insertions(+), 39 deletions(-) diff --git a/ocaml/database/database_test.ml b/ocaml/database/database_test.ml index 2c3bfd8f0e1..b3e771e774c 100644 --- a/ocaml/database/database_test.ml +++ b/ocaml/database/database_test.ml @@ -205,11 +205,13 @@ functor let db = db |> add_row "bar" "bar:1" - (Row.add 0L Db_names.ref (String "bar:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "bar:1") (Row.add 0L "foos" (Set []) Row.empty) ) |> add_row "foo" "foo:1" - (Row.add 0L Db_names.ref (String "foo:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "foo:1") (Row.add 0L "bars" (Set []) Row.empty) ) |> set_field "foo" "foo:1" "bars" (add_to_set "bar:1" (Set [])) @@ -219,7 +221,7 @@ functor Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith_fmt "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" (Schema.Value.marshal bar_foos) ; @@ -235,13 +237,13 @@ functor failwith_fmt "check_many_to_many: bar(bar:1).foos expected () got %s" (Schema.Value.marshal bar_foos) ; (* add 'bar' to foo.bars *) - let db = set_field "foo" "foo:1" "bars" (Set ["bar:1"]) db in + let db = set_field "foo" "foo:1" "bars" (Schema.Value.set ["bar:1"]) db in (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith_fmt "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" (Schema.Value.marshal bar_foos) ; diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 3af107b7b6a..3016ba9972c 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -230,7 +230,7 @@ let create_row t tblname kvs' new_objref = else (* we add the reference to the row itself so callers can use read_field_where to return the reference: awkward if it is just the key *) - let kvs' = (Db_names.ref, Schema.Value.String new_objref) :: kvs' in + let kvs' = (Db_names.ref, Schema.Value.string new_objref) :: kvs' in W.debug "create_row %s (%s) [%s]" tblname new_objref (String.concat "," (List.map (fun (k, _) -> Printf.sprintf "(%s,v)" k) kvs') diff --git a/ocaml/database/db_cache_test.ml b/ocaml/database/db_cache_test.ml index ed2a3296940..aa472419bfc 100644 --- a/ocaml/database/db_cache_test.ml +++ b/ocaml/database/db_cache_test.ml @@ -29,11 +29,13 @@ let check_many_to_many () = let db = db |> add_row "bar" "bar:1" - (Row.add 0L Db_names.ref (Schema.Value.String "bar:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "bar:1") (Row.add 0L "foos" (Schema.Value.Set []) Row.empty) ) |> add_row "foo" "foo:1" - (Row.add 0L Db_names.ref (Schema.Value.String "foo:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "foo:1") (Row.add 0L "bars" (Schema.Value.Set []) Row.empty) ) |> set_field "foo" "foo:1" "bars" (add_to_set "bar:1" (Schema.Value.Set [])) @@ -41,7 +43,7 @@ let check_many_to_many () = (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Schema.Value.Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith (Printf.sprintf "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" @@ -59,11 +61,11 @@ let check_many_to_many () = (Sexplib.Sexp.to_string (Schema.Value.sexp_of_t bar_foos)) ) ; (* add 'bar' to foo.bars *) - let db = set_field "foo" "foo:1" "bars" (Schema.Value.Set ["bar:1"]) db in + let db = set_field "foo" "foo:1" "bars" (Schema.Value.set ["bar:1"]) db in (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Schema.Value.Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith (Printf.sprintf "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index 52e948dc65a..34a3ba01ac6 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -548,9 +548,11 @@ let get_field tblname objref fldname db = (Table.find objref (TableSet.find tblname (Database.tableset db))) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) +let empty = Schema.Value.string "" + let unsafe_set_field g tblname objref fldname newval = (fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty |> Database.update @@ -632,7 +634,7 @@ let set_field tblname objref fldname newval db = |> update_one_to_many g tblname objref remove_from_set |> Database.update ((fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty ) @@ -643,7 +645,7 @@ let set_field tblname objref fldname newval db = let g = Manifest.generation (Database.manifest db) in db |> ((fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty |> Database.update diff --git a/ocaml/database/schema.ml b/ocaml/database/schema.ml index b6c81bf70d1..febeaa0412e 100644 --- a/ocaml/database/schema.ml +++ b/ocaml/database/schema.ml @@ -40,6 +40,12 @@ module Value = struct | Pairs of (string * string) list [@@deriving sexp_of] + let string s = String s + + let set xs = Set xs + + let pairs xs = Pairs xs + let marshal = function | String x -> x diff --git a/ocaml/database/schema.mli b/ocaml/database/schema.mli index 21d1316cef1..07e34c06e04 100644 --- a/ocaml/database/schema.mli +++ b/ocaml/database/schema.mli @@ -25,6 +25,12 @@ module Value : sig | Pairs of (string * string) list [@@deriving sexp_of] + val string : string -> t + + val set : string list -> t + + val pairs : (string * string) list -> t + val marshal : t -> string val unmarshal : Type.t -> string -> t diff --git a/ocaml/database/test_schemas.ml b/ocaml/database/test_schemas.ml index fa2519b5f61..57b92cce060 100644 --- a/ocaml/database/test_schemas.ml +++ b/ocaml/database/test_schemas.ml @@ -1,9 +1,11 @@ +let empty = Schema.Value.string "" + let schema = let _ref = { Schema.Column.name= Db_names.ref ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -13,7 +15,7 @@ let schema = { Schema.Column.name= Db_names.uuid ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -23,7 +25,7 @@ let schema = { Schema.Column.name= Db_names.name_label ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -33,7 +35,7 @@ let schema = { Schema.Column.name= "name__description" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -43,7 +45,7 @@ let schema = { Schema.Column.name= "type" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -73,8 +75,8 @@ let schema = { Schema.Column.name= "protection_policy" ; persistent= true - ; empty= Schema.Value.String "" - ; default= Some (Schema.Value.String "OpaqueRef:NULL") + ; empty + ; default= Some (Schema.Value.string "OpaqueRef:NULL") ; ty= Schema.Type.String ; issetref= false } @@ -93,7 +95,7 @@ let schema = { Schema.Column.name= "VM" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false diff --git a/ocaml/idl/datamodel_schema.ml b/ocaml/idl/datamodel_schema.ml index 10f20662496..6c295ef00f4 100644 --- a/ocaml/idl/datamodel_schema.ml +++ b/ocaml/idl/datamodel_schema.ml @@ -77,7 +77,7 @@ let of_datamodel () = { Column.name= Db_names.ref ; persistent= true - ; empty= Value.String "" + ; empty= Value.string "" ; default= None ; ty= Type.String ; issetref= false diff --git a/ocaml/idl/datamodel_values.ml b/ocaml/idl/datamodel_values.ml index e270899b50f..522ab4e530a 100644 --- a/ocaml/idl/datamodel_values.ml +++ b/ocaml/idl/datamodel_values.ml @@ -84,42 +84,42 @@ let to_db v = let open Schema.Value in match v with | VString s -> - String s + string s | VInt i -> - String (Int64.to_string i) + string (Int64.to_string i) | VFloat f -> - String (string_of_float f) + string (string_of_float f) | VBool true -> - String "true" + string "true" | VBool false -> - String "false" + string "false" | VDateTime d -> - String (Date.to_rfc3339 d) + string (Date.to_rfc3339 d) | VEnum e -> - String e + string e | VMap vvl -> Pairs (List.map (fun (k, v) -> (to_string k, to_string v)) vvl) | VSet vl -> Set (List.map to_string vl) | VRef r -> - String r + string r (* Generate suitable "empty" database value of specified type *) let gen_empty_db_val t = let open Schema in match t with | SecretString | String -> - Value.String "" + Value.string "" | Int -> - Value.String "0" + Value.string "0" | Float -> - Value.String (string_of_float 0.0) + Value.string (string_of_float 0.0) | Bool -> - Value.String "false" + Value.string "false" | DateTime -> - Value.String Date.(to_rfc3339 epoch) + Value.string Date.(to_rfc3339 epoch) | Enum (_, (enum_value, _) :: _) -> - Value.String enum_value + Value.string enum_value | Enum (_, []) -> assert false | Set _ -> @@ -127,8 +127,8 @@ let gen_empty_db_val t = | Map _ -> Value.Pairs [] | Ref _ -> - Value.String null_ref + Value.string null_ref | Record _ -> - Value.String "" + Value.string "" | Option _ -> Value.Set [] From db991bb049ee485448a71edf73889db9c8a8b6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 2 May 2024 21:19:55 +0100 Subject: [PATCH 190/492] CP-307933: [prepare]: introduce field type in DB_ACCESS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit field=string for now, but will be changed to Schema.CachedValue.t later. There are separate types for input and output fields, e.g. we may want to perform UTF8 validation on input, which would modify both Value.t and its serialized form, so we can avoid serializing twice. No functional change Signed-off-by: Edwin Török --- ocaml/database/db_cache_impl.ml | 4 ++ ocaml/database/db_interface.mli | 69 +++++++++++++++++++----------- ocaml/database/db_rpc_client_v1.ml | 4 ++ ocaml/database/db_rpc_client_v2.ml | 4 ++ 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 3016ba9972c..dda4fef1b11 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -36,6 +36,10 @@ open Db_ref let fist_delay_read_records_where = ref false +type field_in = string + +type field_out = string + (* Only needed by the DB_ACCESS signature *) let initialise () = () diff --git a/ocaml/database/db_interface.mli b/ocaml/database/db_interface.mli index 9343ed87e8d..d971ce19b3a 100644 --- a/ocaml/database/db_interface.mli +++ b/ocaml/database/db_interface.mli @@ -33,15 +33,15 @@ type db_ref = string type uuid = string -type regular_fields = (field_name * field) list +type 'field regular_fields = (field_name * 'field) list type associated_fields = (field_name * db_ref list) list (** dictionary of regular fields x dictionary of associated set_ref values *) -type db_record = regular_fields * associated_fields +type 'field db_record = 'field regular_fields * associated_fields (** The client interface to the database *) -module type DB_ACCESS = sig +module type DB_ACCESS_COMMON = sig val initialise : unit -> unit (** [initialise ()] must be called before any other function in this interface *) @@ -61,11 +61,6 @@ module type DB_ACCESS = sig (** [find_refs_with_filter tbl expr] returns a list of all references to rows which match [expr] *) - val read_field_where : Db_ref.t -> Db_cache_types.where_record -> field list - (** [read_field_where {tbl,return,where_field,where_value}] returns a - list of the [return] fields in table [tbl] where the [where_field] - equals [where_value] *) - val db_get_by_uuid : Db_ref.t -> table -> uuid -> db_ref (** [db_get_by_uuid tbl uuid] returns the single object reference associated with [uuid] *) @@ -79,13 +74,38 @@ module type DB_ACCESS = sig (** [db_get_by_name_label tbl label] returns the list of object references associated with [label] *) - val create_row : Db_ref.t -> table -> regular_fields -> db_ref -> unit - (** [create_row tbl kvpairs ref] create a new row in [tbl] with - key [ref] and contents [kvpairs] *) - val delete_row : Db_ref.t -> db_ref -> table -> unit (** [delete_row context tbl ref] deletes row [ref] from table [tbl] *) + val process_structured_field : + Db_ref.t + -> field_name * string + -> table + -> field_name + -> db_ref + -> Db_cache_types.structured_op_t + -> unit + (** [process_structured_field context kv tbl fld ref op] modifies the + value of field [fld] in row [ref] in table [tbl] according to [op] + which may be one of AddSet RemoveSet AddMap RemoveMap with + arguments [kv] *) +end + +module type DB_ACCESS_FIELD = sig + type field_in + + type field_out + + val read_field_where : Db_ref.t -> Db_cache_types.where_record -> field list + (** [read_field_where {tbl,return,where_field,where_value}] returns a + list of the [return] fields in table [tbl] where the [where_field] + equals [where_value] *) + + val create_row : + Db_ref.t -> table -> field_in regular_fields -> db_ref -> unit + (** [create_row tbl kvpairs ref] create a new row in [tbl] with + key [ref] and contents [kvpairs] *) + val write_field : Db_ref.t -> table -> db_ref -> field_name -> field -> unit (** [write_field context tbl ref fld val] changes field [fld] to [val] in row [ref] in table [tbl] *) @@ -94,25 +114,22 @@ module type DB_ACCESS = sig (** [read_field context tbl fld ref] returns the value of field [fld] in row [ref] in table [tbl] *) - val read_record : Db_ref.t -> table -> db_ref -> db_record + val read_record : Db_ref.t -> table -> db_ref -> field_out db_record (** [read_record tbl ref] returns [ (field, value) ] * [ (set_ref fieldname * [ ref ]) ] *) val read_records_where : - Db_ref.t -> table -> Db_filter_types.expr -> (db_ref * db_record) list + Db_ref.t + -> table + -> Db_filter_types.expr + -> (db_ref * field_out db_record) list (** [read_records_where tbl expr] returns a list of the values returned by read_record that match the expression *) +end - val process_structured_field : - Db_ref.t - -> field_name * field - -> table - -> field_name - -> db_ref - -> Db_cache_types.structured_op_t - -> unit - (** [process_structured_field context kv tbl fld ref op] modifies the - value of field [fld] in row [ref] in table [tbl] according to [op] - which may be one of AddSet RemoveSet AddMap RemoveMap with - arguments [kv] *) +module type DB_ACCESS = sig + include DB_ACCESS_COMMON + + include + DB_ACCESS_FIELD with type field_in = string and type field_out = string end diff --git a/ocaml/database/db_rpc_client_v1.ml b/ocaml/database/db_rpc_client_v1.ml index 7adbcd6bbed..9219779966b 100644 --- a/ocaml/database/db_rpc_client_v1.ml +++ b/ocaml/database/db_rpc_client_v1.ml @@ -22,6 +22,10 @@ functor struct exception Remote_db_server_returned_unknown_exception + type field_in = string + + type field_out = string + (* Process an exception returned from server, throwing local exception *) let process_exception_xml xml = match XMLRPC.From.array (fun x -> x) xml with diff --git a/ocaml/database/db_rpc_client_v2.ml b/ocaml/database/db_rpc_client_v2.ml index 2e03f069497..434677d3990 100644 --- a/ocaml/database/db_rpc_client_v2.ml +++ b/ocaml/database/db_rpc_client_v2.ml @@ -22,6 +22,10 @@ functor (RPC : Db_interface.RPC) -> struct + type field_in = string + + type field_out = string + let initialise = RPC.initialise let rpc x = RPC.rpc (Jsonrpc.to_string x) |> Jsonrpc.of_string From f2e86c2dffad6734c6ce54585fec20a38b9ef654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 2 May 2025 11:02:44 +0100 Subject: [PATCH 191/492] CP-307933: [prepare]: introduce DB_ACCESS2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides direct access to a Value.t when known, which avoids having to serialize and deserialize a Value.t just to read it. Not yet used, no functional change. Signed-off-by: Edwin Török --- ocaml/database/db_interface.mli | 20 +++++++-- ocaml/database/db_interface_compat.ml | 61 ++++++++++++++++++++++++++ ocaml/database/db_interface_compat.mli | 18 ++++++++ 3 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 ocaml/database/db_interface_compat.ml create mode 100644 ocaml/database/db_interface_compat.mli diff --git a/ocaml/database/db_interface.mli b/ocaml/database/db_interface.mli index d971ce19b3a..af1d4572909 100644 --- a/ocaml/database/db_interface.mli +++ b/ocaml/database/db_interface.mli @@ -96,7 +96,8 @@ module type DB_ACCESS_FIELD = sig type field_out - val read_field_where : Db_ref.t -> Db_cache_types.where_record -> field list + val read_field_where : + Db_ref.t -> Db_cache_types.where_record -> field_out list (** [read_field_where {tbl,return,where_field,where_value}] returns a list of the [return] fields in table [tbl] where the [where_field] equals [where_value] *) @@ -106,11 +107,12 @@ module type DB_ACCESS_FIELD = sig (** [create_row tbl kvpairs ref] create a new row in [tbl] with key [ref] and contents [kvpairs] *) - val write_field : Db_ref.t -> table -> db_ref -> field_name -> field -> unit + val write_field : + Db_ref.t -> table -> db_ref -> field_name -> field_in -> unit (** [write_field context tbl ref fld val] changes field [fld] to [val] in row [ref] in table [tbl] *) - val read_field : Db_ref.t -> table -> field_name -> db_ref -> field + val read_field : Db_ref.t -> table -> field_name -> db_ref -> field_out (** [read_field context tbl fld ref] returns the value of field [fld] in row [ref] in table [tbl] *) @@ -133,3 +135,15 @@ module type DB_ACCESS = sig include DB_ACCESS_FIELD with type field_in = string and type field_out = string end + +module type DB_ACCESS2 = sig + include DB_ACCESS_COMMON + + include + DB_ACCESS_FIELD + with type field_in = Schema.Value.t + and type field_out = Schema.maybe_cached_value + + module Compat : + DB_ACCESS_FIELD with type field_in = string and type field_out = string +end diff --git a/ocaml/database/db_interface_compat.ml b/ocaml/database/db_interface_compat.ml new file mode 100644 index 00000000000..a1c981a9e7e --- /dev/null +++ b/ocaml/database/db_interface_compat.ml @@ -0,0 +1,61 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Db_interface + +module OfCached (DB : DB_ACCESS2) : DB_ACCESS = struct + include DB include DB.Compat +end + +module OfCompat (DB : DB_ACCESS) : DB_ACCESS2 = struct + module Compat = DB + include Compat + + type field_in = Schema.Value.t + + type field_out = Schema.maybe_cached_value + + let field_of_compat = Schema.CachedValue.of_string + + let compat_of_field = Schema.Value.marshal + + let regular_field_of_compat (k, v) = (k, field_of_compat v) + + let regular_fields_of_compat l = List.map regular_field_of_compat l + + let compat_of_regular_field (k, v) = (k, compat_of_field v) + + let compat_of_regular_fields l = List.map compat_of_regular_field l + + let db_record_of_compat (regular, assoc) = + (regular_fields_of_compat regular, assoc) + + let db_record_entry_of_compat (ref, record) = (ref, db_record_of_compat record) + + let read_field_where t where = + read_field_where t where |> List.map field_of_compat + + let create_row t tbl fields ref = + create_row t tbl (compat_of_regular_fields fields) ref + + let write_field t tbl ref fld field = + write_field t tbl ref fld (compat_of_field field) + + let read_field t tbl fld ref = read_field t tbl fld ref |> field_of_compat + + let read_record t tbl ref = read_record t tbl ref |> db_record_of_compat + + let read_records_where t tbl expr = + read_records_where t tbl expr |> List.map db_record_entry_of_compat +end diff --git a/ocaml/database/db_interface_compat.mli b/ocaml/database/db_interface_compat.mli new file mode 100644 index 00000000000..a735cf122dc --- /dev/null +++ b/ocaml/database/db_interface_compat.mli @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Db_interface + +module OfCached : functor (_ : DB_ACCESS2) -> DB_ACCESS + +module OfCompat : functor (_ : DB_ACCESS) -> DB_ACCESS2 From d0dc77fcf22e1bd799da320a7796a0a337c75a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 2 May 2024 21:19:55 +0100 Subject: [PATCH 192/492] CP-307933: [prepare]: introduce DB_ACCESS2 implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change, we still use the Compat module Signed-off-by: Edwin Török --- ocaml/database/database_server_main.ml | 3 +- ocaml/database/db_cache.ml | 2 +- ocaml/database/db_cache_impl.ml | 153 +++++++++++++------- ocaml/database/db_cache_impl.mli | 2 +- ocaml/database/db_cache_types.ml | 6 +- ocaml/database/db_cache_types.mli | 8 + ocaml/database/db_remote_cache_access_v1.ml | 3 +- ocaml/database/db_remote_cache_access_v2.ml | 3 +- ocaml/database/schema.ml | 12 ++ ocaml/database/schema.mli | 31 +++- 10 files changed, 158 insertions(+), 65 deletions(-) diff --git a/ocaml/database/database_server_main.ml b/ocaml/database/database_server_main.ml index e75539a5592..a4ebb21ab47 100644 --- a/ocaml/database/database_server_main.ml +++ b/ocaml/database/database_server_main.ml @@ -33,8 +33,9 @@ let remote_database_access_handler_v2 req bio = flush stdout ; raise e +open Xapi_database module Local_tests = - Xapi_database.Database_test.Tests (Xapi_database.Db_cache_impl) + Database_test.Tests (Db_interface_compat.OfCached (Db_cache_impl)) let schema = Test_schemas.schema diff --git a/ocaml/database/db_cache.ml b/ocaml/database/db_cache.ml index eba091889ec..6bdd4aefff0 100644 --- a/ocaml/database/db_cache.ml +++ b/ocaml/database/db_cache.ml @@ -19,7 +19,7 @@ module D = Debug.Make (struct let name = "db_cache" end) open D (** Masters will use this to modify the in-memory cache directly *) -module Local_db : DB_ACCESS = Db_cache_impl +module Local_db : DB_ACCESS = Db_interface_compat.OfCached (Db_cache_impl) (** Slaves will use this to call the master by XMLRPC *) module Remote_db : DB_ACCESS = Db_rpc_client_v1.Make (struct diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index dda4fef1b11..e9745749ada 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -36,9 +36,9 @@ open Db_ref let fist_delay_read_records_where = ref false -type field_in = string +type field_in = Schema.Value.t -type field_out = string +type field_out = Schema.maybe_cached_value (* Only needed by the DB_ACCESS signature *) let initialise () = () @@ -53,14 +53,13 @@ let is_valid_ref t objref = let read_field_internal _ tblname fldname objref db = try - Row.find fldname + Row.find' fldname (Table.find objref (TableSet.find tblname (Database.tableset db))) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) (* Read field from cache *) let read_field t tblname fldname objref = - Schema.Value.marshal - (read_field_internal t tblname fldname objref (get_database t)) + read_field_internal t tblname fldname objref (get_database t) (** Finds the longest XML-compatible UTF-8 prefix of the given string, by truncating the string at the first incompatible @@ -75,6 +74,8 @@ let ensure_utf8_xml string = warn "string truncated to: '%s'." prefix ; prefix +let ensure_utf8_xml_and_share string = string |> ensure_utf8_xml |> Share.merge + (* Write field in cache *) let write_field_locked t tblname objref fldname newval = let current_val = get_field tblname objref fldname (get_database t) in @@ -86,10 +87,6 @@ let write_field_locked t tblname objref fldname newval = ) let write_field t tblname objref fldname newval = - let schema = Database.schema (get_database t) in - let schema = Schema.table tblname schema in - let column = Schema.Table.find fldname schema in - let newval = Schema.Value.unmarshal column.Schema.Column.ty newval in let newval = match newval with | Schema.Value.String s -> @@ -114,7 +111,7 @@ let touch_row t tblname objref = and iterates through set-refs [returning (fieldname, ref list) list; where fieldname is the name of the Set Ref field in tbl; and ref list is the list of foreign keys from related table with remote-fieldname=objref] *) -let read_record_internal db tblname objref = +let read_record_internal conv db tblname objref = try let tbl = TableSet.find tblname (Database.tableset db) in let row = Table.find objref tbl in @@ -138,15 +135,14 @@ let read_record_internal db tblname objref = | None -> accum_setref in - let accum_fvlist = - (k, Schema.CachedValue.string_of cached) :: accum_fvlist - in + let accum_fvlist = (k, conv cached) :: accum_fvlist in (accum_fvlist, accum_setref) ) row ([], []) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) -let read_record t = read_record_internal (get_database t) +let read_record t = + read_record_internal Schema.CachedValue.open_present (get_database t) (* Delete row from tbl *) let delete_row_locked t tblname objref = @@ -177,7 +173,7 @@ let create_row_locked t tblname kvs' new_objref = let db = get_database t in let g = Manifest.generation (Database.manifest db) in let row = - List.fold_left (fun row (k, v) -> Row.add g k v row) Row.empty kvs' + List.fold_left (fun row (k, v) -> Row.add' g k v row) Row.empty kvs' in let schema = Schema.table tblname (Database.schema db) in (* fill in default values if kv pairs for these are not supplied already *) @@ -195,31 +191,13 @@ let create_row_locked t tblname kvs' new_objref = (get_database t) let fld_check t tblname objref (fldname, value) = - let v = read_field_internal t tblname fldname objref (get_database t) in - (v = value, fldname, v) - -let create_row t tblname kvs' new_objref = - let schema = Database.schema (get_database t) in - let schema = Schema.table tblname schema in - let kvs' = - List.map - (fun (key, value) -> - let value = ensure_utf8_xml value in - let column = Schema.Table.find key schema in - let newval = - match Schema.Value.unmarshal column.Schema.Column.ty value with - | Schema.Value.String x -> - Schema.Value.String (Share.merge x) - | Schema.Value.Pairs ps -> - Schema.Value.Pairs - (List.map (fun (x, y) -> (Share.merge x, Share.merge y)) ps) - | Schema.Value.Set xs -> - Schema.Value.Set (List.map Share.merge xs) - in - (key, newval) - ) - kvs' + let v = + Schema.CachedValue.string_of + (read_field_internal t tblname fldname objref (get_database t)) in + (v = Schema.CachedValue.string_of value, fldname, v) + +let create_row' t tblname kvs' new_objref = with_lock (fun () -> if is_valid_ref t new_objref then let uniq_check_list = List.map (fld_check t tblname new_objref) kvs' in @@ -228,13 +206,16 @@ let create_row t tblname kvs' new_objref = in match failure_opt with | Some (_, f, v) -> - raise (Integrity_violation (tblname, f, Schema.Value.marshal v)) + raise (Integrity_violation (tblname, f, v)) | _ -> () else (* we add the reference to the row itself so callers can use read_field_where to return the reference: awkward if it is just the key *) - let kvs' = (Db_names.ref, Schema.Value.string new_objref) :: kvs' in + let kvs' = + (Db_names.ref, Schema.Value.string new_objref |> Schema.CachedValue.v) + :: kvs' + in W.debug "create_row %s (%s) [%s]" tblname new_objref (String.concat "," (List.map (fun (k, _) -> Printf.sprintf "(%s,v)" k) kvs') @@ -242,23 +223,52 @@ let create_row t tblname kvs' new_objref = create_row_locked t tblname kvs' new_objref ) +let create_row t tblname kvs' new_objref = + let kvs' = + List.map + (fun (key, value) -> + let value = + match value with + | Schema.Value.String x -> + Schema.Value.String (ensure_utf8_xml_and_share x) + | Schema.Value.Pairs ps -> + Schema.Value.Pairs + (List.map + (fun (x, y) -> + (ensure_utf8_xml_and_share x, ensure_utf8_xml_and_share y) + ) + ps + ) + | Schema.Value.Set xs -> + Schema.Value.Set (List.map ensure_utf8_xml_and_share xs) + in + (key, Schema.CachedValue.v value) + ) + kvs' + in + create_row' t tblname kvs' new_objref + (* Do linear scan to find field values which match where clause *) -let read_field_where t rcd = +let read_field_where' conv t rcd = let db = get_database t in let tbl = TableSet.find rcd.table (Database.tableset db) in Table.fold (fun _ _ row acc -> - let field = Schema.Value.marshal (Row.find rcd.where_field row) in + let field = + Schema.CachedValue.string_of (Row.find' rcd.where_field row) + in if field = rcd.where_value then - Schema.Value.marshal (Row.find rcd.return row) :: acc + conv (Row.find' rcd.return row) :: acc else acc ) tbl [] +let read_field_where t rcd = read_field_where' Fun.id t rcd + let db_get_by_uuid t tbl uuid_val = match - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -275,7 +285,7 @@ let db_get_by_uuid t tbl uuid_val = let db_get_by_uuid_opt t tbl uuid_val = match - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -290,7 +300,7 @@ let db_get_by_uuid_opt t tbl uuid_val = (** Return reference fields from tbl that matches specified name_label field *) let db_get_by_name_label t tbl label = - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -324,11 +334,14 @@ let find_refs_with_filter_internal db (tblname : Db_interface.table) let find_refs_with_filter t = find_refs_with_filter_internal (get_database t) -let read_records_where t tbl expr = +let read_records_where' conv t tbl expr = let db = get_database t in let reqd_refs = find_refs_with_filter_internal db tbl expr in if !fist_delay_read_records_where then Thread.delay 0.5 ; - List.map (fun ref -> (ref, read_record_internal db tbl ref)) reqd_refs + List.map (fun ref -> (ref, read_record_internal conv db tbl ref)) reqd_refs + +let read_records_where t tbl expr = + read_records_where' Schema.CachedValue.open_present t tbl expr let process_structured_field_locked t (key, value) tblname fld objref proc_fn_selector = @@ -369,8 +382,8 @@ let process_structured_field_locked t (key, value) tblname fld objref let process_structured_field t (key, value) tblname fld objref proc_fn_selector = (* Ensure that both keys and values are valid for UTF-8-encoded XML. *) - let key = ensure_utf8_xml key |> Share.merge in - let value = ensure_utf8_xml value |> Share.merge in + let key = ensure_utf8_xml_and_share key in + let value = ensure_utf8_xml_and_share value in with_lock (fun () -> process_structured_field_locked t (key, value) tblname fld objref proc_fn_selector @@ -530,3 +543,41 @@ let stats t = ) (Database.tableset (get_database t)) [] + +module Compat = struct + type field_in = string + + type field_out = string + + let read_field_where t rcd = + read_field_where' Schema.CachedValue.string_of t rcd + + let read_field t tblname fldname objref = + read_field t tblname fldname objref |> Schema.CachedValue.string_of + + let write_field t tblname objref fldname newval = + let db = get_database t in + let schema = Schema.table tblname (Database.schema db) in + let column = Schema.Table.find fldname schema in + let newval = Schema.Value.unmarshal column.Schema.Column.ty newval in + write_field t tblname objref fldname newval + + let read_record t = + read_record_internal Schema.CachedValue.string_of (get_database t) + + let read_records_where t tbl expr = + read_records_where' Schema.CachedValue.string_of t tbl expr + + let create_row t tblname kvs' new_objref = + let db = get_database t in + let schema = Schema.table tblname (Database.schema db) in + let kvs' = + List.map + (fun (key, value) -> + let column = Schema.Table.find key schema in + (key, Schema.CachedValue.of_typed_string column.Schema.Column.ty value) + ) + kvs' + in + create_row' t tblname kvs' new_objref +end diff --git a/ocaml/database/db_cache_impl.mli b/ocaml/database/db_cache_impl.mli index b9b26cfc0ee..8dd161b0f8e 100644 --- a/ocaml/database/db_cache_impl.mli +++ b/ocaml/database/db_cache_impl.mli @@ -1,4 +1,4 @@ -include Db_interface.DB_ACCESS +include Db_interface.DB_ACCESS2 val make : Db_ref.t -> Parse_db_conf.db_connection list -> Schema.t -> unit (** [make t connections default_schema] initialises the in-memory cache *) diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index 34a3ba01ac6..63c91d14bb4 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -166,7 +166,9 @@ module Row = struct include Make (CachedValue) - let add gen key v = add gen key @@ CachedValue.v v + let add' = add + + let add gen key v = add' gen key @@ CachedValue.v v type t = map_t @@ -182,7 +184,7 @@ module Row = struct update gen key (CachedValue.v default) f row let find' key t = - try find key t + try find key t |> Schema.CachedValue.open_present with Not_found -> raise (DBCache_NotFound ("missing field", key, "")) let find key t = find' key t |> Schema.CachedValue.value_of diff --git a/ocaml/database/db_cache_types.mli b/ocaml/database/db_cache_types.mli index 977ddda1e27..f06af9a31c6 100644 --- a/ocaml/database/db_cache_types.mli +++ b/ocaml/database/db_cache_types.mli @@ -93,6 +93,14 @@ end module Row : sig include MAP with type value = Schema.Value.t + val add' : Time.t -> string -> Schema.cached_value -> t -> t + (** [add now key value map] returns a new map with [key] associated with [value], + with creation time [now] *) + + val find' : string -> t -> [> Schema.present] Schema.CachedValue.t + (** [find key t] returns the value associated with [key] in [t] or raises + [DBCache_NotFound] *) + val fold : (string -> Stat.t -> Schema.cached_value -> 'b -> 'b) -> t -> 'b -> 'b (** [fold f t initial] folds [f key stats value acc] over the items in [t] *) diff --git a/ocaml/database/db_remote_cache_access_v1.ml b/ocaml/database/db_remote_cache_access_v1.ml index fe0db8cad25..6cb7af729c5 100644 --- a/ocaml/database/db_remote_cache_access_v1.ml +++ b/ocaml/database/db_remote_cache_access_v1.ml @@ -26,7 +26,8 @@ module DBCacheRemoteListener = struct (* update_lengths xml resp; *) resp - module DBCache : Db_interface.DB_ACCESS = Db_cache_impl + module DBCache : Db_interface.DB_ACCESS = + Db_interface_compat.OfCached (Db_cache_impl) (** Unmarshals the request, calls the DBCache function and marshals the result. Note that, although the messages still contain the pool_secret for historical reasons, diff --git a/ocaml/database/db_remote_cache_access_v2.ml b/ocaml/database/db_remote_cache_access_v2.ml index 754fd2fa340..51a1177cabd 100644 --- a/ocaml/database/db_remote_cache_access_v2.ml +++ b/ocaml/database/db_remote_cache_access_v2.ml @@ -19,7 +19,8 @@ open Db_exn (** Convert a marshalled Request Rpc.t into a marshalled Response Rpc.t *) let process_rpc (req : Rpc.t) = - let module DB : Db_interface.DB_ACCESS = Db_cache_impl in + let module DB : Db_interface.DB_ACCESS = + Db_interface_compat.OfCached (Db_cache_impl) in let t = Db_backend.make () in Response.rpc_of_t ( try diff --git a/ocaml/database/schema.ml b/ocaml/database/schema.ml index febeaa0412e..06a2dc391d4 100644 --- a/ocaml/database/schema.ml +++ b/ocaml/database/schema.ml @@ -115,6 +115,18 @@ module CachedValue = struct v | `Absent -> Value.unmarshal ty t.marshalled + + let of_typed_string ty marshalled = + let v = Value.unmarshal ty marshalled in + {v= `Present v; marshalled} + + let maybe_unmarshal ty = function + | {v= `Present _; _} as p -> + p + | {v= `Absent; marshalled} -> + of_typed_string ty marshalled + + let open_present ({v= `Present _; _} as t) = t end type cached_value = present CachedValue.t diff --git a/ocaml/database/schema.mli b/ocaml/database/schema.mli index 07e34c06e04..8a248d49953 100644 --- a/ocaml/database/schema.mli +++ b/ocaml/database/schema.mli @@ -50,9 +50,9 @@ type absent = [`Absent] type maybe = [`Absent | `Present of Value.t] -(** Abstract type, ensuring marshaled form was created from a Value.t. +(** Abstract type, ensuring marshalled form was created from a Value.t. - For backwards compatibility this can also be created from a marshaled form, + For backwards compatibility this can also be created from a marshalled form, but then retrieving the value requires its {Type.t} to be known. A polymorphic variant is used to decide at the type level when we are always guaranteed to have @@ -70,24 +70,30 @@ module CachedValue : sig val v : Value.t -> [> present] t (** [v value] creates a cached value, storing the value and its serialized form. - [O(1)] for strings, and [O(n)] for sets and maps, where [n] is the result size in marshaled form. + [O(1)] for strings, and [O(n)] for sets and maps, where [n] is the result size in marshalled form. *) val of_string : string -> [> absent] t - (** [of_string marshaled] created a cached value from a marshaled form. + (** [of_string marshalled] created a cached value from a marshalled form. - This is provided for backwards compatibility, e.g. for DB RPC calls which only send the marshaled form without type information. + This is provided for backwards compatibility, e.g. for DB RPC calls which only send the marshalled form without type information. [O(1)] operation, but {!val:unmarshal} can be [O(n)] for sets and maps. *) val string_of : 'a t -> string - (** [string_of t] returns [t] in marshaled form. + (** [string_of t] returns [t] in marshalled form. This works on any cached value types. [O(1)] operation, marshaling happens at construction time. *) + val of_typed_string : Type.t -> string -> [> present] t + (** [of_typed_string ty marshalled] creates a cached value, storing both the serialized form and the value. + + Same complexity as {!val:unmarshal} + *) + val value_of : [< present] t -> Value.t (** [value_of t] returns [t] in {!type:Value.t} form. @@ -103,8 +109,19 @@ module CachedValue : sig When the value was created by {!val:v} this is an [O(1)] operation. When the value was created by {!val:of_string} this is an [O(1)] operation for strings, and [O(n)] operation for sets and maps, as it requires unmarshaling. - The unmarshaled value is not cached, so each unmarshal call has the same cost. + The unmarshalled value is not cached, so each unmarshal call has the same cost. *) + + val maybe_unmarshal : Type.t -> [< maybe] t -> present t + (** [maybe_unmarshal ty t] returns [t] with both a Value and its marshaled form. + + Called {!val:unmarshal} internally if [t] doesn't contain a {type:Value.t}. + + Same complexity as !{val:unmarshal}. + *) + + val open_present : [< present] t -> [> present] t + (** [open_present t] returns [t] as an open polymorphic variant, that can be merged with [absent]. *) end type cached_value = present CachedValue.t From b096d83c2499c6aee7950a9d47d1bf0234c06d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 2 May 2025 17:54:21 +0100 Subject: [PATCH 193/492] CP-307933: Start using DB_ACCESS2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ministat` confirms a speedup: ``` Db.Pool.get_all_records : N Min Max Median Avg Stddev x 432 54115.256 493532.78 56048.42 57937.384 24117.68 + 524 22642.778 333257.36 23595.495 24679.258 15206.708 Difference at 95.0% confidence -33258.1 +/- 2513.99 -57.4036% +/- 2.90374% (Student's t, pooled s = 19737.2) >>>> Db.VM.set_NVRAM : N Min Max Median Avg Stddev x 132 36794 2355369.4 1095736.6 1107222.1 298247.5 + 168 49167.417 1485278.1 678231.31 685636.89 161480.92 Difference at 95.0% confidence -421585 +/- 52835.6 -38.0759% +/- 3.54275% (Student's t, pooled s = 231767) ``` Signed-off-by: Edwin Török --- ocaml/database/db_cache.ml | 14 ++- ocaml/database/db_cache.mli | 2 +- ocaml/idl/ocaml_backend/gen_api.ml | 2 + ocaml/idl/ocaml_backend/gen_db_actions.ml | 135 +++++++++++++++++++--- ocaml/xapi/console.ml | 2 +- ocaml/xapi/db.ml | 2 +- ocaml/xapi/db_gc_util.ml | 2 +- ocaml/xapi/eventgen.ml | 2 +- ocaml/xapi/helpers.ml | 12 +- ocaml/xapi/xapi_vm_snapshot.ml | 6 +- 10 files changed, 146 insertions(+), 33 deletions(-) diff --git a/ocaml/database/db_cache.ml b/ocaml/database/db_cache.ml index 6bdd4aefff0..c6ec25d6130 100644 --- a/ocaml/database/db_cache.ml +++ b/ocaml/database/db_cache.ml @@ -19,30 +19,32 @@ module D = Debug.Make (struct let name = "db_cache" end) open D (** Masters will use this to modify the in-memory cache directly *) -module Local_db : DB_ACCESS = Db_interface_compat.OfCached (Db_cache_impl) +module Local_db : DB_ACCESS2 = Db_cache_impl (** Slaves will use this to call the master by XMLRPC *) -module Remote_db : DB_ACCESS = Db_rpc_client_v1.Make (struct +module Remote_db : DB_ACCESS2 = +Db_interface_compat.OfCompat (Db_rpc_client_v1.Make (struct let initialise () = ignore (Master_connection.start_master_connection_watchdog ()) ; ignore (Master_connection.open_secure_connection ()) let rpc request = Master_connection.execute_remote_fn request -end) +end)) let get = function | Db_ref.In_memory _ -> - (module Local_db : DB_ACCESS) + (module Local_db : DB_ACCESS2) | Db_ref.Remote -> - (module Remote_db : DB_ACCESS) + (module Remote_db : DB_ACCESS2) let lifecycle_state_of ~obj fld = let open Datamodel in let {fld_states; _} = StringMap.find obj all_lifecycles in StringMap.find fld fld_states +module DB = Db_interface_compat.OfCached (Local_db) + let apply_delta_to_cache entry db_ref = - let module DB : DB_ACCESS = Local_db in match entry with | Redo_log.CreateRow (tblname, objref, kvs) -> debug "Redoing create_row %s (%s)" tblname objref ; diff --git a/ocaml/database/db_cache.mli b/ocaml/database/db_cache.mli index 0198ddb36b3..ed1de2cd9ad 100644 --- a/ocaml/database/db_cache.mli +++ b/ocaml/database/db_cache.mli @@ -12,6 +12,6 @@ * GNU Lesser General Public License for more details. *) -val get : Db_ref.t -> (module Db_interface.DB_ACCESS) +val get : Db_ref.t -> (module Db_interface.DB_ACCESS2) val apply_delta_to_cache : Redo_log.t -> Db_ref.t -> unit diff --git a/ocaml/idl/ocaml_backend/gen_api.ml b/ocaml/idl/ocaml_backend/gen_api.ml index 44d8bf9298e..863ae6b2b50 100644 --- a/ocaml/idl/ocaml_backend/gen_api.ml +++ b/ocaml/idl/ocaml_backend/gen_api.ml @@ -484,7 +484,9 @@ let gen_db_actions _config highapi = (toposort_types highapi only_records) ; (* NB record types are ignored by dm_to_string and string_to_dm *) O.Module.strings_of (dm_to_string all_types_in_db) + ; O.Module.strings_of (dm_to_field all_types_in_db) ; O.Module.strings_of (string_to_dm all_types_in_db) + ; O.Module.strings_of (field_to_dm all_types_in_db) ; O.Module.strings_of (db_action highapi_in_db) ] @ List.map O.Module.strings_of (Gen_db_check.all highapi_in_db) diff --git a/ocaml/idl/ocaml_backend/gen_db_actions.ml b/ocaml/idl/ocaml_backend/gen_db_actions.ml index e467624ab13..f4633fd1ba8 100644 --- a/ocaml/idl/ocaml_backend/gen_db_actions.ml +++ b/ocaml/idl/ocaml_backend/gen_db_actions.ml @@ -25,8 +25,12 @@ open DT (* Names of the modules we're going to generate (use these to prevent typos) *) let _dm_to_string = "DM_to_String" +let _dm_to_field = "DM_to_Field" + let _string_to_dm = "String_to_DM" +let _field_to_dm = "Field_to_DM" + let _db_action = "DB_Action" let _db_defaults = "DB_DEFAULTS" @@ -109,6 +113,44 @@ let dm_to_string tys : O.Module.t = ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) () +let dm_to_field tys : O.Module.t = + let tys = List.filter type_marshalled_in_db tys in + (* For every type, we create a single function *) + let ty_fun ty = + let body = + match ty with + | DT.Map (String, String) -> + "Schema.Value.pairs" + | DT.Map (key, value) -> + Printf.sprintf + "fun s -> s |> List.map (fun (k, v) -> %s.%s k, %s.%s v) |> \ + Schema.Value.pairs" + _dm_to_string (OU.alias_of_ty key) _dm_to_string + (OU.alias_of_ty value) + | DT.Set String -> + "Schema.Value.set" + | DT.Set ty -> + Printf.sprintf "fun s -> s |> List.map %s.%s |> Schema.Value.set" + _dm_to_string (OU.alias_of_ty ty) + | DT.String -> + "Schema.Value.string" + | _ -> + Printf.sprintf "fun s -> s |> %s.%s |> Schema.Value.string" + _dm_to_string (OU.alias_of_ty ty) + in + O.Let.make ~name:(OU.alias_of_ty ty) ~params:[] ~ty:"Db_interface.field_in" + ~body:[body] () + in + O.Module.make ~name:_dm_to_field + ~preamble: + [ + "exception StringEnumTypeError of string" + ; "exception DateTimeError of string" + ] + ~letrec:true + ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) + () + (** Generate a module of string to datamodel type unmarshalling functions *) let string_to_dm tys : O.Module.t = let tys = List.filter type_marshalled_in_db tys in @@ -171,6 +213,53 @@ let string_to_dm tys : O.Module.t = ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) () +let field_to_dm tys : O.Module.t = + let tys = List.filter type_marshalled_in_db tys in + (* For every type, we create a single function *) + let ty_fun ty = + let name = OU.alias_of_ty ty in + let body = + match ty with + | DT.Map (key, value) -> + let conv = + match (key, value) with + | DT.String, DT.String -> + "" + | _ -> + Printf.sprintf " |> List.map (fun (k, v) -> %s.%s k, %s.%s v)" + _string_to_dm (OU.alias_of_ty key) _string_to_dm + (OU.alias_of_ty value) + in + "fun s -> s |> Schema.CachedValue.maybe_unmarshal Schema.Type.Pairs \ + |> Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.pairs" + ^ conv + | DT.Set ty -> + let conv = + match ty with + | DT.String -> + "" + | _ -> + Printf.sprintf " |> List.map %s.%s" _string_to_dm + (OU.alias_of_ty ty) + in + "fun s -> s |> Schema.CachedValue.maybe_unmarshal Schema.Type.Set |> \ + Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.set" + ^ conv + | DT.String -> + "fun s -> s |> Schema.CachedValue.maybe_unmarshal Schema.Type.String \ + |> Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.string" + | _ -> + Printf.sprintf "fun f -> f |> Schema.CachedValue.string_of |> %s.%s" + _string_to_dm name + in + O.Let.make ~name ~params:[] ~ty:(OU.alias_of_ty ty) ~body:[body] () + in + O.Module.make ~name:_field_to_dm + ~preamble:["exception StringEnumTypeError of string"] + ~letrec:true + ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) + () + (** True if a field is actually in this table, false if stored elsewhere (ie Set(Ref _) are stored in foreign tables *) let field_in_this_table = function @@ -283,7 +372,7 @@ let open_db_module = [ "let __t = Context.database_of __context in" ; "let module DB = (val (Xapi_database.Db_cache.get __t) : \ - Xapi_database.Db_interface.DB_ACCESS) in" + Xapi_database.Db_interface.DB_ACCESS2) in" ] let db_action api : O.Module.t = @@ -331,7 +420,7 @@ let db_action api : O.Module.t = let ty_alias = OU.alias_of_ty f.DT.ty in let accessor = "find_regular" in let field_name = Escaping.escape_id f.full_name in - Printf.sprintf {|%s.%s (%s "%s")|} _string_to_dm ty_alias accessor + Printf.sprintf {|%s.%s (%s "%s")|} _field_to_dm ty_alias accessor field_name in let make_field f = @@ -433,8 +522,13 @@ let db_action api : O.Module.t = let to_string arg = let binding = O.string_of_param arg in let converter = O.type_of_param arg in - Printf.sprintf "let %s = %s.%s %s in" binding _dm_to_string converter - binding + Printf.sprintf "let %s = %s.%s %s in" binding + ( if binding = Client._self || binding = "ref" then + _dm_to_string + else + _dm_to_field + ) + converter binding in let body = match tag with @@ -445,37 +539,38 @@ let db_action api : O.Module.t = (Escaping.escape_id fld.DT.full_name) | FromField (Getter, {DT.ty; full_name; _}) -> Printf.sprintf "%s.%s (DB.read_field __t \"%s\" \"%s\" %s)" - _string_to_dm (OU.alias_of_ty ty) + _field_to_dm (OU.alias_of_ty ty) (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Add, {DT.ty= DT.Map (_, _); full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,%s) \"%s\" \"%s\" %s \ - AddMapLegacy" + "DB.process_structured_field __t (Schema.Value.marshal %s, \ + Schema.Value.marshal %s) \"%s\" \"%s\" %s AddMapLegacy" Client._key Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Add, {DT.ty= DT.Set _; full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s AddSet" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s AddSet" Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Remove, {DT.ty= DT.Map (_, _); full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s \ - RemoveMap" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s RemoveMap" Client._key (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Remove, {DT.ty= DT.Set _; full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s \ - RemoveSet" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s RemoveSet" Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) @@ -517,7 +612,9 @@ let db_action api : O.Module.t = match (x.msg_params, x.msg_result) with | [{param_name= name; _}], Some (result_ty, _) -> let query = - Printf.sprintf "DB.db_get_by_uuid __t \"%s\" %s" + Printf.sprintf + "DB.db_get_by_uuid __t \"%s\" (Schema.Value.Unsafe_cast.string \ + %s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -530,7 +627,7 @@ let db_action api : O.Module.t = ^ ")" in let query_opt = - Printf.sprintf "DB.db_get_by_uuid_opt __t \"%s\" %s" + Printf.sprintf "DB.db_get_by_uuid_opt __t \"%s\" (%s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -555,7 +652,9 @@ let db_action api : O.Module.t = match (x.msg_params, x.msg_result) with | [{param_name= name; _}], Some (Set result_ty, _) -> let query = - Printf.sprintf "DB.db_get_by_name_label __t \"%s\" %s" + Printf.sprintf + "DB.db_get_by_name_label __t \"%s\" \ + (Schema.Value.Unsafe_cast.string %s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -606,13 +705,15 @@ let db_action api : O.Module.t = | FromObject GetAllRecordsWhere -> String.concat "\n" [ - "let expr' = Xapi_database.Db_filter.expr_of_string expr in" + "let expr' = Xapi_database.Db_filter.expr_of_string \ + (Schema.Value.Unsafe_cast.string expr) in" ; "get_records_where ~" ^ Gen_common.context ^ " ~expr:expr'" ] | FromObject GetAllWhere -> String.concat "\n" [ - "let expr' = Xapi_database.Db_filter.expr_of_string expr in" + "let expr' = Xapi_database.Db_filter.expr_of_string \ + (Schema.Value.Unsafe_cast.string expr) in" ; "get_refs_where ~" ^ Gen_common.context ^ " ~expr:expr'" ] | _ -> diff --git a/ocaml/xapi/console.ml b/ocaml/xapi/console.ml index 03cb4bf9559..b812cf65c76 100644 --- a/ocaml/xapi/console.ml +++ b/ocaml/xapi/console.ml @@ -185,7 +185,7 @@ let console_of_request __context req = let db = Context.database_of __context in let is_vm, _ = let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS2) in match DB.get_table_from_ref db _ref with | Some c when c = Db_names.vm -> diff --git a/ocaml/xapi/db.ml b/ocaml/xapi/db.ml index 4b4b6c2deea..f343086a2d2 100644 --- a/ocaml/xapi/db.ml +++ b/ocaml/xapi/db.ml @@ -23,5 +23,5 @@ let is_valid_ref __context r = false else let t = Context.database_of __context in - let module DB = (val Db_cache.get t : Db_interface.DB_ACCESS) in + let module DB = (val Db_cache.get t : Db_interface.DB_ACCESS2) in DB.is_valid_ref t (Ref.string_of r) diff --git a/ocaml/xapi/db_gc_util.ml b/ocaml/xapi/db_gc_util.ml index 202b51cc5eb..bcacc7d86c0 100644 --- a/ocaml/xapi/db_gc_util.ml +++ b/ocaml/xapi/db_gc_util.ml @@ -28,7 +28,7 @@ let gc_connector ~__context get_all get_record valid_ref1 valid_ref2 delete_record = let db = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS2) in let all_refs = get_all ~__context in let do_gc ref = diff --git a/ocaml/xapi/eventgen.ml b/ocaml/xapi/eventgen.ml index 753bb8fdf7b..810b30bd80b 100644 --- a/ocaml/xapi/eventgen.ml +++ b/ocaml/xapi/eventgen.ml @@ -107,7 +107,7 @@ open Xapi_database.Db_action_helper let is_valid_ref db = function | Schema.Value.String r -> ( try - ignore (Database.table_of_ref r db) ; + ignore (Database.table_of_ref (r :> string) db) ; true with _ -> false ) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 2ef16112053..8271d45c3cf 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -1349,13 +1349,19 @@ let vm_to_string __context vm = raise (Api_errors.Server_error (Api_errors.invalid_value, [str])) ; let t = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get t : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get t : Xapi_database.Db_interface.DB_ACCESS2) in - let fields = fst (DB.read_record t Db_names.vm str) in + let fields, _ = DB.read_record t Db_names.vm str in let sexpr = SExpr.Node (List.map - (fun (key, value) -> SExpr.Node [SExpr.String key; SExpr.String value]) + (fun (key, value) -> + SExpr.Node + [ + SExpr.String key + ; SExpr.String (Schema.CachedValue.string_of value) + ] + ) fields ) in diff --git a/ocaml/xapi/xapi_vm_snapshot.ml b/ocaml/xapi/xapi_vm_snapshot.ml index a7fc76a8417..fe7c7bed9dd 100644 --- a/ocaml/xapi/xapi_vm_snapshot.ml +++ b/ocaml/xapi/xapi_vm_snapshot.ml @@ -167,8 +167,10 @@ let copy_vm_fields ~__context ~metadata ~dst ~do_not_copy ~overrides = debug "copying metadata into %s" (Ref.string_of dst) ; let db = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) - in + Xapi_database.Db_interface_compat.OfCached + (( val Xapi_database.Db_cache.get db + : Xapi_database.Db_interface.DB_ACCESS2 + )) in List.iter (fun (key, value) -> let value = Option.value ~default:value (List.assoc_opt key overrides) in From 7ad042e53b215c043d4d3f0ce9fc0a84ada7fa7a Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 14 May 2025 13:39:25 +0100 Subject: [PATCH 194/492] CP-307865 accept SHA512 for custom server certs A user can install server certificates to secure the connection between xapi and their API clients. So far we demanded SHA256 certificates. Accept SHA512 in addition. The patch renames the predicate to no longer imply that we only accept SHA256. Signed-off-by: Christian Lindig --- ocaml/gencert/lib.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/gencert/lib.ml b/ocaml/gencert/lib.ml index cd964276e65..d0794633de9 100644 --- a/ocaml/gencert/lib.ml +++ b/ocaml/gencert/lib.ml @@ -93,9 +93,9 @@ let validate_pem_chain ~pem_leaf ~pem_chain now private_key = | _ -> Error (`Msg (server_certificate_key_mismatch, [])) in - let ensure_sha256_signature_algorithm certificate = + let ensure_signature_algorithm certificate = match X509.Certificate.signature_algorithm certificate with - | Some (_, `SHA256) -> + | Some (_, (`SHA256 | `SHA512)) -> Ok certificate | _ -> Error (`Msg (server_certificate_signature_not_supported, [])) @@ -116,7 +116,7 @@ let validate_pem_chain ~pem_leaf ~pem_chain now private_key = ~error_not_yet:server_certificate_not_valid_yet ~error_expired:server_certificate_expired >>= ensure_keys_match private_key - >>= ensure_sha256_signature_algorithm + >>= ensure_signature_algorithm >>= fun cert -> match Option.map validate_chain pem_chain with | None -> From 1d12b8e186f3183974375a6a9dfb04ce32fe1916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 11 Dec 2024 22:51:36 +0000 Subject: [PATCH 195/492] CP-308049: rrdview tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Viewing RRDs produced by xcp-rrdd is difficult, because the format is incompatible with rrdtool. rrdtool has a hardcoded limit of 20 char for RRD names for backward compat with its binary format. Steps: * given a directory of xml .gz files containing xcp-rrdd produced rrds * invokes itself recursively with each file in turn using xargs -P (easy way to parallelize on OCaml 4) * load all RRDs, and split them into separate files, allowing us to shorten many of their names without conflicts * some names are still too long, there is a builtin translation table to shorten these * once split an .rrd file is created using 'rrdtool restore'. This can further be queried/inspected/transformed by rrdtool as needed * a .sh script is produced that can plot the RRD if desired. There are many RRDs so plotting isn't done automatically yet. RRDs contain min/avg/max usually, so this is drawn as a strong line at the average, and an area in a lighter color for min/max (especially useful for historic data that has been aggregated). Caveats: * we don't know the unit name, that is part of the XAPI metadata, but not the XML apparently? * separate plots are generated for separate intervals, it'd be nice to join all these into the same graph * the visualization type is not the best for all RRDs, some might benefit from a smoother line, etc. * for now the tool is just built, but not installed (that'll require a .spec change too and can be done later) This is just a starting point to be able to visualize this data somehow, and we can improve the actual plotting later. Signed-off-by: Edwin Török --- ocaml/xcp-rrdd/bin/rrdview/dune | 17 + ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml | 83 ++++ ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli | 88 +++++ ocaml/xcp-rrdd/bin/rrdview/rrdview.ml | 483 ++++++++++++++++++++++++ ocaml/xcp-rrdd/bin/rrdview/rrdview.mli | 0 5 files changed, 671 insertions(+) create mode 100644 ocaml/xcp-rrdd/bin/rrdview/dune create mode 100644 ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli create mode 100644 ocaml/xcp-rrdd/bin/rrdview/rrdview.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdview/rrdview.mli diff --git a/ocaml/xcp-rrdd/bin/rrdview/dune b/ocaml/xcp-rrdd/bin/rrdview/dune new file mode 100644 index 00000000000..e2b2401ff76 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/dune @@ -0,0 +1,17 @@ +(executable + (modes byte exe) + (name rrdview) + ;(public_name rrdview) + (libraries + threads + xapi-rrd.unix + bos.setup + astring + fpath + rresult + xmlm + tyre + xapi-rrd + result) + ;(package xapi-tools) + ) diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml new file mode 100644 index 00000000000..80717c21e36 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml @@ -0,0 +1,83 @@ +open Rrd + +type vname = VName of string + +module Rpn = struct + module VDef = struct + (* see rrdgraph_rpn(3) *) + type t = vname * string + + type op = vname -> t + + let op kind vname = (vname, kind) + + let maximum = op "MAXIMUM" + + let minimum = op "MINIMUM" + + let average = op "AVERAGE" + + let stdev = op "STDEV" + + let last = op "LAST" + + let first = op "FIRST" + + let total = op "TOTAL" + + let percent = op "PERCENT" + + let percentnan = op "PERCENTNAN" + + let lsl_slope = op "LSLSLOPE" + + let lsl_intercept = op "LSLSLINT" + + let lsl_correlation = op "LSLCORREL" + end + + module CDef = struct + type t = string Seq.t (* stores a serialized RPN expression *) + + let to_string r = r |> List.of_seq |> String.concat "," + + let vname (VName vname) = Seq.return vname + + let value f = Printf.sprintf "%g" f |> Seq.return + + (* reverse polish notation: arguments first, operator last *) + + let opn op args = Seq.append (List.to_seq args |> Seq.concat) (Seq.return op) + + let op1 op arg = opn op [arg] + + let op2 op arg1 arg2 = opn op [arg1; arg2] + + let op3 op arg1 arg2 arg3 = opn op [arg1; arg2; arg3] + end +end + +module Data = struct + type t = string + + (* see rrdgraph_data (3) *) + + let def vname rrdfile rrd rra ds = + let step = Int64.mul rrd.timestep @@ Int64.of_int rra.rra_pdp_cnt in + ( VName vname + , String.concat ":" + [ + "DEF" + ; vname ^ "=" ^ Fpath.to_string rrdfile + ; ds.ds_name + ; Rrd.cf_type_to_string rra.rra_cf + ; Printf.sprintf "step=%Lu" step + ] + ) + + let vdef vname (VName var, rpnvdefop) = + (VName vname, Printf.sprintf "CDEF:%s=%s,%s" vname var rpnvdefop) + + let cdef vname rpn = + (VName vname, Printf.sprintf "CDEF:%s=%s" vname (Rpn.CDef.to_string rpn)) +end diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli new file mode 100644 index 00000000000..0c4ac9738e9 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli @@ -0,0 +1,88 @@ +(** a variable name *) +type vname + +module Rpn : sig + (** RPN expressions for VDEF statements, see [rrdgraph_rpn(3)] *) + module VDef : sig + (** an RPN expression for VDEF, see [rrdgraph_data(3)] *) + type t + + (** a VDEF RPN expression, see [rrdgraph_rpn(3)] *) + type op = vname -> t + + val maximum : op + (** see [rrdgraph_rpn(3)] *) + + val minimum : op + (** see [rrdgraph_rpn(3)] *) + + val average : op + (** see [rrdgraph_rpn(3)] *) + + val stdev : op + (** see [rrdgraph_rpn(3)] *) + + val last : op + (** see [rrdgraph_rpn(3)] *) + + val first : op + (** see [rrdgraph_rpn(3)] *) + + val total : op + (** see [rrdgraph_rpn(3)] *) + + val percent : op + (** see [rrdgraph_rpn(3)] *) + + val percentnan : op + (** see [rrdgraph_rpn(3)] *) + + val lsl_slope : op + (** see [rrdgraph_rpn(3)] *) + + val lsl_intercept : op + (** see [rrdgraph_rpn(3)] *) + + val lsl_correlation : op + (** see [rrdgraph_rpn(3)] *) + end + + module CDef : sig + (** an RPN expression for CDEF, see [rrdgraph_data(3)] *) + type t + + val vname : vname -> t + (** [vname v] is [v] as an RPN expression *) + + val value : float -> t + (** [value v] is [v] as an RPN expression *) + + val op1 : string -> t -> t + (** [op1 op arg1] is [op arg1]. For valid operators see [rrdgraph_rpn(3)] *) + + val op2 : string -> t -> t -> t + (** [op2 op arg1 arg2] is [op arg1 arg2]. For valid operators see [rrdgraph_rpn(3)] *) + + val op3 : string -> t -> t -> t -> t + (** [op3 op arg1 arg2 arg3] is [op arg1 arg2 arg3]. For valid operators see [rrdgraph_rpn(3)] *) + end +end + +module Data : sig + (** an rrd graph data definition, see [rrdgraph_data(3)] *) + type t + + val def : string -> Fpath.t -> Rrd.rrd -> Rrd.rra -> Rrd.ds -> vname * t + (** [def vname rrdfile rrd rra datasource] is a [DEF] (see [rrdgraph_data(3)]) that loads + [datasource.ds_name] from the [rrdfile] and plots it according to the consolidation function in the + specified [rra] and timestep calculated based on [rrd]. This data can be refered to as [vname] + elsewhere. *) + + val vdef : string -> Rpn.VDef.t -> vname * t + (** [vdef vname vdefrpn] defines [vname] through a [VDEF] (see [rrdgraph_data(3)]) using the + specified [vdefrpn] expression. Conversion to RPN form is handled internally. *) + + val cdef : string -> Rpn.CDef.t -> vname * t + (** [cdef vname cdefrpn] defines [vname] through a [CDEF] (see [rrdgraph_data(3)]) using the + specified [cdefrpn] expression. Conversion to RPN form is handled internally. *) +end diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml b/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml new file mode 100644 index 00000000000..3716f4cfded --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml @@ -0,0 +1,483 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Bos_setup + +type def = Def of string * Rrd.cf_type | Cdef of string + +let name ~ds_name ~cf_type = + cf_type + |> Rrd.cf_type_to_string + |> String.Ascii.lowercase + |> Printf.sprintf "%s_%s" ds_name + +type ds_def = {units: string option} + +let default_def = {units= None} + +let def ~data ~step ~ds_name ~cf_type = + let cfstr = Rrd.cf_type_to_string cf_type in + let namestr = name ~ds_name ~cf_type in + ( Def (ds_name, cf_type) + , Printf.sprintf "DEF:%s=%s:%s:%s:step=%Ld" namestr (Fpath.to_string data) + ds_name cfstr step + ) + +type ds = Ds : string -> ds + +type cdef = Op of cdef * string * cdef | Var of def + +let rec string_of_cdef = function + | Op (lhs, op, rhs) -> + String.concat ~sep:"," [string_of_cdef lhs; string_of_cdef rhs; op] + | Var (Def (ds_name, cf_type)) -> + name ~ds_name ~cf_type + | Var (Cdef s) -> + s + +let cdef name ops = + (Cdef name, Printf.sprintf "CDEF:%s=%s" name @@ string_of_cdef ops) + +type rgb = {r: int; g: int; b: int; alpha: int option} + +type fill = RGB of rgb + +let shape ?(stack = false) kind ?label ~def fill = + let defstr = + match def with + | Def (ds_name, cf_type) -> + name ~ds_name ~cf_type + | Cdef str -> + str + in + let fillstr = + match fill with + | Some (RGB {r; g; b; alpha}) -> + Printf.sprintf "#%02x%02x%02x%s" r g b + (Option.fold ~none:"" ~some:(Printf.sprintf "%02u") alpha) + | None -> + "" + in + Printf.sprintf "%s:%s%s%s%s" kind defstr fillstr + (if stack then ":STACK" else "") + (match label with None -> "" | Some x -> ":" ^ x) + +let area = shape "AREA" + +let area_stack = shape ~stack:true "AREA" + +let line ?label = shape ?label "LINE" + +(* colors from rrdtool wiki OutlinedAreaGraph *) +let rgb ?alpha hex = + let r = (hex lsr 16) land 0xff + and g = (hex lsr 8) land 0xff + and b = hex land 0xff in + RGB {r; g; b; alpha} + +let rgb light dark = (rgb light, rgb dark) + +let colors = + [| + rgb 0x54EC48 0x24BC14 + ; rgb 0x48C4EC 0x1598C3 + ; rgb 0xDE48EC 0xB415C7 + ; rgb 0x7648EC 0x4D18E4 + ; rgb 0xEA644A 0xCC3118 + ; rgb 0xEC9D48 0xCC7016 + ; rgb 0xECD748 0xC9B215 + |] + +let get_color ~dark i = + let RGB col_light, col_dark = colors.(i mod Array.length colors) in + Some (if dark then col_dark else RGB {col_light with alpha= Some 50}) + +let rrdtool ~filename ~data title ~ds_names ~first ~last ~step ~width + ~has_min_max = + let graph = + List.of_seq + (ds_names + |> List.mapi (fun x s -> (s, x)) + |> List.to_seq + |> Seq.flat_map @@ fun (ds_name, i) -> + Seq.append + ( if has_min_max then + let ds_min, def1 = def ~step ~data ~ds_name ~cf_type:Rrd.CF_Min + and ds_max, def2 = + def ~step ~data ~ds_name ~cf_type:Rrd.CF_Max + in + let ds_range, cdef1 = + cdef (ds_name ^ "range") (Op (Var ds_max, "-", Var ds_min)) + in + List.to_seq + [ + def1 + ; def2 + ; cdef1 + ; area ~def:ds_min None + ; area_stack ~def:ds_range @@ get_color ~dark:false i + ] + else + Seq.empty + ) + (let ds_avg, def3 = + def ~step ~data ~ds_name ~cf_type:Rrd.CF_Average + in + List.to_seq + [def3; line ~label:ds_name ~def:ds_avg @@ get_color ~dark:true i] + ) + ) + in + Cmd.( + v "rrdtool" + % "graph" + % "--imgformat" + % "SVG" + % Fpath.to_string filename + % "--title" + % title + % "--width" + % string_of_int width + % "--height" + % "256" (* ~4 rows *) + % "--start" + % Int64.to_string first + % "--end" + % Int64.to_string last + %% of_list graph + ) + +let prepare_plot_cmds ~filename ~data rrd = + let open Rrd in + let has cf rra = rra.rra_cf = cf in + let has_min = + Array.find_opt (has Rrd.CF_Min) rrd.rrd_rras |> Option.is_some + in + let has_max = + Array.find_opt (has Rrd.CF_Max) rrd.rrd_rras |> Option.is_some + in + rrd.rrd_rras + |> Array.to_seq + |> Seq.map @@ fun rra -> + let timespan = + Int64.mul (Int64.of_int (rra.rra_pdp_cnt * rra.rra_row_cnt)) rrd.timestep + in + let start = rrd.last_updated -. Int64.to_float timespan in + let filename = + Fpath.add_ext (Int64.to_string timespan) filename |> Fpath.add_ext "svg" + in + let title = + Fpath.rem_ext filename + |> Fpath.basename + |> String.cuts ~sep:"." + |> String.concat ~sep:"
" + in + let step = Int64.(mul (of_int rra.rra_pdp_cnt) rrd.timestep) in + let width = 2 * rra.rra_row_cnt in + (* 1 point = 1 CDP from the RRA *) + (* TODO: could look up original names in original_ds *) + rrdtool ~step ~width ~data ~filename title ~ds_names:(ds_names rrd) + ~has_min_max:(has_min && has_max) ~first:(Int64.of_float start) + ~last:(Int64.of_float rrd.last_updated) + +let prepare_plots ?(exec = false) ~filename ~data rrd = + let output = Fpath.set_ext ".sh" filename in + let cmds = prepare_plot_cmds ~filename ~data rrd in + if exec then + cmds + |> Seq.iter @@ fun cmd -> + OS.Cmd.run cmd + |> Logs.on_error_msg ~use:(fun () -> failwith "failed to run rrdtool") + else + cmds + |> Seq.map Cmd.to_string + |> List.of_seq + |> OS.File.write_lines output + |> Logs.on_error_msg ~use:(fun _ -> exit 2) + +let finally f ~(always : unit -> unit) = + match f () with + | result -> + always () ; result + | exception e -> + always () ; raise e + +let with_input_file path f = + if Fpath.has_ext "gz" path then + let cmd = Cmd.(v "zcat" % p path) in + let ic = cmd |> Cmd.to_string |> Unix.open_process_in in + finally + (fun () -> f ic) + ~always:(fun () -> + let (_ : Unix.process_status) = Unix.close_process_in ic in + () + ) + else + let ic = open_in Fpath.(to_string path) in + finally (fun () -> f ic) ~always:(fun () -> close_in ic) + +let with_input_rrd f filename = + with_input_file filename @@ fun ic -> + Logs.info (fun m -> m "Parsing RRD %a" Fpath.pp filename) ; + let input = Xmlm.make_input (`Channel ic) in + let rrd = Rrd.from_xml input in + f ~filename rrd + +(* to avoid mixing data source and filenames we use a different type here *) + +let make_ds ?filename dsname = + let dsname = + if String.length dsname >= 20 then ( + Logs.warn (fun m -> + m "RRD data source name exceeds 20 char limit: %s" dsname + ) ; + String.with_range dsname ~len:19 + ) else + dsname + in + (Option.map Fpath.v filename, Ds dsname) + +let make_sr (dsname, uuid) = make_ds ~filename:("_sr_" ^ uuid) dsname + +let make_vbd (vbd, dsname) = make_ds ~filename:vbd dsname + +let make_runstate dsname = make_ds ~filename:"runstate" dsname + +(* top-level value to compile regexes only once *) +let classify = + (* some RRD data source names are too long, max is 20 chars. + Splitting RRDs into different files allows to shorten the names, + e.g. remove the UUID from SR datasources. + Some names are still too long, but those can be shortened without losing information. *) + let open Tyre in + let uuid8 = pcre "[0-9a-f]{8}" in + let uuid_rest = pcre "(-[0-9a-f]{4}){3}-[0-9a-f]{12}" in + let dsname = pcre "[a-zA-Z_]+" in + let shorten from target = str from --> fun () -> make_ds target in + [ + (dsname <&> char '_' *> uuid8) --> make_sr + ; (str "sr_" *> uuid8 <* uuid_rest <* char '_' <&> dsname) --> make_sr + ; shorten "Tapdisks_in_low_memory_mode" "Tapdisks_in_lowmem" + ; ( (opt dsname <* str "memory_" <&> dsname) --> fun (pre, post) -> + make_ds (Option.value ~default:"" pre ^ "mem_" ^ post) + ) + ; (pcre "vbd_[^_]+" <* char '_' <&> dsname) --> make_vbd + ; (str "runstate_" *> dsname) --> make_runstate + ; ( (str "cpu" *> int <&> opt @@ (str "-C" *> int)) --> fun (cpuidx, cstate) -> + let filename = + match cstate with None -> "cpu" | Some n -> Printf.sprintf "cpu-C%d" n + in + make_ds ~filename ("cpu" ^ string_of_int cpuidx) + ) + ; (str "cpu_avg" --> fun () -> make_ds ~filename:"cpu_avg" "cpu_avg") + ; (pcre "pif_" *> dsname) --> make_ds ~filename:"pif" + (* TODO: could provide info on polarity based on rx/tx and on kind, TICK for errors *) + ] + |> route + +let classify_dsname dsname = + let error _ = make_ds dsname in + dsname |> Tyre.exec classify |> Result.fold ~ok:Fun.id ~error + +let classify ~ds_def ~filename ds = + let open Rrd in + let override, dsname = classify_dsname ds.ds_name in + let pathname = + let name = Fpath.rem_ext filename in + match override with + | None -> + Fpath.(name + "_filtered") + | Some newname -> + Fpath.(name + to_string newname) + in + (* Logs.debug (fun m -> m "%s -> %a" ds.ds_name Fpath.pp pathname); *) + let def = + StringMap.find_opt ds.ds_name ds_def |> Option.value ~default:default_def + in + (* can only plot graphs with same units *) + let extra = + match def.units with + | None -> + (* use RRD type as approximation to "same unit", at least same kind of unit, + e.g. rate vs duration *) + Rrd.ds_type_to_string ds.ds_ty + | Some u -> + String.take ~sat:Char.Ascii.is_alphanum u + in + (Fpath.(pathname + extra |> add_ext "xml"), dsname) + +let rrdtool = + OS.Cmd.resolve (Cmd.v "rrdtool") + |> Logs.on_error_msg ~use:(fun () -> failwith "rrdtool is not installed") + +let rrd_restore filename rrd = + let filename = Fpath.set_ext "xml" filename in + Logs.debug (fun m -> m "Writing RRD xml to %a" Fpath.pp filename) ; + let () = + Out_channel.with_open_text (Fpath.to_string filename) @@ fun ch -> + Rrd_unix.to_fd rrd (Unix.descr_of_out_channel ch) + in + let dot_rrd = Fpath.set_ext "rrd" filename in + Logs.debug (fun m -> m "Restoring RRD to %a" Fpath.pp dot_rrd) ; + Cmd.(rrdtool % "restore" % "-f" % p filename % p dot_rrd) + |> OS.Cmd.run + |> Result.map (fun () -> dot_rrd) + +let split_rrd ~ds_def ~filename rrd = + let open Rrd in + let rrds = Hashtbl.create 3 in + let original_ds = Hashtbl.create 127 in + + (* split the rrd into multiple rrds based on data source name *) + let () = + Logs.info (fun m -> m "classifying data sources") ; + rrd.rrd_dss + |> Array.iteri @@ fun i ds -> + let filename, Ds ds_name = classify ~ds_def ~filename ds in + let get_i rra = (rra.rra_data.(i), rra.rra_cdps.(i)) in + let previous = + Hashtbl.find_opt rrds filename |> Option.value ~default:[] + in + Hashtbl.replace original_ds ds_name ds ; + Hashtbl.replace rrds filename + @@ (({ds with ds_name}, Array.map get_i rrd.rrd_rras) :: previous) + in + Logs.info (fun m -> m "Building and restoring RRDs") ; + (* now build an RRD and restore it to binary .rrd form *) + rrds + |> Hashtbl.iter @@ fun filename lst -> + Logs.debug (fun m -> m "Building %a" Fpath.pp filename) ; + let rrd_dss, rrd_rras = List.split lst in + let rrd_rras = + rrd.rrd_rras + |> Array.mapi @@ fun i rra -> + let rra_seq = List.to_seq rrd_rras in + let geti a = a.(i) in + { + rra with + rra_data= rra_seq |> Seq.map geti |> Seq.map fst |> Array.of_seq + ; rra_cdps= rra_seq |> Seq.map geti |> Seq.map snd |> Array.of_seq + } + in + let rrd = {rrd with rrd_dss= Array.of_list rrd_dss; rrd_rras} in + let data = + rrd_restore filename rrd + |> Logs.on_error_msg ~use:(fun () -> failwith "Failed to restore RRD") + in + prepare_plots ~filename ~data rrd + +type mode = Split | Default | Plot + +let parse_ds_def def k v = + match k with "units" when v <> "unknown" -> {units= Some v} | _ -> def + +let parse_ds_defs path = + Logs.info (fun m -> m "Loading data source definitions from %a" Fpath.pp path) ; + let fields line = + line + |> String.cut ~sep:":" + |> Option.map @@ fun (k, v) -> (String.trim k, String.trim v) + in + let fold (map, key_opt) line = + match (fields line, key_opt) with + | Some ("name_label", ds_name), None -> + (map, Some ds_name) (* start parsing new item *) + | _, None -> + (map, None) (* ignore *) + | None, Some _ -> + (map, None) + | Some (k, v), Some ds_name -> + let map = + map + |> Rrd.StringMap.update ds_name @@ fun def -> + Some (parse_ds_def (Option.value ~default:default_def def) k v) + in + (map, Some ds_name) + in + OS.File.fold_lines fold (Rrd.StringMap.empty, None) path + |> Logs.on_error_msg ~use:(fun _ -> + failwith "Could not parse datasource definitions" + ) + |> fst + +let plot_rrd ~filename rrd = + let data = + rrd_restore filename rrd + |> Logs.on_error_msg ~use:(fun () -> failwith "Failed to restore RRD") + in + prepare_plots ~exec:true ~filename ~data rrd + +let () = + let open OS.Arg in + let level = + let conv = + conv ~docv:"LEVEL" Logs.level_of_string Fmt.(option Logs.pp_level) + in + opt ~doc:"Set log level" ["log"] conv ~absent:(Some Logs.Debug) + in + let mode = + opt + ~doc: + "Used in self-invocation to split rrd into multiple rrds, or to plot \ + an already split rrd" + ["mode"] ~absent:Default + @@ enum [("split", Split); ("plot", Plot); ("default", Default)] + in + + let data_source_list = + opt ~doc:"Load data source definitions" ~docv:"PATH" ["def"] ~absent:None + (some path) + in + let paths = + OS.Arg.( + parse ~doc:"Split and plot xcp-rrdd XML rrd.gz with rrdtool" ~pos:path () + ) + in + + Logs.set_level level ; + let ds_def = + Option.map parse_ds_defs data_source_list + |> Option.value ~default:Rrd.StringMap.empty + in + match mode with + | Default -> + let cmd = + Cmd.( + v "find" %% of_values p paths % "-name" % "*.gz" % "-print0" + |> OS.Cmd.run_out + ) + in + (* TODO: forward level *) + let xargs = + Cmd.( + v "xargs" + % "-0" + % "-P0" + % "-n1" + % Sys.executable_name + %% of_values ~slip:"--def" p (Option.to_list data_source_list) + % "--mode=split" + |> OS.Cmd.run_in + ) + in + let res = + OS.Cmd.out_run_in cmd + |> Logs.on_error_msg ~use:(fun _ -> exit 1) + |> xargs + in + Logs.on_error_msg ~use:(fun _ -> exit 1) res + | Split -> + paths |> List.iter @@ with_input_rrd (split_rrd ~ds_def) + | Plot -> + paths |> List.iter @@ with_input_rrd plot_rrd diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdview.mli b/ocaml/xcp-rrdd/bin/rrdview/rrdview.mli new file mode 100644 index 00000000000..e69de29bb2d From 8deed51b1bbcc4ca0e54bda35d89da5624b5ba2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 11 Dec 2024 22:51:36 +0000 Subject: [PATCH 196/492] CP-308049: rrdview tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 'tyre' dependency. Signed-off-by: Edwin Török --- dune-project | 1591 ++++++++++++++++++++++-------------------- opam/xapi-tools.opam | 1 + 2 files changed, 838 insertions(+), 754 deletions(-) diff --git a/dune-project b/dune-project index 5ad3b3a0ff3..56de01f0fd3 100644 --- a/dune-project +++ b/dune-project @@ -1,771 +1,854 @@ (lang dune 3.15) -(formatting (enabled_for ocaml)) +(formatting + (enabled_for ocaml)) + (using menhir 2.0) + (using directory-targets 0.1) + (opam_file_location inside_opam_directory) (cram enable) + (implicit_transitive_deps false) + (generate_opam_files true) (name "xapi") -(source (github xapi-project/xen-api)) + +(source + (github xapi-project/xen-api)) + (license "LGPL-2.1-only WITH OCaml-LGPL-linking-exception") + (authors "xen-api@lists.xen.org") -(maintainers "Xapi project maintainers") -(homepage "https://xapi-project.github.io/") -(package - (name zstd) -) - - -(package - (name clock) - (synopsis "Xapi's library for managing time") - (authors "Jonathan Ludlam" "Pau Ruiz Safont") - (depends - (ocaml (>= 4.12)) - (alcotest :with-test) - astring - fmt - mtime - ptime - (xapi-log (= :version)) - (qcheck-core :with-test) - (qcheck-alcotest :with-test) - ) -) - -(package - (name tgroup) - (depends - xapi-log - xapi-stdext-unix) -) - -(package - (name xml-light2) -) - -(package - (name xapi-sdk) - (license "BSD-2-Clause") - (synopsis "Xen API SDK generation code") - (depends - (alcotest :with-test) - astring - (fmt :with-test) - mustache - (xapi-datamodel (= :version)) - (xapi-stdext-unix (and (= :version) :with-test)) - (xapi-test-utils :with-test) - ) - (allow_empty) -) -(package - (name xen-api-client-lwt) -) - - -(package - (name xen-api-client) - (synopsis "Xen-API client library for remotely-controlling a xapi host") - (authors "David Scott" "Anil Madhavapeddy" "Jerome Maloberti" "John Else" "Jon Ludlam" "Thomas Sanders" "Mike McClurg") - (depends - (alcotest :with-test) - astring - (cohttp (>= "0.22.0")) - re - rpclib - uri - (uuid (= :version)) - (xapi-client (= :version)) - (xapi-idl (= :version)) - (xapi-rrd (= :version)) - (xapi-types (= :version)) - xmlm - ) -) - -(package - (name xe) -) - -(package - (name xapi-types) -) - -(package - (name xapi-tracing) - (depends - ocaml - dune - (alcotest :with-test) - (fmt :with-test) - ppx_deriving_yojson - re - uri - (uuid :with-test) - (xapi-log (= :version)) - (xapi-stdext-threads (= :version)) - yojson - ) - (synopsis "Allows to instrument code to generate tracing information") - (description "This library provides modules to allow gathering runtime traces.") -) - -(package - (name xapi-tracing-export) - (depends - ocaml - cohttp-posix - dune - cohttp - ptime - result - rresult - rpclib - ppx_deriving_rpc - uri - (xapi-log (= :version)) - (xapi-open-uri (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-tracing (= :version)) - (zstd (= :version)) - ) - (synopsis "Export traces in multiple protocols and formats") - (description "This library export traces is able to push traces to http endpoints or generate compressed tarballs in the filesystem.") -) +(maintainers "Xapi project maintainers") -(package - (name xapi-storage-script) -) +(homepage "https://xapi-project.github.io/") (package - (name xapi-storage-cli) - (depends - cmdliner - re - rpclib - ppx_deriving_rpc - (xapi-client (= :version)) - (xapi-idl (= :version)) - (xapi-types (= :version)) - ) - (synopsis "A CLI for xapi storage services") - (description "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs.") -) - -(package - (name xapi-storage) -) - -(package - (name xapi-schema) -) - -(package - (name rrdd-plugin) - (synopsis "A plugin library for the xapi performance monitoring daemon") - (description "This library allows one to expose a datasource which can then be sampled by the performance monitoring daemon.") - (depends - ocaml - astring - rpclib - (rrd-transport (= :version)) - (xapi-forkexecd (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-std (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-idl (= :version)) - xenstore - xenstore_transport - ) -) - -(package - (name xapi-open-uri) -) - -(package - (name xapi-nbd) -) - -(package - (name xapi-log) -) - -(package - (name xapi-idl) -) - -(package - (name xapi-forkexecd) - (synopsis "Sub-process control service for xapi") - (description "This daemon creates and manages sub-processes on behalf of xapi.") - (depends - astring - (forkexec (= :version)) - (uuid (= :version)) - (xapi-stdext-unix (= :version)) - ) -) - -(package - (name xapi-expiry-alerts) -) - -(package - (name xapi-datamodel) -) - -(package - (name xapi-consts) -) - -(package - (name xapi-compression) -) - -(package - (name xapi-client) -) - -(package - (name xapi-cli-protocol) -) - -(package - (name xapi-debug) - (synopsis "Debugging tools for XAPI") - (description "Tools installed into the non-standard /opt/xensource/debug location") - (depends - alcotest - angstrom - astring - base64 - cmdliner - cohttp - cstruct - ctypes - domain-name - fd-send-recv - fmt - hex - integers - ipaddr - logs - magic-mime - mirage-crypto - mirage-crypto-pk - mirage-crypto-rng - mtime - pci - polly - ppx_deriving - ppx_deriving_rpc - ppx_sexp_conv - psq - ptime - qcheck-alcotest - qcheck-core - re - result - rpclib - rrdd-plugin - rresult - sexplib - sexplib0 - sha - tar - tar-unix - uri - uuidm - uutf - x509 - xapi-backtrace - xapi-log - xapi-types - xapi-stdext-pervasives - xapi-stdext-unix - xen-api-client - xen-api-client-lwt - xenctrl - xenstore_transport - xmlm - yojson - ) -) - -(package - (name xapi-tools) - (synopsis "Various daemons and CLI applications required by XAPI") - (description "Includes message-switch, xenopsd, forkexecd, ...") - (depends - astring - base64 - cmdliner - cstruct-unix - fmt - logs - lwt - mtime - netlink - qmp - re - result - rpclib - rresult - uri - xenctrl - xmlm - yojson - ; can't use '= version' here yet, - ; 'xapi-tools' will have version ~dev, not 'master' like all the others - ; because it is not in xs-opam yet - rrd-transport - rrdd-plugin - xapi-tracing-export - xen-api-client - (alcotest :with-test) - (ppx_deriving_rpc :with-test) - (qcheck-core :with-test) - (xapi-test-utils :with-test) - (xenstore_transport :with-test) - ) -) - -(package - (name xapi) - (synopsis "The toolstack daemon which implements the XenAPI") - (description "This daemon exposes the XenAPI and is used by clients such as 'xe' and 'XenCenter' to manage clusters of Xen-enabled hosts.") - (depends - (ocaml (>= 4.09)) - (alcotest :with-test) - angstrom - astring - base-threads - base64 - (bos :with-test) - cdrom - (clock (= :version)) - cmdliner - cohttp - conf-pam - (crowbar :with-test) - cstruct - ctypes - ctypes-foreign - domain-name - (ezxenstore (= :version)) - fmt - fd-send-recv - hex - (http-lib (and :with-test (= :version))) ; the public library is only used for testing - integers - ipaddr - logs - magic-mime - mirage-crypto - mirage-crypto-pk - (mirage-crypto-rng (>= "0.11.0")) - (message-switch-unix (= :version)) - mtime - opentelemetry-client-ocurl - pci - (pciutil (= :version)) - polly - ppx_deriving_rpc - ppx_sexp_conv - ppx_deriving - psq - ptime - qcheck-alcotest - qcheck-core - re - result - rpclib - (rrdd-plugin (= :version)) - rresult - sexpr - sexplib - sexplib0 - sha - (stunnel (= :version)) - tar - tar-unix - uri - tgroup - (uuid (= :version)) - uutf - uuidm - x509 - xapi-backtrace - (xapi-client (= :version)) - (xapi-cli-protocol (= :version)) - (xapi-consts (= :version)) - (xapi-datamodel (= :version)) - (xapi-expiry-alerts (= :version)) - (xapi-idl (= :version)) - (xapi-inventory (= :version)) - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-std (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-stdext-zerocheck (= :version)) - (xapi-test-utils :with-test) - (xapi-tracing (= :version)) - (xapi-tracing-export (= :version)) - (xapi-types (= :version)) - (xen-api-client-lwt (= :version)) - xenctrl ; for quicktest - xenstore_transport - xmlm - (xml-light2 (= :version)) - yojson - (zstd (= :version)) - ) -) - -(package - (name vhd-tool) - (synopsis "Manipulate .vhd files") - (tags ("org.mirage" "org:xapi-project")) - (depends - (alcotest-lwt :with-test) - astring - bigarray-compat - cmdliner - cohttp - cohttp-lwt - conf-libssl - (cstruct (>= "3.0.0")) - (ezxenstore (= :version)) - (forkexec (= :version)) - io-page - lwt - lwt_ssl - nbd - nbd-unix - ppx_cstruct - ppx_deriving_rpc - re - result - rpclib - ssl - sha - tar - uri - (vhd-format (= :version)) - (vhd-format-lwt (= :version)) - (xapi-idl (= :version)) - (xapi-log (= :version)) - (xen-api-client-lwt (= :version)) - xenstore - xenstore_transport - ) -) - -(package - (name vhd-format) -) - -(package - (name vhd-format-lwt) - (synopsis "Lwt interface to read/write VHD format data") - (description "A pure OCaml library to read and write -[vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a -simple command-line tool which allows vhd files to be interrogated, -manipulated, format-converted and streamed to and from files and remote -servers. - -This package provides an Lwt compatible interface to the library.") - (authors "Jon Ludlam" "Dave Scott") - (maintainers "Dave Scott ") - (tags ("org:mirage" "org:xapi-project")) - (homepage "https://github.com/mirage/ocaml-vhd") - (source (github mirage/ocaml-vhd)) - (depends - (ocaml (>= "4.10.0")) - (alcotest :with-test) - (alcotest-lwt (and :with-test (>= "1.0.0"))) - (bigarray-compat (>= "1.1.0")) - (cstruct (>= "6.0.0")) - cstruct-lwt - (fmt :with-test) - (lwt (>= "3.2.0")) - (mirage-block (>= "3.0.0")) - (rresult (>= "0.7.0")) - (vhd-format (= :version)) - (io-page (and :with-test (>= "2.4.0"))) - ) -) - -(package - (name varstored-guard) -) - -(package - (name uuid) -) - -(package - (name stunnel) - (synopsis "Library used by xapi to herd stunnel processes") - (description "This library allows xapi to configure, launch and terminate stunnel processes that act as clients and servers.") - (depends - astring - (forkexec (= :version)) - (safe-resources (= :version)) - (uuid (= :version)) - (xapi-consts (= :version)) - xapi-inventory - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (odoc :with-doc) - ) -) - -(package - (name sexpr) -) - -(package - (name safe-resources) -) - -(package - (name rrd-transport) - (synopsis "Shared-memory protocols for exposing system metrics") - (description "VMs running on a Xen host can use this library to expose performance counters which can be sampled by xapi's metric daemon.") - (authors "John Else") - (depends - (alcotest :with-test) - astring - bigarray-compat - cstruct - crc - (fmt :with-test) - rpclib - yojson - (xapi-idl (= :version)) - (xapi-rrd (= :version)) - (odoc :with-doc) - ) -) - -(package - (name pciutil) -) - -(package - (name message-switch-lwt) -) - -(package - (name message-switch-core) - (synopsis "A simple store-and-forward message switch") - (description "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") - (depends - astring - (cohttp (>= "0.21.1")) - ppx_deriving_rpc - ppx_sexp_conv - rpclib - sexplib - sexplib0 - uri - (xapi-log (= :version)) - (xapi-stdext-threads (= :version)) - (odoc :with-doc) - ) -) - -(package - (name message-switch-cli) -) - -(package - (name message-switch-unix) - (synopsis "A simple store-and-forward message switch") - (description "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") - (depends - base-threads - cohttp - (message-switch-core (= :version)) - ppx_deriving_rpc - rpclib - (xapi-stdext-threads (= :version)) - ) -) - -(package - (name message-switch) -) - -(package - (name http-lib) - (synopsis "An HTTP required used by xapi") - (description "This library allows xapi to perform varios activities related to the HTTP protocol.") - (depends - (alcotest :with-test) - astring - (base64 (>= "3.1.0")) - (clock (= :version)) - fmt - ipaddr - mtime - ppx_deriving_rpc - (qcheck-core :with-test) - rpclib - (safe-resources(= :version)) - sha - (stunnel (= :version)) - tgroup - uri - (uuid (= :version)) - xapi-backtrace - (xapi-idl (= :version)) - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-tracing (= :version)) - (xml-light2 (= :version)) - (odoc :with-doc) - ) -) - -(package - (name gzip) -) - -(package - (name forkexec) - (synopsis "Process-spawning library") - (description "Client and server library to spawn processes.") - (depends - astring - base-threads - (fd-send-recv (>= "2.0.0")) - ppx_deriving_rpc - rpclib - (uuid (= :version)) - xapi-backtrace - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-tracing (= :version)) - ) -) - -(package - (name ezxenstore) -) - -(package - (name cohttp-posix) -) - -(package - (name xapi-rrd) -) - -(package - (name xapi-inventory) -) - -(package - (name xapi-stdext-encodings) - (synopsis "Xapi's standard library extension, Encodings") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.13.0)) - (alcotest (and (>= 0.6.0) :with-test)) - (odoc :with-doc) - (bechamel :with-test) - (bechamel-notty :with-test) - (notty :with-test) - ) -) - -(package - (name xapi-stdext-pervasives) - (synopsis "Xapi's standard library extension, Pervasives") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.08)) - logs - (odoc :with-doc) - xapi-backtrace - ) -) - -(package - (name xapi-stdext-std) - (synopsis "Xapi's standard library extension, Stdlib") - (depends - (ocaml (>= 4.08.0)) - (alcotest :with-test) - (odoc :with-doc) - ) -) - -(package - (name xapi-stdext-threads) - (synopsis "Xapi's standard library extension, Threads") - (authors "Jonathan Ludlam") - (depends - ambient-context - base-threads - base-unix - (alcotest :with-test) - (clock (= :version)) - (fmt :with-test) - mtime - tgroup - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-unix (= :version)) - ) -) - -(package - (name xapi-stdext-unix) - (synopsis "Xapi's standard library extension, Unix") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.12.0)) - (alcotest :with-test) - astring - base-unix - (bisect_ppx :with-test) - (clock (and (= :version) :with-test)) - (fd-send-recv (>= 2.0.0)) - fmt - integers - (mtime (and (>= 2.0.0) :with-test)) - (logs :with-test) - (qcheck-core (and (>= 0.21.2) :with-test)) - (odoc :with-doc) - xapi-backtrace - unix-errno - (xapi-stdext-pervasives (= :version)) - polly - ) -) - -(package - (name xapi-stdext-zerocheck) - (synopsis "Xapi's standard library extension, Zerocheck") - (authors "Jonathan Ludlam") - (depends - (odoc :with-doc) - ) -) + (name zstd)) + +(package + (name clock) + (synopsis "Xapi's library for managing time") + (authors "Jonathan Ludlam" "Pau Ruiz Safont") + (depends + (ocaml + (>= 4.12)) + (alcotest :with-test) + astring + fmt + mtime + ptime + (xapi-log + (= :version)) + (qcheck-core :with-test) + (qcheck-alcotest :with-test))) + +(package + (name tgroup) + (depends xapi-log xapi-stdext-unix)) + +(package + (name xml-light2)) + +(package + (name xapi-sdk) + (license "BSD-2-Clause") + (synopsis "Xen API SDK generation code") + (depends + (alcotest :with-test) + astring + (fmt :with-test) + mustache + (xapi-datamodel + (= :version)) + (xapi-stdext-unix + (and + (= :version) + :with-test)) + (xapi-test-utils :with-test)) + (allow_empty)) + +(package + (name xen-api-client-lwt)) + +(package + (name xen-api-client) + (synopsis "Xen-API client library for remotely-controlling a xapi host") + (authors + "David Scott" + "Anil Madhavapeddy" + "Jerome Maloberti" + "John Else" + "Jon Ludlam" + "Thomas Sanders" + "Mike McClurg") + (depends + (alcotest :with-test) + astring + (cohttp + (>= "0.22.0")) + re + rpclib + uri + (uuid + (= :version)) + (xapi-client + (= :version)) + (xapi-idl + (= :version)) + (xapi-rrd + (= :version)) + (xapi-types + (= :version)) + xmlm)) + +(package + (name xe)) + +(package + (name xapi-types)) + +(package + (name xapi-tracing) + (depends + ocaml + dune + (alcotest :with-test) + (fmt :with-test) + ppx_deriving_yojson + re + uri + (uuid :with-test) + (xapi-log + (= :version)) + (xapi-stdext-threads + (= :version)) + yojson) + (synopsis "Allows to instrument code to generate tracing information") + (description + "This library provides modules to allow gathering runtime traces.")) + +(package + (name xapi-tracing-export) + (depends + ocaml + cohttp-posix + dune + cohttp + ptime + result + rresult + rpclib + ppx_deriving_rpc + uri + (xapi-log + (= :version)) + (xapi-open-uri + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-tracing + (= :version)) + (zstd + (= :version))) + (synopsis "Export traces in multiple protocols and formats") + (description + "This library export traces is able to push traces to http endpoints or generate compressed tarballs in the filesystem.")) + +(package + (name xapi-storage-script)) + +(package + (name xapi-storage-cli) + (depends + cmdliner + re + rpclib + ppx_deriving_rpc + (xapi-client + (= :version)) + (xapi-idl + (= :version)) + (xapi-types + (= :version))) + (synopsis "A CLI for xapi storage services") + (description + "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs.")) + +(package + (name xapi-storage)) + +(package + (name xapi-schema)) + +(package + (name rrdd-plugin) + (synopsis "A plugin library for the xapi performance monitoring daemon") + (description + "This library allows one to expose a datasource which can then be sampled by the performance monitoring daemon.") + (depends + ocaml + astring + rpclib + (rrd-transport + (= :version)) + (xapi-forkexecd + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-std + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-idl + (= :version)) + xenstore + xenstore_transport)) + +(package + (name xapi-open-uri)) + +(package + (name xapi-nbd)) + +(package + (name xapi-log)) + +(package + (name xapi-idl)) + +(package + (name xapi-forkexecd) + (synopsis "Sub-process control service for xapi") + (description + "This daemon creates and manages sub-processes on behalf of xapi.") + (depends + astring + (forkexec + (= :version)) + (uuid + (= :version)) + (xapi-stdext-unix + (= :version)))) + +(package + (name xapi-expiry-alerts)) + +(package + (name xapi-datamodel)) + +(package + (name xapi-consts)) + +(package + (name xapi-compression)) + +(package + (name xapi-client)) + +(package + (name xapi-cli-protocol)) + +(package + (name xapi-debug) + (synopsis "Debugging tools for XAPI") + (description + "Tools installed into the non-standard /opt/xensource/debug location") + (depends + alcotest + angstrom + astring + base64 + cmdliner + cohttp + cstruct + ctypes + domain-name + fd-send-recv + fmt + hex + integers + ipaddr + logs + magic-mime + mirage-crypto + mirage-crypto-pk + mirage-crypto-rng + mtime + pci + polly + ppx_deriving + ppx_deriving_rpc + ppx_sexp_conv + psq + ptime + qcheck-alcotest + qcheck-core + re + result + rpclib + rrdd-plugin + rresult + sexplib + sexplib0 + sha + tar + tar-unix + uri + uuidm + uutf + x509 + xapi-backtrace + xapi-log + xapi-types + xapi-stdext-pervasives + xapi-stdext-unix + xen-api-client + xen-api-client-lwt + xenctrl + xenstore_transport + xmlm + yojson)) + +(package + (name xapi-tools) + (synopsis "Various daemons and CLI applications required by XAPI") + (description "Includes message-switch, xenopsd, forkexecd, ...") + (depends + astring + base64 + cmdliner + cstruct-unix + fmt + logs + lwt + mtime + netlink + qmp + re + result + rpclib + rresult + uri + tyre + xenctrl + xmlm + yojson + ; can't use '= version' here yet, + ; 'xapi-tools' will have version ~dev, not 'master' like all the others + ; because it is not in xs-opam yet + rrd-transport + rrdd-plugin + xapi-tracing-export + xen-api-client + (alcotest :with-test) + (ppx_deriving_rpc :with-test) + (qcheck-core :with-test) + (xapi-test-utils :with-test) + (xenstore_transport :with-test))) + +(package + (name xapi) + (synopsis "The toolstack daemon which implements the XenAPI") + (description + "This daemon exposes the XenAPI and is used by clients such as 'xe' and 'XenCenter' to manage clusters of Xen-enabled hosts.") + (depends + (ocaml + (>= 4.09)) + (alcotest :with-test) + angstrom + astring + base-threads + base64 + (bos :with-test) + cdrom + (clock + (= :version)) + cmdliner + cohttp + conf-pam + (crowbar :with-test) + cstruct + ctypes + ctypes-foreign + domain-name + (ezxenstore + (= :version)) + fmt + fd-send-recv + hex + (http-lib + (and + :with-test + (= :version))) ; the public library is only used for testing + integers + ipaddr + logs + magic-mime + mirage-crypto + mirage-crypto-pk + (mirage-crypto-rng + (>= "0.11.0")) + (message-switch-unix + (= :version)) + mtime + opentelemetry-client-ocurl + pci + (pciutil + (= :version)) + polly + ppx_deriving_rpc + ppx_sexp_conv + ppx_deriving + psq + ptime + qcheck-alcotest + qcheck-core + re + result + rpclib + (rrdd-plugin + (= :version)) + rresult + sexpr + sexplib + sexplib0 + sha + (stunnel + (= :version)) + tar + tar-unix + uri + tgroup + (uuid + (= :version)) + uutf + uuidm + x509 + xapi-backtrace + (xapi-client + (= :version)) + (xapi-cli-protocol + (= :version)) + (xapi-consts + (= :version)) + (xapi-datamodel + (= :version)) + (xapi-expiry-alerts + (= :version)) + (xapi-idl + (= :version)) + (xapi-inventory + (= :version)) + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-std + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-stdext-zerocheck + (= :version)) + (xapi-test-utils :with-test) + (xapi-tracing + (= :version)) + (xapi-tracing-export + (= :version)) + (xapi-types + (= :version)) + (xen-api-client-lwt + (= :version)) + xenctrl ; for quicktest + xenstore_transport + xmlm + (xml-light2 + (= :version)) + yojson + (zstd + (= :version)))) + +(package + (name vhd-tool) + (synopsis "Manipulate .vhd files") + (tags + ("org.mirage" "org:xapi-project")) + (depends + (alcotest-lwt :with-test) + astring + bigarray-compat + cmdliner + cohttp + cohttp-lwt + conf-libssl + (cstruct + (>= "3.0.0")) + (ezxenstore + (= :version)) + (forkexec + (= :version)) + io-page + lwt + lwt_ssl + nbd + nbd-unix + ppx_cstruct + ppx_deriving_rpc + re + result + rpclib + ssl + sha + tar + uri + (vhd-format + (= :version)) + (vhd-format-lwt + (= :version)) + (xapi-idl + (= :version)) + (xapi-log + (= :version)) + (xen-api-client-lwt + (= :version)) + xenstore + xenstore_transport)) + +(package + (name vhd-format)) + +(package + (name vhd-format-lwt) + (synopsis "Lwt interface to read/write VHD format data") + (description + "A pure OCaml library to read and write\n[vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a\nsimple command-line tool which allows vhd files to be interrogated,\nmanipulated, format-converted and streamed to and from files and remote\nservers.\n\nThis package provides an Lwt compatible interface to the library.") + (authors "Jon Ludlam" "Dave Scott") + (maintainers "Dave Scott ") + (tags + ("org:mirage" "org:xapi-project")) + (homepage "https://github.com/mirage/ocaml-vhd") + (source + (github mirage/ocaml-vhd)) + (depends + (ocaml + (>= "4.10.0")) + (alcotest :with-test) + (alcotest-lwt + (and + :with-test + (>= "1.0.0"))) + (bigarray-compat + (>= "1.1.0")) + (cstruct + (>= "6.0.0")) + cstruct-lwt + (fmt :with-test) + (lwt + (>= "3.2.0")) + (mirage-block + (>= "3.0.0")) + (rresult + (>= "0.7.0")) + (vhd-format + (= :version)) + (io-page + (and + :with-test + (>= "2.4.0"))))) + +(package + (name varstored-guard)) + +(package + (name uuid)) + +(package + (name stunnel) + (synopsis "Library used by xapi to herd stunnel processes") + (description + "This library allows xapi to configure, launch and terminate stunnel processes that act as clients and servers.") + (depends + astring + (forkexec + (= :version)) + (safe-resources + (= :version)) + (uuid + (= :version)) + (xapi-consts + (= :version)) + xapi-inventory + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (odoc :with-doc))) + +(package + (name sexpr)) + +(package + (name safe-resources)) + +(package + (name rrd-transport) + (synopsis "Shared-memory protocols for exposing system metrics") + (description + "VMs running on a Xen host can use this library to expose performance counters which can be sampled by xapi's metric daemon.") + (authors "John Else") + (depends + (alcotest :with-test) + astring + bigarray-compat + cstruct + crc + (fmt :with-test) + rpclib + yojson + (xapi-idl + (= :version)) + (xapi-rrd + (= :version)) + (odoc :with-doc))) + +(package + (name pciutil)) + +(package + (name message-switch-lwt)) + +(package + (name message-switch-core) + (synopsis "A simple store-and-forward message switch") + (description + "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") + (depends + astring + (cohttp + (>= "0.21.1")) + ppx_deriving_rpc + ppx_sexp_conv + rpclib + sexplib + sexplib0 + uri + (xapi-log + (= :version)) + (xapi-stdext-threads + (= :version)) + (odoc :with-doc))) + +(package + (name message-switch-cli)) + +(package + (name message-switch-unix) + (synopsis "A simple store-and-forward message switch") + (description + "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") + (depends + base-threads + cohttp + (message-switch-core + (= :version)) + ppx_deriving_rpc + rpclib + (xapi-stdext-threads + (= :version)))) + +(package + (name message-switch)) + +(package + (name http-lib) + (synopsis "An HTTP required used by xapi") + (description + "This library allows xapi to perform varios activities related to the HTTP protocol.") + (depends + (alcotest :with-test) + astring + (base64 + (>= "3.1.0")) + (clock + (= :version)) + fmt + ipaddr + mtime + ppx_deriving_rpc + (qcheck-core :with-test) + rpclib + (safe-resources + (= :version)) + sha + (stunnel + (= :version)) + tgroup + uri + (uuid + (= :version)) + xapi-backtrace + (xapi-idl + (= :version)) + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-tracing + (= :version)) + (xml-light2 + (= :version)) + (odoc :with-doc))) + +(package + (name gzip)) + +(package + (name forkexec) + (synopsis "Process-spawning library") + (description "Client and server library to spawn processes.") + (depends + astring + base-threads + (fd-send-recv + (>= "2.0.0")) + ppx_deriving_rpc + rpclib + (uuid + (= :version)) + xapi-backtrace + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-tracing + (= :version)))) + +(package + (name ezxenstore)) + +(package + (name cohttp-posix)) + +(package + (name xapi-rrd)) + +(package + (name xapi-inventory)) + +(package + (name xapi-stdext-encodings) + (synopsis "Xapi's standard library extension, Encodings") + (authors "Jonathan Ludlam") + (depends + (ocaml + (>= 4.13.0)) + (alcotest + (and + (>= 0.6.0) + :with-test)) + (odoc :with-doc) + (bechamel :with-test) + (bechamel-notty :with-test) + (notty :with-test))) + +(package + (name xapi-stdext-pervasives) + (synopsis "Xapi's standard library extension, Pervasives") + (authors "Jonathan Ludlam") + (depends + (ocaml + (>= 4.08)) + logs + (odoc :with-doc) + xapi-backtrace)) + +(package + (name xapi-stdext-std) + (synopsis "Xapi's standard library extension, Stdlib") + (depends + (ocaml + (>= 4.08.0)) + (alcotest :with-test) + (odoc :with-doc))) + +(package + (name xapi-stdext-threads) + (synopsis "Xapi's standard library extension, Threads") + (authors "Jonathan Ludlam") + (depends + ambient-context + base-threads + base-unix + (alcotest :with-test) + (clock + (= :version)) + (fmt :with-test) + mtime + tgroup + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-unix + (= :version)))) + +(package + (name xapi-stdext-unix) + (synopsis "Xapi's standard library extension, Unix") + (authors "Jonathan Ludlam") + (depends + (ocaml + (>= 4.12.0)) + (alcotest :with-test) + astring + base-unix + (bisect_ppx :with-test) + (clock + (and + (= :version) + :with-test)) + (fd-send-recv + (>= 2.0.0)) + fmt + integers + (mtime + (and + (>= 2.0.0) + :with-test)) + (logs :with-test) + (qcheck-core + (and + (>= 0.21.2) + :with-test)) + (odoc :with-doc) + xapi-backtrace + unix-errno + (xapi-stdext-pervasives + (= :version)) + polly)) + +(package + (name xapi-stdext-zerocheck) + (synopsis "Xapi's standard library extension, Zerocheck") + (authors "Jonathan Ludlam") + (depends + (odoc :with-doc))) diff --git a/opam/xapi-tools.opam b/opam/xapi-tools.opam index da2e2ce2967..3116f8d3293 100644 --- a/opam/xapi-tools.opam +++ b/opam/xapi-tools.opam @@ -24,6 +24,7 @@ depends: [ "rpclib" "rresult" "uri" + "tyre" "xenctrl" "xmlm" "yojson" From ce0d919ecb5aff6094d7a42faf0f373e40eb879a Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 14 May 2025 15:14:07 +0100 Subject: [PATCH 197/492] CP-307865 adjust error meesage to mention supported SHA algorithms Mention the supported algorithms, re-arrange the wording to usa list. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_errors.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 72faab08065..30c56b21202 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1708,8 +1708,8 @@ let _ = ~doc:"The provided certificate has expired." () ; error Api_errors.server_certificate_signature_not_supported [] ~doc: - "The provided certificate is not using the SHA256 (SHA2) signature \ - algorithm." + "The provided certificate is not using one of the following SHA2 \ + signature algorithms: SHA256, SHA512." () ; error Api_errors.server_certificate_chain_invalid [] From 44031b5232c145c387b5be56b472a60e5218ed11 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 15 May 2025 10:34:32 +0100 Subject: [PATCH 198/492] CP-307865 add unit tests for SHA512 Signed-off-by: Christian Lindig --- ocaml/gencert/test_lib.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ocaml/gencert/test_lib.ml b/ocaml/gencert/test_lib.ml index 379eb35f2e3..7cea9c6e7a6 100644 --- a/ocaml/gencert/test_lib.ml +++ b/ocaml/gencert/test_lib.ml @@ -50,6 +50,11 @@ let valid_leaf_certificates = , "2020-02-01T00:00:00Z" , `SHA256 ) + ; ( "Valid, SHA512, matches key" + , "pkey_rsa_2048" + , "2020-02-01T00:00:00Z" + , `SHA512 + ) ] (* ( description, leaf_private_key, expected_private_key, time_of_validation, @@ -80,6 +85,14 @@ let invalid_leaf_certificates = , server_certificate_key_mismatch , [] ) + ; ( "Valid, SHA512, keys do not match" + , "pkey_rsa_2048" + , "pkey_rsa_4096" + , "2020-02-01T00:00:00Z" + , `SHA512 + , server_certificate_key_mismatch + , [] + ) ; ( "Valid, SHA1, matching keys" , "pkey_rsa_2048" , "pkey_rsa_2048" From da22219f3bbd50822d82eee58681194a2fffa809 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 15 May 2025 10:57:18 +0100 Subject: [PATCH 199/492] gencert testing: use human-readable errors for validation This is done using the API errors. The use of Hashtbl.find is warranted, if the name is missing from the table something very bad has happened and would have had to raise an error anyway. Signed-off-by: Pau Ruiz Safont --- ocaml/gencert/dune | 1 + ocaml/gencert/test_lib.ml | 15 ++++++++++++--- quality-gate.sh | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ocaml/gencert/dune b/ocaml/gencert/dune index cbd5cd73ae2..600811a13b6 100644 --- a/ocaml/gencert/dune +++ b/ocaml/gencert/dune @@ -64,6 +64,7 @@ rresult x509 xapi-consts + xapi-datamodel xapi-stdext-unix ) (deps diff --git a/ocaml/gencert/test_lib.ml b/ocaml/gencert/test_lib.ml index 379eb35f2e3..30096c7a990 100644 --- a/ocaml/gencert/test_lib.ml +++ b/ocaml/gencert/test_lib.ml @@ -166,11 +166,20 @@ let test_valid_leaf_cert pem_leaf time pkey () = match validate_pem_chain ~pem_leaf ~pem_chain:None time pkey with | Ok _ -> () - | Error (`Msg (_, msg)) -> + | Error (`Msg err) -> + let err_to_str (name, params) = + let Datamodel_types.{err_doc; err_params; _} = + Hashtbl.find Datamodel_errors.errors name + in + let args = List.combine err_params params in + Format.asprintf "%s %a" err_doc + Fmt.(Dump.list (pair ~sep:(Fmt.any ":@ ") string string)) + args + in Alcotest.fail (Format.asprintf "Valid certificate could not be validated: %a" - Fmt.(Dump.list string) - msg + (Fmt.of_to_string err_to_str) + err ) let test_invalid_cert pem_leaf time pkey error reason = diff --git a/quality-gate.sh b/quality-gate.sh index a1f57a67525..d00da722b3a 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -110,7 +110,7 @@ unixgetenv () { } hashtblfind () { - N=34 + N=35 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) From cedf8362e71964ab05451224ff8fa6b731f254e4 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Tue, 29 Apr 2025 11:33:42 +0100 Subject: [PATCH 200/492] CA-409510: Make xenopsd nested Parallel atoms explicit Each Parallel atom takes up a worker thread whilst its children do the actual work, so we have parallel_queues to prevent a deadlock. However, nested Parallel atoms take up an additional worker, meaning they can still cause a deadlock. This commit adds a new Nested_parallel atomic with matching nested_parallel_queues to remove the possibility of this deadlock. This increases the total number of workers, but these workers are just to hold the Nested_parallel Atomics and will not be doing any actual work Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 196 ++++++++++++++++++----------- quality-gate.sh | 2 +- 2 files changed, 126 insertions(+), 72 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 3a4cf238068..da91cc651c9 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -168,6 +168,8 @@ type atomic = | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) | Parallel of Vm.id * string * atomic list + | Nested_parallel of Vm.id * string * atomic list + (** used to make nested parallel atoms explicit, as each atom requires its own worker *) | Serial of Vm.id * string * atomic list | Best_effort of atomic [@@deriving rpcty] @@ -286,6 +288,9 @@ let rec name_of_atomic = function | Parallel (_, _, atomics) -> Printf.sprintf "Parallel (%s)" (String.concat " | " (List.map name_of_atomic atomics)) + | Nested_parallel (_, _, atomics) -> + Printf.sprintf "Nested_parallel (%s)" + (String.concat " | " (List.map name_of_atomic atomics)) | Serial (_, _, atomics) -> Printf.sprintf "Serial (%s)" (String.concat " & " (List.map name_of_atomic atomics)) @@ -295,7 +300,7 @@ let rec name_of_atomic = function let rec atomic_expires_after = function | Serial (_, _, ops) -> List.map atomic_expires_after ops |> List.fold_left ( +. ) 0. - | Parallel (_, _, ops) -> + | Parallel (_, _, ops) | Nested_parallel (_, _, ops) -> List.map atomic_expires_after ops |> List.fold_left Float.max 0. | _ -> (* 20 minutes, in seconds *) @@ -916,6 +921,27 @@ module Redirector = struct Parallel atoms, creating a deadlock. *) let parallel_queues = {queues= Queues.create (); mutex= Mutex.create ()} + (* We create another queue only for Nested_parallel atoms for the same reason + as parallel_queues. When a Nested_parallel atom is inside a Parallel atom, + they are both using a worker whilst not doing any work, so they each need + additional space to prevent a deadlock. *) + let nested_parallel_queues = + {queues= Queues.create (); mutex= Mutex.create ()} + + (* we do not want to use = when comparing queues: queues can contain + (uncomparable) functions, and we are only interested in comparing the + equality of their static references *) + let is_same_redirector q1 q2 = q1 == q2 + + let to_string r = + match r with + | w when is_same_redirector w parallel_queues -> + "Parallel" + | w when is_same_redirector w nested_parallel_queues -> + "Nested_parallel" + | _ -> + "Default" + (* When a thread is actively processing a queue, items are redirected to a thread-private queue *) let overrides = ref StringMap.empty @@ -1035,6 +1061,7 @@ module Redirector = struct List.concat_map one (default.queues :: parallel_queues.queues + :: nested_parallel_queues.queues :: List.map snd (StringMap.bindings !overrides) ) ) @@ -1219,11 +1246,11 @@ module WorkerPool = struct operate *) let count_active queues = with_lock m (fun () -> - (* we do not want to use = when comparing queues: queues can contain - (uncomparable) functions, and we are only interested in comparing the - equality of their static references *) List.map - (fun w -> w.Worker.redirector == queues && Worker.is_active w) + (fun w -> + Redirector.is_same_redirector w.Worker.redirector queues + && Worker.is_active w + ) !pool |> List.filter (fun x -> x) |> List.length @@ -1231,17 +1258,18 @@ module WorkerPool = struct let find_one queues f = List.fold_left - (fun acc x -> acc || (x.Worker.redirector == queues && f x)) + (fun acc x -> + acc || (Redirector.is_same_redirector x.Worker.redirector queues && f x) + ) false (* Clean up any shutdown threads and remove them from the master list *) let gc queues pool = List.fold_left (fun acc w -> - (* we do not want to use = when comparing queues: queues can contain - (uncomparable) functions, and we are only interested in comparing the - equality of their static references *) - if w.Worker.redirector == queues && Worker.get_state w = Worker.Shutdown + if + Redirector.is_same_redirector w.Worker.redirector queues + && Worker.get_state w = Worker.Shutdown then ( Worker.join w ; acc ) else @@ -1268,7 +1296,8 @@ module WorkerPool = struct let start size = for _i = 1 to size do incr Redirector.default ; - incr Redirector.parallel_queues + incr Redirector.parallel_queues ; + incr Redirector.nested_parallel_queues done let set_size size = @@ -1283,7 +1312,8 @@ module WorkerPool = struct done in inner Redirector.default ; - inner Redirector.parallel_queues + inner Redirector.parallel_queues ; + inner Redirector.nested_parallel_queues end (* Keep track of which VMs we're rebooting so we avoid transient glitches where @@ -1584,6 +1614,11 @@ let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst let parallel name ~id = collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) +let nested_parallel name ~id = + collect_into (fun ls -> + [Nested_parallel (id, Printf.sprintf "%s VM=%s" name id, ls)] + ) + let serial name ~id = collect_into (fun ls -> [Serial (id, Printf.sprintf "%s VM=%s" name id, ls)]) @@ -1593,6 +1628,9 @@ let serial_concat name ~id lst = serial name ~id (List.concat lst) let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) +let nested_parallel_map name ~id lst f = + nested_parallel name ~id (List.concat_map f lst) + let map_or_empty f x = Option.value ~default:[] (Option.map f x) (* Creates a Serial of 2 or more Atomics. If the number of Atomics could be @@ -1630,7 +1668,7 @@ let rec atomics_of_operation = function let pf = Printf.sprintf in let name_multi = pf "VBDs.activate_epoch_and_plug %s" typ in let name_one = pf "VBD.activate_epoch_and_plug %s" typ in - parallel_map name_multi ~id vbds (fun vbd -> + nested_parallel_map name_multi ~id vbds (fun vbd -> serial_concat name_one ~id [ [VBD_set_active (vbd.Vbd.id, true)] @@ -1664,11 +1702,11 @@ let rec atomics_of_operation = function vifs ; serial_concat "VGPUs.activate & PCI.plug (SRIOV)" ~id [ - parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + nested_parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> [VGPU_set_active (vgpu.Vgpu.id, true)] ) - ; parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov (fun pci -> - [PCI_plug (pci.Pci.id, false)] + ; nested_parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov + (fun pci -> [PCI_plug (pci.Pci.id, false)] ) ] ] @@ -1883,56 +1921,9 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) (Printexc.to_string e) ) | Parallel (_id, description, atoms) -> - (* parallel_id is a unused unique name prefix for a parallel worker queue *) - let parallel_id = - Printf.sprintf "Parallel:task=%s.atoms=%d.(%s)" - (Xenops_task.id_of_handle t) - (List.length atoms) description - in - let with_tracing = id_with_tracing parallel_id t in - debug "begin_%s" parallel_id ; - let task_list = - queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 - with_tracing parallel_id atoms - in - debug "end_%s" parallel_id ; - (* make sure that we destroy all the parallel tasks that finished *) - let errors = - List.map - (fun (id, task_handle, task_state) -> - match task_state with - | Some (Task.Completed _) -> - TASK.destroy' id ; None - | Some (Task.Failed e) -> - TASK.destroy' id ; - let e = - match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with - | Ok x -> - Xenopsd_error x - | Error (`Msg x) -> - internal_error "Error unmarshalling failure: %s" x - in - Some e - | None | Some (Task.Pending _) -> - (* Because pending tasks are filtered out in - queue_atomics_and_wait with task_ended the second case will - never be encountered. The previous boolean used in - event_wait was enough to express the possible cases *) - let err_msg = - Printf.sprintf "Timed out while waiting on task %s (%s)" id - (Xenops_task.get_dbg task_handle) - in - error "%s" err_msg ; - Xenops_task.cancel task_handle ; - Some (Xenopsd_error (Internal_error err_msg)) - ) - task_list - in - (* if any error was present, raise first one, so that - trigger_cleanup_after_failure is called *) - List.iter - (fun err -> match err with None -> () | Some e -> raise e) - errors + parallel_atomic ~progress_callback ~description ~nested:false atoms t + | Nested_parallel (_id, description, atoms) -> + parallel_atomic ~progress_callback ~description ~nested:true atoms t | Serial (_, _, atoms) -> List.iter (Fun.flip (perform_atomic ~progress_callback) t) atoms | VIF_plug id -> @@ -2361,7 +2352,64 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.soft_reset %s" id ; B.VM.soft_reset t (VM_DB.read_exn id) -and queue_atomic_int ~progress_callback dbg id op = +and parallel_atomic ~progress_callback ~description ~nested atoms t = + (* parallel_id is a unused unique name prefix for a parallel worker queue *) + let redirector = + if nested then + Redirector.nested_parallel_queues + else + Redirector.parallel_queues + in + let parallel_id = + Printf.sprintf "%s:task=%s.atoms=%d.(%s)" + (Redirector.to_string redirector) + (Xenops_task.id_of_handle t) + (List.length atoms) description + in + let with_tracing = id_with_tracing parallel_id t in + debug "begin_%s" parallel_id ; + let task_list = + queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 + with_tracing parallel_id atoms redirector + in + debug "end_%s" parallel_id ; + (* make sure that we destroy all the parallel tasks that finished *) + let errors = + List.map + (fun (id, task_handle, task_state) -> + match task_state with + | Some (Task.Completed _) -> + TASK.destroy' id ; None + | Some (Task.Failed e) -> + TASK.destroy' id ; + let e = + match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with + | Ok x -> + Xenopsd_error x + | Error (`Msg x) -> + internal_error "Error unmarshalling failure: %s" x + in + Some e + | None | Some (Task.Pending _) -> + (* Because pending tasks are filtered out in + queue_atomics_and_wait with task_ended the second case will + never be encountered. The previous boolean used in + event_wait was enough to express the possible cases *) + let err_msg = + Printf.sprintf "Timed out while waiting on task %s (%s)" id + (Xenops_task.get_dbg task_handle) + in + error "%s" err_msg ; + Xenops_task.cancel task_handle ; + Some (Xenopsd_error (Internal_error err_msg)) + ) + task_list + in + (* if any error was present, raise first one, so that + trigger_cleanup_after_failure is called *) + List.iter (fun err -> match err with None -> () | Some e -> raise e) errors + +and queue_atomic_int ~progress_callback dbg id op redirector = let task = Xenops_task.add tasks dbg (let r = ref None in @@ -2370,10 +2418,12 @@ and queue_atomic_int ~progress_callback dbg id op = !r ) in - Redirector.push Redirector.parallel_queues id (Atomic op, task) ; + debug "Adding to %s queues" (Redirector.to_string redirector) ; + Redirector.push redirector id (Atomic op, task) ; task -and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops = +and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops + redirector = let from = Updates.last_id dbg updates in Xenops_utils.chunks max_parallel_atoms ops |> List.mapi (fun chunk_idx ops -> @@ -2386,7 +2436,9 @@ and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops = let atom_id = Printf.sprintf "%s.chunk=%d.atom=%d" id chunk_idx atom_idx in - (queue_atomic_int ~progress_callback dbg atom_id op, op) + ( queue_atomic_int ~progress_callback dbg atom_id op redirector + , op + ) ) ops in @@ -2562,7 +2614,9 @@ and trigger_cleanup_after_failure_atom op t = immediate_operation dbg id (VM_check_state id) | Best_effort op -> trigger_cleanup_after_failure_atom op t - | Parallel (_id, _description, ops) | Serial (_id, _description, ops) -> + | Parallel (_id, _description, ops) + | Nested_parallel (_id, _description, ops) + | Serial (_id, _description, ops) -> List.iter (fun op -> trigger_cleanup_after_failure_atom op t) ops | VM_rename (id1, id2, _) -> immediate_operation dbg id1 (VM_check_state id1) ; diff --git a/quality-gate.sh b/quality-gate.sh index a1f57a67525..c8126975567 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -44,7 +44,7 @@ mli-files () { } structural-equality () { - N=9 + N=7 EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$EQ" -eq "$N" ]; then echo "OK counted $EQ usages of ' == '" From 1c19aa8d91ce66ce3511ce10ac62038d6e550082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 19 May 2025 14:34:36 +0100 Subject: [PATCH 201/492] CA-411122: do not call set-iscsi-initiator with an empty string for IQN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Back in 2018 a4a94b38c66203c9f02752f181d1c01f73bf52a1 rejected empty IQNs in set_iscsi_iqn API calls. However hosts are created with an empty IQN, and if this code runs too early then it will attempt to call `set-iscsi-initiator` with an empty string for the IQN: ``` /opt/xensource/libexec/set-iscsi-initiator myhost ``` About a second later the script is called again with the correct value. This could potentially result in the iscsid service being restarted multiple times (and if a restart is still pending when restart is called a 2nd time I'm not sure it'll take effect, so we might be left with an empty initiator). It is best to avoid setting empty initiators. The exception would be raised and ignore due to the log_and_ignore in the caller. Also log wherever the IQN is set using %S, so that we notice if it ends up containing some extra whitespace characters. Signed-off-by: Edwin Török --- ocaml/xapi/xapi_host.ml | 1 + ocaml/xapi/xapi_host_helpers.ml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index e2cece5cb5c..e31c96c7a81 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2790,6 +2790,7 @@ let set_uefi_certificates ~__context ~host:_ ~value:_ = let set_iscsi_iqn ~__context ~host ~value = if value = "" then raise Api_errors.(Server_error (invalid_value, ["value"; value])) ; + D.debug "%s: iqn=%S" __FUNCTION__ value ; (* Note, the following sequence is carefully written - see the other-config watcher thread in xapi_host_helpers.ml *) Db.Host.remove_from_other_config ~__context ~self:host ~key:"iscsi_iqn" ; diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index 834b34beb4b..7b9ac9d7a2e 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -497,10 +497,13 @@ module Configuration = struct [iqn; hostname_chopped] let set_initiator_name iqn = + if iqn = "" then + raise Api_errors.(Server_error (invalid_value, ["iqn"; iqn])) ; let hostname = Unix.gethostname () in (* CA-377454 - robustness, create dir if necessary *) Unixext.mkdir_rec "/var/lock/sm/iscsiadm" 0o700 ; let args = make_set_initiator_args iqn hostname in + D.debug "%s: iqn=%S" __FUNCTION__ iqn ; ignore (Helpers.call_script !Xapi_globs.set_iSCSI_initiator_script args) let set_multipathing enabled = @@ -541,6 +544,7 @@ module Configuration = struct | Some "" -> () | Some iqn when iqn <> host_rec.API.host_iscsi_iqn -> + D.debug "%s: iqn=%S" __FUNCTION__ iqn ; Client.Client.Host.set_iscsi_iqn ~rpc ~session_id ~host:host_ref ~value:iqn | _ -> From 62584053b0c54c8ca3bfa1f0b31c6b482b049bd7 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 19 May 2025 16:28:23 +0100 Subject: [PATCH 202/492] CA-409510: Give a warning if atoms nested incorrectly This is a stopgap until we add compile-time constraints on the nesting, by for example using a polymorphic variant. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 34 ++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index da91cc651c9..ae93a2476cc 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1920,9 +1920,11 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "Ignoring error during best-effort operation: %s" (Printexc.to_string e) ) - | Parallel (_id, description, atoms) -> + | Parallel (_id, description, atoms) as atom -> + check_nesting atom ; parallel_atomic ~progress_callback ~description ~nested:false atoms t - | Nested_parallel (_id, description, atoms) -> + | Nested_parallel (_id, description, atoms) as atom -> + check_nesting atom ; parallel_atomic ~progress_callback ~description ~nested:true atoms t | Serial (_, _, atoms) -> List.iter (Fun.flip (perform_atomic ~progress_callback) t) atoms @@ -2352,6 +2354,34 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.soft_reset %s" id ; B.VM.soft_reset t (VM_DB.read_exn id) +and check_nesting atom = + let msg_prefix = "Nested atomics error" in + let rec check_nesting_inner found_parallel found_nested = function + | Parallel (_, _, rem) -> + if found_parallel then ( + warn + "%s: Two or more Parallel atoms found, use Nested_parallel for the \ + inner atom" + msg_prefix ; + true + ) else + List.exists (check_nesting_inner true found_nested) rem + | Nested_parallel (_, _, rem) -> + if found_nested then ( + warn + "%s: Two or more Nested_parallel atoms found, there should only be \ + one layer of nesting" + msg_prefix ; + true + ) else + List.exists (check_nesting_inner found_parallel true) rem + | Serial (_, _, rem) -> + List.exists (check_nesting_inner found_parallel found_nested) rem + | _ -> + false + in + ignore @@ check_nesting_inner false false atom + and parallel_atomic ~progress_callback ~description ~nested atoms t = (* parallel_id is a unused unique name prefix for a parallel worker queue *) let redirector = From 7e5c1ea537ce70d0626c890e75f3e41806d3cc9a Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Tue, 22 Apr 2025 09:45:00 +0000 Subject: [PATCH 203/492] CP-53721 Implement SSH set auto mode API for Dom0 SSH control Implemented XAPI APIs for SSH auto mode configuration: - `host.set_ssh_auto_mode`: Configures SSH auto mode for a specific host. - `pool.set_ssh_auto_mode`: Configures SSH auto mode for all hosts in the pool. Additionally: - `host.enable_ssh` now automatically sets SSH auto mode to `false`. Signed-off-by: Lunfan Zhang --- ocaml/xapi/xapi_globs.ml | 2 + ocaml/xapi/xapi_host.ml | 48 +++++++++++++++++++--- ocaml/xapi/xapi_host.mli | 6 ++- ocaml/xapi/xapi_periodic_scheduler_init.ml | 1 + ocaml/xapi/xapi_pool.ml | 9 +++- 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index a5fed248e63..a8b2d8485ca 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1297,6 +1297,8 @@ let job_for_disable_ssh = ref "Disable SSH" let ssh_service = ref "sshd" +let ssh_monitor_service = ref "xapi-ssh-monitor" + (* Fingerprint of default patch key *) let citrix_patch_key = "NERDNTUzMDMwRUMwNDFFNDI4N0M4OEVCRUFEMzlGOTJEOEE5REUyNg==" diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index d4da2d9903a..d7f793aa33a 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3112,10 +3112,39 @@ let emergency_clear_mandatory_guidance ~__context = ) ; Db.Host.set_pending_guidances ~__context ~self ~value:[] +let set_ssh_auto_mode ~__context ~self ~value = + debug "Setting SSH auto mode for host %s to %B" + (Helpers.get_localhost_uuid ()) + value ; + + Db.Host.set_ssh_auto_mode ~__context ~self ~value ; + + try + (* When enabled, the ssh_monitor_service regularly checks XAPI status to manage SSH availability. + During normal operation when XAPI is running properly, SSH is automatically disabled. + SSH is only enabled during emergency scenarios + (e.g., when XAPI is down) to allow administrative access for troubleshooting. *) + if value then ( + Xapi_systemctl.enable ~wait_until_success:false + !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.start ~wait_until_success:false + !Xapi_globs.ssh_monitor_service + ) else ( + Xapi_systemctl.stop ~wait_until_success:false + !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.disable ~wait_until_success:false + !Xapi_globs.ssh_monitor_service + ) + with e -> + error "Failed to configure SSH auto mode: %s" (Printexc.to_string e) ; + Helpers.internal_error "Failed to configure SSH auto mode: %s" + (Printexc.to_string e) + let disable_ssh_internal ~__context ~self = try debug "Disabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; - Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_service ; + if not (Db.Host.get_ssh_auto_mode ~__context ~self) then + Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_service ; Xapi_systemctl.stop ~wait_until_success:false !Xapi_globs.ssh_service ; Db.Host.set_ssh_enabled ~__context ~self ~value:false with e -> @@ -3123,7 +3152,7 @@ let disable_ssh_internal ~__context ~self = (Printexc.to_string e) ; Helpers.internal_error "Failed to disable SSH: %s" (Printexc.to_string e) -let schedule_disable_ssh_job ~__context ~self ~timeout = +let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = let host_uuid = Helpers.get_localhost_uuid () in let expiry_time = match @@ -3152,7 +3181,11 @@ let schedule_disable_ssh_job ~__context ~self ~timeout = Xapi_stdext_threads_scheduler.Scheduler.add_to_queue !Xapi_globs.job_for_disable_ssh Xapi_stdext_threads_scheduler.Scheduler.OneShot (Int64.to_float timeout) - (fun () -> disable_ssh_internal ~__context ~self + (fun () -> + disable_ssh_internal ~__context ~self ; + (* re-enable SSH auto mode if it was enabled before calling host.enable_ssh *) + if auto_mode then + set_ssh_auto_mode ~__context ~self ~value:true ) ; Db.Host.set_ssh_expiry ~__context ~self ~value:expiry_time @@ -3161,6 +3194,10 @@ let enable_ssh ~__context ~self = try debug "Enabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; + let cached_ssh_auto_mode = Db.Host.get_ssh_auto_mode ~__context ~self in + (* Disable SSH auto mode when SSH is enabled manually *) + set_ssh_auto_mode ~__context ~self ~value:false ; + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; Xapi_systemctl.start ~wait_until_success:false !Xapi_globs.ssh_service ; @@ -3171,6 +3208,7 @@ let enable_ssh ~__context ~self = !Xapi_globs.job_for_disable_ssh | t -> schedule_disable_ssh_job ~__context ~self ~timeout:t + ~auto_mode:cached_ssh_auto_mode ) ; Db.Host.set_ssh_enabled ~__context ~self ~value:true @@ -3208,7 +3246,7 @@ let set_ssh_enabled_timeout ~__context ~self ~value = !Xapi_globs.job_for_disable_ssh ; Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch | t -> - schedule_disable_ssh_job ~__context ~self ~timeout:t + schedule_disable_ssh_job ~__context ~self ~timeout:t ~auto_mode:false let set_console_idle_timeout ~__context ~self ~value = let assert_timeout_valid timeout = @@ -3243,5 +3281,3 @@ let set_console_idle_timeout ~__context ~self ~value = error "Failed to configure console timeout: %s" (Printexc.to_string e) ; Helpers.internal_error "Failed to set console timeout: %Ld: %s" value (Printexc.to_string e) - -let set_ssh_auto_mode ~__context ~self:_ ~value:_ = () diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 4b0c53c14bf..481b4699d57 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -580,7 +580,11 @@ val set_console_idle_timeout : __context:Context.t -> self:API.ref_host -> value:int64 -> unit val schedule_disable_ssh_job : - __context:Context.t -> self:API.ref_host -> timeout:int64 -> unit + __context:Context.t + -> self:API.ref_host + -> timeout:int64 + -> auto_mode:bool + -> unit val set_ssh_auto_mode : __context:Context.t -> self:API.ref_host -> value:bool -> unit diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index f394a9ad999..ff7c3187c20 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -90,6 +90,7 @@ let register ~__context = if Int64.compare expiry_time current_time > 0 then let remaining = Int64.sub expiry_time current_time in Xapi_host.schedule_disable_ssh_job ~__context ~self ~timeout:remaining + ~auto_mode:true (* handle the case where XAPI is not active when the SSH timeout expires *) else if Fe_systemctl.is_active ~service:!Xapi_globs.ssh_service then Xapi_host.disable_ssh ~__context ~self diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 7640f881bae..ac09ebca7fa 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -4071,6 +4071,13 @@ module Ssh = struct Client.Host.set_console_idle_timeout ~rpc ~session_id ~self ~value ) ~error:Api_errors.set_console_timeout_partially_failed + + let set_ssh_auto_mode ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_ssh_auto_mode ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_ssh_auto_mode_partially_failed end let enable_ssh = Ssh.enable @@ -4081,4 +4088,4 @@ let set_ssh_enabled_timeout = Ssh.set_enabled_timeout let set_console_idle_timeout = Ssh.set_console_timeout -let set_ssh_auto_mode ~__context ~self:_ ~value:_ = () +let set_ssh_auto_mode = Ssh.set_ssh_auto_mode From 4db3c2ff6ce5985b1df785592cb01c157baa4403 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Thu, 24 Apr 2025 06:24:56 +0000 Subject: [PATCH 204/492] CP-53724 Add xe CLI commands for setting and querying Dom0 SSH auto mode Updated `records.ml` file to support `host-param-set/get/list` and `pool-param-set/get/list` for ssh-auto-mode. Signed-off-by: Lunfan Zhang --- ocaml/xapi-cli-server/records.ml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index ca5d04b95ac..cc305486761 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -1584,6 +1584,17 @@ let pool_record rpc session_id pool = ~value:(safe_i64_of_string "console-idle-timeout" value) ) () + ; make_field ~name:"ssh-auto-mode" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_auto_mode ~transform:string_of_bool + ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_ssh_auto_mode ~rpc ~session_id ~self:pool + ~value:(safe_bool_of_string "ssh-auto-mode" value) + ) + () ] } @@ -3375,6 +3386,13 @@ let host_record rpc session_id host = ~value:(safe_i64_of_string "console-idle-timeout" value) ) () + ; make_field ~name:"ssh-auto-mode" + ~get:(fun () -> string_of_bool (x ()).API.host_ssh_auto_mode) + ~set:(fun value -> + Client.Host.set_ssh_auto_mode ~rpc ~session_id ~self:host + ~value:(safe_bool_of_string "ssh-auto-mode" value) + ) + () ] } From 179854e5c094f552652d2c72ba130bbd8b26efd4 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Wed, 14 May 2025 23:10:14 +0100 Subject: [PATCH 205/492] CA-410782: Add receive_memory_queues for VM_receive_memory operations Migration spawns 2 operations which depend on each other so we need to ensure there is always space for both of them to prevent a deadlock. Adding VM_receive_memory to a new queue ensures that there will always be a worker for the receive operation so the paired send will never be blocked. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index ae93a2476cc..8bdbf6d376e 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -928,6 +928,12 @@ module Redirector = struct let nested_parallel_queues = {queues= Queues.create (); mutex= Mutex.create ()} + (* We create another queue only for VM_receive_memory operations for the same reason again. + Migration spawns 2 operations, send and receive, so if there is limited available worker space + a deadlock can happen when VMs are migrating between hosts or on localhost migration + as the receiver has no free workers to receive memory. *) + let receive_memory_queues = {queues= Queues.create (); mutex= Mutex.create ()} + (* we do not want to use = when comparing queues: queues can contain (uncomparable) functions, and we are only interested in comparing the equality of their static references *) @@ -1062,6 +1068,7 @@ module Redirector = struct (default.queues :: parallel_queues.queues :: nested_parallel_queues.queues + :: receive_memory_queues.queues :: List.map snd (StringMap.bindings !overrides) ) ) @@ -1297,7 +1304,8 @@ module WorkerPool = struct for _i = 1 to size do incr Redirector.default ; incr Redirector.parallel_queues ; - incr Redirector.nested_parallel_queues + incr Redirector.nested_parallel_queues ; + incr Redirector.receive_memory_queues done let set_size size = @@ -1313,7 +1321,8 @@ module WorkerPool = struct in inner Redirector.default ; inner Redirector.parallel_queues ; - inner Redirector.nested_parallel_queues + inner Redirector.nested_parallel_queues ; + inner Redirector.receive_memory_queues end (* Keep track of which VMs we're rebooting so we avoid transient glitches where @@ -3360,7 +3369,8 @@ let uses_mxgpu id = ) (VGPU_DB.ids id) -let queue_operation_int ?traceparent dbg id op = +let queue_operation_int ?traceparent ?(redirector = Redirector.default) dbg id + op = let task = Xenops_task.add ?traceparent tasks dbg (let r = ref None in @@ -3368,11 +3378,11 @@ let queue_operation_int ?traceparent dbg id op = ) in let tag = if uses_mxgpu id then "mxgpu" else id in - Redirector.push Redirector.default tag (op, task) ; + Redirector.push redirector tag (op, task) ; task -let queue_operation ?traceparent dbg id op = - let task = queue_operation_int ?traceparent dbg id op in +let queue_operation ?traceparent ?redirector dbg id op = + let task = queue_operation_int ?traceparent ?redirector dbg id op in Xenops_task.id_of_handle task let queue_operation_and_wait dbg id op = @@ -3821,7 +3831,12 @@ module VM = struct ; vmr_compressed= compressed_memory } in - let task = Some (queue_operation ?traceparent dbg id op) in + let task = + Some + (queue_operation ?traceparent + ~redirector:Redirector.receive_memory_queues dbg id op + ) + in Option.iter (fun t -> t |> Xenops_client.wait_for_task dbg |> ignore) task From 9ba2d653529a1b266bf6d8f0d0ab6d813cb7aa0c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 20 May 2025 12:08:11 +0100 Subject: [PATCH 206/492] xapi: Cleanup unused functions Helpers.progress' only user was Xapi_vm.immediate_complete, which wasn't used anywhere. Remove both. No functional changes. Signed-off-by: Andrii Sultanov --- ocaml/xapi/helpers.ml | 8 -------- ocaml/xapi/xapi_vm.ml | 3 --- ocaml/xapi/xapi_vm.mli | 2 -- 3 files changed, 13 deletions(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 8271d45c3cf..8e6578cacb2 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -606,14 +606,6 @@ let call_emergency_mode_functions hostname f = (fun () -> f rpc session_id) (fun () -> Client.Client.Session.local_logout ~rpc ~session_id) -let progress ~__context t = - for i = 0 to int_of_float (t *. 100.) do - let v = float_of_int i /. 100. /. t in - TaskHelper.set_progress ~__context v ; - Thread.delay 1. - done ; - TaskHelper.set_progress ~__context 1. - let is_domain_zero_with_record ~__context vm_ref vm_rec = let host_ref = vm_rec.API.vM_resident_on in vm_rec.API.vM_is_control_domain diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 78967197a8f..408c28d0a39 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -89,9 +89,6 @@ let retrieve_wlb_recommendations ~__context ~vm = let assert_agile ~__context ~self = Agility.vm_assert_agile ~__context ~self -(* helpers *) -let immediate_complete ~__context = Helpers.progress ~__context (0.0 -. 1.0) - (* API *) let set_actions_after_crash ~__context ~self ~value = set_actions_after_crash ~__context ~self ~value diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index d0771c49cfa..8559293df97 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -32,8 +32,6 @@ val retrieve_wlb_recommendations : val assert_agile : __context:Context.t -> self:[`VM] Ref.t -> unit -val immediate_complete : __context:Context.t -> unit - val set_actions_after_crash : __context:Context.t -> self:[`VM] Ref.t From f59845e3fda14eaf1a42948ae0c8ce60d0cef372 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 20 May 2025 13:12:51 +0100 Subject: [PATCH 207/492] CP-308075 document changing paths for SM plugins in XS9 The paths to SM plugins in XS9 is about to change. This will be implemented with a conf file in /etc/xapi.conf.d/ and will be easy to overlook when just looking at xapi.conf. Update the comment there. Signed-off-by: Christian Lindig --- scripts/xapi.conf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/xapi.conf b/scripts/xapi.conf index 46f859a8d42..91a5ea40f56 100644 --- a/scripts/xapi.conf +++ b/scripts/xapi.conf @@ -159,7 +159,8 @@ sparse_dd = /usr/libexec/xapi/sparse_dd # Directory containing supplemental pack data # packs-dir = @ETCXENDIR@/installed-repos -# Directory containing SM plugins +# Directory containing SM plugins. This path changes in XenServer 9 with a +# configuration coming from /etc/xapi.conf.d/, which takes precedence # sm-dir = @OPTDIR@/sm # Whitelist of SM plugins From 3be8ea9c867fad477e3ecc7febeeb30c3c3e8cfa Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 15 May 2025 17:10:21 +0100 Subject: [PATCH 208/492] CP-53642: change default NUMA placement policy to best-effort We've seen that using the policy can be up to 10% faster than using any is some workflows, while not observing workflows that were negatively affected. The policy per VM can always be change if need be. Note that currently sometime the best-effort falls back to the same behaviour, especially when restarting on starting more than one VM at a time. This needs xen patches to be fixed: https://lore.kernel.org/xen-devel/20250314172502.53498-1-alejandro.vallejo@cloud.com/T/#ma1246e352ea3cce71c7ddc26d1329a368548b3b2 Now the deprecated numa-placement configuration option for xenopsd does nothing. It was exclusively used to enable Best_effort, since now it's the default, there's no point in setting the option. It's value depends on whether the default option is best_effort or not, as per the spec. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 4 ++-- ocaml/xenopsd/lib/xenopsd.ml | 9 +++++---- ocaml/xenopsd/xc/xenops_server_xen.ml | 2 -- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index ae93a2476cc..d8407d2bda5 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3556,9 +3556,9 @@ module VIF = struct () end -let default_numa_affinity_policy = ref Xenops_interface.Host.Any +let default_numa_affinity_policy = ref Xenops_interface.Host.Best_effort -let numa_placement = ref Xenops_interface.Host.Any +let numa_placement = ref !default_numa_affinity_policy let string_of_numa_affinity_policy = Xenops_interface.Host.(function Any -> "any" | Best_effort -> "best-effort") diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index 276192792d4..9c5e83e04ce 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -59,8 +59,6 @@ let feature_flags_path = ref "/etc/xenserver/features.d" let pvinpvh_xen_cmdline = ref "pv-shim console=xen" -let numa_placement_compat = ref false - (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -242,8 +240,11 @@ let options = , "Command line for the inner-xen for PV-in-PVH guests" ) ; ( "numa-placement" - , Arg.Bool (fun x -> numa_placement_compat := x) - , (fun () -> string_of_bool !numa_placement_compat) + , Arg.Bool (fun _ -> ()) + , (fun () -> + string_of_bool + (!Xenops_server.default_numa_affinity_policy = Best_effort) + ) , "NUMA-aware placement of VMs (deprecated, use XAPI setting)" ) ; ( "pci-quarantine" diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 9eae9cb76b2..3d6b5cf7214 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -5294,8 +5294,6 @@ let init () = {Xs_protocol.ACL.owner= 0; other= Xs_protocol.ACL.READ; acl= []} ) ; Device.Backend.init () ; - Xenops_server.default_numa_affinity_policy := - if !Xenopsd.numa_placement_compat then Best_effort else Any ; info "Default NUMA affinity policy is '%s'" Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; From ef089d4ef978716d453a88a2fd7eb376246b3940 Mon Sep 17 00:00:00 2001 From: Stephen Cheng Date: Fri, 16 May 2025 16:09:23 +0800 Subject: [PATCH 209/492] CP-54275: Add a blocklist mechanism to avoid incorrect/old repo config. This change introduces a new `repository_domain__blocklist` that lists repo URL patterns to be blocked. On XAPI startup, any exsiting pool repository whose URLs matches an entry in this blocklist will be automatically removed. This ensures that, for example, when upgrading from XS8 to XS9, any XS8 repos are purged. Additionally, repository creating now check the same blocklist and rejects any attempt to add a blocked repo. - On startup: read blocklist, delete matching blocked repos - On repository creation: validate against blocklist and abort if matched Signed-off-by: Stephen Cheng --- ocaml/idl/datamodel_errors.ml | 5 ++++ ocaml/tests/test_repository_helpers.ml | 41 ++++++++++++++++++++++++++ ocaml/xapi-consts/api_errors.ml | 2 ++ ocaml/xapi/repository.ml | 2 ++ ocaml/xapi/repository_helpers.ml | 17 +++++++++++ ocaml/xapi/xapi.ml | 29 ++++++++++++++++++ ocaml/xapi/xapi_globs.ml | 12 ++++++++ 7 files changed, 108 insertions(+) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 30c56b21202..9a6cddc9107 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1921,6 +1921,11 @@ let _ = () ; error Api_errors.invalid_base_url ["url"] ~doc:"The base url in the repository is invalid." () ; + error Api_errors.blocked_repo_url ["url"] + ~doc: + "Cannot create the repository as the url is blocked, please check your \ + settings." + () ; error Api_errors.invalid_gpgkey_path ["gpgkey_path"] ~doc:"The GPG public key file name in the repository is invalid." () ; error Api_errors.repository_already_exists ["ref"] diff --git a/ocaml/tests/test_repository_helpers.ml b/ocaml/tests/test_repository_helpers.ml index c05e7c8a63e..d6c8421afdb 100644 --- a/ocaml/tests/test_repository_helpers.ml +++ b/ocaml/tests/test_repository_helpers.ml @@ -253,6 +253,46 @@ module AssertUrlIsValid = Generic.MakeStateless (struct ] end) +module AssertUrlIsNotBlocked = Generic.MakeStateless (struct + module Io = struct + type input_t = string * string list + + type output_t = (unit, exn) result + + let string_of_input_t = Fmt.(str "%a" Dump.(pair string (list string))) + + let string_of_output_t = + Fmt.(str "%a" Dump.(result ~ok:(any "()") ~error:exn)) + end + + let transform (url, url_blocklist) = + Xapi_globs.repository_url_blocklist := url_blocklist ; + try Ok (assert_url_is_not_blocked ~url) with e -> Error e + + let tests = + `QuickAndAutoDocumented + [ + (* no blocklist *) + (("https://test.com", []), Ok ()) + ; (* Not match in blocklist *) + ( ("https://test.com", ["http://blocked.com"; "http://also/blocked.com"]) + , Ok () + ) + ; (* match in blocklist *) + ( ( "http://blocked.com" + , ["http://blocked.com"; "http://also/blocked.com"] + ) + , Error + Api_errors.(Server_error (blocked_repo_url, ["http://blocked.com"])) + ) + ; (* match keyword in blocklist *) + ( ("http://blocked.com", ["private"; "blocked"]) + , Error + Api_errors.(Server_error (blocked_repo_url, ["http://blocked.com"])) + ) + ] +end) + module WriteYumConfig = Generic.MakeStateless (struct module Io = struct (* ( (source_url, binary_url), (need_gpg_check, gpgkey_path) ) *) @@ -4780,6 +4820,7 @@ let tests = [ ("update_of_json", UpdateOfJsonTest.tests) ; ("assert_url_is_valid", AssertUrlIsValid.tests) + ; ("assert_url_is_not_blocked", AssertUrlIsNotBlocked.tests) ; ("write_yum_config", WriteYumConfig.tests) ; ("eval_guidance_for_one_update", EvalGuidanceForOneUpdate.tests) ; ("get_update_in_json", GetUpdateInJson.tests) diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 6a25fbe48c8..3884abeed6f 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1323,6 +1323,8 @@ let configure_repositories_in_progress = let invalid_base_url = add_error "INVALID_BASE_URL" +let blocked_repo_url = add_error "BLOCKED_REPO_URL" + let invalid_gpgkey_path = add_error "INVALID_GPGKEY_PATH" let repository_already_exists = add_error "REPOSITORY_ALREADY_EXISTS" diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index 1ec1486a3e2..ea87a715e17 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -33,6 +33,8 @@ let updates_in_cache : (API.ref_host, Yojson.Basic.t) Hashtbl.t = let introduce ~__context ~name_label ~name_description ~binary_url ~source_url ~update ~gpgkey_path = + assert_url_is_not_blocked ~url:binary_url ; + assert_url_is_not_blocked ~url:source_url ; assert_url_is_valid ~url:binary_url ; assert_url_is_valid ~url:source_url ; assert_gpgkey_path_is_valid gpgkey_path ; diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 62df609c53a..91a3c1b4670 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -209,6 +209,23 @@ let assert_url_is_valid ~url = error "Invalid url %s: %s" url (ExnHelper.string_of_exn e) ; raise Api_errors.(Server_error (invalid_base_url, [url])) +let url_matches ~url (patterns : string list) : bool = + List.exists + (fun pattern -> + try + let re = Re.Perl.re pattern |> Re.compile in + Re.execp re url + with exn -> + error "Exception in %s: %s" __FUNCTION__ (Printexc.to_string exn) ; + false + ) + patterns + +let assert_url_is_not_blocked ~url = + let blocklist = !Xapi_globs.repository_url_blocklist in + if url_matches ~url blocklist then + raise Api_errors.(Server_error (blocked_repo_url, [url])) + let is_gpgkey_path_valid = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index f7ac9b546d3..a12e3ec0c83 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -327,6 +327,31 @@ let server_run_in_emergency_mode () = in wait_to_die () ; exit 0 +let remove_blocked_repositories ~__context () = + try + let blocklist = !Xapi_globs.repository_url_blocklist in + let repos = Db.Repository.get_all ~__context in + let pool = Helpers.get_pool ~__context in + let is_repo_blocked repo = + let binary_url = Db.Repository.get_binary_url ~__context ~self:repo in + let source_url = Db.Repository.get_source_url ~__context ~self:repo in + Repository_helpers.url_matches ~url:binary_url blocklist + || Repository_helpers.url_matches ~url:source_url blocklist + in + let remove_repo repo = + debug "%s Removing repository %s due to it being blocked" __FUNCTION__ + (Ref.string_of repo) ; + try + Xapi_pool.remove_repository ~__context ~self:pool ~value:repo ; + Db.Repository.destroy ~__context ~self:repo + with e -> + debug "%s Failed to remove repository for %s: %s" __FUNCTION__ + (Ref.string_of repo) (Printexc.to_string e) + in + List.filter (fun x -> is_repo_blocked x) repos + |> List.iter (fun x -> remove_repo x) + with e -> error "Exception in %s: %s" __FUNCTION__ (Printexc.to_string e) + let bring_up_management_if ~__context () = try let management_if = @@ -1115,6 +1140,10 @@ let server_init () = , [Startup.OnlyMaster] , Xapi_db_upgrade.hi_level_db_upgrade_rules ~__context ) + ; ( "removing blocked repositories" + , [Startup.OnlyMaster] + , remove_blocked_repositories ~__context + ) ; ( "bringing up management interface" , [] , bring_up_management_if ~__context diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 8bdeac10d06..b183d477ee9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -932,6 +932,13 @@ let gen_pool_secret_script = ref "/usr/bin/pool_secret_wrapper" let repository_domain_name_allowlist = ref [] +(* + This blocklist aims to prevent the creation of any repository whose URL matches an entry in the blocklist. + Additionally, if an existing repository contains a URL that matches an entry in the blocklist, + it should be removed automatically after xapi is restarted. +*) +let repository_url_blocklist = ref [] + let yum_cmd = ref "/usr/bin/yum" let dnf_cmd = ref "/usr/bin/dnf" @@ -1599,6 +1606,11 @@ let other_options = (fun s -> s) (fun s -> s) repository_domain_name_allowlist + ; gen_list_option "repository-url-blocklist" + "space-separated list of blocked URL patterns in base URL in repository." + (fun s -> s) + (fun s -> s) + repository_url_blocklist ; ( "repository-gpgcheck" , Arg.Set repository_gpgcheck , (fun () -> string_of_bool !repository_gpgcheck) From 222f407f494eb31f85fc4290d4430761ba7a22cc Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 13 May 2025 10:03:36 +0100 Subject: [PATCH 210/492] Minor doc fix Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index a3da3d906d0..a33b311999a 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1170,7 +1170,7 @@ module StorageAPI (R : RPC) = struct (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward - compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize + compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize2 during SXM. Use the receive_finalize3 function instead. *) let receive_finalize2 = From f2037bd56bb0522d9bd1de649f7dabc271a1d933 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 17:35:20 +0100 Subject: [PATCH 211/492] Add helper functions to parse nbd info Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index a33b311999a..7dcfedee1b7 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -175,6 +175,9 @@ let parse_nbd_uri nbd = | _ -> fail () +let parse_nbd_uri_opt nbd = + try Some (parse_nbd_uri nbd) with Failure _e -> None + (** Separates the implementations of the given backend returned from the VDI.attach2 SMAPIv2 call based on their type *) let implementations_of_backend backend = @@ -192,6 +195,16 @@ let implementations_of_backend backend = ) ([], [], [], []) backend.implementations +let nbd_export_of_attach_info (backend : backend) = + let _, _, _, nbds = implementations_of_backend backend in + match nbds with + | [] -> + debug "%s no nbd uri found" __FUNCTION__ ; + None + | uri :: _ -> + debug "%s found nbd uri %s" __FUNCTION__ uri.uri ; + parse_nbd_uri_opt uri |> Option.map snd + (** Uniquely identifies the contents of a VDI *) type content_id = string [@@deriving rpcty] From 97ecbf461e47a452950563ce0574e0213cc72d08 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 16:51:30 +0100 Subject: [PATCH 212/492] Introduce DATA.mirror and DATA.stat These two functions are the new SMAPIv3 functions that will enable mirror and query of the mirror status. So implement them in xapi-storage-script. The SMAPIv1 counterparts remain unimplemented. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 47 +++++++++++++++ ocaml/xapi-idl/storage/storage_skeleton.ml | 4 ++ ocaml/xapi-storage-script/main.ml | 63 ++++++++++++++++++++- ocaml/xapi/storage_mux.ml | 20 +++++++ ocaml/xapi/storage_smapiv1.ml | 4 ++ ocaml/xapi/storage_smapiv1_wrapper.ml | 9 ++- 6 files changed, 142 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 7dcfedee1b7..14ca03e6cb8 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -1056,6 +1056,29 @@ module StorageAPI (R : RPC) = struct @-> returning result_p err ) + let operation_p = Param.mk ~name:"operation" Mirror.operation + + let mirror = + declare "DATA.mirror" [] + (dbg_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> url_p + @-> returning operation_p err + ) + + let stat = + let status_p = Param.mk ~name:"status" Mirror.status in + declare "DATA.stat" [] + (dbg_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> operation_p + @-> returning status_p err + ) + (** [import_activate dbg dp sr vdi vm] returns a server socket address to which a fd can be passed via SCM_RIGHTS for mirroring purposes.*) let import_activate = @@ -1626,6 +1649,24 @@ module type Server_impl = sig -> verify_dest:bool -> Task.id + val mirror : + context + -> dbg:debug_info + -> sr:sr + -> vdi:vdi + -> vm:vm + -> dest:string + -> operation + + val stat : + context + -> dbg:debug_info + -> sr:sr + -> vdi:vdi + -> vm:vm + -> key:operation + -> status + val import_activate : context -> dbg:debug_info @@ -1800,6 +1841,12 @@ module Server (Impl : Server_impl) () = struct S.DATA.copy (fun dbg sr vdi vm url dest verify_dest -> Impl.DATA.copy () ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest ) ; + S.DATA.mirror (fun dbg sr vdi vm dest -> + Impl.DATA.mirror () ~dbg ~sr ~vdi ~vm ~dest + ) ; + S.DATA.stat (fun dbg sr vdi vm key -> + Impl.DATA.stat () ~dbg ~sr ~vdi ~vm ~key + ) ; S.DATA.MIRROR.send_start (fun dbg diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index edaf4bc9812..290c09d6230 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -154,6 +154,10 @@ let get_by_name ctx ~dbg ~name = u "get_by_name" module DATA = struct let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = u "DATA.copy" + let mirror ctx ~dbg ~sr ~vdi ~vm ~dest = u "DATA.mirror" + + let stat ctx ~dbg ~sr ~vdi ~vm ~key = u "DATA.stat" + let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = u "DATA.MIRROR.import_activate" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 0d76c09601f..5e8402c94b2 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -16,6 +16,7 @@ module Plugin_client = Xapi_storage.Plugin.Plugin (Rpc_lwt.GenClient ()) module Volume_client = Xapi_storage.Control.Volume (Rpc_lwt.GenClient ()) module Sr_client = Xapi_storage.Control.Sr (Rpc_lwt.GenClient ()) module Datapath_client = Xapi_storage.Data.Datapath (Rpc_lwt.GenClient ()) +module Data_client = Xapi_storage.Data.Data (Rpc_lwt.GenClient ()) open Private.Lib let ( >>= ) = Lwt.bind @@ -1789,6 +1790,62 @@ end module DATAImpl (M : META) = struct module VDI = VDIImpl (M) + let stat dbg sr vdi' _vm key = + let open Storage_interface in + let convert_key = function + | Mirror.CopyV1 k -> + Data_client.CopyV1 k + | Mirror.MirrorV1 k -> + Data_client.MirrorV1 k + in + + let vdi = Vdi.string_of vdi' in + Attached_SRs.find sr >>>= fun sr -> + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, _datapath, _uri) -> + let key = convert_key key in + return_data_rpc (fun () -> Data_client.stat (rpc ~dbg) dbg key) + >>>= function + | {failed; complete; progress} -> + return Mirror.{failed; complete; progress} + + let stat_impl dbg sr vdi vm key = wrap @@ stat dbg sr vdi vm key + + let mirror dbg sr vdi' vm' remote = + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + Attached_SRs.find sr >>>= fun sr -> + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> + return_data_rpc (fun () -> + Data_client.mirror (rpc ~dbg) dbg uri domain remote + ) + >>>= function + | CopyV1 v -> + return (Storage_interface.Mirror.CopyV1 v) + | MirrorV1 v -> + return (Storage_interface.Mirror.MirrorV1 v) + + let mirror_impl dbg sr vdi vm remote = wrap @@ mirror dbg sr vdi vm remote + let data_import_activate_impl dbg _dp sr vdi' vm' = wrap @@ @@ -1855,6 +1912,7 @@ let bind ~volume_script_dir = (* this version field will be updated once query is called *) let version = ref None end in + let u name _ = failwith ("Unimplemented: " ^ name) in let module Query = QueryImpl (RuntimeMeta) in S.Query.query Query.query_impl ; S.Query.diagnostics Query.query_diagnostics_impl ; @@ -1905,10 +1963,12 @@ let bind ~volume_script_dir = S.DP.attach_info DP.dp_attach_info_impl ; let module DATA = DATAImpl (RuntimeMeta) in + S.DATA.copy (u "DATA.copy") ; + S.DATA.mirror DATA.mirror_impl ; + S.DATA.stat DATA.stat_impl ; S.DATA.get_nbd_server DATA.get_nbd_server_impl ; S.DATA.import_activate DATA.data_import_activate_impl ; - let u name _ = failwith ("Unimplemented: " ^ name) in S.get_by_name (u "get_by_name") ; S.VDI.get_by_name (u "VDI.get_by_name") ; S.UPDATES.get (u "UPDATES.get") ; @@ -1918,7 +1978,6 @@ let bind ~volume_script_dir = S.TASK.destroy (u "TASK.destroy") ; S.DP.destroy (u "DP.destroy") ; S.VDI.similar_content (u "VDI.similar_content") ; - S.DATA.copy (u "DATA.copy") ; S.DP.stat_vdi (u "DP.stat_vdi") ; S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index a523000c7b4..b8d5fc2dd72 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -27,6 +27,8 @@ let s_of_vdi = Storage_interface.Vdi.string_of let s_of_vm = Storage_interface.Vm.string_of +let s_of_operation = Storage_interface.Mirror.show_operation + let with_dbg ~name ~dbg f = Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f @@ -797,6 +799,24 @@ module Mux = struct let copy () ~dbg = with_dbg ~name:"DATA.copy" ~dbg @@ fun dbg -> Storage_migrate.copy ~dbg + let mirror () ~dbg ~sr ~vdi ~vm ~dest = + with_dbg ~name:"DATA.mirror" ~dbg @@ fun di -> + info "%s dbg:%s sr: %s vdi: %s vm:%s remote:%s" __FUNCTION__ dbg + (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) dest ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.mirror (Debug_info.to_string di) sr vdi vm dest + + let stat () ~dbg ~sr ~vdi ~vm ~key = + with_dbg ~name:"DATA.stat" ~dbg @@ fun di -> + info "%s dbg:%s sr: %s vdi: %s vm: %s opeartion_key: %s" __FUNCTION__ dbg + (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) (s_of_operation key) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.stat (Debug_info.to_string di) sr vdi vm key + let import_activate () ~dbg ~dp ~sr ~vdi ~vm = with_dbg ~name:"DATA.import_activate" ~dbg @@ fun di -> info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index 7eef88b46e3..b2fe0a6bfd3 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1128,6 +1128,10 @@ module SMAPIv1 : Server_impl = struct let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false + let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = assert false + + let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = assert false + let import_activate _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false let get_nbd_server _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 397cee17d6a..7066a649ce2 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1137,11 +1137,17 @@ functor end module DATA = struct + let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) + let copy context ~dbg ~sr ~vdi ~vm ~url ~dest = info "DATA.copy dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; Impl.DATA.copy context ~dbg ~sr ~vdi ~vm ~url ~dest + let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = u "DATA.mirror" + + let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = u "DATA.stat" + (* tapdisk supports three kind of nbd servers, the old style nbdserver, the new style nbd server and a real nbd server. The old and new style nbd servers are "special" nbd servers that accept fds passed via SCM_RIGHTS and handle @@ -1186,9 +1192,6 @@ functor module MIRROR = struct type context = unit - let u x = - raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = From 30b8355976485ccc75e77bdbb772f43d04059241 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 17:35:38 +0100 Subject: [PATCH 213/492] Add dummy implementation for VDI.similar_content for SMAPIv3 The similar VDI functionality is uncurrently unused for SMAPIv3 migration so just add a dummy implementation. Signed-off-by: Vincent Liu --- ocaml/xapi-storage-script/main.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 5e8402c94b2..269267001f7 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1754,6 +1754,8 @@ module VDIImpl (M : META) = struct let vdi = Storage_interface.Vdi.string_of vdi in let* () = unset ~dbg ~sr ~vdi ~key:(_sm_config_prefix_key ^ key) in return () + + let similar_content_impl _dbg _sr _vdi = wrap @@ return [] end module DPImpl (M : META) = struct @@ -1957,6 +1959,7 @@ let bind ~volume_script_dir = S.VDI.set_content_id VDI.vdi_set_content_id_impl ; S.VDI.add_to_sm_config VDI.vdi_add_to_sm_config_impl ; S.VDI.remove_from_sm_config VDI.vdi_remove_from_sm_config_impl ; + S.VDI.similar_content VDI.similar_content_impl ; let module DP = DPImpl (RuntimeMeta) in S.DP.destroy2 DP.dp_destroy2 ; @@ -1977,7 +1980,6 @@ let bind ~volume_script_dir = S.DP.diagnostics (u "DP.diagnostics") ; S.TASK.destroy (u "TASK.destroy") ; S.DP.destroy (u "DP.destroy") ; - S.VDI.similar_content (u "VDI.similar_content") ; S.DP.stat_vdi (u "DP.stat_vdi") ; S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; From 3426f464ccc1443952049d0b5e6c895ced54a4dd Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 15 Apr 2025 17:37:44 +0100 Subject: [PATCH 214/492] CP-307922: Implement SMAPIv3 outbound migration This is the main commit that implements the MIRROR interface in storage_smapiv3_migrate. The exact detail of how SMAPIv3 mirror is done is left in the SXM documentation, but core of it is to provide all the necessary infrastructure to able to call the `Data.mirror` SMAPIv3 call that will mirror a VDI to another. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 15 +- ocaml/xapi/storage_smapiv3_migrate.ml | 296 ++++++++++++++++++++++++-- 2 files changed, 293 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 85f36e31fab..9c9c574ef80 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -186,8 +186,8 @@ module MigrateLocal = struct ~verify_dest in Migrate_Backend.send_start () ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm - ~mirror_id ~local_vdi ~copy_vm ~live_vm:(Vm.of_string "0") ~url - ~remote_mirror ~dest_sr:dest ~verify_dest ; + ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror + ~dest_sr:dest ~verify_dest ; Some (Mirror_id mirror_id) with | Storage_error (Sr_not_attached sr_uuid) -> @@ -196,9 +196,14 @@ module MigrateLocal = struct raise (Api_errors.Server_error (Api_errors.sr_not_attached, [sr_uuid])) | ( Storage_error (Migration_mirror_fd_failure reason) | Storage_error (Migration_mirror_snapshot_failure reason) ) as e -> - error "%s: Caught %s: during storage migration preparation" __FUNCTION__ - reason ; - MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~url ~verify_dest ; + error "%s: Caught %s: during SMAPIv1 storage migration mirror " + __FUNCTION__ reason ; + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest ; + raise e + | Storage_error (Migration_mirror_failure reason) as e -> + error "%s: Caught :%s: during SMAPIv3 storage migration mirror" + __FUNCTION__ reason ; + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest ; raise e | Storage_error (Migration_mirror_copy_failure reason) as e -> error "%s: Caught %s: during storage migration copy" __FUNCTION__ reason ; diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 5ef3eeaac6c..de50fafc02c 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -12,42 +12,312 @@ * GNU Lesser General Public License for more details. *) -module D = Debug.Make (struct let name = "storage_smapiv1_migrate" end) +module D = Debug.Make (struct let name = __MODULE__ end) module Unixext = Xapi_stdext_unix.Unixext module State = Storage_migrate_helper.State module SXM = Storage_migrate_helper.SXM +open Storage_interface +open Storage_task +open Xmlrpc_client +open Storage_migrate_helper module type SMAPIv2_MIRROR = Storage_interface.MIRROR +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + +let s_of_vm = Storage_interface.Vm.string_of + +let export_nbd_proxy ~remote_url ~mirror_vm ~sr ~vdi ~dp ~verify_dest = + D.debug "%s spawning exporting nbd proxy" __FUNCTION__ ; + let path = + Printf.sprintf "/var/run/nbdproxy/export/%s" (Vm.string_of mirror_vm) + in + let proxy_srv = Fecomms.open_unix_domain_sock_server path in + try + let uri = + Printf.sprintf "/services/SM/nbdproxy/import/%s/%s/%s/%s" + (Vm.string_of mirror_vm) (Sr.string_of sr) (Vdi.string_of vdi) dp + in + + let dest_url = Http.Url.set_uri (Http.Url.of_string remote_url) uri in + D.debug "%s now waiting for connection at %s" __FUNCTION__ path ; + let nbd_client, _addr = Unix.accept proxy_srv in + D.debug "%s connection accepted" __FUNCTION__ ; + let request = + Http.Request.make + ~query:(Http.Url.get_query_params dest_url) + ~version:"1.0" ~user_agent:"export_nbd_proxy" Http.Put uri + in + D.debug "%s making request to dest %s" __FUNCTION__ + (Http.Url.to_string dest_url) ; + let verify_cert = if verify_dest then Stunnel_client.pool () else None in + let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in + with_transport ~stunnel_wait_disconnect:false transport + (with_http request (fun (_response, s) -> + D.debug "%s starting proxy" __FUNCTION__ ; + Unixext.proxy (Unix.dup s) (Unix.dup nbd_client) + ) + ) ; + Unix.close proxy_srv + with e -> + D.debug "%s did not get connection due to %s, closing" __FUNCTION__ + (Printexc.to_string e) ; + Unix.close proxy_srv ; + raise e + +let mirror_wait ~dbg ~sr ~vdi ~vm ~mirror_id mirror_key = + let rec mirror_wait_rec key = + let {failed; complete; progress} : Mirror.status = + Local.DATA.stat dbg sr vdi vm key + in + if complete then ( + Option.fold ~none:() + ~some:(fun p -> D.info "%s progress is %f" __FUNCTION__ p) + progress ; + D.info "%s completed" __FUNCTION__ + ) else if failed then ( + Option.iter + (fun (snd_state : State.Send_state.t) -> snd_state.failed <- true) + (State.find_active_local_mirror mirror_id) ; + D.info "%s failed" __FUNCTION__ ; + raise + (Storage_interface.Storage_error + (Migration_mirror_failure "Mirror failed during syncing") + ) + ) else ( + Option.fold ~none:() + ~some:(fun p -> D.info "%s progress is %f" __FUNCTION__ p) + progress ; + mirror_wait_rec key + ) + in + + match mirror_key with + | Storage_interface.Mirror.CopyV1 _ -> + () + | Storage_interface.Mirror.MirrorV1 _ -> + D.debug "%s waiting for mirroring to be done" __FUNCTION__ ; + mirror_wait_rec mirror_key + module MIRROR : SMAPIv2_MIRROR = struct type context = unit let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let send_start _ctx = u __FUNCTION__ + let send_start _ctx ~dbg ~task_id:_ ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi:_ ~copy_vm:_ ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest + = + let nbd_proxy_path = + Printf.sprintf "/var/run/nbdproxy/export/%s" (Vm.string_of mirror_vm) + in + match remote_mirror with + | Mirror.Vhd_mirror _ -> + raise + (Storage_error + (Migration_preparation_failure + "Incorrect remote mirror format for SMAPIv3" + ) + ) + | Mirror.SMAPIv3_mirror {nbd_export; mirror_datapath; mirror_vdi} -> ( + try + let nbd_uri = + Uri.make ~scheme:"nbd+unix" ~host:"" ~path:nbd_export + ~query:[("socket", [nbd_proxy_path])] + () + |> Uri.to_string + in + let _ : Thread.t = + Thread.create + (fun () -> + export_nbd_proxy ~remote_url:url ~mirror_vm ~sr:dest_sr + ~vdi:mirror_vdi.vdi ~dp:mirror_datapath ~verify_dest + ) + () + in - let receive_start _ctx = u __FUNCTION__ + D.info "%s nbd_proxy_path: %s nbd_url %s" __FUNCTION__ nbd_proxy_path + nbd_uri ; + let mk = Local.DATA.mirror dbg sr vdi live_vm nbd_uri in - let receive_start2 _ctx = u __FUNCTION__ + D.debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; + let alm = + State.Send_state. + { + url + ; dest_sr + ; remote_info= + Some + {dp= mirror_datapath; vdi= mirror_vdi.vdi; url; verify_dest} + ; local_dp= dp + ; tapdev= None + ; failed= false + ; watchdog= None + ; vdi + ; live_vm + ; mirror_key= Some mk + } + in + State.add mirror_id (State.Send_op alm) ; + D.debug "%s Updated mirror_id %s in the active local mirror" + __FUNCTION__ mirror_id ; + mirror_wait ~dbg ~sr ~vdi ~vm:live_vm ~mirror_id mk + with e -> + D.error "%s caught exception during mirror: %s" __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_failure (Printexc.to_string e)) + ) + ) - let receive_start3 _ctx = u __FUNCTION__ + let receive_start _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = + u "DATA.MIRROR.receive_start" - let receive_finalize _ctx = u __FUNCTION__ + let receive_start2 _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ ~vm:_ = + u "DATA.MIRROR.receive_start2" - let receive_finalize2 _ctx = u __FUNCTION__ + let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar:_ ~vm ~url + ~verify_dest = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" + __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + mirror_id (s_of_vm vm) url verify_dest ; + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" + (Storage_utils.connection_args_of_uri ~verify_dest url) + end)) in + let on_fail : (unit -> unit) list ref = ref [] in + try + (* We drop cbt_metadata VDIs that do not have any actual data *) + let (vdi_info : vdi_info) = + {vdi_info with sm_config= [("base_mirror", mirror_id)]} + in + let leaf_dp = Remote.DP.create dbg Uuidx.(to_string (make ())) in + let leaf = Remote.VDI.create dbg sr vdi_info in + D.info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> Remote.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + let backend = Remote.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + let nbd_export = + match nbd_export_of_attach_info backend with + | None -> + raise + (Storage_error + (Migration_preparation_failure "Cannot parse nbd uri") + ) + | Some export -> + export + in + D.debug "%s activating dp %s sr: %s vdi: %s vm: %s" __FUNCTION__ leaf_dp + (s_of_sr sr) (s_of_vdi leaf.vdi) (s_of_vm vm) ; + Remote.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let qcow2_res = + {Mirror.mirror_vdi= leaf; mirror_datapath= leaf_dp; nbd_export} + in + let remote_mirror = Mirror.SMAPIv3_mirror qcow2_res in + D.debug + "%s updating receiving state lcoally to id: %s vm: %s vdi_info: %s" + __FUNCTION__ mirror_id (s_of_vm vm) + (string_of_vdi_info vdi_info) ; + State.add mirror_id + State.( + Recv_op + Receive_state. + { + sr + ; leaf_vdi= qcow2_res.mirror_vdi.vdi + ; leaf_dp= qcow2_res.mirror_datapath + ; remote_vdi= vdi_info.vdi + ; mirror_vm= vm + ; dummy_vdi= + Vdi.of_string "dummy" + (* No dummy_vdi is needed when migrating from SMAPIv3 SRs, having a + "dummy" VDI here is fine as cleanup code for SMAPIv3 will not + access dummy_vdi, and all the clean up functions will ignore + exceptions when trying to clean up the dummy VDIs even if they + do access dummy_vdi. The same applies to parent_vdi *) + ; parent_vdi= Vdi.of_string "dummy" + ; url + ; verify_dest + } + ) ; + remote_mirror + with e -> + List.iter + (fun op -> + try op () + with e -> + D.warn "Caught exception in on_fail: %s performing cleaning up" + (Printexc.to_string e) + ) + !on_fail ; + raise e - let receive_finalize3 _ctx = u __FUNCTION__ + let receive_finalize _ctx ~dbg:_ ~id:_ = u "DATA.MIRROR.receive_finalize" - let receive_cancel _ctx = u __FUNCTION__ - - let receive_cancel2 _ctx = u __FUNCTION__ + let receive_finalize2 _ctx ~dbg:_ ~id:_ = u "DATA.MIRROR.receive_finalize2" - let has_mirror_failed _ctx = u __FUNCTION__ + let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg + mirror_id (s_of_sr sr) url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let open State.Receive_state in + let recv_state = State.find_active_receive_mirror mirror_id in + Option.iter + (fun r -> + Remote.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + Remote.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + ) + recv_state ; + State.remove_receive_mirror mirror_id - let pre_deactivate_hook _ctx = u __FUNCTION__ + let receive_cancel _ctx = u __FUNCTION__ let list _ctx = u __FUNCTION__ let stat _ctx = u __FUNCTION__ + + let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let receive_state = State.find_active_receive_mirror mirror_id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr r.leaf_vdi) + ) + receive_state ; + State.remove_receive_mirror mirror_id + + let has_mirror_failed _ctx ~dbg ~mirror_id ~sr = + match State.find_active_local_mirror mirror_id with + | Some ({mirror_key= Some mk; vdi; live_vm; _} : State.Send_state.t) -> + let {failed; _} : Mirror.status = + Local.DATA.stat dbg sr vdi live_vm mk + in + failed + | _ -> + false + + let pre_deactivate_hook ctx ~dbg ~dp ~sr ~vdi = + D.debug "%s dbg: %s dp: %s sr: %s vdi: %s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) ; + let mirror_id = State.mirror_id_of (sr, vdi) in + D.debug "%s looking for final stats" __FUNCTION__ ; + State.find_active_local_mirror mirror_id + |> Option.iter (fun (s : State.Send_state.t) -> + if has_mirror_failed ctx ~dbg ~mirror_id ~sr then ( + D.error "%s QEMU reports mirroring failed" __FUNCTION__ ; + s.failed <- true + ) ; + Option.iter (Scheduler.cancel scheduler) s.watchdog ; + s.watchdog <- None + ) end From 2364acc686f30a477deab06275a907788eb3d0ca Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 16 Apr 2025 17:03:31 +0100 Subject: [PATCH 215/492] Multiplex receive_cancel2 dummy_vdi and parent_vdi are not created by storage_smapiv3_migrate.receive_start2, so do not attempt to destroy them in storage_smapiv3_migrate.receive_cancel2. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 31 +++++++++------------------ ocaml/xapi/storage_smapiv1_migrate.ml | 22 +++++++++++++++---- ocaml/xapi/storage_smapiv3_migrate.ml | 4 +++- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 9c9c574ef80..02fd5916454 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -46,23 +46,9 @@ module MigrateRemote = struct let (module Migrate_Backend) = choose_backend dbg sr in Migrate_Backend.receive_finalize3 () ~dbg ~mirror_id ~sr ~url ~verify_dest - let receive_cancel2 ~dbg ~mirror_id ~url ~verify_dest = - let (module Remote) = - Storage_migrate_helper.get_remote_backend url verify_dest - in - let receive_state = State.find_active_receive_mirror mirror_id in - let open State.Receive_state in - Option.iter - (fun r -> - D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; - List.iter - (fun v -> - D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr v) - ) - [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] - ) - receive_state ; - State.remove_receive_mirror mirror_id + let receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.receive_cancel2 () ~dbg ~mirror_id ~url ~verify_dest end (** This module [MigrateLocal] consists of the concrete implementations of the @@ -107,7 +93,7 @@ module MigrateLocal = struct debug "Snapshot VDI already cleaned up" ) ; try - MigrateRemote.receive_cancel2 ~dbg ~mirror_id:id + MigrateRemote.receive_cancel2 ~dbg ~mirror_id:id ~sr ~url:remote_info.url ~verify_dest:remote_info.verify_dest with _ -> () ) @@ -312,10 +298,11 @@ module MigrateLocal = struct copy_ops ; List.iter (fun (mirror_id, (recv_state : State.Receive_state.t)) -> + let sr, _vdi = State.of_mirror_id mirror_id in debug "Receive in progress: %s" mirror_id ; log_and_ignore_exn (fun () -> - MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~url:recv_state.url - ~verify_dest:recv_state.verify_dest + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr + ~url:recv_state.url ~verify_dest:recv_state.verify_dest ) ) recv_ops ; @@ -456,7 +443,9 @@ let stop = MigrateLocal.stop let list = MigrateLocal.list -let killall = MigrateLocal.killall +let killall ~dbg = + with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + MigrateLocal.killall ~dbg:(Debug_info.to_string di) let stat = MigrateLocal.stat diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 8a605fd59be..972ec202917 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -797,10 +797,6 @@ module MIRROR : SMAPIv2_MIRROR = struct receive_state ; State.remove_receive_mirror id - let receive_cancel2 _ctx ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = - (* see Storage_migrate.receive_cancel2 *) - u __FUNCTION__ - exception Timeout of Mtime.Span.t let reqs_outstanding_timeout = Mtime.Span.(150 * s) @@ -875,4 +871,22 @@ module MIRROR : SMAPIv2_MIRROR = struct let list _ctx = u __FUNCTION__ let stat _ctx = u __FUNCTION__ + + let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let receive_state = State.find_active_receive_mirror mirror_id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr v) + ) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror mirror_id end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index de50fafc02c..0fb4ae312e6 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -276,13 +276,15 @@ module MIRROR : SMAPIv2_MIRROR = struct recv_state ; State.remove_receive_mirror mirror_id - let receive_cancel _ctx = u __FUNCTION__ + let receive_cancel _ctx ~dbg:_ ~id:_ = u __FUNCTION__ let list _ctx = u __FUNCTION__ let stat _ctx = u __FUNCTION__ let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = + D.debug "%s dbg:%s mirror_id:%s url:%s verify_dest:%B" __FUNCTION__ dbg + mirror_id url verify_dest ; let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in From d91e2bac0ac5e0e5fe12c02d1b8fb6187871fffb Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 17 Apr 2025 15:21:14 +0100 Subject: [PATCH 216/492] Preserve content_id when doing snapshot on SMAPIv3 This is to mimic the behaviour on SMAPIv1. The update_snapshot_info function that runs at the end of migration will check for content_id, and this is needed to make it happy. Signed-off-by: Vincent Liu --- ocaml/xapi-storage-script/main.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 269267001f7..1b15a17f46e 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1457,6 +1457,9 @@ module VDIImpl (M : META) = struct set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key ~key:_snapshot_of_key ~value:vdi >>>= fun () -> + set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key + ~key:_vdi_content_id_key ~value:vdi_info.content_id + >>>= fun () -> let response = { (vdi_of_volume response) with From 5329a2d0737af3f41ea9c144584a99bff672a377 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 24 Apr 2025 13:58:45 +0100 Subject: [PATCH 217/492] Add Nbd parameters into VDI.attach Whilst it is not the default behaviour on XS 8 to attach a VDI through NBD, SXM inbound into a SMAPIv1 SR needs to have nbd enabled for mirroring purposes. As tapdisk will return usable nbd parameters to xapi, they can be included in the return value of attach. Most current users of this return value will keep using blktap2 kernel device and this nbd information is only used during SXM. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_smapiv1.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index b2fe0a6bfd3..0995edc35c4 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -478,6 +478,7 @@ module SMAPIv1 : Server_impl = struct ; backend_type= "vbd3" } ; BlockDevice {path= params} + ; Nbd {uri= attach_info_v1.Smint.params_nbd} ] ) } From 2eff6ab722f631e4345a0cb14e3856094dbceeb6 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 24 Apr 2025 15:14:21 +0100 Subject: [PATCH 218/492] Update the name of the nbd proxy As this nbd proxy is used for importing data, call it `import_nbd_proxy` to distinguish with the `export_nbd_proxy` that will be introduced later on. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 2 +- ocaml/xapi/storage_smapiv1_migrate.ml | 3 ++- ocaml/xapi/xapi_services.ml | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 02fd5916454..51a8995fc22 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -365,7 +365,7 @@ let nbd_handler req s ?(vm = "0") sr vdi dp = (** nbd_proxy is a http handler but will turn the http connection into an nbd connection. It proxies the connection between the sender and the generic nbd server, as returned by [get_nbd_server dp sr vdi vm]. *) -let nbd_proxy req s vm sr vdi dp = +let import_nbd_proxy req s vm sr vdi dp = debug "%s: vm=%s sr=%s vdi=%s dp=%s" __FUNCTION__ vm sr vdi dp ; let sr, vdi = Storage_interface.(Sr.of_string sr, Vdi.of_string vdi) in req.Http.Request.close <- true ; diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 972ec202917..7e438107225 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -203,7 +203,8 @@ module Copy = struct let dest_vdi_url = let url' = Http.Url.of_string url in Http.Url.set_uri url' - (Printf.sprintf "%s/nbdproxy/%s/%s/%s/%s" (Http.Url.get_uri url') + (Printf.sprintf "%s/nbdproxy/import/%s/%s/%s/%s" + (Http.Url.get_uri url') (Storage_interface.Vm.string_of vm) (Storage_interface.Sr.string_of dest) (Storage_interface.Vdi.string_of dest_vdi) diff --git a/ocaml/xapi/xapi_services.ml b/ocaml/xapi/xapi_services.ml index 1612c5050f8..d9fecf45495 100644 --- a/ocaml/xapi/xapi_services.ml +++ b/ocaml/xapi/xapi_services.ml @@ -206,9 +206,9 @@ let put_handler (req : Http.Request.t) s _ = | [""; services; "SM"; "nbd"; vm; sr; vdi; dp] when services = _services -> Storage_migrate.nbd_handler req s ~vm sr vdi dp - | [""; services; "SM"; "nbdproxy"; vm; sr; vdi; dp] + | [""; services; "SM"; "nbdproxy"; "import"; vm; sr; vdi; dp] when services = _services -> - Storage_migrate.nbd_proxy req s vm sr vdi dp + Storage_migrate.import_nbd_proxy req s vm sr vdi dp | _ -> Http_svr.headers s (Http.http_404_missing ~version:"1.0" ()) ; req.Http.Request.close <- true From 0c2660fe6d426b5bb08d84667dba8fb90004206c Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 8 May 2025 15:16:45 +0100 Subject: [PATCH 219/492] Add post_deactivate_hook to storage_mux for SMAPIv3 This is a bit of a layering violation as storage_mux should not care about the version of SMAPI the SR is, nor should it be responsible for calling hook functions. But as there is no way for xapi-storage-script to invoke code in xapi (which would also be a layering violation if it was possible), and smapiv1_wrapper has special state tracking logic for determining whether the hook should be called. Leave the hook here for now. Note the pre_deactivate_hook is not called as currently that remains a noop for SMAPIv3. And as we do not support VM shutdown during outbound SXM for SMAPIv3 anyway, leave a hack in the storage_mux for now until we have a plan on how to support that. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_mux.ml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index b8d5fc2dd72..1ea91e94078 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -647,7 +647,13 @@ module Mux = struct let module C = StorageAPI (Idl.Exn.GenClient (struct let rpc = of_sr sr end)) in - C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm + C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm ; + (*XX The hook should not be called here, nor should storage_mux care about + the SMAPI version of the SR, but as xapi-storage-script cannot call code + xapi, and smapiv1_wrapper has state tracking logic, the hook has to be placed + here for now. *) + if smapi_version_of_sr sr = SMAPIv3 then + Storage_migrate.post_deactivate_hook ~sr ~vdi ~dp let detach () ~dbg ~dp ~sr ~vdi ~vm = with_dbg ~name:"VDI.detach" ~dbg @@ fun di -> From 62dd9da248c8c61d379f90caa8d72e7abfbe8713 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Thu, 24 Apr 2025 15:16:51 +0100 Subject: [PATCH 220/492] Move attach/activate for SXM The attach and activate of the VDI being live migrated is there so that the SXM can keep working even if the VM on which the VDI is activated shutsdown. This is possible on SMAPIv1 as tapdisk does not distinguish between different domain paramters. But that is not the case for SMAPIv3. For now just avoid activating the VDI on dom0 since the VM is already activated on the live_vm. This does mean that SXM will stop working if the VM is shut down during storage migration. We will leave that case in the future. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_smapiv1_migrate.ml | 6 ++++++ ocaml/xapi/storage_smapiv3_migrate.ml | 10 ++++++++++ ocaml/xapi/xapi_vm_migrate.ml | 8 -------- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index 7e438107225..c07d098f9ee 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -579,6 +579,12 @@ module MIRROR : SMAPIv2_MIRROR = struct let (module Remote) = Storage_migrate_helper.get_remote_backend url verify_dest in + + let read_write = true in + (* DP set up is only essential for MIRROR.start/stop due to their open ended pattern. + It's not necessary for copy which will take care of that itself. *) + ignore (Local.VDI.attach3 dbg dp sr vdi (Vm.of_string "0") read_write) ; + Local.VDI.activate3 dbg dp sr vdi (Vm.of_string "0") ; match remote_mirror with | Mirror.SMAPIv3_mirror _ -> (* this should never happen *) diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 0fb4ae312e6..035e114c73d 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -110,6 +110,16 @@ module MIRROR : SMAPIv2_MIRROR = struct let send_start _ctx ~dbg ~task_id:_ ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi:_ ~copy_vm:_ ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = + D.debug + "%s dbg: %s dp: %s sr: %s vdi:%s mirror_vm:%s mirror_id: %s live_vm: %s \ + url:%s dest_sr:%s verify_dest:%B" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + mirror_id (s_of_vm live_vm) url (s_of_sr dest_sr) verify_dest ; + ignore (Local.VDI.attach3 dbg dp sr vdi (Vm.of_string "0") true) ; + (* TODO we are not activating the VDI here because SMAPIv3 does not support + activating the VDI again on dom 0 when it is already activated on the live_vm. + This means that if the VM shutsdown while SXM is in progress the + mirroring for SMAPIv3 will fail.*) let nbd_proxy_path = Printf.sprintf "/var/run/nbdproxy/export/%s" (Vm.string_of mirror_vm) in diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index e28242cadf1..e5eca21283d 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1020,14 +1020,6 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far (* Though we have no intention of "write", here we use the same mode as the associated VBD on a mirrored VDIs (i.e. always RW). This avoids problem when we need to start/stop the VM along the migration. *) - let read_write = true in - (* DP set up is only essential for MIRROR.start/stop due to their open ended pattern. - It's not necessary for copy which will take care of that itself. *) - ignore - (SMAPI.VDI.attach3 dbg new_dp vconf.sr vconf.location vconf.mirror_vm - read_write - ) ; - SMAPI.VDI.activate3 dbg new_dp vconf.sr vconf.location vconf.mirror_vm ; let id = Storage_migrate_helper.State.mirror_id_of (vconf.sr, vconf.location) in From 49037e2c084514af3f213cb82942d7365bee1c64 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 28 Apr 2025 17:00:07 +0100 Subject: [PATCH 221/492] Add mirror_checker for SMAPIv3 migrate There is a mirror_checker/tapdisk_watchdog for SMAPIv1 that periodically checks the status of the mirror and sends an update if it detects a failure. Implement something similar for SMAPIv3 mirror, although this check happens for a shorter period of time compared to the SMAPIv1 tapdisk_watchdog because the `Data.stat` call will stop working once the VM is paused, and currently we have no easy way to terminate this mirror checker just before the VM is paused (in xenopsd). So only do this check whilst the mirror syncing is in progress, i.e. when we are copying over the existing disk content. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_smapiv3_migrate.ml | 31 ++++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index 035e114c73d..d9d34ffbe08 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -77,12 +77,15 @@ let mirror_wait ~dbg ~sr ~vdi ~vm ~mirror_id mirror_key = Option.fold ~none:() ~some:(fun p -> D.info "%s progress is %f" __FUNCTION__ p) progress ; - D.info "%s completed" __FUNCTION__ + D.info "%s qemu mirror %s completed" mirror_id __FUNCTION__ ) else if failed then ( Option.iter (fun (snd_state : State.Send_state.t) -> snd_state.failed <- true) (State.find_active_local_mirror mirror_id) ; - D.info "%s failed" __FUNCTION__ ; + D.info "%s qemu mirror %s failed" mirror_id __FUNCTION__ ; + State.find_active_local_mirror mirror_id + |> Option.iter (fun (s : State.Send_state.t) -> s.failed <- true) ; + Updates.add (Dynamic.Mirror mirror_id) updates ; raise (Storage_interface.Storage_error (Migration_mirror_failure "Mirror failed during syncing") @@ -318,18 +321,16 @@ module MIRROR : SMAPIv2_MIRROR = struct | _ -> false - let pre_deactivate_hook ctx ~dbg ~dp ~sr ~vdi = + (* TODO currently we make the pre_deactivate_hook for SMAPIv3 a noop while for + SMAPIv1 it will do a final check of the state of the mirror and report error + if there is a mirror failure. We leave this for SMAPIv3 because the Data.stat + call, which checks for the state of the mirror stops working once the domain + has been paused, which happens before VDI.deactivate, hence we cannot do this check in + pre_deactivate_hook. Instead we work around this by doing mirror check in mirror_wait + as we repeatedly poll the state of the mirror job. In the future we might + want to invent a different hook that can be called to do a final check just + before the VM is paused. *) + let pre_deactivate_hook _ctx ~dbg ~dp ~sr ~vdi = D.debug "%s dbg: %s dp: %s sr: %s vdi: %s" __FUNCTION__ dbg dp (s_of_sr sr) - (s_of_vdi vdi) ; - let mirror_id = State.mirror_id_of (sr, vdi) in - D.debug "%s looking for final stats" __FUNCTION__ ; - State.find_active_local_mirror mirror_id - |> Option.iter (fun (s : State.Send_state.t) -> - if has_mirror_failed ctx ~dbg ~mirror_id ~sr then ( - D.error "%s QEMU reports mirroring failed" __FUNCTION__ ; - s.failed <- true - ) ; - Option.iter (Scheduler.cancel scheduler) s.watchdog ; - s.watchdog <- None - ) + (s_of_vdi vdi) end From 16cbb0c134dad2d0537f4bdd3a6007a63cc52c7c Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 28 Apr 2025 17:01:16 +0100 Subject: [PATCH 222/492] Cancel the watchdog before deactivating SMAPIv1 VDI Previously the tapdisk watchdog in SMAPIv1 mirroring was cancelled in the `post_deactivate_hook`, but at that point the VDI has already been deactivated, and hence the mirror would have been terminated. Additionally, the last time the stats is retrieved is in `pre_deactivate_hook`, so do this cancelling after the last stats retrival. Note that SMAPIv3 mirror does not have a watchdog due to the limitations of the mirror job auto cancel after guest pause, so instead the mirror checking is only done whilst the mirror syncing (i.e. copying existing disk content) is in progress. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 3 +-- ocaml/xapi/storage_smapiv1_migrate.ml | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 51a8995fc22..1ff03c3d7ed 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -331,8 +331,7 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = ) ; debug "Finished calling receive_finalize3" ; State.remove_local_mirror id ; - debug "Removed active local mirror: %s" id ; - Option.iter (fun id -> Scheduler.cancel scheduler id) r.watchdog + debug "Removed active local mirror: %s" id ) let nbd_handler req s ?(vm = "0") sr vdi dp = diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index c07d098f9ee..fe291d44d66 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -846,7 +846,10 @@ module MIRROR : SMAPIv2_MIRROR = struct if st.Stats.nbd_mirror_failed = 1 then ( D.error "tapdisk reports mirroring failed" ; s.failed <- true - ) + ) ; + Option.iter + (fun id -> Scheduler.cancel scheduler id) + s.watchdog with | Timeout elapsed -> D.error From b62e9fbfc01fc3049b5a81d2c4873a9c89667b22 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Apr 2025 16:09:10 +0100 Subject: [PATCH 223/492] doc: Move SXM docs to its own dir Signed-off-by: Vincent Liu --- doc/content/xapi/storage/{sxm.md => sxm/index.md} | 0 doc/content/xapi/storage/{ => sxm}/sxm_mux_inbound.svg | 0 doc/content/xapi/storage/{ => sxm}/sxm_mux_outbound.svg | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename doc/content/xapi/storage/{sxm.md => sxm/index.md} (100%) rename doc/content/xapi/storage/{ => sxm}/sxm_mux_inbound.svg (100%) rename doc/content/xapi/storage/{ => sxm}/sxm_mux_outbound.svg (100%) diff --git a/doc/content/xapi/storage/sxm.md b/doc/content/xapi/storage/sxm/index.md similarity index 100% rename from doc/content/xapi/storage/sxm.md rename to doc/content/xapi/storage/sxm/index.md diff --git a/doc/content/xapi/storage/sxm_mux_inbound.svg b/doc/content/xapi/storage/sxm/sxm_mux_inbound.svg similarity index 100% rename from doc/content/xapi/storage/sxm_mux_inbound.svg rename to doc/content/xapi/storage/sxm/sxm_mux_inbound.svg diff --git a/doc/content/xapi/storage/sxm_mux_outbound.svg b/doc/content/xapi/storage/sxm/sxm_mux_outbound.svg similarity index 100% rename from doc/content/xapi/storage/sxm_mux_outbound.svg rename to doc/content/xapi/storage/sxm/sxm_mux_outbound.svg From 2514621e2d67ecb134bc4221ae60547fe565b9a2 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Apr 2025 16:19:54 +0100 Subject: [PATCH 224/492] doc: Add doc on how SMAPIv1 SXM works Signed-off-by: Vincent Liu --- doc/content/xapi/storage/sxm/index.md | 131 +++++++++++++++++- doc/content/xapi/storage/sxm/sxm-final-v1.svg | 4 + .../xapi/storage/sxm/sxm-mirror-v1.svg | 4 + .../xapi/storage/sxm/sxm-new-copy-v1.svg | 4 + .../xapi/storage/sxm/sxm-overview-v1.svg | 4 + .../xapi/storage/sxm/sxm-snapshot-v1.svg | 4 + 6 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 doc/content/xapi/storage/sxm/sxm-final-v1.svg create mode 100644 doc/content/xapi/storage/sxm/sxm-mirror-v1.svg create mode 100644 doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg create mode 100644 doc/content/xapi/storage/sxm/sxm-overview-v1.svg create mode 100644 doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg diff --git a/doc/content/xapi/storage/sxm/index.md b/doc/content/xapi/storage/sxm/index.md index 8b7971bed79..033cc446477 100644 --- a/doc/content/xapi/storage/sxm/index.md +++ b/doc/content/xapi/storage/sxm/index.md @@ -9,6 +9,12 @@ Title: Storage migration - [Thought experiments on an alternative design](#thought-experiments-on-an-alternative-design) - [Design](#design) - [SMAPIv1 migration](#smapiv1-migration) + - [Preparation](#preparation) + - [Establish mirror](#establish-mirror) + - [Mirror](#mirror) + - [Snapshot](#snapshot) + - [Copy and compose](#copy-and-compose) + - [Finish](#finish) - [SMAPIv3 migration](#smapiv3-migration) - [Error Handling](#error-handling) - [Preparation (SMAPIv1 and SMAPIv3)](#preparation-smapiv1-and-smapiv3) @@ -122,10 +128,44 @@ it will be handled just as before. ## SMAPIv1 migration +This section is about migration from SMAPIv1 SRs to SMAPIv1 or SMAPIv3 SRs, since +the migration is driven by the source host, it is usally the source host that +determines most of the logic during a storage migration. + +First we take a look at an overview diagram of what happens during SMAPIv1 SXM: +the diagram is labelled with S1, S2 ... which indicates different stages of the migration. +We will talk about each stage in more detail below. + +![overview-v1](sxm-overview-v1.svg) + +### Preparation + +Before we can start our migration process, there are a number of preparations +needed to prepare for the following mirror. For SMAPIv1 this involves: + +1. Create a new VDI (called leaf) that will be used as the receiving VDI for all the new writes +2. Create a dummy snapshot of the VDI above to make sure it is a differencing disk and can be composed later on +3. Create a VDI (called parent) that will be used to receive the existing content of the disk (the snapshot) + +Note that the leaf VDI needs to be attached and activated on the destination host (to a non-exsiting `mirror_vm`) +since it will later on accept writes to mirror what is written on the source host. + +The parent VDI may be created in two different ways: 1. If there is a "similar VDI", +clone it on the destination host and use it as the parent VDI; 2. If there is no +such VDI, create a new blank VDI. The similarity here is defined by the distances +between different VDIs in the VHD tree, which is exploiting the internal representation +of the storage layer, hence we will not go into too much detail about this here. + +Once these preparations are done, a `mirror_receive_result` data structure is then +passed back to the source host that will contain all the necessary information about +these new VDIs, etc. + +### Establishing mirror + At a high level, mirror establishment for SMAPIv1 works as follows: 1. Take a snapshot of a VDI that is attached to VM1. This gives us an immutable -copy of the current state of the VDI, with all the data until the point we took +copy of the current state of the VDI, with all the data up until the point we took the snapshot. This is illustrated in the diagram as a VDI and its snapshot connecting to a shared parent, which stores the shared content for the snapshot and the writable VDI from which we took the snapshot (snapshot) @@ -135,8 +175,83 @@ client VDI will also be written to the mirrored VDI on the remote host (mirror) 4. Compose the mirror and the snapshot to form a single VDI 5. Destroy the snapshot on the local host (cleanup) +#### Mirror + +The mirroring process for SMAPIv1 is rather unconventional, so it is worth +documenting how this works. Instead of a conventional client server architecture, +where the source client connects to the destination server directly through the +NBD protocol in tapdisk, the connection is established in xapi and then passed +onto tapdisk. It was done in this rather unusual way mainly due to authentication +issues. Because it is xapi that is creating the connection, tapdisk does not need +to be concerned about authentication of the connection, thus simplifying the storage +component. This is reasonable as the storage component should focus on handling +storage requests rather than worrying about network security. + +The diagram below illustrates this prcess. First, xapi on the source host will +initiate an https request to the remote xapi. This request contains the necessary +information about the VDI to be mirrored, and the SR that contains it, etc. This +information is then passed onto the https handler on the destination host (called +`nbd_handler`) which then processes this information. Now the unusual step is that +both the source and the destination xapi will pass this connection onto tapdisk, +by sending the fd representing the socket connection to the tapdisk process. On +the source this would be nbd client process of tapdisk, and on the destination +this would be the nbd server process of the tapdisk. After this step, we can consider +a client-server connection is established between two tapdisks on the client and +server, as if the tapdisk on the source host makes a request to the tapdisk on the +destination host and initiates the connection. On the diagram, this is indicated +by the dashed lines between the tapdisk processes. Logically, we can view this as +xapi creates the connection, and then passes this connection down into tapdisk. + +![mirror](sxm-mirror-v1.svg) + +#### Snapshot + +The next step would be create a snapshot of the VDI. This is easily done as a +`VDI.snapshot` operation. If the VDI was in VHD format, then internally this would +create two children for, one for the snapshot, which only contains the metadata +information and tends to be small, the other for the writable VDI where all the +new writes will go to. The shared base copy contains the shared blocks. + +![snapshot](sxm-snapshot-v1.svg) + +#### Copy and compose + +Once the snapshot is created, we can then copy the snapshot from the source +to the destination. This step is done by `sparse_dd` using the nbd protocol. This +is also the step that takes the most time to complete. + +`sparse_dd` is a process forked by xapi that does the copying of the disk blocks. +`sparse_dd` can supports a number of protocols, including nbd. In this case, `sparse_dd` +will initiate an https put request to the destination host, with a url of the form +`
/services/SM/nbdproxy//`. This https request then +gets handled by the https handler on the destination host B, which will then spawn +a handler thread. This handler will find the +"generic" nbd server[^2] of either tapdisk or qemu-dp, depending on the destination +SR type, and then start proxying data between the https connection socket and the +socket connected to the nbd server. + +[^2]: The server is generic because it does not accept fd passing, and I call those +"special" nbd server/fd receiver. + +![sxm new copy](sxm-new-copy-v1.svg) + +Once copying is done, the snapshot and mirrored VDI can be then composed into a +single VDI. + +#### Finish + +At this point the VDI is synchronised to the new host! Mirror is still working at this point +though because that will not be destroyed until the VM itself has been migrated +as well. Some cleanups are done at this point, such as deleting the snapshot +that is taken on the source, destroying the mirror datapath, etc. + +The end results look like the following. Note that VM2 is in dashed line as it +is not yet created yet. The next steps would be to migrate the VM1 itself to the +destination as well, but this is part of the VM migration process and will not +be covered here. + +![final](sxm-final-v1.svg) -more detail to come... ## SMAPIv3 migration @@ -168,10 +283,10 @@ helps separate the error handling logic into the `with` part of a `try with` blo which is where they are supposed to be. Since we need to accommodate the existing SMAPIv1 migration (which has more stages than SMAPIv3), the following stages are introduced: preparation (v1,v3), snapshot(v1), mirror(v1, v3), copy(v1). Note that -each stage also roughly corresponds to a helper function that is called within `MIRROR.start`, +each stage also roughly corresponds to a helper function that is called within `Storage_migrate.start`, which is the wrapper function that initiates storage migration. And each helper functions themselves would also have error handling logic within themselves as -needed (e.g. see `Storage_smapiv1_migrate.receive_start) to deal with exceptions +needed (e.g. see `Storage_smapiv1_migrate.receive_start`) to deal with exceptions that happen within each helper functions. ### Preparation (SMAPIv1 and SMAPIv3) @@ -215,6 +330,14 @@ failure during copying. ## SMAPIv1 Migration implementation detail +{{% notice info %}} +The following doc refers to the xapi a [version](https://github.com/xapi-project/xen-api/blob/v24.37.0/ocaml/xapi/storage_migrate.ml) +of xapi that is before 24.37 after which point this code structure has undergone +many changes as part of adding support for SMAPIv3 SXM. Therefore the following +tutorial might be less relevant in terms of the implementation detail. Although +the general principle should remain the same. +{{% /notice %}} + ```mermaid sequenceDiagram participant local_tapdisk as local tapdisk diff --git a/doc/content/xapi/storage/sxm/sxm-final-v1.svg b/doc/content/xapi/storage/sxm/sxm-final-v1.svg new file mode 100644 index 00000000000..7cdb2d540a3 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-final-v1.svg @@ -0,0 +1,4 @@ + + + +
VM1
Host1
VDI
Host2
VDI
VM2
SR1
Mirror
SR2
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg b/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg new file mode 100644 index 00000000000..4b6f61131c5 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
VDI
VDI
xapi
xapi
tapdisk
tapdisk
Host A
Host A
Host B
Host B
http connection
http connection
pass client socket of the http connection
via SCM_RIGHTS
pass client socket o...
tapdisk
tapdisk
http handler
http handler
pass server socket of the http connection
pass server socket o...
VDI
VDI
mirror
mirror
Text is not SVG - cannot display
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg b/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg new file mode 100644 index 00000000000..891913850d3 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
Host A
Host B
tapdisk
http connection
qemu-dp
generic nbd server
generic nbd server
proxy
sparse_dd
http handler
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-overview-v1.svg b/doc/content/xapi/storage/sxm/sxm-overview-v1.svg new file mode 100644 index 00000000000..b6002382db2 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-overview-v1.svg @@ -0,0 +1,4 @@ + + + +
VM1
Host1
VDI
VDI snapshot
Host2
VDI
VDI snapshot
VM2
SR1
SR2
S2:Mirror
S1:Snapshot
S3: Copy
S4: Compose
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg b/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg new file mode 100644 index 00000000000..5fe0f398c17 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg @@ -0,0 +1,4 @@ + + + +
VDI
VDI snapshot
base
\ No newline at end of file From 5a0babd7bc4af69a0b014981739d4e58eb4473dd Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Apr 2025 17:06:47 +0100 Subject: [PATCH 225/492] doc: Add doc for SMAPIv3 SXM Signed-off-by: Vincent Liu --- doc/content/xapi/storage/sxm/index.md | 106 +++++++++++++++++- .../xapi/storage/sxm/sxm-mirror-v3.svg | 4 + 2 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 doc/content/xapi/storage/sxm/sxm-mirror-v3.svg diff --git a/doc/content/xapi/storage/sxm/index.md b/doc/content/xapi/storage/sxm/index.md index 033cc446477..4a8a68ced52 100644 --- a/doc/content/xapi/storage/sxm/index.md +++ b/doc/content/xapi/storage/sxm/index.md @@ -10,12 +10,16 @@ Title: Storage migration - [Design](#design) - [SMAPIv1 migration](#smapiv1-migration) - [Preparation](#preparation) - - [Establish mirror](#establish-mirror) + - [Establishing mirror](#establishing-mirror) - [Mirror](#mirror) - [Snapshot](#snapshot) - [Copy and compose](#copy-and-compose) - [Finish](#finish) - [SMAPIv3 migration](#smapiv3-migration) + - [Preparation](#preparation-1) + - [Establishing mirror](#establishing-mirror-1) + - [Limitations](#limitations) + - [Finish](#finish-1) - [Error Handling](#error-handling) - [Preparation (SMAPIv1 and SMAPIv3)](#preparation-smapiv1-and-smapiv3) - [Snapshot and mirror failure (SMAPIv1)](#snapshot-and-mirror-failure-smapiv1) @@ -255,7 +259,94 @@ be covered here. ## SMAPIv3 migration -More detail to come... +This section covers the mechanism of migrations *from* SRs using SMAPIv3 (to +SMAPIv1 or SMAPIv3). Although the core ideas are the same, SMAPIv3 has a rather +different mechanism for mirroring: 1. it does not require xapi to take snapshot +of the VDI anymore, since the mirror itself will take care of replicating the +existing data to the destination; 2. there is no fd passing for connection establishment anymore, and instead proxies are used for connection setup. + +### Preparation + +The preparation work for SMAPIv3 is greatly simplified by the fact that the mirror +at the storage layer will copy the existing data in the VDI to the destination. +This means that snapshot of the source VDI is not required anymore. So we are left +with only one thing: + +1. Create a VDI used for mirroring the data of the source VDI + +For this reason, the implementation logic for SMAPIv3 preparation is also shorter, +as the complexity is now handled by the storage layer, which is where it is supposed +to be handled. + +### Establishing mirror + +The other significant difference is that the storage backend for SMAPIv3 `qemu-dp` +SRs no longer accepts fds, so xapi needs to proxy the data between two nbd client +and nbd server. + +SMAPIv3 provides the `Data.mirror uri domain remote` which needs three parameters: +`uri` for accessing the local disk, `doamin` for the domain slice on which mirroring +should happen, and most importantly for this design, a `remote` url which represents +the remote nbd server to which the blocks of data can be sent to. + +This function itself, when called by xapi and forwarded to the storage layer's qemu-dp +nbd client, will initiate a nbd connection to the nbd server pointed to by `remote`. +This works fine when the storage migration happens entirely within a local host, +where qemu-dp's nbd client and nbd server can communicate over unix domain sockets. +However, it does not work for inter-host migrations as qemu-dp's nbd server is not +exposed publicly over the network (just as tapdisk's nbd server). Therefore a proxying +service on the source host is needed for forwarding the nbd connection from the +source host to the destination host. And it would be the responsiblity of +xapi to manage this proxy service. + +The following diagram illustrates the mirroring process of a single VDI: + +![sxm mirror](sxm-mirror-v3.svg) + +The first step for xapi is then to set up a nbd proxy thread that will be listening +on a local unix domain socket with path `/var/run/nbdproxy/export/` where +domain is the `domain` parameter mentioned above in `Data.mirror`. The nbd proxy +thread will accept nbd connections (or rather any connections, it does not +speak/care about nbd protocol at all) and sends an https put request +to the remote xapi. The proxy itself will then forward the data exactly as it is +to the remote side through the https connection. + +Once the proxy is set up, xapi will call `Data.mirror`, which +will be forwarded to the xapi-storage-script and is further forwarded to the qemu-dp. +This call contains, among other parameters, the destination NBD server url (`remote`) +to be connected. In this case the destination nbd server is exactly the domain +socket to which the proxy thread is listening. Therefore the `remote` parameter +will be of the form `nbd+unix:///?socket=` where the export is provided +by the destination nbd server that represents the VDI prepared on the destination +host, and the socket will be the path of the unix domain socket where the proxy +thread (which we just created) is listening at. + +When this connection is set up, the proxy process will talk to the remote xapi via +https requests, and on the remote side, an https handler will proxy this request to +the appropriate nbd server of either tapdisk or qemu-dp, using exactly the same +[import proxy](#copy-and-compose) as mentioned before. + +Note that this proxying service is tightly integrated with outbound SXM of SMAPIv3 +SRs. This is to make it simple to focus on the migration itself. + +Although there is no need to explicitly copy the VDI anymore, we still need to +transfer the data and wait for it finish. For this we use `Data.stat` call provided +by the storage backend to query the status of the mirror, and wait for it to finish +as needed. + +#### Limitations + +This way of establishing the connection simplifies the implementation of the migration +for SMAPIv3, but it also has limitations: + +One proxy per live VDI migration is needed, which can potentially consume lots of resources in dom0, and we should measure the impact of this before we switch to using more resource-efficient ways such as wire guard that allows establishing a single connection between multiple hosts. + + +### Finish + +As there is no need to copy a VDI, there is also no need to compose or delete the +snapshot. The cleanup procedure would therefore just involve destroy the datapath +that was used for receiving writes for the mirrored VDI. ## Error Handling @@ -318,7 +409,16 @@ are migrating from. ### Mirror failure (SMAPIv3) -To be filled... +The `Data.stat` call in SMAPIv3 returns a data structure that includes the current +progress of the mirror job, whether it has completed syncing the existing data and +whether the mirorr has failed. Similar to how it is done in SMAPIv1, we wait for +the sync to complete once we issue the `Data.mirror` call, by repeatedly polling +the status of the mirror using the `Data.stat` call. During this process, the status +of the mirror is also checked and if a failure is detected, a `Migration_mirror_failure` +will be raised and then gets handled by the code in `storage_migrate.ml` by calling +`Storage_smapiv3_migrate.receive_cancel2`, which will clean up the mirror datapath +and destroy the mirror VDI, similar to what is done in SMAPIv1. + ### Copy failure (SMAPIv1) diff --git a/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg b/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg new file mode 100644 index 00000000000..8ed03406acc --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
Source Host A
Destination Host B
tapdisk
qemu-dp
generic nbd server
generic nbd server
xapi-storage-script
Data.mirror 
qemu-dp 
nbd client
Data.mirror 
nbd exporting proxy
http handler
http request
nbd import proxy
Legend
belongs/spawns
talks to
\ No newline at end of file From f4b1c5ec13415651b21402936082aeddecae0163 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 21 May 2025 18:19:02 +0100 Subject: [PATCH 226/492] SXM: Keep previous http handler for back-compat In commit 2eff6ab722f631e4345a0cb14e3856094dbceeb6, the http handler was renamed to add an "import" in the url, but we need to keep the previous one for backwards compatability. This is so that previous versions of sparse_dd in XS 8 can migrate to the latest one. Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_services.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/xapi_services.ml b/ocaml/xapi/xapi_services.ml index d9fecf45495..ca9e3d729ca 100644 --- a/ocaml/xapi/xapi_services.ml +++ b/ocaml/xapi/xapi_services.ml @@ -206,6 +206,7 @@ let put_handler (req : Http.Request.t) s _ = | [""; services; "SM"; "nbd"; vm; sr; vdi; dp] when services = _services -> Storage_migrate.nbd_handler req s ~vm sr vdi dp + | [""; services; "SM"; "nbdproxy"; vm; sr; vdi; dp] | [""; services; "SM"; "nbdproxy"; "import"; vm; sr; vdi; dp] when services = _services -> Storage_migrate.import_nbd_proxy req s vm sr vdi dp From b947e1e4bc48e97366a7816405ce488460e5cb1a Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 8 May 2025 16:31:58 +0800 Subject: [PATCH 227/492] CA-409482: Using computed delay for RRD loop RRD loop is executed each 5 seconds. It delays fixed 5 seconds between each loop. But the loop self also consumes time (The time consuming depends on CPU's count. If there are many CPUs, the time consuming may be hundreds milliseconds). This implementation leads RRD will take an offset after several loops. Then one of RRD data lose and a gap can be observed on XenCenter performance graph. The solution is to use a fixed deadline as each iteration start time and to use a computed delay (timeslice - loop time consuming) instead of fixed delay. Signed-off-by: Bengang Yuan --- ocaml/xcp-rrdd/bin/rrdd/dune | 5 ++-- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 8 +++++-- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 13 ++++++----- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 32 ++++++++++++++++++++------ 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/dune b/ocaml/xcp-rrdd/bin/rrdd/dune index b8419b12fb8..d84e06e46fd 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/dune +++ b/ocaml/xcp-rrdd/bin/rrdd/dune @@ -10,8 +10,8 @@ http_lib httpsvr inotify - mtime - mtime.clock.os + clock + mtime.clock rpclib.core rrd-transport rrd-transport.lib @@ -46,6 +46,7 @@ http_lib httpsvr inotify + clock rpclib.core rpclib.json rpclib.xml diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 6e11a2da31c..6a1212f178a 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -716,8 +716,12 @@ module Plugin = struct let next_reading (uid : P.uid) : float = let open Rrdd_shared in if with_lock registered_m (fun _ -> Hashtbl.mem registered uid) then - with_lock last_loop_end_time_m (fun _ -> - !last_loop_end_time +. !timeslice -. Unix.gettimeofday () + with_lock next_iteration_start_m (fun _ -> + match Clock.Timer.remaining !next_iteration_start with + | Remaining diff -> + Clock.Timer.span_to_s diff + | Expired diff -> + Clock.Timer.span_to_s diff *. -1. ) else -1. diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 8800ed56836..816860e5815 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -20,14 +20,15 @@ module StringSet = Set.Make (String) (* Whether to enable all non-default datasources *) let enable_all_dss = ref false -(* The time between each monitoring loop. *) -let timeslice : float ref = ref 5. +(* The expected time span between each monitoring loop. *) +let timeslice : Mtime.span ref = ref Mtime.Span.(5 * s) -(* Timestamp of the last monitoring loop end. *) -let last_loop_end_time : float ref = ref neg_infinity +(* A timer that expires at the start of the next iteration *) +let next_iteration_start : Clock.Timer.t ref = + ref (Clock.Timer.start ~duration:!timeslice) -(* The mutex that protects the last_loop_end_time against data corruption. *) -let last_loop_end_time_m : Mutex.t = Mutex.create () +(* The mutex that protects the next_iteration_start against data corruption. *) +let next_iteration_start_m : Mutex.t = Mutex.create () (** Cache memory/target values *) let memory_targets : (int, int64) Hashtbl.t = Hashtbl.create 20 diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 448dc98f9cb..75108465907 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -538,18 +538,36 @@ let monitor_write_loop writers = while true do try do_monitor_write xc writers ; - with_lock Rrdd_shared.last_loop_end_time_m (fun _ -> - Rrdd_shared.last_loop_end_time := Unix.gettimeofday () + with_lock Rrdd_shared.next_iteration_start_m (fun _ -> + Rrdd_shared.next_iteration_start := + Clock.Timer.extend_by !Rrdd_shared.timeslice + !Rrdd_shared.next_iteration_start ) ; - Thread.delay !Rrdd_shared.timeslice + match Clock.Timer.remaining !Rrdd_shared.next_iteration_start with + | Remaining remaining -> + Thread.delay (Clock.Timer.span_to_s remaining) + | Expired missed_by -> + warn + "%s: Monitor write iteration missed cycle by %a, skipping \ + the delay" + __FUNCTION__ Debug.Pp.mtime_span missed_by ; + (* To avoid to use up 100% CPU when the timer is already + expired, still delay 1s *) + Thread.delay 1. with e -> Backtrace.is_important e ; warn - "Monitor/write thread caught an exception. Pausing for 10s, \ - then restarting: %s" - (Printexc.to_string e) ; + "%s: Monitor/write thread caught an exception. Pausing for \ + 10s, then restarting: %s" + __FUNCTION__ (Printexc.to_string e) ; log_backtrace e ; - Thread.delay 10. + Thread.delay 10. ; + with_lock Rrdd_shared.next_iteration_start_m (fun _ -> + Rrdd_shared.next_iteration_start := + Clock.Timer.extend_by + Mtime.Span.(10 * s) + !Rrdd_shared.next_iteration_start + ) done ) ) From fadf70636a5b053639b1468f005cfa17cd12a107 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 20 May 2025 09:30:59 +0100 Subject: [PATCH 228/492] CA-411319: Concurrent `VM.assert_can_migrate` failure When the customers open "Migrate VM Wizard" on XenCenter, XenCenter will call `VM.assert_can_migrate` to check each host in each pool connected to XenCenter if the VM can be migrated to it. The API `VM.assert_can_migrate` then calls `VM.export_metadata`. `VM.export_metadata` will lock VM. During this time, other `VM.export_metadata` requests will fail as they can't get VM lock. The solution is to add retry when failing to lock VM. Signed-off-by: Bengang Yuan --- ocaml/xapi/export.ml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/export.ml b/ocaml/xapi/export.ml index 81dcb22bc44..3c00b544f73 100644 --- a/ocaml/xapi/export.ml +++ b/ocaml/xapi/export.ml @@ -713,11 +713,15 @@ open Http open Client let lock_vm ~__context ~vm ~task_id op = - (* Note slight race here because we haven't got the master lock *) - Xapi_vm_lifecycle.assert_operation_valid ~__context ~self:vm ~op ~strict:true ; - (* ... small race lives here ... *) - Db.VM.add_to_current_operations ~__context ~self:vm ~key:task_id ~value:op ; - Xapi_vm_lifecycle.update_allowed_operations ~__context ~self:vm + Helpers.retry ~__context ~doc:task_id ~policy:Helpers.Policy.fail_quickly + (fun () -> + (* Note slight race here because we haven't got the master lock *) + Xapi_vm_lifecycle.assert_operation_valid ~__context ~self:vm ~op + ~strict:true ; + (* ... small race lives here ... *) + Db.VM.add_to_current_operations ~__context ~self:vm ~key:task_id ~value:op ; + Xapi_vm_lifecycle.update_allowed_operations ~__context ~self:vm + ) let unlock_vm ~__context ~vm ~task_id = Db.VM.remove_from_current_operations ~__context ~self:vm ~key:task_id ; From 50bc09a52261851f9fbfb56b1697a5270c6fc073 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Tue, 20 May 2025 09:48:48 +0000 Subject: [PATCH 229/492] CP-54382 Set Different Auto-Mode Default Values for XS8 and XS9 - For XS8, a configuration file (/etc/xapi.conf.d/ssh-auto-mode.conf) will be loaded, and auto_mode will be set to false. - For XS9, no configuration file exists, so the default value will be set to true. Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/dbsync_slave.ml | 16 ++++++++++++++-- ocaml/xapi/xapi_globs.ml | 8 ++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 900e8a1ac04..9aff8823ea9 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -64,7 +64,7 @@ let create_localhost ~__context info = ~ssh_enabled_timeout:Constants.default_ssh_enabled_timeout ~ssh_expiry:Date.epoch ~console_idle_timeout:Constants.default_console_idle_timeout - ~ssh_auto_mode:Constants.default_ssh_auto_mode + ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default in () @@ -384,7 +384,19 @@ let update_env __context sync_keys = switched_sync Xapi_globs.sync_ssh_status (fun () -> let ssh_service = !Xapi_globs.ssh_service in let status = Fe_systemctl.is_active ~service:ssh_service in - Db.Host.set_ssh_enabled ~__context ~self:localhost ~value:status + Db.Host.set_ssh_enabled ~__context ~self:localhost ~value:status ; + let auto_mode_in_db = + Db.Host.get_ssh_auto_mode ~__context ~self:localhost + in + let ssh_monitor_enabled = + Fe_systemctl.is_active ~service:!Xapi_globs.ssh_monitor_service + in + (* For xs9 when fresh install, the ssh_monitor service is not enabled by default. + If the auto_mode is enabled, we need to enable the ssh_monitor service. + and user may have disabled monitor service by mistake as well, so we need to check the status. *) + if auto_mode_in_db <> ssh_monitor_enabled then + Xapi_host.set_ssh_auto_mode ~__context ~self:localhost + ~value:auto_mode_in_db ) ; remove_pending_guidances ~__context diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index a8b2d8485ca..302ceca5f24 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1299,6 +1299,8 @@ let ssh_service = ref "sshd" let ssh_monitor_service = ref "xapi-ssh-monitor" +let ssh_auto_mode_default = ref true + (* Fingerprint of default patch key *) let citrix_patch_key = "NERDNTUzMDMwRUMwNDFFNDI4N0M4OEVCRUFEMzlGOTJEOEE5REUyNg==" @@ -1743,6 +1745,12 @@ let other_options = , (fun () -> string_of_bool !validate_reusable_pool_session) , "Enable validation of reusable pool sessions before use" ) + ; ( "ssh-auto-mode" + , Arg.Bool (fun b -> ssh_auto_mode_default := b) + , (fun () -> string_of_bool !ssh_auto_mode_default) + , "Defaults to true; overridden to false via \ + /etc/xapi.conf.d/ssh-auto-mode.conf(e.g., in XenServer 8)" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. From e114448d79bbb25da7d060ba5fae4415d4810732 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Wed, 21 May 2025 02:12:52 +0000 Subject: [PATCH 230/492] CP-54382 Reconfigure Auto mode when pool join and pool eject - Copy auto mode setting from pool coordinator in pool join - Restore auto mode setting to default when pool eject Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/xapi_pool.ml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index ac09ebca7fa..b669a6738df 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1670,10 +1670,15 @@ let join_common ~__context ~master_address ~master_username ~master_password Client.Host.get_console_idle_timeout ~rpc ~session_id ~self:remote_coordinator in + let ssh_auto_mode = + Client.Host.get_ssh_auto_mode ~rpc ~session_id + ~self:remote_coordinator + in Xapi_host.set_console_idle_timeout ~__context ~self:me ~value:console_idle_timeout ; Xapi_host.set_ssh_enabled_timeout ~__context ~self:me ~value:ssh_enabled_timeout ; + Xapi_host.set_ssh_auto_mode ~__context ~self:me ~value:ssh_auto_mode ; let ssh_enabled = Client.Host.get_ssh_enabled ~rpc ~session_id ~self:remote_coordinator @@ -2056,6 +2061,8 @@ let eject_self ~__context ~host = (* Restore SSH service to default state *) Xapi_host.set_ssh_enabled_timeout ~__context ~self:host ~value:Constants.default_ssh_enabled_timeout ; + Xapi_host.set_ssh_auto_mode ~__context ~self:host + ~value:!Xapi_globs.ssh_auto_mode_default ; match Constants.default_ssh_enabled with | true -> Xapi_host.enable_ssh ~__context ~self:host From 84ecb1eaa092cb8c517132cdc37e9640e6f23162 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Wed, 14 May 2025 09:28:33 +0000 Subject: [PATCH 231/492] CA-410948 Avoid rasie full Exception when disable/enable ssh failed - Refine the exception when host.enable_ssh/host.disable_ssh failed - Reset the host.ssh_expiry to default when host.enabl_ssh with no timeout Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/xapi_host.ml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index dfccd2ebc73..5efdacf780d 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3121,7 +3121,8 @@ let disable_ssh_internal ~__context ~self = with e -> error "Failed to disable SSH for host %s: %s" (Ref.string_of self) (Printexc.to_string e) ; - Helpers.internal_error "Failed to disable SSH: %s" (Printexc.to_string e) + Helpers.internal_error "Failed to disable SSH access, host: %s" + (Ref.string_of self) let schedule_disable_ssh_job ~__context ~self ~timeout = let host_uuid = Helpers.get_localhost_uuid () in @@ -3168,7 +3169,8 @@ let enable_ssh ~__context ~self = ( match timeout with | 0L -> Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue - !Xapi_globs.job_for_disable_ssh + !Xapi_globs.job_for_disable_ssh ; + Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch | t -> schedule_disable_ssh_job ~__context ~self ~timeout:t ) ; @@ -3177,7 +3179,8 @@ let enable_ssh ~__context ~self = with e -> error "Failed to enable SSH on host %s: %s" (Ref.string_of self) (Printexc.to_string e) ; - Helpers.internal_error "Failed to enable SSH: %s" (Printexc.to_string e) + Helpers.internal_error "Failed to enable SSH access, host: %s" + (Ref.string_of self) let disable_ssh ~__context ~self = Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue From d823e045d9ebba2372172031b2fbc414bb645433 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Thu, 22 May 2025 16:58:03 +0100 Subject: [PATCH 232/492] Improve the xapi_observer debug logs by adding more context Signed-off-by: Steven Woods --- ocaml/xapi/xapi_observer.ml | 66 ++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 404c4496f29..0dd53865307 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -61,56 +61,56 @@ end module Observer : ObserverInterface = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = - debug "Observer.create %s" uuid ; + debug "xapi Observer.create %s" uuid ; Tracing.TracerProvider.create ~uuid ~name_label ~attributes ~endpoints ~enabled let destroy ~__context ~uuid = - debug "Observer.destroy %s" uuid ; + debug "xapi Observer.destroy %s" uuid ; Tracing.TracerProvider.destroy ~uuid let set_enabled ~__context ~uuid ~enabled = - debug "Observer.set_enabled %s" uuid ; + debug "xapi Observer.set_enabled %s" uuid ; Tracing.TracerProvider.set ~uuid ~enabled () let set_attributes ~__context ~uuid ~attributes = - debug "Observer.set_attributes %s" uuid ; + debug "xapi Observer.set_attributes %s" uuid ; Tracing.TracerProvider.set ~uuid ~attributes () let set_endpoints ~__context ~uuid ~endpoints = - debug "Observer.set_endpoints %s" uuid ; + debug "xapi Observer.set_endpoints %s" uuid ; Tracing.TracerProvider.set ~uuid ~endpoints () let init ~__context = - debug "Observer.init" ; + debug "xapi Observer.init" ; ignore @@ Tracing_export.main () let set_trace_log_dir ~__context ~dir = - debug "Observer.set_trace_log_dir" ; + debug "xapi Observer.set_trace_log_dir" ; Tracing_export.Destination.File.set_trace_log_dir dir let set_export_interval ~__context ~interval = - debug "Observer.set_export_interval" ; + debug "xapi Observer.set_export_interval" ; Tracing_export.set_export_interval interval let set_max_spans ~__context ~spans = - debug "Observer.set_max_spans" ; + debug "xapi Observer.set_max_spans" ; Tracing.Spans.set_max_spans spans let set_max_traces ~__context ~traces = - debug "Observer.set_max_traces" ; + debug "xapi Observer.set_max_traces" ; Tracing.Spans.set_max_traces traces let set_max_file_size ~__context ~file_size = - debug "Observer.set_max_file_size" ; + debug "xapi Observer.set_max_file_size" ; Tracing_export.Destination.File.set_max_file_size file_size let set_host_id ~__context ~host_id = - debug "Observer.set_host_id" ; + debug "xapi Observer.set_host_id" ; Tracing_export.set_host_id host_id let set_compress_tracing_files ~__context ~enabled = - debug "Observer.set_compress_tracing_files" ; + debug "xapi Observer.set_compress_tracing_files" ; Tracing_export.Destination.File.set_compress_tracing_files enabled end @@ -142,79 +142,79 @@ module Xapi_cluster = struct module Observer = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = - debug "Observer.create %s" uuid ; + debug "xapi_cluster Observer.create %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.create dbg uuid name_label attributes endpoints enabled let destroy ~__context ~uuid = - debug "Observer.destroy %s" uuid ; + debug "xapi_cluster Observer.destroy %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.destroy dbg uuid let set_enabled ~__context ~uuid ~enabled = - debug "Observer.set_enabled %s" uuid ; + debug "xapi_cluster Observer.set_enabled %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_enabled dbg uuid enabled let set_attributes ~__context ~uuid ~attributes = - debug "Observer.set_attributes %s" uuid ; + debug "xapi_cluster Observer.set_attributes %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_attributes dbg uuid attributes let set_endpoints ~__context ~uuid ~endpoints = - debug "Observer.set_endpoints %s" uuid ; + debug "xapi_cluster Observer.set_endpoints %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_endpoints dbg uuid endpoints let init ~__context = - debug "Observer.init" ; + debug "xapi_cluster Observer.init" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.init dbg let set_trace_log_dir ~__context ~dir = - debug "Observer.set_trace_log_dir" ; + debug "xapi_cluster Observer.set_trace_log_dir" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_trace_log_dir dbg dir let set_export_interval ~__context ~interval = - debug "Observer.set_export_interval" ; + debug "xapi_cluster Observer.set_export_interval" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_export_interval dbg interval let set_max_spans ~__context ~spans = - debug "Observer.set_max_spans" ; + debug "xapi_cluster Observer.set_max_spans" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_spans dbg spans let set_max_traces ~__context ~traces = - debug "Observer.set_max_traces" ; + debug "xapi_cluster Observer.set_max_traces" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_traces dbg traces let set_max_file_size ~__context ~file_size = - debug "Observer.set_max_file_size" ; + debug "xapi_cluster Observer.set_max_file_size" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_file_size dbg file_size let set_host_id ~__context ~host_id = - debug "Observer.set_host_id" ; + debug "xapi_cluster Observer.set_host_id" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_host_id dbg host_id let set_compress_tracing_files ~__context ~enabled = - debug "Observer.set_compress_tracing_files" ; + debug "xapi_cluster Observer.set_compress_tracing_files" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_compress_tracing_files dbg enabled @@ -331,28 +331,40 @@ module Dom0ObserverConfig (ObserverComponent : OBSERVER_COMPONENT) : let create ~__context ~uuid ~name_label:_ ~attributes:_ ~endpoints:_ ~enabled:_ = + debug "%s config Observer.create" (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid - let destroy ~__context ~uuid = remove_config ~uuid + let destroy ~__context ~uuid = + debug "%s config Observer.destroy" (to_string ObserverComponent.component) ; + remove_config ~uuid let set_enabled ~__context ~uuid ~enabled:_ = + debug "%s config Observer.set_enabled" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let set_attributes ~__context ~uuid ~attributes:_ = + debug "%s config Observer.set_attributes" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let set_endpoints ~__context ~uuid ~endpoints:_ = + debug "%s config Observer.set_endpoints" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let init ~__context = + debug "%s config Observer.init" (to_string ObserverComponent.component) ; let observer_all = Db.Observer.get_all ~__context in update_all_configs ~__context ~observer_all let set_trace_log_dir ~__context ~dir:_ = + debug "%s config Observer.set_trace_log_dir" + (to_string ObserverComponent.component) ; let observer_all = Db.Observer.get_all ~__context in update_all_configs ~__context ~observer_all From 13042fd0580fb314ae785aba1b762a8f7a241e6b Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Tue, 27 May 2025 10:44:33 +0100 Subject: [PATCH 233/492] Reduce code duplication by using a common Observer Interface Besides the errors, Xapi_cluster and Xenopsd use the exact same Observer RPC definitions. Add a new Observer error (as the unique errors for cluster/xenops are not applicable to the Observer functions anyway) and use common code to remove this duplication. Signed-off-by: Steven Woods --- ocaml/xapi-idl/cluster/cluster_interface.ml | 77 +--------- ocaml/xapi-idl/lib/dune | 2 +- ocaml/xapi-idl/lib/observer_helpers.ml | 150 ++++++++++++++++++++ ocaml/xapi-idl/xen/xenops_interface.ml | 77 +--------- 4 files changed, 153 insertions(+), 153 deletions(-) create mode 100644 ocaml/xapi-idl/lib/observer_helpers.ml diff --git a/ocaml/xapi-idl/cluster/cluster_interface.ml b/ocaml/xapi-idl/cluster/cluster_interface.ml index a39fc0a2ae9..d537cf0f99e 100644 --- a/ocaml/xapi-idl/cluster/cluster_interface.ml +++ b/ocaml/xapi-idl/cluster/cluster_interface.ml @@ -384,80 +384,5 @@ module LocalAPI (R : RPC) = struct (debug_info_p @-> timeout_p @-> returning result_p err) end - module Observer = struct - open TypeCombinators - - let endpoints_p = Param.mk ~name:"endpoints" (list Types.string) - - let bool_p = Param.mk ~name:"bool" Types.bool - - let uuid_p = Param.mk ~name:"uuid" Types.string - - let name_label_p = Param.mk ~name:"name_label" Types.string - - let dict_p = Param.mk ~name:"dict" dict - - let string_p = Param.mk ~name:"string" Types.string - - let int_p = Param.mk ~name:"int" Types.int - - let float_p = Param.mk ~name:"float" Types.float - - let create = - declare "Observer.create" [] - (debug_info_p - @-> uuid_p - @-> name_label_p - @-> dict_p - @-> endpoints_p - @-> bool_p - @-> returning unit_p err - ) - - let destroy = - declare "Observer.destroy" [] - (debug_info_p @-> uuid_p @-> returning unit_p err) - - let set_enabled = - declare "Observer.set_enabled" [] - (debug_info_p @-> uuid_p @-> bool_p @-> returning unit_p err) - - let set_attributes = - declare "Observer.set_attributes" [] - (debug_info_p @-> uuid_p @-> dict_p @-> returning unit_p err) - - let set_endpoints = - declare "Observer.set_endpoints" [] - (debug_info_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) - - let init = declare "Observer.init" [] (debug_info_p @-> returning unit_p err) - - let set_trace_log_dir = - declare "Observer.set_trace_log_dir" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_export_interval = - declare "Observer.set_export_interval" [] - (debug_info_p @-> float_p @-> returning unit_p err) - - let set_host_id = - declare "Observer.set_host_id" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_max_traces = - declare "Observer.set_max_traces" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_spans = - declare "Observer.set_max_spans" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_file_size = - declare "Observer.set_max_file_size" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_compress_tracing_files = - declare "Observer.set_compress_tracing_files" [] - (debug_info_p @-> bool_p @-> returning unit_p err) - end + module Observer = Observer_helpers.ObserverAPI (R) end diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index 8f0d7ca27de..f0f1f4ce588 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -40,7 +40,7 @@ (wrapped false) (preprocess (per_module - ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators) + ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators Observer_helpers) ((pps ppx_sexp_conv ppx_deriving_rpc) Xcp_pci)))) (library diff --git a/ocaml/xapi-idl/lib/observer_helpers.ml b/ocaml/xapi-idl/lib/observer_helpers.ml new file mode 100644 index 00000000000..959a666b1c4 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_helpers.ml @@ -0,0 +1,150 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Rpc +open Idl + +module D = Debug.Make (struct let name = "observer_interface" end) + +open D + +module Errors = struct + type error = + | Internal_error of string + | Unimplemented of string + | Unknown_error + [@@default Unknown_error] [@@deriving rpcty] +end + +exception Observer_error of Errors.error + +let err = + let open Error in + { + def= Errors.error + ; raiser= + (fun e -> + let exn = Observer_error e in + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + raise exn + ) + ; matcher= + (function + | Observer_error e as exn -> + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + Some e + | exn -> + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + Some (Internal_error (Printexc.to_string exn)) + ) + } + +(** An uninterpreted string associated with the operation. *) +type debug_info = string [@@deriving rpcty] + +module ObserverAPI (R : RPC) = struct + open R + open TypeCombinators + + let description = + let open Interface in + { + name= "Observer" + ; namespace= None + ; description= + [ + "This interface is used to create, update and destroy Observers to \ + control the use of tracing in different xapi components" + ] + ; version= (1, 0, 0) + } + + let implementation = implement description + + let dbg_p = Param.mk ~name:"dbg" Types.string + + let unit_p = Param.mk ~name:"unit" Types.unit + + let endpoints_p = Param.mk ~name:"endpoints" (list Types.string) + + let bool_p = Param.mk ~name:"bool" Types.bool + + let uuid_p = Param.mk ~name:"uuid" Types.string + + let name_label_p = Param.mk ~name:"name_label" Types.string + + let dict_p = Param.mk ~name:"dict" dict + + let string_p = Param.mk ~name:"string" Types.string + + let int_p = Param.mk ~name:"int" Types.int + + let float_p = Param.mk ~name:"float" Types.float + + let create = + declare "Observer.create" [] + (dbg_p + @-> uuid_p + @-> name_label_p + @-> dict_p + @-> endpoints_p + @-> bool_p + @-> returning unit_p err + ) + + let destroy = + declare "Observer.destroy" [] (dbg_p @-> uuid_p @-> returning unit_p err) + + let set_enabled = + declare "Observer.set_enabled" [] + (dbg_p @-> uuid_p @-> bool_p @-> returning unit_p err) + + let set_attributes = + declare "Observer.set_attributes" [] + (dbg_p @-> uuid_p @-> dict_p @-> returning unit_p err) + + let set_endpoints = + declare "Observer.set_endpoints" [] + (dbg_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) + + let init = declare "Observer.init" [] (dbg_p @-> returning unit_p err) + + let set_trace_log_dir = + declare "Observer.set_trace_log_dir" [] + (dbg_p @-> string_p @-> returning unit_p err) + + let set_export_interval = + declare "Observer.set_export_interval" [] + (dbg_p @-> float_p @-> returning unit_p err) + + let set_max_spans = + declare "Observer.set_max_spans" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_max_traces = + declare "Observer.set_max_traces" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_max_file_size = + declare "Observer.set_max_file_size" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_host_id = + declare "Observer.set_host_id" [] + (dbg_p @-> string_p @-> returning unit_p err) + + let set_compress_tracing_files = + declare "Observer.set_compress_tracing_files" [] + (dbg_p @-> bool_p @-> returning unit_p err) +end diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 4c9da479a78..85ac0665450 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -1152,80 +1152,5 @@ module XenopsAPI (R : RPC) = struct (debug_info_p @-> unit_p @-> returning unit_p err) end - module Observer = struct - open TypeCombinators - - let endpoints_p = Param.mk ~name:"endpoints" (list Types.string) - - let bool_p = Param.mk ~name:"bool" Types.bool - - let uuid_p = Param.mk ~name:"uuid" Types.string - - let name_label_p = Param.mk ~name:"name_label" Types.string - - let dict_p = Param.mk ~name:"dict" dict - - let string_p = Param.mk ~name:"string" Types.string - - let int_p = Param.mk ~name:"int" Types.int - - let float_p = Param.mk ~name:"float" Types.float - - let create = - declare "Observer.create" [] - (debug_info_p - @-> uuid_p - @-> name_label_p - @-> dict_p - @-> endpoints_p - @-> bool_p - @-> returning unit_p err - ) - - let destroy = - declare "Observer.destroy" [] - (debug_info_p @-> uuid_p @-> returning unit_p err) - - let set_enabled = - declare "Observer.set_enabled" [] - (debug_info_p @-> uuid_p @-> bool_p @-> returning unit_p err) - - let set_attributes = - declare "Observer.set_attributes" [] - (debug_info_p @-> uuid_p @-> dict_p @-> returning unit_p err) - - let set_endpoints = - declare "Observer.set_endpoints" [] - (debug_info_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) - - let init = declare "Observer.init" [] (debug_info_p @-> returning unit_p err) - - let set_trace_log_dir = - declare "Observer.set_trace_log_dir" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_export_interval = - declare "Observer.set_export_interval" [] - (debug_info_p @-> float_p @-> returning unit_p err) - - let set_host_id = - declare "Observer.set_host_id" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_max_traces = - declare "Observer.set_max_traces" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_spans = - declare "Observer.set_max_spans" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_file_size = - declare "Observer.set_max_file_size" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_compress_tracing_files = - declare "Observer.set_compress_tracing_files" [] - (debug_info_p @-> bool_p @-> returning unit_p err) - end + module Observer = Observer_helpers.ObserverAPI (R) end From 4ab193a0615fcc1727cf188d921bf6a6be046ba3 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Tue, 27 May 2025 11:20:03 +0100 Subject: [PATCH 234/492] CA-409431: Use an Observer forwarder for xapi-storage-script Currently, xapi-storage-script uses the presence/absence of a smapi observer config file to determine whether it should create traces. This only happens on startup which means smapiv3 traces will often not be created when they should be. This commit updates the Smapi Observer forwarder to use an RPC client to send messages to xapi-storage-script, updating it on any relevant changes to the Observer. Signed-off-by: Steven Woods --- ocaml/xapi-idl/lib/dune | 2 +- ocaml/xapi-idl/lib/observer_helpers.ml | 114 ++++++++++++++++++++++++ ocaml/xapi-idl/lib/observer_skeleton.ml | 48 ++++++++++ ocaml/xapi-storage-script/dune | 1 + ocaml/xapi-storage-script/main.ml | 38 ++++---- ocaml/xapi/xapi_observer.ml | 26 +++++- quality-gate.sh | 2 +- 7 files changed, 213 insertions(+), 18 deletions(-) create mode 100644 ocaml/xapi-idl/lib/observer_skeleton.ml diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index f0f1f4ce588..4f29504a97a 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -40,7 +40,7 @@ (wrapped false) (preprocess (per_module - ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators Observer_helpers) + ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators Observer_helpers Observer_skeleton) ((pps ppx_sexp_conv ppx_deriving_rpc) Xcp_pci)))) (library diff --git a/ocaml/xapi-idl/lib/observer_helpers.ml b/ocaml/xapi-idl/lib/observer_helpers.ml index 959a666b1c4..125ba101722 100644 --- a/ocaml/xapi-idl/lib/observer_helpers.ml +++ b/ocaml/xapi-idl/lib/observer_helpers.ml @@ -19,6 +19,16 @@ module D = Debug.Make (struct let name = "observer_interface" end) open D +let service_name = "observer" + +let queue_name = Xcp_service.common_prefix ^ service_name + +let default_sockets_dir = "/var/lib/xcp" + +let default_path = Filename.concat default_sockets_dir service_name + +let uri () = "file:" ^ default_path + module Errors = struct type error = | Internal_error of string @@ -148,3 +158,107 @@ module ObserverAPI (R : RPC) = struct declare "Observer.set_compress_tracing_files" [] (dbg_p @-> bool_p @-> returning unit_p err) end + +module type Server_impl = sig + type context = unit + + val create : + context + -> dbg:debug_info + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:debug_info -> uuid:string -> unit + + val set_enabled : + context -> dbg:debug_info -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:debug_info + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:debug_info -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:debug_info -> unit + + val set_trace_log_dir : context -> dbg:debug_info -> dir:string -> unit + + val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit + + val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit + + val set_host_id : context -> dbg:debug_info -> host_id:string -> unit + + val set_compress_tracing_files : + context -> dbg:debug_info -> enabled:bool -> unit +end + +module Server (Impl : Server_impl) () = struct + module S = ObserverAPI (Idl.Exn.GenServer ()) + + let _ = + S.create (fun dbg uuid name_label attributes endpoints enabled -> + Impl.create () ~dbg ~uuid ~name_label ~attributes ~endpoints ~enabled + ) ; + S.destroy (fun dbg uuid -> Impl.destroy () ~dbg ~uuid) ; + S.set_enabled (fun dbg uuid enabled -> + Impl.set_enabled () ~dbg ~uuid ~enabled + ) ; + S.set_attributes (fun dbg uuid attributes -> + Impl.set_attributes () ~dbg ~uuid ~attributes + ) ; + S.set_endpoints (fun dbg uuid endpoints -> + Impl.set_endpoints () ~dbg ~uuid ~endpoints + ) ; + S.init (fun dbg -> Impl.init () ~dbg) ; + S.set_trace_log_dir (fun dbg dir -> Impl.set_trace_log_dir () ~dbg ~dir) ; + S.set_export_interval (fun dbg interval -> + Impl.set_export_interval () ~dbg ~interval + ) ; + S.set_max_spans (fun dbg spans -> Impl.set_max_spans () ~dbg ~spans) ; + S.set_max_traces (fun dbg traces -> Impl.set_max_traces () ~dbg ~traces) ; + S.set_max_file_size (fun dbg file_size -> + Impl.set_max_file_size () ~dbg ~file_size + ) ; + S.set_host_id (fun dbg host_id -> Impl.set_host_id () ~dbg ~host_id) ; + S.set_compress_tracing_files (fun dbg enabled -> + Impl.set_compress_tracing_files () ~dbg ~enabled + ) + + (* Bind all *) + let process call = Idl.Exn.server S.implementation call +end + +let rec retry_econnrefused f = + try f () with + | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> + (* debug "Caught ECONNREFUSED; retrying in 5s"; *) + Thread.delay 5. ; retry_econnrefused f + | e -> + (* error "Caught %s: does the observer service need restarting?" + (Printexc.to_string e); *) + raise e + +module Client = ObserverAPI (Idl.Exn.GenClient (struct + open Xcp_client + + let rpc call = + retry_econnrefused (fun () -> + if !use_switch then + json_switch_rpc queue_name call + else + xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:queue_name uri call + ) +end)) diff --git a/ocaml/xapi-idl/lib/observer_skeleton.ml b/ocaml/xapi-idl/lib/observer_skeleton.ml new file mode 100644 index 00000000000..8cf5e2f5221 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_skeleton.ml @@ -0,0 +1,48 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +[@@@ocaml.warning "-27"] + +let u x = raise Observer_helpers.(Observer_error (Errors.Unimplemented x)) + +module Observer = struct + type context = unit + + let create ctx ~dbg ~uuid ~name_label ~attributes ~endpoints ~enabled = + u "Observer.create" + + let destroy ctx ~dbg ~uuid = u "Observer.destroy" + + let set_enabled ctx ~dbg ~uuid ~enabled = u "Observer.set_enabled" + + let set_attributes ctx ~dbg ~uuid ~attributes = u "Observer.set_attributes" + + let set_endpoints ctx ~dbg ~uuid ~endpoints = u "Observer.set_endpoints" + + let init ctx ~dbg = u "Observer.init" + + let set_trace_log_dir ctx ~dbg ~dir = u "Observer.set_trace_log_dir" + + let set_export_interval ctx ~dbg ~interval = u "Observer.set_export_interval" + + let set_max_spans ctx ~dbg ~spans = u "Observer.set_max_spans" + + let set_max_traces ctx ~dbg ~traces = u "Observer.set_max_traces" + + let set_max_file_size ctx ~dbg ~file_size = u "Observer.set_max_file_size" + + let set_host_id ctx ~dbg ~host_id = u "Observer.set_host_id" + + let set_compress_tracing_files ctx ~dbg ~enabled = + u "Observer.set_compress_tracing_files" +end diff --git a/ocaml/xapi-storage-script/dune b/ocaml/xapi-storage-script/dune index e1391aed2ca..f917e426ca5 100644 --- a/ocaml/xapi-storage-script/dune +++ b/ocaml/xapi-storage-script/dune @@ -41,6 +41,7 @@ sexplib sexplib0 uri + threads.posix xapi-backtrace xapi-consts xapi-consts.xapi_version diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 1b15a17f46e..e04a93203b3 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -411,19 +411,6 @@ let observer_config_dir = in dir // component // "enabled" -(** Determine if SM API observation is enabled from the - filesystem. Ordinarily, determining if a component is enabled - would consist of querying the 'components' field of an observer - from the xapi database. *) -let observer_is_component_enabled () = - let is_enabled () = - let is_config_file path = Filename.check_suffix path ".observer.conf" in - let* files = Sys.readdir observer_config_dir in - Lwt.return (List.exists is_config_file files) - in - let* result = Deferred.try_with is_enabled in - Lwt.return (Option.value (Result.to_option result) ~default:false) - (** Call the script named after the RPC method in the [script_dir] directory. The arguments (not the whole JSON-RPC call) are passed as JSON to its stdin, and stdout is returned. In case of a non-zero exit code, @@ -2247,6 +2234,19 @@ let register_exn_pretty_printers () = assert false ) +module XapiStorageScript : Observer_helpers.Server_impl = struct + include Observer_skeleton.Observer + + let create _context ~dbg:_ ~uuid:_ ~name_label:_ ~attributes:_ ~endpoints:_ + ~enabled = + config.use_observer <- enabled + + let destroy _context ~dbg:_ ~uuid:_ = config.use_observer <- false + + let set_enabled _context ~dbg:_ ~uuid:_ ~enabled = + config.use_observer <- enabled +end + let () = register_exn_pretty_printers () ; let root_dir = ref "/var/lib/xapi/storage-scripts" in @@ -2293,9 +2293,17 @@ let () = Logs.set_reporter (lwt_reporter ()) ; Logs.set_level ~all:true (Some Logs.Info) ; + + let module S = Observer_helpers.Server (XapiStorageScript) () in + let s = + Xcp_service.make ~path:Observer_helpers.default_path + ~queue_name:Observer_helpers.queue_name ~rpc_fn:S.process () + in + let (_ : Thread.t) = + Thread.create (fun () -> Xcp_service.serve_forever s) () + in + let main = - let* observer_enabled = observer_is_component_enabled () in - config.use_observer <- observer_enabled ; if !self_test_only then self_test ~root_dir:!root_dir else diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 0dd53865307..62d3ea4359c 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -383,6 +383,30 @@ end module SMObserverConfig = Dom0ObserverConfig (struct let component = SMApi end) +module SMObserver = struct + include SMObserverConfig + open Observer_helpers + + let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = + debug "SMObserver Observer.create %s" uuid ; + SMObserverConfig.create ~__context ~uuid ~name_label ~attributes ~endpoints + ~enabled ; + let dbg = Context.string_of_task __context in + Client.create dbg uuid name_label attributes endpoints enabled + + let destroy ~__context ~uuid = + debug "SMObserver Observer.destroy %s" uuid ; + SMObserverConfig.destroy ~__context ~uuid ; + let dbg = Context.string_of_task __context in + Client.destroy dbg uuid + + let set_enabled ~__context ~uuid ~enabled = + debug "SMObserver Observer.set_enabled %s" uuid ; + SMObserverConfig.set_enabled ~__context ~uuid ~enabled ; + let dbg = Context.string_of_task __context in + Client.set_enabled dbg uuid enabled +end + let get_forwarder c = let module Forwarder = ( val match c with @@ -393,7 +417,7 @@ let get_forwarder c = | Xapi_clusterd -> (module Xapi_cluster.Observer) | SMApi -> - (module SMObserverConfig) + (module SMObserver) : ObserverInterface ) in diff --git a/quality-gate.sh b/quality-gate.sh index f6540cb2a1f..6455846d21b 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=467 + N=469 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From 760fb260ccd2ba18f8660023f96d78ed6fde89a4 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 29 May 2025 10:28:35 +0100 Subject: [PATCH 235/492] CA-409949 CA-408048 XSI-1912 remove unabailable SM plugin by ref When we have multiple SM plugins in XAPI for the same type (which happens only because of past problems) and want to remove the obsolete one, do this iby reference. The code so far was assuming only one per type and looked up the reference by name which was not unique and hence could end up removing the wrong SM entry. Signed-off-by: Christian Lindig --- ocaml/xapi/storage_access.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index 64b69b28ffe..cda399e9d60 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -183,12 +183,14 @@ let on_xapi_start ~__context = let self, _ = List.assoc ty existing in try Db.SM.destroy ~__context ~self with _ -> () ) - (List.concat - [ - Listext.List.set_difference (List.map fst existing) to_keep - ; List.map fst unavailable - ] - ) ; + (Listext.List.set_difference (List.map fst existing) to_keep) ; + List.iter + (fun (name, (self, rc)) -> + info "%s: unregistering SM plugin %s (%s) since it is unavailable" + __FUNCTION__ name rc.API.sM_uuid ; + try Db.SM.destroy ~__context ~self with _ -> () + ) + unavailable ; (* Synchronize SMAPIv1 plugins *) From 6049a4a2614773c6a50f38639f103de9d3b5774c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 21 May 2025 16:31:33 +0100 Subject: [PATCH 236/492] xapi-types: remove dev errors when adding features When adding a feature, developers had to change the variant, and the list all_features. Now the list is autogenerated from the variant, and the compiler will complain if its properties are not defined. Also reduced complexity of the code in the rest of the module. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-types/dune | 6 +- ocaml/xapi-types/features.ml | 224 ++++++++++++++++++++--------------- 2 files changed, 130 insertions(+), 100 deletions(-) diff --git a/ocaml/xapi-types/dune b/ocaml/xapi-types/dune index ab33ae1f354..3a49a7dca2e 100644 --- a/ocaml/xapi-types/dune +++ b/ocaml/xapi-types/dune @@ -23,6 +23,8 @@ xapi-stdext-unix ) (wrapped false) - (preprocess (per_module ((pps ppx_deriving_rpc) API Event_types Features SecretString))) + (preprocess + (per_module + ((pps ppx_deriving_rpc) API Event_types SecretString) + ((pps ppx_deriving_rpc ppx_deriving.enum) Features))) ) - diff --git a/ocaml/xapi-types/features.ml b/ocaml/xapi-types/features.ml index 52469387acc..7453ab49a7c 100644 --- a/ocaml/xapi-types/features.ml +++ b/ocaml/xapi-types/features.ml @@ -68,79 +68,119 @@ type feature = | VM_groups | VM_start | VM_appliance_start -[@@deriving rpc] +[@@deriving rpc, enum] type orientation = Positive | Negative -let keys_of_features = - [ - (VLAN, ("restrict_vlan", Negative, "VLAN")) - ; (QoS, ("restrict_qos", Negative, "QoS")) - ; (Shared_storage, ("restrict_pool_attached_storage", Negative, "SStorage")) - ; (Netapp, ("restrict_netapp", Negative, "NTAP")) - ; (Equalogic, ("restrict_equalogic", Negative, "EQL")) - ; (Pooling, ("restrict_pooling", Negative, "Pool")) - ; (HA, ("enable_xha", Positive, "XHA")) - ; (Marathon, ("restrict_marathon", Negative, "MTC")) - ; (Email, ("restrict_email_alerting", Negative, "email")) - ; (Performance, ("restrict_historical_performance", Negative, "perf")) - ; (WLB, ("restrict_wlb", Negative, "WLB")) - ; (RBAC, ("restrict_rbac", Negative, "RBAC")) - ; (DMC, ("restrict_dmc", Negative, "DMC")) - ; (Checkpoint, ("restrict_checkpoint", Negative, "chpt")) - ; (CPU_masking, ("restrict_cpu_masking", Negative, "Mask")) - ; (Connection, ("restrict_connection", Negative, "Cnx")) - ; (No_platform_filter, ("platform_filter", Negative, "Plat")) - ; (No_nag_dialog, ("regular_nag_dialog", Negative, "nonag")) - ; (VMPR, ("restrict_vmpr", Negative, "VMPR")) - ; (VMSS, ("restrict_vmss", Negative, "VMSS")) - ; (IntelliCache, ("restrict_intellicache", Negative, "IntelliCache")) - ; (GPU, ("restrict_gpu", Negative, "GPU")) - ; (DR, ("restrict_dr", Negative, "DR")) - ; (VIF_locking, ("restrict_vif_locking", Negative, "VIFLock")) - ; (Storage_motion, ("restrict_storage_xen_motion", Negative, "SXM")) - ; (VGPU, ("restrict_vgpu", Negative, "vGPU")) - ; (Integrated_GPU, ("restrict_integrated_gpu_passthrough", Negative, "iGPU")) - ; (VSS, ("restrict_vss", Negative, "VSS")) - ; ( Guest_agent_auto_update - , ("restrict_guest_agent_auto_update", Negative, "GAAU") - ) - ; ( PCI_device_for_auto_update - , ("restrict_pci_device_for_auto_update", Negative, "PciAU") - ) - ; (Xen_motion, ("restrict_xen_motion", Negative, "Live_migration")) - ; (Guest_ip_setting, ("restrict_guest_ip_setting", Negative, "GuestIP")) - ; (AD, ("restrict_ad", Negative, "AD")) - ; (Nested_virt, ("restrict_nested_virt", Negative, "Nested_virt")) - ; (Live_patching, ("restrict_live_patching", Negative, "Live_patching")) - ; ( Live_set_vcpus - , ("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus") - ) - ; (PVS_proxy, ("restrict_pvs_proxy", Negative, "PVS_proxy")) - ; (IGMP_snooping, ("restrict_igmp_snooping", Negative, "IGMP_snooping")) - ; (RPU, ("restrict_rpu", Negative, "RPU")) - ; (Pool_size, ("restrict_pool_size", Negative, "Pool_size")) - ; (CBT, ("restrict_cbt", Negative, "CBT")) - ; (USB_passthrough, ("restrict_usb_passthrough", Negative, "USB_passthrough")) - ; (Network_sriov, ("restrict_network_sriov", Negative, "Network_sriov")) - ; (Corosync, ("restrict_corosync", Negative, "Corosync")) - ; (Cluster_address, ("restrict_cluster_address", Negative, "Cluster_address")) - ; (Zstd_export, ("restrict_zstd_export", Negative, "Zstd_export")) - ; ( Pool_secret_rotation - , ("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation") - ) - ; ( Certificate_verification - , ("restrict_certificate_verification", Negative, "Certificate_verification") - ) - ; (Updates, ("restrict_updates", Negative, "Upd")) - ; ( Internal_repo_access - , ("restrict_internal_repo_access", Negative, "Internal_repo_access") - ) - ; (VTPM, ("restrict_vtpm", Negative, "VTPM")) - ; (VM_groups, ("restrict_vm_groups", Negative, "VM_groups")) - ; (VM_start, ("restrict_vm_start", Negative, "Start")) - ; (VM_appliance_start, ("restrict_vm_appliance_start", Negative, "Start")) - ] +let props_of_feature = function + | VLAN -> + ("restrict_vlan", Negative, "VLAN") + | QoS -> + ("restrict_qos", Negative, "QoS") + | Shared_storage -> + ("restrict_pool_attached_storage", Negative, "SStorage") + | Netapp -> + ("restrict_netapp", Negative, "NTAP") + | Equalogic -> + ("restrict_equalogic", Negative, "EQL") + | Pooling -> + ("restrict_pooling", Negative, "Pool") + | HA -> + ("enable_xha", Positive, "XHA") + | Marathon -> + ("restrict_marathon", Negative, "MTC") + | Email -> + ("restrict_email_alerting", Negative, "email") + | Performance -> + ("restrict_historical_performance", Negative, "perf") + | WLB -> + ("restrict_wlb", Negative, "WLB") + | RBAC -> + ("restrict_rbac", Negative, "RBAC") + | DMC -> + ("restrict_dmc", Negative, "DMC") + | Checkpoint -> + ("restrict_checkpoint", Negative, "chpt") + | CPU_masking -> + ("restrict_cpu_masking", Negative, "Mask") + | Connection -> + ("restrict_connection", Negative, "Cnx") + | No_platform_filter -> + ("platform_filter", Negative, "Plat") + | No_nag_dialog -> + ("regular_nag_dialog", Negative, "nonag") + | VMPR -> + ("restrict_vmpr", Negative, "VMPR") + | VMSS -> + ("restrict_vmss", Negative, "VMSS") + | IntelliCache -> + ("restrict_intellicache", Negative, "IntelliCache") + | GPU -> + ("restrict_gpu", Negative, "GPU") + | DR -> + ("restrict_dr", Negative, "DR") + | VIF_locking -> + ("restrict_vif_locking", Negative, "VIFLock") + | Storage_motion -> + ("restrict_storage_xen_motion", Negative, "SXM") + | VGPU -> + ("restrict_vgpu", Negative, "vGPU") + | Integrated_GPU -> + ("restrict_integrated_gpu_passthrough", Negative, "iGPU") + | VSS -> + ("restrict_vss", Negative, "VSS") + | Guest_agent_auto_update -> + ("restrict_guest_agent_auto_update", Negative, "GAAU") + | PCI_device_for_auto_update -> + ("restrict_pci_device_for_auto_update", Negative, "PciAU") + | Xen_motion -> + ("restrict_xen_motion", Negative, "Live_migration") + | Guest_ip_setting -> + ("restrict_guest_ip_setting", Negative, "GuestIP") + | AD -> + ("restrict_ad", Negative, "AD") + | Nested_virt -> + ("restrict_nested_virt", Negative, "Nested_virt") + | Live_patching -> + ("restrict_live_patching", Negative, "Live_patching") + | Live_set_vcpus -> + ("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus") + | PVS_proxy -> + ("restrict_pvs_proxy", Negative, "PVS_proxy") + | IGMP_snooping -> + ("restrict_igmp_snooping", Negative, "IGMP_snooping") + | RPU -> + ("restrict_rpu", Negative, "RPU") + | Pool_size -> + ("restrict_pool_size", Negative, "Pool_size") + | CBT -> + ("restrict_cbt", Negative, "CBT") + | USB_passthrough -> + ("restrict_usb_passthrough", Negative, "USB_passthrough") + | Network_sriov -> + ("restrict_network_sriov", Negative, "Network_sriov") + | Corosync -> + ("restrict_corosync", Negative, "Corosync") + | Cluster_address -> + ("restrict_cluster_address", Negative, "Cluster_address") + | Zstd_export -> + ("restrict_zstd_export", Negative, "Zstd_export") + | Pool_secret_rotation -> + ("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation") + | Certificate_verification -> + ("restrict_certificate_verification", Negative, "Certificate_verification") + | Updates -> + ("restrict_updates", Negative, "Upd") + | Internal_repo_access -> + ("restrict_internal_repo_access", Negative, "Internal_repo_access") + | VTPM -> + ("restrict_vtpm", Negative, "VTPM") + | VM_groups -> + ("restrict_vm_groups", Negative, "VM_groups") + | VM_start -> + ("restrict_vm_start", Negative, "Start") + | VM_appliance_start -> + ("restrict_vm_appliance_start", Negative, "Start") (* A list of features that must be considered "enabled" by `of_assoc_list` if the feature string is missing from the list. These are existing features @@ -149,52 +189,40 @@ let keys_of_features = let enabled_when_unknown = [Xen_motion; AD; Updates; VM_start; VM_appliance_start] -let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc - -let string_of_feature f = - let str, o, _ = List.assoc f keys_of_features in - (str, o) +let all_features = + let length = max_feature - min_feature + 1 in + let start = min_feature in + List.init length (fun i -> feature_of_enum (i + start) |> Option.get) -let tag_of_feature f = - let _, _, tag = List.assoc f keys_of_features in - tag +let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc -let all_features = List.map (fun (f, _) -> f) keys_of_features +let is_enabled v = function Positive -> v | Negative -> not v let to_compact_string (s : feature list) = let get_tag f = - let tag = tag_of_feature f in + let _, _, tag = props_of_feature f in if List.mem f s then tag else String.make (String.length tag) ' ' in - let tags = List.map get_tag all_features in - String.concat " " tags + List.map get_tag all_features |> String.concat " " let to_assoc_list (s : feature list) = let get_map f = - let str, o = string_of_feature f in + let str, o, _ = props_of_feature f in let switch = List.mem f s in - let switch = string_of_bool (if o = Positive then switch else not switch) in + let switch = string_of_bool (is_enabled switch o) in (str, switch) in List.map get_map all_features let of_assoc_list l = - let get_feature f = + let enabled f = try - let str, o = string_of_feature f in - let v = bool_of_string (List.assoc str l) in - let v = if o = Positive then v else not v in - if v then Some f else None - with _ -> if List.mem f enabled_when_unknown then Some f else None + let str, o, _ = props_of_feature f in + let v = List.assoc str l in + is_enabled (bool_of_string v) o + with _ -> List.mem f enabled_when_unknown in - (* Filter_map to avoid having to carry the whole xapi-stdext-std - * Note that the following is not tail recursive, in this case I - * have chosen such implementation because the feature list is small - * and the implementation looks readable and fairly self-contained. - * Do not use this pattern for lists that can be long. *) - List.fold_right - (fun f acc -> match get_feature f with Some v -> v :: acc | None -> acc) - all_features [] + List.filter enabled all_features From ab90994b31ff0919dd06dba7f7d3e83f6c92e26d Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 28 May 2025 17:19:41 +0100 Subject: [PATCH 237/492] xenctrlext: add function to set the hard-affinity for vcpus I tried sharing more code between hard and soft affinities, but the memory management of the two cpumaps blows up the number of branches that need to be taken care of, making it more worthwhile to duplicate a bit of code instead. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/c_stubs/xenctrlext_stubs.c | 74 +++++++++++++++++------- ocaml/xenopsd/xc/xenctrlext.ml | 3 + ocaml/xenopsd/xc/xenctrlext.mli | 3 + 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c index d7f3fee8f5e..4af5e60c8ec 100644 --- a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c +++ b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c @@ -323,40 +323,72 @@ CAMLprim value stub_xenctrlext_domain_update_channels(value xch_val, value domid } /* based on xenctrl_stubs.c */ -static int get_cpumap_len(value xch_val, value cpumap) +static int get_cpumap_len(xc_interface *xch, value cpumap_val) { - xc_interface* xch = xch_of_val(xch_val); - int ml_len = Wosize_val(cpumap); + int ml_len = Wosize_val(cpumap_val); int xc_len = xc_get_max_cpus(xch); return (ml_len < xc_len ? ml_len : xc_len); } -CAMLprim value stub_xenctrlext_vcpu_setaffinity_soft(value xch_val, value domid, - value vcpu, value cpumap) +static void populate_cpumap(xc_interface *xch, xc_cpumap_t cpumap, + value cpumap_val) { - CAMLparam4(xch_val, domid, vcpu, cpumap); - int i, len = get_cpumap_len(xch_val, cpumap); - xc_cpumap_t c_cpumap; - int retval; + int i, len = get_cpumap_len(xch, cpumap_val); + for (i=0; i domid -> unit external domain_update_channels : handle -> domid -> int -> int -> unit = "stub_xenctrlext_domain_update_channels" +external vcpu_setaffinity_hard : handle -> domid -> int -> bool array -> unit + = "stub_xenctrlext_vcpu_setaffinity_hard" + external vcpu_setaffinity_soft : handle -> domid -> int -> bool array -> unit = "stub_xenctrlext_vcpu_setaffinity_soft" diff --git a/ocaml/xenopsd/xc/xenctrlext.mli b/ocaml/xenopsd/xc/xenctrlext.mli index 559842fac75..2199f42c452 100644 --- a/ocaml/xenopsd/xc/xenctrlext.mli +++ b/ocaml/xenopsd/xc/xenctrlext.mli @@ -78,6 +78,9 @@ type numainfo = {memory: meminfo array; distances: int array array} type cputopo = {core: int; socket: int; node: int} +external vcpu_setaffinity_hard : handle -> domid -> int -> bool array -> unit + = "stub_xenctrlext_vcpu_setaffinity_hard" + external vcpu_setaffinity_soft : handle -> domid -> int -> bool array -> unit = "stub_xenctrlext_vcpu_setaffinity_soft" From 836b3f17bb3c6f9e1a5744680e925b1a1684f094 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 29 May 2025 14:31:43 +0100 Subject: [PATCH 238/492] xenopsd: pass the hard-affinity map to pre_build No functional change. This prepares pre_build in the domain module to be able to set the hard affinity mask, without communicating the mask to xenguest through xenstore. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 17 ++++++++--------- ocaml/xenopsd/xc/domain.mli | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 19f28e41985..39a3b7e5191 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -153,7 +153,7 @@ type build_info = { ; kernel: string (** in hvm case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info - ; has_hard_affinity: bool [@default false] + ; hard_affinity: int list list [@default []] } [@@deriving rpcty] @@ -898,7 +898,7 @@ let numa_placement domid ~vcpus ~memory = None ) -let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = +let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = let open Memory in let uuid = get_uuid ~xc domid in debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" @@ -956,7 +956,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = None | Best_effort -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> - if has_hard_affinity then ( + if hard_affinity <> [] then ( D.debug "VM has hard affinity set, skipping NUMA optimization" ; None ) else @@ -1129,7 +1129,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid let target_kib = info.memory_target in let vcpus = info.vcpus in let kernel = info.kernel in - let has_hard_affinity = info.has_hard_affinity in + let hard_affinity = info.hard_affinity in let force_arg = if force then ["--force"] else [] in assert_file_is_readable kernel ; (* Convert memory configuration values into the correct units. *) @@ -1148,7 +1148,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid in maybe_ca_140252_workaround ~xc ~vcpus domid ; let store_port, console_port, numa_placement = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = @@ -1176,7 +1176,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid in Option.iter assert_file_is_readable pvinfo.ramdisk ; let store_port, console_port, numa_placement = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = @@ -1199,7 +1199,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid in maybe_ca_140252_workaround ~xc ~vcpus domid ; let store_port, console_port, numa_placement = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = @@ -1633,8 +1633,7 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid (memory, vm_stuff, `pvh) in let store_port, console_port, numa_placements = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity:info.has_hard_affinity - domid + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity:info.hard_affinity domid in let store_mfn, console_mfn = restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~store_domid diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index c8f83b0994a..4fac8ccde5a 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -133,7 +133,7 @@ type build_info = { ; kernel: string (** image to load. In HVM case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info - ; has_hard_affinity: bool + ; hard_affinity: int list list (** vcpu -> pcpu map *) } val typ_of_build_info : build_info Rpc.Types.typ diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 3d6b5cf7214..81af8998f1e 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1287,7 +1287,7 @@ module VM = struct ; kernel= "" ; vcpus= vm.vcpu_max ; priv= builder_spec_info - ; has_hard_affinity= vm.scheduler_params.affinity <> [] + ; hard_affinity= vm.scheduler_params.affinity } in VmExtra. @@ -2040,7 +2040,7 @@ module VM = struct ; kernel ; vcpus= vm.vcpu_max ; priv - ; has_hard_affinity= vm.scheduler_params.affinity <> [] + ; hard_affinity= vm.scheduler_params.affinity } in debug "static_max_mib=%Ld" static_max_mib ; From 8d6044fd1b911007dd7de44deefcea1b4d5ff24f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 29 May 2025 16:41:37 +0100 Subject: [PATCH 239/492] xenopsd: do not send hard affinities to xenguest when not needed When all the vcpus can run on all pcpus, there's no need to do any call to set the hard affinities, so omit this step. Because xenguest does the calls to set the affinities of the vcpus after the NUMA code can set the affinities, this frees up the latter to also set the hard affinities, not just the soft ones Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/xenops_server_xen.ml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 81af8998f1e..e3ae98c8181 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1314,13 +1314,12 @@ module VM = struct (* VCPU configuration *) let xcext = Xenctrlext.get_handle () in let pcpus = Xenctrlext.get_max_nr_cpus xcext in - let all_pcpus = mkints pcpus in let all_vcpus = mkints vm.vcpu_max in let masks = match vm.scheduler_params.affinity with | [] -> - (* Every vcpu can run on every pcpu *) - List.map (fun _ -> all_pcpus) all_vcpus + (* do not set affinity if it's missing *) + [] | m :: ms -> (* Treat the first as the template for the rest *) let defaults = List.map (fun _ -> m) all_vcpus in From 0c0037300d593e09a7e93edd893996b44d4495e7 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 29 May 2025 16:38:20 +0100 Subject: [PATCH 240/492] xenopsd: set the hard affinities directly when set by the user previously they were set up in a roundabout way, writing to xenstore so xenguest can read them and apply them. This reduces the churn needed to communicate this information and instead is done close to where the NUMA decisions are done. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 24 +++++++++++++++++++ ocaml/xenopsd/xc/xenops_server_xen.ml | 34 --------------------------- 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 39a3b7e5191..bbbefa89a54 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -950,6 +950,30 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = log_reraise (Printf.sprintf "shadow_allocation_set %d MiB" shadow_mib) (fun () -> Xenctrl.shadow_allocation_set xc domid shadow_mib ) ; + let apply_hard_vcpu_map () = + let xcext = Xenctrlext.get_handle () in + let pcpus = Xenctrlext.get_max_nr_cpus xcext in + let bitmap cpus : bool array = + (* convert a mask into a boolean array, one element per pCPU *) + let cpus = List.filter (fun x -> x >= 0 && x < pcpus) cpus in + let result = Array.init pcpus (fun _ -> false) in + List.iter (fun cpu -> result.(cpu) <- true) cpus ; + result + in + ( match hard_affinity with + | [] -> + [] + | m :: ms -> + (* Treat the first as the template for the rest *) + let all_vcpus = List.init vcpus Fun.id in + let defaults = List.map (fun _ -> m) all_vcpus in + Xapi_stdext_std.Listext.List.take vcpus ((m :: ms) @ defaults) + ) + |> List.iteri (fun vcpu mask -> + Xenctrlext.vcpu_setaffinity_hard xcext domid vcpu (bitmap mask) + ) + in + apply_hard_vcpu_map () ; let node_placement = match !Xenops_server.numa_placement with | Any -> diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index e3ae98c8181..a1a37085659 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1303,8 +1303,6 @@ module VM = struct |> rpc_of VmExtra.persistent_t |> Jsonrpc.to_string - let mkints n = List.init n Fun.id - let generate_create_info ~xs:_ vm persistent = let ty = match persistent.VmExtra.ty with Some ty -> ty | None -> vm.ty in let hvm = @@ -1312,37 +1310,6 @@ module VM = struct in (* XXX add per-vcpu information to the platform data *) (* VCPU configuration *) - let xcext = Xenctrlext.get_handle () in - let pcpus = Xenctrlext.get_max_nr_cpus xcext in - let all_vcpus = mkints vm.vcpu_max in - let masks = - match vm.scheduler_params.affinity with - | [] -> - (* do not set affinity if it's missing *) - [] - | m :: ms -> - (* Treat the first as the template for the rest *) - let defaults = List.map (fun _ -> m) all_vcpus in - Xapi_stdext_std.Listext.List.take vm.vcpu_max ((m :: ms) @ defaults) - in - (* convert a mask into a binary string, one char per pCPU *) - let bitmap cpus : string = - let cpus = List.filter (fun x -> x >= 0 && x < pcpus) cpus in - let result = Bytes.make pcpus '0' in - List.iter (fun cpu -> Bytes.set result cpu '1') cpus ; - Bytes.unsafe_to_string result - in - let affinity = - snd - (List.fold_left - (fun (idx, acc) mask -> - ( idx + 1 - , (Printf.sprintf "vcpu/%d/affinity" idx, bitmap mask) :: acc - ) - ) - (0, []) masks - ) - in let weight = vm.scheduler_params.priority |> Option.map (fun (w, c) -> @@ -1358,7 +1325,6 @@ module VM = struct (match vm.ty with PVinPVH _ -> vm.vcpu_max | _ -> vm.vcpus) ) ] - @ affinity @ weight in let set_generation_id platformdata = From 02fca09cef990880b5f7f57a83b74fa0671e50fe Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 28 May 2025 17:23:21 +0100 Subject: [PATCH 241/492] xenopsd: expose a best-effort mode that set the hard affinity mask (CP-54234) This allows xapi to "hard-pin" instead of "soft-pin". This is useful to restrict the scheduler to a single numa node, instead of letting it to move the vpus across nodes. This must be used with care because the effect of overprovisioning CPUs in this mode is unknown and will probalby have undesired effects. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-idl/xen/xenops_interface.ml | 7 +++++-- ocaml/xenopsd/lib/xenops_server.ml | 15 ++++++++++++++- ocaml/xenopsd/xc/domain.ml | 16 +++++++++++++--- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 4c9da479a78..a883152207a 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -496,9 +496,12 @@ module Host = struct [@@deriving rpcty] type numa_affinity_policy = - | Any (** VMs may run on any NUMA nodes. This is the default in 8.2CU1 *) + | Any (** VMs may run on any NUMA nodes. *) | Best_effort - (** best effort placement on the smallest number of NUMA nodes where possible *) + (** Best-effort placement. Assigns the memory of the VM to a single + node, and soft-pins its VCPUs to the node, if possible. Otherwise + behaves like Any. *) + | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) [@@deriving rpcty] type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 0b7dc1130a1..9b109c1c980 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3570,8 +3570,21 @@ let default_numa_affinity_policy = ref Xenops_interface.Host.Best_effort let numa_placement = ref !default_numa_affinity_policy +type affinity = Soft | Hard + let string_of_numa_affinity_policy = - Xenops_interface.Host.(function Any -> "any" | Best_effort -> "best-effort") + let open Xenops_interface.Host in + function + | Any -> + "any" + | Best_effort -> + "best-effort" + | Best_effort_hard -> + "best-effort-hard" + +let affinity_of_numa_affinity_policy = + let open Xenops_interface.Host in + function Any | Best_effort -> Soft | Best_effort_hard -> Hard module HOST = struct let stat _ dbg = diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index bbbefa89a54..c1561b862a5 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -857,7 +857,13 @@ let numa_init () = ) mem -let numa_placement domid ~vcpus ~memory = +let set_affinity = function + | Xenops_server.Hard -> + Xenctrlext.vcpu_setaffinity_hard + | Xenops_server.Soft -> + Xenctrlext.vcpu_setaffinity_soft + +let numa_placement domid ~vcpus ~memory affinity = let open Xenctrlext in let open Topology in with_lock numa_mutex (fun () -> @@ -888,7 +894,7 @@ let numa_placement domid ~vcpus ~memory = | Some (cpu_affinity, mem_plan) -> let cpus = CPUSet.to_mask cpu_affinity in for i = 0 to vcpus - 1 do - Xenctrlext.vcpu_setaffinity_soft xcext domid i cpus + set_affinity affinity xcext domid i cpus done ; mem_plan in @@ -978,14 +984,18 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = match !Xenops_server.numa_placement with | Any -> None - | Best_effort -> + | (Best_effort | Best_effort_hard) as pin -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> if hard_affinity <> [] then ( D.debug "VM has hard affinity set, skipping NUMA optimization" ; None ) else + let affinity = + Xenops_server.affinity_of_numa_affinity_policy pin + in numa_placement domid ~vcpus ~memory:(Int64.mul memory.xen_max_mib 1048576L) + affinity |> Option.map fst ) in From 18c952fd8910f7b7adff7ee05f79361c86756edf Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 29 May 2025 10:57:43 +0100 Subject: [PATCH 242/492] xapi: use hard-pinning with best-effort as an experimental feature (CP-54234) This allows users to enable the feature on a host by running: echo 1 > /etc/xenserver/features.d/hard_numa xe host-apply-edition edition=${CURRENT_EDITION} host-uuid=${HOST_UUID} where CURRENT_EDITION is xe host-param-get param-name=edition uuid=${HOST_UUID} set best-effort mode if it wasn't already set: xe host-param-set uuid=${HOST-UUID} numa-affinity-policy=best-effort and finally restart the toolstack: xe-toolstack-restart Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_xenops.ml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index ce98dcd3a9d..2f0add74368 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -3110,6 +3110,12 @@ let resync_all_vms ~__context = in List.iter (fun vm -> refresh_vm ~__context ~self:vm) resident_vms_in_db +(* experimental feature for hard-pinning vcpus *) +let hard_numa_enabled ~__context = + let pool = Helpers.get_pool ~__context in + let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in + List.assoc_opt "restrict_hard_numa" restrictions = Some "false" + let set_numa_affinity_policy ~__context ~value = let dbg = Context.string_of_task __context in let open Xapi_xenops_queue in @@ -3119,6 +3125,8 @@ let set_numa_affinity_policy ~__context ~value = match value with | `any -> Some Any + | `best_effort when hard_numa_enabled ~__context -> + Some Best_effort_hard | `best_effort -> Some Best_effort | `default_policy -> From 3f21deafa2312edffa924d9fc4cf100603d977f1 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 3 Jun 2025 04:24:10 +0100 Subject: [PATCH 243/492] CA-411679: Add min/max to runstate metrics Signed-off-by: Bengang Yuan --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index e3b86db975b..8ab1acaba70 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -63,7 +63,7 @@ let dss_vcpus xc doms = , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) ~description:"Fraction of time that all VCPUs are running" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" @@ -71,7 +71,7 @@ let dss_vcpus xc doms = ~description: "Fraction of time that all VCPUs are runnable (i.e., \ waiting for CPU)" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_concurrency_hazard" @@ -80,14 +80,14 @@ let dss_vcpus xc doms = ~description: "Fraction of time that some VCPUs are running and some are \ runnable" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) ~description: "Fraction of time that all VCPUs are blocked or offline" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" @@ -95,7 +95,7 @@ let dss_vcpus xc doms = ~description: "Fraction of time that some VCPUs are running, and some are \ blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_contention" @@ -104,7 +104,7 @@ let dss_vcpus xc doms = ~description: "Fraction of time that some VCPUs are runnable and some are \ blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make From 78d25e3f2bd8b988f3bb1121b177a37437c45980 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 3 Jun 2025 04:22:56 +0100 Subject: [PATCH 244/492] CA-411679: Runstate metrics return data over 100% To handle deviations in CPU rates, Derive values exceeding the maximum by up to 5% are capped at the maximum; others are marked as unknown. This logic is specific to Derive data sources because they represent rates derived from differences over time, which can occasionally exceed expected bounds due to measurement inaccuracies. Signed-off-by: Bengang Yuan --- ocaml/libs/xapi-rrd/lib/rrd.ml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index b4c827705c9..bb516ea6a28 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -468,11 +468,23 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = in (* Apply the transform after the raw value has been calculated *) let raw = apply_transform_function transform raw in + (* Make sure the values are not out of bounds after all the processing *) - if raw < ds.ds_min || raw > ds.ds_max then - (i, nan) - else - (i, raw) + match (ds.ds_ty, raw) with + | Derive, _ when raw > ds.ds_max && raw < ds.ds_max *. (1. +. 0.05) + -> + (* CA-411679: To handle deviations in CPU rates, Derive values + exceeding the maximum by up to 5% are capped at the maximum; + others are marked as unknown. This logic is specific to + Derive data sources because they represent rates derived + from differences over time, which can occasionally exceed + expected bounds due to measurement inaccuracies. *) + (i, ds.ds_max) + | (Derive | Gauge | Absolute), _ + when raw < ds.ds_min || raw > ds.ds_max -> + (i, nan) + | (Derive | Gauge | Absolute), _ -> + (i, raw) ) valuesandtransforms in From 1356e9c9adb546f2e177bf3145c0af275f71515b Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 3 Jun 2025 07:43:38 +0100 Subject: [PATCH 245/492] Modify doc mistakes 1. `operatoin` should be `operation`. 2. Miss a parameter `vm_name_label` during formatting. Signed-off-by: Bengang Yuan --- doc/content/toolstack/features/events/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/content/toolstack/features/events/index.md b/doc/content/toolstack/features/events/index.md index 3d76d4db927..98bdf17e6ae 100644 --- a/doc/content/toolstack/features/events/index.md +++ b/doc/content/toolstack/features/events/index.md @@ -72,9 +72,9 @@ while True: events = session.xenapi.event.next() # block until a xapi event on a xapi DB object is available for event in events: print "received event op=%s class=%s ref=%s" % (event['operation'], event['class'], event['ref']) - if event['class'] == 'vm' and event['operatoin'] == 'mod': + if event['class'] == 'vm' and event['operation'] == 'mod': vm = event['snapshot'] - print "xapi-event on vm: vm_uuid=%s, power_state=%s, current_operation=%s" % (vm['uuid'],vm['name_label'],vm['power_state'],vm['current_operations'].values()) + print "xapi-event on vm: vm_uuid=%s, vm_name_label=%s, power_state=%s, current_operation=%s" % (vm['uuid'],vm['name_label'],vm['power_state'],vm['current_operations'].values()) except XenAPI.Failure, e: if len(e.details) > 0 and e.details[0] == 'EVENTS_LOST': session.xenapi.event.unregister(["VM","pool"]) From d5a6e88fc2ae65947b376a7ac73df98e285e4d19 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 23 May 2025 09:28:58 +0100 Subject: [PATCH 246/492] CONTRIBUTING: add some initial guidelines Signed-off-by: Andrii Sultanov --- CONTRIBUTING.md | 162 ++++++++++++++++++++++++++++++++++++++++++++++++ README.markdown | 3 + 2 files changed, 165 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..34b62707ea4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,162 @@ +# Issues + +We welcome reports of technical issues with the components of the xen-api +toolstack. Please make sure that the description of the issue is as detailed as +possible to help anyone investigating it: + +1) Mention how it was detected, if and how it could be reproduced + +1) What's the desired behaviour? In what cases would it be useful? + +1) Include error messages, related logs if appropriate + +# Pull Requests + +To contribute changes to xen-api, please fork the repository on +GitHub, and then submit a pull request. + +It is required to add a `Signed-off-by:` as a +[Developers Certificate of Origin](http://developercertificate.org). +It certifies the patch's origin and is licensed under an +appropriate open-source licence to include it in Xapi: +https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff + +The following points are intended to describe what makes a contribution "good" - +easier to review, integrate, and maintain. Please follow them in your work. + +## Commit subjects and PR titles + +Commit subjects should preferrably start with the name of the component the +commit is most related to, and describe what the commit achieves. If your +commit only touches the `ocaml/xenopsd` directory, it should look like this, +for example: + +``` +xenopsd: Fix a deadlock during VM suspend +``` + +Similar principle applies to Pull Request titles. If there is only a single +commit in the PR, Github will automatically copy its subject and description to +the PR's title and body. If there are several commits in the PR, describe what +the PR achieves and the components it most directly impacts. + +If the commit subject includes some tracking identifier (such as `CP-1234`, for +example) referring to internal systems, please make sure to include all of the +essential information in the public descriptions - describe the symptoms of the +issue, how it was detected, investigated, how it could be reproduced, what are +the trade-offs and so on as appropriate. + +## Split into commits + +Following from the rules described above, if what the commit achieves is +difficult to fit into its subject, it is probably better to split it into +several commits, if possible. Note that every commit should build (`make` +should work and the CI should pass) independently, without requiring future +commits. This means some modifications can't really be split into several +commits (datamodel changes, in particular, require modifications to several +components at the same time), but makes it easier to revert part of the Pull +Request if some issues are detected in integration testing at a later point. + +## Good Commit Messages + +Commit messages (and the body of a Pull Request) should be as helpful and +descriptive as possible. If applicable, please include a description of current +behaviour, your changes, and the new behaviour. Justify the reasoning behind +your changes - are they sufficient on their own, or preparing for more changes? +Link any appropriate documentation, issues, or commits (avoiding internal and +publicly inaccessible sources) + +## CI + +Please make sure your Pull Request passes the Github CI. It will verify that +your code has been properly formatted (can be done locally with `make format`), +builds (`make` and `make check`), and passes the unit tests (`make test`). +The CI will run in the branches of your fork, so you can verify it passes +there before opening a Pull Request. + +## Testing + +Describe what kind of testing your contribution underwent. If the testing was +manual, please describe the commands or external clients that were used. If the +tests were automated, include at least a cursory description/name of the tests, +when they were regressed, if possible. + +Please note that any contribution to the code of the project will likely +require at least some testing to be done. Depending on how central the +component touched in your PR is to the system, the more things could only be +detected in real-world usecases through integration testing. + +If a commit has been determined to break integration testing at a later stage, +please note that the first and safest measure will almost always be reverting +the faulty commit. Making sure critical tests are passing remains a priority +over waiting for some commit to be reworked or refactored (which can be worked +on after a revert has been done). Though we are striving to make more tests +public (with failure then being visible to all), as long as some critical tests +remain private, this will also apply to such tests (with maintainers flagging +the breakage preferrably describing at least the gist of the test). + +If you are still waiting on some testing to be done, please mark the PR as a +"draft" and make the reasoning clear. + +If wider testing is needed (e.g. the change itself is believed to be correct +but may expose latent bugs in other components), lightweight feature flags can +also be used. E.g. an entry in `xapi_globs.ml` and `xapi.conf`, where the +feature/change is defaulted to `off`, to be turned on at a future time +(when e.g. more related PRs land, or it has passed some wider testing). + +If your contribution doesn't intend to have any functional changes, please make +that clear as well. + +## Feature work + +If your contribution adds some new feature or reworks some major aspect of the +system (as opposed to one-off fixes), it can be benefitial to first describe +the plan of your work in a design proposal. Architectural issues are better +spotted early on, and taking a big-picture view can often lead to new insights. + +An example of a design proposal is here: + +https://github.com/xapi-project/xen-api/pull/6387 + +If submitting a design first is not possible, include documentation alongside +with your PR describing the work, like it was done in the last three commits +here: + +https://github.com/xapi-project/xen-api/pull/6457 + +Note that the design will often serve as documentation as well - so take care +updating it after the implementation is done to better reflect reality. + +## Review process and merge + +It can often be useful to address review suggestions with a "fixup" commit +(created manually or with the help of `git commit --fixup=HASH`). This way it +is clear what the original code was and what your fix touches. Once the +fixup commit has been reviewed and the PR approved, please squash the fixup +commits with `git rebase --autosquash` before merging. Otherwise the commits in +the Pull Request should stay as independent commits - we do not require +squashing all the commits into a single one on merge. + +If the commit fixes a bug in an earlier, already merged PR then it might be +useful to mention that in the commit, if known. + +This can be done by adding this to your GIT configuration: + +``` +[pretty] + fixes = Fixes: %h (\"%s\") +``` + +And then running: + +``` +# git log -1 --pretty=fixes +Fixes: 1c581c074 ("xenopsd: Fix a deadlock during VM suspend") +``` + +This will print the commit title and hash in a nice format, which can then be +added to the footer of the commit message (alongside the sign-off). + +This is useful information to have if any of these commits get backported to +another release in the future, so that we also backport the bugfixes, not just +the buggy commits. diff --git a/README.markdown b/README.markdown index b41ab950d87..9f795d85506 100644 --- a/README.markdown +++ b/README.markdown @@ -108,6 +108,9 @@ It certifies the patch's origin and is licensed under an appropriate open-source licence to include it in Xapi: https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff +For more detailed guidelines on what makes a good contribution, see +[CONTRIBUTING](./CONTRIBUTING.md). + Discussions ----------- From c7de2ce5ac22eb66cd01628cd3891bb18d037ea7 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 3 Jun 2025 15:14:39 +0100 Subject: [PATCH 247/492] Removed PowerShell 5.x build due to the retirement of windows-2019. Signed-off-by: Konstantina Chremmou --- .github/workflows/generate-and-build-sdks.yml | 74 ------------------- .github/workflows/release.yml | 11 --- 2 files changed, 85 deletions(-) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index ca1a67a4c78..4083db393c9 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -200,80 +200,6 @@ jobs: name: SDK_Binaries_CSharp path: source/src/bin/Release/XenServer.NET.${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned.nupkg - build-powershell-5x-sdk: - name: Build PowerShell 5.x SDK (.NET Framework 4.5) - needs: build-csharp-sdk - # PowerShell SDK for PowerShell 5.x needs to run on windows-2019 because - # windows-2022 doesn't contain .NET Framework 4.x dev tools - runs-on: windows-2019 - permissions: - contents: read - - steps: - - name: Strip 'v' prefix from xapi version - shell: pwsh - run: echo "XAPI_VERSION_NUMBER=$("${{ inputs.xapi_version }}".TrimStart('v'))" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - - name: Retrieve PowerShell SDK source - uses: actions/download-artifact@v4 - with: - name: SDK_Source_PowerShell - path: source/ - - - name: Retrieve C# SDK binaries - uses: actions/download-artifact@v4 - with: - name: SDK_Binaries_CSharp - path: csharp/ - - # Following needed for restoring packages - # when calling dotnet add package - - name: Set up dotnet CLI (.NET 6.0 and 8.0) - uses: actions/setup-dotnet@v4 - with: - dotnet-version: | - 6 - 8 - - - name: Setup project and dotnet CLI - shell: pwsh - run: | - dotnet nuget add source --name local ${{ github.workspace }}\csharp - dotnet add source/src package XenServer.NET --version ${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned - - - name: Build PowerShell SDK (.NET Framework 4.5) - shell: pwsh - run: | - dotnet build source/src/XenServerPowerShell.csproj ` - --disable-build-servers ` - --configuration Release ` - -p:Version=${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned ` - -p:TargetFramework=net45 ` - --verbosity=normal` - - - name: Update SDK and PS versions in "XenServerPSModule.psd1" - shell: pwsh - run: | - (Get-Content "source\XenServerPSModule.psd1") -replace "@SDK_VERSION@","${{ env.XAPI_VERSION_NUMBER }}" | Set-Content -Path "source\XenServerPSModule.psd1" - (Get-Content "source\XenServerPSModule.psd1") -replace "@PS_VERSION@","5.0" | Set-Content -Path "source\XenServerPSModule.psd1" - - - name: Move binaries to destination folder - shell: pwsh - run: | - New-Item -Path "." -Name "output" -ItemType "directory" - Copy-Item -Verbose "source\README_51.md" -Destination "output" -Force - Copy-Item -Verbose "source\LICENSE" -Destination "output" -Force - Copy-Item -Path "source\src\bin\Release\net45\*" -Include "*.dll" "output\" - Get-ChildItem -Path "source" |` - Where-Object { $_.Extension -eq ".ps1" -or $_.Extension -eq ".ps1xml" -or $_.Extension -eq ".psd1" -or $_.Extension -eq ".txt" } |` - ForEach-Object -Process { Copy-Item -Verbose $_.FullName -Destination "output" } - - - name: Store PowerShell SDK (.NET Framework 4.5) - uses: actions/upload-artifact@v4 - with: - name: SDK_Binaries_XenServerPowerShell_NET45 - path: output/**/* - build-powershell-7x-sdk: name: Build PowerShell 7.x SDK strategy: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d766f4f1e4a..9c892846e1e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -83,12 +83,6 @@ jobs: name: SDK_Binaries_CSharp path: dist/ - - name: Retrieve PowerShell 5.x SDK distribution artifacts - uses: actions/download-artifact@v4 - with: - name: SDK_Binaries_XenServerPowerShell_NET45 - path: sdk_powershell_5x/ - - name: Retrieve PowerShell 7.x SDK distribution artifacts uses: actions/download-artifact@v4 with: @@ -104,10 +98,6 @@ jobs: rm -rf libxenserver/usr/local/lib/ tar -zcvf libxenserver-prerelease.src.tar.gz -C ./libxenserver/usr/local . - - name: Zip PowerShell 5.x SDK artifacts for deployment - shell: bash - run: zip PowerShell-SDK-5.x-prerelease-unsigned.zip ./sdk_powershell_5x -r - - name: Zip PowerShell 7.x SDK artifacts for deployment shell: bash run: zip PowerShell-SDK-7.x-prerelease-unsigned.zip ./sdk_powershell_7x -r @@ -120,7 +110,6 @@ jobs: shell: bash run: | gh release create ${{ github.ref_name }} --repo ${{ github.repository }} --generate-notes dist/* \ - PowerShell-SDK-5.x-prerelease-unsigned.zip \ PowerShell-SDK-7.x-prerelease-unsigned.zip \ Go-SDK-prerelease-unsigned.zip \ libxenserver-prerelease.tar.gz libxenserver-prerelease.src.tar.gz From 8a5111d63e9658beaedd470ffa9f4e31c12867b0 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 2 Jun 2025 15:25:32 +0100 Subject: [PATCH 248/492] Add file-upload support to xe host-call-plugin To pass file content to a plugin, vm-call-plugin supports passing the content of a client-side file as a parameter. This is missing for host-call-plugin - this patch adds it. Otherwise is difficult to pass anything beyond a short string to a plugin. Signed-off-by: Christian Lindig --- ocaml/xapi-cli-server/cli_frontend.ml | 5 ++-- ocaml/xapi-cli-server/cli_operations.ml | 36 +++++++++++++------------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 389b880a268..57861e95001 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -958,8 +958,9 @@ let rec cmdtable_data : (string * cmd_spec) list = ; optn= ["args:"] ; help= "Calls the function within the plugin on the given host with \ - optional arguments." - ; implementation= No_fd Cli_operations.host_call_plugin + optional arguments. The syntax args:key:file=/path/file.ext passes \ + the content of /path/file.ext under key to the plugin." + ; implementation= With_fd Cli_operations.host_call_plugin ; flags= [] } ) diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 431cc76fa80..25e4c84ce79 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3490,28 +3490,29 @@ let vm_memory_target_wait printer rpc session_id params = params [] ) +(** This implements the key:file=/path/to/file.txt syntax. The value for + key is the content of a file requested from the client *) +let args_file fd ((k, v) as p) = + match Astring.String.cut ~sep:":" k with + | Some (key, "file") -> ( + match get_client_file fd v with + | Some s -> + (key, s) + | None -> + marshal fd + (Command (PrintStderr (Printf.sprintf "Failed to read file %s\n" v))) ; + raise (ExitWithError 1) + ) + | _ -> + p + let vm_call_plugin fd printer rpc session_id params = let vm_uuid = List.assoc "vm-uuid" params in let vm = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in let plugin = List.assoc "plugin" params in let fn = List.assoc "fn" params in let args = read_map_params "args" params in - (* Syntax interpretation: args:key:file=filename equals args:key=filename_content *) - let convert ((k, v) as p) = - match Astring.String.cut ~sep:":" k with - | Some (key, "file") -> ( - match get_client_file fd v with - | Some s -> - (key, s) - | None -> - marshal fd - (Command (PrintStderr (Printf.sprintf "Failed to read file %s\n" v))) ; - raise (ExitWithError 1) - ) - | _ -> - p - in - let args = List.map convert args in + let args = List.map (args_file fd) args in let result = Client.VM.call_plugin ~rpc ~session_id ~vm ~plugin ~fn ~args in printer (Cli_printer.PList [result]) @@ -6907,12 +6908,13 @@ let host_set_hostname_live _printer rpc session_id params = let hostname = List.assoc "host-name" params in Client.Host.set_hostname_live ~rpc ~session_id ~host ~hostname -let host_call_plugin printer rpc session_id params = +let host_call_plugin fd printer rpc session_id params = let host_uuid = List.assoc "host-uuid" params in let host = Client.Host.get_by_uuid ~rpc ~session_id ~uuid:host_uuid in let plugin = List.assoc "plugin" params in let fn = List.assoc "fn" params in let args = read_map_params "args" params in + let args = List.map (args_file fd) args in let result = Client.Host.call_plugin ~rpc ~session_id ~host ~plugin ~fn ~args in From cf89d655a8d6c2d64736ee7d9f5dd26de94ef7ce Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Tue, 3 Jun 2025 10:59:15 +0000 Subject: [PATCH 249/492] CP-53475 Update release number to latest tag Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/idl/datamodel_host.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index e51b59eb573..f0bce099389 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1304,7 +1304,7 @@ let create_params = param_type= Bool ; param_name= "ssh_enabled" ; param_doc= "True if SSH access is enabled for the host" - ; param_release= numbered_release "25.14.0-next" + ; param_release= numbered_release "25.20.0-next" ; param_default= Some (VBool Constants.default_ssh_enabled) } ; { @@ -1314,7 +1314,7 @@ let create_params = "The timeout in seconds after which SSH access will be automatically \ disabled (0 means never), this setting will be applied every time the \ SSH is enabled by XAPI" - ; param_release= numbered_release "25.14.0-next" + ; param_release= numbered_release "25.20.0-next" ; param_default= Some (VInt Constants.default_ssh_enabled_timeout) } ; { @@ -1323,7 +1323,7 @@ let create_params = ; param_doc= "The time in UTC after which the SSH access will be automatically \ disabled" - ; param_release= numbered_release "25.14.0-next" + ; param_release= numbered_release "25.20.0-next" ; param_default= Some (VDateTime Date.epoch) } ; { @@ -1332,7 +1332,7 @@ let create_params = ; param_doc= "The timeout in seconds after which idle console will be automatically \ terminated (0 means never)" - ; param_release= numbered_release "25.14.0-next" + ; param_release= numbered_release "25.20.0-next" ; param_default= Some (VInt Constants.default_console_idle_timeout) } ] @@ -1348,7 +1348,7 @@ let create = kept for host joined a pool" ) ; ( Changed - , "25.14.0-next" + , "25.20.0-next" , "Added --ssh_enabled --ssh_enabled_timeout --ssh_expiry \ --console_idle_timeout options to allow them to be configured for \ new host" From cba2f1d5e4340b352f32effeacb023a0315d671b Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 30 May 2025 18:52:59 +0800 Subject: [PATCH 250/492] XSI-1918: Host can not join pool after enable external auth commit 6ac37d770 add localhost name to /etc/hosts There problem is, instead of appending FQDN to the end of the line, the entry should in following format: IP FQDN [alias1] [alias2] Besides, another non-regression is found. The host need to change its static IP and DNS to join AD, However, then the Host record is not send to DNS, cause the pool member can not resovle each other. To resovle the issue, a task is scheduled to sync the hostname to DNS. Signed-off-by: Lin Liu --- ocaml/tests/test_extauth_plugin_ADwinbind.ml | 20 ++-- ocaml/xapi/extauth_plugin_ADwinbind.ml | 111 +++++++++++++++++-- ocaml/xapi/xapi_globs.ml | 3 + 3 files changed, 117 insertions(+), 17 deletions(-) diff --git a/ocaml/tests/test_extauth_plugin_ADwinbind.ml b/ocaml/tests/test_extauth_plugin_ADwinbind.ml index 5fe5bfc91cd..2244e9ddde2 100644 --- a/ocaml/tests/test_extauth_plugin_ADwinbind.ml +++ b/ocaml/tests/test_extauth_plugin_ADwinbind.ml @@ -517,17 +517,17 @@ let test_add_ipv4_localhost_to_hosts = localhost4.localdomain4" ] , [ - "127.0.0.1 localhost localhost.localdomain localhost4 \ - localhost4.localdomain4 hostname hostname.domain" + "127.0.0.1 hostname.domain hostname localhost \ + localhost.localdomain localhost4 localhost4.localdomain4" ] ) ; ( ["127.0.0.1 localhost hostname hostname.domain localhost.localdomain"] - , ["127.0.0.1 localhost localhost.localdomain hostname hostname.domain"] + , ["127.0.0.1 hostname.domain hostname localhost localhost.localdomain"] ) ; ( ["192.168.0.1 some_host"] - , ["127.0.0.1 hostname hostname.domain"; "192.168.0.1 some_host"] + , ["127.0.0.1 hostname.domain hostname"; "192.168.0.1 some_host"] ) - ; ([], ["127.0.0.1 hostname hostname.domain"]) + ; ([], ["127.0.0.1 hostname.domain hostname"]) ] in matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) @@ -549,18 +549,18 @@ let test_add_ipv4_and_ipv6_localhost_to_hosts = [ ( ["127.0.0.1 localhost"] , [ - "::1 hostname hostname.domain" - ; "127.0.0.1 localhost hostname hostname.domain" + "::1 hostname.domain hostname" + ; "127.0.0.1 hostname.domain hostname localhost" ] ) ; ( ["127.0.0.1 localhost"; "::1 localhost"] , [ - "127.0.0.1 localhost hostname hostname.domain" - ; "::1 localhost hostname hostname.domain" + "127.0.0.1 hostname.domain hostname localhost" + ; "::1 hostname.domain hostname localhost" ] ) ; ( [] - , ["127.0.0.1 hostname hostname.domain"; "::1 hostname hostname.domain"] + , ["127.0.0.1 hostname.domain hostname"; "::1 hostname.domain hostname"] ) ] in diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index b3458478e3e..b4f075a4dc4 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -1347,13 +1347,28 @@ module HostsConfFunc (T : LocalHostTag) : HostsConf = struct let name = String.lowercase_ascii name in let domain = String.lowercase_ascii domain in let fqdn = Printf.sprintf "%s.%s" name domain in + let rec add_hostname pre line = + match line with + | ip :: alias when ip = T.local_ip -> + (* Add localhost IP *) + add_hostname [ip] alias + | sp :: left when sp = "" -> + (* Add space to reserve the indent *) + add_hostname (pre @ [sp]) left + | alias :: left -> + (* hosts entry: ip fqdn alias1 alias2 ... *) + pre @ [fqdn; name; alias] @ left + | [] -> + failwith "Can not add local hostname to non local IP" + in + match interest line with | false -> line | true -> String.split_on_char sep line |> List.filter (fun x -> x <> name && x <> fqdn) - |> (fun x -> match op with Add -> x @ [name; fqdn] | Remove -> x) + |> (fun x -> match op with Add -> add_hostname [] x | Remove -> x) |> String.concat sep_str let leave ~name ~domain ~lines = @@ -1369,8 +1384,8 @@ module HostsConfFunc (T : LocalHostTag) : HostsConf = struct | false -> (* Does not found and updated the conf, then add one *) [ - Printf.sprintf "%s%s%s%s%s.%s" T.local_ip sep_str name sep_str name - domain + Printf.sprintf "%s%s%s.%s%s%s" T.local_ip sep_str name domain sep_str + name ] @ x end @@ -1386,18 +1401,90 @@ module ConfigHosts = struct let join ~name ~domain = read_lines ~path |> fun lines -> HostsConfIPv4.join ~name ~domain ~lines |> fun lines -> - HostsConfIPv6.join ~name ~domain ~lines + HostsConfIPv6.join ~name ~domain ~lines |> fun x -> + x @ [""] (* Add final line break *) |> String.concat "\n" |> write_string_to_file path let leave ~name ~domain = read_lines ~path |> fun lines -> HostsConfIPv4.leave ~name ~domain ~lines |> fun lines -> - HostsConfIPv6.leave ~name ~domain ~lines + HostsConfIPv6.leave ~name ~domain ~lines |> fun x -> + x @ [""] (* Add final line break *) |> String.concat "\n" |> write_string_to_file path end +module ResolveConfig = struct + let path = "/etc/resolv.conf" + + type t = Add | Remove + + let handle op domain = + let open Xapi_stdext_unix.Unixext in + let config = Printf.sprintf "search %s" domain in + read_lines ~path |> List.filter (fun x -> x <> config) |> fun x -> + (match op with Add -> config :: x | Remove -> x) |> fun x -> + x @ [""] |> String.concat "\n" |> write_string_to_file path + + let join ~domain = handle Add domain + + let leave ~domain = handle Remove domain +end + +module DNSSync = struct + let task_name = "Sync hostname with DNS" + + type t = Register | Unregister + + let handle op hostname netbios_name domain = + (* By default, hostname should equal to netbios_name, just register it to DNS server*) + try + let ops = + match op with Register -> "register" | Unregister -> "unregister" + in + let netbios_fqdn = Printf.sprintf "%s.%s" netbios_name domain in + let args = ["ads"; "dns"] @ [ops] @ ["--machine-pass"] in + Helpers.call_script net_cmd (args @ [netbios_fqdn]) |> ignore ; + if hostname <> netbios_name then + let hostname_fqdn = Printf.sprintf "%s.%s" hostname domain in + (* netbios_name is compressed, op on extra hostname *) + Helpers.call_script net_cmd (args @ [hostname_fqdn]) |> ignore + with e -> + debug "Register/unregister with DNS failed %s" (ExnHelper.string_of_exn e) + + let register hostname netbios_name domain = + handle Register hostname netbios_name domain + + let unregister hostname netbios_name domain = + handle Unregister hostname netbios_name domain + + let sync () = + Server_helpers.exec_with_new_task "sync hostname with DNS" + @@ fun __context -> + let host = Helpers.get_localhost ~__context in + let service_name = + Db.Host.get_external_auth_service_name ~__context ~self:host + in + let netbios_name = + Db.Host.get_external_auth_configuration ~__context ~self:host + |> fun config -> List.assoc_opt "netbios_name" config + in + let hostname = Db.Host.get_hostname ~__context ~self:host in + match netbios_name with + | Some netbios -> + register hostname netbios service_name + | None -> + debug "Netbios name is none, skip sync hostname to DNS" + + let trigger_sync ~start = + debug "Trigger task: %s" task_name ; + Scheduler.add_to_queue task_name + (Scheduler.Periodic !Xapi_globs.winbind_dns_sync_interval) start sync + + let stop_sync () = Scheduler.remove_from_queue task_name +end + let build_netbios_name ~config_params = let key = "netbios-name" in match List.assoc_opt key config_params with @@ -1721,6 +1808,8 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ClosestKdc.trigger_update ~start:0. ; RotateMachinePassword.trigger_rotate ~start:0. ; ConfigHosts.join ~domain:service_name ~name:netbios_name ; + ResolveConfig.join ~domain:service_name ; + DNSSync.trigger_sync ~start:0. ; Winbind.set_machine_account_encryption_type netbios_name ; debug "Succeed to join domain %s" service_name with @@ -1728,6 +1817,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Join domain: %s error: %s" service_name stdout ; clear_winbind_config () ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; + ResolveConfig.leave ~domain:service_name ; (* The configure is kept for debug purpose with max level *) raise (Auth_service_error (stdout |> tag_from_err_msg, stdout)) | Xapi_systemctl.Systemctl_fail _ -> @@ -1735,6 +1825,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Start daemon error: %s" msg ; config_winbind_daemon ~domain:None ~workgroup:None ~netbios_name:None ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; + ResolveConfig.leave ~domain:service_name ; raise (Auth_service_error (E_GENERIC, msg)) | e -> let msg = @@ -1746,6 +1837,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Enable extauth error: %s" msg ; clear_winbind_config () ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; + ResolveConfig.leave ~domain:service_name ; raise (Auth_service_error (E_GENERIC, msg)) (* unit on_disable() @@ -1760,9 +1852,13 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct let user = List.assoc_opt "user" config_params in let pass = List.assoc_opt "pass" config_params in let {service_name; netbios_name; _} = get_domain_info_from_db () in + ResolveConfig.leave ~domain:service_name ; + DNSSync.stop_sync () ; ( match netbios_name with - | Some name -> - ConfigHosts.leave ~domain:service_name ~name + | Some netbios -> + ConfigHosts.leave ~domain:service_name ~name:netbios ; + let hostname = get_localhost_name () in + DNSSync.unregister hostname netbios service_name | _ -> () ) ; @@ -1792,6 +1888,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ClosestKdc.trigger_update ~start:ClosestKdc.startup_delay ; RotateMachinePassword.trigger_rotate ~start:5. ; Winbind.check_ready_to_serve ~timeout:300. ; + DNSSync.trigger_sync ~start:5. ; let {service_name; netbios_name; _} = get_domain_info_from_db () in match netbios_name with diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index b183d477ee9..42d554ae22a 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1017,6 +1017,8 @@ let winbind_cache_time = ref 60 let winbind_machine_pwd_timeout = ref (2. *. 7. *. 24. *. 3600.) +let winbind_dns_sync_interval = ref 3600. + let winbind_update_closest_kdc_interval = ref (3600. *. 22.) (* every 22 hours *) @@ -1219,6 +1221,7 @@ let xapi_globs_spec = ; ("winbind_debug_level", Int winbind_debug_level) ; ("winbind_cache_time", Int winbind_cache_time) ; ("winbind_machine_pwd_timeout", Float winbind_machine_pwd_timeout) + ; ("winbind_dns_sync_interval", Float winbind_dns_sync_interval) ; ( "winbind_update_closest_kdc_interval" , Float winbind_update_closest_kdc_interval ) From 09b6256e844c1c35e68c2669404fe8d567697388 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 3 Jun 2025 10:56:00 +0100 Subject: [PATCH 251/492] xapi_vif: Guarantee the device parameter is an unsigned decimal integer This has been always true as xapi will call valid_device on VIF creation to make sure device is an integer, but the datamodel type of 'device' is string, without any such guarantees. Specify the guarantee in the documentation and make the check stricter (int_of_string will accept "0x9fe" as an integer, for example), making sure that the device is specifically a decimal unsigned integer. allowed_VIF_devices has already enforced the unsigned decimal integer limitation on compliant clients. This could be helpful in ensuring that the clients will always be right in sorting devices as numbers, not as strings (so that "2" follows "1" instead of the string order of "1"->"10"->"11", etc.). Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel.ml | 4 +++- ocaml/idl/schematest.ml | 2 +- ocaml/xapi/xapi_vif_helpers.ml | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index a2bfaf4d4fb..4372877b995 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -3866,7 +3866,9 @@ module VIF = struct , "order in which VIF backends are created by xapi" ) ] - "device" "order in which VIF backends are created by xapi" + "device" + "order in which VIF backends are created by xapi. Guaranteed to \ + be an unsigned decimal integer." ; field ~qualifier:StaticRO ~ty:(Ref _network) ~lifecycle: [ diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index b77c2a676b7..c8abcb1f999 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "dc1ccf295f957509f7eac4a005d17965" +let last_known_schema_hash = "4cd835e2557dd7b5cbda6c681730c447" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 2fab562dbe4..5ab6f146339 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -192,10 +192,11 @@ let clear_current_operations ~__context ~self = (**************************************************************************************) -(** Check if the device string has the right form *) +(** Check if the device string has the right form - it should only be an + unsigned decimal integer *) let valid_device dev = try - ignore (int_of_string dev) ; + Scanf.sscanf dev "%u%!" ignore ; true with _ -> false From 5c54da32ff3e20b6e96269f841c48e1af7705b46 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 4 Jun 2025 14:07:05 +0100 Subject: [PATCH 252/492] xapi-idl: Avoid printing cli output when testing Use cram tests to expect the desired output of the command instead This reduces the amount of text displayed when running tests, which makes locating the errors in the logs easier When the output of the tools changes deliberately, the expect files can be changed with `dune runtest --auto-promote` Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-idl/cluster/cli-help.t | 115 ++++++++++++ ocaml/xapi-idl/cluster/dune | 6 +- ocaml/xapi-idl/example/cli-help.t | 90 ++++++++++ ocaml/xapi-idl/example/dune | 6 +- ocaml/xapi-idl/gpumon/cli-help.t | 58 +++++++ ocaml/xapi-idl/gpumon/dune | 6 +- ocaml/xapi-idl/guard/privileged/cli-help.t | 53 ++++++ ocaml/xapi-idl/guard/privileged/dune | 8 +- ocaml/xapi-idl/guard/varstored/cli-help.t | 49 ++++++ ocaml/xapi-idl/guard/varstored/dune | 8 +- ocaml/xapi-idl/lib/xcp_service.ml | 4 +- ocaml/xapi-idl/memory/cli-help.t | 80 +++++++++ ocaml/xapi-idl/memory/dune | 6 +- ocaml/xapi-idl/network/cli-help.t | 156 +++++++++++++++++ ocaml/xapi-idl/network/dune | 8 +- ocaml/xapi-idl/rrd/cli-help.t | 193 +++++++++++++++++++++ ocaml/xapi-idl/rrd/dune | 9 +- ocaml/xapi-idl/v6/cli-help.t | 40 +++++ ocaml/xapi-idl/v6/dune | 8 +- 19 files changed, 859 insertions(+), 44 deletions(-) create mode 100644 ocaml/xapi-idl/cluster/cli-help.t create mode 100644 ocaml/xapi-idl/example/cli-help.t create mode 100644 ocaml/xapi-idl/gpumon/cli-help.t create mode 100644 ocaml/xapi-idl/guard/privileged/cli-help.t create mode 100644 ocaml/xapi-idl/guard/varstored/cli-help.t create mode 100644 ocaml/xapi-idl/memory/cli-help.t create mode 100644 ocaml/xapi-idl/network/cli-help.t create mode 100644 ocaml/xapi-idl/rrd/cli-help.t create mode 100644 ocaml/xapi-idl/v6/cli-help.t diff --git a/ocaml/xapi-idl/cluster/cli-help.t b/ocaml/xapi-idl/cluster/cli-help.t new file mode 100644 index 00000000000..5b9362aa648 --- /dev/null +++ b/ocaml/xapi-idl/cluster/cli-help.t @@ -0,0 +1,115 @@ + $ ./cluster_cli.exe --help=plain + NAME + cluster_cli - A CLI for the cluster API. This tool is not intended to + be used as an end user tool + + SYNOPSIS + cluster_cli [COMMAND] … + + COMMANDS + Observer.create [OPTION]… dbg uuid name_label dict endpoints bool + + Observer.destroy [OPTION]… dbg uuid + + Observer.init [OPTION]… dbg + + Observer.set_attributes [OPTION]… dbg uuid dict + + Observer.set_compress_tracing_files [OPTION]… dbg bool + + Observer.set_enabled [OPTION]… dbg uuid bool + + Observer.set_endpoints [OPTION]… dbg uuid endpoints + + Observer.set_export_interval [OPTION]… dbg float + + Observer.set_host_id [OPTION]… dbg string + + Observer.set_max_file_size [OPTION]… dbg int + + Observer.set_max_spans [OPTION]… dbg int + + Observer.set_max_traces [OPTION]… dbg int + + Observer.set_trace_log_dir [OPTION]… dbg string + + UPDATES.get [OPTION]… dbg timeout + Get updates from corosync-notifyd, this blocking call will return + when there is an update from corosync-notifyd or it is timed out + after timeout_p seconds + + create [OPTION]… dbg init_config + Creates the cluster. The call takes the initial config of the + initial host to add to the cluster. This will be the address on + which the rings will be created. + + declare-changed-addrs [OPTION]… dbg changed_members + Declare that one or more hosts in the cluster have changed + address. Only use this command if unable to rejoin the cluster + using `enable` because the IPv4 addresses of all nodes this node + previously saw are now invalid. If any one of these addresses + remains valid on an enabled node then this action is unnecessary. + + declare-dead [OPTION]… dbg dead_members + Declare that some hosts in the cluster are permanently dead. + Removes the hosts from the cluster. If the hosts do attempt to + rejoin the cluster in future, this may lead to fencing of other + hosts and/or data loss or data corruption. + + destroy [OPTION]… dbg + Destroys a created cluster + + diagnostics [OPTION]… dbg + Returns diagnostic information about the cluster + + disable [OPTION]… dbg + Stop the cluster on this host; leave the rest of the cluster + enabled. The cluster can be reenabled either by restarting the + host, or by calling the `enable` API call. + + enable [OPTION]… dbg init_config + Rejoins the cluster following a call to `disable`. The parameter + passed is the cluster config to use (optional fields set to None + unless updated) in case it changed while the host was disabled. + (Note that changing optional fields isn't yet supported, TODO) + + join [OPTION]… dbg token new_member tls_config existing_members + Adds a node to an initialised cluster. Takes the IPv4 address of + the new member and a list of the addresses of all the existing + members. + + leave [OPTION]… dbg + Causes this host to permanently leave the cluster, but leaves the + rest of the cluster enabled. This is not a temporary removal - if + the admin wants the hosts to rejoin the cluster again, he will + have to call `join` rather than `enable`. + + set-tls-verification [OPTION]… dbg server_pem_path + trusted_bundle_path cn enabled + Enable or disable TLS verification for xapi/clusterd + communication. The trusted_bundle_path is ignored when + verification is disabled and can be empty + + switch-cluster-stack [OPTION]… dbg cluster_stack + Switch cluster stack version to the target + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + cluster_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/cluster/dune b/ocaml/xapi-idl/cluster/dune index 50777aeb2b3..f1ec6e321de 100644 --- a/ocaml/xapi-idl/cluster/dune +++ b/ocaml/xapi-idl/cluster/dune @@ -27,8 +27,6 @@ xapi-idl xapi-idl.cluster)) -(rule - (alias runtest) - (deps (:x cluster_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps cluster_cli.exe)) diff --git a/ocaml/xapi-idl/example/cli-help.t b/ocaml/xapi-idl/example/cli-help.t new file mode 100644 index 00000000000..c38ea73040c --- /dev/null +++ b/ocaml/xapi-idl/example/cli-help.t @@ -0,0 +1,90 @@ + $ ./example.exe --help=plain + NAME + Example-service + + SYNOPSIS + Example-service [OPTION]… + + DESCRIPTION + This is an example service which demonstrates the configuration + mechanism. + + OPTIONS + --config=VAL (absent=/etc/example.exe.conf) + Location of configuration file + + --config-dir=VAL (absent=/etc/example.exe.conf.d) + Location of directory containing configuration file fragments + + --default-format=VAL (absent=vhd) + Default format for disk files + + --disable-logging-for=VAL + A space-separated list of debug modules to suppress logging from + + --inventory=VAL (absent=/etc/xensource-inventory) + Location of the inventory file + + --log=VAL (absent=syslog:daemon) + Where to write log messages + + --loglevel=VAL (absent=debug) + Log level + + --ls=VAL (absent=/bin/ls) + program used to list things + + --pidfile=VAL (absent=/var/run/example.exe.pid) + Filename to write process PID + + --queue-name=VAL (absent=org.xen.xapi.ffs) + Comma-separated list of queue names to listen on + + --search-path=VAL + Search path for resources + + --sh=VAL (absent=/bin/sh) + interpreter for arcane programming language + + --socket-path=VAL (absent=/var/xapi/socket) + Path of listening socket + + --sr-mount-path=VAL (absent=/mnt) + Default mountpoint for mounting remote filesystems + + --switch-path=VAL (absent=/var/run/message-switch/sock) + Unix domain socket path on localhost where the message switch is + listening + + --timeslice=VAL (absent=0.05) + timeslice in seconds + + --use-switch=VAL (absent=true) + true if the message switch is to be enabled + + COMMON OPTIONS + These options are common to all services. + + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + Example-service exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + + BUGS + Check bug reports at http://github.com/xapi-project/xen-api + + diff --git a/ocaml/xapi-idl/example/dune b/ocaml/xapi-idl/example/dune index cf27e69dcf3..db360ff8030 100644 --- a/ocaml/xapi-idl/example/dune +++ b/ocaml/xapi-idl/example/dune @@ -9,8 +9,6 @@ ) (preprocess (pps ppx_deriving_rpc))) -(rule - (alias runtest) - (deps (:x example.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps example.exe)) diff --git a/ocaml/xapi-idl/gpumon/cli-help.t b/ocaml/xapi-idl/gpumon/cli-help.t new file mode 100644 index 00000000000..afe309b014f --- /dev/null +++ b/ocaml/xapi-idl/gpumon/cli-help.t @@ -0,0 +1,58 @@ + $ ./gpumon_cli.exe --help=plain + NAME + gpumon_cli - A CLI for the GPU monitoring API. This allows scripting + of the gpumon daemon for testing and debugging. This tool is not + intended to be used as an end user tool + + SYNOPSIS + gpumon_cli [COMMAND] … + + COMMANDS + get_pgpu_metadata [OPTION]… debug_info pgpu_address + Gets the metadata for a pGPU, given its address (PCI bus ID). + + get_pgpu_vgpu_compatibility [OPTION]… debug_info + nvidia_pgpu_metadata nvidia_vgpu_metadata_list + Checks compatibility between a pGPU (on a host) and a list of + vGPUs (assigned to a VM). Note: A VM may use several vGPUs. The + use case is VM.suspend/VM.resume: before VM.resume + [nvidia_vgpu_metadata] of the suspended VM is checked against the + [nvidia_pgpu_metadata] on the host where the VM is resumed. + + get_pgpu_vm_compatibility [OPTION]… debug_info pgpu_address domid + nvidia_pgpu_metadata + Checks compatibility between a VM's vGPU(s) and another pGPU. + + get_vgpu_metadata [OPTION]… debug_info domid pgpu_address vgpu_uuid + Obtains metadata for all vGPUs running in a domain. + + nvml_attach [OPTION]… debug_info + Attach nVidia cards to Gpumon for metrics and compatibility + checking. + + nvml_detach [OPTION]… debug_info + Detach nVidia cards from Gpumon + + nvml_is_attached [OPTION]… debug_info + Return true if nVidia cards are currently attached. + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + gpumon_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/gpumon/dune b/ocaml/xapi-idl/gpumon/dune index de10e06dae6..269a6690eeb 100644 --- a/ocaml/xapi-idl/gpumon/dune +++ b/ocaml/xapi-idl/gpumon/dune @@ -27,8 +27,6 @@ xapi-idl xapi-idl.gpumon)) -(rule - (alias runtest) - (deps (:x gpumon_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps gpumon_cli.exe)) diff --git a/ocaml/xapi-idl/guard/privileged/cli-help.t b/ocaml/xapi-idl/guard/privileged/cli-help.t new file mode 100644 index 00000000000..0e990ca9490 --- /dev/null +++ b/ocaml/xapi-idl/guard/privileged/cli-help.t @@ -0,0 +1,53 @@ + $ ./xapiguard_cli.exe --help=plain + NAME + xapiguard_cli - A CLI for the deprivileged socket spawning API. This + allows scripting of the varstored and SWTPM deprivileging daemon for + testing and debugging. This tool is not intended to be used as an end + user tool + + SYNOPSIS + xapiguard_cli [COMMAND] … + + COMMANDS + varstore_create [OPTION]… dbg vm_uuid gid path + Create a deprivileged varstore socket that only accepts API calls + for a specific VM. The socket will be writable only to the + specified group. + + varstore_destroy [OPTION]… dbg gid path + Stop listening on varstore sockets for the specified group + + vtpm_create [OPTION]… dbg vm_uuid gid path + Create a deprivileged vtpm socket that only accepts API calls for + a specific VM. The socket will be writable only to the specified + group. + + vtpm_destroy [OPTION]… dbg gid path + Stop listening on vtpm sockets for the specified group + + vtpm_get_contents [OPTION]… dbg vtpm_uuid + Get vTPM contents blob + + vtpm_set_contents [OPTION]… dbg vtpm_uuid string + Set vTPM contents blob + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + xapiguard_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/guard/privileged/dune b/ocaml/xapi-idl/guard/privileged/dune index cdb888692d1..b5de6b38b8a 100644 --- a/ocaml/xapi-idl/guard/privileged/dune +++ b/ocaml/xapi-idl/guard/privileged/dune @@ -18,7 +18,7 @@ (package varstored-guard) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -26,8 +26,6 @@ xapi-idl.guard.privileged )) -(rule - (alias runtest) - (deps xapiguard_cli.exe) +(cram (package varstored-guard) - (action (run %{deps} --help=plain))) + (deps xapiguard_cli.exe)) diff --git a/ocaml/xapi-idl/guard/varstored/cli-help.t b/ocaml/xapi-idl/guard/varstored/cli-help.t new file mode 100644 index 00000000000..6f36f4bf5bd --- /dev/null +++ b/ocaml/xapi-idl/guard/varstored/cli-help.t @@ -0,0 +1,49 @@ + $ ./varstored_cli.exe --help=plain + NAME + varstored_cli - debug CLI + + SYNOPSIS + varstored_cli [COMMAND] … + + COMMANDS + VM.get_NVRAM [--socket=SOCKET] [OPTION]… string string + Get the current VM's NVRAM contents + + VM.get_by_uuid [--socket=SOCKET] [OPTION]… string string + Dummy, for wire compatibility with XAPI + + VM.set_NVRAM_EFI_variables [--socket=SOCKET] [OPTION]… string string + string + Set the current VM's NVRAM contents + + message.create [--socket=SOCKET] [OPTION]… string string int64 + string string string + Send an alert when booting a UEFI guest fails + + session.login_with_password [--socket=SOCKET] [OPTION]… string + string string string + Dummy, for wire compatibility with XAPI + + session.logout [--socket=SOCKET] [OPTION]… string + Dummy, for wire compatibility with XAPI + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + varstored_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/guard/varstored/dune b/ocaml/xapi-idl/guard/varstored/dune index abded2e1c17..6957b6c7a78 100644 --- a/ocaml/xapi-idl/guard/varstored/dune +++ b/ocaml/xapi-idl/guard/varstored/dune @@ -17,7 +17,7 @@ (modules varstored_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -25,8 +25,6 @@ xapi-idl.guard.varstored )) -(rule - (alias runtest) - (deps varstored_cli.exe) +(cram (package xapi-idl) - (action (run %{deps} --help=plain))) + (deps varstored_cli.exe)) diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 817825c44fe..8250842689b 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -168,7 +168,7 @@ let setify = This needs to be as small as possible to reduce latency. Too small values reduce performance due to context switching overheads - + 4ms = 1/HZ in Dom0 seems like a good default, a better value will be written by a boot time service. *) @@ -357,7 +357,7 @@ let command_of ?(name = Sys.argv.(0)) ?(version = "unknown") ; `S _common_options ; `P "These options are common to all services." ; `S "BUGS" - ; `P "Check bug reports at http://github.com/xapi-project/xcp-idl" + ; `P "Check bug reports at http://github.com/xapi-project/xen-api" ] in Cmd.v diff --git a/ocaml/xapi-idl/memory/cli-help.t b/ocaml/xapi-idl/memory/cli-help.t new file mode 100644 index 00000000000..ff85cda4f0d --- /dev/null +++ b/ocaml/xapi-idl/memory/cli-help.t @@ -0,0 +1,80 @@ + $ ./memory_cli.exe --help=plain + NAME + memory_cli - A CLI for the memory API. This allows scripting of the + squeeze daemon for testing and debugging. This tool is not intended to + be used as an end user tool + + SYNOPSIS + memory_cli [COMMAND] … + + COMMANDS + balance_memory [OPTION]… string + Forces a rebalance of the hosts memory. Blocks until the system is + in a stable state. + + delete_reservation [OPTION]… string string reservation_id + Deletes a reservation. Note that memory rebalancing is not done + synchronously after the operation has completed. + + get_diagnostics [OPTION]… string + Gets diagnostic information from the server + + get_domain_zero_policy [OPTION]… string + Gets the ballooning policy for domain zero. + + get_host_initial_free_memory [OPTION]… string + Gets the amount of initial free memory in a host + + get_host_reserved_memory [OPTION]… string + Gets the amount of reserved memory in a host. This is the lower + limit of memory that squeezed will ensure remains unused by any + domain or reservation. + + login [OPTION]… string string + Logs into the squeeze daemon. Any reservations previously made + with the specified service name not yet associated with a domain + will be removed. + + query_reservation_of_domain [OPTION]… string string int + Queries the reservation_id associated with a domain + + reserve_memory [OPTION]… string string int64 + [reserve_memory dbg session size] reserves memory for a domain. If + necessary, other domains will be ballooned down to ensure [size] + is available. The call returns a reservation_id that can later be + transferred to a domain. + + reserve_memory_range [OPTION]… string string int64 int64 + [reserve_memory_range dbg session min max] reserves memory for a + domain. If necessary, other domains will be ballooned down to + ensure enough memory is available. The amount of memory will be + between [min] and [max] according to the policy in operation. The + call returns a reservation_id and the actual memory amount that + can later be transferred to a domain. + + transfer_reservation_to_domain [OPTION]… string string + reservation_id int + Transfers a reservation to a domain. This is called when the + domain has been created for the VM for which the reservation was + initially made. + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + memory_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/memory/dune b/ocaml/xapi-idl/memory/dune index f0f70e0a69e..7df6724a299 100644 --- a/ocaml/xapi-idl/memory/dune +++ b/ocaml/xapi-idl/memory/dune @@ -25,8 +25,6 @@ xapi-idl.memory )) -(rule - (alias runtest) - (deps (:x memory_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps memory_cli.exe)) diff --git a/ocaml/xapi-idl/network/cli-help.t b/ocaml/xapi-idl/network/cli-help.t new file mode 100644 index 00000000000..b8878a9040d --- /dev/null +++ b/ocaml/xapi-idl/network/cli-help.t @@ -0,0 +1,156 @@ + $ ./network_cli.exe --help=plain + NAME + network_cli - A CLI for the network API. This allows scripting of the + xcp-networkd daemon for testing and debugging. This tool is not + intended to be used as an end user tool + + SYNOPSIS + network_cli [COMMAND] … + + COMMANDS + Network.Bridge.add_port [OPTION]… string bridge name interfaces + Add port + + Network.Bridge.create [OPTION]… string name + Create bridge + + Network.Bridge.destroy [OPTION]… string force name + Destroy bridge + + Network.Bridge.get_all [OPTION]… string + Get all bridges + + Network.Bridge.get_all_bonds [OPTION]… string from_cache + get all bonds + + Network.Bridge.get_all_ports [OPTION]… string from_cache + Get all ports + + Network.Bridge.get_interfaces [OPTION]… string name + Get interfaces + + Network.Bridge.get_kind [OPTION]… string + Get backend kind + + Network.Bridge.get_physical_interfaces [OPTION]… string name + Get physical interfaces + + Network.Bridge.make_config [OPTION]… string conservative config + Make bridge configuration + + Network.Bridge.remove_port [OPTION]… string bridge name + Remove port + + Network.Bridge.set_persistent [OPTION]… string name value + Make bridge to persistent or not + + Network.Interface.bring_down [OPTION]… string name + Bring PIF down + + Network.Interface.exists [OPTION]… string name + Check interface existence + + Network.Interface.get_all [OPTION]… string + Get list of all interface names + + Network.Interface.get_capabilities [OPTION]… string name + Get capabilities on the interface + + Network.Interface.get_dns [OPTION]… string name + Get DNS + + Network.Interface.get_ipv4_addr [OPTION]… string name + Get list of IPv4 addresses of the interface + + Network.Interface.get_ipv4_gateway [OPTION]… string name + Get IPv4 gateway + + Network.Interface.get_ipv6_addr [OPTION]… string name + Get IPv6 address + + Network.Interface.get_ipv6_gateway [OPTION]… string name + Get IPv6 gateway + + Network.Interface.get_mac [OPTION]… string name + Get Mac address of the interface + + Network.Interface.get_mtu [OPTION]… string name + Get MTU + + Network.Interface.get_pci_bus_path [OPTION]… string name + Get PCI bus path of the interface + + Network.Interface.has_vlan [OPTION]… string name vlan + Check whether interface has vlan + + Network.Interface.is_connected [OPTION]… string name + Check whether interface is connected + + Network.Interface.is_physical [OPTION]… string name + Check whether interface is physical + + Network.Interface.is_up [OPTION]… string name + Check whether the interface is up + + Network.Interface.make_config [OPTION]… string conservative config + Make interface configuration + + Network.Interface.set_ipv4_conf [OPTION]… string name ipv4 + Set IPv4 configuration + + Network.Interface.set_persistent [OPTION]… string name value + Make PIF to persistent or not + + Network.PVS_proxy.configure_site [OPTION]… string t + Configure site + + Network.PVS_proxy.remove_site [OPTION]… string string + Remove site + + Network.Sriov.disable [OPTION]… string name + Disable SR-IOV + + Network.Sriov.enable [OPTION]… string name + Enable SR-IOV + + Network.Sriov.make_vf_config [OPTION]… string address sriov_pci_t + Make SR-IOV vf config + + Network.clear_state [OPTION]… + Clear configuration state then lock the writing of the state to + disk + + Network.reset_state [OPTION]… + Reset configuration state + + Network.set_dns_interface [OPTION]… string name + Set dns interface + + Network.set_gateway_interface [OPTION]… string name + Set gateway interface + + Network.sync_state [OPTION]… + Allow for the config state to be written to disk then perform a + write + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + network_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + + diff --git a/ocaml/xapi-idl/network/dune b/ocaml/xapi-idl/network/dune index a9a4869945d..d1016ae8821 100644 --- a/ocaml/xapi-idl/network/dune +++ b/ocaml/xapi-idl/network/dune @@ -21,7 +21,7 @@ (modules network_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -29,8 +29,6 @@ xapi-idl.network )) -(rule - (alias runtest) - (deps (:x network_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps network_cli.exe)) diff --git a/ocaml/xapi-idl/rrd/cli-help.t b/ocaml/xapi-idl/rrd/cli-help.t new file mode 100644 index 00000000000..a503e0b75bb --- /dev/null +++ b/ocaml/xapi-idl/rrd/cli-help.t @@ -0,0 +1,193 @@ + $ ./rrd_cli.exe --help=plain + NAME + rrd-cli - A CLI for the Db monitoring API. This allows scripting of + the Rrd daemon for testing and debugging. This tool is not intended to + be used as an end user tool + + SYNOPSIS + rrd-cli [COMMAND] … + + COMMANDS + Deprecated.load_rrd [OPTION]… uuid timescale + Deprecated call. + + HA.disable [OPTION]… + Disables the HA metrics. + + HA.enable_and_update [OPTION]… statefile_latencies heartbeat_latency + xapi_latency + Enables the gathering of HA metrics, a built-in function of + xcp-rrdd. + + Plugin.Local.deregister [OPTION]… uid + Deregisters a plugin by uid + + Plugin.Local.next_reading [OPTION]… uid + Returns the number of seconds until the next reading will be + taken. + + Plugin.Local.register [OPTION]… uid info protocol + [Plugin.Local.register uid info protocol] registers a plugin as a + source of a set of data-sources. [uid] is a unique identifier for + the plugin, often the name of the plugin. [info] is the RRD + frequency, and [protocol] specifies whether the plugin will be + using V1 or V2 of the protocol. + + Plugin.deregister [OPTION]… uid + Preserved for backwards compatibility. Deregesters a local plugin. + + Plugin.get_header [OPTION]… + Returns header string. This string should be copied exactly to the + start of the shared memory containing the data source + + Plugin.get_path [OPTION]… uid + Returns path in the local filesystem to place the data source file + + Plugin.next_reading [OPTION]… uid + Returns the time until the next reading. + + Plugin.register [OPTION]… uid frequency + Preserved for backwards compatibility. Equivalent to a Local + plugin registration with V1 protocol. + + add_host_ds [OPTION]… ds_name + Adds a host data source to the host RRD. This causes the data + source to be recorded if it wasn't a default data source. + + add_sr_ds [OPTION]… sr_uuid ds_name + Adds an SR data source to the SR RRD. This causes the data source + to be recorded if it wasn't a default data source. + + add_vm_ds [OPTION]… vm_uuid domid ds_name + Adds a VM data source to the VM RRD. This causes the data source + to be recorded if it wasn't a default data source. + + archive_rrd [OPTION]… vm_uuid + Sends the VM RRD either to local disk or the remote address if + specified, and removes it from memory. Called on VM + shutdown/suspend. + + archive_sr_rrd [OPTION]… sr_uuid + Saves the SR RRD to the local disk. Returns the path to the saved + RRD so it can be copied onto the SR before it is detached. + + backup_rrds [OPTION]… + Backs up RRD data to disk. This should be done periodically to + ensure that if the host crashes we don't lose too much data. + + forget_host_ds [OPTION]… ds_name + Forgets the recorded archives for the named data source. Note that + if the data source is marked as default, new data coming in will + cause the archive to be recreated. + + forget_sr_ds [OPTION]… sr_uuid ds_name + Forgets the recorded archives for the named SR data source. Note + that if the data source is marked as default, new data coming in + will cause the archive to be recreated. + + forget_vm_ds [OPTION]… vm_uuid ds_name + Forgets the recorded archives for the named VM data source. Note + that if the data source is marked as default, new data coming in + will cause the archive to be recreated. + + has_vm_rrd [OPTION]… vm_uuid + Returns `true` if xcp-rrdd has an RRD for the specified VM in + memory + + migrate_rrd [OPTION]… remote_address vm_uuid host_uuid + Migrate_push - used by the migrate code to push an RRD directly to + a remote host without going via the master. If the host is on a + different pool, you must pass both the remote_address and + session_id parameters. + + push_rrd_local [OPTION]… vm_uuid domid + Loads a VM RRD from local storage, associates it with the + specified domid, and starts recording all data sources related to + the VM to that RRD + + push_rrd_remote [OPTION]… vm_uuid remote_address + Loads a VM RRD from local storage and pushes it to a remote host + + push_sr_rrd [OPTION]… sr_uuid path + Loads the RRD from the path specified on the local disk. + Overwrites any RRD already in memory for the SR. Data sources will + subsequently be recorded to this RRD. + + query_host_ds [OPTION]… ds_name + Returns the current value of the named host data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + query_possible_host_dss [OPTION]… + Returns list of possible host DSs. This will include data sources + not currently being recorded into archives. + + query_possible_sr_dss [OPTION]… sr_uuid + Returns list of possible SR DSs. This will include data sources + not currently being recorded into archives. + + query_possible_vm_dss [OPTION]… vm_uuid + Returns list of possible VM DSs. This will include data sources + not currently being recorded into archives. + + query_sr_ds [OPTION]… sr_uuid ds_name + Returns the current value of the named VM data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + query_vm_ds [OPTION]… vm_uuid ds_name + Returns the current value of the named VM data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + remove_rrd [OPTION]… uuid + Removes a VM RRD from the local filesystem, if it exists. + + save_rrds [OPTION]… + Backs up RRD data to disk on localhost. This should be done + periodically to ensure that if the host crashes we don't lose too + much data. + + send_host_rrd_to_master [OPTION]… master_address + Called on host shutdown/reboot to send the Host RRD to the master + for backup. + + set_cache_sr [OPTION]… sr_uuid + Sets the uuid of the cache SR. If this is set, statistics about + the usage of the cache will be recorded into the host SR. + + unset_cache_sr [OPTION]… + Unsets the cache_sr. No futher data will be gathered about cache + usage, but existing archive data will not be deleted. + + update_use_min_max [OPTION]… value + Set the value of the `use_min_max` variable. If this is `true`, + when creating a new RRD, archives for the minimum and maximum + observed values will be created alongside the standard archive of + average values + + update_vm_memory_target [OPTION]… domid target + Sets the `memory_target` value for a VM. This is called by xapi + when it is told by xenopsd that squeezed has changed the target + for a VM. + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + rrd-cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/rrd/dune b/ocaml/xapi-idl/rrd/dune index f7b2a8e7b70..e0e8693c13f 100644 --- a/ocaml/xapi-idl/rrd/dune +++ b/ocaml/xapi-idl/rrd/dune @@ -49,7 +49,7 @@ (modes exe) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -57,9 +57,6 @@ xapi-idl.rrd )) -(rule - (alias runtest) - (deps (:x rrd_cli.exe)) +(cram (package xapi-tools) - (action (run %{x} --help=plain))) - + (deps rrd_cli.exe)) diff --git a/ocaml/xapi-idl/v6/cli-help.t b/ocaml/xapi-idl/v6/cli-help.t new file mode 100644 index 00000000000..ed7d3b47ba5 --- /dev/null +++ b/ocaml/xapi-idl/v6/cli-help.t @@ -0,0 +1,40 @@ + $ ./v6_cli.exe --help=plain + NAME + licensing_cli - A CLI for the V6d API. This allows scripting of the + licensing daemon for testing and debugging. This tool is not intended + to be used as an end user tool + + SYNOPSIS + licensing_cli [COMMAND] … + + COMMANDS + apply_edition [OPTION]… debug_info string string_pair_lst + Checks license info to ensures enabled features are compatible. + + get_editions [OPTION]… debug_info + Gets list of accepted editions. + + get_version [OPTION]… debug_info + Gets list of version-related string pairs + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + licensing_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + + diff --git a/ocaml/xapi-idl/v6/dune b/ocaml/xapi-idl/v6/dune index 79751c08794..3fb2579af06 100644 --- a/ocaml/xapi-idl/v6/dune +++ b/ocaml/xapi-idl/v6/dune @@ -19,7 +19,7 @@ (modules v6_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -28,8 +28,6 @@ xapi-log )) -(rule - (alias runtest) - (deps (:x v6_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps v6_cli.exe)) From e1213288c0ea73f77f9464b3ace03f6977c256d4 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 4 Jun 2025 15:27:14 +0100 Subject: [PATCH 253/492] xapi-storage-script: avoid output when running python tests These tests simply printed output, used cram tests to document the output and remove it from the CI logs. IF the output gets changed on purpose, the expect file can be changed by running `dune build @runtest-python --auto-promote` Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-storage-script/dune | 17 ++++--- ocaml/xapi-storage-script/python-self-test.t | 47 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 ocaml/xapi-storage-script/python-self-test.t diff --git a/ocaml/xapi-storage-script/dune b/ocaml/xapi-storage-script/dune index e1391aed2ca..435c7a8ecf6 100644 --- a/ocaml/xapi-storage-script/dune +++ b/ocaml/xapi-storage-script/dune @@ -76,20 +76,19 @@ (files (xapi-storage-script.8 as man8/xapi-storage-script.8)) ) -(rule +(cram (alias runtest-python) + (runtest_alias false) (package xapi-storage-script) (deps - (:x main.exe) + main.exe + ../xapi-storage/python/xapi/storage/api/v5/datapath.py + ../xapi-storage/python/xapi/storage/api/v5/plugin.py + ../xapi-storage/python/xapi/storage/api/v5/task.py + ../xapi-storage/python/xapi/storage/api/v5/volume.py + (source_tree ../xapi-storage/python/xapi) (source_tree test/volume) - (:p - ../xapi-storage/python/xapi/storage/api/v5/datapath.py - ../xapi-storage/python/xapi/storage/api/v5/plugin.py - ../xapi-storage/python/xapi/storage/api/v5/task.py - ../xapi-storage/python/xapi/storage/api/v5/volume.py - ) ) - (action (bash "export PYTHONPATH=../xapi-storage/python/; echo $PYTHONPATH; ./%{x} --root=$PWD/test --self-test-only=true")) ) (data_only_dirs test examples) diff --git a/ocaml/xapi-storage-script/python-self-test.t b/ocaml/xapi-storage-script/python-self-test.t new file mode 100644 index 00000000000..9ac59bed953 --- /dev/null +++ b/ocaml/xapi-storage-script/python-self-test.t @@ -0,0 +1,47 @@ +run the self-checks for xapi-storage-script, it logs to stderr, so process +stderr instead of stdout + +The output of the logs needs to delete randomization, there are two sources: +pids and uuids + + $ export PYTHONPATH=../xapi-storage/python/; ./main.exe --root=$PWD/test --self-test-only=true 2>&1 >/dev/null | sed -E 's/\[[0-9]+\]/[PID]/g' | sed -E 's/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/UUID/g' + [INFO] {"method":"Plugin.query","params":[{"dbg":"debug"}],"id":2} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Plugin.Query[PID] succeeded: {"plugin": "dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "vendor": "Citrix Systems Inc", "copyright": "(C) 2018 Citrix Inc", "version": "1.0", "required_api_version": "5.0", "features": ["SR_ATTACH", "SR_DETACH", "SR_CREATE", "SR_PROBE", "VDI_CREATE", "VDI_DESTROY"], "configuration": {}, "required_cluster_stack": []} + + [INFO] {"method":"Plugin.diagnostics","params":[{"dbg":"debug"}],"id":4} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Plugin.diagnostics[PID] succeeded: "Dummy diagnostics" + + [INFO] {"method":"SR.create","params":[{"description":"dummy description","name":"dummy name","configuration":{"uri":"file:///dev/null"},"uuid":"dummySR","dbg":"debug"}],"id":6} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.create[PID] succeeded: {"uri": "file:///tmp/dummy"} + + [INFO] {"method":"SR.attach","params":[{"configuration":{"uri":"file:///tmp/dummy"},"dbg":"debug"}],"id":9} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.attach[PID] succeeded: "file:///tmp/dummy" + + [INFO] {"method":"SR.stat","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":10} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.stat[PID] succeeded: {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]} + + [INFO] {"method":"Volume.create","params":[{"sharable":false,"size":0,"description":"vdi description","name":"vdi name","sr":"file:///tmp/dummy","dbg":"debug"}],"id":12} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.create[PID] succeeded: {"name": "vdi name", "description": "vdi description", "key": "UUID", "uuid": "UUID", "read_write": true, "sharable": false, "virtual_size": 0, "physical_utilisation": 0, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.set","params":[{"v":"redolog","k":"vdi-type","key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":13} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.set[PID] succeeded: null + + [INFO] {"method":"Volume.stat","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":15} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.stat[PID] succeeded: {"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "UUID", "uuid": "UUID", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.stat","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":17} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.stat[PID] succeeded: {"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "UUID", "uuid": "UUID", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.destroy","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":18} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.destroy[PID] succeeded: null + + [INFO] {"method":"SR.stat","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":20} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.stat[PID] succeeded: {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]} + + [INFO] {"method":"SR.ls","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":22} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.ls[PID] succeeded: [{"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "file1", "uuid": "file1", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}}] + + [INFO] {"method":"SR.probe","params":[{"configuration":{"uri":"file:///tmp/dummy"},"dbg":"debug"}],"id":24} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.probe[PID] succeeded: [{"configuration": {"uri": "file:///tmp/dummy"}, "complete": true, "extra_info": {}}, {"configuration": {"uri": "file:///tmp/dummy", "sr_uuid": "myuuid"}, "sr": {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]}, "complete": true, "extra_info": {}}] + + [INFO] test thread shutdown cleanly From 19e1704fb28646e1080c8e7cd10eeb8acc821abb Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Wed, 4 Jun 2025 08:34:38 +0100 Subject: [PATCH 254/492] CA-411766: Detach VBDs right after VM Halted Fix race condition when destroying VBD after VM power_state change. A client of XenServer attempted to destroy a VBD immediately after receiving an event triggered by a VM power_state change, resulting in a failure. The root cause is below: 1. The update to VM's power_state and the update to VBDs are not performed atomically, so the client may receive the event from the update to VM's power_state and attempt to operate VBDs before their state is updated. 2. If the VM is running on a supporter, database operations require sending RPCs to the coordinator, introducing additional latency. 3. Between the updates to the VM's power_state and the VBDs, xapi also updates the pending_guidences fields, which requires at least eight database operations and then further delays the VBD update. It's not straightforward to add transactions for these DB operations. The workaround is to move the update to pending_guildences to the end of the relevant database operations (VBDs, VIFs, GPUs, etc), ensuring that VBDs are updated immediately after the VM's power_state change. This is related to XSI-1915 where Citrix deploy tool MCS triggered the issue. Signed-off-by: Bengang Yuan --- ocaml/xapi/xapi_vm_lifecycle.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 9ab13f79b54..fc281c70de0 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -856,8 +856,6 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = if state = `Suspended then remove_pending_guidance ~__context ~self ~value:`restart_device_model ; if state = `Halted then ( - remove_pending_guidance ~__context ~self ~value:`restart_device_model ; - remove_pending_guidance ~__context ~self ~value:`restart_vm ; (* mark all devices as disconnected *) List.iter (fun vbd -> @@ -899,7 +897,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = ) (Db.VM.get_VUSBs ~__context ~self) ; (* Blank the requires_reboot flag *) - Db.VM.set_requires_reboot ~__context ~self ~value:false + Db.VM.set_requires_reboot ~__context ~self ~value:false ; + remove_pending_guidance ~__context ~self ~value:`restart_device_model ; + remove_pending_guidance ~__context ~self ~value:`restart_vm ) ; (* Do not clear resident_on for VM and VGPU in a checkpoint operation *) if From 7c27cc2a7f40cf7fb5354b9edc704e4f7206ed7d Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Jun 2025 14:18:50 +0100 Subject: [PATCH 255/492] datamodel_lifecycle: automatic update Signed-off-by: Pau Ruiz Safont --- ocaml/idl/datamodel_lifecycle.ml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 9aaa87e5fec..1939c05269a 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -97,6 +97,14 @@ let prototyped_of_field = function Some "22.26.0" | "SM", "host_pending_features" -> Some "24.37.0" + | "host", "console_idle_timeout" -> + Some "25.20.0-next" + | "host", "ssh_expiry" -> + Some "25.20.0-next" + | "host", "ssh_enabled_timeout" -> + Some "25.20.0-next" + | "host", "ssh_enabled" -> + Some "25.20.0-next" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> @@ -213,6 +221,10 @@ let prototyped_of_message = function Some "22.26.0" | "VTPM", "create" -> Some "22.26.0" + | "host", "set_console_idle_timeout" -> + Some "25.20.0-next" + | "host", "set_ssh_enabled_timeout" -> + Some "25.20.0-next" | "host", "disable_ssh" -> Some "25.13.0" | "host", "enable_ssh" -> @@ -235,6 +247,10 @@ let prototyped_of_message = function Some "23.30.0" | "VM", "set_groups" -> Some "24.19.1" + | "pool", "set_console_idle_timeout" -> + Some "25.20.0-next" + | "pool", "set_ssh_enabled_timeout" -> + Some "25.20.0-next" | "pool", "disable_ssh" -> Some "25.13.0" | "pool", "enable_ssh" -> From b8aa4a7c71b9504a3434375de6cfebdfa1b9c03e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Jun 2025 16:16:38 +0100 Subject: [PATCH 256/492] xcp-rrdd: change the code responsible for filtering out paused domains The consolidator used to be aware of which domains were paused, this was used to avoid reporting memory changes for paused domains, exclusively. Move that responsibility to the domain memory reporter instead, this makes the decision local, simplifying code. This is useful to separate the memory code from the rest of rrdd. Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 20 +-- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 135 +++++++++--------- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 19 ++- 3 files changed, 84 insertions(+), 90 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 5872fb5b6c1..72fe076b4c4 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -155,9 +155,8 @@ let convert_to_owner_map dss = Also resets the value of datasources that are enabled in the RRD, but weren't updated on this refresh cycle. *) -let update_rrds uuid_domids paused_vms plugins_dss = +let update_rrds uuid_domids plugins_dss = let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in - let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in let per_owner_flattened_map, per_plugin_map = convert_to_owner_map plugins_dss in @@ -237,18 +236,11 @@ let update_rrds uuid_domids paused_vms plugins_dss = match vm_rrd with | Some rrdi -> let updated_dss, rrd = merge_new_dss rrdi dss in - (* CA-34383: Memory updates from paused domains serve no useful - purpose. During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. Hence, we - ignore changes from paused domains: *) - ( if not (StringSet.mem vm_uuid paused_vms) then - let named_updates = - StringMap.map to_named_updates dss - in - Rrd.ds_update_named rrd - ~new_rrd:(domid <> rrdi.domid) timestamp - named_updates - ) ; + let named_updates = + StringMap.map to_named_updates dss + in + Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) + timestamp named_updates ; Some {rrd; dss= updated_dss; domid} | None -> debug "%s: Creating fresh RRD for VM uuid=%s" diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 75108465907..2528c009845 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -258,71 +258,80 @@ let mem_available () = let dss_mem_vms doms = List.fold_left (fun acc (dom, uuid, domid) -> - let kib = - Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages) - in - let memory = Int64.mul kib 1024L in - let main_mem_ds = - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory" - ~description:"Memory currently allocated to VM" ~units:"B" - ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () - ) - in - let memory_target_opt = - with_lock Rrdd_shared.memory_targets_m (fun _ -> - Hashtbl.find_opt Rrdd_shared.memory_targets domid - ) - in - let mem_target_ds = - Option.map - (fun memory_target -> - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_target" - ~description:"Target of VM balloon driver" ~units:"B" - ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) + let add_vm_metrics () = + let kib = + Xenctrl.pages_to_kib + (Int64.of_nativeint dom.Xenctrl.total_memory_pages) + in + let memory = Int64.mul kib 1024L in + let main_mem_ds = + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory" + ~description:"Memory currently allocated to VM" ~units:"B" + ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true + () ) - memory_target_opt - in - let other_ds = - if domid = 0 then - match mem_available () with - | Ok mem -> + in + let memory_target_opt = + with_lock Rrdd_shared.memory_targets_m (fun _ -> + Hashtbl.find_opt Rrdd_shared.memory_targets domid + ) + in + let mem_target_ds = + Option.map + (fun memory_target -> + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_target" + ~description:"Target of VM balloon driver" ~units:"B" + ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 + ~default:true () + ) + ) + memory_target_opt + in + let other_ds = + if domid = 0 then + match mem_available () with + | Ok mem -> + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Dom0 current free memory" + ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 + ~default:true () + ) + | Error msg -> + let _ = + error "%s: retrieving Dom0 free memory failed: %s" + __FUNCTION__ msg + in + None + else + try + let mem_free = + Watch.IntMap.find domid !current_meminfofree_values + in Some ( Rrd.VM uuid , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Dom0 current free memory" - ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 + ~description:"Memory used as reported by the guest agent" + ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 ~default:true () ) - | Error msg -> - let _ = - error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ - msg - in - None - else - try - let mem_free = - Watch.IntMap.find domid !current_meminfofree_values - in - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Memory used as reported by the guest agent" - ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - with Not_found -> None + with Not_found -> None + in + List.concat + [ + main_mem_ds :: Option.to_list other_ds + ; Option.to_list mem_target_ds + ; acc + ] in - List.concat - [ - main_mem_ds :: Option.to_list other_ds - ; Option.to_list mem_target_ds - ; acc - ] + (* CA-34383: Memory updates from paused domains serve no useful purpose. + During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + if dom.Xenctrl.paused then acc else add_vm_metrics () ) [] doms @@ -466,14 +475,10 @@ let domain_snapshot xc = let domains = Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain in - let domain_paused (d, uuid, _) = - if d.Xenctrl.paused then Some uuid else None - in - let paused_uuids = List.filter_map domain_paused domains in let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; - (domains, paused_uuids) + domains let dom0_stat_generators = [ @@ -507,7 +512,7 @@ let write_dom0_stats writers tagged_dss = let do_monitor_write xc writers = Rrdd_libs.Stats.time_this "monitor" (fun _ -> - let domains, my_paused_vms = domain_snapshot xc in + let domains = domain_snapshot xc in let tagged_dom0_stats = generate_all_dom0_stats xc domains in write_dom0_stats writers tagged_dom0_stats ; let dom0_stats = @@ -523,7 +528,7 @@ let do_monitor_write xc writers = let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in (* stats are grouped per plugin, which provides its timestamp *) - Rrdd_monitor.update_rrds uuid_domids my_paused_vms stats ; + Rrdd_monitor.update_rrds uuid_domids stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index bb0f726b5eb..725b34351c6 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -60,11 +60,11 @@ let host_rrds rrd_info = Hashtbl.add h "host" rrd_info ; Some h -let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds +let update_rrds_test ~timestamp ~dss ~uuid_domids ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = reset_rrdd_shared_state () ; - Rrdd_monitor.update_rrds uuid_domids paused_vms + Rrdd_monitor.update_rrds uuid_domids (List.to_seq [("update_rrds_test", timestamp, List.to_seq dss)]) ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; @@ -76,38 +76,37 @@ let update_rrds = let open Rrd in [ ( "Null update" - , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~paused_vms:[] + , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single host update" , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a])] ) ; ( "Multiple host updates" , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a); (Host, ds_b)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a; ds_b])] ) ; ( "Single non-resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple non-resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] ~uuid_domids:[("a", 1)] - ~paused_vms:[] ~expected_vm_rrds:[("a", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) @@ -115,7 +114,6 @@ let update_rrds = , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)] ~uuid_domids:[("a", 1); ("b", 1)] - ~paused_vms:[] ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) @@ -123,14 +121,13 @@ let update_rrds = , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)] ~uuid_domids:[("a", 1); ("b", 1)] - ~paused_vms:[] ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple SR updates" , update_rrds_test ~timestamp:0. ~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] + ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_host_dss:[] ) From a227018431c8123285bcf085aa56218781c0bd6e Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 9 Jun 2025 03:04:25 +0100 Subject: [PATCH 257/492] Update datamodel_lifecycle (25.21.0) Update all `25.20.0-next` to `25.21.0` in `datamodel_lifecycle.ml`. Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_lifecycle.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 1939c05269a..b90cd441f05 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -98,13 +98,13 @@ let prototyped_of_field = function | "SM", "host_pending_features" -> Some "24.37.0" | "host", "console_idle_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "ssh_expiry" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "ssh_enabled_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "ssh_enabled" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> @@ -222,9 +222,9 @@ let prototyped_of_message = function | "VTPM", "create" -> Some "22.26.0" | "host", "set_console_idle_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "set_ssh_enabled_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "host", "disable_ssh" -> Some "25.13.0" | "host", "enable_ssh" -> @@ -248,9 +248,9 @@ let prototyped_of_message = function | "VM", "set_groups" -> Some "24.19.1" | "pool", "set_console_idle_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "pool", "set_ssh_enabled_timeout" -> - Some "25.20.0-next" + Some "25.21.0" | "pool", "disable_ssh" -> Some "25.13.0" | "pool", "enable_ssh" -> From da12baba860c37c5fc6fd476d17fb4fc14c6c59e Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Jun 2025 10:50:09 +0100 Subject: [PATCH 258/492] CA-410085: Improving clearing cgroup after vfork The /sys/fs/cgroup/systemd/cgroup.procs file is not always present, particularly in updated Linux systems with newer cgroup and SystemD. So fallback to root /sys/fs/cgroup/cgroup.procs. Also handle and report errors back to Ocaml. Although SystemD discourage handling cgroups without service configuration changes the root cgroup is a bit special as receiving processes from multiple sources, including the kernel. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/helper/vfork_helper.c | 47 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/ocaml/forkexecd/helper/vfork_helper.c b/ocaml/forkexecd/helper/vfork_helper.c index 434afba6126..0afd285e094 100644 --- a/ocaml/forkexecd/helper/vfork_helper.c +++ b/ocaml/forkexecd/helper/vfork_helper.c @@ -335,14 +335,49 @@ reset_signal_handlers(void) static void clear_cgroup(void) { - int fd = open("/sys/fs/cgroup/systemd/cgroup.procs", O_WRONLY|O_CLOEXEC); - if (fd >= 0) { - char string_pid[32]; - int ignored __attribute__((unused)); - sprintf(string_pid, "%d\n", (int) getpid()); - ignored = write(fd, string_pid, strlen(string_pid)); + // list of files to try, terminated by NULL + static const char *const cgroup_files[] = { + "/sys/fs/cgroup/systemd/cgroup.procs", + "/sys/fs/cgroup/cgroup.procs", + NULL + }; + + char string_pid[32]; + int last_error = 0; + const char *last_error_operation = NULL; + const char *last_fn = NULL; + + snprintf(string_pid, sizeof(string_pid), "%ld\n", (long int) getpid()); + + for (const char *const *fn = cgroup_files; *fn != NULL; ++fn) { + last_fn = *fn; + int fd = open(*fn, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + last_error = errno; + last_error_operation = "opening"; + continue; + } + + // Here we are writing to a virtual file system, partial write is + // not possible. + ssize_t written = write(fd, string_pid, strlen(string_pid)); + if (written < 0) { + last_error = errno; + last_error_operation = "writing"; + } + // Error ignored, we are using a virtual file system, only potential + // errors would be if we have a race and the file was replaced or a + // memory error in the kernel. close(fd); + if (written >= 0) + return; } + + // If we reach this point something went wrong. + // Report error and exit, unless we are not root user, we should be + // root so probably we are testing. + if (last_error_operation && geteuid() == 0) + error(last_error, "Error %s file %s", last_error_operation, last_fn); } static const char * From bd5fe182f5b9fac9cd5aefde34622ed1ac0d9f9c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 3 Jun 2025 11:36:58 +0100 Subject: [PATCH 259/492] Adapt code to new mirage-crypto (CP-308222) Unfortunately mirage-crypto has accumulated breaking changes: - Cstructs have been replaced with strings - The digestif library has replaced ad-hoc hash implementation A deprecation has happened as well: - RNG initialization has changed Signed-off-by: Pau Ruiz Safont --- ocaml/gencert/dune | 4 ++-- ocaml/gencert/lib.ml | 23 ++++++++--------------- ocaml/gencert/selfcert.ml | 7 ++----- ocaml/gencert/selfcert.mli | 2 +- ocaml/gencert/test_lib.ml | 8 +++----- ocaml/tests/dune | 2 +- ocaml/tests/test_certificates.ml | 2 +- ocaml/xapi-aux/networking_info.ml | 10 ++++++---- ocaml/xapi-aux/networking_info.mli | 2 +- ocaml/xapi/cert_refresh.ml | 2 +- ocaml/xapi/certificates.ml | 8 +++----- ocaml/xapi/certificates.mli | 5 ++--- ocaml/xapi/certificates_sync.ml | 4 +--- ocaml/xapi/dune | 1 + ocaml/xapi/xapi_db_upgrade.ml | 1 - ocaml/xapi/xapi_session.ml | 4 ++-- 16 files changed, 35 insertions(+), 50 deletions(-) diff --git a/ocaml/gencert/dune b/ocaml/gencert/dune index 600811a13b6..608b274963f 100644 --- a/ocaml/gencert/dune +++ b/ocaml/gencert/dune @@ -6,7 +6,7 @@ (libraries angstrom astring - cstruct + digestif forkexec mirage-crypto mirage-crypto-pk @@ -52,7 +52,7 @@ (modules test_lib test_pem) (libraries alcotest - cstruct + digestif fmt gencertlib mirage-crypto diff --git a/ocaml/gencert/lib.ml b/ocaml/gencert/lib.ml index d0794633de9..b25f4db2633 100644 --- a/ocaml/gencert/lib.ml +++ b/ocaml/gencert/lib.ml @@ -34,8 +34,7 @@ let validate_private_key pkcs8_private_key = let key_type = X509.(Key_type.to_string (Private_key.key_type key)) in Error (`Msg (server_certificate_key_algorithm_not_supported, [key_type])) in - let raw_pem = Cstruct.of_string pkcs8_private_key in - X509.Private_key.decode_pem raw_pem + X509.Private_key.decode_pem pkcs8_private_key |> R.reword_error (fun (`Msg err_msg) -> let unknown_algorithm = "Unknown algorithm " in if Astring.String.is_prefix ~affix:"multi-prime RSA" err_msg then @@ -56,9 +55,8 @@ let validate_private_key pkcs8_private_key = ) >>= ensure_rsa_key_length -let pem_of_string x ~error_invalid = - let raw_pem = Cstruct.of_string x in - X509.Certificate.decode_pem raw_pem +let decode_cert pem ~error_invalid = + X509.Certificate.decode_pem pem |> R.reword_error (fun (`Msg err_msg) -> D.info {|Failed to validate certificate because "%s"|} err_msg ; `Msg (error_invalid, []) @@ -76,7 +74,7 @@ let assert_not_expired ~now certificate ~error_not_yet ~error_expired = let _validate_not_expired ~now (blob : string) ~error_invalid ~error_not_yet ~error_expired = - pem_of_string blob ~error_invalid >>= fun cert -> + decode_cert blob ~error_invalid >>= fun cert -> assert_not_expired ~now cert ~error_not_yet ~error_expired let validate_not_expired x ~error_not_yet ~error_expired ~error_invalid = @@ -101,8 +99,7 @@ let validate_pem_chain ~pem_leaf ~pem_chain now private_key = Error (`Msg (server_certificate_signature_not_supported, [])) in let validate_chain pem_chain = - let raw_pem = Cstruct.of_string pem_chain in - X509.Certificate.decode_pem_multiple raw_pem |> function + X509.Certificate.decode_pem_multiple pem_chain |> function | Ok (_ :: _ as certs) -> Ok certs | Ok [] -> @@ -135,17 +132,13 @@ let install_server_certificate ~pem_chain ~pem_leaf ~pkcs8_private_key ~server_cert_path ~cert_gid = let now = Ptime_clock.now () in validate_private_key pkcs8_private_key >>= fun priv -> - let pkcs8_private_key = - X509.Private_key.encode_pem priv |> Cstruct.to_string - in + let pkcs8_private_key = X509.Private_key.encode_pem priv in validate_pem_chain ~pem_leaf ~pem_chain now priv >>= fun (cert, chain) -> - let pem_leaf = X509.Certificate.encode_pem cert |> Cstruct.to_string in + let pem_leaf = X509.Certificate.encode_pem cert in Option.fold ~none:(Ok [pkcs8_private_key; pem_leaf]) ~some:(fun chain -> - let pem_chain = - X509.Certificate.encode_pem_multiple chain |> Cstruct.to_string - in + let pem_chain = X509.Certificate.encode_pem_multiple chain in Ok [pkcs8_private_key; pem_leaf; pem_chain] ) chain diff --git a/ocaml/gencert/selfcert.ml b/ocaml/gencert/selfcert.ml index 3d840d34c2a..68ff2125dea 100644 --- a/ocaml/gencert/selfcert.ml +++ b/ocaml/gencert/selfcert.ml @@ -43,7 +43,7 @@ let valid_from' date = (* Needed to initialize the rng to create random serial codes when signing certificates *) -let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) +let () = Mirage_crypto_rng_unix.use_default () (** [write_cert] writes a PKCS12 file to [path]. The typical file extension would be ".pem". It attempts to do that atomically by @@ -117,7 +117,6 @@ let generate_pub_priv_key length = in let* privkey = rsa_string - |> Cstruct.of_string |> X509.Private_key.decode_pem |> R.reword_error (fun _ -> R.msg "decoding private key failed") in @@ -132,9 +131,7 @@ let selfsign' issuer extensions key_length expiration = let* cert = sign expiration privkey pubkey issuer req extensions in let key_pem = X509.Private_key.encode_pem privkey in let cert_pem = X509.Certificate.encode_pem cert in - let pkcs12 = - String.concat "\n\n" [Cstruct.to_string key_pem; Cstruct.to_string cert_pem] - in + let pkcs12 = String.concat "\n\n" [key_pem; cert_pem] in Ok (cert, pkcs12) let selfsign issuer extensions key_length expiration certfile cert_gid = diff --git a/ocaml/gencert/selfcert.mli b/ocaml/gencert/selfcert.mli index 2e073725e02..d8ce652f8a5 100644 --- a/ocaml/gencert/selfcert.mli +++ b/ocaml/gencert/selfcert.mli @@ -23,7 +23,7 @@ val write_certs : string -> int -> string -> (unit, [> Rresult.R.msg]) result val host : name:string -> dns_names:string list - -> ips:Cstruct.t list + -> ips:string list -> ?valid_from:Ptime.t (* default: now *) -> valid_for_days:int -> string diff --git a/ocaml/gencert/test_lib.ml b/ocaml/gencert/test_lib.ml index ce657db330b..e2a71225d90 100644 --- a/ocaml/gencert/test_lib.ml +++ b/ocaml/gencert/test_lib.ml @@ -8,7 +8,7 @@ open Rresult.R.Infix let ( let* ) = Rresult.R.bind (* Initialize RNG for testing certificates *) -let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) +let () = Mirage_crypto_rng_unix.use_default () let time_of_rfc3339 date = match Ptime.of_rfc3339 date with @@ -204,7 +204,7 @@ let test_invalid_cert pem_leaf time pkey error reason = "Error must match" (error, reason) msg let load_pkcs8 name = - X509.Private_key.decode_pem (Cstruct.of_string (load_test_data name)) + X509.Private_key.decode_pem (load_test_data name) |> Rresult.R.reword_error (fun (`Msg msg) -> `Msg (Printf.sprintf "Could not load private key with name '%s': %s" name @@ -222,7 +222,6 @@ let sign_leaf_cert host_name digest pkey_leaf = load_pkcs8 "pkey_rsa_4096" >>= fun pkey_sign -> sign_cert host_name ~pkey_sign digest pkey_leaf >>| X509.Certificate.encode_pem - >>| Cstruct.to_string let valid_leaf_cert_tests = List.map @@ -300,8 +299,7 @@ let valid_chain_cert_tests = (pkey_root, Ok []) key_chain in sign_leaf_cert host_name `SHA256 pkey_leaf >>= fun pem_leaf -> - chain >>| X509.Certificate.encode_pem_multiple >>| Cstruct.to_string - >>| fun pem_chain -> + chain >>| X509.Certificate.encode_pem_multiple >>| fun pem_chain -> test_valid_cert_chain ~pem_leaf ~pem_chain time pkey_leaf in [("Validation of a supported certificate chain", `Quick, test_cert)] diff --git a/ocaml/tests/dune b/ocaml/tests/dune index c4b590c6cb8..7a3620fb6c3 100644 --- a/ocaml/tests/dune +++ b/ocaml/tests/dune @@ -15,7 +15,7 @@ angstrom astring cstruct - + digestif fmt http_lib httpsvr diff --git a/ocaml/tests/test_certificates.ml b/ocaml/tests/test_certificates.ml index 96017d3156a..dcd018e0993 100644 --- a/ocaml/tests/test_certificates.ml +++ b/ocaml/tests/test_certificates.ml @@ -13,7 +13,7 @@ let pp_hash_test = (fun (hashable, expected) -> let test_hash () = let digest = - Cstruct.of_string hashable |> Mirage_crypto.Hash.digest `SHA256 + Digestif.SHA256.(digest_string hashable |> to_raw_string) in Alcotest.(check string) "fingerprints must match" expected diff --git a/ocaml/xapi-aux/networking_info.ml b/ocaml/xapi-aux/networking_info.ml index 52de3fb12f6..b11e5b3d2e1 100644 --- a/ocaml/xapi-aux/networking_info.ml +++ b/ocaml/xapi-aux/networking_info.ml @@ -55,11 +55,13 @@ let dns_names () = ) |> Astring.String.uniquify -let ipaddr_to_cstruct = function +let ipaddr_to_octets = function | Ipaddr.V4 addr -> - Cstruct.of_string (Ipaddr.V4.to_octets addr) + Ipaddr.V4.to_octets addr | Ipaddr.V6 addr -> - Cstruct.of_string (Ipaddr.V6.to_octets addr) + Ipaddr.V6.to_octets addr + +let ipaddr_to_cstruct c = ipaddr_to_octets c |> Cstruct.of_string let get_management_ip_addrs ~dbg = let iface = Inventory.lookup Inventory._management_interface in @@ -113,7 +115,7 @@ let get_host_certificate_subjects ~dbg = | Ok (preferred, others) -> let ips = List.(rev_append (rev preferred) others) in Option.fold ~none:(Error IP_missing) - ~some:(fun ip -> Ok (List.map ipaddr_to_cstruct ips, ip)) + ~some:(fun ip -> Ok (List.map ipaddr_to_octets ips, ip)) (List.nth_opt ips 0) in let dns_names = dns_names () in diff --git a/ocaml/xapi-aux/networking_info.mli b/ocaml/xapi-aux/networking_info.mli index ced93d30dd5..70ac0ff85b9 100644 --- a/ocaml/xapi-aux/networking_info.mli +++ b/ocaml/xapi-aux/networking_info.mli @@ -31,6 +31,6 @@ val get_management_ip_addr : dbg:string -> (string * Cstruct.t) option val get_host_certificate_subjects : dbg:string - -> (string * string list * Cstruct.t list, management_ip_error) Result.t + -> (string * string list * string list, management_ip_error) Result.t (** [get_host_certificate_subjects ~dbg] returns the main, dns names and ip addresses that identify the host in secure connections. *) diff --git a/ocaml/xapi/cert_refresh.ml b/ocaml/xapi/cert_refresh.ml index 12ab75dc230..213d0abc224 100644 --- a/ocaml/xapi/cert_refresh.ml +++ b/ocaml/xapi/cert_refresh.ml @@ -79,7 +79,7 @@ let host ~__context ~type' = Server_error (cannot_contact_host, [Ref.string_of (HostSet.choose unreachable)]) ) ; - let content = X509.Certificate.encode_pem cert |> Cstruct.to_string in + let content = X509.Certificate.encode_pem cert in (* distribute public part of new cert in pool *) Cert_distrib.distribute_new_host_cert ~__context ~host ~content ; (* replace certs in file system on host *) diff --git a/ocaml/xapi/certificates.ml b/ocaml/xapi/certificates.ml index 4d9702bb439..f69497ce118 100644 --- a/ocaml/xapi/certificates.ml +++ b/ocaml/xapi/certificates.ml @@ -32,7 +32,7 @@ open D type t_trusted = CA_Certificate | CRL let pem_of_string x = - match Cstruct.of_string x |> X509.Certificate.decode_pem with + match X509.Certificate.decode_pem x with | Error _ -> D.error "pem_of_string: failed to parse certificate string" ; raise @@ -75,7 +75,7 @@ let to_string = function CA_Certificate -> "CA certificate" | CRL -> "CRL" adding a colon between every octet, in uppercase. *) let pp_hash hash = - let hex = Hex.(show @@ of_cstruct hash) in + let hex = Hex.(show @@ of_string hash) in let length = (3 * String.length hex / 2) - 1 in let value_of i = match (i + 1) mod 3 with @@ -441,9 +441,7 @@ let get_internal_server_certificate () = open Rresult let hostnames_of_pem_cert pem = - Cstruct.of_string pem - |> X509.Certificate.decode_pem - >>| X509.Certificate.hostnames + X509.Certificate.decode_pem pem >>| X509.Certificate.hostnames let install_server_certificate ~pem_chain ~pem_leaf ~pkcs8_private_key ~path = let installation = diff --git a/ocaml/xapi/certificates.mli b/ocaml/xapi/certificates.mli index 064c7e47e31..6776220df45 100644 --- a/ocaml/xapi/certificates.mli +++ b/ocaml/xapi/certificates.mli @@ -18,10 +18,9 @@ type t_trusted = CA_Certificate | CRL val pem_of_string : string -> X509.Certificate.t -val pp_hash : Cstruct.t -> string +val pp_hash : string -> string -val pp_fingerprint : - hash_type:Mirage_crypto.Hash.hash -> X509.Certificate.t -> string +val pp_fingerprint : hash_type:Digestif.hash' -> X509.Certificate.t -> string val validate_name : t_trusted -> string -> unit diff --git a/ocaml/xapi/certificates_sync.ml b/ocaml/xapi/certificates_sync.ml index a9691adf298..2ab3492ffa8 100644 --- a/ocaml/xapi/certificates_sync.ml +++ b/ocaml/xapi/certificates_sync.ml @@ -57,10 +57,8 @@ let get_server_cert path = | Error msg -> Error (`Msg (msg, [])) | Ok cert -> - let host_pem = cert.GP.host_cert in let* host_cert = - Cstruct.of_string host_pem - |> X509.Certificate.decode_pem + X509.Certificate.decode_pem cert.GP.host_cert |> R.reword_error (fun (`Msg msg) -> D.info {|Failed to decode certificate because "%s"|} msg ; `Msg (server_certificate_invalid, []) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 85f4bf030af..88213955afc 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -138,6 +138,7 @@ clock cohttp cohttp_posix + digestif domain-name ezxenstore.core fmt diff --git a/ocaml/xapi/xapi_db_upgrade.ml b/ocaml/xapi/xapi_db_upgrade.ml index f4102782916..e1f0eba63fc 100644 --- a/ocaml/xapi/xapi_db_upgrade.ml +++ b/ocaml/xapi/xapi_db_upgrade.ml @@ -930,7 +930,6 @@ let upgrade_ca_fingerprints = try let* certificate = Xapi_stdext_unix.Unixext.string_of_file filename - |> Cstruct.of_string |> X509.Certificate.decode_pem in let sha1 = diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index 1c28416dfe8..ad1e1a37a0a 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -801,12 +801,12 @@ module Caching = struct and type password = string and type session = external_auth_result - let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) + let () = Mirage_crypto_rng_unix.use_default () let create_salt () = (* Creates a Cstruct of length 8. *) let data = Mirage_crypto_rng.generate 8 in - let bytes = Cstruct.to_bytes data in + let bytes = Bytes.of_string data in (* Encode the salt as a hex string. Each byte becomes 2 hexadecimal digits, so the length is 16 (the maximum for crypt_r). *) From 61eb0aa4fdb78dbb446564bbb82c7d6dd1720067 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 5 Jun 2025 14:50:36 +0100 Subject: [PATCH 260/492] CP-308252 add VM.call_host_plugin This API call and corresponding XE implementation calls a host plugin on the host where a VM is running. It thus takes care of finding the right host, compared to Host.call_plugin where this would be left to the user. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/datamodel_vm.ml | 14 +++++++++++++ ocaml/xapi-cli-server/cli_frontend.ml | 15 +++++++++++++ ocaml/xapi-cli-server/cli_operations.ml | 12 +++++++++++ ocaml/xapi/message_forwarding.ml | 28 +++++++++++++++++++++++++ ocaml/xapi/xapi_vm.ml | 5 +++++ ocaml/xapi/xapi_vm.mli | 8 +++++++ 7 files changed, 84 insertions(+) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index b90cd441f05..fc9acec7bd1 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -245,6 +245,8 @@ let prototyped_of_message = function Some "24.17.0" | "VM", "restart_device_models" -> Some "23.30.0" + | "VM", "call_host_plugin" -> + Some "25.21.0-next" | "VM", "set_groups" -> Some "24.19.1" | "pool", "set_console_idle_timeout" -> diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 44ca1466d78..e72721b4ce0 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2098,6 +2098,19 @@ let call_plugin = ~result:(String, "Result from the plugin") ~allowed_roles:_R_VM_OP () +let call_host_plugin = + call ~name:"call_host_plugin" + ~doc:"Call an API plugin on the host where this vm resides" ~lifecycle:[] + ~params: + [ + (Ref _vm, "vm", "The vm") + ; (String, "plugin", "The name of the plugin") + ; (String, "fn", "The name of the function within the plugin") + ; (Map (String, String), "args", "Arguments for the function") + ] + ~result:(String, "Result from the plugin") + ~allowed_roles:_R_VM_OP () + let set_has_vendor_device = call ~name:"set_has_vendor_device" ~lifecycle: @@ -2545,6 +2558,7 @@ let t = ; set_groups ; query_services ; call_plugin + ; call_host_plugin ; set_has_vendor_device ; import ; set_actions_after_crash diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 57861e95001..a7874db6f0a 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -1842,6 +1842,21 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "vm-call-host-plugin" + , { + reqd= ["vm-uuid"; "plugin"; "fn"] + ; optn= ["args:"] + ; help= + "Calls function fn within the plugin on the host where the VM is \ + running with arguments (args:key=value). To pass a \"value\" string \ + with special characters in it (e.g. new line), an alternative \ + syntax args:key:file=local_file can be used in place, where the \ + content of local_file will be retrieved and assigned to \"key\" as \ + a whole." + ; implementation= With_fd Cli_operations.vm_call_host_plugin + ; flags= [] + } + ) ; ( "snapshot-export-to-template" , { reqd= ["filename"; "snapshot-uuid"] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 25e4c84ce79..e2bce3c38b8 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3516,6 +3516,18 @@ let vm_call_plugin fd printer rpc session_id params = let result = Client.VM.call_plugin ~rpc ~session_id ~vm ~plugin ~fn ~args in printer (Cli_printer.PList [result]) +let vm_call_host_plugin fd printer rpc session_id params = + let vm_uuid = List.assoc "vm-uuid" params in + let vm = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in + let plugin = List.assoc "plugin" params in + let fn = List.assoc "fn" params in + let args = read_map_params "args" params in + let args = List.map (args_file fd) args in + let result = + Client.VM.call_host_plugin ~rpc ~session_id ~vm ~plugin ~fn ~args + in + printer (Cli_printer.PList [result]) + let data_source_to_kvs ds = [ ("name_label", ds.API.data_source_name_label) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index c9268e82d3b..d1773e4f0c6 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -2030,6 +2030,34 @@ functor forward_vm_op ~local_fn ~__context ~vm ~remote_fn ) + let call_host_plugin ~__context ~vm ~plugin ~fn ~args = + info + "VM.call_host_plugin: VM = '%s'; plugin = '%s'; fn = '%s'; args = [ \ + 'hidden' ]" + (vm_uuid ~__context vm) plugin fn ; + let local_fn = Local.VM.call_host_plugin ~vm ~plugin ~fn ~args in + let remote_fn = Client.VM.call_host_plugin ~vm ~plugin ~fn ~args in + let power_state = Db.VM.get_power_state ~__context ~self:vm in + (* Insisting on running to make sure xenstore and domain exist + and the VM can react to xenstore events. Permitting Paused in + addition could be an option *) + if power_state <> `Running then + raise + Api_errors.( + Server_error + ( vm_bad_power_state + , [ + Ref.string_of vm + ; Record_util.vm_power_state_to_string `Running + ; Record_util.vm_power_state_to_string power_state + ] + ) + ) ; + with_vm_operation ~__context ~self:vm ~doc:"VM.call_host_plugin" + ~op:`call_plugin ~policy:Helpers.Policy.fail_immediately (fun () -> + forward_vm_op ~local_fn ~__context ~vm ~remote_fn + ) + let set_has_vendor_device ~__context ~self ~value = info "VM.set_has_vendor_device: VM = '%s' to %b" (vm_uuid ~__context self) value ; diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 408c28d0a39..8a1ca5e493a 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1168,6 +1168,11 @@ let call_plugin ~__context ~vm ~plugin ~fn ~args = (Api_errors.xenapi_plugin_failure, ["failed to execute fn"; msg; msg]) ) +let call_host_plugin ~__context ~vm ~plugin ~fn ~args = + (* vm is unused; was used to find the host *) + let _ = vm in + Xapi_plugins.call_plugin (Context.get_session_id __context) plugin fn args + let send_sysrq ~__context ~vm:_ ~key:_ = raise (Api_errors.Server_error (Api_errors.not_implemented, ["send_sysrq"])) diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 8559293df97..363e68b03d1 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -399,6 +399,14 @@ val call_plugin : -> args:(string * string) list -> string +val call_host_plugin : + __context:Context.t + -> vm:API.ref_VM + -> plugin:string + -> fn:string + -> args:(string * string) list + -> string + val set_has_vendor_device : __context:Context.t -> self:API.ref_VM -> value:bool -> unit From 0b9f900cbc06a1402c95f59cfc6eca97a9f4e3b7 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 9 Jun 2025 10:26:45 +0100 Subject: [PATCH 261/492] xapi: Move cpu_info keys to xapi-consts from xapi_globs to be used across modules Signed-off-by: Andrii Sultanov --- ocaml/tests/test_xapi_xenops.ml | 4 ++-- ocaml/tests/test_xenopsd_metadata.ml | 4 ++-- ocaml/xapi-consts/constants.ml | 14 ++++++++++++++ ocaml/xapi/cpuid_helpers.ml | 23 +++++++++++------------ ocaml/xapi/create_misc.ml | 12 ++++++------ ocaml/xapi/xapi_globs.ml | 14 -------------- ocaml/xapi/xapi_xenops.ml | 8 ++++---- 7 files changed, 39 insertions(+), 40 deletions(-) diff --git a/ocaml/tests/test_xapi_xenops.ml b/ocaml/tests/test_xapi_xenops.ml index 551c7d0d90f..e1f1bf048e2 100644 --- a/ocaml/tests/test_xapi_xenops.ml +++ b/ocaml/tests/test_xapi_xenops.ml @@ -62,8 +62,8 @@ let test_xapi_restart_inner () = in let flags = [ - (Xapi_globs.cpu_info_vendor_key, "AuthenticAMD") - ; (Xapi_globs.cpu_info_features_key, "deadbeef-deadbeef") + (Constants.cpu_info_vendor_key, "AuthenticAMD") + ; (Constants.cpu_info_features_key, "deadbeef-deadbeef") ] in let add_flags vm = diff --git a/ocaml/tests/test_xenopsd_metadata.ml b/ocaml/tests/test_xenopsd_metadata.ml index c052de228fa..14362e73b68 100644 --- a/ocaml/tests/test_xenopsd_metadata.ml +++ b/ocaml/tests/test_xenopsd_metadata.ml @@ -38,8 +38,8 @@ let load_vm_config __context conf = in let flags = [ - (Xapi_globs.cpu_info_vendor_key, "AuthenticAMD") - ; (Xapi_globs.cpu_info_features_key, "deadbeef-deadbeef") + (Constants.cpu_info_vendor_key, "AuthenticAMD") + ; (Constants.cpu_info_features_key, "deadbeef-deadbeef") ] in Db.VM.set_last_boot_CPU_flags ~__context ~self ~value:flags ; diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 185f9669a7c..07481abc54d 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -177,6 +177,20 @@ let hvm_boot_params_order = "order" let hvm_default_boot_order = "cd" +(** Keys for different CPUID policies in {Host,Pool}.cpu_info *) + +let cpu_info_vendor_key = "vendor" + +let cpu_info_features_key = "features" + +let cpu_info_features_pv_key = "features_pv" + +let cpu_info_features_hvm_key = "features_hvm" + +let cpu_info_features_pv_host_key = "features_pv_host" + +let cpu_info_features_hvm_host_key = "features_hvm_host" + (* Key we put in VM.other_config when we upgrade a VM from Zurich/Geneva to Rio *) let vm_upgrade_time = "upgraded at" diff --git a/ocaml/xapi/cpuid_helpers.ml b/ocaml/xapi/cpuid_helpers.ml index 1bf6731efad..d001b7ab73f 100644 --- a/ocaml/xapi/cpuid_helpers.ml +++ b/ocaml/xapi/cpuid_helpers.ml @@ -12,8 +12,6 @@ * GNU Lesser General Public License for more details. *) -open Xapi_globs - module D = Debug.Make (struct let name = "cpuid_helpers" end) open D @@ -24,20 +22,19 @@ let features_t t = (Xenops_interface.CPU_policy.of_string t) Xenops_interface.CPU_policy.to_string -let features = - Map_check.(field Xapi_globs.cpu_info_features_key (features_t `vm)) +let features = Map_check.(field Constants.cpu_info_features_key (features_t `vm)) let features_pv = - Map_check.(field Xapi_globs.cpu_info_features_pv_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_pv_key (features_t `host)) let features_hvm = - Map_check.(field Xapi_globs.cpu_info_features_hvm_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_hvm_key (features_t `host)) let features_pv_host = - Map_check.(field Xapi_globs.cpu_info_features_pv_host_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_pv_host_key (features_t `host)) let features_hvm_host = - Map_check.(field Xapi_globs.cpu_info_features_hvm_host_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_hvm_host_key (features_t `host)) let cpu_count = Map_check.(field "cpu_count" int) @@ -55,7 +52,7 @@ let get_flags_for_vm ~__context domain_type cpu_info = | `pv -> features_pv in - let vendor = List.assoc cpu_info_vendor_key cpu_info in + let vendor = List.assoc Constants.cpu_info_vendor_key cpu_info in let migration = Map_check.getf features_field cpu_info in (vendor, migration) @@ -124,16 +121,18 @@ let assert_vm_is_compatible ~__context ~vm ~host = get_host_compatibility_info ~__context ~domain_type ~host () in let vm_cpu_info = vm_rec.API.vM_last_boot_CPU_flags in - if List.mem_assoc cpu_info_vendor_key vm_cpu_info then ( + if List.mem_assoc Constants.cpu_info_vendor_key vm_cpu_info then ( (* Check the VM was last booted on a CPU with the same vendor as this host's CPU. *) - let vm_cpu_vendor = List.assoc cpu_info_vendor_key vm_cpu_info in + let vm_cpu_vendor = + List.assoc Constants.cpu_info_vendor_key vm_cpu_info + in debug "VM last booted on CPU of vendor %s; host CPUs are of vendor %s" vm_cpu_vendor host_cpu_vendor ; if vm_cpu_vendor <> host_cpu_vendor then fail "VM last booted on a host which had a CPU from a different vendor." ) ; - if List.mem_assoc cpu_info_features_key vm_cpu_info then ( + if List.mem_assoc Constants.cpu_info_features_key vm_cpu_info then ( (* Check the VM was last booted on a CPU whose features are a subset of the features of this host's CPU. *) let vm_cpu_features = Map_check.getf features vm_cpu_info in debug diff --git a/ocaml/xapi/create_misc.ml b/ocaml/xapi/create_misc.ml index cd0a97b4115..cd3412156cd 100644 --- a/ocaml/xapi/create_misc.ml +++ b/ocaml/xapi/create_misc.ml @@ -579,16 +579,16 @@ let create_host_cpu ~__context host_info = ; ("model", cpu_info.model) ; ("stepping", cpu_info.stepping) ; ("flags", cpu_info.flags) - ; ( Xapi_globs.cpu_info_features_pv_key + ; ( Constants.cpu_info_features_pv_key , CPU_policy.to_string cpu_info.features_pv ) - ; ( Xapi_globs.cpu_info_features_hvm_key + ; ( Constants.cpu_info_features_hvm_key , CPU_policy.to_string cpu_info.features_hvm ) - ; ( Xapi_globs.cpu_info_features_hvm_host_key + ; ( Constants.cpu_info_features_hvm_host_key , CPU_policy.to_string cpu_info.features_hvm_host ) - ; ( Xapi_globs.cpu_info_features_pv_host_key + ; ( Constants.cpu_info_features_pv_host_key , CPU_policy.to_string cpu_info.features_pv_host ) ] @@ -698,8 +698,8 @@ let create_pool_cpuinfo ~__context = ("vendor", "") ; ("socket_count", "0") ; ("cpu_count", "0") - ; (Xapi_globs.cpu_info_features_pv_host_key, "") - ; (Xapi_globs.cpu_info_features_hvm_host_key, "") + ; (Constants.cpu_info_features_pv_host_key, "") + ; (Constants.cpu_info_features_hvm_host_key, "") ] in let pool_cpuinfo = List.fold_left merge zero all_host_cpus in diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 22908a496b1..e3957deea71 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -631,20 +631,6 @@ let auth_type_PAM = "PAM" let event_hook_auth_on_xapi_initialize_succeeded = ref false -(** {2 CPUID feature masking} *) - -let cpu_info_vendor_key = "vendor" - -let cpu_info_features_key = "features" - -let cpu_info_features_pv_key = "features_pv" - -let cpu_info_features_hvm_key = "features_hvm" - -let cpu_info_features_pv_host_key = "features_pv_host" - -let cpu_info_features_hvm_host_key = "features_hvm_host" - (** Metrics *) let metrics_root = "/dev/shm/metrics" diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 2f0add74368..e9f5174f90b 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -1213,7 +1213,7 @@ module MD = struct if not (List.mem_assoc Vm_platform.featureset platformdata) then let featureset = match - List.assoc_opt Xapi_globs.cpu_info_features_key + List.assoc_opt Constants.cpu_info_features_key vm.API.vM_last_boot_CPU_flags with | _ when vm.API.vM_power_state <> `Suspended -> @@ -2418,12 +2418,12 @@ let update_vm ~__context id = state.Vm.featureset ; let vendor = Db.Host.get_cpu_info ~__context ~self:localhost - |> List.assoc Xapi_globs.cpu_info_vendor_key + |> List.assoc Constants.cpu_info_vendor_key in let value = [ - (Xapi_globs.cpu_info_vendor_key, vendor) - ; (Xapi_globs.cpu_info_features_key, state.Vm.featureset) + (Constants.cpu_info_vendor_key, vendor) + ; (Constants.cpu_info_features_key, state.Vm.featureset) ] in Db.VM.set_last_boot_CPU_flags ~__context ~self ~value From 6ffa2998cecbfb80799205e1e055e29336c68561 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 6 Jun 2025 14:43:25 +0100 Subject: [PATCH 262/492] xapi-client: Add Tasks.wait_for_all_with_callback Add a new function that will invoke a callback every time one of the tasks is deemed non-pending. This will allow its users to: 1) track the progress of tasks within the submitted batch 2) schedule new tasks to replace the completed ones Modify wait_for_all_inner so that it adds the tasks returned from the callback to its internal set on every new task completion. Signed-off-by: Andrii Sultanov --- ocaml/xapi-client/tasks.ml | 54 +++++++++++++++++++++++++++++-------- ocaml/xapi-client/tasks.mli | 21 +++++++++++++++ 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/ocaml/xapi-client/tasks.ml b/ocaml/xapi-client/tasks.ml index c62f681d602..a9da21890ec 100644 --- a/ocaml/xapi-client/tasks.ml +++ b/ocaml/xapi-client/tasks.ml @@ -23,7 +23,7 @@ module TaskSet = Set.Make (struct end) (* Return once none of the tasks have a `pending status. *) -let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = +let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback = let classes = List.map (fun task -> Printf.sprintf "task/%s" (Ref.string_of task)) tasks in @@ -36,7 +36,12 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = in let timer = Mtime_clock.counter () in let timeout = 5.0 in - let rec wait ~token ~task_set = + let get_new_classes task_set = + TaskSet.fold + (fun task l -> Printf.sprintf "task/%s" (Ref.string_of task) :: l) + task_set [] + in + let rec wait ~token ~task_set ~completed_task_count ~classes = if TaskSet.is_empty task_set then true else @@ -58,24 +63,39 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = List.map Event_helper.record_of_event event_from.events in (* If any records indicate that a task is no longer pending, remove that task from the set. *) - let pending_task_set = + let pending_task_set, completed_task_count, classes = List.fold_left - (fun task_set' record -> + (fun (task_set', completed_task_count, _) record -> match record with | Event_helper.Task (t, Some t_rec) -> if TaskSet.mem t task_set' && t_rec.API.task_status <> `pending then - TaskSet.remove t task_set' + let new_task_set = TaskSet.remove t task_set' in + let completed_task_count = completed_task_count + 1 in + + (* Call the callback function, wait for new tasks if any *) + let tasks_to_add = callback completed_task_count t in + let new_task_set = + List.fold_left + (fun task_set task -> TaskSet.add task task_set) + new_task_set tasks_to_add + in + ( new_task_set + , completed_task_count + , get_new_classes new_task_set + ) else - task_set' + (task_set', completed_task_count, classes) | _ -> - task_set' + (task_set', completed_task_count, classes) ) - task_set records + (task_set, completed_task_count, classes) + records in wait ~token:event_from.Event_types.token ~task_set:pending_task_set + ~completed_task_count ~classes in let token = "" in let task_set = @@ -83,17 +103,27 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = (fun task_set' task -> TaskSet.add task task_set') TaskSet.empty tasks in - wait ~token ~task_set + wait ~token ~task_set ~completed_task_count:0 ~classes let wait_for_all ~rpc ~session_id ~tasks = - wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks |> ignore + wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks + ~callback:(fun _ _ -> [] + ) + |> ignore + +let wait_for_all_with_callback ~rpc ~session_id ~tasks ~callback = + wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks ~callback + |> ignore let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = let wait_or_cancel () = D.info "Waiting for %d tasks, timeout: %.3fs" (List.length tasks) timeout ; if not - (wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some timeout) ~tasks) + (wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some timeout) ~tasks + ~callback:(fun _ _ -> [] + ) + ) then ( D.info "Canceling tasks" ; List.iter @@ -104,6 +134,8 @@ let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = tasks ; (* cancel is not immediate, give it a reasonable chance to take effect *) wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some 60.) ~tasks + ~callback:(fun _ _ -> [] + ) |> ignore ; false ) else diff --git a/ocaml/xapi-client/tasks.mli b/ocaml/xapi-client/tasks.mli index 8989b01716f..ab360414bc8 100644 --- a/ocaml/xapi-client/tasks.mli +++ b/ocaml/xapi-client/tasks.mli @@ -20,6 +20,27 @@ val wait_for_all : (** [wait_for_all ~rpc ~session_id ~tasks] returns when all of [tasks] are in some non-pending state. *) +val wait_for_all_with_callback : + rpc:(Rpc.call -> Rpc.response) + -> session_id:API.ref_session + -> tasks:API.ref_task list + -> callback:(int -> API.ref_task -> API.ref_task list) + -> unit +(** [wait_for_all_with_callback ~rpc ~session_id ~tasks ~callback] returns when + all of [tasks] are in some non-pending state. When one of the [tasks] is + completed, [callback overall_completed_task_count] is invoked, which returns + a list of tasks that need to be added to [tasks] and waited on as well. + + This allows, for example, to implement a system where tasks are processed + in batches of *constant* size X, with new tasks being started as soon as at + least one slot in the batch is freed, instead of waiting for the whole batch + to finish (and potentially being slowed down by a single worst performer). + + The callback could instead just perform some side-effect (set the progress + of the overall task representing progress of individual units, for example) + and return an empty list. + *) + val with_tasks_destroy : rpc:(Rpc.call -> Rpc.response) -> session_id:API.ref_session From 64f6f631b5b3e180c7ac25eef6d244c537967b85 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 6 Jun 2025 14:47:54 +0100 Subject: [PATCH 263/492] xapi_host: Parallelize host evacuation even more With bab83d9d787e206a7445df6a12c4121d4c823547, host evacuation was parallelized by grouping VMs into batches, and starting a new batch once the previous one has finished. This means that a single slow VM can potentially slow down the whole evacuation. Instead use Tasks.wait_for_all_with_callback to schedule a new migration as soon as any of the previous ones have finished, thus maintaining a constant flow of n migrations. Signed-off-by: Andrii Sultanov --- ocaml/xapi-client/tasks.mli | 2 + ocaml/xapi/xapi_host.ml | 73 ++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/ocaml/xapi-client/tasks.mli b/ocaml/xapi-client/tasks.mli index ab360414bc8..a396c569aef 100644 --- a/ocaml/xapi-client/tasks.mli +++ b/ocaml/xapi-client/tasks.mli @@ -12,6 +12,8 @@ * GNU Lesser General Public License for more details. *) +module TaskSet : Set.S with type elt = API.ref_task + val wait_for_all : rpc:(Rpc.call -> Rpc.response) -> session_id:API.ref_session diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 6fd5c7f0721..405733baa78 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -649,8 +649,9 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = raise (Api_errors.Server_error (code, params)) in - (* execute [n] asynchronous API calls [api_fn] for [xs] and wait for them to - finish before executing the next batch. *) + (* execute [plans_length] asynchronous API calls [api_fn] for [xs] in batches + of [n] at a time, scheduling a new call as soon as one of the tasks from + the previous batch is completed *) let batch ~__context n api_fn xs = let finally = Xapi_stdext_pervasives.Pervasiveext.finally in let destroy = Client.Client.Task.destroy in @@ -675,27 +676,55 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = fail task "unexpected status of migration task" in - let rec loop xs = - match take n xs with - | [], _ -> - () - | head, tail -> - Helpers.call_api_functions ~__context @@ fun rpc session_id -> - let tasks = List.map (api_fn ~rpc ~session_id) head in - finally - (fun () -> - Tasks.wait_for_all ~rpc ~session_id ~tasks ; - List.iter assert_success tasks ; - let tail_length = List.length tail |> float in - let progress = 1.0 -. (tail_length /. plans_length) in - TaskHelper.set_progress ~__context progress + Helpers.call_api_functions ~__context @@ fun rpc session_id -> + ( match take n xs with + | [], _ -> + () + | head, tasks_left -> + let tasks_left = ref tasks_left in + let initial_task_batch = List.map (api_fn ~rpc ~session_id) head in + let tasks_pending = + ref + (List.fold_left + (fun task_set' task -> Tasks.TaskSet.add task task_set') + Tasks.TaskSet.empty initial_task_batch ) - (fun () -> - List.iter (fun self -> destroy ~rpc ~session_id ~self) tasks - ) ; - loop tail - in - loop xs ; + in + + let single_task_progress = 1.0 /. plans_length in + let on_each_task_completion completed_task_count completed_task = + (* Clean up the completed task *) + assert_success completed_task ; + destroy ~rpc ~session_id ~self:completed_task ; + tasks_pending := Tasks.TaskSet.remove completed_task !tasks_pending ; + + (* Update progress *) + let progress = + Int.to_float completed_task_count *. single_task_progress + in + TaskHelper.set_progress ~__context progress ; + + (* Schedule a new task, if there are any left *) + match !tasks_left with + | [] -> + [] + | task_to_schedule :: left -> + tasks_left := left ; + let new_task = api_fn ~rpc ~session_id task_to_schedule in + tasks_pending := Tasks.TaskSet.add new_task !tasks_pending ; + [new_task] + in + finally + (fun () -> + Tasks.wait_for_all_with_callback ~rpc ~session_id + ~tasks:initial_task_batch ~callback:on_each_task_completion + ) + (fun () -> + Tasks.TaskSet.iter + (fun self -> destroy ~rpc ~session_id ~self) + !tasks_pending + ) + ) ; TaskHelper.set_progress ~__context 1.0 in From c23f1a3d702dad90d15062cccac7f5b8f0aac6a3 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 9 Jun 2025 10:27:30 +0100 Subject: [PATCH 264/492] xapi-cli-server: Fix host-get-cpu-features and add pool-get-cpu-features Host.cpu_info list no longer contains a value associated with a "features" key, but the CLI implementation was hardcoded to expect it. Instead use the cpu_info_features keys from xapi-consts. Add the pool version of the command. Additionally document their output format. Signed-off-by: Andrii Sultanov --- ocaml/xapi-cli-server/cli_frontend.ml | 18 +++++++++++-- ocaml/xapi-cli-server/cli_operations.ml | 35 +++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 57861e95001..34d107ef87a 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -535,6 +535,18 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "pool-get-cpu-features" + , { + reqd= [] + ; optn= [] + ; help= + {|Prints a hexadecimal representation of the pool's physical-CPU + features for PV and HVM VMs. These are combinations of all the + hosts' policies and are used when starting new VMs in a pool.|} + ; implementation= No_fd Cli_operations.pool_get_cpu_features + ; flags= [] + } + ) ; ( "host-is-in-emergency-mode" , { reqd= [] @@ -1018,8 +1030,10 @@ let rec cmdtable_data : (string * cmd_spec) list = reqd= [] ; optn= ["uuid"] ; help= - "Prints a hexadecimal representation of the host's physical-CPU \ - features." + {|Prints a hexadecimal representation of the host's physical-CPU + features for PV and HVM VMs. features_{hvm,pv} are "maximum" + featuresets the host will accept during migrations, and + features_{hvm,pv}_host will be used to start new VMs.|} ; implementation= No_fd Cli_operations.host_get_cpu_features ; flags= [] } diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 25e4c84ce79..c2e67cf3764 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -6799,6 +6799,28 @@ let pool_get_guest_secureboot_readiness printer rpc session_id params = (Record_util.pool_guest_secureboot_readiness_to_string result) ) +let cpu_info_features_of feature_keys cpu_info = + let ( let* ) = Option.bind in + List.filter_map + (fun key -> + let* features = List.assoc_opt key cpu_info in + Some (key, features) + ) + feature_keys + +let pool_get_cpu_features printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + let cpu_info = Client.Pool.get_cpu_info ~rpc ~session_id ~self:pool in + + let feature_keys = + [ + Constants.cpu_info_features_pv_host_key + ; Constants.cpu_info_features_hvm_host_key + ] + in + let features = cpu_info_features_of feature_keys cpu_info in + printer (Cli_printer.PTable [features]) + let pool_sync_bundle fd _printer rpc session_id params = let filename_opt = List.assoc_opt "filename" params in match filename_opt with @@ -6968,8 +6990,17 @@ let host_get_cpu_features printer rpc session_id params = get_host_from_session rpc session_id in let cpu_info = Client.Host.get_cpu_info ~rpc ~session_id ~self:host in - let features = List.assoc "features" cpu_info in - printer (Cli_printer.PMsg features) + + let feature_keys = + [ + Constants.cpu_info_features_pv_key + ; Constants.cpu_info_features_hvm_key + ; Constants.cpu_info_features_pv_host_key + ; Constants.cpu_info_features_hvm_host_key + ] + in + let features = cpu_info_features_of feature_keys cpu_info in + printer (Cli_printer.PTable [features]) let host_enable_display printer rpc session_id params = let host = From 0c961bb9d34666950ca2447b28b9b4b868fe74a9 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 9 Jun 2025 16:26:31 +0100 Subject: [PATCH 265/492] github: keep scheduled yangtze's runs working at least for a while longer... Mirrors the changes in the 1.249 LCM branch: #6473 Signed-off-by: Pau Ruiz Safont --- .github/workflows/1.249-lcm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/1.249-lcm.yml b/.github/workflows/1.249-lcm.yml index 8057b255a92..8ba69e28ec2 100644 --- a/.github/workflows/1.249-lcm.yml +++ b/.github/workflows/1.249-lcm.yml @@ -10,7 +10,7 @@ on: jobs: python-test: name: Python tests - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 permissions: contents: read strategy: @@ -28,7 +28,7 @@ jobs: ocaml-test: name: Ocaml tests - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout code From fd49f35c54b8bb12bc84868416ff3011a5ac28b7 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Jun 2025 17:38:13 +0100 Subject: [PATCH 266/492] rrdd: Avoid missing aggregation of metrics from newly destroyed domains Currently rrdd needs to know when a metric comes from a newly created domain, (after a local migration, for example). This is because when a new domain is created the counters start from zero again. This needs special logic for aggregating metrics since xcp-rrdd needs to provide continuity of metrics of a VM with a UUID, even if the domid changes. Previously rrdd fetched the data about domains before metrics from plugins were collected, and reused the data for self-reported metrics. While this meant that for self-reported metrics it was impossible to miss collected information, for plugin metrics it meant that for created and destroyed domains, the between between domain id and VM UUID was not available. With the current change the domain ids and VM UUIDs are collected every iteration of the monitor loop, and kept for one more iteration, so domains destroyed in the last iteration are remembered and not missed. With this done it's now safe to move the host and memory metrics collection into its own plugin. Also use sequences more thoroughly in the code for transformations Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 1 - ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 258 +++++++++--------- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 19 +- 3 files changed, 140 insertions(+), 138 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 72fe076b4c4..172735708b4 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -156,7 +156,6 @@ let convert_to_owner_map dss = weren't updated on this refresh cycle. *) let update_rrds uuid_domids plugins_dss = - let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in let per_owner_flattened_map, per_plugin_map = convert_to_owner_map plugins_dss in diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 2528c009845..7f110d7e576 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -255,85 +255,124 @@ let mem_available () = let* size, kb = scan "/proc/meminfo" in match kb with "kB" -> ok size | _ -> res_error "unexpected unit: %s" kb -let dss_mem_vms doms = - List.fold_left - (fun acc (dom, uuid, domid) -> - let add_vm_metrics () = - let kib = - Xenctrl.pages_to_kib - (Int64.of_nativeint dom.Xenctrl.total_memory_pages) - in - let memory = Int64.mul kib 1024L in - let main_mem_ds = - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory" - ~description:"Memory currently allocated to VM" ~units:"B" - ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true - () - ) - in - let memory_target_opt = - with_lock Rrdd_shared.memory_targets_m (fun _ -> - Hashtbl.find_opt Rrdd_shared.memory_targets domid - ) - in - let mem_target_ds = - Option.map - (fun memory_target -> - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_target" - ~description:"Target of VM balloon driver" ~units:"B" - ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) +let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] + +module IntSet = Set.Make (Int) + +let domain_snapshot xc = + let metadata_of_domain dom = + let ( let* ) = Option.bind in + let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in + let uuid = Uuidx.to_string uuid_raw in + let domid = dom.Xenctrl.domid in + let start = String.sub uuid 0 18 in + (* Actively hide migrating VM uuids, these are temporary and xenops writes + the original and the final uuid to xenstore *) + let uuid_from_key key = + let path = Printf.sprintf "/vm/%s/%s" uuid key in + try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path)) + with Xs_protocol.Enoent _hint -> + info "Couldn't read path %s; falling back to actual uuid" path ; + uuid + in + let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in + if List.mem start uuid_blacklist then + None + else + let key = + if Astring.String.is_suffix ~affix:"000000000000" uuid then + Some "origin-uuid" + else if Astring.String.is_suffix ~affix:"000000000001" uuid then + Some "final-uuid" + else + None + in + Some (dom, stable_uuid key, domid) + in + let domains = + Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain + in + let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in + let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in + Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; + domains |> List.to_seq + +let dss_mem_vms xc = + let mem_metrics_of (dom, uuid, domid) = + let vm_metrics () = + let kib = + Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages) + in + let memory = Int64.mul kib 1024L in + let main_mem_ds = + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory" + ~description:"Memory currently allocated to VM" ~units:"B" + ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + in + let memory_target_opt = + with_lock Rrdd_shared.memory_targets_m (fun _ -> + Hashtbl.find_opt Rrdd_shared.memory_targets domid + ) + in + let mem_target_ds = + Option.map + (fun memory_target -> + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_target" + ~description:"Target of VM balloon driver" ~units:"B" + ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 + ~default:true () ) - memory_target_opt - in - let other_ds = - if domid = 0 then - match mem_available () with - | Ok mem -> - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Dom0 current free memory" - ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - | Error msg -> - let _ = - error "%s: retrieving Dom0 free memory failed: %s" - __FUNCTION__ msg - in - None - else - try - let mem_free = - Watch.IntMap.find domid !current_meminfofree_values - in + ) + memory_target_opt + in + let other_ds = + if domid = 0 then + match mem_available () with + | Ok mem -> Some ( Rrd.VM uuid , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Memory used as reported by the guest agent" - ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 + ~description:"Dom0 current free memory" + ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 ~default:true () ) - with Not_found -> None - in + | Error msg -> + let _ = + error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ + msg + in + None + else + try + let mem_free = + Watch.IntMap.find domid !current_meminfofree_values + in + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Memory used as reported by the guest agent" + ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 + ~default:true () + ) + with Not_found -> None + in + let metrics = List.concat - [ - main_mem_ds :: Option.to_list other_ds - ; Option.to_list mem_target_ds - ; acc - ] + [main_mem_ds :: Option.to_list other_ds; Option.to_list mem_target_ds] in - (* CA-34383: Memory updates from paused domains serve no useful purpose. - During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. Hence, we - ignore changes from paused domains: *) - if dom.Xenctrl.paused then acc else add_vm_metrics () - ) - [] doms + Some (List.to_seq metrics) + in + (* CA-34383: Memory updates from paused domains serve no useful purpose. + During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + if dom.Xenctrl.paused then None else vm_metrics () + in + let domains = domain_snapshot xc in + Seq.filter_map mem_metrics_of domains |> Seq.concat |> List.of_seq (**** Local cache SR stuff *) @@ -438,62 +477,18 @@ let handle_exn log f default = (Printexc.to_string e) ; default -let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] - -module IntSet = Set.Make (Int) - -let domain_snapshot xc = - let metadata_of_domain dom = - let ( let* ) = Option.bind in - let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in - let uuid = Uuidx.to_string uuid_raw in - let domid = dom.Xenctrl.domid in - let start = String.sub uuid 0 18 in - (* Actively hide migrating VM uuids, these are temporary and xenops writes - the original and the final uuid to xenstore *) - let uuid_from_key key = - let path = Printf.sprintf "/vm/%s/%s" uuid key in - try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path)) - with Xs_protocol.Enoent _hint -> - info "Couldn't read path %s; falling back to actual uuid" path ; - uuid - in - let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in - if List.mem start uuid_blacklist then - None - else - let key = - if Astring.String.is_suffix ~affix:"000000000000" uuid then - Some "origin-uuid" - else if Astring.String.is_suffix ~affix:"000000000001" uuid then - Some "final-uuid" - else - None - in - Some (dom, stable_uuid key, domid) - in - let domains = - Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain - in - let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in - let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in - Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; - domains - let dom0_stat_generators = [ - ("ha", fun _ _ _ -> Rrdd_ha_stats.all ()) - ; ("mem_host", fun xc _ _ -> dss_mem_host xc) - ; ("mem_vms", fun _ _ domains -> dss_mem_vms domains) - ; ("cache", fun _ timestamp _ -> dss_cache timestamp) + ("ha", fun _ _ -> Rrdd_ha_stats.all ()) + ; ("mem_host", fun xc _ -> dss_mem_host xc) + ; ("mem_vms", fun xc _ -> dss_mem_vms xc) + ; ("cache", fun _ timestamp -> dss_cache timestamp) ] -let generate_all_dom0_stats xc domains = +let generate_all_dom0_stats xc = let handle_generator (name, generator) = let timestamp = Unix.gettimeofday () in - ( name - , (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) []) - ) + (name, (timestamp, handle_exn name (fun _ -> generator xc timestamp) [])) in List.map handle_generator dom0_stat_generators @@ -510,10 +505,9 @@ let write_dom0_stats writers tagged_dss = in List.iter write_dss writers -let do_monitor_write xc writers = +let do_monitor_write domains_before xc writers = Rrdd_libs.Stats.time_this "monitor" (fun _ -> - let domains = domain_snapshot xc in - let tagged_dom0_stats = generate_all_dom0_stats xc domains in + let tagged_dom0_stats = generate_all_dom0_stats xc in write_dom0_stats writers tagged_dom0_stats ; let dom0_stats = tagged_dom0_stats @@ -523,26 +517,34 @@ let do_monitor_write xc writers = ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in + let domains_after = domain_snapshot xc in let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; - let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in - + (* merge the domain ids from the previous iteration and the current one + to avoid missing updates *) + let uuid_domids = + Seq.append domains_before domains_after + |> Seq.map (fun (_, u, i) -> (u, i)) + |> Rrd.StringMap.of_seq + in (* stats are grouped per plugin, which provides its timestamp *) Rrdd_monitor.update_rrds uuid_domids stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; Rrdd_libs.Constants.datasource_vm_dump_file - |> Rrdd_server.dump_vm_dss_to_file + |> Rrdd_server.dump_vm_dss_to_file ; + domains_after ) let monitor_write_loop writers = Debug.with_thread_named "monitor_write" (fun () -> Xenctrl.with_intf (fun xc -> + let domains = ref Seq.empty in while true do try - do_monitor_write xc writers ; + domains := do_monitor_write !domains xc writers ; with_lock Rrdd_shared.next_iteration_start_m (fun _ -> Rrdd_shared.next_iteration_start := Clock.Timer.extend_by !Rrdd_shared.timeslice diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index 725b34351c6..5ff9fac1bf2 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -74,60 +74,61 @@ let update_rrds_test ~timestamp ~dss ~uuid_domids ~expected_vm_rrds let update_rrds = let open Rrd in + let map_of_list ls = StringMap.of_seq (List.to_seq ls) in [ ( "Null update" - , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] + , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single host update" , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a)] - ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a])] ) ; ( "Multiple host updates" , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a); (Host, ds_b)] - ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a; ds_b])] ) ; ( "Single non-resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] - ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple non-resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a)] - ~uuid_domids:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] - ~uuid_domids:[("a", 1)] + ~uuid_domids:(map_of_list [("a", 1)]) ~expected_vm_rrds:[("a", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)] - ~uuid_domids:[("a", 1); ("b", 1)] + ~uuid_domids:(map_of_list [("a", 1); ("b", 1)]) ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident and non-resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)] - ~uuid_domids:[("a", 1); ("b", 1)] + ~uuid_domids:(map_of_list [("a", 1); ("b", 1)]) ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple SR updates" , update_rrds_test ~timestamp:0. ~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)] - ~uuid_domids:[] ~expected_vm_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_host_dss:[] ) From 87ca4499df5204578fa1549f9d9bf6df8ff7a11f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 3 Jun 2025 11:45:27 +0100 Subject: [PATCH 267/492] xapi-aux: remove cstruct usage from networking_info The only use of it was a parameter that was not used anywhere Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-aux/networking_info.ml | 5 +---- ocaml/xapi-aux/networking_info.mli | 5 ++--- ocaml/xapi/helpers.ml | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi-aux/networking_info.ml b/ocaml/xapi-aux/networking_info.ml index b11e5b3d2e1..928ad45322b 100644 --- a/ocaml/xapi-aux/networking_info.ml +++ b/ocaml/xapi-aux/networking_info.ml @@ -61,8 +61,6 @@ let ipaddr_to_octets = function | Ipaddr.V6 addr -> Ipaddr.V6.to_octets addr -let ipaddr_to_cstruct c = ipaddr_to_octets c |> Cstruct.of_string - let get_management_ip_addrs ~dbg = let iface = Inventory.lookup Inventory._management_interface in try @@ -101,8 +99,7 @@ let get_management_ip_addrs ~dbg = let get_management_ip_addr ~dbg = match get_management_ip_addrs ~dbg with | Ok (preferred, _) -> - List.nth_opt preferred 0 - |> Option.map (fun addr -> (Ipaddr.to_string addr, ipaddr_to_cstruct addr)) + List.nth_opt preferred 0 |> Option.map Ipaddr.to_string | Error _ -> None diff --git a/ocaml/xapi-aux/networking_info.mli b/ocaml/xapi-aux/networking_info.mli index 70ac0ff85b9..4c8418443ab 100644 --- a/ocaml/xapi-aux/networking_info.mli +++ b/ocaml/xapi-aux/networking_info.mli @@ -24,10 +24,9 @@ val management_ip_error_to_string : management_ip_error -> string (** [management_ip_error err] returns a string representation of [err], useful only for logging. *) -val get_management_ip_addr : dbg:string -> (string * Cstruct.t) option +val get_management_ip_addr : dbg:string -> string option (** [get_management_ip_addr ~dbg] returns the preferred IP of the management - network, or None. The address is returned in two formats: a human-readable - string and its bytes representation. *) + network, or None. The address is returned in a human-readable string *) val get_host_certificate_subjects : dbg:string diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 8e6578cacb2..75199a62fa9 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -157,7 +157,7 @@ let get_management_iface_is_connected ~__context = let get_management_ip_addr ~__context = let dbg = Context.string_of_task __context in - Option.map fst (Networking_info.get_management_ip_addr ~dbg) + Networking_info.get_management_ip_addr ~dbg let get_localhost_uuid () = Xapi_inventory.lookup Xapi_inventory._installation_uuid From 40e9fa1c4f98fb5d0f55f37992110a2221ce13a6 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 6 Jun 2025 15:19:34 +0100 Subject: [PATCH 268/492] xapi-cli-server: Expose evacuate-batch-size parameter in the CLI Signed-off-by: Andrii Sultanov --- ocaml/xapi-cli-server/cli_frontend.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 57861e95001..7ee5310c5f2 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -2866,7 +2866,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-evacuate" , { reqd= [] - ; optn= ["network-uuid"] + ; optn= ["network-uuid"; "batch-size"] ; help= "Migrate all VMs off a host." ; implementation= No_fd Cli_operations.host_evacuate ; flags= [Host_selectors] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 25e4c84ce79..5ddb54ec301 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -5369,13 +5369,21 @@ let host_evacuate _printer rpc session_id params = Client.Network.get_by_uuid ~rpc ~session_id ~uuid ) in + let evacuate_batch_size = + match List.assoc_opt "batch-size" params with + | Some x -> + Scanf.sscanf x "%Lu%!" Fun.id + | None -> + 0L + in ignore (do_host_op rpc session_id ~multiple:false (fun _ host -> Client.Host.evacuate ~rpc ~session_id ~host:(host.getref ()) ~network - ~evacuate_batch_size:0L + ~evacuate_batch_size ) - params ["network-uuid"] + params + ["network-uuid"; "batch-size"] ) let host_get_vms_which_prevent_evacuation printer rpc session_id params = From 4cb387d89d65ae0f685e876a60fd5249087b54c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 10 Jun 2025 15:26:38 +0100 Subject: [PATCH 269/492] [maintenance]: add forkexecd C objects to .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also fix the Makefile, so that 'make clean' also deletes the `.o.d` files. This avoids accidentally adding these files to git (although normally dune would invoke make in _build, only if you manually invoke it would it create these extra files): ``` A ocaml/forkexecd/helper/close_from.o A ocaml/forkexecd/helper/close_from.o.d A ocaml/forkexecd/helper/syslog.o A ocaml/forkexecd/helper/syslog.o.d A ocaml/forkexecd/helper/vfork_helper A ocaml/forkexecd/helper/vfork_helper.o A ocaml/forkexecd/helper/vfork_helper.o.d ``` Signed-off-by: Edwin Török --- ocaml/forkexecd/.gitignore | 3 +++ ocaml/forkexecd/helper/Makefile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/forkexecd/.gitignore b/ocaml/forkexecd/.gitignore index d9b5b8ca4be..2c89ac5c34f 100644 --- a/ocaml/forkexecd/.gitignore +++ b/ocaml/forkexecd/.gitignore @@ -1,4 +1,7 @@ _build/ +helper/*.o +helper/*.o.d +helper/vfork_helper .merlin *.install diff --git a/ocaml/forkexecd/helper/Makefile b/ocaml/forkexecd/helper/Makefile index 2bfc3b07e35..6c14a3aeb6c 100644 --- a/ocaml/forkexecd/helper/Makefile +++ b/ocaml/forkexecd/helper/Makefile @@ -5,7 +5,7 @@ LDFLAGS ?= all:: vfork_helper clean:: - rm -f vfork_helper *.o + rm -f vfork_helper *.o *.o.d %.o: %.c $(CC) $(CFLAGS) -MMD -MP -MF $@.d -c -o $@ $< From 11ab8a681bf9aa0a959e297f3c4b07c2849a72b6 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Jun 2025 16:47:53 +0100 Subject: [PATCH 270/492] unixext: Add a raise_with_preserved_backtrace function Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml | 5 +++++ ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml index 32a9f5119ab..893a7e4d9bc 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml @@ -17,6 +17,11 @@ exception Unix_error of int let _exit = Unix._exit +let raise_with_preserved_backtrace exn f = + let bt = Printexc.get_raw_backtrace () in + f () ; + Printexc.raise_with_backtrace exn bt + (** remove a file, but doesn't raise an exception if the file is already removed *) let unlink_safe file = try Unix.unlink file with (* Unix.Unix_error (Unix.ENOENT, _ , _)*) _ -> () diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli index 047935b475c..3db652bd2a3 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli @@ -15,6 +15,10 @@ val _exit : int -> unit +val raise_with_preserved_backtrace : exn -> (unit -> unit) -> 'b +(** A wrapper that preserves the backtrace (otherwise erased by calling + formatting functions, for example) *) + val unlink_safe : string -> unit val mkdir_safe : string -> Unix.file_perm -> unit From 929eefd11f6354056e4e7cd2c37d46675444e62d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Jun 2025 11:36:36 +0100 Subject: [PATCH 271/492] xapi_vgpu_type: Don't pollute the logs with non-critical errors on every toolstack restart on hosts without Nvidia GPUs, xapi complains about a non-existent directory: xapi: [error||0 |dbsync (update_env) R:733fc2551767|xapi_vgpu_type] Failed to create NVidia compat config_file: Sys_error("/usr/share/nvidia/vgx: No such file or directory") Handle the directory's absence without propagating the error. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vgpu_type.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_vgpu_type.ml b/ocaml/xapi/xapi_vgpu_type.ml index f7d5e1eb408..aae64cef195 100644 --- a/ocaml/xapi/xapi_vgpu_type.ml +++ b/ocaml/xapi/xapi_vgpu_type.ml @@ -1033,7 +1033,9 @@ module Nvidia_compat = struct read_configs ac tl ) in - let conf_files = Array.to_list (Sys.readdir conf_dir) in + let conf_files = + try Array.to_list (Sys.readdir conf_dir) with Sys_error _ -> [] + in debug "Reading NVIDIA vGPU config files %s/{%s}" conf_dir (String.concat ", " conf_files) ; read_configs [] From a4b992c586c0623170336936e555b028cca7234b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Jun 2025 14:26:22 +0100 Subject: [PATCH 272/492] networkd: Add ENOENT to the list of expected errors in Sysfs.read_one_line Otherwise this quite frequently logs something like: ``` xcp-networkd: [error||22 |dbsync (update_env)|network_utils] Error in read one line of file: /sys/class/net/eth0/device/sriov_totalvfs, exception Unix.Unix_error(Unix.ENOENT, "open", "/sys/class/net/eth0/device/sriov_totalvfs") Backtrace ... ``` Signed-off-by: Andrii Sultanov --- ocaml/networkd/lib/network_utils.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 1c8c8cd1a27..846c517c82e 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -162,7 +162,8 @@ module Sysfs = struct with | End_of_file -> "" - | Unix.Unix_error (Unix.EINVAL, _, _) -> + | Unix.Unix_error (Unix.EINVAL, _, _) | Unix.Unix_error (Unix.ENOENT, _, _) + -> (* The device is not yet up *) raise (Network_error (Read_error file)) | exn -> From 3f0e977e94449d9dd1aa4939ae5ab22549c99c6b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Jun 2025 15:55:36 +0100 Subject: [PATCH 273/492] xenguestHelper: Don't dump errors on End_of_file non_debug_receive will dump an error after reading the last bits of the header, which is expected and handled by the caller appropriately: ``` xenopsd-xc: [error||67 |Async.VM.resume R:beac7be348f1|xenguesthelper] Memory F 6019464 KiB S 0 KiB T 8183 MiB <--- dumping error xenopsd-xc: [debug||67 |Async.VM.resume R:beac7be348f1|mig64] Finished emu-manager result processing <---- End_of_file expected and handled ``` Don't pollute the logs and instead just log the same info with 'debug' when the error is End_of_file. Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/xenguestHelper.ml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/xc/xenguestHelper.ml b/ocaml/xenopsd/xc/xenguestHelper.ml index b76fec51c25..06a28d92f33 100644 --- a/ocaml/xenopsd/xc/xenguestHelper.ml +++ b/ocaml/xenopsd/xc/xenguestHelper.ml @@ -200,13 +200,14 @@ let rec non_debug_receive ?(debug_callback = fun s -> debug "%s" s) cnx = (* Dump memory statistics on failure *) let non_debug_receive ?debug_callback cnx = - let debug_memory () = + let debug_memory log_type = Xenctrl.with_intf (fun xc -> let open Memory in let open Int64 in let open Xenctrl in let p = Xenctrl.physinfo xc in - error "Memory F %Ld KiB S %Ld KiB T %Ld MiB" + (match log_type with Syslog.Debug -> debug | _ -> error) + "Memory F %Ld KiB S %Ld KiB T %Ld MiB" (p.free_pages |> of_nativeint |> kib_of_pages) (p.scrub_pages |> of_nativeint |> kib_of_pages) (p.total_pages |> of_nativeint |> mib_of_pages_free) @@ -215,10 +216,18 @@ let non_debug_receive ?debug_callback cnx = try match non_debug_receive ?debug_callback cnx with | Error y as x -> - error "Received: %s" y ; debug_memory () ; x + error "Received: %s" y ; debug_memory Syslog.Err ; x | x -> x - with e -> debug_memory () ; raise e + with + | End_of_file as e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + debug_memory Syslog.Debug + ) + | e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + debug_memory Syslog.Err + ) (** For the simple case where we just want the successful result, return it. If we get an error message (or suspend) then throw an exception. *) From 3b7278f85df7fdd8a566ae65fce9c01c294fe876 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 14:59:36 +0100 Subject: [PATCH 274/492] xe-cli completion: Remove "" from completion suggestions While one could potentially filter for this "value", I don't think it's that useful and adds noise to the completions, like here: ``` $ xe vm-list resident-on= 64c11cad-2c52-4dea-aea6-5fae0e720699 \ 7f566729-0ee7-47c4-853d-2c5f3a195ad4 ``` Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 8120df874f3..5a1c535ad7f 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -755,6 +755,10 @@ __add_completion() local description_cmd="$2" local max_cmd_length="$3" + if [ "$word" = "" ]; then + return 0 + fi + COMPLETION_SUGGESTIONS=$((COMPLETION_SUGGESTIONS+1)) __xe_debug "\t$word" From 4e5012f4cb74947910abbcac016c21df380c367f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 15:01:22 +0100 Subject: [PATCH 275/492] xe-cli completion: Fix debug logging log needs to be moved one line below the first assignment into the "description" variable, otherwise it's always going to be an empty string Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 5a1c535ad7f..b65c81d4b58 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -772,8 +772,8 @@ __add_completion() COMPREPLY+=( $(printf '%s%q' "$description" "$word") ) else if [[ $SHOW_DESCRIPTION == 1 ]]; then - __xe_debug "\t showing command description - '$description'" description=" - $(eval $description_cmd$word)" + __xe_debug "\t showing command description - '$description'" fi # Right-pad the command with spaces before the help string COMPREPLY+=( $(printf "%-${max_cmd_length}q %s" "$word" "$description") ) From 0ba2168675f2788250c502e97873657b78f16209 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 15:02:18 +0100 Subject: [PATCH 276/492] xe-cli completion: Eliminate duplicate suggestions early on We used to rely on Bash's completion removing duplicate entries from the suggestions, but processing them in the first place is unnecessary (and will slow down completion since there's usually an 'xe' command run for each entry in the wordlist). Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index b65c81d4b58..2f0c09776e2 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -784,7 +784,8 @@ __preprocess_suggestions() { wordlist=$( echo "$1" | \ sed -re 's/(^|[^\])((\\\\)*),,*/\1\2\n/g' -e 's/\\,/,/g' -e 's/\\\\/\\/g' | \ - sed -e 's/ *$//') + sed -e 's/ *$//' | \ + sort -u ) local IFS=$'\n' for word in $wordlist; do if [[ "$word" =~ ^$prefix.* ]]; then From 32a772a28e675e93dae11d4c959c01d46b51e7d9 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 15:05:02 +0100 Subject: [PATCH 277/492] xe-cli completion: Improve completion for 'resident-on=' and 'affinity=' Provide a helpful description for some parameters of 'xe vm-list', compare before: ``` $ xe vm-list resident-on= 64c11cad-2c52-4dea-aea6-5fae0e720699 7f566729-0ee7-47c4-853d-2c5f3a195ad4 ``` with after: ``` $ xe vm-list resident-on= 64c11cad-2c52-4dea-aea6-5fae0e720699 - hpmc30 7f566729-0ee7-47c4-853d-2c5f3a195ad4 - hpmc29 ``` Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 2f0c09776e2..f7be3e3fbce 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -588,7 +588,21 @@ _xe() __xe_debug "fst is '$fst', snd is '$snd'" if [[ "$snd" == "list" || "$fst" == "vm" ]]; then IFS=$'\n,' - set_completions_for_names "${fst}-list" "$param" "$value" + + # Try to provide a helpful "description" to the suggestions + case "$param" in + resident-on | affinity) + SHOW_DESCRIPTION=1 + class="host" + ;; + *) + ;; + esac + + local name_label_cmd="$xe ${class}-list params=name-label 2>/dev/null --minimal uuid=" + __xe_debug "description class is '$class'" + + set_completions_for_names "${fst}-list" "$param" "$value" "$name_label_cmd" return 0 fi fi From 2a8e55b6c2c36a82ba8a2975a0da06c68aa2be84 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 15:07:45 +0100 Subject: [PATCH 278/492] xe-cli completion: Handle suggestions for 'suspend-{VDI,SR}-uuid' No completion was provided before, and it's handled properly now: ``` $ xe vm-list suspend-SR-uuid= 08906228-cbf6-dad4-720d-e581df11510a - SR1 37b734f0-e594-0e48-2114-cd063241dd36 - SR2 ``` Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index f7be3e3fbce..4126708f91b 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -566,11 +566,18 @@ _xe() else all="--all" fi - if [[ "$fst" == "into-vdi" || "$fst" == "base-vdi" || "$fst" == "vdi-from" || "$fst" == "vdi-to" ]]; then + + case "$fst" in + into-vdi | base-vdi | vdi-from | vdi-to | suspend-VDI) class=vdi - else + ;; + suspend-SR) + class=sr + ;; + *) class="$fst" - fi + ;; + esac # Show corresponding name labels for each UUID SHOW_DESCRIPTION=1 From ff8f112473832e8e4c74202b226e507b977d669b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 08:57:53 +0100 Subject: [PATCH 279/492] xapi/helpers: Note that get_localhost can fail while the database is starting up Otherwise, errors like this can be a little bit confusing in the logs: ``` [dispatch:session.login_with_password |backtrace] Raised Db_exn.Read_missing_uuid("host", "", "236acc01-0f95-4af1-8b35-f5a2fb51c354") ``` Signed-off-by: Andrii Sultanov --- ocaml/xapi/helpers.ml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 75199a62fa9..318ddfecf8d 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -171,8 +171,13 @@ let get_localhost ~__context = match localhost_ref = Ref.null with | false -> localhost_ref - | true -> - get_localhost_uncached ~__context + | true -> ( + try get_localhost_uncached ~__context + with Db_exn.Read_missing_uuid (_, _, _) as e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + warn "The database has not fully come up yet, so localhost is missing" + ) + ) (* Determine the gateway and DNS PIFs: * If one of the PIFs with IP has other_config:defaultroute=true, then From e544d857d872960efe45b25a4b225791873be7d7 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 11 Jun 2025 08:59:23 +0100 Subject: [PATCH 280/492] xapi_host: missing UEFI certificates warrant a warning, not an error It is a XenServer-specific expectation that these certificates should always be present on the host (they are not provided on XCP-ng by default, for example, due to licensing restrictions). The error log is not followed by any exception, and the missing UEFI certificates do not interrupt any operation, they just mean the host is set up differently (which can be verified by the clients with appropriate API calls like pool-get-guest-secureboot-readiness): ``` xapi: [error||Sync UEFI certificates on host with XAPI db |xapi_host] check_valid_uefi_certs: missing KEK.auth in /var/lib/varstored xapi: [error||Sync UEFI certificates on host with XAPI db |xapi_host] check_valid_uefi_certs: missing db.auth in /var/lib/varstored ``` These warrant a warning instead of an error log. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_host.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 405733baa78..92297c2251f 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2769,7 +2769,7 @@ let write_uefi_certificates_to_disk ~__context ~host = ["KEK.auth"; "db.auth"] |> List.iter (fun cert -> let log_of found = - (if found then info else error) + (if found then info else warn) "check_valid_uefi_certs: %s %s in %s" (if found then "found" else "missing") cert path From 6fe7c9e31d9866d85fe3b1368f80551195d13059 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Wed, 11 Jun 2025 06:49:45 +0000 Subject: [PATCH 281/492] CA-412164: XSI-1901: uid-info does not support `:` in gecos from https://en.wikipedia.org/wiki/Passwd#Password_file uid_info as following format username:password:uid:gid:gecos:homedir:shell Regarding gecos, it is recommended as follows Typically, this is a set of comma-separated values including the user's full name and contact details. However, this information comes form AD and user may mis-configure it with `:`, which is used as seperator. In such case, the parse would failed. Enhance the parse function to support `:` in gecos, other fields does not likely contain it. Signed-off-by: Lin Liu --- ocaml/tests/test_extauth_plugin_ADwinbind.ml | 21 +++++++++++++++ ocaml/xapi/extauth_plugin_ADwinbind.ml | 27 +++++++++++++++++--- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/ocaml/tests/test_extauth_plugin_ADwinbind.ml b/ocaml/tests/test_extauth_plugin_ADwinbind.ml index 5fe5bfc91cd..3c6d44daaa9 100644 --- a/ocaml/tests/test_extauth_plugin_ADwinbind.ml +++ b/ocaml/tests/test_extauth_plugin_ADwinbind.ml @@ -219,6 +219,27 @@ let test_parse_wbinfo_uid_info = ; gecos= {|ladmin|} } ) + (* XSI-1901: output of customer environment, has `:` in the gecos, + other fields does not likely contain it *) + ; ( {|HVS\udaadmin:*:3000000:3000000:ADMIN: Dalsem, Ulric:/home/HVS/udaadmin:/bin/bash|} + , Ok + { + user_name= {|HVS\udaadmin|} + ; uid= 3000000 + ; gid= 3000000 + ; gecos= {|ADMIN: Dalsem, Ulric|} + } + ) + (* Multiple `:` in gecos *) + ; ( {|HVS\udaadmin:*:3000000:3000000:ADMIN: Dalsem, Ulric, POOL OP: udaadmin:/home/HVS/udaadmin:/bin/bash|} + , Ok + { + user_name= {|HVS\udaadmin|} + ; uid= 3000000 + ; gid= 3000000 + ; gecos= {|ADMIN: Dalsem, Ulric, POOL OP: udaadmin|} + } + ) ; ( {|CONNAPP\locked:*:3000004:3000174::/home/CONNAPP/locked:/bin/bash|} , Ok {user_name= {|CONNAPP\locked|}; uid= 3000004; gid= 3000174; gecos= ""} diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index f23f1f5447e..9213b2dea09 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -686,11 +686,30 @@ module Wbinfo = struct let parse_uid_info stdout = (* looks like one line from /etc/passwd: https://en.wikipedia.org/wiki/Passwd#Password_file *) match String.split_on_char ':' stdout with - | [user_name; _passwd; uid; gid; gecos; _homedir; _shell] -> ( - try Ok {user_name; uid= int_of_string uid; gid= int_of_string gid; gecos} - with _ -> Error () - ) + | user_name :: _passwd :: uid :: gid :: rest -> ( + (* We expect at least homedir and shell at the end *) + let rest = List.rev rest in + match rest with + | _shell :: _homedir :: tail -> ( + (* Rev it back to original order *) + let tail = List.rev tail in + let gecos = String.concat ":" tail in + try + Ok + { + user_name + ; uid= int_of_string uid + ; gid= int_of_string gid + ; gecos + } + with _ -> Error () + ) + | _ -> + debug "%s uid_info format error: %s" __FUNCTION__ stdout ; + Error () + ) | _ -> + debug "%s uid_info format error: %s" __FUNCTION__ stdout ; Error () let uid_info_of_uid (uid : int) = From 3d755835e90afa7c26df63a8d19a7d61bd40226c Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 4 Jun 2025 14:09:27 +0100 Subject: [PATCH 282/492] CP-47063: Instrument xenops vm non-atomic functions. Instruments: - `VM.add`, - `VM.stat`, - `VM.exists`, - `VM.list`. Signed-off-by: Gabriel Buica --- ocaml/xapi-idl/lib/debug_info.ml | 2 +- ocaml/xapi-idl/lib/debug_info.mli | 2 +- ocaml/xenopsd/lib/xenops_server.ml | 16 ++++++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi-idl/lib/debug_info.ml b/ocaml/xapi-idl/lib/debug_info.ml index 5483d6bc451..edf3c4979a8 100644 --- a/ocaml/xapi-idl/lib/debug_info.ml +++ b/ocaml/xapi-idl/lib/debug_info.ml @@ -76,7 +76,7 @@ let to_log_string t = t.log (* Sets the logging context based on `dbg`. Also adds a new tracing span, linked to the parent span from `dbg`, if available. *) -let with_dbg ?(with_thread = false) ~module_name ~name ~dbg f = +let with_dbg ?(with_thread = false) ?(module_name = "") ~name ~dbg f = let di = of_string dbg in let f_with_trace () = let name = diff --git a/ocaml/xapi-idl/lib/debug_info.mli b/ocaml/xapi-idl/lib/debug_info.mli index fa2f6ff5d6a..9db63471035 100644 --- a/ocaml/xapi-idl/lib/debug_info.mli +++ b/ocaml/xapi-idl/lib/debug_info.mli @@ -24,7 +24,7 @@ val to_log_string : t -> string val with_dbg : ?with_thread:bool - -> module_name:string + -> ?module_name:string -> name:string -> dbg:string -> (t -> 'a) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 9b109c1c980..8fe027630fe 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3682,7 +3682,9 @@ end module VM = struct module DB = VM_DB - let add _ dbg x = Debug.with_thread_associated dbg (fun () -> DB.add' x) () + let add _ dbg x = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + DB.add' x let rename _ dbg id1 id2 when' = queue_operation dbg id1 (Atomic (VM_rename (id1, id2, when'))) @@ -3719,11 +3721,17 @@ module VM = struct in (vm_t, state) - let stat _ dbg id = Debug.with_thread_associated dbg (fun () -> stat' id) () + let stat _ dbg id = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + stat' id - let exists _ _dbg id = match DB.read id with Some _ -> true | None -> false + let exists _ dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun _ -> + match DB.read id with Some _ -> true | None -> false - let list _ dbg () = Debug.with_thread_associated dbg (fun () -> DB.list ()) () + let list _ dbg () = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + DB.list () let create _ dbg id = let no_sharept = false in From 5138375bff23315e0b5a62cce17d82771e6648a8 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 4 Jun 2025 15:00:56 +0100 Subject: [PATCH 283/492] CP-47063: Instrument `switch_rpc` and message-switch client Instruments `switch_rpc` according to OpenTelemetry standard on instrumenting rpc calls. - `server.address` is the name of the message queue. Intruments sending the message on a queue according to OpenTelemetry standard on instrumenting messaging. - `destination` is the name of the message queue. `Tracing.with_tracing` now accepts an optional argument to set the Span Kind. Signed-off-by: Gabriel Buica --- ocaml/libs/tracing/tracing.ml | 8 +++-- ocaml/libs/tracing/tracing.mli | 39 +++++++++++----------- ocaml/libs/tracing/tracing_export.ml | 8 ++--- ocaml/message-switch/core/dune | 1 + ocaml/message-switch/core/make.ml | 2 +- ocaml/message-switch/core/s.ml | 3 +- ocaml/message-switch/unix/dune | 1 + ocaml/message-switch/unix/protocol_unix.ml | 22 +++++++++--- ocaml/xapi-idl/lib/xcp_client.ml | 29 ++++++++++++++-- 9 files changed, 80 insertions(+), 33 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index c1cdc33692e..d320fd6061b 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -792,10 +792,14 @@ end let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout -let with_tracing ?(attributes = []) ?(parent = None) ?trace_context ~name f = +let with_tracing ?(attributes = []) ?(parent = None) ?span_kind ?trace_context + ~name f = let tracer = Tracer.get_tracer ~name in if tracer.enabled then ( - match Tracer.start ~tracer ?trace_context ~attributes ~name ~parent () with + match + Tracer.start ?span_kind ~tracer ?trace_context ~attributes ~name ~parent + () + with | Ok span -> ( try let result = f span in diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 262acb52f27..8323346a443 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -190,12 +190,12 @@ module Tracer : sig -> (Span.t option, exn) result val update_span_with_parent : Span.t -> Span.t option -> Span.t option - (**[update_span_with_parent s p] returns [Some span] where [span] is an + (**[update_span_with_parent s p] returns [Some span] where [span] is an updated verison of the span [s]. - If [p] is [Some parent], [span] is a child of [parent], otherwise it is the + If [p] is [Some parent], [span] is a child of [parent], otherwise it is the original [s]. - - If the span [s] is finished or is no longer considered an on-going span, + + If the span [s] is finished or is no longer considered an on-going span, returns [None]. *) @@ -209,7 +209,7 @@ module Tracer : sig val finished_span_hashtbl_is_empty : unit -> bool end -(** [TracerProvider] module provides ways to intereact with the tracer providers. +(** [TracerProvider] module provides ways to intereact with the tracer providers. *) module TracerProvider : sig (** Type that represents a tracer provider.*) @@ -222,7 +222,7 @@ module TracerProvider : sig -> name_label:string -> uuid:string -> unit - (** [create ~enabled ~attributes ~endpoints ~name_label ~uuid] initializes a + (** [create ~enabled ~attributes ~endpoints ~name_label ~uuid] initializes a tracer provider based on the following parameters: [enabled], [attributes], [endpoints], [name_label], and [uuid]. *) @@ -234,17 +234,17 @@ module TracerProvider : sig -> unit -> unit (** [set ?enabled ?attributes ?endpoints ~uuid ()] updates the tracer provider - identified by the given [uuid] with the new configuration paremeters: - [enabled], [attributes], and [endpoints]. - + identified by the given [uuid] with the new configuration paremeters: + [enabled], [attributes], and [endpoints]. + If any of the configuration parameters are missing, the old ones are kept. - + Raises [Failure] if there are no tracer provider with the given [uuid]. *) val destroy : uuid:string -> unit - (** [destroy ~uuid] destroys the tracer provider with the given [uuid]. + (** [destroy ~uuid] destroys the tracer provider with the given [uuid]. If there are no tracer provider with the given [uuid], it does nothing. *) @@ -269,6 +269,7 @@ val enable_span_garbage_collector : ?timeout:float -> unit -> unit val with_tracing : ?attributes:(string * string) list -> ?parent:Span.t option + -> ?span_kind:SpanKind.t -> ?trace_context:TraceContext.t -> name:string -> (Span.t option -> 'a) @@ -288,24 +289,24 @@ val get_observe : unit -> bool val validate_attribute : string * string -> bool -(** [EnvHelpers] module is a helper module for the tracing library to easily - transition back and forth between a string list of environment variables to - a traceparent. +(** [EnvHelpers] module is a helper module for the tracing library to easily + transition back and forth between a string list of environment variables to + a traceparent. *) module EnvHelpers : sig val traceparent_key : string (** [traceparent_key] is a constant the represents the key of the traceparent - environment variable. + environment variable. *) val of_traceparent : string option -> string list (** [of_traceparent traceparent_opt] returns a singleton list consisting of a - envirentment variable with the key [traceparent_key] and value [v] if + envirentment variable with the key [traceparent_key] and value [v] if [traceparent_opt] is [Some v]. Otherwise, returns an empty list. *) val to_traceparent : string list -> string option - (** [to_traceparent env_var_lst] returns [Some v] where v is the value of the - environmental variable coresponding to the key [traceparent_key] from a + (** [to_traceparent env_var_lst] returns [Some v] where v is the value of the + environmental variable coresponding to the key [traceparent_key] from a string list of environmental variables [env_var_lst]. If there is no such evironmental variable in the list, it returns [None]. *) @@ -314,7 +315,7 @@ module EnvHelpers : sig (** [of_span span] returns a singleton list consisting of a envirentment variable with the key [traceparent_key] and value [v], where [v] is traceparent representation of span [s] (if [span] is [Some s]). - + If [span] is [None], it returns an empty list. *) end diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 5844d389e1c..c4cabb3c576 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -278,8 +278,8 @@ module Destination = struct ] in let@ _ = - with_tracing ~trace_context:TraceContext.empty ~parent ~attributes - ~name + with_tracing ~span_kind:Server ~trace_context:TraceContext.empty + ~parent ~attributes ~name in all_spans |> Content.Json.ZipkinV2.content_of @@ -293,8 +293,8 @@ module Destination = struct let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in let@ parent = - with_tracing ~trace_context:TraceContext.empty ~parent:None ~attributes - ~name:"Tracing.flush_spans" + with_tracing ~span_kind:Server ~trace_context:TraceContext.empty + ~parent:None ~attributes ~name:"Tracing.flush_spans" in TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled diff --git a/ocaml/message-switch/core/dune b/ocaml/message-switch/core/dune index d61746efe44..c9b5b3e2cff 100644 --- a/ocaml/message-switch/core/dune +++ b/ocaml/message-switch/core/dune @@ -9,6 +9,7 @@ sexplib sexplib0 threads.posix + tracing uri xapi-log xapi-stdext-threads diff --git a/ocaml/message-switch/core/make.ml b/ocaml/message-switch/core/make.ml index 43b7e301a9b..df1d003f5f5 100644 --- a/ocaml/message-switch/core/make.ml +++ b/ocaml/message-switch/core/make.ml @@ -229,7 +229,7 @@ functor in return (Ok t) - let rpc ~t ~queue ?timeout ~body:x () = + let rpc ?_span_parent ~t ~queue ?timeout ~body:x () = let ivar = M.Ivar.create () in let timer = Option.map diff --git a/ocaml/message-switch/core/s.ml b/ocaml/message-switch/core/s.ml index 423304d1b24..fefe4d7a1f6 100644 --- a/ocaml/message-switch/core/s.ml +++ b/ocaml/message-switch/core/s.ml @@ -144,7 +144,8 @@ module type CLIENT = sig (** [disconnect] closes the connection *) val rpc : - t:t + ?_span_parent:Tracing.Span.t + -> t:t -> queue:string -> ?timeout:int -> body:string diff --git a/ocaml/message-switch/unix/dune b/ocaml/message-switch/unix/dune index 92bddfd66fb..1858aa271b3 100644 --- a/ocaml/message-switch/unix/dune +++ b/ocaml/message-switch/unix/dune @@ -11,6 +11,7 @@ rpclib.core rpclib.json threads.posix + tracing xapi-stdext-threads xapi-stdext-unix ) diff --git a/ocaml/message-switch/unix/protocol_unix.ml b/ocaml/message-switch/unix/protocol_unix.ml index f7aa0802c0f..29b95f7ef12 100644 --- a/ocaml/message-switch/unix/protocol_unix.ml +++ b/ocaml/message-switch/unix/protocol_unix.ml @@ -347,7 +347,7 @@ module Client = struct Ok c' ) - let rpc ~t:c ~queue:dest_queue_name ?timeout ~body:x () = + let rpc ?_span_parent ~t:c ~queue:dest_queue_name ?timeout ~body:x () = let t = Ivar.create () in let timer = Option.map @@ -364,9 +364,23 @@ module Client = struct do_rpc c.requests_conn (In.CreatePersistent dest_queue_name) >>|= fun (_ : string) -> let msg = - In.Send - ( dest_queue_name - , {Message.payload= x; kind= Message.Request c.reply_queue_name} + Tracing.with_tracing + ~attributes: + [ + ("messaging.operation.name", "send") + ; ("messaging.system", "message-switch") + ; ("messaging.destination.name", dest_queue_name) + ] + ~span_kind:Producer ~parent:_span_parent + ~name:("send" ^ " " ^ dest_queue_name) + (fun _ -> + In.Send + ( dest_queue_name + , { + Message.payload= x + ; kind= Message.Request c.reply_queue_name + } + ) ) in do_rpc c.requests_conn msg >>|= fun (id : string) -> diff --git a/ocaml/xapi-idl/lib/xcp_client.ml b/ocaml/xapi-idl/lib/xcp_client.ml index 3ea0006b59c..435a63e3126 100644 --- a/ocaml/xapi-idl/lib/xcp_client.ml +++ b/ocaml/xapi-idl/lib/xcp_client.ml @@ -38,10 +38,35 @@ let switch_rpc ?timeout queue_name string_of_call response_of_string = get_ok (Message_switch_unix.Protocol_unix.Client.connect ~switch:!switch_path ()) in - fun call -> + fun (call : Rpc.call) -> + let _span_parent = + call.params + |> List.find_map (function Rpc.Dict kv_list -> Some kv_list | _ -> None) + |> Fun.flip Option.bind + (List.find_map (function + | "debug_info", Rpc.String debug_info -> + let di = debug_info |> Debug_info.of_string in + di.tracing + | _ -> + None + ) + ) + in + let rpc_service = "message_switch" in + Tracing.with_tracing + ~attributes: + [ + ("rpc.system", "ocaml-rpc") + ; ("rpc.service", rpc_service) + ; ("server.address", queue_name) + ; ("rpc.method", call.name) + ] + ~parent:_span_parent + ~name:(rpc_service ^ "/" ^ call.name) + @@ fun _span_parent -> response_of_string (get_ok - (Message_switch_unix.Protocol_unix.Client.rpc ~t ?timeout + (Message_switch_unix.Protocol_unix.Client.rpc ?_span_parent ~t ?timeout ~queue:queue_name ~body:(string_of_call call) () ) ) From 7825873c5e3b4f39032ba2011471cbe34c878f53 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 12 Jun 2025 10:42:12 +0100 Subject: [PATCH 284/492] xe-cli completion: Fix regex for checking previously provided parameters The regex removed parameters with a particular suffix instead of checking for the whole name. For example, after providing a uuid parameter to xe vif-move, network-uuid would no longer be suggested: ``` $ xe vif-move network-uuid= uuid= $ xe vif-move uuid=0af7619c-0798-c5be-5a0e-20813a48c7df ``` This is fixed now: ``` $ xe vif-move uuid=0af7619c-0798-c5be-5a0e-20813a48c7df $ xe vif-move uuid=0af7619c-0798-c5be-5a0e-20813a48c7df network-uuid= ``` Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 8120df874f3..589ada92e0f 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -638,7 +638,7 @@ _xe() local previous_params="${OLDSTYLE_WORDS[@]:2:$params_len}" previous_params=$( echo "$previous_params" | cut -d= -f1 | \ sed -r '/^\s*$/d' | cut -d: -f1 | \ - sed -re 's/^/-e "\\s*/g' -e 's/$/[=:]"/g' | paste -sd " ") + sed -re 's/^/-e "^\\s*/g' -e 's/$/[=:]"/g' | paste -sd " ") set_completions "$SUBCOMMAND_PARAMS" "$param" "" "$previous_params" From 1f83af948684a2a4c2f2605ac7ae8200d150873c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 12 Jun 2025 11:34:13 +0100 Subject: [PATCH 285/492] xapi-cli-server: Add some of the missing parameters to cli_frontend There were several optional boolean parameters that were checked and used in cli_operations but were not included in cli_frontend (therefore would not be shown in suggestions or 'xe help command'). Add these to cli_frontend. is_unique is a ... unique case because it does not follow the style of the CLI parameters (which use dashes, not underscores, to separate words), so add 'is-unique' to cli_frontend but handle both in cli_operations. Signed-off-by: Andrii Sultanov --- ocaml/xapi-cli-server/cli_frontend.ml | 12 +++++++++--- ocaml/xapi-cli-server/cli_operations.ml | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 2f6d2350345..cdb749df943 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -101,6 +101,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "sr-uuid" ; "network-uuid" ; "pool-uuid" + ; "public" ] ; help= "Create a binary blob to be associated with an API object" ; implementation= No_fd Cli_operations.blob_create @@ -816,7 +817,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-emergency-ha-disable" , { reqd= [] - ; optn= ["force"] + ; optn= ["force"; "soft"] ; help= "Disable HA on the local host. Only to be used to recover a pool \ with a broken HA setup." @@ -1776,6 +1777,8 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "host-password" ; "type" ; "remote-config" + ; "dry-run" + ; "metadata" ; "url" ; "vdi:" ] @@ -1789,7 +1792,8 @@ let rec cmdtable_data : (string * cmd_spec) list = VDIs will be imported into the Pool's default SR unless an override \ is provided. If the force option is given then any disk data \ checksum failures will be ignored. If the parameter 'url' is \ - specified, xapi will attempt to import from that URL." + specified, xapi will attempt to import from that URL. Only metadata \ + will be imported if 'metadata' is true" ; implementation= With_fd Cli_operations.vm_import ; flags= [Standard] } @@ -1803,6 +1807,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "compress" ; "metadata" ; "excluded-device-types" + ; "include-snapshots" ] ; help= "Export a VM to ." ; implementation= With_fd Cli_operations.vm_export @@ -2393,6 +2398,7 @@ let rec cmdtable_data : (string * cmd_spec) list = "name-description" ; "sharable" ; "read-only" + ; "managed" ; "other-config:" ; "xenstore-data:" ; "sm-config:" @@ -3831,7 +3837,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "vtpm-create" , { reqd= ["vm-uuid"] - ; optn= [] + ; optn= ["is-unique"] ; help= "Create a VTPM associated with a VM." ; implementation= No_fd Cli_operations.VTPM.create ; flags= [] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index fb75f559099..b098632f1c0 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -8118,7 +8118,9 @@ module VTPM = struct let create printer rpc session_id params = let vm_uuid = List.assoc "vm-uuid" params in let vM = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in - let is_unique = get_bool_param params "is_unique" in + let is_unique = + get_bool_param params "is_unique" || get_bool_param params "is-unique" + in let ref = Client.VTPM.create ~rpc ~session_id ~vM ~is_unique in let uuid = Client.VTPM.get_uuid ~rpc ~session_id ~self:ref in printer (Cli_printer.PList [uuid]) From 065b2a6a752a326ea7978fb66cc721a2fe93bc53 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 12 Jun 2025 11:37:22 +0100 Subject: [PATCH 286/492] xapi-cli-server: Remove old commented-out code This code hasn't been used for 10+ years. Signed-off-by: Andrii Sultanov --- ocaml/xapi-cli-server/cli_frontend.ml | 86 ++----------------------- ocaml/xapi-cli-server/cli_operations.ml | 55 ---------------- 2 files changed, 5 insertions(+), 136 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index cdb749df943..b6be8a2023f 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -128,14 +128,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* "host-introduce", - { - reqd=["name"; "address"; "remote-port"; "remote-username"; "remote-password"]; - optn=["description"]; - help="Introduce a remote host"; - implementation=No_fd Cli_operations.host_introduce - };*) - ( "pool-enable-binary-storage" + ; ( "pool-enable-binary-storage" , { reqd= [] ; optn= [] @@ -2768,17 +2761,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Standard] } ) - ; (* - "diagnostic-event-deltas", - { - reqd=["class"]; - optn=[]; - help="Print the changes that are happening to all objects of class specified."; - implementation=With_fd Cli_operations.diagnostic_event_deltas; - flags=[]; - }; -*) - ( "diagnostic-license-status" + ; ( "diagnostic-license-status" , { reqd= [] ; optn= [] @@ -2998,35 +2981,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* - "alert-create", - { - reqd=["message"]; - optn=["alert-level"]; - help="Create a new alert."; - implementation=No_fd Cli_operations.alert_create; - flags=[]; - }; - "alert-destroy", - { - reqd=["uuid"]; - optn=[]; - help="Destroy an Alert."; - implementation=No_fd Cli_operations.alert_destroy; - flags=[]; - }; -*) - (* - "host-fence", - { - reqd=["host-uuid"]; - optn=[]; - help="Fence a host"; - implementation=No_fd_local_session Cli_operations.host_fence; - flags=[]; - }; -*) - ( "pool-vlan-create" + ; ( "pool-vlan-create" , { reqd= ["pif-uuid"; "vlan"; "network-uuid"] ; optn= [] @@ -3187,28 +3142,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Hidden; Neverforward] } ) - ; (* - "host-ha-query", - { - reqd=[]; - optn=[]; - help="Query the HA configuration of the local host."; - implementation=No_fd_local_session Cli_operations.host_ha_query; - flags=[Neverforward]; - }; - -*) - (* - "subject-list", - { - reqd=[]; - optn=[]; - help="Returns a list of subject names that can access the pool"; - implementation=No_fd Cli_operations.subject_list; - flags=[] - }; -*) - ( "subject-add" + ; ( "subject-add" , { reqd= ["subject-name"] ; optn= [] @@ -3254,17 +3188,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* RBAC 2.0 only - "role-create", - { - reqd=["id";"name"]; - optn=[]; - help="Add a role to the pool"; - implementation=No_fd Cli_operations.role_create; - flags=[] - }; - *) - ( "session-subject-identifier-list" + ; ( "session-subject-identifier-list" , { reqd= [] ; optn= [] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index b098632f1c0..d1f2a87a76e 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -7237,59 +7237,11 @@ let host_send_debug_keys _printer rpc session_id params = let keys = List.assoc "keys" params in Client.Host.send_debug_keys ~rpc ~session_id ~host ~keys -(* - let host_introduce printer rpc session_id params = - let name = List.assoc "name" params in - let descr = if List.mem_assoc "description" params then List.assoc "description" params else "" in - let address = List.assoc "address" params in - let port = List.assoc "remote-port" params in - let remote_username = List.assoc "remote-username" params in - let remote_password = List.assoc "remote-password" params in - ignore(Client.Credential.create_with_password ~rpc ~session_id name descr address (Int64.of_string port) remote_username remote_password) - *) - let task_cancel _printer rpc session_id params = let uuid = List.assoc "uuid" params in let task = Client.Task.get_by_uuid ~rpc ~session_id ~uuid in Client.Task.cancel ~rpc ~session_id ~task -(* - let alert_create printer rpc session_id params = - let string_to_alert_level s = - match s with - | "info" -> `Info - | "warning" | "warn" -> `Warn - | "error" -> `Error - | _ -> `Info - in - let message = List.assoc "message" params in - let level = if List.mem_assoc "level" params then List.assoc "level" params else "info" in - let level = string_to_alert_level level in - let alert = Client.Alert.create ~rpc ~session_id message [] level in - let uuid = Client.Alert.get_uuid ~rpc ~session_id alert in - printer (Cli_printer.PList [uuid]) - - let alert_destroy printer rpc session_id params = - let uuid = List.assoc "uuid" params in - let alert = Client.Alert.get_by_uuid ~rpc ~session_id uuid in - Client.Alert.destroy ~rpc ~session_id alert - *) - -(* - let subject_list printer rpc session_id params = -(* we get all subjects from the pool *) - let subjects = Client.Subject.get_all_records ~rpc ~session_id in - let table_of_subject (subject,record) = - [ "subject-uuid", record.API.subject_uuid; - "subject-identifier", record.API.subject_subject_identifier; -(* "subject-name", Client.Subject.get_subject_name ~rpc ~session_id subject;*) - ] @ - record.API.subject_other_config - in - let all = List.map table_of_subject subjects in - printer (Cli_printer.PTable all) - *) - let subject_add printer rpc session_id params = let subject_name = List.assoc "subject-name" params in (* let's try to resolve the subject_name to a subject_id using the external directory *) @@ -7380,13 +7332,6 @@ let audit_log_get fd _printer rpc session_id params = download_file_with_task fd rpc session_id filename Constants.audit_log_uri query label label -(* RBAC 2.0 only - let role_create printer rpc session_id params = - (*let id = List.assoc "id" params in*) - let name = List.assoc "name" params in - ignore (Client.Role.create ~rpc ~session_id ~name ~description:"" ~permissions:[] ~is_basic:false ~is_complete:false) -*) - let session_subject_identifier_list printer rpc session_id _params = let subject_identifiers = Client.Session.get_all_subject_identifiers ~rpc ~session_id From 80c6afec41295fb2d16f3baab97d87c50bd1b023 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 12 Jun 2025 11:38:14 +0100 Subject: [PATCH 287/492] xe-cli completion: Add more boolean parameters Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 589ada92e0f..7ed05f0f1c8 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -542,12 +542,16 @@ _xe() hvm | nomigrate | nested-virt | PV-drivers-up-to-date | \ PV-drivers-detected | live | cooperative | enforce-homogeneity | \ host-metrics-live | sharable | read-only | storage-lock | missing | \ - metadata-latest | empty | clustered | pool-auto-join | joined) + metadata-latest | empty | clustered | pool-auto-join | joined | \ + dry-run | metadata | paused | approximate | copy | progress | public | \ + include-snapshots | preserve-power-state | soft | update | is-unique) # Until autocompletion can be generated from the # datamodel, this is just naive hardcoding. These cases were # obtained by looking for boolean fields: # 'xapi-cli-server/records.ml | grep bool_of_string' and # 'grep string_of_bool' + # and + # 'xapi-cli-server/cli_frontend.ml | grep get_bool_param' __xe_debug "triggering autocompletion for boolean params" IFS=$'\n,' set_completions 'true,false' "$value" From 69087b16ba65919aa63d062bfd6bf9cacf11b959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 11 Jun 2025 15:36:01 +0100 Subject: [PATCH 288/492] CA-412313: Tracing_export: flush on XAPI exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintenance mode is entered by running Host.evacuate, followed by promoting a new pool coordinator and shutting down XAPI. We only export spans every 30s, so we may miss exporting the span for Host.evacuate. Ensure that we at least trigger the export when XAPI is about to shutdown. Do not wait for the export to finish, because this could take a long time (e.g. when exporting to a remote Jaeger instance). After this change I now see Host.evacuate properly in the exported trace. Signed-off-by: Edwin Török --- ocaml/libs/tracing/tracing_export.ml | 13 +++++++++++-- ocaml/libs/tracing/tracing_export.mli | 6 +++--- ocaml/tests/bench/bench_tracing.ml | 4 ++-- ocaml/xapi/xapi_fuse.ml | 2 ++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 5844d389e1c..6b4371350cc 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -306,6 +306,8 @@ module Destination = struct (* Note this signal will flush the spans and terminate the exporter thread *) let signal () = Delay.signal delay + let wait_exit = Delay.make () + let create_exporter () = enable_span_garbage_collector () ; Thread.create @@ -319,7 +321,8 @@ module Destination = struct signaled := true ) ; flush_spans () - done + done ; + Delay.signal wait_exit ) () @@ -339,6 +342,12 @@ module Destination = struct ) end -let flush_and_exit = Destination.signal +let flush_and_exit ~max_wait () = + D.debug "flush_and_exit: signaling thread to export now" ; + Destination.signal () ; + if Delay.wait Destination.wait_exit max_wait then + D.info "flush_and_exit: timeout on span export" + else + D.debug "flush_and_exit: span export finished" let main = Destination.main diff --git a/ocaml/libs/tracing/tracing_export.mli b/ocaml/libs/tracing/tracing_export.mli index 3f8ca750026..f322bd2404c 100644 --- a/ocaml/libs/tracing/tracing_export.mli +++ b/ocaml/libs/tracing/tracing_export.mli @@ -85,9 +85,9 @@ module Destination : sig end end -val flush_and_exit : unit -> unit -(** [flush_and_exit ()] sends a signal to flush the finish spans and terminate - the exporter thread. +val flush_and_exit : max_wait:float -> unit -> unit +(** [flush_and_exit ~max_wait ()] sends a signal to flush the finish spans and terminate + the exporter thread. It waits at most [max_wait] seconds. *) val main : unit -> Thread.t diff --git a/ocaml/tests/bench/bench_tracing.ml b/ocaml/tests/bench/bench_tracing.ml index ff8d872ee64..8db30cfc220 100644 --- a/ocaml/tests/bench/bench_tracing.ml +++ b/ocaml/tests/bench/bench_tracing.ml @@ -25,7 +25,7 @@ let export_thread = (* need to ensure this isn't running outside the benchmarked section, or bechamel might fail with 'Failed to stabilize GC' *) - let after _ = Tracing_export.flush_and_exit () in + let after _ = Tracing_export.flush_and_exit ~max_wait:0. () in Bechamel_simple_cli.thread_workload ~before:Tracing_export.main ~after ~run:ignore @@ -52,7 +52,7 @@ let allocate () = let free t = Tracing.TracerProvider.destroy ~uuid ; - Tracing_export.flush_and_exit () ; + Tracing_export.flush_and_exit ~max_wait:0. () ; Thread.join t let test_tracing_on ?(overflow = false) ~name f = diff --git a/ocaml/xapi/xapi_fuse.ml b/ocaml/xapi/xapi_fuse.ml index 48d0737a613..8c2b5b56d3d 100644 --- a/ocaml/xapi/xapi_fuse.ml +++ b/ocaml/xapi/xapi_fuse.ml @@ -52,6 +52,8 @@ let light_fuse_and_run ?(fuse_length = !Constants.fuse_time) () = in let new_fuse_length = max 5. (fuse_length -. delay_so_far) in debug "light_fuse_and_run: current RRDs have been saved" ; + ignore + (Thread.create Tracing_export.(flush_and_exit ~max_wait:new_fuse_length) ()) ; ignore (Thread.create (fun () -> From a54505e4247778162970fc3c1b2d31fd73907c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 11 Jun 2025 16:02:35 +0100 Subject: [PATCH 289/492] CA-412313: xs-trace: introduce a pp command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit json_reformat cannot handle newline delimited json, it is easier if we have a command to reformat it ourselves. This can be useful when debugging why a trace is missing elements. Traces are stored as newline-delimited JSON in /var/log/dt/zipkinv2/json, however json_reformat cannot process them directly, and the lines can be very long and difficult to read otherwise. Signed-off-by: Edwin Török --- ocaml/xs-trace/dune | 29 ++++++++++++----------------- ocaml/xs-trace/xs_trace.ml | 32 +++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/ocaml/xs-trace/dune b/ocaml/xs-trace/dune index e34fc7e5575..4a19b8c888a 100644 --- a/ocaml/xs-trace/dune +++ b/ocaml/xs-trace/dune @@ -1,23 +1,18 @@ (executable - (modes exe) - (name xs_trace) - (public_name xs-trace) - (package xapi-tools) - (libraries - uri - tracing - cmdliner - tracing_export - xapi-stdext-unix - zstd - ) -) + (modes exe) + (name xs_trace) + (public_name xs-trace) + (package xapi-tools) + (libraries uri tracing cmdliner tracing_export yojson xapi-stdext-unix zstd)) (rule - (targets xs-trace.1) - (deps (:exe xs_trace.exe)) - (action (with-stdout-to %{targets} (run %{exe} --help=groff))) -) + (targets xs-trace.1) + (deps + (:exe xs_trace.exe)) + (action + (with-stdout-to + %{targets} + (run %{exe} --help=groff)))) ; not expected by the specfile ;(install diff --git a/ocaml/xs-trace/xs_trace.ml b/ocaml/xs-trace/xs_trace.ml index 6360649fb20..a5f0c8becef 100644 --- a/ocaml/xs-trace/xs_trace.ml +++ b/ocaml/xs-trace/xs_trace.ml @@ -25,10 +25,7 @@ module Exporter = struct | _ -> () - (** Export traces from file system to a remote endpoint. *) - let export erase src dst = - let dst = Uri.of_string dst in - let submit_json = submit_json dst in + let iter_src src f = let rec export_file = function | path when Sys.is_directory path -> (* Recursively export trace files. *) @@ -38,7 +35,7 @@ module Exporter = struct (* Decompress compressed trace file and submit each line iteratively *) let args = [|"zstdcat"; path|] in let ic = Unix.open_process_args_in args.(0) args in - Unixext.lines_iter submit_json ic ; + Unixext.lines_iter f ic ; match Unix.close_process_in ic with | Unix.WEXITED 0 -> () @@ -47,15 +44,27 @@ module Exporter = struct ) | path when Filename.check_suffix path ".ndjson" -> (* Submit traces line by line. *) - Unixext.readfile_line submit_json path + Unixext.readfile_line f path | path -> (* Assume any other extension is a valid JSON file. *) let json = Unixext.string_of_file path in - submit_json json + f json in - export_file src ; + export_file src + + (** Export traces from file system to a remote endpoint. *) + let export erase src dst = + let dst = Uri.of_string dst in + let submit_json = submit_json dst in + iter_src src submit_json ; if erase then Unixext.rm_rec ~rm_top:true src + + let pretty_print src = + iter_src src @@ fun line -> + line + |> Yojson.Safe.from_string + |> Yojson.Safe.pretty_to_channel ~std:true stdout end module Cli = struct @@ -83,6 +92,11 @@ module Cli = struct let doc = "copy a trace to an endpoint and erase it afterwards" in Cmd.(v (info "mv" ~doc) term) + let pp_cmd = + let term = Term.(const Exporter.pretty_print $ src) in + let doc = "Pretty print NDJSON traces" in + Cmd.(v (info "pp" ~doc) term) + let xs_trace_cmd = let man = [ @@ -94,7 +108,7 @@ module Cli = struct let doc = "utility for working with local trace files" in Cmd.info "xs-trace" ~doc ~version:"0.1" ~man in - Cmd.group desc [cp_cmd; mv_cmd] + Cmd.group desc [cp_cmd; mv_cmd; pp_cmd] let main () = Cmd.eval xs_trace_cmd end From c5a914565b094cd5032a5e9bfcd8fd5948c98c7b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 12 Jun 2025 16:12:41 +0100 Subject: [PATCH 290/492] xenopsd: Allow to override the default NUMA placement Use the numa-compat argument to be able to override the default numa placement using xenopsd.conf option. This allows to change the default placement when building a package This patch reverts some of the changes in e6f94be82198532d165018677a303a554c62c7ba Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenopsd.ml | 9 ++++----- ocaml/xenopsd/xc/xenops_server_xen.ml | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index 9c5e83e04ce..ccacea0ed8b 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -59,6 +59,8 @@ let feature_flags_path = ref "/etc/xenserver/features.d" let pvinpvh_xen_cmdline = ref "pv-shim console=xen" +let numa_placement_compat = ref true + (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -240,11 +242,8 @@ let options = , "Command line for the inner-xen for PV-in-PVH guests" ) ; ( "numa-placement" - , Arg.Bool (fun _ -> ()) - , (fun () -> - string_of_bool - (!Xenops_server.default_numa_affinity_policy = Best_effort) - ) + , Arg.Bool (fun x -> numa_placement_compat := x) + , (fun () -> string_of_bool !numa_placement_compat) , "NUMA-aware placement of VMs (deprecated, use XAPI setting)" ) ; ( "pci-quarantine" diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index a1a37085659..cdc54d32873 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -5259,6 +5259,8 @@ let init () = {Xs_protocol.ACL.owner= 0; other= Xs_protocol.ACL.READ; acl= []} ) ; Device.Backend.init () ; + Xenops_server.default_numa_affinity_policy := + if !Xenopsd.numa_placement_compat then Best_effort else Any ; info "Default NUMA affinity policy is '%s'" Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; From b8f3ab4aa77a73bd6dfb76a58681e4813279382d Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 13 Jun 2025 10:44:27 +0100 Subject: [PATCH 291/492] Fix `message-switch` opam metadata Signed-off-by: Gabriel Buica --- dune-project | 4 +++- opam/message-switch-core.opam | 1 + opam/message-switch-unix.opam | 1 + opam/message-switch.opam | 1 + opam/message-switch.opam.template | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dune-project b/dune-project index 56de01f0fd3..6ed0602a185 100644 --- a/dune-project +++ b/dune-project @@ -651,6 +651,7 @@ (= :version)) (xapi-stdext-threads (= :version)) + (xapi-tracing (= :version)) (odoc :with-doc))) (package @@ -669,7 +670,8 @@ ppx_deriving_rpc rpclib (xapi-stdext-threads - (= :version)))) + (= :version)) + (xapi-tracing (= :version)))) (package (name message-switch)) diff --git a/opam/message-switch-core.opam b/opam/message-switch-core.opam index a6b183bdd7f..dc4ca95da07 100644 --- a/opam/message-switch-core.opam +++ b/opam/message-switch-core.opam @@ -20,6 +20,7 @@ depends: [ "uri" "xapi-log" {= version} "xapi-stdext-threads" {= version} + "xapi-tracing" {= version} "odoc" {with-doc} ] build: [ diff --git a/opam/message-switch-unix.opam b/opam/message-switch-unix.opam index c9379979e2d..975d81ac831 100644 --- a/opam/message-switch-unix.opam +++ b/opam/message-switch-unix.opam @@ -16,6 +16,7 @@ depends: [ "ppx_deriving_rpc" "rpclib" "xapi-stdext-threads" {= version} + "xapi-tracing" {= version} "odoc" {with-doc} ] build: [ diff --git a/opam/message-switch.opam b/opam/message-switch.opam index f0dcf7ff224..41613cb034f 100644 --- a/opam/message-switch.opam +++ b/opam/message-switch.opam @@ -30,6 +30,7 @@ depends: [ "sexplib" "shared-block-ring" {>= "2.3.0"} "xapi-stdext-unix" + "xapi-tracing" ] synopsis: "A simple store-and-forward message switch" description: """ diff --git a/opam/message-switch.opam.template b/opam/message-switch.opam.template index a33fe27cb3e..0e8ec76c2e6 100644 --- a/opam/message-switch.opam.template +++ b/opam/message-switch.opam.template @@ -28,6 +28,7 @@ depends: [ "sexplib" "shared-block-ring" {>= "2.3.0"} "xapi-stdext-unix" + "xapi-tracing" ] synopsis: "A simple store-and-forward message switch" description: """ From a3a7ca4d025230ae57abcc4feb1391d67a97862a Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 13 Jun 2025 11:13:02 +0100 Subject: [PATCH 292/492] opam: generate xapi-log with dune Adds the missing dependency to stdext-threads for testing Signed-off-by: Pau Ruiz Safont --- dune-project | 13 ++++++++++- opam/xapi-log.opam | 45 ++++++++++++++++++++----------------- opam/xapi-log.opam.template | 29 ------------------------ 3 files changed, 37 insertions(+), 50 deletions(-) delete mode 100644 opam/xapi-log.opam.template diff --git a/dune-project b/dune-project index 56de01f0fd3..6030fb9ee92 100644 --- a/dune-project +++ b/dune-project @@ -221,7 +221,18 @@ (name xapi-nbd)) (package - (name xapi-log)) + (name xapi-log) + (synopsis "A Logs library required by xapi") + (description + "This package is provided for backwards compatibility only. No new package should use it.") + (depends + astring + fmt + logs + mtime + xapi-backtrace + (xapi-stdext-pervasives (= :version)) + (xapi-stdext-threads (and :with-test (= :version))))) (package (name xapi-idl)) diff --git a/opam/xapi-log.opam b/opam/xapi-log.opam index d83f9bec7c6..b811d1f7f9e 100644 --- a/opam/xapi-log.opam +++ b/opam/xapi-log.opam @@ -1,31 +1,36 @@ # This file is generated by dune, edit dune-project instead -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" +synopsis: "A Logs library required by xapi" +description: + "This package is provided for backwards compatibility only. No new package should use it." +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -available: [ os = "linux" ] +bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ - "ocaml" "dune" {>= "3.15"} "astring" "fmt" "logs" "mtime" "xapi-backtrace" - "xapi-stdext-pervasives" + "xapi-stdext-pervasives" {= version} + "xapi-stdext-threads" {with-test & = version} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] ] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} +dev-repo: "git+https://github.com/xapi-project/xen-api.git" diff --git a/opam/xapi-log.opam.template b/opam/xapi-log.opam.template deleted file mode 100644 index 00b5cce6fd5..00000000000 --- a/opam/xapi-log.opam.template +++ /dev/null @@ -1,29 +0,0 @@ -opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -available: [ os = "linux" ] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "astring" - "fmt" - "logs" - "mtime" - "xapi-backtrace" - "xapi-stdext-pervasives" -] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} From c35e8e2e7cc74c666b182353086e0c76786c11c2 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 13 Jun 2025 11:22:38 +0100 Subject: [PATCH 293/492] xapi-log: remove circular dependency on tests unfortunately threadext uses this log package, define a with_lock in xapi-log tests to avoid using the former. Signed-off-by: Pau Ruiz Safont --- dune-project | 3 +-- ocaml/libs/log/test/dune | 2 +- ocaml/libs/log/test/log_test.ml | 6 +++++- ocaml/libs/log/test/log_test.t | 2 +- opam/xapi-log.opam | 1 - 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dune-project b/dune-project index 6030fb9ee92..d54087f6c6b 100644 --- a/dune-project +++ b/dune-project @@ -231,8 +231,7 @@ logs mtime xapi-backtrace - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-threads (and :with-test (= :version))))) + (xapi-stdext-pervasives (= :version)))) (package (name xapi-idl)) diff --git a/ocaml/libs/log/test/dune b/ocaml/libs/log/test/dune index 299a6155eac..75fbbad7557 100644 --- a/ocaml/libs/log/test/dune +++ b/ocaml/libs/log/test/dune @@ -1,6 +1,6 @@ (executable (name log_test) - (libraries log xapi-stdext-threads threads.posix xapi-backtrace)) + (libraries log threads.posix xapi-backtrace)) (cram (package xapi-log) diff --git a/ocaml/libs/log/test/log_test.ml b/ocaml/libs/log/test/log_test.ml index 53d5cf0ddeb..b493b18d426 100644 --- a/ocaml/libs/log/test/log_test.ml +++ b/ocaml/libs/log/test/log_test.ml @@ -6,12 +6,16 @@ let a = [||] let buggy () = a.(1) <- 0 +let with_lock mutex f = + let finally () = Mutex.unlock mutex in + Mutex.lock mutex ; Fun.protect ~finally f + let () = Printexc.record_backtrace true ; Debug.log_to_stdout () ; () |> Debug.with_thread_associated "main" @@ fun () -> - try Xapi_stdext_threads.Threadext.Mutex.execute m buggy + try with_lock m buggy with e -> D.log_backtrace e ; D.warn "Got exception: %s" (Printexc.to_string e) diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t index b51ea26fca0..20d41233f8a 100644 --- a/ocaml/libs/log/test/log_test.t +++ b/ocaml/libs/log/test/log_test.t @@ -3,7 +3,7 @@ [|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 [|error||0 |main|backtrace] 2/4 log_test.exe Called from file fun.ml, line 33 [|error||0 |main|backtrace] 3/4 log_test.exe Called from file fun.ml, line 38 - [|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 14 + [|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 [|error||0 |main|backtrace] [| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") diff --git a/opam/xapi-log.opam b/opam/xapi-log.opam index b811d1f7f9e..12840be135b 100644 --- a/opam/xapi-log.opam +++ b/opam/xapi-log.opam @@ -16,7 +16,6 @@ depends: [ "mtime" "xapi-backtrace" "xapi-stdext-pervasives" {= version} - "xapi-stdext-threads" {with-test & = version} "odoc" {with-doc} ] build: [ From 87f82e7d0a645ccb4390861a08632bfdc58bf30f Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 13 Jun 2025 11:30:59 +0100 Subject: [PATCH 294/492] datamodel_lifecycle: automatic update Signed-off-by: Gabriel Buica --- ocaml/idl/datamodel_lifecycle.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index fc9acec7bd1..bbab96b8f0f 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -246,7 +246,7 @@ let prototyped_of_message = function | "VM", "restart_device_models" -> Some "23.30.0" | "VM", "call_host_plugin" -> - Some "25.21.0-next" + Some "25.22.0" | "VM", "set_groups" -> Some "24.19.1" | "pool", "set_console_idle_timeout" -> From d32c7cea3903a9223fd90bbd0fdd140b1143e4c8 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Thu, 12 Jun 2025 11:02:53 +0800 Subject: [PATCH 295/492] CA-412146 Filter out VF when scan SR-IOV (Single Root I/O Virtualization) is a technology that allows a single physical PCI Express (PCIe) device, such as a network adapter, to be shared efficiently among multiple virtual machines (VMs) or containers. It achieves this by creating Virtual Functions (VFs) that act as lightweight PCIe functions, each assigned to a VM, while the Physical Function (PF) remains responsible for managing the device. Add check in Sysfs.is_physical - check if there is "physfn" in the device dir to filter out VF, then XAPI will not create PIF object for VF during scan. Signed-off-by: Changlei Li --- ocaml/networkd/lib/network_utils.ml | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 846c517c82e..2c3cdab9fb8 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -181,18 +181,29 @@ module Sysfs = struct close_out outchan ; raise (Network_error (Write_error file)) - let is_physical name = + exception Unable_to_read_driver_link + + let is_vif name = + let devpath = getpath name "device" in try - let devpath = getpath name "device" in let driver_link = Unix.readlink (devpath ^ "/driver") in (* filter out symlinks under device/driver which look like /../../../devices/xen-backend/vif- *) - not - (List.mem "xen-backend" - (Astring.String.cuts ~empty:false ~sep:"/" driver_link) - ) + List.mem "xen-backend" + (Astring.String.cuts ~empty:false ~sep:"/" driver_link) + with _ -> raise Unable_to_read_driver_link + + let is_vf name = + let devpath = getpath name "device" in + try + ignore @@ Unix.readlink (devpath ^ "/physfn") ; + true with _ -> false + let is_physical name = + try not (is_vif name || is_vf name) + with Unable_to_read_driver_link -> false + (* device types are defined in linux/if_arp.h *) let is_ether_device name = match int_of_string (read_one_line (getpath name "type")) with @@ -1547,7 +1558,7 @@ module Ovs = struct let vif_arg = let existing_vifs = List.filter - (fun iface -> not (Sysfs.is_physical iface)) + (fun iface -> try Sysfs.is_vif iface with _ -> false) (bridge_to_interfaces name) in let ifaces_with_type = From 61374b31d0acb6d349dc73d9c1167d6ae82818fe Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 16 Jun 2025 03:29:06 +0100 Subject: [PATCH 296/492] Update datamodel_host Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_host.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index f0bce099389..2be03f90993 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1304,7 +1304,7 @@ let create_params = param_type= Bool ; param_name= "ssh_enabled" ; param_doc= "True if SSH access is enabled for the host" - ; param_release= numbered_release "25.20.0-next" + ; param_release= numbered_release "25.21.0" ; param_default= Some (VBool Constants.default_ssh_enabled) } ; { @@ -1314,7 +1314,7 @@ let create_params = "The timeout in seconds after which SSH access will be automatically \ disabled (0 means never), this setting will be applied every time the \ SSH is enabled by XAPI" - ; param_release= numbered_release "25.20.0-next" + ; param_release= numbered_release "25.21.0" ; param_default= Some (VInt Constants.default_ssh_enabled_timeout) } ; { @@ -1323,7 +1323,7 @@ let create_params = ; param_doc= "The time in UTC after which the SSH access will be automatically \ disabled" - ; param_release= numbered_release "25.20.0-next" + ; param_release= numbered_release "25.21.0" ; param_default= Some (VDateTime Date.epoch) } ; { @@ -1332,7 +1332,7 @@ let create_params = ; param_doc= "The timeout in seconds after which idle console will be automatically \ terminated (0 means never)" - ; param_release= numbered_release "25.20.0-next" + ; param_release= numbered_release "25.21.0" ; param_default= Some (VInt Constants.default_console_idle_timeout) } ] @@ -1348,7 +1348,7 @@ let create = kept for host joined a pool" ) ; ( Changed - , "25.20.0-next" + , "25.21.0" , "Added --ssh_enabled --ssh_enabled_timeout --ssh_expiry \ --console_idle_timeout options to allow them to be configured for \ new host" From 62b473381f57f12f8c197d64d344d481c4a662b8 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 16 Jun 2025 03:34:56 +0100 Subject: [PATCH 297/492] Update XE_SR_ERRORCODES from SM Signed-off-by: Bengang Yuan --- ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml b/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml index 47fefd83086..792fe17fcd7 100644 --- a/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml +++ b/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml @@ -513,6 +513,12 @@ 117 + + PVMultiIDs + PVs found with multiple SCSI IDs + 119 + + APISession From a4602b5a58d063c130aef3fbf8b2e76e4a53f3e5 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 18 Jun 2025 15:42:30 +0100 Subject: [PATCH 298/492] CP-308253: `Task.destroy` spans should no longer be orphaned Simplifies the logic of `exec_with_context` by letting the caller decide when the task is destroyed from the database. Adds helper function in `context.ml` to destroy and trace the destroy op correctly. Signed-off-by: Gabriel Buica --- ocaml/xapi/context.ml | 8 ++++ ocaml/xapi/context.mli | 2 + ocaml/xapi/server_helpers.ml | 73 ++++++++++++++++++------------------ 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index b71ed4ca234..e57c3c71eca 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -504,6 +504,14 @@ let get_client_ip context = let get_user_agent context = match context.origin with Internal -> None | Http (rq, _) -> rq.user_agent +let finally_destroy_context ~__context f = + let tracing = __context.tracing in + Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> + __context.tracing <- tracing ; + destroy __context ; + __context.tracing <- None + ) + let with_tracing ?originator ~__context name f = let open Tracing in let parent = __context.tracing in diff --git a/ocaml/xapi/context.mli b/ocaml/xapi/context.mli index 34e51afd2ee..61d307e6476 100644 --- a/ocaml/xapi/context.mli +++ b/ocaml/xapi/context.mli @@ -146,6 +146,8 @@ val complete_tracing : ?error:exn * Printexc.raw_backtrace -> t -> unit val tracing_of : t -> Tracing.Span.t option +val finally_destroy_context : __context:t -> (unit -> 'a) -> 'a + val with_tracing : ?originator:string -> __context:t -> string -> (t -> 'a) -> 'a diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 04aae674472..425fef29036 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -53,9 +53,10 @@ let parameter_count_mismatch_failure func expected received = API.response_of_failure Api_errors.message_parameter_count_mismatch [func; expected; received] -(** WARNING: the context is destroyed when execution is finished if the task is not forwarded, in database and not called asynchronous. *) -let exec_with_context ~__context ~need_complete ?marshaller ?f_forward - ?(called_async = false) ?quiet f = +(** WARNING: DOES NOT DESTROY the context when execution is finished. The + caller must destroy it *) +let exec_with_context ~__context ~need_complete ?marshaller ?f_forward ?quiet f + = (* Execute fn f in specified __context, marshalling result with "marshaller" *) let exec () = (* NB: @@ -95,23 +96,15 @@ let exec_with_context ~__context ~need_complete ?marshaller ?f_forward if need_complete then TaskHelper.failed ~__context e ; raise e in - Locking_helpers.Thread_state.with_named_thread - (TaskHelper.get_name ~__context) (Context.get_task_id __context) (fun () -> - let client = Context.get_client __context in - Debug.with_thread_associated ?client ?quiet - (Context.string_of_task __context) - (fun () -> - (* CP-982: promote tracking debug line to info status *) - if called_async then - info "spawning a new thread to handle the current task%s" - (Context.trackid ~with_brackets:true ~prefix:" " __context) ; - Xapi_stdext_pervasives.Pervasiveext.finally exec (fun () -> - if not called_async then Context.destroy __context - (* else debug "nothing more to process for this thread" *) - ) - ) - () - ) + let@ () = + Locking_helpers.Thread_state.with_named_thread + (TaskHelper.get_name ~__context) + (Context.get_task_id __context) + in + let client = Context.get_client __context in + Debug.with_thread_associated ?client ?quiet + (Context.string_of_task __context) + exec () let dispatch_exn_wrapper f = try f () @@ -168,18 +161,22 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name let sync () = let need_complete = not (Context.forwarded_task __context) in - exec_with_context ~__context ~need_complete ~called_async - ?f_forward:forward_op ~marshaller op_fn + let@ () = Context.finally_destroy_context ~__context in + exec_with_context ~__context ~need_complete ?f_forward:forward_op + ~marshaller op_fn |> marshaller |> Rpc.success in + let async ~need_complete = (* Fork thread in which to execute async call *) + info "spawning a new thread to handle the current task%s" + (Context.trackid ~with_brackets:true ~prefix:" " __context) ; ignore (Thread.create (fun () -> - exec_with_context ~__context ~need_complete ~called_async - ?f_forward:forward_op ~marshaller op_fn + exec_with_context ~__context ~need_complete ?f_forward:forward_op + ~marshaller op_fn ) () ) ; @@ -200,26 +197,30 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name (* in the following functions, it is our responsibility to complete any tasks we create *) let exec_with_new_task ?http_other_config ?quiet ?subtask_of ?session_id ?task_in_database ?task_description ?origin task_name f = - exec_with_context ?quiet - ~__context: - (Context.make ?http_other_config ?quiet ?subtask_of ?session_id - ?task_in_database ?task_description ?origin task_name - ) ~need_complete:true (fun ~__context -> f __context + let __context = + Context.make ?http_other_config ?quiet ?subtask_of ?session_id + ?task_in_database ?task_description ?origin task_name + in + let@ () = Context.finally_destroy_context ~__context in + exec_with_context ?quiet ~__context ~need_complete:true (fun ~__context -> + f __context ) let exec_with_forwarded_task ?http_other_config ?session_id ?origin task_id f = - exec_with_context - ~__context: - (Context.from_forwarded_task ?http_other_config ?session_id ?origin - task_id - ) ~need_complete:true (fun ~__context -> f __context + let __context = + Context.from_forwarded_task ?http_other_config ?session_id ?origin task_id + in + let@ () = Context.finally_destroy_context ~__context in + exec_with_context ~__context ~need_complete:true (fun ~__context -> + f __context ) let exec_with_subtask ~__context ?task_in_database task_name f = - let subcontext = + let __context = Context.make_subcontext ~__context ?task_in_database task_name in - exec_with_context ~__context:subcontext ~need_complete:true f + let@ () = Context.finally_destroy_context ~__context in + exec_with_context ~__context ~need_complete:true f let forward_extension ~__context rbac call = rbac __context (fun () -> Xapi_extensions.call_extension call) From 63eef6fdf06d010b12fc57b75d7aa34d6f8f1709 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 18 Jun 2025 17:08:22 +0100 Subject: [PATCH 299/492] CP-308392: Create specialized functions Signed-off-by: Gabriel Buica --- ocaml/xapi/context.ml | 24 +++++++++++++++++++++-- ocaml/xapi/context.mli | 38 +++++++++++++++++++++++++++++++++++- ocaml/xapi/server_helpers.ml | 19 ++++++++---------- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index e57c3c71eca..f03ce60e2a0 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -506,11 +506,31 @@ let get_user_agent context = let finally_destroy_context ~__context f = let tracing = __context.tracing in - Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> f __context) + (fun () -> __context.tracing <- tracing ; destroy __context ; __context.tracing <- None - ) + ) + +let with_context ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin task_name f = + let __context = + make ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin task_name + in + finally_destroy_context ~__context f + +let with_subcontext ~__context ?task_in_database task_name f = + let __context = make_subcontext ~__context ?task_in_database task_name in + finally_destroy_context ~__context f + +let with_forwarded_task ?http_other_config ?session_id ?origin task_id f = + let __context = + from_forwarded_task ?http_other_config ?session_id ?origin task_id + in + finally_destroy_context ~__context f let with_tracing ?originator ~__context name f = let open Tracing in diff --git a/ocaml/xapi/context.mli b/ocaml/xapi/context.mli index 61d307e6476..281f67ca4b2 100644 --- a/ocaml/xapi/context.mli +++ b/ocaml/xapi/context.mli @@ -146,7 +146,43 @@ val complete_tracing : ?error:exn * Printexc.raw_backtrace -> t -> unit val tracing_of : t -> Tracing.Span.t option -val finally_destroy_context : __context:t -> (unit -> 'a) -> 'a +val finally_destroy_context : __context:t -> (t -> 'a) -> 'a +(** [finally_destroy_context ~context f] executes [f ~__context] and then + ensure [__context] is destroyed.*) + +val with_context : + ?http_other_config:(string * string) list + -> ?quiet:bool + -> ?subtask_of:API.ref_task + -> ?session_id:API.ref_session + -> ?database:Xapi_database.Db_ref.t + -> ?task_in_database:bool + -> ?task_description:string + -> ?origin:origin + -> string + -> (t -> 'a) + -> 'a +(** [with_context ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin name f] creates a + context [__context], executes [f ~__context] and then ensure [__context] is + destroyed.*) + +val with_subcontext : + __context:t -> ?task_in_database:bool -> string -> (t -> 'a) -> 'a +(** [with_subcontext ~__context ?task_in_database name] creates a subcontext + [__context], executes [f ~__context] and then ensure `__context` is + destroyed.*) + +val with_forwarded_task : + ?http_other_config:(string * string) list + -> ?session_id:API.ref_session + -> ?origin:origin + -> API.ref_task + -> (t -> 'a) + -> 'a +(** [with_forwarded_task ?http_other_config ?session_id ?origin task f] + creates a context form frowarded task [task], executes [f ~__context] and + then ensure [__context] is destroyed.*) val with_tracing : ?originator:string -> __context:t -> string -> (t -> 'a) -> 'a diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 425fef29036..48789c455aa 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -161,7 +161,7 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name let sync () = let need_complete = not (Context.forwarded_task __context) in - let@ () = Context.finally_destroy_context ~__context in + let@ __context = Context.finally_destroy_context ~__context in exec_with_context ~__context ~need_complete ?f_forward:forward_op ~marshaller op_fn |> marshaller @@ -197,29 +197,26 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name (* in the following functions, it is our responsibility to complete any tasks we create *) let exec_with_new_task ?http_other_config ?quiet ?subtask_of ?session_id ?task_in_database ?task_description ?origin task_name f = - let __context = - Context.make ?http_other_config ?quiet ?subtask_of ?session_id + let@ __context = + Context.with_context ?http_other_config ?quiet ?subtask_of ?session_id ?task_in_database ?task_description ?origin task_name in - let@ () = Context.finally_destroy_context ~__context in - exec_with_context ?quiet ~__context ~need_complete:true (fun ~__context -> + exec_with_context ~__context ~need_complete:true (fun ~__context -> f __context ) let exec_with_forwarded_task ?http_other_config ?session_id ?origin task_id f = - let __context = - Context.from_forwarded_task ?http_other_config ?session_id ?origin task_id + let@ __context = + Context.with_forwarded_task ?http_other_config ?session_id ?origin task_id in - let@ () = Context.finally_destroy_context ~__context in exec_with_context ~__context ~need_complete:true (fun ~__context -> f __context ) let exec_with_subtask ~__context ?task_in_database task_name f = - let __context = - Context.make_subcontext ~__context ?task_in_database task_name + let@ __context = + Context.with_subcontext ~__context ?task_in_database task_name in - let@ () = Context.finally_destroy_context ~__context in exec_with_context ~__context ~need_complete:true f let forward_extension ~__context rbac call = From 3b76902a4cf02b662f262a14f3753f561fe6c82d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 19 Jun 2025 08:19:18 +0100 Subject: [PATCH 300/492] xapi-idl: Clean up xenops-related interfaces Dynamic.t is not used currently, just drop it. Signed-off-by: Andrii Sultanov --- ocaml/xapi-idl/xen/xenops_interface.ml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 39299a41f93..9b3f2941910 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -444,16 +444,6 @@ module Dynamic = struct type barrier = int * id list [@@deriving rpcty] - type t = - | Vm_t of Vm.id * (Vm.t * Vm.state) option - | Vbd_t of Vbd.id * (Vbd.t * Vbd.state) option - | Vif_t of Vif.id * (Vif.t * Vif.state) option - | Pci_t of Pci.id * (Pci.t * Pci.state) option - | Vgpu_t of Vgpu.id * (Vgpu.t * Vgpu.state) option - | Vusb_t of Vusb.id * (Vusb.t * Vusb.state) option - | Task_t of Task.id * Task.t option - [@@deriving rpcty] - let rpc_of_id = Rpcmarshal.marshal id.Rpc.Types.ty end From c568697aaa9e3ea2774a2a915b87f197eaaa0b60 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 18 Jun 2025 15:50:07 +0100 Subject: [PATCH 301/492] xapi_xenops: Remove unnecessary Helpers.get_localhost call There's already a 'localhost' in scope. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_xenops.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index e9f5174f90b..54e824188ab 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2124,7 +2124,6 @@ let update_vm ~__context id = debug "xenopsd event: Updating VM %s consoles" id ; Option.iter (fun (_, state) -> - let localhost = Helpers.get_localhost ~__context in let address = Db.Host.get_address ~__context ~self:localhost in From fa2810548faac8638cd3da5b1f59d377826ad753 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 18 Jun 2025 15:24:44 +0100 Subject: [PATCH 302/492] xapi_xenops: Split update_vm internals into a separate function No functional change, this just removes several indentation levels from the 500+ lines of the function, making it easier to refactor in the future. This also clears up the logic of the function, now that two arms of if-else and try-with clauses are not 500+ lines apart, and avoids splitting some expressions and strings over several lines given that they reach the line character limit more often inside of several levels of indentation. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_xenops.ml | 1110 ++++++++++++++++++------------------- 1 file changed, 542 insertions(+), 568 deletions(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 54e824188ab..dbefffb1571 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -1852,591 +1852,565 @@ module Events_from_xenopsd = struct ) end -let update_vm ~__context id = - try - if Events_from_xenopsd.are_suppressed id then - debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id - else - let self = Db.VM.get_by_uuid ~__context ~uuid:id in - let localhost = Helpers.get_localhost ~__context in - if Db.VM.get_resident_on ~__context ~self = localhost then - let previous = Xenops_cache.find_vm id in - let dbg = Context.string_of_task_and_tracing __context in - let module Client = - (val make_client (queue_of_vm ~__context ~self) : XENOPS) - in - let info = try Some (Client.VM.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( - debug "xenopsd event: processing event for VM %s" id ; - if info = None then - debug "xenopsd event: VM state missing: assuming VM has shut down" ; - let should_update_allowed_operations = ref false in - let different f = - let a = Option.map (fun x -> f (snd x)) info in - let b = Option.map f previous in - a <> b - in - (* Helpers to create and update guest metrics when needed *) - let lookup state key = List.assoc_opt key state.Vm.guest_agent in - let list state dir = - let dir = - if dir.[0] = '/' then - String.sub dir 1 (String.length dir - 1) - else - dir - in - let results = - List.filter_map - (fun (path, _) -> - if String.starts_with ~prefix:dir path then - let rest = - String.sub path (String.length dir) - (String.length path - String.length dir) - in - match - List.filter (fun x -> x <> "") (String.split '/' rest) - with - | x :: _ -> - Some x - | _ -> - None - else - None - ) - state.Vm.guest_agent - |> Listext.setify +let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = + debug "xenopsd event: processing event for VM %s" id ; + if info = None then + debug "xenopsd event: VM state missing: assuming VM has shut down" ; + let should_update_allowed_operations = ref false in + let different f = + let a = Option.map (fun x -> f (snd x)) info in + let b = Option.map f previous in + a <> b + in + (* Helpers to create and update guest metrics when needed *) + let lookup state key = List.assoc_opt key state.Vm.guest_agent in + let list state dir = + let dir = + if dir.[0] = '/' then + String.sub dir 1 (String.length dir - 1) + else + dir + in + let results = + List.filter_map + (fun (path, _) -> + if String.starts_with ~prefix:dir path then + let rest = + String.sub path (String.length dir) + (String.length path - String.length dir) in - results - in - let create_guest_metrics_if_needed () = - let gm = Db.VM.get_guest_metrics ~__context ~self in - if gm = Ref.null then - Option.iter - (fun (_, state) -> - List.iter - (fun domid -> - try - let new_gm_ref = - Xapi_guest_agent.create_and_set_guest_metrics - (lookup state) (list state) ~__context ~domid - ~uuid:id - ~pV_drivers_detected:state.pv_drivers_detected - in - debug - "xenopsd event: created guest metrics %s for VM %s" - (Ref.string_of new_gm_ref) id - with e -> - error "Caught %s: while creating VM %s guest metrics" - (Printexc.to_string e) id - ) - state.domids - ) - info - in - let check_guest_agent () = - Option.iter - (fun (_, state) -> - Option.iter - (fun oldstate -> - let old_ga = oldstate.Vm.guest_agent in - let new_ga = state.Vm.guest_agent in - (* Remove memory keys *) - let ignored_keys = - ["data/meminfo_free"; "data/updated"; "data/update_cnt"] - in - let remove_ignored ga = - List.fold_left - (fun acc k -> List.filter (fun x -> fst x <> k) acc) - ga ignored_keys - in - let old_ga = remove_ignored old_ga in - let new_ga = remove_ignored new_ga in - if new_ga <> old_ga then ( - debug - "Will update VM.allowed_operations because guest_agent \ - has changed." ; - should_update_allowed_operations := true - ) else - debug - "Supressing VM.allowed_operations update because \ - guest_agent data is largely the same" - ) - previous ; - List.iter - (fun domid -> - try - debug "xenopsd event: Updating VM %s domid %d guest_agent" - id domid ; - Xapi_guest_agent.all (lookup state) (list state) - ~__context ~domid ~uuid:id - ~pV_drivers_detected:state.pv_drivers_detected - with e -> - error "Caught %s: while updating VM %s guest_agent" - (Printexc.to_string e) id - ) - state.domids - ) - info - in - (* Notes on error handling: if something fails we log and continue, to - maximise the amount of state which is correctly synced. If something - does fail then we may end up permanently out-of-sync until either a - process restart or an event is generated. We may wish to periodically - inject artificial events IF there has been an event sync failure? *) - let power_state = - xenapi_of_xenops_power_state - (Option.map (fun x -> (snd x).Vm.power_state) info) - in - let power_state_before_update = - Db.VM.get_power_state ~__context ~self - in - (* We preserve the current_domain_type of suspended VMs like we preserve - the currently_attached fields for VBDs/VIFs etc - it's important to know - whether suspended VMs are going to resume into PV or PVinPVH for example. - We do this before updating the power_state to maintain the invariant that - any VM that's not `Halted cannot have an unspecified current_domain_type *) - if different (fun x -> x.domain_type) && power_state <> `Suspended - then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - let update domain_type = - debug - "xenopsd event: Updating VM %s current_domain_type <- %s" id - (Record_util.domain_type_to_string domain_type) ; - Db.VM_metrics.set_current_domain_type ~__context ~self:metrics - ~value:domain_type - in - match state.Vm.domain_type with - | Domain_HVM -> - update `hvm - | Domain_PV -> - update `pv - | Domain_PVinPVH -> - update `pv_in_pvh - | Domain_PVH -> - update `pvh - | Domain_undefined -> - if power_state <> `Halted then - debug - "xenopsd returned an undefined domain type for \ - non-halted VM %s;assuming this is transient, so not \ - updating current_domain_type" - id - else - update `unspecified - ) - info ; - ( if different (fun x -> x.power_state) then - try - debug - "Will update VM.allowed_operations because power_state has \ - changed." ; - should_update_allowed_operations := true ; - (* Update ha_always_run before the power_state (if needed), to avoid racing - with the HA monitor thread. *) - let pool = Helpers.get_pool ~__context in - if - power_state = `Halted - && not - (Db.Pool.get_ha_reboot_vm_on_internal_shutdown ~__context - ~self:pool - ) - then ( - Db.VM.set_ha_always_run ~__context ~self ~value:false ; - debug "Setting ha_always_run on vm=%s as false after shutdown" - (Ref.string_of self) - ) ; - debug "xenopsd event: Updating VM %s power_state <- %s" id - (Record_util.vm_power_state_to_string power_state) ; - - (* NOTE: Pull xenopsd metadata as soon as possible so that - nothing comes inbetween the power state change and the - Xenopsd_metadata.pull and overwrites it. *) - ( if power_state = `Suspended then - let md = Xenopsd_metadata.pull ~__context id in - match md.Metadata.domains with - | None -> - error "Suspended VM has no domain-specific metadata" - | Some x -> - Db.VM.set_last_booted_record ~__context ~self ~value:x ; - debug "VM %s last_booted_record set to %s" - (Ref.string_of self) x - ) ; - - (* This will mark VBDs, VIFs as detached and clear resident_on - if the VM has permanently shutdown. current-operations - should not be reset as there maybe a checkpoint is ongoing*) - Xapi_vm_lifecycle.force_state_reset_keep_current_operations - ~__context ~self ~value:power_state ; - if power_state = `Running then - create_guest_metrics_if_needed () ; - if power_state = `Suspended || power_state = `Halted then ( - Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; - Storage_access.reset ~__context ~vm:self - ) ; - if power_state = `Halted then ( - Xenopsd_metadata.delete ~__context id ; - !trigger_xenapi_reregister () - ) - with e -> - error "Caught %s: while updating VM %s power_state" - (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.domids) then - try - debug - "Will update VM.allowed_operations because domid has changed." ; - should_update_allowed_operations := true ; - debug "xenopsd event: Updating VM %s domid" id ; - Option.iter - (fun (_, state) -> - match state.Vm.domids with - | value :: _ -> - Db.VM.set_domid ~__context ~self - ~value:(Int64.of_int value) - | [] -> - () - (* happens when the VM is shutdown *) - ) - info ; - (* If this is a storage domain, attempt to plug the PBD *) - Option.iter - (fun pbd -> - let (_ : Thread.t) = - Thread.create - (fun () -> - (* Don't block the database update thread *) - Xapi_pbd.plug ~__context ~self:pbd - ) - () - in - () - ) - (System_domains.pbd_of_vm ~__context ~vm:self) - with e -> - error "Caught %s: while updating VM %s domids" - (Printexc.to_string e) id - ) ; - (* consoles *) - ( if different (fun x -> x.consoles) then - try - debug "xenopsd event: Updating VM %s consoles" id ; - Option.iter - (fun (_, state) -> - let address = - Db.Host.get_address ~__context ~self:localhost - in - let uri = - Uri.( - make ~scheme:"https" ~host:address - ~path:Constants.console_uri () - |> to_string - ) - in - let get_uri_from_location loc = - try - let n = String.index loc '?' in - String.sub loc 0 n - with Not_found -> loc - in - let current_protocols = - List.map - (fun self -> - ( ( Db.Console.get_protocol ~__context ~self - |> to_xenops_console_protocol - , Db.Console.get_location ~__context ~self - |> get_uri_from_location - ) - , self - ) - ) - (Db.VM.get_consoles ~__context ~self) - in - let new_protocols = - List.map - (fun c -> ((c.Vm.protocol, uri), c)) - state.Vm.consoles - in - (* Destroy consoles that have gone away *) - List.iter - (fun protocol -> - let self = List.assoc protocol current_protocols in - Db.Console.destroy ~__context ~self - ) - (Listext.set_difference - (List.map fst current_protocols) - (List.map fst new_protocols) - ) ; - (* Create consoles that have appeared *) - List.iter - (fun (protocol, _) -> - let ref = Ref.make () in - let uuid = Uuidx.to_string (Uuidx.make ()) in - let location = Printf.sprintf "%s?uuid=%s" uri uuid in - let port = - try - Int64.of_int - (List.find - (fun c -> c.Vm.protocol = protocol) - state.Vm.consoles - ) - .port - with Not_found -> -1L - in - Db.Console.create ~__context ~ref ~uuid - ~protocol:(to_xenapi_console_protocol protocol) - ~location ~vM:self ~other_config:[] ~port - ) - (Listext.set_difference - (List.map fst new_protocols) - (List.map fst current_protocols) - ) - ) - info - with e -> - error "Caught %s: while updating VM %s consoles" - (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.memory_target) then + match List.filter (fun x -> x <> "") (String.split '/' rest) with + | x :: _ -> + Some x + | _ -> + None + else + None + ) + state.Vm.guest_agent + |> Listext.setify + in + results + in + let create_guest_metrics_if_needed () = + let gm = Db.VM.get_guest_metrics ~__context ~self in + if gm = Ref.null then + Option.iter + (fun (_, state) -> + List.iter + (fun domid -> try - Option.iter - (fun (_, state) -> - debug "xenopsd event: Updating VM %s memory_target <- %Ld" - id state.Vm.memory_target ; - Db.VM.set_memory_target ~__context ~self - ~value:state.memory_target - ) - info + let new_gm_ref = + Xapi_guest_agent.create_and_set_guest_metrics (lookup state) + (list state) ~__context ~domid ~uuid:id + ~pV_drivers_detected:state.pv_drivers_detected + in + debug "xenopsd event: created guest metrics %s for VM %s" + (Ref.string_of new_gm_ref) id with e -> - error "Caught %s: while updating VM %s consoles" + error "Caught %s: while creating VM %s guest metrics" (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.rtc_timeoffset) then + ) + state.domids + ) + info + in + let check_guest_agent () = + Option.iter + (fun (_, state) -> + Option.iter + (fun oldstate -> + let old_ga = oldstate.Vm.guest_agent in + let new_ga = state.Vm.guest_agent in + (* Remove memory keys *) + let ignored_keys = + ["data/meminfo_free"; "data/updated"; "data/update_cnt"] + in + let remove_ignored ga = + List.fold_left + (fun acc k -> List.filter (fun x -> fst x <> k) acc) + ga ignored_keys + in + let old_ga = remove_ignored old_ga in + let new_ga = remove_ignored new_ga in + if new_ga <> old_ga then ( + debug + "Will update VM.allowed_operations because guest_agent has \ + changed." ; + should_update_allowed_operations := true + ) else + debug + "Supressing VM.allowed_operations update because guest_agent \ + data is largely the same" + ) + previous ; + List.iter + (fun domid -> + try + debug "xenopsd event: Updating VM %s domid %d guest_agent" id + domid ; + Xapi_guest_agent.all (lookup state) (list state) ~__context ~domid + ~uuid:id ~pV_drivers_detected:state.pv_drivers_detected + with e -> + error "Caught %s: while updating VM %s guest_agent" + (Printexc.to_string e) id + ) + state.domids + ) + info + in + (* Notes on error handling: if something fails we log and continue, to + maximise the amount of state which is correctly synced. If something + does fail then we may end up permanently out-of-sync until either a + process restart or an event is generated. We may wish to periodically + inject artificial events IF there has been an event sync failure? *) + let power_state = + xenapi_of_xenops_power_state + (Option.map (fun x -> (snd x).Vm.power_state) info) + in + let power_state_before_update = Db.VM.get_power_state ~__context ~self in + (* We preserve the current_domain_type of suspended VMs like we preserve + the currently_attached fields for VBDs/VIFs etc - it's important to know + whether suspended VMs are going to resume into PV or PVinPVH for example. + We do this before updating the power_state to maintain the invariant that + any VM that's not `Halted cannot have an unspecified current_domain_type *) + if different (fun x -> x.domain_type) && power_state <> `Suspended then + Option.iter + (fun (_, state) -> + let metrics = Db.VM.get_metrics ~__context ~self in + let update domain_type = + debug "xenopsd event: Updating VM %s current_domain_type <- %s" id + (Record_util.domain_type_to_string domain_type) ; + Db.VM_metrics.set_current_domain_type ~__context ~self:metrics + ~value:domain_type + in + match state.Vm.domain_type with + | Domain_HVM -> + update `hvm + | Domain_PV -> + update `pv + | Domain_PVinPVH -> + update `pv_in_pvh + | Domain_PVH -> + update `pvh + | Domain_undefined -> + if power_state <> `Halted then + debug + "xenopsd returned an undefined domain type for non-halted VM \ + %s;assuming this is transient, so not updating \ + current_domain_type" + id + else + update `unspecified + ) + info ; + ( if different (fun x -> x.power_state) then + try + debug + "Will update VM.allowed_operations because power_state has changed." ; + should_update_allowed_operations := true ; + (* Update ha_always_run before the power_state (if needed), to avoid racing + with the HA monitor thread. *) + let pool = Helpers.get_pool ~__context in + if + power_state = `Halted + && not + (Db.Pool.get_ha_reboot_vm_on_internal_shutdown ~__context + ~self:pool + ) + then ( + Db.VM.set_ha_always_run ~__context ~self ~value:false ; + debug "Setting ha_always_run on vm=%s as false after shutdown" + (Ref.string_of self) + ) ; + debug "xenopsd event: Updating VM %s power_state <- %s" id + (Record_util.vm_power_state_to_string power_state) ; + + (* NOTE: Pull xenopsd metadata as soon as possible so that + nothing comes inbetween the power state change and the + Xenopsd_metadata.pull and overwrites it. *) + ( if power_state = `Suspended then + let md = Xenopsd_metadata.pull ~__context id in + match md.Metadata.domains with + | None -> + error "Suspended VM has no domain-specific metadata" + | Some x -> + Db.VM.set_last_booted_record ~__context ~self ~value:x ; + debug "VM %s last_booted_record set to %s" (Ref.string_of self) + x + ) ; + + (* This will mark VBDs, VIFs as detached and clear resident_on + if the VM has permanently shutdown. current-operations + should not be reset as there maybe a checkpoint is ongoing*) + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self ~value:power_state ; + if power_state = `Running then + create_guest_metrics_if_needed () ; + if power_state = `Suspended || power_state = `Halted then ( + Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; + Storage_access.reset ~__context ~vm:self + ) ; + if power_state = `Halted then ( + Xenopsd_metadata.delete ~__context id ; + !trigger_xenapi_reregister () + ) + with e -> + error "Caught %s: while updating VM %s power_state" + (Printexc.to_string e) id + ) ; + ( if different (fun x -> x.domids) then + try + debug "Will update VM.allowed_operations because domid has changed." ; + should_update_allowed_operations := true ; + debug "xenopsd event: Updating VM %s domid" id ; + Option.iter + (fun (_, state) -> + match state.Vm.domids with + | value :: _ -> + Db.VM.set_domid ~__context ~self ~value:(Int64.of_int value) + | [] -> + () + (* happens when the VM is shutdown *) + ) + info ; + (* If this is a storage domain, attempt to plug the PBD *) + Option.iter + (fun pbd -> + let (_ : Thread.t) = + Thread.create + (fun () -> + (* Don't block the database update thread *) + Xapi_pbd.plug ~__context ~self:pbd + ) + () + in + () + ) + (System_domains.pbd_of_vm ~__context ~vm:self) + with e -> + error "Caught %s: while updating VM %s domids" (Printexc.to_string e) id + ) ; + (* consoles *) + ( if different (fun x -> x.consoles) then + try + debug "xenopsd event: Updating VM %s consoles" id ; + Option.iter + (fun (_, state) -> + let address = Db.Host.get_address ~__context ~self:localhost in + let uri = + Uri.( + make ~scheme:"https" ~host:address ~path:Constants.console_uri + () + |> to_string + ) + in + let get_uri_from_location loc = try - Option.iter - (fun (_, state) -> - if state.Vm.rtc_timeoffset <> "" then ( - debug - "xenopsd event: Updating VM %s platform:timeoffset <- \ - %s" - id state.rtc_timeoffset ; - ( try - Db.VM.remove_from_platform ~__context ~self - ~key:Vm_platform.timeoffset - with _ -> () - ) ; - Db.VM.add_to_platform ~__context ~self - ~key:Vm_platform.timeoffset ~value:state.rtc_timeoffset + let n = String.index loc '?' in + String.sub loc 0 n + with Not_found -> loc + in + let current_protocols = + List.map + (fun self -> + ( ( Db.Console.get_protocol ~__context ~self + |> to_xenops_console_protocol + , Db.Console.get_location ~__context ~self + |> get_uri_from_location ) + , self ) - info - with e -> - error "Caught %s: while updating VM %s rtc/timeoffset" - (Printexc.to_string e) id - ) ; - if different (fun x -> x.hvm) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s hvm <- %s" id - (string_of_bool state.Vm.hvm) ; - Db.VM_metrics.set_hvm ~__context ~self:metrics - ~value:state.Vm.hvm - ) - info ; - if different (fun x -> x.nomigrate) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nomigrate <- %s" id - (string_of_bool state.Vm.nomigrate) ; - Db.VM_metrics.set_nomigrate ~__context ~self:metrics - ~value:state.Vm.nomigrate + ) + (Db.VM.get_consoles ~__context ~self) + in + let new_protocols = + List.map (fun c -> ((c.Vm.protocol, uri), c)) state.Vm.consoles + in + (* Destroy consoles that have gone away *) + List.iter + (fun protocol -> + let self = List.assoc protocol current_protocols in + Db.Console.destroy ~__context ~self ) - info ; - if different (fun x -> x.nested_virt) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nested_virt <- %s" id - (string_of_bool state.Vm.nested_virt) ; - Db.VM_metrics.set_nested_virt ~__context ~self:metrics - ~value:state.Vm.nested_virt + (Listext.set_difference + (List.map fst current_protocols) + (List.map fst new_protocols) + ) ; + (* Create consoles that have appeared *) + List.iter + (fun (protocol, _) -> + let ref = Ref.make () in + let uuid = Uuidx.to_string (Uuidx.make ()) in + let location = Printf.sprintf "%s?uuid=%s" uri uuid in + let port = + try + Int64.of_int + (List.find + (fun c -> c.Vm.protocol = protocol) + state.Vm.consoles + ) + .port + with Not_found -> -1L + in + Db.Console.create ~__context ~ref ~uuid + ~protocol:(to_xenapi_console_protocol protocol) + ~location ~vM:self ~other_config:[] ~port ) - info ; - let update_pv_drivers_detected () = - Option.iter - (fun (_, state) -> - try - let gm = Db.VM.get_guest_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s PV drivers detected %b" - id state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_detected ~__context - ~self:gm ~value:state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context - ~self:gm ~value:state.Vm.pv_drivers_detected - with e -> - debug "Caught %s: while updating VM %s PV drivers" - (Printexc.to_string e) id + (Listext.set_difference + (List.map fst new_protocols) + (List.map fst current_protocols) ) - info - in - (* Chack last_start_time before updating anything in the guest metrics *) - ( if different (fun x -> x.last_start_time) then - try - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - (* Clamp time to full seconds, stored timestamps do not - have decimals *) - let start_time = - Float.floor state.Vm.last_start_time |> Date.of_unix_time - in - let expected_time = - Db.VM_metrics.get_start_time ~__context ~self:metrics - in - if Date.is_later ~than:expected_time start_time then ( - debug - "xenopsd event: Updating VM %s last_start_time <- %s" id - Date.(to_rfc3339 (of_unix_time state.Vm.last_start_time)) ; - Db.VM_metrics.set_start_time ~__context ~self:metrics - ~value:start_time ; - if - (* VM start and VM reboot *) - power_state = `Running - && power_state_before_update <> `Suspended - then ( - Xapi_vm_lifecycle.remove_pending_guidance ~__context - ~self ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context - ~self ~value:`restart_vm - ) - ) ; - create_guest_metrics_if_needed () ; - let gm = Db.VM.get_guest_metrics ~__context ~self in - let update_time = - Db.VM_guest_metrics.get_last_updated ~__context ~self:gm - in - if update_time < start_time then ( - debug - "VM %s guest metrics update time (%s) < VM start time \ - (%s): deleting" - id - (Date.to_rfc3339 update_time) - (Date.to_rfc3339 start_time) ; - Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; - check_guest_agent () - ) - ) - info - with e -> - error "Caught %s: while updating VM %s last_start_time" - (Printexc.to_string e) id - ) ; - Option.iter - (fun (_, state) -> - List.iter - (fun domid -> - (* Guest metrics could have been destroyed during the last_start_time check - by recreating them, we avoid CA-223387 *) - create_guest_metrics_if_needed () ; - if different (fun x -> x.Vm.uncooperative_balloon_driver) then - debug - "xenopsd event: VM %s domid %d \ - uncooperative_balloon_driver = %b" - id domid state.Vm.uncooperative_balloon_driver ; - if different (fun x -> x.Vm.guest_agent) then - check_guest_agent () ; - if different (fun x -> x.Vm.pv_drivers_detected) then - update_pv_drivers_detected () ; - ( if different (fun x -> x.Vm.xsdata_state) then - try - debug "xenopsd event: Updating VM %s domid %d xsdata" id - domid ; - Db.VM.set_xenstore_data ~__context ~self - ~value:state.Vm.xsdata_state - with e -> - error "Caught %s: while updating VM %s xsdata" - (Printexc.to_string e) id - ) ; - if different (fun x -> x.Vm.memory_target) then - try - debug - "xenopsd event: Updating VM %s domid %d memory target" - id domid ; - Rrdd.update_vm_memory_target domid state.Vm.memory_target - with e -> - error "Caught %s: while updating VM %s memory_target" - (Printexc.to_string e) id - ) - state.Vm.domids + ) + info + with e -> + error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) + id + ) ; + ( if different (fun x -> x.memory_target) then + try + Option.iter + (fun (_, state) -> + debug "xenopsd event: Updating VM %s memory_target <- %Ld" id + state.Vm.memory_target ; + Db.VM.set_memory_target ~__context ~self ~value:state.memory_target + ) + info + with e -> + error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) + id + ) ; + ( if different (fun x -> x.rtc_timeoffset) then + try + Option.iter + (fun (_, state) -> + if state.Vm.rtc_timeoffset <> "" then ( + debug "xenopsd event: Updating VM %s platform:timeoffset <- %s" id + state.rtc_timeoffset ; + ( try + Db.VM.remove_from_platform ~__context ~self + ~key:Vm_platform.timeoffset + with _ -> () + ) ; + Db.VM.add_to_platform ~__context ~self ~key:Vm_platform.timeoffset + ~value:state.rtc_timeoffset ) - info ; - if different (fun x -> x.Vm.vcpu_target) then - Option.iter - (fun (_, state) -> - try - debug "xenopsd event: Updating VM %s vcpu_target <- %d" id - state.Vm.vcpu_target ; - let metrics = Db.VM.get_metrics ~__context ~self in - Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics - ~value:(Int64.of_int state.Vm.vcpu_target) - with e -> - error "Caught %s: while updating VM %s VCPUs_number" - (Printexc.to_string e) id + ) + info + with e -> + error "Caught %s: while updating VM %s rtc/timeoffset" + (Printexc.to_string e) id + ) ; + if different (fun x -> x.hvm) then + Option.iter + (fun (_, state) -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s hvm <- %s" id + (string_of_bool state.Vm.hvm) ; + Db.VM_metrics.set_hvm ~__context ~self:metrics ~value:state.Vm.hvm + ) + info ; + if different (fun x -> x.nomigrate) then + Option.iter + (fun (_, state) -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nomigrate <- %s" id + (string_of_bool state.Vm.nomigrate) ; + Db.VM_metrics.set_nomigrate ~__context ~self:metrics + ~value:state.Vm.nomigrate + ) + info ; + if different (fun x -> x.nested_virt) then + Option.iter + (fun (_, state) -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nested_virt <- %s" id + (string_of_bool state.Vm.nested_virt) ; + Db.VM_metrics.set_nested_virt ~__context ~self:metrics + ~value:state.Vm.nested_virt + ) + info ; + let update_pv_drivers_detected () = + Option.iter + (fun (_, state) -> + try + let gm = Db.VM.get_guest_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s PV drivers detected %b" id + state.Vm.pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_detected ~__context ~self:gm + ~value:state.Vm.pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context ~self:gm + ~value:state.Vm.pv_drivers_detected + with e -> + debug "Caught %s: while updating VM %s PV drivers" + (Printexc.to_string e) id + ) + info + in + (* Chack last_start_time before updating anything in the guest metrics *) + ( if different (fun x -> x.last_start_time) then + try + Option.iter + (fun (_, state) -> + let metrics = Db.VM.get_metrics ~__context ~self in + (* Clamp time to full seconds, stored timestamps do not + have decimals *) + let start_time = + Float.floor state.Vm.last_start_time |> Date.of_unix_time + in + let expected_time = + Db.VM_metrics.get_start_time ~__context ~self:metrics + in + if Date.is_later ~than:expected_time start_time then ( + debug "xenopsd event: Updating VM %s last_start_time <- %s" id + Date.(to_rfc3339 (of_unix_time state.Vm.last_start_time)) ; + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:start_time ; + if + (* VM start and VM reboot *) + power_state = `Running + && power_state_before_update <> `Suspended + then ( + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_device_model ; + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm ) - info ; - ( if different (fun x -> x.shadow_multiplier_target) then + ) ; + create_guest_metrics_if_needed () ; + let gm = Db.VM.get_guest_metrics ~__context ~self in + let update_time = + Db.VM_guest_metrics.get_last_updated ~__context ~self:gm + in + if update_time < start_time then ( + debug + "VM %s guest metrics update time (%s) < VM start time (%s): \ + deleting" + id + (Date.to_rfc3339 update_time) + (Date.to_rfc3339 start_time) ; + Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; + check_guest_agent () + ) + ) + info + with e -> + error "Caught %s: while updating VM %s last_start_time" + (Printexc.to_string e) id + ) ; + Option.iter + (fun (_, state) -> + List.iter + (fun domid -> + (* Guest metrics could have been destroyed during the last_start_time check + by recreating them, we avoid CA-223387 *) + create_guest_metrics_if_needed () ; + if different (fun x -> x.Vm.uncooperative_balloon_driver) then + debug + "xenopsd event: VM %s domid %d uncooperative_balloon_driver = %b" + id domid state.Vm.uncooperative_balloon_driver ; + if different (fun x -> x.Vm.guest_agent) then + check_guest_agent () ; + if different (fun x -> x.Vm.pv_drivers_detected) then + update_pv_drivers_detected () ; + ( if different (fun x -> x.Vm.xsdata_state) then try - Option.iter - (fun (_, state) -> - debug - "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" - id state.Vm.shadow_multiplier_target ; - if - state.Vm.power_state <> Halted - && state.Vm.shadow_multiplier_target >= 0.0 - then - Db.VM.set_HVM_shadow_multiplier ~__context ~self - ~value:state.Vm.shadow_multiplier_target - ) - info + debug "xenopsd event: Updating VM %s domid %d xsdata" id domid ; + Db.VM.set_xenstore_data ~__context ~self + ~value:state.Vm.xsdata_state with e -> - error "Caught %s: while updating VM %s HVM_shadow_multiplier" + error "Caught %s: while updating VM %s xsdata" (Printexc.to_string e) id ) ; - (* Preserve last_boot_CPU_flags when suspending (see current_domain_type) *) - if different (fun x -> x.Vm.featureset) && power_state <> `Suspended - then - Option.iter - (fun (_, state) -> - try - debug - "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id - state.Vm.featureset ; - let vendor = - Db.Host.get_cpu_info ~__context ~self:localhost - |> List.assoc Constants.cpu_info_vendor_key - in - let value = - [ - (Constants.cpu_info_vendor_key, vendor) - ; (Constants.cpu_info_features_key, state.Vm.featureset) - ] - in - Db.VM.set_last_boot_CPU_flags ~__context ~self ~value - with e -> - error "Caught %s: while updating VM %s last_boot_CPU_flags" - (Printexc.to_string e) id - ) - info ; - Xenops_cache.update_vm id (Option.map snd info) ; - if !should_update_allowed_operations then - Helpers.call_api_functions ~__context (fun rpc session_id -> - XenAPI.VM.update_allowed_operations ~rpc ~session_id ~self - ) + if different (fun x -> x.Vm.memory_target) then + try + debug "xenopsd event: Updating VM %s domid %d memory target" id + domid ; + Rrdd.update_vm_memory_target domid state.Vm.memory_target + with e -> + error "Caught %s: while updating VM %s memory_target" + (Printexc.to_string e) id ) + state.Vm.domids + ) + info ; + if different (fun x -> x.Vm.vcpu_target) then + Option.iter + (fun (_, state) -> + try + debug "xenopsd event: Updating VM %s vcpu_target <- %d" id + state.Vm.vcpu_target ; + let metrics = Db.VM.get_metrics ~__context ~self in + Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics + ~value:(Int64.of_int state.Vm.vcpu_target) + with e -> + error "Caught %s: while updating VM %s VCPUs_number" + (Printexc.to_string e) id + ) + info ; + ( if different (fun x -> x.shadow_multiplier_target) then + try + Option.iter + (fun (_, state) -> + debug "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" id + state.Vm.shadow_multiplier_target ; + if + state.Vm.power_state <> Halted + && state.Vm.shadow_multiplier_target >= 0.0 + then + Db.VM.set_HVM_shadow_multiplier ~__context ~self + ~value:state.Vm.shadow_multiplier_target + ) + info + with e -> + error "Caught %s: while updating VM %s HVM_shadow_multiplier" + (Printexc.to_string e) id + ) ; + (* Preserve last_boot_CPU_flags when suspending (see current_domain_type) *) + if different (fun x -> x.Vm.featureset) && power_state <> `Suspended then + Option.iter + (fun (_, state) -> + try + debug "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id + state.Vm.featureset ; + let vendor = + Db.Host.get_cpu_info ~__context ~self:localhost + |> List.assoc Constants.cpu_info_vendor_key + in + let value = + [ + (Constants.cpu_info_vendor_key, vendor) + ; (Constants.cpu_info_features_key, state.Vm.featureset) + ] + in + Db.VM.set_last_boot_CPU_flags ~__context ~self ~value + with e -> + error "Caught %s: while updating VM %s last_boot_CPU_flags" + (Printexc.to_string e) id + ) + info ; + Xenops_cache.update_vm id (Option.map snd info) ; + if !should_update_allowed_operations then + Helpers.call_api_functions ~__context (fun rpc session_id -> + XenAPI.VM.update_allowed_operations ~rpc ~session_id ~self + ) + +let update_vm ~__context id = + try + if Events_from_xenopsd.are_suppressed id then + debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id + else + let self = Db.VM.get_by_uuid ~__context ~uuid:id in + let localhost = Helpers.get_localhost ~__context in + if Db.VM.get_resident_on ~__context ~self = localhost then + let previous = Xenops_cache.find_vm id in + let dbg = Context.string_of_task_and_tracing __context in + let module Client = + (val make_client (queue_of_vm ~__context ~self) : XENOPS) + in + let info = try Some (Client.VM.stat dbg id) with _ -> None in + if Option.map snd info <> previous then + update_vm_internal ~__context ~id ~self ~previous ~info ~localhost with e -> error "xenopsd event: Caught %s while updating VM: has this VM been removed \ From 58e9def69a7ba8d7db045ccfff52001c79313c8d Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 03:29:43 +0000 Subject: [PATCH 303/492] CA-408552: 1/3 Improve bootstrom performance by save db ops events_from_xenopsd thread is critical as it sync up VM status in case of bootstorm, this thread is flood as lots of events comes from xenopsd waiting for process. During processing of the events, VM/VDI/VBD update_allowed_operations will be called to refresh the allowed operations. However, for each ops (start/suspend,etc) for the same object(VM), the object info is always the same no matter what the ops is. Thus, it is not necessary to query the object information over and over again. Disclosure is used to resovle the issue. Query once and the disclosure will just remember the query result. The performance test for starting 500 VM on 4 hosts improve around 10% performance for both XS8 and XS9 This commit just introduce disclosure and caller call the disclosure instead of the original function Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 685 ++++++++++++++++---------------- ocaml/xapi/xapi_vm_lifecycle.ml | 7 +- 2 files changed, 347 insertions(+), 345 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 15dff1df4d8..9691f3831cd 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -63,7 +63,7 @@ let check_sm_feature_error (op : API.vdi_operations) sm_features sr = specified, it should contain at least all the VBD records from the database that are linked to this VDI. *) let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) - ?vbd_records ha_enabled record _ref' op = + ?vbd_records ha_enabled record _ref' = let ( let* ) = Result.bind in let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in @@ -83,360 +83,370 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) - let* () = - if - Helpers.rolling_upgrade_in_progress ~__context - && not - (Xapi_globs.Vdi_operations_set.mem op - Xapi_globs.rpu_allowed_vdi_operations - ) - then - Error (Api_errors.not_supported_during_upgrade, []) - else - Ok () - in - let* () = - (* Don't fail with other_operation_in_progress if VDI mirroring is in - progress and destroy is called as part of VDI mirroring *) - let is_vdi_mirroring_in_progress = - op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops + fun op -> + let* () = + if + Helpers.rolling_upgrade_in_progress ~__context + && not + (Xapi_globs.Vdi_operations_set.mem op + Xapi_globs.rpu_allowed_vdi_operations + ) + then + Error (Api_errors.not_supported_during_upgrade, []) + else + Ok () in - if - List.exists (fun (_, op) -> op <> `copy) current_ops - && not is_vdi_mirroring_in_progress - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () - in - (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq (Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) + let* () = + (* Don't fail with other_operation_in_progress if VDI mirroring is in + progress and destroy is called as part of VDI mirroring *) + let is_vdi_mirroring_in_progress = + op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops + in + if + List.exists (fun (_, op) -> op <> `copy) current_ops + && not is_vdi_mirroring_in_progress + then + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) + ) + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr + && pbd_record.API.pBD_currently_attached ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached - ) - pbd_records - in - let* () = - if pbds_attached = [] && op = `resize then - Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) - else - Ok () - in - - (* check to see whether VBDs exist which are using this VDI *) - - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - in - let my_active_rw_vbd_records = - List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - in - (* If the VBD is currently_attached then some operations can still be - performed ie: VDI.clone (if the VM is suspended we have to have the - 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; - 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked - to a VM, but the implementation first waits for the VDI's VBDs to be - unplugged and destroyed, and the checks are performed there. - *) - let operation_can_be_performed_live = - match op with - | `snapshot - | `resize_online - | `blocked - | `clone - | `mirror - | `enable_cbt - | `disable_cbt - | `data_destroy -> - true - | _ -> - false - in - let operation_can_be_performed_with_ro_attach = - operation_can_be_performed_live - || match op with `copy -> true | _ -> false - in - (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) - let blocked_by_attach = - let blocked_by_attach = - if operation_can_be_performed_live then - false - else if operation_can_be_performed_with_ro_attach then - my_active_rw_vbd_records <> [] + pbd_records + in + let* () = + if pbds_attached = [] && op = `resize then + Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) else - my_active_vbd_records <> [] + Ok () in - let allow_attached_vbds = - (* We use Valid_ref_list.list to ignore exceptions due to invalid - references that could propagate to the message forwarding layer, which - calls this function to check for errors - these exceptions would - prevent the actual XenAPI function from being run. Checks called from - the message forwarding layer should not fail with an exception. *) - let true_for_all_active_vbds f = - Valid_ref_list.for_all f my_active_vbd_records - in + + (* check to see whether VBDs exist which are using this VDI *) + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") + ) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + in + let my_active_rw_vbd_records = + List.filter + (fun vbd -> vbd.Db_actions.vBD_mode = `RW) + my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + in + (* If the VBD is currently_attached then some operations can still be + performed ie: VDI.clone (if the VM is suspended we have to have the + 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; + 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked + to a VM, but the implementation first waits for the VDI's VBDs to be + unplugged and destroyed, and the checks are performed there. + *) + let operation_can_be_performed_live = match op with - | `list_changed_blocks -> - let vbd_connected_to_vm_snapshot vbd = - let vm = vbd.Db_actions.vBD_VM in - Db.is_valid_ref __context vm - && Db.VM.get_is_a_snapshot ~__context ~self:vm - in - (* We allow list_changed_blocks on VDIs attached to snapshot VMs, - because VM.checkpoint may set the currently_attached fields of the - snapshot's VBDs to true, and this would block list_changed_blocks. *) - true_for_all_active_vbds vbd_connected_to_vm_snapshot + | `snapshot + | `resize_online + | `blocked + | `clone + | `mirror + | `enable_cbt + | `disable_cbt + | `data_destroy -> + true | _ -> false in - blocked_by_attach && not allow_attached_vbds - in - let* () = - if blocked_by_attach then - Error - (Api_errors.vdi_in_use, [_ref; Record_util.vdi_operations_to_string op]) - else if - (* data_destroy first waits for all the VBDs to disappear in its - implementation, so it is harmless to allow it when any of the VDI's - VBDs have operations in progress. This ensures that we avoid the retry - mechanism of message forwarding and only use the event loop. *) - my_has_current_operation_vbd_records <> [] && op <> `data_destroy - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () - in - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in - let* () = check_sm_feature_error op sm_features sr in - let allowed_for_cbt_metadata_vdi = - match op with - | `clone - | `copy - | `disable_cbt - | `enable_cbt - | `mirror - | `resize - | `resize_online - | `snapshot - | `set_on_boot -> - false - | `blocked - | `data_destroy - | `destroy - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `update -> - true - in - let* () = - if - (not allowed_for_cbt_metadata_vdi) - && record.Db_actions.vDI_type = `cbt_metadata - then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string `cbt_metadata] - ) - else - Ok () - in - let allowed_when_cbt_enabled = - match op with - | `mirror | `set_on_boot -> - false - | `blocked - | `clone - | `copy - | `data_destroy - | `destroy - | `disable_cbt - | `enable_cbt - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `resize - | `resize_online - | `snapshot - | `update -> - true - in - let* () = - if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_cbt_enabled, [_ref]) - else - Ok () - in - let check_destroy () = - if sr_type = "udev" then - Error (Api_errors.vdi_is_a_physical_device, [_ref]) - else if is_tools_sr then - Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then - Error (Api_errors.ha_is_enabled, []) - else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - && Xapi_pool_helpers.ha_enable_in_progress ~__context - then - Error (Api_errors.ha_enable_in_progress, []) - else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - && Xapi_pool_helpers.ha_disable_in_progress ~__context - then - Error (Api_errors.ha_disable_in_progress, []) - else - Ok () - in - match op with - | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + let operation_can_be_performed_with_ro_attach = + operation_can_be_performed_live + || match op with `copy -> true | _ -> false + in + (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) + let blocked_by_attach = + let blocked_by_attach = + if operation_can_be_performed_live then + false + else if operation_can_be_performed_with_ro_attach then + my_active_rw_vbd_records <> [] + else + my_active_vbd_records <> [] + in + let allow_attached_vbds = + (* We use Valid_ref_list.list to ignore exceptions due to invalid + references that could propagate to the message forwarding layer, which + calls this function to check for errors - these exceptions would + prevent the actual XenAPI function from being run. Checks called from + the message forwarding layer should not fail with an exception. *) + let true_for_all_active_vbds f = + Valid_ref_list.for_all f my_active_vbd_records + in + match op with + | `list_changed_blocks -> + let vbd_connected_to_vm_snapshot vbd = + let vm = vbd.Db_actions.vBD_VM in + Db.is_valid_ref __context vm + && Db.VM.get_is_a_snapshot ~__context ~self:vm + in + (* We allow list_changed_blocks on VDIs attached to snapshot VMs, + because VM.checkpoint may set the currently_attached fields of the + snapshot's VBDs to true, and this would block list_changed_blocks. *) + true_for_all_active_vbds vbd_connected_to_vm_snapshot + | _ -> + false + in + blocked_by_attach && not allow_attached_vbds + in + let* () = + if blocked_by_attach then + Error + ( Api_errors.vdi_in_use + , [_ref; Record_util.vdi_operations_to_string op] + ) + else if + (* data_destroy first waits for all the VBDs to disappear in its + implementation, so it is harmless to allow it when any of the VDI's + VBDs have operations in progress. This ensures that we avoid the retry + mechanism of message forwarding and only use the event loop. *) + my_has_current_operation_vbd_records <> [] && op <> `data_destroy then - Error (Api_errors.ha_is_enabled, []) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) else Ok () - | `destroy -> - check_destroy () - | `data_destroy -> - if not record.Db_actions.vDI_is_a_snapshot then + in + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let* () = check_sm_feature_error op sm_features sr in + let allowed_for_cbt_metadata_vdi = + match op with + | `clone + | `copy + | `disable_cbt + | `enable_cbt + | `mirror + | `resize + | `resize_online + | `snapshot + | `set_on_boot -> + false + | `blocked + | `data_destroy + | `destroy + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `update -> + true + in + let* () = + if + (not allowed_for_cbt_metadata_vdi) + && record.Db_actions.vDI_type = `cbt_metadata + then Error - (Api_errors.operation_not_allowed, ["VDI is not a snapshot: " ^ _ref]) - else if not record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string `cbt_metadata] + ) else - check_destroy () - | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + Ok () + in + let allowed_when_cbt_enabled = + match op with + | `mirror | `set_on_boot -> + false + | `blocked + | `clone + | `copy + | `data_destroy + | `destroy + | `disable_cbt + | `enable_cbt + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `resize + | `resize_online + | `snapshot + | `update -> + true + in + let* () = + if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.ha_is_enabled, []) + Error (Api_errors.vdi_cbt_enabled, [_ref]) else Ok () - | `resize_online -> - if + in + let check_destroy () = + if sr_type = "udev" then + Error (Api_errors.vdi_is_a_physical_device, [_ref]) + else if is_tools_sr then + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else if ha_enabled && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then Error (Api_errors.ha_is_enabled, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + && Xapi_pool_helpers.ha_enable_in_progress ~__context + then + Error (Api_errors.ha_enable_in_progress, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + && Xapi_pool_helpers.ha_disable_in_progress ~__context + then + Error (Api_errors.ha_disable_in_progress, []) else Ok () - | `snapshot when record.Db_actions.vDI_sharable -> - Error (Api_errors.vdi_is_sharable, [_ref]) - | (`snapshot | `copy) when reset_on_boot -> - Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) - | `snapshot -> - if List.exists (fun (_, op) -> op = `copy) current_ops then - Error - ( Api_errors.operation_not_allowed - , ["Snapshot operation not allowed during copy."] - ) - else - Ok () - | `copy -> - if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then - Error - ( Api_errors.operation_not_allowed - , [ - "VDI containing HA statefile or redo log cannot be copied (check \ - the VDI's allowed operations)." - ] - ) - else - Ok () - | `enable_cbt | `disable_cbt -> - if record.Db_actions.vDI_is_a_snapshot then - Error (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) - else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] - ) - else if reset_on_boot then + in + match op with + | `forget -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else + Ok () + | `destroy -> + check_destroy () + | `data_destroy -> + if not record.Db_actions.vDI_is_a_snapshot then + Error + ( Api_errors.operation_not_allowed + , ["VDI is not a snapshot: " ^ _ref] + ) + else if not record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + else + check_destroy () + | `resize -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `resize_online -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `snapshot when record.Db_actions.vDI_sharable -> + Error (Api_errors.vdi_is_sharable, [_ref]) + | (`snapshot | `copy) when reset_on_boot -> Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) - else + | `snapshot -> + if List.exists (fun (_, op) -> op = `copy) current_ops then + Error + ( Api_errors.operation_not_allowed + , ["Snapshot operation not allowed during copy."] + ) + else + Ok () + | `copy -> + if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + Error + ( Api_errors.operation_not_allowed + , [ + "VDI containing HA statefile or redo log cannot be copied \ + (check the VDI's allowed operations)." + ] + ) + else + Ok () + | `enable_cbt | `disable_cbt -> + if record.Db_actions.vDI_is_a_snapshot then + Error + (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) + else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] + ) + else if reset_on_boot then + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + else + Ok () + | `mirror + | `clone + | `generate_config + | `force_unlock + | `set_on_boot + | `list_changed_blocks + | `blocked + | `update -> Ok () - | `mirror - | `clone - | `generate_config - | `force_unlock - | `set_on_boot - | `list_changed_blocks - | `blocked - | `update -> - Ok () let assert_operation_valid ~__context ~self ~(op : API.vdi_operations) = let pool = Helpers.get_pool ~__context in @@ -486,16 +496,11 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records v in let allowed = - let check x = - match - check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records - ha_enabled all self x - with - | Ok () -> - true - | _ -> - false + let check' = + check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records + ha_enabled all self in + let check x = match check' x with Ok () -> true | _ -> false in all_ops |> Xapi_globs.Vdi_operations_set.filter check in let allowed = diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index fc281c70de0..6e3a3955fca 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -777,12 +777,9 @@ let allowable_ops = List.filter (fun op -> not (List.mem op ignored_ops)) API.vm_operations__all let update_allowed_operations ~__context ~self = + let check' = check_operation_error ~__context ~ref:self in let check accu op = - match check_operation_error ~__context ~ref:self ~op ~strict:true with - | None -> - op :: accu - | Some _err -> - accu + match check' ~op ~strict:true with None -> op :: accu | Some _err -> accu in let allowed = List.fold_left check [] allowable_ops in (* FIXME: need to be able to deal with rolling-upgrade for orlando as well *) From e8440b3a01720f09baf04eec5b8482c8e3e93ff8 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 05:47:09 +0000 Subject: [PATCH 304/492] CA-408552: 2/3 Improve bootstrom performance by save db ops Define db ops into variables and keep them inside returned function for code review Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 40 ++++++++++++++++----------------- ocaml/xapi/xapi_vm_lifecycle.ml | 32 +++++++++++++++++--------- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 9691f3831cd..1090ae01f1d 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -85,8 +85,11 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) *) fun op -> let* () = - if + let rolling_upgrade_in_progress = Helpers.rolling_upgrade_in_progress ~__context + in + if + rolling_upgrade_in_progress && not (Xapi_globs.Vdi_operations_set.mem op Xapi_globs.rpu_allowed_vdi_operations @@ -338,25 +341,31 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in + let vdi_is_ha_state_or_redolog = + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + in let check_destroy () = + let ha_enable_in_progress = + Xapi_pool_helpers.ha_enable_in_progress ~__context + in + let ha_disable_in_progress = + Xapi_pool_helpers.ha_disable_in_progress ~__context + in if sr_type = "udev" then Error (Api_errors.vdi_is_a_physical_device, [_ref]) else if is_tools_sr then Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) else if List.mem record.Db_actions.vDI_type [`rrd] then Error (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + else if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + vdi_is_ha_state_or_redolog && Xapi_pool_helpers.ha_enable_in_progress ~__context then Error (Api_errors.ha_enable_in_progress, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + vdi_is_ha_state_or_redolog && Xapi_pool_helpers.ha_disable_in_progress ~__context then Error (Api_errors.ha_disable_in_progress, []) @@ -365,10 +374,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) in match op with | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else if List.mem record.Db_actions.vDI_type [`rrd] then Error (Api_errors.vdi_has_rrds, [_ref]) @@ -387,18 +393,12 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else check_destroy () | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else Ok () | `resize_online -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else Ok () @@ -415,7 +415,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () | `copy -> - if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + if vdi_is_ha_state_or_redolog then Error ( Api_errors.operation_not_allowed , [ diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 6e3a3955fca..7e95933d8d2 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -528,10 +528,10 @@ let check_operation_error ~__context ~ref = in let current_error = let metrics = Db.VM.get_metrics ~__context ~self:ref in + let is_nested_virt = nested_virt ~__context ref metrics in check current_error (fun () -> match op with - | `changing_dynamic_range - when nested_virt ~__context ref metrics && strict -> + | `changing_dynamic_range when is_nested_virt && strict -> Some (Api_errors.vm_is_using_nested_virt, [ref_str]) | _ -> None @@ -542,13 +542,11 @@ let check_operation_error ~__context ~ref = (* make use of the Helpers.ballooning_enabled_for_vm function. *) let current_error = check current_error (fun () -> - let vm_ref () = + let is_domain_zero = Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid + |> Helpers.is_domain_zero ~__context in - if - (op = `changing_VCPUs || op = `destroy) - && Helpers.is_domain_zero ~__context (vm_ref ()) - then + if (op = `changing_VCPUs || op = `destroy) && is_domain_zero then Some ( Api_errors.operation_not_allowed , ["This operation is not allowed on dom0"] @@ -668,8 +666,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being in an appliance. *) let current_error = + let is_appliance_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_appliance + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_appliance then + if is_appliance_valid then check_appliance ~vmr ~op ~ref_str else None @@ -677,8 +678,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a protection policy. *) let current_error = + let is_protection_policy_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy then + if is_protection_policy_valid then check_protection_policy ~vmr ~op ~ref_str else None @@ -686,8 +690,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a snapshot schedule. *) let current_error = + let is_snapshort_schedule_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule then + if is_snapshort_schedule_valid then check_snapshot_schedule ~vmr ~ref_str op else None @@ -709,9 +716,12 @@ let check_operation_error ~__context ~ref = ) in let current_error = + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in check current_error (fun () -> if - Helpers.rolling_upgrade_in_progress ~__context + rolling_upgrade_in_progress && not (List.mem op Xapi_globs.rpu_allowed_vm_operations) then Some (Api_errors.not_supported_during_upgrade, []) From e1a57fa5ef4b97adab32bcf1a24d9af61bccc3c4 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 06:35:20 +0000 Subject: [PATCH 305/492] CA-408552: 3/3 Improve bootstrom performance by save db ops Move ops unrelated db operation outside of returned function Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 176 +++++++++++++++----------------- ocaml/xapi/xapi_vm_lifecycle.ml | 76 +++++++------- 2 files changed, 123 insertions(+), 129 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 1090ae01f1d..0f9904d72fb 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -68,6 +68,84 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in let reset_on_boot = record.Db_actions.vDI_on_boot = `reset in + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) + ) + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached + ) + pbd_records + in + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") + ) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + in + let my_active_rw_vbd_records = + List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + in + (* Policy: 1. any current_operation besides copy implies exclusivity; fail everything else; except vdi mirroring is in current operations and destroy is performed @@ -83,11 +161,15 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let vdi_is_ha_state_or_redolog = + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + in + fun op -> let* () = - let rolling_upgrade_in_progress = - Helpers.rolling_upgrade_in_progress ~__context - in if rolling_upgrade_in_progress && not @@ -113,30 +195,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq (Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) - ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr - && pbd_record.API.pBD_currently_attached - ) - pbd_records - in let* () = if pbds_attached = [] && op = `resize then Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) @@ -146,58 +204,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) (* check to see whether VBDs exist which are using this VDI *) - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - in - let my_active_rw_vbd_records = - List.filter - (fun vbd -> vbd.Db_actions.vBD_mode = `RW) - my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - in (* If the VBD is currently_attached then some operations can still be performed ie: VDI.clone (if the VM is suspended we have to have the 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; @@ -275,9 +281,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in let* () = check_sm_feature_error op sm_features sr in let allowed_for_cbt_metadata_vdi = match op with @@ -341,16 +344,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - let vdi_is_ha_state_or_redolog = - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - in let check_destroy () = - let ha_enable_in_progress = - Xapi_pool_helpers.ha_enable_in_progress ~__context - in - let ha_disable_in_progress = - Xapi_pool_helpers.ha_disable_in_progress ~__context - in if sr_type = "udev" then Error (Api_errors.vdi_is_a_physical_device, [_ref]) else if is_tools_sr then diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 7e95933d8d2..5ec4ca6d792 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -393,8 +393,7 @@ let nested_virt ~__context vm metrics = let key = "nested-virt" in Vm_platform.is_true ~key ~platformdata ~default:false -let is_mobile ~__context vm strict = - let metrics = Db.VM.get_metrics ~__context ~self:vm in +let is_mobile ~__context vm strict metrics = (not @@ nomigrate ~__context vm metrics) && (not @@ nested_virt ~__context vm metrics) || not strict @@ -447,6 +446,42 @@ let check_operation_error ~__context ~ref = vmr.Db_actions.vM_VBDs |> List.filter (Db.is_valid_ref __context) in + let current_ops = vmr.Db_actions.vM_current_operations in + let metrics = Db.VM.get_metrics ~__context ~self:ref in + let is_nested_virt = nested_virt ~__context ref metrics in + let is_domain_zero = + Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid + |> Helpers.is_domain_zero ~__context + in + let vdis_reset_and_caching = + List.filter_map + (fun vdi -> + try + let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in + Some + ( List.assoc_opt "on_boot" sm_config = Some "reset" + , bool_of_assoc "caching" sm_config + ) + with _ -> None + ) + vdis + in + let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in + let is_not_sriov pci = not @@ List.mem pci sriov_pcis in + let pcis = vmr.Db_actions.vM_attached_PCIs in + let is_appliance_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_appliance + in + let is_protection_policy_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy + in + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in + let is_snapshort_schedule_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule + in + fun ~op ~strict -> let current_error = None in let check c f = match c with Some e -> Some e | None -> f () in @@ -470,7 +505,6 @@ let check_operation_error ~__context ~ref = (* if other operations are in progress, check that the new operation is allowed concurrently with them. *) let current_error = check current_error (fun () -> - let current_ops = vmr.Db_actions.vM_current_operations in if List.length current_ops <> 0 && not (is_allowed_concurrently ~op ~current_ops) @@ -520,15 +554,13 @@ let check_operation_error ~__context ~ref = check current_error (fun () -> match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) - when not (is_mobile ~__context ref strict) -> + when not (is_mobile ~__context ref strict metrics) -> Some (Api_errors.vm_is_immobile, [ref_str]) | _ -> None ) in let current_error = - let metrics = Db.VM.get_metrics ~__context ~self:ref in - let is_nested_virt = nested_virt ~__context ref metrics in check current_error (fun () -> match op with | `changing_dynamic_range when is_nested_virt && strict -> @@ -542,10 +574,6 @@ let check_operation_error ~__context ~ref = (* make use of the Helpers.ballooning_enabled_for_vm function. *) let current_error = check current_error (fun () -> - let is_domain_zero = - Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid - |> Helpers.is_domain_zero ~__context - in if (op = `changing_VCPUs || op = `destroy) && is_domain_zero then Some ( Api_errors.operation_not_allowed @@ -592,19 +620,6 @@ let check_operation_error ~__context ~ref = (* Check for an error due to VDI caching/reset behaviour *) let current_error = check current_error (fun () -> - let vdis_reset_and_caching = - List.filter_map - (fun vdi -> - try - let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in - Some - ( List.assoc_opt "on_boot" sm_config = Some "reset" - , bool_of_assoc "caching" sm_config - ) - with _ -> None - ) - vdis - in if op = `checkpoint || op = `snapshot @@ -633,9 +648,6 @@ let check_operation_error ~__context ~ref = (* If a PCI device is passed-through, check if the operation is allowed *) let current_error = check current_error @@ fun () -> - let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in - let is_not_sriov pci = not @@ List.mem pci sriov_pcis in - let pcis = vmr.Db_actions.vM_attached_PCIs in match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) when List.exists is_not_sriov pcis -> @@ -666,9 +678,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being in an appliance. *) let current_error = - let is_appliance_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_appliance - in check current_error (fun () -> if is_appliance_valid then check_appliance ~vmr ~op ~ref_str @@ -678,9 +687,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a protection policy. *) let current_error = - let is_protection_policy_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy - in check current_error (fun () -> if is_protection_policy_valid then check_protection_policy ~vmr ~op ~ref_str @@ -690,9 +696,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a snapshot schedule. *) let current_error = - let is_snapshort_schedule_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule - in check current_error (fun () -> if is_snapshort_schedule_valid then check_snapshot_schedule ~vmr ~ref_str op @@ -716,9 +719,6 @@ let check_operation_error ~__context ~ref = ) in let current_error = - let rolling_upgrade_in_progress = - Helpers.rolling_upgrade_in_progress ~__context - in check current_error (fun () -> if rolling_upgrade_in_progress From d236751e762d53ee09dcf9110845d178341a3e87 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 19 Jun 2025 13:19:13 +0100 Subject: [PATCH 306/492] xenops_server_plugin: Refer to the type alias instead of its definition This allows changing the type definition in Updates without modifying the types here as well in the future. Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/lib/xenops_server_plugin.ml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 1a52749a9f3..4c8c73773f8 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -288,10 +288,7 @@ module type S = sig end module UPDATES : sig - val get : - Updates.id option - -> int option - -> Dynamic.barrier list * Dynamic.id list * Updates.id + val get : Updates.id option -> int option -> Updates.get_result end module DEBUG : sig From 7a3788a9e9d2153cba1a14d9a857e811af4e3263 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 19 Jun 2025 13:28:00 +0100 Subject: [PATCH 307/492] xapi-idl/updates: Make filterfn in inject_barrier only look at keys The only usage of it ignores values, so drop them altogether. This allows changing the type of the values in the future without modifying inject_barrier and its users in any way. Signed-off-by: Andrii Sultanov --- ocaml/xapi-idl/lib/updates.ml | 1 + ocaml/xapi-idl/lib/updates.mli | 2 +- ocaml/xapi-idl/lib_test/updates_test.ml | 8 ++++---- ocaml/xenopsd/lib/xenops_server.ml | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi-idl/lib/updates.ml b/ocaml/xapi-idl/lib/updates.ml index 93904f2b65b..f6420da6834 100644 --- a/ocaml/xapi-idl/lib/updates.ml +++ b/ocaml/xapi-idl/lib/updates.ml @@ -66,6 +66,7 @@ functor ) let inject_barrier id filterfn t = + let filterfn key _ = filterfn key in ( { map= t.map ; barriers= diff --git a/ocaml/xapi-idl/lib/updates.mli b/ocaml/xapi-idl/lib/updates.mli index a054c5581d8..9b678a28839 100644 --- a/ocaml/xapi-idl/lib/updates.mli +++ b/ocaml/xapi-idl/lib/updates.mli @@ -64,7 +64,7 @@ module Updates : functor (Interface : INTERFACE) -> sig (* [inject_barrier n p t] Inject a barrier identified by [n] into [t]. The barrier will contain a snapshot of all current updates that match the predicate [p]. *) - val inject_barrier : int -> (Interface.Dynamic.id -> int -> bool) -> t -> unit + val inject_barrier : int -> (Interface.Dynamic.id -> bool) -> t -> unit (* Removes a barrier *) val remove_barrier : int -> t -> unit diff --git a/ocaml/xapi-idl/lib_test/updates_test.ml b/ocaml/xapi-idl/lib_test/updates_test.ml index 66c5f09450e..790e72854c1 100644 --- a/ocaml/xapi-idl/lib_test/updates_test.ml +++ b/ocaml/xapi-idl/lib_test/updates_test.ml @@ -84,7 +84,7 @@ let test_inject_barrier () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; let barriers, updates, _id = M.get "dbg" None (Some 1) u in @@ -107,7 +107,7 @@ let test_remove_barrier () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; M.remove_barrier 1 u ; @@ -125,7 +125,7 @@ let test_inject_barrier_rpc () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; let barriers, updates, _id = M.get "dbg" None (Some 1) u in @@ -175,7 +175,7 @@ let test_filter () = let test_dump () = let u = M.empty scheduler in M.add update_a u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; let dump = M.Dump.make u in let dumped_rpc = M.Dump.rpc_of_dump dump in let expected_rpc = diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 8fe027630fe..569dabc11a1 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -4039,7 +4039,7 @@ module UPDATES = struct Debug.with_thread_associated dbg (fun () -> debug "UPDATES.inject_barrier %s %d" vm_id id ; - let filter k _ = + let filter k = match k with | Dynamic.Task _ -> false From 4316e6bd97bfe012c88942b5b4b32c260b1e6e01 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 20 Jun 2025 10:54:59 +0100 Subject: [PATCH 308/492] xapi_xenops: Refactor update_vm_internal Drop the first member of the (Vm.t * Vm.state) tuple as it's never used. This removes the need for several 'snd info', 'Option.iter (fun (_, state) ->) info' constructs. Replace all the following constructs: ``` if different (fun x -> x.field) && predicate then Option.iter (fun state -> ...) info ``` With this: ``` different (fun x -> x.field) ((&&) predicate) (fun field -> ...); ``` It 1) removes the additional level of indentation inside Option.iter (fun state), hides the duplication of this construct 2) makes it obvious where the whole 'state' is accessed and not only the field that was checked to be different Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_xenops.ml | 602 +++++++++++++++++++------------------- 1 file changed, 302 insertions(+), 300 deletions(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index dbefffb1571..4b7b738b000 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -1857,10 +1857,17 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = if info = None then debug "xenopsd event: VM state missing: assuming VM has shut down" ; let should_update_allowed_operations = ref false in - let different f = - let a = Option.map (fun x -> f (snd x)) info in - let b = Option.map f previous in - a <> b + + (* If a field (accessed by [accessor] for [Vm.state]) changed in an + update and [predicate has_changed], call [f (accessor info)] *) + let different accessor predicate f = + let a = Option.map (fun x -> accessor x) info in + let b = Option.map accessor previous in + let diff = a <> b in + if predicate diff then + Option.iter f a + else + () in (* Helpers to create and update guest metrics when needed *) let lookup state key = List.assoc_opt key state.Vm.guest_agent in @@ -1896,7 +1903,7 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = let gm = Db.VM.get_guest_metrics ~__context ~self in if gm = Ref.null then Option.iter - (fun (_, state) -> + (fun state -> List.iter (fun domid -> try @@ -1917,7 +1924,7 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = in let check_guest_agent () = Option.iter - (fun (_, state) -> + (fun state -> Option.iter (fun oldstate -> let old_ga = oldstate.Vm.guest_agent in @@ -1965,8 +1972,7 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = process restart or an event is generated. We may wish to periodically inject artificial events IF there has been an event sync failure? *) let power_state = - xenapi_of_xenops_power_state - (Option.map (fun x -> (snd x).Vm.power_state) info) + xenapi_of_xenops_power_state (Option.map (fun x -> x.Vm.power_state) info) in let power_state_before_update = Db.VM.get_power_state ~__context ~self in (* We preserve the current_domain_type of suspended VMs like we preserve @@ -1974,37 +1980,40 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = whether suspended VMs are going to resume into PV or PVinPVH for example. We do this before updating the power_state to maintain the invariant that any VM that's not `Halted cannot have an unspecified current_domain_type *) - if different (fun x -> x.domain_type) && power_state <> `Suspended then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - let update domain_type = - debug "xenopsd event: Updating VM %s current_domain_type <- %s" id - (Record_util.domain_type_to_string domain_type) ; - Db.VM_metrics.set_current_domain_type ~__context ~self:metrics - ~value:domain_type - in - match state.Vm.domain_type with - | Domain_HVM -> - update `hvm - | Domain_PV -> - update `pv - | Domain_PVinPVH -> - update `pv_in_pvh - | Domain_PVH -> - update `pvh - | Domain_undefined -> - if power_state <> `Halted then - debug - "xenopsd returned an undefined domain type for non-halted VM \ - %s;assuming this is transient, so not updating \ - current_domain_type" - id - else - update `unspecified - ) - info ; - ( if different (fun x -> x.power_state) then + different + (fun x -> x.Vm.domain_type) + (( && ) (power_state <> `Suspended)) + (fun domain_type -> + let metrics = Db.VM.get_metrics ~__context ~self in + let update domain_type = + debug "xenopsd event: Updating VM %s current_domain_type <- %s" id + (Record_util.domain_type_to_string domain_type) ; + Db.VM_metrics.set_current_domain_type ~__context ~self:metrics + ~value:domain_type + in + match domain_type with + | Vm.Domain_HVM -> + update `hvm + | Domain_PV -> + update `pv + | Domain_PVinPVH -> + update `pv_in_pvh + | Domain_PVH -> + update `pvh + | Domain_undefined -> + if power_state <> `Halted then + debug + "xenopsd returned an undefined domain type for non-halted VM \ + %s;assuming this is transient, so not updating \ + current_domain_type" + id + else + update `unspecified + ) ; + different + (fun x -> x.Vm.power_state) + Fun.id + (fun _ -> try debug "Will update VM.allowed_operations because power_state has changed." ; @@ -2058,14 +2067,17 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = with e -> error "Caught %s: while updating VM %s power_state" (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.domids) then + ) ; + different + (fun x -> x.Vm.domids) + Fun.id + (fun _ -> try debug "Will update VM.allowed_operations because domid has changed." ; should_update_allowed_operations := true ; debug "xenopsd event: Updating VM %s domid" id ; Option.iter - (fun (_, state) -> + (fun state -> match state.Vm.domids with | value :: _ -> Db.VM.set_domid ~__context ~self ~value:(Int64.of_int value) @@ -2090,306 +2102,296 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = (System_domains.pbd_of_vm ~__context ~vm:self) with e -> error "Caught %s: while updating VM %s domids" (Printexc.to_string e) id - ) ; + ) ; (* consoles *) - ( if different (fun x -> x.consoles) then + different + (fun x -> x.Vm.consoles) + Fun.id + (fun consoles -> try debug "xenopsd event: Updating VM %s consoles" id ; - Option.iter - (fun (_, state) -> - let address = Db.Host.get_address ~__context ~self:localhost in - let uri = - Uri.( - make ~scheme:"https" ~host:address ~path:Constants.console_uri - () - |> to_string + let address = Db.Host.get_address ~__context ~self:localhost in + let uri = + Uri.( + make ~scheme:"https" ~host:address ~path:Constants.console_uri () + |> to_string + ) + in + let get_uri_from_location loc = + try + let n = String.index loc '?' in + String.sub loc 0 n + with Not_found -> loc + in + let current_protocols = + List.map + (fun self -> + ( ( Db.Console.get_protocol ~__context ~self + |> to_xenops_console_protocol + , Db.Console.get_location ~__context ~self + |> get_uri_from_location + ) + , self ) - in - let get_uri_from_location loc = + ) + (Db.VM.get_consoles ~__context ~self) + in + let new_protocols = + List.map (fun c -> ((c.Vm.protocol, uri), c)) consoles + in + (* Destroy consoles that have gone away *) + List.iter + (fun protocol -> + let self = List.assoc protocol current_protocols in + Db.Console.destroy ~__context ~self + ) + (Listext.set_difference + (List.map fst current_protocols) + (List.map fst new_protocols) + ) ; + (* Create consoles that have appeared *) + List.iter + (fun (protocol, _) -> + let ref = Ref.make () in + let uuid = Uuidx.to_string (Uuidx.make ()) in + let location = Printf.sprintf "%s?uuid=%s" uri uuid in + let port = try - let n = String.index loc '?' in - String.sub loc 0 n - with Not_found -> loc - in - let current_protocols = - List.map - (fun self -> - ( ( Db.Console.get_protocol ~__context ~self - |> to_xenops_console_protocol - , Db.Console.get_location ~__context ~self - |> get_uri_from_location - ) - , self - ) - ) - (Db.VM.get_consoles ~__context ~self) - in - let new_protocols = - List.map (fun c -> ((c.Vm.protocol, uri), c)) state.Vm.consoles + Int64.of_int + (List.find (fun c -> c.Vm.protocol = protocol) consoles).port + with Not_found -> -1L in - (* Destroy consoles that have gone away *) - List.iter - (fun protocol -> - let self = List.assoc protocol current_protocols in - Db.Console.destroy ~__context ~self - ) - (Listext.set_difference - (List.map fst current_protocols) - (List.map fst new_protocols) - ) ; - (* Create consoles that have appeared *) - List.iter - (fun (protocol, _) -> - let ref = Ref.make () in - let uuid = Uuidx.to_string (Uuidx.make ()) in - let location = Printf.sprintf "%s?uuid=%s" uri uuid in - let port = - try - Int64.of_int - (List.find - (fun c -> c.Vm.protocol = protocol) - state.Vm.consoles - ) - .port - with Not_found -> -1L - in - Db.Console.create ~__context ~ref ~uuid - ~protocol:(to_xenapi_console_protocol protocol) - ~location ~vM:self ~other_config:[] ~port - ) - (Listext.set_difference - (List.map fst new_protocols) - (List.map fst current_protocols) - ) + Db.Console.create ~__context ~ref ~uuid + ~protocol:(to_xenapi_console_protocol protocol) + ~location ~vM:self ~other_config:[] ~port + ) + (Listext.set_difference + (List.map fst new_protocols) + (List.map fst current_protocols) ) - info with e -> error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.memory_target) then + ) ; + different + (fun x -> x.Vm.memory_target) + Fun.id + (fun memory_target -> try - Option.iter - (fun (_, state) -> - debug "xenopsd event: Updating VM %s memory_target <- %Ld" id - state.Vm.memory_target ; - Db.VM.set_memory_target ~__context ~self ~value:state.memory_target - ) - info + debug "xenopsd event: Updating VM %s memory_target <- %Ld" id + memory_target ; + Db.VM.set_memory_target ~__context ~self ~value:memory_target with e -> error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.rtc_timeoffset) then + ) ; + different + (fun x -> x.rtc_timeoffset) + Fun.id + (fun rtc_timeoffset -> try - Option.iter - (fun (_, state) -> - if state.Vm.rtc_timeoffset <> "" then ( - debug "xenopsd event: Updating VM %s platform:timeoffset <- %s" id - state.rtc_timeoffset ; - ( try - Db.VM.remove_from_platform ~__context ~self - ~key:Vm_platform.timeoffset - with _ -> () - ) ; - Db.VM.add_to_platform ~__context ~self ~key:Vm_platform.timeoffset - ~value:state.rtc_timeoffset - ) - ) - info + if rtc_timeoffset <> "" then ( + debug "xenopsd event: Updating VM %s platform:timeoffset <- %s" id + rtc_timeoffset ; + ( try + Db.VM.remove_from_platform ~__context ~self + ~key:Vm_platform.timeoffset + with _ -> () + ) ; + Db.VM.add_to_platform ~__context ~self ~key:Vm_platform.timeoffset + ~value:rtc_timeoffset + ) with e -> error "Caught %s: while updating VM %s rtc/timeoffset" (Printexc.to_string e) id - ) ; - if different (fun x -> x.hvm) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s hvm <- %s" id - (string_of_bool state.Vm.hvm) ; - Db.VM_metrics.set_hvm ~__context ~self:metrics ~value:state.Vm.hvm - ) - info ; - if different (fun x -> x.nomigrate) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nomigrate <- %s" id - (string_of_bool state.Vm.nomigrate) ; - Db.VM_metrics.set_nomigrate ~__context ~self:metrics - ~value:state.Vm.nomigrate - ) - info ; - if different (fun x -> x.nested_virt) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nested_virt <- %s" id - (string_of_bool state.Vm.nested_virt) ; - Db.VM_metrics.set_nested_virt ~__context ~self:metrics - ~value:state.Vm.nested_virt - ) - info ; - let update_pv_drivers_detected () = - Option.iter - (fun (_, state) -> - try - let gm = Db.VM.get_guest_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s PV drivers detected %b" id - state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_detected ~__context ~self:gm - ~value:state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context ~self:gm - ~value:state.Vm.pv_drivers_detected - with e -> - debug "Caught %s: while updating VM %s PV drivers" - (Printexc.to_string e) id - ) - info - in + ) ; + different + (fun x -> x.hvm) + Fun.id + (fun hvm -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s hvm <- %s" id (string_of_bool hvm) ; + Db.VM_metrics.set_hvm ~__context ~self:metrics ~value:hvm + ) ; + different + (fun x -> x.nomigrate) + Fun.id + (fun nomigrate -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nomigrate <- %s" id + (string_of_bool nomigrate) ; + Db.VM_metrics.set_nomigrate ~__context ~self:metrics ~value:nomigrate + ) ; + different + (fun x -> x.nested_virt) + Fun.id + (fun nested_virt -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nested_virt <- %s" id + (string_of_bool nested_virt) ; + Db.VM_metrics.set_nested_virt ~__context ~self:metrics ~value:nested_virt + ) ; (* Chack last_start_time before updating anything in the guest metrics *) - ( if different (fun x -> x.last_start_time) then + different + (fun x -> x.last_start_time) + Fun.id + (fun last_start_time -> try - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - (* Clamp time to full seconds, stored timestamps do not - have decimals *) - let start_time = - Float.floor state.Vm.last_start_time |> Date.of_unix_time - in - let expected_time = - Db.VM_metrics.get_start_time ~__context ~self:metrics - in - if Date.is_later ~than:expected_time start_time then ( - debug "xenopsd event: Updating VM %s last_start_time <- %s" id - Date.(to_rfc3339 (of_unix_time state.Vm.last_start_time)) ; - Db.VM_metrics.set_start_time ~__context ~self:metrics - ~value:start_time ; - if - (* VM start and VM reboot *) - power_state = `Running - && power_state_before_update <> `Suspended - then ( - Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self - ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self - ~value:`restart_vm - ) - ) ; - create_guest_metrics_if_needed () ; - let gm = Db.VM.get_guest_metrics ~__context ~self in - let update_time = - Db.VM_guest_metrics.get_last_updated ~__context ~self:gm - in - if update_time < start_time then ( - debug - "VM %s guest metrics update time (%s) < VM start time (%s): \ - deleting" - id - (Date.to_rfc3339 update_time) - (Date.to_rfc3339 start_time) ; - Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; - check_guest_agent () - ) + let metrics = Db.VM.get_metrics ~__context ~self in + (* Clamp time to full seconds, stored timestamps do not + have decimals *) + let start_time = Float.floor last_start_time |> Date.of_unix_time in + let expected_time = + Db.VM_metrics.get_start_time ~__context ~self:metrics + in + if Date.is_later ~than:expected_time start_time then ( + debug "xenopsd event: Updating VM %s last_start_time <- %s" id + Date.(to_rfc3339 (of_unix_time last_start_time)) ; + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:start_time ; + if + (* VM start and VM reboot *) + power_state = `Running && power_state_before_update <> `Suspended + then ( + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_device_model ; + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm ) - info + ) ; + create_guest_metrics_if_needed () ; + let gm = Db.VM.get_guest_metrics ~__context ~self in + let update_time = + Db.VM_guest_metrics.get_last_updated ~__context ~self:gm + in + if update_time < start_time then ( + debug + "VM %s guest metrics update time (%s) < VM start time (%s): \ + deleting" + id + (Date.to_rfc3339 update_time) + (Date.to_rfc3339 start_time) ; + Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; + check_guest_agent () + ) with e -> error "Caught %s: while updating VM %s last_start_time" (Printexc.to_string e) id - ) ; + ) ; Option.iter - (fun (_, state) -> + (fun state -> List.iter (fun domid -> (* Guest metrics could have been destroyed during the last_start_time check by recreating them, we avoid CA-223387 *) create_guest_metrics_if_needed () ; - if different (fun x -> x.Vm.uncooperative_balloon_driver) then - debug - "xenopsd event: VM %s domid %d uncooperative_balloon_driver = %b" - id domid state.Vm.uncooperative_balloon_driver ; - if different (fun x -> x.Vm.guest_agent) then - check_guest_agent () ; - if different (fun x -> x.Vm.pv_drivers_detected) then - update_pv_drivers_detected () ; - ( if different (fun x -> x.Vm.xsdata_state) then + different + (fun x -> x.Vm.uncooperative_balloon_driver) + Fun.id + (fun uncooperative_balloon_driver -> + debug + "xenopsd event: VM %s domid %d uncooperative_balloon_driver = \ + %b" + id domid uncooperative_balloon_driver + ) ; + different + (fun x -> x.Vm.guest_agent) + Fun.id + (fun _ -> check_guest_agent ()) ; + different + (fun x -> x.Vm.pv_drivers_detected) + Fun.id + (fun pv_drivers_detected -> + try + let gm = Db.VM.get_guest_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s PV drivers detected %b" id + pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_detected ~__context ~self:gm + ~value:pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context + ~self:gm ~value:pv_drivers_detected + with e -> + debug "Caught %s: while updating VM %s PV drivers" + (Printexc.to_string e) id + ) ; + different + (fun x -> x.Vm.xsdata_state) + Fun.id + (fun xsdata_state -> try debug "xenopsd event: Updating VM %s domid %d xsdata" id domid ; - Db.VM.set_xenstore_data ~__context ~self - ~value:state.Vm.xsdata_state + Db.VM.set_xenstore_data ~__context ~self ~value:xsdata_state with e -> error "Caught %s: while updating VM %s xsdata" (Printexc.to_string e) id - ) ; - if different (fun x -> x.Vm.memory_target) then - try - debug "xenopsd event: Updating VM %s domid %d memory target" id - domid ; - Rrdd.update_vm_memory_target domid state.Vm.memory_target - with e -> - error "Caught %s: while updating VM %s memory_target" - (Printexc.to_string e) id + ) ; + different + (fun x -> x.Vm.memory_target) + Fun.id + (fun memory_target -> + try + debug "xenopsd event: Updating VM %s domid %d memory target" id + domid ; + Rrdd.update_vm_memory_target domid memory_target + with e -> + error "Caught %s: while updating VM %s memory_target" + (Printexc.to_string e) id + ) ) state.Vm.domids ) info ; - if different (fun x -> x.Vm.vcpu_target) then - Option.iter - (fun (_, state) -> - try - debug "xenopsd event: Updating VM %s vcpu_target <- %d" id - state.Vm.vcpu_target ; - let metrics = Db.VM.get_metrics ~__context ~self in - Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics - ~value:(Int64.of_int state.Vm.vcpu_target) - with e -> - error "Caught %s: while updating VM %s VCPUs_number" - (Printexc.to_string e) id - ) - info ; - ( if different (fun x -> x.shadow_multiplier_target) then + different + (fun x -> x.Vm.vcpu_target) + Fun.id + (fun vcpu_target -> try - Option.iter - (fun (_, state) -> - debug "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" id - state.Vm.shadow_multiplier_target ; - if - state.Vm.power_state <> Halted - && state.Vm.shadow_multiplier_target >= 0.0 - then - Db.VM.set_HVM_shadow_multiplier ~__context ~self - ~value:state.Vm.shadow_multiplier_target - ) - info + debug "xenopsd event: Updating VM %s vcpu_target <- %d" id vcpu_target ; + let metrics = Db.VM.get_metrics ~__context ~self in + Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics + ~value:(Int64.of_int vcpu_target) + with e -> + error "Caught %s: while updating VM %s VCPUs_number" + (Printexc.to_string e) id + ) ; + different + (fun x -> x.shadow_multiplier_target) + Fun.id + (fun shadow_multiplier_target -> + try + debug "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" id + shadow_multiplier_target ; + if power_state <> `Halted && shadow_multiplier_target >= 0.0 then + Db.VM.set_HVM_shadow_multiplier ~__context ~self + ~value:shadow_multiplier_target with e -> error "Caught %s: while updating VM %s HVM_shadow_multiplier" (Printexc.to_string e) id - ) ; + ) ; (* Preserve last_boot_CPU_flags when suspending (see current_domain_type) *) - if different (fun x -> x.Vm.featureset) && power_state <> `Suspended then - Option.iter - (fun (_, state) -> - try - debug "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id - state.Vm.featureset ; - let vendor = - Db.Host.get_cpu_info ~__context ~self:localhost - |> List.assoc Constants.cpu_info_vendor_key - in - let value = - [ - (Constants.cpu_info_vendor_key, vendor) - ; (Constants.cpu_info_features_key, state.Vm.featureset) - ] - in - Db.VM.set_last_boot_CPU_flags ~__context ~self ~value - with e -> - error "Caught %s: while updating VM %s last_boot_CPU_flags" - (Printexc.to_string e) id - ) - info ; - Xenops_cache.update_vm id (Option.map snd info) ; + different + (fun x -> x.Vm.featureset) + (( && ) (power_state <> `Suspended)) + (fun featureset -> + try + debug "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id + featureset ; + let vendor = + Db.Host.get_cpu_info ~__context ~self:localhost + |> List.assoc Constants.cpu_info_vendor_key + in + let value = + [ + (Constants.cpu_info_vendor_key, vendor) + ; (Constants.cpu_info_features_key, featureset) + ] + in + Db.VM.set_last_boot_CPU_flags ~__context ~self ~value + with e -> + error "Caught %s: while updating VM %s last_boot_CPU_flags" + (Printexc.to_string e) id + ) ; + Xenops_cache.update_vm id info ; if !should_update_allowed_operations then Helpers.call_api_functions ~__context (fun rpc session_id -> XenAPI.VM.update_allowed_operations ~rpc ~session_id ~self @@ -2408,8 +2410,8 @@ let update_vm ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self) : XENOPS) in - let info = try Some (Client.VM.stat dbg id) with _ -> None in - if Option.map snd info <> previous then + let info = try Some (snd (Client.VM.stat dbg id)) with _ -> None in + if info <> previous then update_vm_internal ~__context ~id ~self ~previous ~info ~localhost with e -> error From ba144c2f46942b91b13e4d64aabc53887030310e Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 5 Jun 2025 11:04:14 +0100 Subject: [PATCH 309/492] CP-308253: Instrument `Consumers` Spans in `Message-switch`. Instruments `process`/`Consumers` spans of message-switch service in xenopsd. Signed-off-by: Gabriel Buica --- ocaml/xenopsd/lib/xenopsd.ml | 83 +++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index ccacea0ed8b..5ad6401730b 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -300,29 +300,74 @@ let json_path () = path () ^ ".json" let rpc_fn call = (* Upgrade import_metadata API call *) - let call' = + let call', call_name, span_parent = match (call.Rpc.name, call.Rpc.params) with - | "VM.import_metadata", [debug_info; metadata] -> + | ("VM.import_metadata" as call_name), [Rpc.String debug_info; metadata] -> debug "Upgrading VM.import_metadata" ; - Rpc. - { - name= "VM.import_metadata" - ; params= - [Rpc.Dict [("debug_info", debug_info); ("metadata", metadata)]] - ; is_notification= false - } - | "query", [debug_info; unit_p] -> + let span_parent = + let di = debug_info |> Debug_info.of_string in + di.tracing + in + ( Rpc. + { + name= "VM.import_metadata" + ; params= + [ + Rpc.Dict + [ + ("debug_info", Rpc.String debug_info) + ; ("metadata", metadata) + ] + ] + ; is_notification= false + } + , call_name + , span_parent + ) + | ("query" as call_name), [Rpc.String debug_info; unit_p] -> debug "Upgrading query" ; - Rpc. - { - name= "query" - ; params= [Rpc.Dict [("debug_info", debug_info); ("unit", unit_p)]] - ; is_notification= false - } - | _ -> - call + let span_parent = + let di = debug_info |> Debug_info.of_string in + di.tracing + in + ( Rpc. + { + name= "query" + ; params= + [ + Rpc.Dict + [("debug_info", Rpc.String debug_info); ("unit", unit_p)] + ] + ; is_notification= false + } + , call_name + , span_parent + ) + | call_name, [Rpc.Dict kv_list] -> + let span_parent = + kv_list + |> List.find_map (function + | "debug_info", Rpc.String debug_info -> + let di = debug_info |> Debug_info.of_string in + di.tracing + | _ -> + None + ) + in + (call, call_name, span_parent) + | call_name, _ -> + (call, call_name, None) in - Idl.Exn.server Xenops_server.Server.implementation call' + Tracing.with_tracing + ~attributes: + [ + ("messaging.operation.name", "process") + ; ("messaging.system", "message-switch") + ; ("messaging.destination.name", !Xenops_interface.queue_name) + ] + ~span_kind:Tracing.SpanKind.Consumer ~parent:span_parent + ~name:("process" ^ " " ^ call_name) + @@ fun _ -> Idl.Exn.server Xenops_server.Server.implementation call' let handle_received_fd this_connection = let msg_size = 16384 in From 7a49235a7cae690a93837100e81a25263b7ce97b Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 5 Jun 2025 17:51:59 +0100 Subject: [PATCH 310/492] CP-50001: Instrument `xapi_xenops.ml` -- `dbg` carrier Intruments functions in `xapi_xenops.ml` that carry the traceparent/tracecontext through dbg. `Debug_info.with_dbg` now accepts setting up attributes for spans. Also, instruments `Events_from_xenopsd` to capture the event spans: `subscribe`/`settle`. It's nto straight forward to link them on the same trace. For now the only way they are connnected is having the same `message.id` attribute. Signed-off-by: Gabriel Buica --- ocaml/xapi-idl/lib/debug_info.ml | 5 +++-- ocaml/xapi-idl/lib/debug_info.mli | 3 ++- ocaml/xapi/xapi_xenops.ml | 31 +++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-idl/lib/debug_info.ml b/ocaml/xapi-idl/lib/debug_info.ml index edf3c4979a8..e3845fa080d 100644 --- a/ocaml/xapi-idl/lib/debug_info.ml +++ b/ocaml/xapi-idl/lib/debug_info.ml @@ -76,13 +76,14 @@ let to_log_string t = t.log (* Sets the logging context based on `dbg`. Also adds a new tracing span, linked to the parent span from `dbg`, if available. *) -let with_dbg ?(with_thread = false) ?(module_name = "") ~name ~dbg f = +let with_dbg ?attributes ?(with_thread = false) ?(module_name = "") ~name ~dbg f + = let di = of_string dbg in let f_with_trace () = let name = match module_name with "" -> name | _ -> module_name ^ "." ^ name in - Tracing.with_tracing ~parent:di.tracing ~name (fun span -> + Tracing.with_tracing ?attributes ~parent:di.tracing ~name (fun span -> match span with Some _ -> f {di with tracing= span} | None -> f di ) in diff --git a/ocaml/xapi-idl/lib/debug_info.mli b/ocaml/xapi-idl/lib/debug_info.mli index 9db63471035..2b0244ac94a 100644 --- a/ocaml/xapi-idl/lib/debug_info.mli +++ b/ocaml/xapi-idl/lib/debug_info.mli @@ -23,7 +23,8 @@ val to_string : t -> string val to_log_string : t -> string val with_dbg : - ?with_thread:bool + ?attributes:(string * string) list + -> ?with_thread:bool -> ?module_name:string -> name:string -> dbg:string diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index dbefffb1571..39629e75aee 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -48,6 +48,8 @@ let check_power_state_is ~__context ~self ~expected = (Record_util.vm_power_state_to_lowercase_string expected) let event_wait queue_name dbg ?from p = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let finished = ref false in let event_id = ref from in let module Client = (val make_client queue_name : XENOPS) in @@ -58,6 +60,8 @@ let event_wait queue_name dbg ?from p = done let task_ended queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in match (Client.TASK.stat dbg id).Task.state with | Task.Completed _ | Task.Failed _ -> @@ -66,6 +70,8 @@ let task_ended queue_name dbg id = false let wait_for_task queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in let finished = function | Dynamic.Task id' -> @@ -1419,6 +1425,8 @@ let id_of_vm ~__context ~self = Db.VM.get_uuid ~__context ~self let vm_of_id ~__context uuid = Db.VM.get_by_uuid ~__context ~uuid let vm_exists_in_xenopsd queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in Client.VM.exists dbg id @@ -1793,6 +1801,18 @@ module Events_from_xenopsd = struct let module Client = (val make_client queue_name : XENOPS) in let t = make () in let id = register t in + Debug_info.with_dbg + ~attributes: + [ + ("messaging.operation.name", "subscribe") + ; ("messaging.system", "event") + ; ("messaging.destination.subscription.name", vm_id) + ; ("messaging.message.id", string_of_int id) + ] + ~name:("subscribe" ^ " " ^ queue_name) + ~dbg + @@ fun di -> + let dbg = Debug_info.to_string di in debug "Client.UPDATES.inject_barrier %d" id ; Client.UPDATES.inject_barrier dbg vm_id id ; with_lock t.m (fun () -> @@ -1802,6 +1822,17 @@ module Events_from_xenopsd = struct ) let wakeup queue_name dbg id = + Debug_info.with_dbg + ~attributes: + [ + ("messaging.operation.name", "settle") + ; ("messaging.system", "event") + ; ("messaging.message.id", string_of_int id) + ] + ~name:("settle" ^ " " ^ queue_name) + ~dbg + @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in Client.UPDATES.remove_barrier dbg id ; let t = From d9a3268152a624e0733397180da39c02aae6bed7 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 6 Jun 2025 10:08:36 +0100 Subject: [PATCH 311/492] CP-50001: Instrument `xapi_xenops.ml` -- `context` carrier Intruments the functions in `xapi_xenops.ml` that carry the traceparent/tracecontext through `context`. Signed-off-by: Gabriel Buica --- ocaml/xapi/xapi_xenops.ml | 94 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 39629e75aee..70102faae44 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -33,7 +33,10 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let rpc_of t x = Rpcmarshal.marshal t.Rpc.Types.ty x +let ( let@ ) f x = f x + let check_power_state_is ~__context ~self ~expected = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if expected <> `Running then Xapi_vm_lifecycle.assert_final_power_state_is ~__context ~self ~expected else @@ -112,6 +115,7 @@ let xenops_vdi_locator_of sr vdi = (Storage_interface.Vdi.string_of vdi) let xenops_vdi_locator ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let sr = Db.VDI.get_SR ~__context ~self in let sr_uuid = Db.SR.get_uuid ~__context ~self:sr in let vdi_location = Db.VDI.get_location ~__context ~self in @@ -120,9 +124,11 @@ let xenops_vdi_locator ~__context ~self = (Storage_interface.Vdi.of_string vdi_location) let disk_of_vdi ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try Some (VDI (xenops_vdi_locator ~__context ~self)) with _ -> None let vdi_of_disk ~__context x = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in match String.split ~limit:2 '/' x with | [sr_uuid; location] -> ( let open Xapi_database.Db_filter_types in @@ -157,6 +163,7 @@ let backend_of_network net = (* PR-1255 *) let backend_of_vif ~__context ~vif = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vif_record = Db.VIF.get_record_internal ~__context ~self:vif in let net = Db.Network.get_record ~__context ~self:vif_record.Db_actions.vIF_network @@ -261,6 +268,7 @@ let firmware_of_vm vm = default_firmware let varstore_rm_with_sandbox ~__context ~vm_uuid f = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let domid = 0 in let chroot, socket_path = @@ -271,6 +279,7 @@ let varstore_rm_with_sandbox ~__context ~vm_uuid f = (fun () -> Xenops_sandbox.Varstore_guard.stop dbg ~domid ~vm_uuid) let nvram_post_clone ~__context ~self ~uuid = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in match Db.VM.get_NVRAM ~__context ~self with | [] -> () @@ -298,6 +307,7 @@ let nvram_post_clone ~__context ~self ~uuid = debug "VM %s: NVRAM changed due to clone" uuid let rtc_timeoffset_of_vm ~__context (vm, vm_t) vbds = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let timeoffset = string vm_t.API.vM_platform "0" Vm_platform.timeoffset in (* If any VDI has on_boot = reset AND has a VDI.other_config:timeoffset then we override the platform/timeoffset. This is needed because windows @@ -371,6 +381,7 @@ let kernel_path filename = Ok real_path let builder_of_vm ~__context (vmref, vm) timeoffset pci_passthrough vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vm in let video_mode = if vgpu then @@ -531,6 +542,7 @@ let builder_of_vm ~__context (vmref, vm) timeoffset pci_passthrough vgpu = Helpers.internal_error "invalid boot configuration" let list_net_sriov_vf_pcis ~__context ~vm = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in vm.API.vM_VIFs |> List.filter (fun self -> Db.VIF.get_currently_attached ~__context ~self) |> List.filter_map (fun vif -> @@ -545,6 +557,7 @@ module MD = struct (** Convert between xapi DB records and xenopsd records *) let of_vbd ~__context ~vm ~vbd = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let hvm = match vm.API.vM_domain_type with | `hvm -> @@ -697,6 +710,7 @@ module MD = struct } let of_pvs_proxy ~__context vif proxy = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let site = Db.PVS_proxy.get_site ~__context ~self:proxy in let site_uuid = Db.PVS_site.get_uuid ~__context ~self:site in let servers = Db.PVS_site.get_servers ~__context ~self:site in @@ -716,6 +730,7 @@ module MD = struct (site_uuid, servers, interface) let of_vif ~__context ~vm ~vif:(vif_ref, vif) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let net = Db.Network.get_record ~__context ~self:vif.API.vIF_network in let net_mtu = Int64.to_int net.API.network_MTU in let mtu = @@ -859,6 +874,7 @@ module MD = struct } let pcis_of_vm ~__context (vmref, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vgpu_pcidevs = Vgpuops.list_pcis_for_passthrough ~__context ~vm:vmref in let devs = List.concat_map (fun (_, dev) -> dev) (Pciops.sort_pcidevs vgpu_pcidevs) @@ -889,6 +905,7 @@ module MD = struct devs let get_target_pci_address ~__context vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let pgpu = if Db.is_valid_ref __context @@ -917,6 +934,7 @@ module MD = struct * is passed trough completely. *) let sriov_vf ~__context vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let is_sriov () = let ty = vgpu.Db_actions.vGPU_type in match Db.VGPU_type.get_implementation ~__context ~self:ty with @@ -937,6 +955,7 @@ module MD = struct Xenops_interface.Pci.address_of_string str |> fun addr -> Some addr let of_nvidia_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. *) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -973,6 +992,7 @@ module MD = struct } let of_gvt_g_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. *) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -1013,6 +1033,7 @@ module MD = struct failwith "Intel GVT-g settings invalid" let of_mxgpu_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. *) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -1049,6 +1070,7 @@ module MD = struct failwith "AMD MxGPU settings invalid" let vgpus_of_vm ~__context (_, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in List.fold_left (fun acc vgpu -> let vgpu_record = Db.VGPU.get_record_internal ~__context ~self:vgpu in @@ -1070,6 +1092,7 @@ module MD = struct [] vm.API.vM_VGPUs let of_vusb ~__context ~vm ~pusb = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vusb in try let path = pusb.API.pUSB_path in @@ -1093,6 +1116,7 @@ module MD = struct raise e let vusbs_of_vm ~__context (_, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in vm.API.vM_VUSBs |> List.map (fun self -> Db.VUSB.get_record ~__context ~self) |> List.filter (fun self -> self.API.vUSB_currently_attached) @@ -1102,6 +1126,7 @@ module MD = struct |> List.map (fun pusb -> of_vusb ~__context ~vm ~pusb) let of_vm ~__context (vmref, vm) vbds pci_passthrough vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let on_action_behaviour = function | `preserve -> [Vm.Pause] @@ -1357,6 +1382,7 @@ module Guest_agent_features = struct auto_update_enabled @ auto_update_url let of_config ~__context config = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Features in let vss = let name = Features.name_of_feature VSS in @@ -1376,6 +1402,7 @@ module Guest_agent_features = struct end let apply_guest_agent_config ~__context config = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let features = Guest_agent_features.of_config ~__context config in let module Client = (val make_client (default_xenopsd ()) : XENOPS) in @@ -1383,6 +1410,7 @@ let apply_guest_agent_config ~__context config = (* Create an instance of Metadata.t, suitable for uploading to the xenops service *) let create_metadata ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VM.get_record ~__context ~self in let vbds = List.filter @@ -1635,6 +1663,7 @@ module Xenopsd_metadata = struct (* If the VM has Xapi_globs.persist_xenopsd_md -> filename in its other_config, we persist the xenopsd metadata to a well-known location in the filesystem *) let maybe_persist_md ~__context ~self md = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let oc = Db.VM.get_other_config ~__context ~self in if List.mem_assoc Xapi_globs.persist_xenopsd_md oc then let file_path = @@ -1655,6 +1684,7 @@ module Xenopsd_metadata = struct ) let push ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> let md = create_metadata ~__context ~self in let txt = md |> rpc_of Metadata.t |> Jsonrpc.to_string in @@ -1671,6 +1701,7 @@ module Xenopsd_metadata = struct ) let delete_nolock ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in info "xenops: VM.remove %s" id ; try @@ -1695,6 +1726,7 @@ module Xenopsd_metadata = struct (* Unregisters a VM with xenopsd, and cleans up metadata and caches *) let pull ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> info "xenops: VM.export_metadata %s" id ; let dbg = Context.string_of_task_and_tracing __context in @@ -1725,9 +1757,11 @@ module Xenopsd_metadata = struct ) let delete ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> delete_nolock ~__context id) let update ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in let queue_name = queue_of_vm ~__context ~self in with_lock metadata_m (fun () -> @@ -2427,6 +2461,7 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = ) let update_vm ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed id then debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id @@ -2449,6 +2484,7 @@ let update_vm ~__context id = (string_of_exn e) let update_vbd ~__context (id : string * string) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VBD (VM %s migrating away)" @@ -2551,6 +2587,7 @@ let update_vbd ~__context (id : string * string) = error "xenopsd event: Caught %s while updating VBD" (string_of_exn e) let update_vif ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VIF (VM %s migrating away)" @@ -2659,6 +2696,7 @@ let update_vif ~__context id = error "xenopsd event: Caught %s while updating VIF" (string_of_exn e) let update_pci ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for PCI (VM %s migrating away)" @@ -2727,6 +2765,7 @@ let update_pci ~__context id = error "xenopsd event: Caught %s while updating PCI" (string_of_exn e) let update_vgpu ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VGPU (VM %s migrating away)" @@ -2791,6 +2830,7 @@ let update_vgpu ~__context id = error "xenopsd event: Caught %s while updating VGPU" (string_of_exn e) let update_vusb ~__context (id : string * string) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VUSB (VM %s migrating away)" @@ -2846,14 +2886,17 @@ let unwrap x = raise Not_a_xenops_task let register_task __context ?cancellable queue_name id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in TaskHelper.register_task __context ?cancellable (wrap queue_name id) ; id let unregister_task __context queue_name id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in TaskHelper.unregister_task __context (wrap queue_name id) ; id let update_task ~__context queue_name id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try let self = TaskHelper.id_to_task_exn (TaskHelper.Xenops (queue_name, id)) in (* throws Not_found *) @@ -2887,6 +2930,7 @@ let update_task ~__context queue_name id = error "xenopsd event: Caught %s while updating task" (string_of_exn e) let rec events_watch ~__context cancel queue_name from = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in if Xapi_fist.delay_xenopsd_event_threads () then Thread.delay 30.0 ; let module Client = (val make_client queue_name : XENOPS) in @@ -2954,6 +2998,7 @@ let events_from_xenopsd queue_name = ) let refresh_vm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in info "xenops: UPDATES.refresh_vm %s" id ; let dbg = Context.string_of_task_and_tracing __context in @@ -2963,6 +3008,7 @@ let refresh_vm ~__context ~self = Events_from_xenopsd.wait queue_name dbg id () let resync_resident_on ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let localhost = Helpers.get_localhost ~__context in let domain0 = Helpers.get_domain_zero ~__context in @@ -3105,6 +3151,7 @@ let resync_resident_on ~__context = xapi_vms_not_in_xenopsd let resync_all_vms ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in (* This should now be correct *) let localhost = Helpers.get_localhost ~__context in let domain0 = Helpers.get_domain_zero ~__context in @@ -3116,11 +3163,13 @@ let resync_all_vms ~__context = (* experimental feature for hard-pinning vcpus *) let hard_numa_enabled ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let pool = Helpers.get_pool ~__context in let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in List.assoc_opt "restrict_hard_numa" restrictions = Some "false" let set_numa_affinity_policy ~__context ~value = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task __context in let open Xapi_xenops_queue in let module Client = (val make_client (default_xenopsd ()) : XENOPS) in @@ -3139,6 +3188,7 @@ let set_numa_affinity_policy ~__context ~value = Client.HOST.set_numa_affinity_policy dbg value let on_xapi_restart ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let host = Helpers.get_localhost ~__context in let value = Db.Host.get_numa_affinity_policy ~__context ~self:host in info "Setting NUMA affinity policy in xenopsd on startup to %s" @@ -3162,6 +3212,7 @@ let on_xapi_restart ~__context = apply_guest_agent_config ~__context config let assert_resident_on ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let localhost = Helpers.get_localhost ~__context in if not (Db.VM.get_resident_on ~__context ~self = localhost) then Helpers.internal_error "the VM %s is not resident on this host" @@ -3494,6 +3545,7 @@ let transform_xenops_exn ~__context ~vm queue_name f = should not be any other suppression going on. *) let set_resident_on ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in debug "VM %s set_resident_on" id ; let localhost = Helpers.get_localhost ~__context in @@ -3508,6 +3560,7 @@ let set_resident_on ~__context ~self = Xenopsd_metadata.update ~__context ~self let update_debug_info __context t = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let task = Context.get_task_id __context in let debug_info = List.map (fun (k, v) -> ("debug_info:" ^ k, v)) t.Task.debug_info @@ -3522,6 +3575,7 @@ let update_debug_info __context t = debug_info let sync_with_task_result __context ?cancellable queue_name x = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in x |> register_task __context ?cancellable queue_name @@ -3533,6 +3587,7 @@ let sync_with_task __context ?cancellable queue_name x = sync_with_task_result __context ?cancellable queue_name x |> ignore let sync __context queue_name x = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in x |> wait_for_task queue_name dbg @@ -3540,6 +3595,7 @@ let sync __context queue_name x = |> ignore let pause ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3553,6 +3609,7 @@ let pause ~__context ~self = ) let unpause ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3565,6 +3622,7 @@ let unpause ~__context ~self = ) let request_rdp ~__context ~self enabled = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3577,6 +3635,7 @@ let request_rdp ~__context ~self enabled = ) let run_script ~__context ~self script = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3593,6 +3652,7 @@ let run_script ~__context ~self script = ) let set_xenstore_data ~__context ~self xsdata = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3604,6 +3664,7 @@ let set_xenstore_data ~__context ~self xsdata = ) let set_vcpus ~__context ~self n = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3631,6 +3692,7 @@ let set_vcpus ~__context ~self n = ) let set_shadow_multiplier ~__context ~self target = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3660,6 +3722,7 @@ let set_shadow_multiplier ~__context ~self target = ) let set_memory_dynamic_range ~__context ~self min max = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3672,6 +3735,7 @@ let set_memory_dynamic_range ~__context ~self min max = ) let maybe_refresh_vm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in let id = id_of_vm ~__context ~self in @@ -3684,6 +3748,7 @@ let maybe_refresh_vm ~__context ~self = ) let start ~__context ~self paused force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> @@ -3745,6 +3810,7 @@ let start ~__context ~self paused force = ) let start ~__context ~self paused force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> try start ~__context ~self paused force @@ -3770,6 +3836,7 @@ let start ~__context ~self paused force = ) let reboot ~__context ~self timeout = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3792,6 +3859,7 @@ let reboot ~__context ~self timeout = ) let shutdown ~__context ~self timeout = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3825,6 +3893,7 @@ let shutdown ~__context ~self timeout = ) let suspend ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3901,6 +3970,7 @@ let suspend ~__context ~self = ) let resume ~__context ~self ~start_paused ~force:_ = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in let vm_id = id_of_vm ~__context ~self in @@ -3954,6 +4024,7 @@ let resume ~__context ~self ~start_paused ~force:_ = ~expected:(if start_paused then `Paused else `Running) let s3suspend ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3965,6 +4036,7 @@ let s3suspend ~__context ~self = ) let s3resume ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3976,12 +4048,14 @@ let s3resume ~__context ~self = ) let md_of_vbd ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in MD.of_vbd ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~vbd:(Db.VBD.get_record ~__context ~self) let vbd_plug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let vm_id = id_of_vm ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -4008,6 +4082,7 @@ let vbd_plug ~__context ~self = ) let vbd_unplug ~__context ~self force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4037,6 +4112,7 @@ let vbd_unplug ~__context ~self force = ) let vbd_eject_hvm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4059,6 +4135,7 @@ let vbd_eject_hvm ~__context ~self = ) let vbd_insert_hvm ~__context ~self ~vdi = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4084,6 +4161,7 @@ let vbd_insert_hvm ~__context ~self ~vdi = ) let has_qemu ~__context ~vm = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let id = Db.VM.get_uuid ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -4092,10 +4170,12 @@ let has_qemu ~__context ~vm = state.Vm.domain_type = Domain_HVM let ejectable ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in has_qemu ~__context ~vm let vbd_eject ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if ejectable ~__context ~self then vbd_eject_hvm ~__context ~self else ( @@ -4105,6 +4185,7 @@ let vbd_eject ~__context ~self = ) let vbd_insert ~__context ~self ~vdi = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if ejectable ~__context ~self then vbd_insert_hvm ~__context ~self ~vdi else ( @@ -4114,12 +4195,14 @@ let vbd_insert ~__context ~self ~vdi = ) let md_of_vif ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in MD.of_vif ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~vif:(self, Db.VIF.get_record ~__context ~self) let vif_plug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let vm_id = id_of_vm ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -4148,6 +4231,7 @@ let vif_plug ~__context ~self = ) let vif_set_locking_mode ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4162,6 +4246,7 @@ let vif_set_locking_mode ~__context ~self = ) let vif_set_pvs_proxy ~__context ~self creating = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4177,6 +4262,7 @@ let vif_set_pvs_proxy ~__context ~self creating = ) let vif_unplug ~__context ~self force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4199,6 +4285,7 @@ let vif_unplug ~__context ~self force = ) let vif_move ~__context ~self _network = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4225,6 +4312,7 @@ let vif_move ~__context ~self _network = ) let vif_set_ipv4_configuration ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4241,6 +4329,7 @@ let vif_set_ipv4_configuration ~__context ~self = ) let vif_set_ipv6_configuration ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4257,6 +4346,7 @@ let vif_set_ipv6_configuration ~__context ~self = ) let task_cancel ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try let queue_name, id = TaskHelper.task_to_id_exn self |> unwrap in let module Client = (val make_client queue_name : XENOPS) in @@ -4272,6 +4362,7 @@ let task_cancel ~__context ~self = false let md_of_vusb ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context ~self in let usb_group = Db.VUSB.get_USB_group ~__context ~self in let pusb = Helpers.get_first_pusb ~__context usb_group in @@ -4279,6 +4370,7 @@ let md_of_vusb ~__context ~self = MD.of_vusb ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~pusb:pusbr let vusb_unplug_hvm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4295,10 +4387,12 @@ let vusb_unplug_hvm ~__context ~self = ) let vusb_plugable ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context ~self in has_qemu ~__context ~vm let vusb_unplug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if vusb_plugable ~__context ~self then vusb_unplug_hvm ~__context ~self else From cb64cd6571ea50ea997c1e954def3527979cb7bc Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 6 Jun 2025 15:20:24 +0100 Subject: [PATCH 312/492] CP-50001: Instrument `Xapi_xenops.events_watch` This function is called recursively and the context span is only closed on failure. This makes it hard to read the trace. Therefore, I am resetting the context tracing each recursion step and now each recursion step will have its own trace. Signed-off-by: Gabriel Buica --- ocaml/xapi/xapi_xenops.ml | 111 ++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 53 deletions(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 70102faae44..0dc0f2780c9 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2930,60 +2930,65 @@ let update_task ~__context queue_name id = error "xenopsd event: Caught %s while updating task" (string_of_exn e) let rec events_watch ~__context cancel queue_name from = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in - let dbg = Context.string_of_task_and_tracing __context in - if Xapi_fist.delay_xenopsd_event_threads () then Thread.delay 30.0 ; - let module Client = (val make_client queue_name : XENOPS) in - let barriers, events, next = Client.UPDATES.get dbg from None in - if !cancel then - raise (Api_errors.Server_error (Api_errors.task_cancelled, [])) ; - let done_events = ref [] in - let already_done x = List.mem x !done_events in - let add_event x = done_events := x :: !done_events in - let do_updates l = - let open Dynamic in - List.iter - (fun ev -> - debug "Processing event: %s" - (ev |> Dynamic.rpc_of_id |> Jsonrpc.to_string) ; - if already_done ev then - debug "Skipping (already processed this round)" - else ( - add_event ev ; - match ev with - | Vm id -> - debug "xenops event on VM %s" id ; - update_vm ~__context id - | Vbd id -> - debug "xenops event on VBD %s.%s" (fst id) (snd id) ; - update_vbd ~__context id - | Vif id -> - debug "xenops event on VIF %s.%s" (fst id) (snd id) ; - update_vif ~__context id - | Pci id -> - debug "xenops event on PCI %s.%s" (fst id) (snd id) ; - update_pci ~__context id - | Vgpu id -> - debug "xenops event on VGPU %s.%s" (fst id) (snd id) ; - update_vgpu ~__context id - | Vusb id -> - debug "xenops event on VUSB %s.%s" (fst id) (snd id) ; - update_vusb ~__context id - | Task id -> - debug "xenops event on Task %s" id ; - update_task ~__context queue_name id - ) - ) - l - in - List.iter - (fun (id, b_events) -> - debug "Processing barrier %d" id ; - do_updates b_events ; - Events_from_xenopsd.wakeup queue_name dbg id + Context.complete_tracing __context ; + let next = + Context.with_tracing ~__context __FUNCTION__ (fun __context -> + let dbg = Context.string_of_task_and_tracing __context in + if Xapi_fist.delay_xenopsd_event_threads () then Thread.delay 30.0 ; + let module Client = (val make_client queue_name : XENOPS) in + let barriers, events, next = Client.UPDATES.get dbg from None in + if !cancel then + raise (Api_errors.Server_error (Api_errors.task_cancelled, [])) ; + let done_events = ref [] in + let already_done x = List.mem x !done_events in + let add_event x = done_events := x :: !done_events in + let do_updates l = + let open Dynamic in + List.iter + (fun ev -> + debug "Processing event: %s" + (ev |> Dynamic.rpc_of_id |> Jsonrpc.to_string) ; + if already_done ev then + debug "Skipping (already processed this round)" + else ( + add_event ev ; + match ev with + | Vm id -> + debug "xenops event on VM %s" id ; + update_vm ~__context id + | Vbd id -> + debug "xenops event on VBD %s.%s" (fst id) (snd id) ; + update_vbd ~__context id + | Vif id -> + debug "xenops event on VIF %s.%s" (fst id) (snd id) ; + update_vif ~__context id + | Pci id -> + debug "xenops event on PCI %s.%s" (fst id) (snd id) ; + update_pci ~__context id + | Vgpu id -> + debug "xenops event on VGPU %s.%s" (fst id) (snd id) ; + update_vgpu ~__context id + | Vusb id -> + debug "xenops event on VUSB %s.%s" (fst id) (snd id) ; + update_vusb ~__context id + | Task id -> + debug "xenops event on Task %s" id ; + update_task ~__context queue_name id + ) + ) + l + in + List.iter + (fun (id, b_events) -> + debug "Processing barrier %d" id ; + do_updates b_events ; + Events_from_xenopsd.wakeup queue_name dbg id + ) + barriers ; + do_updates events ; + next ) - barriers ; - do_updates events ; + in events_watch ~__context cancel queue_name (Some next) let events_from_xenopsd queue_name = From 7bede8387a58d97de61e0f11b4d96c7cda912824 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Wed, 18 Jun 2025 13:19:38 +0100 Subject: [PATCH 313/492] CP-50001: Add attributes to updates in `events_watch` Signed-off-by: Gabriel Buica --- ocaml/xapi/context.ml | 3 ++- ocaml/xapi/context.mli | 7 ++++++- ocaml/xapi/xapi_xenops.ml | 42 ++++++++++++++++++++++++++++++++------- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index f03ce60e2a0..a49c8ecd1bb 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -532,11 +532,12 @@ let with_forwarded_task ?http_other_config ?session_id ?origin task_id f = in finally_destroy_context ~__context f -let with_tracing ?originator ~__context name f = +let with_tracing ?(attributes = []) ?originator ~__context name f = let open Tracing in let parent = __context.tracing in let span_attributes = Attributes.attr_of_originator originator + @ attributes @ make_attributes ~task_id:__context.task_id ?session_id:__context.session_id () in diff --git a/ocaml/xapi/context.mli b/ocaml/xapi/context.mli index 281f67ca4b2..ac3250f8569 100644 --- a/ocaml/xapi/context.mli +++ b/ocaml/xapi/context.mli @@ -185,6 +185,11 @@ val with_forwarded_task : then ensure [__context] is destroyed.*) val with_tracing : - ?originator:string -> __context:t -> string -> (t -> 'a) -> 'a + ?attributes:(string * string) list + -> ?originator:string + -> __context:t + -> string + -> (t -> 'a) + -> 'a val set_client_span : t -> Tracing.Span.t option diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 0dc0f2780c9..5d9a1cc0a41 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2461,7 +2461,11 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = ) let update_vm ~__context id = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed id then debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id @@ -2484,7 +2488,11 @@ let update_vm ~__context id = (string_of_exn e) let update_vbd ~__context (id : string * string) = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vbd", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VBD (VM %s migrating away)" @@ -2587,7 +2595,11 @@ let update_vbd ~__context (id : string * string) = error "xenopsd event: Caught %s while updating VBD" (string_of_exn e) let update_vif ~__context id = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vif", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VIF (VM %s migrating away)" @@ -2696,7 +2708,11 @@ let update_vif ~__context id = error "xenopsd event: Caught %s while updating VIF" (string_of_exn e) let update_pci ~__context id = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.pci", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for PCI (VM %s migrating away)" @@ -2765,7 +2781,11 @@ let update_pci ~__context id = error "xenopsd event: Caught %s while updating PCI" (string_of_exn e) let update_vgpu ~__context id = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vgpu", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VGPU (VM %s migrating away)" @@ -2830,7 +2850,11 @@ let update_vgpu ~__context id = error "xenopsd event: Caught %s while updating VGPU" (string_of_exn e) let update_vusb ~__context (id : string * string) = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vusb", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VUSB (VM %s migrating away)" @@ -2896,7 +2920,11 @@ let unregister_task __context queue_name id = id let update_task ~__context queue_name id = - let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.task", id)] + ~__context __FUNCTION__ + in try let self = TaskHelper.id_to_task_exn (TaskHelper.Xenops (queue_name, id)) in (* throws Not_found *) From 9d2468978f1ea067863dffee9e065c8ac40b0632 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 16 Jun 2025 07:31:13 +0100 Subject: [PATCH 314/492] CA-406770: Improve error message 1. WLB request will raise `wlb_authentication_failed` when catching `Http_client.Http_error`. But actually only error code 401 and 403 should raise this type exception. For other error code, raise `wlb_connection_reset`. Also print the detail error code and message. 2. `message_forwarding` raises same error for `Http_request_rejected` and `Connection_reset` so we don't know which exception actually be raised. Print detailed logs for these 2 exceptions. Signed-off-by: Bengang Yuan --- ocaml/xapi/message_forwarding.ml | 27 +++++++++------------------ ocaml/xapi/workload_balancing.ml | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index d1773e4f0c6..b52aaaa20ec 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -143,24 +143,15 @@ let do_op_on_common ~local_fn ~__context ~host ~remote_fn f = let task_opt = set_forwarding_on_task ~__context ~host in f __context host task_opt remote_fn with - | Xmlrpc_client.Connection_reset | Http_client.Http_request_rejected _ -> - warn - "Caught Connection_reset when contacting host %s; converting into \ - CANNOT_CONTACT_HOST" - (Ref.string_of host) ; - raise - (Api_errors.Server_error - (Api_errors.cannot_contact_host, [Ref.string_of host]) - ) - | Xmlrpc_client.Stunnel_connection_failed -> - warn - "Caught Stunnel_connection_failed while contacting host %s; converting \ - into CANNOT_CONTACT_HOST" - (Ref.string_of host) ; - raise - (Api_errors.Server_error - (Api_errors.cannot_contact_host, [Ref.string_of host]) - ) + | ( Xmlrpc_client.Connection_reset + | Http_client.Http_request_rejected _ + | Xmlrpc_client.Stunnel_connection_failed ) as e + -> + error + "%s: Caught %s when contacting host %s; converting into \ + CANNOT_CONTACT_HOST" + __FUNCTION__ (Printexc.to_string e) (Ref.string_of host) ; + raise Api_errors.(Server_error (cannot_contact_host, [Ref.string_of host])) (* regular forwarding fn, with session and live-check. Used by most calls, will use the connection cache. *) diff --git a/ocaml/xapi/workload_balancing.ml b/ocaml/xapi/workload_balancing.ml index 27fa184da84..7108032dbf7 100644 --- a/ocaml/xapi/workload_balancing.ml +++ b/ocaml/xapi/workload_balancing.ml @@ -329,8 +329,19 @@ let wlb_request ~__context ~host ~port ~auth ~meth ~params ~handler ~enable_log with | Remote_requests.Timed_out -> raise_timeout timeout - | Http_client.Http_request_rejected _ | Http_client.Http_error _ -> - raise_authentication_failed () + | Http_client.Http_error (code, _) as e -> ( + error "%s: Caught %s when contacting WLB" __FUNCTION__ + (Printexc.to_string e) ; + match code with + | "401" | "403" -> + raise_authentication_failed () + | _ -> + raise_connection_reset () + ) + | Http_client.Http_request_rejected _ as e -> + error "%s: Caught %s when contacting WLB" __FUNCTION__ + (Printexc.to_string e) ; + raise_connection_reset () | Xmlrpc_client.Connection_reset -> raise_connection_reset () | Stunnel.Stunnel_verify_error reason -> From ad5cbe5e3518863ce380f9509d1d7a4eddd8db31 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 23 Jun 2025 10:06:45 +0100 Subject: [PATCH 315/492] xenopsd: Remove data/updated from the list of watched paths "data/updated" is not read or used anywhere in xenopsd or xapi: * xapi_guest_agent's last_updated field is just Unix.gettimeofday (). * xapi_xenops removes "data/updated" from the guest agent state altogether before checking if it's changed: ``` let ignored_keys = ["data/meminfo_free"; "data/updated"; "data/update_cnt"] ``` So there is no need to watch this path at all. This greatly reduces unnecessary traffic between xapi and xenopsd, since any VM with a guest agent would write to data/updated once every 60 seconds, which would generate a Dynamic.Vm event, making xapi call xenopsd's VM.stat to rescan the domain's xenstore tree and perform several hypercalls. Almost always, this would be completely unnecessary as nothing else about the VM would change, but a lot of work would be done anyhow. Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/xenops_server_xen.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index cdc54d32873..b8577746b88 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -4935,7 +4935,6 @@ module Actions = struct let open Printf in [ sprintf "/local/domain/%d/attr" domid - ; sprintf "/local/domain/%d/data/updated" domid ; sprintf "/local/domain/%d/data/ts" domid ; sprintf "/local/domain/%d/data/service" domid ; sprintf "/local/domain/%d/memory/target" domid From 5d0fb87b4edff8c076eda285c576c752ad4aba11 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 23 Jun 2025 11:05:21 +0100 Subject: [PATCH 316/492] xapi_xenops: Simplify update_* functions Drop the first member of the (X.t * X.state) tuple coming from X.stat immediately as it's never used. This removes the need for several 'snd info', 'Option.iter (fun (_, state) ->) info' constructs. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_xenops.ml | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 4b7b738b000..8be594f9657 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2433,8 +2433,8 @@ let update_vbd ~__context (id : string * string) = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VBD.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VBD.stat dbg id)) with _ -> None in + if info <> previous then ( let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vbdrs = List.map @@ -2469,7 +2469,7 @@ let update_vbd ~__context (id : string * string) = debug "VBD %s.%s matched device %s" (fst id) (snd id) vbd_r.API.vBD_userdevice ; Option.iter - (fun (_, state) -> + (fun state -> let currently_attached = state.Vbd.plugged || state.Vbd.active in debug "xenopsd event: Updating VBD %s.%s device <- %s; \ @@ -2512,7 +2512,7 @@ let update_vbd ~__context (id : string * string) = ) ) info ; - Xenops_cache.update_vbd id (Option.map snd info) ; + Xenops_cache.update_vbd id info ; Xapi_vbd_helpers.update_allowed_operations ~__context ~self:vbd ; if not (Db.VBD.get_empty ~__context ~self:vbd) then let vdi = Db.VBD.get_VDI ~__context ~self:vbd in @@ -2535,8 +2535,8 @@ let update_vif ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VIF.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VIF.stat dbg id)) with _ -> None in + if info <> previous then ( let vifs = Db.VM.get_VIFs ~__context ~self:vm in let vifrs = List.map @@ -2547,7 +2547,7 @@ let update_vif ~__context id = List.find (fun (_, vifr) -> vifr.API.vIF_device = snd id) vifrs in Option.iter - (fun (_, state) -> + (fun state -> if not (state.Vif.plugged || state.Vif.active) then ( ( try Xapi_network.deregister_vif ~__context vif with e -> @@ -2623,7 +2623,7 @@ let update_vif ~__context id = ~value:(state.plugged || state.active) ) info ; - Xenops_cache.update_vif id (Option.map snd info) ; + Xenops_cache.update_vif id info ; Xapi_vif_helpers.update_allowed_operations ~__context ~self:vif ) with e -> @@ -2643,8 +2643,8 @@ let update_pci ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.PCI.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.PCI.stat dbg id)) with _ -> None in + if info <> previous then ( let pcis = Db.Host.get_PCIs ~__context ~self:localhost in let pcirs = List.map @@ -2661,7 +2661,7 @@ let update_pci ~__context id = List.mem vm (Db.PCI.get_attached_VMs ~__context ~self:pci) in Option.iter - (fun (_, state) -> + (fun state -> debug "xenopsd event: Updating PCI %s.%s currently_attached <- %b" (fst id) (snd id) state.Pci.plugged ; if attached_in_db && not state.Pci.plugged then @@ -2692,7 +2692,7 @@ let update_pci ~__context id = vgpu_opt ) info ; - Xenops_cache.update_pci id (Option.map snd info) + Xenops_cache.update_pci id info ) with e -> error "xenopsd event: Caught %s while updating PCI" (string_of_exn e) @@ -2711,8 +2711,8 @@ let update_vgpu ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VGPU.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VGPU.stat dbg id)) with _ -> None in + if info <> previous then ( let vgpus = Db.VM.get_VGPUs ~__context ~self:vm in let vgpu_records = List.map @@ -2733,7 +2733,7 @@ let update_vgpu ~__context id = = None then Option.iter - (fun (_, state) -> + (fun state -> ( if state.Vgpu.plugged then let scheduled = Db.VGPU.get_scheduled_to_be_resident_on ~__context @@ -2756,7 +2756,7 @@ let update_vgpu ~__context id = ) ) info ; - Xenops_cache.update_vgpu id (Option.map snd info) + Xenops_cache.update_vgpu id info ) with e -> error "xenopsd event: Caught %s while updating VGPU" (string_of_exn e) @@ -2775,8 +2775,8 @@ let update_vusb ~__context (id : string * string) = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VUSB.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VUSB.stat dbg id)) with _ -> None in + if info <> previous then ( let pusb, _ = Db.VM.get_VUSBs ~__context ~self:vm |> List.map (fun self -> Db.VUSB.get_USB_group ~__context ~self) @@ -2791,7 +2791,7 @@ let update_vusb ~__context (id : string * string) = let usb_group = Db.PUSB.get_USB_group ~__context ~self:pusb in let vusb = Helpers.get_first_vusb ~__context usb_group in Option.iter - (fun (_, state) -> + (fun state -> debug "xenopsd event: Updating USB %s.%s; plugged <- %b" (fst id) (snd id) state.Vusb.plugged ; let currently_attached = state.Vusb.plugged in @@ -2799,7 +2799,7 @@ let update_vusb ~__context (id : string * string) = ~value:currently_attached ) info ; - Xenops_cache.update_vusb id (Option.map snd info) ; + Xenops_cache.update_vusb id info ; Xapi_vusb_helpers.update_allowed_operations ~__context ~self:vusb ) with e -> From acb973204e52f3a196801e56d479c56381588723 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 20 Jun 2025 16:58:56 +0100 Subject: [PATCH 317/492] CP-308201: make unimplemented function more obvious Rename the "u" function used across storage and observer_helpers to make its meaning more obvious and use __FUNCTION__ instead of hardcoding the function name. Signed-off-by: Steven Woods --- ocaml/xapi-idl/lib/observer_skeleton.ml | 30 ++-- ocaml/xapi-idl/storage/storage_interface.ml | 2 + ocaml/xapi-idl/storage/storage_skeleton.ml | 190 ++++++++++++-------- ocaml/xapi/storage_mux.ml | 15 +- ocaml/xapi/storage_smapiv1_migrate.ml | 6 +- ocaml/xapi/storage_smapiv1_wrapper.ml | 25 +-- ocaml/xapi/storage_smapiv3_migrate.ml | 19 +- ocaml/xenopsd/lib/xenops_server_skeleton.ml | 85 +++++---- 8 files changed, 203 insertions(+), 169 deletions(-) diff --git a/ocaml/xapi-idl/lib/observer_skeleton.ml b/ocaml/xapi-idl/lib/observer_skeleton.ml index 8cf5e2f5221..e53a45f958c 100644 --- a/ocaml/xapi-idl/lib/observer_skeleton.ml +++ b/ocaml/xapi-idl/lib/observer_skeleton.ml @@ -13,36 +13,36 @@ *) [@@@ocaml.warning "-27"] -let u x = raise Observer_helpers.(Observer_error (Errors.Unimplemented x)) +let unimplemented x = + raise Observer_helpers.(Observer_error (Errors.Unimplemented x)) module Observer = struct type context = unit let create ctx ~dbg ~uuid ~name_label ~attributes ~endpoints ~enabled = - u "Observer.create" + unimplemented __FUNCTION__ - let destroy ctx ~dbg ~uuid = u "Observer.destroy" + let destroy ctx ~dbg ~uuid = unimplemented __FUNCTION__ - let set_enabled ctx ~dbg ~uuid ~enabled = u "Observer.set_enabled" + let set_enabled ctx ~dbg ~uuid ~enabled = unimplemented __FUNCTION__ - let set_attributes ctx ~dbg ~uuid ~attributes = u "Observer.set_attributes" + let set_attributes ctx ~dbg ~uuid ~attributes = unimplemented __FUNCTION__ - let set_endpoints ctx ~dbg ~uuid ~endpoints = u "Observer.set_endpoints" + let set_endpoints ctx ~dbg ~uuid ~endpoints = unimplemented __FUNCTION__ - let init ctx ~dbg = u "Observer.init" + let init ctx ~dbg = unimplemented __FUNCTION__ - let set_trace_log_dir ctx ~dbg ~dir = u "Observer.set_trace_log_dir" + let set_trace_log_dir ctx ~dbg ~dir = unimplemented __FUNCTION__ - let set_export_interval ctx ~dbg ~interval = u "Observer.set_export_interval" + let set_export_interval ctx ~dbg ~interval = unimplemented __FUNCTION__ - let set_max_spans ctx ~dbg ~spans = u "Observer.set_max_spans" + let set_max_spans ctx ~dbg ~spans = unimplemented __FUNCTION__ - let set_max_traces ctx ~dbg ~traces = u "Observer.set_max_traces" + let set_max_traces ctx ~dbg ~traces = unimplemented __FUNCTION__ - let set_max_file_size ctx ~dbg ~file_size = u "Observer.set_max_file_size" + let set_max_file_size ctx ~dbg ~file_size = unimplemented __FUNCTION__ - let set_host_id ctx ~dbg ~host_id = u "Observer.set_host_id" + let set_host_id ctx ~dbg ~host_id = unimplemented __FUNCTION__ - let set_compress_tracing_files ctx ~dbg ~enabled = - u "Observer.set_compress_tracing_files" + let set_compress_tracing_files ctx ~dbg ~enabled = unimplemented __FUNCTION__ end diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 14ca03e6cb8..eaabacc9e8f 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -425,6 +425,8 @@ end exception Storage_error of Errors.error +let unimplemented x = raise (Storage_error (Errors.Unimplemented x)) + let () = (* register printer *) let sprintf = Printf.sprintf in diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 290c09d6230..a2d2d04ab08 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -13,8 +13,6 @@ *) [@@@ocaml.warning "-27"] -let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - type context = unit module UPDATES = struct @@ -27,193 +25,231 @@ module UPDATES = struct end module Query = struct - let query ctx ~dbg = u "Query.query" + let query ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ - let diagnostics ctx ~dbg = u "Query.diagnostics" + let diagnostics ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end module DP = struct - let create ctx ~dbg ~id = u "DP.create" + let create ctx ~dbg ~id = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~dp ~allow_leak = u "DP.destroy" + let destroy ctx ~dbg ~dp ~allow_leak = + Storage_interface.unimplemented __FUNCTION__ - let destroy2 ctx ~dbg ~dp ~sr ~vdi ~vm ~allow_leak = u "DP.destroy2" + let destroy2 ctx ~dbg ~dp ~sr ~vdi ~vm ~allow_leak = + Storage_interface.unimplemented __FUNCTION__ - let attach_info ctx ~dbg ~sr ~vdi ~dp ~vm = u "DP.attach_info" + let attach_info ctx ~dbg ~sr ~vdi ~dp ~vm = + Storage_interface.unimplemented __FUNCTION__ - let diagnostics ctx () = u "DP.diagnostics" + let diagnostics ctx () = Storage_interface.unimplemented __FUNCTION__ - let stat_vdi ctx ~dbg ~sr ~vdi () = u "DP.stat_vdi" + let stat_vdi ctx ~dbg ~sr ~vdi () = + Storage_interface.unimplemented __FUNCTION__ end module SR = struct let create ctx ~dbg ~sr ~name_label ~name_description ~device_config ~physical_size = - u "SR.create" + Storage_interface.unimplemented __FUNCTION__ - let attach ctx ~dbg ~sr ~device_config = u "SR.attach" + let attach ctx ~dbg ~sr ~device_config = + Storage_interface.unimplemented __FUNCTION__ - let set_name_label ctx ~dbg ~sr ~new_name_label = u "SR.set_name_label" + let set_name_label ctx ~dbg ~sr ~new_name_label = + Storage_interface.unimplemented __FUNCTION__ let set_name_description ctx ~dbg ~sr ~new_name_description = - u "SR.set_name_description" + Storage_interface.unimplemented __FUNCTION__ - let detach ctx ~dbg ~sr = u "SR.detach" + let detach ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let reset ctx ~dbg ~sr = u "SR.reset" + let reset ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~sr = u "SR.destroy" + let destroy ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let probe ctx ~dbg ~queue ~device_config ~sm_config = u "SR.probe" + let probe ctx ~dbg ~queue ~device_config ~sm_config = + Storage_interface.unimplemented __FUNCTION__ - let scan ctx ~dbg ~sr = u "SR.scan" + let scan ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let scan2 ctx ~dbg ~sr = u "SR.scan2" + let scan2 ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ let update_snapshot_info_src ctx ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs = - u "SR.update_snapshot_info_src" + Storage_interface.unimplemented __FUNCTION__ let update_snapshot_info_dest ctx ~dbg ~sr ~vdi ~src_vdi ~snapshot_pairs = - u "SR.update_snapshot_info_dest" + Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~sr = u "SR.stat" + let stat ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "SR.list" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end module VDI = struct - let create ctx ~dbg ~sr ~vdi_info = u "VDI.create" + let create ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let set_name_label ctx ~dbg ~sr ~vdi ~new_name_label = u "VDI.set_name_label" + let set_name_label ctx ~dbg ~sr ~vdi ~new_name_label = + Storage_interface.unimplemented __FUNCTION__ let set_name_description ctx ~dbg ~sr ~vdi ~new_name_description = - u "VDI.set_name_description" + Storage_interface.unimplemented __FUNCTION__ - let snapshot ctx ~dbg ~sr ~vdi_info = u "VDI.snapshot" + let snapshot ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let clone ctx ~dbg ~sr ~vdi_info = u "VDI.clone" + let clone ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let resize ctx ~dbg ~sr ~vdi ~new_size = u "VDI.resize" + let resize ctx ~dbg ~sr ~vdi ~new_size = + Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~sr ~vdi = u "VDI.destroy" + let destroy ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~sr ~vdi = u "VDI.stat" + let stat ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let introduce ctx ~dbg ~sr ~uuid ~sm_config ~location = u "VDI.introduce" + let introduce ctx ~dbg ~sr ~uuid ~sm_config ~location = + Storage_interface.unimplemented __FUNCTION__ - let set_persistent ctx ~dbg ~sr ~vdi ~persistent = u "VDI.set_persistent" + let set_persistent ctx ~dbg ~sr ~vdi ~persistent = + Storage_interface.unimplemented __FUNCTION__ let epoch_begin ctx ~dbg ~sr ~vdi ~vm ~persistent = () - let attach ctx ~dbg ~dp ~sr ~vdi ~read_write = u "VDI.attach" + let attach ctx ~dbg ~dp ~sr ~vdi ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let attach2 ctx ~dbg ~dp ~sr ~vdi ~read_write = u "VDI.attach2" + let attach2 ctx ~dbg ~dp ~sr ~vdi ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let attach3 ctx ~dbg ~dp ~sr ~vdi ~vm ~read_write = u "VDI.attach3" + let attach3 ctx ~dbg ~dp ~sr ~vdi ~vm ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let activate ctx ~dbg ~dp ~sr ~vdi = u "VDI.activate" + let activate ctx ~dbg ~dp ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let activate3 ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.activate3" + let activate3 ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let activate_readonly ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.activate_readonly" + let activate_readonly ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let deactivate ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.deactivate" + let deactivate ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let detach ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.detach" + let detach ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ let epoch_end ctx ~dbg ~sr ~vdi ~vm = () - let get_url ctx ~dbg ~sr ~vdi = u "VDI.get_url" + let get_url ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let similar_content ctx ~dbg ~sr ~vdi = u "VDI.similar_content" + let similar_content ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let get_by_name ctx ~dbg ~sr ~name = u "VDI.get_by_name" + let get_by_name ctx ~dbg ~sr ~name = + Storage_interface.unimplemented __FUNCTION__ - let set_content_id ctx ~dbg ~sr ~vdi ~content_id = u "VDI.set_content_id" + let set_content_id ctx ~dbg ~sr ~vdi ~content_id = + Storage_interface.unimplemented __FUNCTION__ - let compose ctx ~dbg ~sr ~vdi1 ~vdi2 = u "VDI.compose" + let compose ctx ~dbg ~sr ~vdi1 ~vdi2 = + Storage_interface.unimplemented __FUNCTION__ - let add_to_sm_config ctx ~dbg ~sr ~vdi ~key ~value = u "VDI.add_to_sm_config" + let add_to_sm_config ctx ~dbg ~sr ~vdi ~key ~value = + Storage_interface.unimplemented __FUNCTION__ let remove_from_sm_config ctx ~dbg ~sr ~vdi ~key = - u "VDI.remove_from_sm_config" + Storage_interface.unimplemented __FUNCTION__ - let enable_cbt ctx ~dbg ~sr ~vdi = u "VDI.enable_cbt" + let enable_cbt ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let disable_cbt ctx ~dbg ~sr ~vdi = u "VDI.disable_cbt" + let disable_cbt ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let data_destroy ctx ~dbg ~sr ~vdi = u "VDI.data_destroy" + let data_destroy ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ let list_changed_blocks ctx ~dbg ~sr ~vdi_from ~vdi_to = - u "VDI.list_changed_blocks" + Storage_interface.unimplemented __FUNCTION__ end -let get_by_name ctx ~dbg ~name = u "get_by_name" +let get_by_name ctx ~dbg ~name = Storage_interface.unimplemented __FUNCTION__ module DATA = struct - let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = u "DATA.copy" + let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = + Storage_interface.unimplemented __FUNCTION__ - let mirror ctx ~dbg ~sr ~vdi ~vm ~dest = u "DATA.mirror" + let mirror ctx ~dbg ~sr ~vdi ~vm ~dest = + Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~sr ~vdi ~vm ~key = u "DATA.stat" + let stat ctx ~dbg ~sr ~vdi ~vm ~key = + Storage_interface.unimplemented __FUNCTION__ let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = - u "DATA.MIRROR.import_activate" + Storage_interface.unimplemented __FUNCTION__ - let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = u "DATA.MIRROR.get_nbd_server" + let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ module MIRROR = struct type context = unit let send_start ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = - u "DATA.MIRROR.send_start" + Storage_interface.unimplemented __FUNCTION__ let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = - u "DATA.MIRROR.receive_start" + Storage_interface.unimplemented __FUNCTION__ let receive_start2 ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = - u "DATA.MIRROR.receive_start2" + Storage_interface.unimplemented __FUNCTION__ let receive_start3 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url ~verify_dest = - u "DATA.MIRROR.receive_start3" + Storage_interface.unimplemented __FUNCTION__ - let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" + let receive_finalize ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ - let receive_finalize2 ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize2" + let receive_finalize2 ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ let receive_finalize3 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = - u "DATA.MIRROR.receive_finalize3" + Storage_interface.unimplemented __FUNCTION__ - let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" + let receive_cancel ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ let receive_cancel2 ctx ~dbg ~mirror_id ~url ~verify_dest = - u "DATA.MIRROR.receive_cancel2" + Storage_interface.unimplemented __FUNCTION__ let pre_deactivate_hook ctx ~dbg ~dp ~sr ~vdi = - u "DATA.MIRROR.pre_deactivate_hook" + Storage_interface.unimplemented __FUNCTION__ let has_mirror_failed ctx ~dbg ~mirror_id ~sr = - u "DATA.MIRROR.has_mirror_failed" + Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "DATA.MIRROR.list" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~id = u "DATA.MIRROR.stat" + let stat ctx ~dbg ~id = Storage_interface.unimplemented __FUNCTION__ end end module Policy = struct - let get_backend_vm ctx ~dbg ~vm ~sr ~vdi = u "Policy.get_backend_vm" + let get_backend_vm ctx ~dbg ~vm ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ end module TASK = struct - let stat ctx ~dbg ~task = u "TASK.stat" + let stat ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let cancel ctx ~dbg ~task = u "TASK.cancel" + let cancel ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~task = u "TASK.destroy" + let destroy ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "TASK.list" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 1ea91e94078..7e66e1a4d87 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -844,12 +844,11 @@ module Mux = struct module MIRROR = struct type context = unit - let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = - u "DATA.MIRROR.send_start" (* see storage_smapi{v1,v3}_migrate.ml *) + Storage_interface.unimplemented + __FUNCTION__ (* see storage_smapi{v1,v3}_migrate.ml *) let receive_start () ~dbg ~sr ~vdi_info ~id ~similar = with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun _di -> @@ -880,7 +879,7 @@ module Mux = struct (** see storage_smapiv{1,3}_migrate.receive_start3 *) let receive_start3 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let receive_finalize () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_finalize" ~dbg @@ fun di -> @@ -893,7 +892,7 @@ module Mux = struct Storage_smapiv1_migrate.MIRROR.receive_finalize2 () ~dbg:di.log ~id let receive_finalize3 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let receive_cancel () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_cancel" ~dbg @@ fun di -> @@ -901,13 +900,13 @@ module Mux = struct Storage_smapiv1_migrate.MIRROR.receive_cancel () ~dbg:di.log ~id let receive_cancel2 () ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let pre_deactivate_hook _ctx ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = - u "DATA.MIRROR.pre_deactivate_hook" + Storage_interface.unimplemented __FUNCTION__ let has_mirror_failed _ctx ~dbg:_ ~mirror_id:_ ~sr:_ = - u "DATA.MIRROR.has_mirror_failed" + Storage_interface.unimplemented __FUNCTION__ let list () ~dbg = with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml index fe291d44d66..c850d61f842 100644 --- a/ocaml/xapi/storage_smapiv1_migrate.ml +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -567,8 +567,6 @@ let mirror_cleanup ~dbg ~sr ~snapshot = module MIRROR : SMAPIv2_MIRROR = struct type context = unit - let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = D.debug @@ -878,9 +876,9 @@ module MIRROR : SMAPIv2_MIRROR = struct | _ -> false - let list _ctx = u __FUNCTION__ + let list _ctx = Storage_interface.unimplemented __FUNCTION__ - let stat _ctx = u __FUNCTION__ + let stat _ctx = Storage_interface.unimplemented __FUNCTION__ let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = let (module Remote) = diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 7066a649ce2..86879780fba 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1137,16 +1137,16 @@ functor end module DATA = struct - let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let copy context ~dbg ~sr ~vdi ~vm ~url ~dest = info "DATA.copy dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; Impl.DATA.copy context ~dbg ~sr ~vdi ~vm ~url ~dest - let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = u "DATA.mirror" + let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = + Storage_interface.unimplemented __FUNCTION__ - let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = u "DATA.stat" + let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = + Storage_interface.unimplemented __FUNCTION__ (* tapdisk supports three kind of nbd servers, the old style nbdserver, the new style nbd server and a real nbd server. The old and new style nbd servers @@ -1195,7 +1195,7 @@ functor let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = - u "DATA.MIRROR.send_start" + Storage_interface.unimplemented __FUNCTION__ let receive_start context ~dbg ~sr ~vdi_info ~id ~similar = info "DATA.MIRROR.receive_start dbg:%s sr:%s id:%s similar:[%s]" dbg @@ -1215,7 +1215,7 @@ functor let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ ~vm:_ = (* See Storage_smapiv1_migrate.receive_start3 *) - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let receive_finalize context ~dbg ~id = info "DATA.MIRROR.receive_finalize dbg:%s id:%s" dbg id ; @@ -1228,24 +1228,25 @@ functor let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = (* see storage_smapiv{1,3}_migrate *) - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let receive_cancel context ~dbg ~id = info "DATA.MIRROR.receive_cancel dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_cancel context ~dbg ~id let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = - u __FUNCTION__ + Storage_interface.unimplemented __FUNCTION__ - let list _context ~dbg:_ = u __FUNCTION__ + let list _context ~dbg:_ = Storage_interface.unimplemented __FUNCTION__ - let stat _context ~dbg:_ ~id:_ = u __FUNCTION__ + let stat _context ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml index d9d34ffbe08..774239c0804 100644 --- a/ocaml/xapi/storage_smapiv3_migrate.ml +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -108,8 +108,6 @@ let mirror_wait ~dbg ~sr ~vdi ~vm ~mirror_id mirror_key = module MIRROR : SMAPIv2_MIRROR = struct type context = unit - let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - let send_start _ctx ~dbg ~task_id:_ ~dp ~sr ~vdi ~mirror_vm ~mirror_id ~local_vdi:_ ~copy_vm:_ ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = @@ -187,10 +185,10 @@ module MIRROR : SMAPIv2_MIRROR = struct ) let receive_start _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = - u "DATA.MIRROR.receive_start" + Storage_interface.unimplemented __FUNCTION__ let receive_start2 _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ ~vm:_ = - u "DATA.MIRROR.receive_start2" + Storage_interface.unimplemented __FUNCTION__ let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar:_ ~vm ~url ~verify_dest = @@ -269,9 +267,11 @@ module MIRROR : SMAPIv2_MIRROR = struct !on_fail ; raise e - let receive_finalize _ctx ~dbg:_ ~id:_ = u "DATA.MIRROR.receive_finalize" + let receive_finalize _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ - let receive_finalize2 _ctx ~dbg:_ ~id:_ = u "DATA.MIRROR.receive_finalize2" + let receive_finalize2 _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg @@ -289,11 +289,12 @@ module MIRROR : SMAPIv2_MIRROR = struct recv_state ; State.remove_receive_mirror mirror_id - let receive_cancel _ctx ~dbg:_ ~id:_ = u __FUNCTION__ + let receive_cancel _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ - let list _ctx = u __FUNCTION__ + let list _ctx = Storage_interface.unimplemented __FUNCTION__ - let stat _ctx = u __FUNCTION__ + let stat _ctx = Storage_interface.unimplemented __FUNCTION__ let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = D.debug "%s dbg:%s mirror_id:%s url:%s verify_dest:%B" __FUNCTION__ dbg diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 2055837c47c..b938927a2e4 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -64,50 +64,49 @@ module VM = struct let remove _ = () - let create _ _ _ _ = unimplemented "VM.create" + let create _ _ _ _ = unimplemented __FUNCTION__ - let build ?restore_fd:_ _ _ _ _ _ = unimplemented "VM.build" + let build ?restore_fd:_ _ _ _ _ _ = unimplemented __FUNCTION__ - let create_device_model _ _ _ _ _ = unimplemented "VM.create_device_model" + let create_device_model _ _ _ _ _ = unimplemented __FUNCTION__ - let destroy_device_model _ _ = unimplemented "VM.destroy_device_model" + let destroy_device_model _ _ = unimplemented __FUNCTION__ - let destroy _ _ = unimplemented "VM.destroy" + let destroy _ _ = unimplemented __FUNCTION__ - let pause _ _ = unimplemented "VM.pause" + let pause _ _ = unimplemented __FUNCTION__ - let unpause _ _ = unimplemented "VM.unpause" + let unpause _ _ = unimplemented __FUNCTION__ - let set_xsdata _ _ _ = unimplemented "VM.set_xsdata" + let set_xsdata _ _ _ = unimplemented __FUNCTION__ - let set_vcpus _ _ _ = unimplemented "VM.set_vcpus" + let set_vcpus _ _ _ = unimplemented __FUNCTION__ - let set_shadow_multiplier _ _ _ = unimplemented "VM.set_shadow_multipler" + let set_shadow_multiplier _ _ _ = unimplemented __FUNCTION__ - let set_memory_dynamic_range _ _ _ _ = - unimplemented "VM.set_memory_dynamic_range" + let set_memory_dynamic_range _ _ _ _ = unimplemented __FUNCTION__ - let request_shutdown _ _ _ _ = unimplemented "VM.request_shutdown" + let request_shutdown _ _ _ _ = unimplemented __FUNCTION__ - let wait_shutdown _ _ _ _ = unimplemented "VM.wait_shutdown" + let wait_shutdown _ _ _ _ = unimplemented __FUNCTION__ - let assert_can_save _ = unimplemented "VM.assert_can_save" + let assert_can_save _ = unimplemented __FUNCTION__ - let save _ _ _ _ _ _ _ = unimplemented "VM.save" + let save _ _ _ _ _ _ _ = unimplemented __FUNCTION__ - let restore _ _ _ _ _ _ _ = unimplemented "VM.restore" + let restore _ _ _ _ _ _ _ = unimplemented __FUNCTION__ - let s3suspend _ _ = unimplemented "VM.s3suspend" + let s3suspend _ _ = unimplemented __FUNCTION__ - let s3resume _ _ = unimplemented "VM.s3resume" + let s3resume _ _ = unimplemented __FUNCTION__ - let soft_reset _ _ = unimplemented "VM.soft_reset" + let soft_reset _ _ = unimplemented __FUNCTION__ let get_state _ = Xenops_utils.halted_vm - let request_rdp _ _ = unimplemented "VM.request_rdp" + let request_rdp _ _ = unimplemented __FUNCTION__ - let run_script _ _ _ = unimplemented "VM.run_script" + let run_script _ _ _ = unimplemented __FUNCTION__ let set_domain_action_request _ _ = () @@ -131,9 +130,9 @@ module PCI = struct let dequarantine _ = () - let plug _ _ _ = unimplemented "PCI.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ = unimplemented "PCI.unplug" + let unplug _ _ _ = unimplemented __FUNCTION__ let get_device_action_request _ _ = None end @@ -145,17 +144,17 @@ module VBD = struct let epoch_end _ _ _ = () - let attach _ _ _ = unimplemented "VBD.attach" + let attach _ _ _ = unimplemented __FUNCTION__ - let activate _ _ _ = unimplemented "VBD.activate" + let activate _ _ _ = unimplemented __FUNCTION__ - let deactivate _ _ _ _ = unimplemented "VBD.deactivate" + let deactivate _ _ _ _ = unimplemented __FUNCTION__ - let detach _ _ _ = unimplemented "VBD.detach" + let detach _ _ _ = unimplemented __FUNCTION__ - let insert _ _ _ _ = unimplemented "VBD.insert" + let insert _ _ _ _ = unimplemented __FUNCTION__ - let eject _ _ _ = unimplemented "VBD.eject" + let eject _ _ _ = unimplemented __FUNCTION__ let set_qos _ _ _ = () @@ -167,23 +166,21 @@ end module VIF = struct let set_active _ _ _ _ = () - let plug _ _ _ = unimplemented "VIF.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ _ = unimplemented "VIF.unplug" + let unplug _ _ _ _ = unimplemented __FUNCTION__ - let move _ _ _ _ = unimplemented "VIF.move" + let move _ _ _ _ = unimplemented __FUNCTION__ - let set_carrier _ _ _ _ = unimplemented "VIF.set_carrier" + let set_carrier _ _ _ _ = unimplemented __FUNCTION__ - let set_locking_mode _ _ _ _ = unimplemented "VIF.set_locking_mode" + let set_locking_mode _ _ _ _ = unimplemented __FUNCTION__ - let set_ipv4_configuration _ _ _ _ = - unimplemented "VIF.set_ipv4_configuration" + let set_ipv4_configuration _ _ _ _ = unimplemented __FUNCTION__ - let set_ipv6_configuration _ _ _ _ = - unimplemented "VIF.set_ipv6_configuration" + let set_ipv6_configuration _ _ _ _ = unimplemented __FUNCTION__ - let set_pvs_proxy _ _ _ _ = unimplemented "VIF.set_pvs_proxy" + let set_pvs_proxy _ _ _ _ = unimplemented __FUNCTION__ let get_state _ _ = unplugged_vif @@ -191,7 +188,7 @@ module VIF = struct end module VGPU = struct - let start _ _ _ _ = unimplemented "VGPU.start" + let start _ _ _ _ = unimplemented __FUNCTION__ let set_active _ _ _ _ = () @@ -199,9 +196,9 @@ module VGPU = struct end module VUSB = struct - let plug _ _ _ = unimplemented "VUSB.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ = unimplemented "VUSB.unplug" + let unplug _ _ _ = unimplemented __FUNCTION__ let get_state _ _ = unplugged_vusb @@ -216,4 +213,4 @@ module UPDATES = struct assert false end -module DEBUG = struct let trigger _ _ = unimplemented "DEBUG.trigger" end +module DEBUG = struct let trigger _ _ = unimplemented __FUNCTION__ end From 3ae8ff956f862a40fde3ee9d517afb860baba8b7 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Wed, 2 Apr 2025 21:19:54 +0100 Subject: [PATCH 318/492] Use just id_of vbd for attached_vdis key instead of the (VM, VBD) tuple The VM id part of Vbd.id is unnecessary in the attached_vdis key as the DB is already indexed by the VM id. This also prevents problems when the VM is renamed. Signed-off-by: Steven Woods --- ocaml/xenopsd/xc/xenops_server_xen.ml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index b8577746b88..ccf3eac9764 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -187,7 +187,7 @@ module VmExtra = struct ; pv_drivers_detected: bool [@default false] ; xen_platform: (int * int) option (* (device_id, revision) for QEMU *) ; platformdata: (string * string) list [@default []] - ; attached_vdis: (Vbd.id * attached_vdi) list [@default []] + ; attached_vdis: (string * attached_vdi) list [@default []] } [@@deriving rpcty] @@ -3682,9 +3682,13 @@ module VBD = struct persistent= { vm_t.VmExtra.persistent with + (* Index by id_of vbd rather than vbd.id as VmExtra is + already indexed by VM id, so the VM id part of + vbd.id is unnecessary and causes issues finding the + attached_vdi when the VM is renamed. *) attached_vdis= - (vbd.Vbd.id, vdi) - :: List.remove_assoc vbd.Vbd.id + (id_of vbd, vdi) + :: List.remove_assoc (id_of vbd) vm_t.persistent.attached_vdis } } @@ -3706,7 +3710,7 @@ module VBD = struct let activate task vm vbd = let vmextra = DB.read_exn vm in - match List.assoc_opt vbd.id vmextra.persistent.attached_vdis with + match List.assoc_opt (id_of vbd) vmextra.persistent.attached_vdis with | None -> debug "No attached_vdi info, so not activating" | Some vdi -> @@ -3857,7 +3861,7 @@ module VBD = struct ) vm ) - (fun () -> cleanup_attached_vdis vm vbd.id) + (fun () -> cleanup_attached_vdis vm (id_of vbd)) let deactivate task vm vbd force = with_xc_and_xs (fun xc xs -> @@ -4021,7 +4025,7 @@ module VBD = struct | _ -> () ) ; - cleanup_attached_vdis vm vbd.id + cleanup_attached_vdis vm (id_of vbd) let insert task vm vbd d = on_frontend From 0728527b0598c86f497bda5a6c387c360f3132ee Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 24 Jun 2025 14:47:58 +0100 Subject: [PATCH 319/492] CA-410965: Modify default ref of console The `ref` parameter within the `location` attribute of the console can refer to either the VM's ref or the console's own ref. Currently, the console's location uses the VM's ref by default. This causes an issue: when executing xs console, the requested location contains the VM's ref. If the ref points to the VM, xapi will attempt to use the RFB console (which is graphical) by default, rather than the VT100 console (which is text-based). As a result, the xs console command fails to open the console and hangs. **Solution:** Update the default ref in the console's `location` to the console's own ref. With this change, whether accessing the RFB or the VT100 console, the ref in the `location` will always point to the respective console itself. Signed-off-by: Bengang Yuan --- ocaml/xapi/create_misc.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/create_misc.ml b/ocaml/xapi/create_misc.ml index cd3412156cd..26d2c886d52 100644 --- a/ocaml/xapi/create_misc.ml +++ b/ocaml/xapi/create_misc.ml @@ -307,7 +307,7 @@ and create_domain_zero_console_record_with_protocol ~__context ~domain_zero_ref let location = Uri.( make ~scheme:"https" ~host:address ~path:Constants.console_uri - ~query:[("ref", [Ref.string_of domain_zero_ref])] + ~query:[("ref", [Ref.string_of console_ref])] () |> to_string ) From 38256eb03af53bce840e224692f2b2e9b61bf78a Mon Sep 17 00:00:00 2001 From: Guillaume Date: Thu, 13 Mar 2025 17:59:39 +0100 Subject: [PATCH 320/492] Design proposal for supported image formats (v3) Add details on specifying image format for VDI and VM migration. In particular This revision explains how to choose the destination image format during VDI creation and migration, including VM migration scenarios. Also fixes minor typos in the document. Signed-off-by: Guillaume --- .../design/sm-supported-image-formats.md | 138 +++++++++++++++--- 1 file changed, 114 insertions(+), 24 deletions(-) diff --git a/doc/content/design/sm-supported-image-formats.md b/doc/content/design/sm-supported-image-formats.md index fd1118e885d..3d860c2833f 100644 --- a/doc/content/design/sm-supported-image-formats.md +++ b/doc/content/design/sm-supported-image-formats.md @@ -2,7 +2,7 @@ title: Add supported image formats in sm-list layout: default design_doc: true -revision: 2 +revision: 3 status: proposed --- @@ -22,32 +22,16 @@ available formats. # Design Proposal To expose the available image formats to clients (e.g., XenCenter, XenOrchestra, etc.), -we propose adding a new field called `supported-image-formats` to the Storage Manager (SM) -module. This field will be included in the output of the `SM.get_all_records` call. +we propose adding a new field called `supported_image_formats` to the Storage Manager +(SM) module. This field will be included in the output of the `SM.get_all_records` call. -The `supported-image-formats` field will be populated by retrieving information -from the SMAPI drivers. Specifically, each driver will update its `DRIVER_INFO` -dictionary with a new key, `supported_image_formats`, which will contain a list -of strings representing the supported image formats -(for example: `["vhd", "raw", "qcow2"]`). - -The list designates the driver's preferred VDI format as its first entry. That -means that when migrating a VDI, the destination storage repository will -attempt to create a VDI in this preferred format. If the default format cannot -be used (e.g., due to size limitations), an error will be generated. - -If a driver does not provide this information (as is currently the case with existing -drivers), the default value will be an empty array. This signifies that it is the -driver that decides which format it will use. This ensures that the modification -remains compatible with both current and future drivers. - -With this new information, listing all parameters of the SM object will return: +- With this new information, listing all parameters of the SM object will return: ```bash # xe sm-list params=all ``` -will output something like: +Output of the command will look like (notice that CLI uses hyphens): ``` uuid ( RO) : c6ae9a43-fff6-e482-42a9-8c3f8c533e36 @@ -65,12 +49,118 @@ required-cluster-stack ( RO) : supported-image-formats ( RO) : vhd, raw, qcow2 ``` -This change impacts the SM data model, and as such, the XAPI database version will -be incremented. +## Implementation details + +The `supported_image_formats` field will be populated by retrieving information +from the SMAPI drivers. Specifically, each driver will update its `DRIVER_INFO` +dictionary with a new key, `supported_image_formats`, which will contain a list +of strings representing the supported image formats +(for example: `["vhd", "raw", "qcow2"]`). Although the formats are listed as a +list of strings, they are treated as a set-specifying the same format multiple +times has no effect. + +### Driver behavior without `supported_image_formats` + +If a driver does not provide this information (as is currently the case with +existing drivers), the default value will be an empty list. This signifies +that the driver determines which format to use when creating VDI. During a migration, +the destination driver will choose the format of the VDI if none is explicitly +specified. This ensures backward compatibility with both current and future drivers. + +### Specifying image formats for VDIs creation + +If the supported image format is exposed to the client, then, when creating new VDI, +user can specify the desired format via the `sm_config` parameter `image-format=qcow2` (or +any format that is supported). If no format is specified, the driver will use its +preferred default format. If the specified format is not supported, an error will be +generated indicating that the SR does not support it. Here is how it can be achieved +using the XE CLI: + +```bash +# xe vdi-create \ + sr-uuid=cbe2851e-9f9b-f310-9bca-254c1cf3edd8 \ + name-label="A new VDI" \ + virtual-size=10240 \ + sm-config:image-format=vhd +``` + +### Specifying image formats for VDIs migration + +When migrating a VDI, an API client may need to specify the desired image format if +the destination SR supports multiple storage formats. + +#### VDI pool migrate + +To support this, a new parameter, `dest_img_format`, is introduced to +`VDI.pool_migrate`. This field accepts a string specifying the desired format (e.g., *qcow2*), +ensuring that the VDI is migrated in the correct format. The new signature of +`VDI.pool_migrate` will be +`VDI ref pool_migrate (session ref, VDI ref, SR ref, string, (string -> string) map)`. + +If the specified format is not supported or cannot be used (e.g., due to size limitations), +an error will be generated. Validation will be performed as early as possible to prevent +disruptions during migration. These checks can be performed by examining the XAPI database +to determine whether the SR provided as the destination has a corresponding SM object with +the expected format. If this is not the case, a `format not found` error will be returned. +If no format is specified by the client, the destination driver will determine the appropriate +format. + +```bash +# xe vdi-pool-migrate \ + uuid= \ + sr-uuid= \ + dest-img-format=qcow2 +``` + +#### VM migration to remote host + +A VDI migration can also occur during a VM migration. In this case, we need to +be able to specify the expected destination format as well. Unlike `VDI.pool_migrate`, +which applies to a single VDI, VM migration may involve multiple VDIs. +The current signature of `VM.migrate_send` is `(session ref, VM ref, (string -> string) map, +bool, (VDI ref -> SR ref) map, (VIF ref -> network ref) map, (string -> string) map, +(VGPU ref -> GPU_group ref) map)`. Thus there is already a parameter that maps each source +VDI to its destination SR. We propose to add a new parameter that allows specifying the +desired destination format for a given source VDI: `(VDI ref -> string)`. It is +similar to the VDI-to-SR mapping. We will update the XE cli to support this new format. +It would be `image_format:=`: + +```bash +# xe vm-migrate \ + host-uuid= \ + remote-master= \ + remote-password= \ + remote-username= \ + vdi:= \ + vdi:= \ + image-format:=vhd \ + image-format:=qcow2 \ + uuid= +``` +The destination image format would be a string such as *vhd*, *qcow2*, or another +supported format. It is optional to specify a format. If omitted, the driver +managing the destination SR will determine the appropriate format. +As with VDI pool migration, if this parameter is not supported by the SM driver, +a `format not found` error will be returned. The validation must happen before +sending a creation message to the SM driver, ideally at the same time as checking +whether all VDIs can be migrated. + +To be able to check the format, we will need to modify `VM.assert_can_migrate` and +add the mapping from VDI references to their image formats, as is done in `VM.migrate_send`. # Impact -- **Data Model:** A new field (`supported-image-formats`) is added to the SM records. +It should have no impact on existing storage repositories that do not provide any information +about the supported image format. + +This change impacts the SM data model, and as such, the XAPI database version will +be incremented. It also impacts the API. + +- **Data Model:** + - A new field (`supported_image_formats`) is added to the SM records. + - A new parameter is added to `VM.migrate_send`: `(VDI ref -> string) map` + - A new parameter is added to `VM.assert_can_migrate`: `(VDI ref -> string) map` + - A new parameter is added to `VDI.pool_migrate`: `string` - **Client Awareness:** Clients like the `xe` CLI will now be able to query and display the supported image formats for a given SR. - **Database Versioning:** The XAPI database version will be updated to reflect this change. From ec612aebb261a6f3b1ad0e27531bb602f622c204 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 24 Jun 2025 11:02:45 +0100 Subject: [PATCH 321/492] CA-411477: Fix SM API version check failure When shutdown a VM, xapi will check SM API version to decide if to call `post_deactivate_hook`. But if the SR has already been unplugged, the checking will fail. Solution: Check if the dp of the SR still exists. If not, skip the SM API checking. Signed-off-by: Bengang Yuan --- ocaml/xapi/storage_mux.ml | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 7e66e1a4d87..0427f76ca54 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -644,16 +644,23 @@ module Mux = struct with_dbg ~name:"VDI.deativate" ~dbg @@ fun di -> info "VDI.deactivate dbg:%s dp:%s sr:%s vdi:%s vm:%s" dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm ; - (*XX The hook should not be called here, nor should storage_mux care about - the SMAPI version of the SR, but as xapi-storage-script cannot call code - xapi, and smapiv1_wrapper has state tracking logic, the hook has to be placed - here for now. *) - if smapi_version_of_sr sr = SMAPIv3 then - Storage_migrate.post_deactivate_hook ~sr ~vdi ~dp + let open DP_info in + match read dp with + | Some {sr; vdi; vm; _} -> + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm ; + (*XX The hook should not be called here, nor should storage_mux care about + the SMAPI version of the SR, but as xapi-storage-script cannot call code + xapi, and smapiv1_wrapper has state tracking logic, the hook has to be placed + here for now. *) + if smapi_version_of_sr sr = SMAPIv3 then + Storage_migrate.post_deactivate_hook ~sr ~vdi ~dp + | None -> + info + "dp %s is not associated with a locally attached VDI; nothing to do" + dp let detach () ~dbg ~dp ~sr ~vdi ~vm = with_dbg ~name:"VDI.detach" ~dbg @@ fun di -> From 5351b0b62dfa3365cbb884f91a14b69c4ab6f812 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Wed, 2 Apr 2025 21:31:01 +0100 Subject: [PATCH 322/492] CP-54207: Move VBD_attach outside of VM migrate downtime VBDs can be attached to multiple VMs, so now that VBD_plug has been split into VBD_attach and VBD_activate, the attach can happen outside of the VM migrate downtime. This doesn't change the overall duration of the migration but can reduce the downtime by several seconds. This new functionality is dependent on two flags: firstly, xenopsd_vbd_plug_unplug_legacy must be false so that the VBD_attach and VBD_activate are separate atoms. This is off by default. Then there is another flag can_attach_early which is currently true iff the VBD's SM has required_api_version >= 3.0 Signed-off-by: Steven Woods --- ocaml/xapi-idl/xen/xenops_interface.ml | 1 + ocaml/xapi/xapi_sr.ml | 12 ++++++ ocaml/xapi/xapi_xenops.ml | 30 ++++++++++++++ ocaml/xenopsd/cli/xn.ml | 1 + ocaml/xenopsd/lib/xenops_server.ml | 55 +++++++++++++++++++++----- 5 files changed, 89 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 9b3f2941910..41eb44e0875 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -303,6 +303,7 @@ module Vbd = struct ; extra_private_keys: (string * string) list [@default []] ; qos: qos option [@default None] ; persistent: bool [@default true] + ; can_attach_early: bool [@default false] } [@@deriving rpcty] diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 4a0684147af..8261757bb5e 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -1080,3 +1080,15 @@ let get_live_hosts ~__context ~sr = Xapi_vm_helpers.assert_can_see_specified_SRs ~__context ~reqd_srs:[sr] ~host in Xapi_vm_helpers.possible_hosts ~__context ~choose_fn () + +let required_api_version_of_sr ~__context ~sr = + let sr_type = Db.SR.get_type ~__context ~self:sr in + let expr = + Xapi_database.Db_filter_types.(Eq (Field "type", Literal sr_type)) + in + match Db.SM.get_records_where ~__context ~expr with + | (_, sm) :: _ -> + Some sm.API.sM_required_api_version + | [] -> + warn "Couldn't find SM with type %s" sr_type ; + None diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 8ad3c8b9962..83469f58eaf 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -553,6 +553,10 @@ let list_net_sriov_vf_pcis ~__context ~vm = None ) +module StringMap = Map.Make (String) + +let sr_version_cache = ref StringMap.empty + module MD = struct (** Convert between xapi DB records and xenopsd records *) @@ -684,6 +688,31 @@ module MD = struct ) else disk_of_vdi ~__context ~self:vbd.API.vBD_VDI in + let can_attach_early = + let sr_opt = + try Some (Db.VDI.get_SR ~__context ~self:vbd.API.vBD_VDI) + with _ -> None + in + match sr_opt with + | Some sr -> ( + let sr_key = Ref.string_of sr in + match StringMap.find_opt sr_key !sr_version_cache with + | Some cached_api_version -> + Version.String.ge cached_api_version "3.0" + | None -> ( + match Xapi_sr.required_api_version_of_sr ~__context ~sr with + | Some api_version -> + sr_version_cache := + StringMap.add sr_key api_version !sr_version_cache ; + Version.String.ge api_version "3.0" + | None -> + false + ) + ) + | None -> + (* If we can't get the SR, we have to default to false *) + false + in { id= (vm.API.vM_uuid, Device_number.to_linux_device device_number) ; position= Some device_number @@ -707,6 +736,7 @@ module MD = struct ( try Db.VDI.get_on_boot ~__context ~self:vbd.API.vBD_VDI = `persist with _ -> true ) + ; can_attach_early } let of_pvs_proxy ~__context vif proxy = diff --git a/ocaml/xenopsd/cli/xn.ml b/ocaml/xenopsd/cli/xn.ml index a6ed6a884bd..24fecb9cf09 100644 --- a/ocaml/xenopsd/cli/xn.ml +++ b/ocaml/xenopsd/cli/xn.ml @@ -278,6 +278,7 @@ let vbd_of_disk_info vm_id info = ; extra_private_keys= [] ; qos= None ; persistent= true + ; can_attach_early= false } let print_disk vbd = diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 569dabc11a1..15715ac7ac7 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1763,7 +1763,8 @@ let rec atomics_of_operation = function serial "VIF.activate_and_plug" ~id [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] ) - | VM_restore_devices (id, restore_vifs) -> + | VM_restore_devices (id, migration) -> + let restore_vifs = not migration in let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in let vgpus = VGPU_DB.vgpus id in let pcis = PCI_DB.pcis id |> pci_plug_order in @@ -1773,8 +1774,22 @@ let rec atomics_of_operation = function let name_multi = pf "VBDs.activate_and_plug %s" typ in let name_one = pf "VBD.activate_and_plug %s" typ in parallel_map name_multi ~id vbds (fun vbd -> - serial name_one ~id - [VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id] + (* When migrating, attach early if the vbd's SM allows it. + Note: there is a bug here for SxM if migrating between API + versions as the Vbd's new SR won't have propagated to xenopsd + yet. This means can_attach_early will be based on the origin SR. + This is a non-issue as v1 <-> v3 migration is still experimental + and v1 is already early-attaching in SxM through mirroring. + *) + if + migration + && (not !xenopsd_vbd_plug_unplug_legacy) + && vbd.Vbd.can_attach_early + then + [VBD_activate vbd.Vbd.id] + else + serial name_one ~id + [VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id] ) in [ @@ -1897,7 +1912,7 @@ let rec atomics_of_operation = function ] ; vgpu_start_operations ; [VM_restore (id, data, vgpu_data)] - ; atomics_of_operation (VM_restore_devices (id, true)) + ; atomics_of_operation (VM_restore_devices (id, false)) ; [ (* At this point the domain is considered survivable. *) VM_set_domain_action_request (id, None) @@ -2696,9 +2711,9 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = | VM_restore_vifs id -> debug "VM_restore_vifs %s" id ; perform_atomics (atomics_of_operation op) t - | VM_restore_devices (id, restore_vifs) -> + | VM_restore_devices (id, migration) -> (* XXX: this is delayed due to the 'attach'/'activate' behaviour *) - debug "VM_restore_devices %s %b" id restore_vifs ; + debug "VM_restore_devices %s %b" id migration ; perform_atomics (atomics_of_operation op) t | VM_resume (id, _data) -> debug "VM.resume %s" id ; @@ -3022,11 +3037,31 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ( try let no_sharept = VGPU_DB.vgpus id |> List.exists is_no_sharept in debug "VM %s no_sharept=%b (%s)" id no_sharept __LOC__ ; + (* If plug is split into activate and attach, we could attach + early so that it is outside of the VM downtime (if the SM + supports this) *) + let early_attach = + parallel_map "VBDs.set_active_and_attach" ~id (VBD_DB.vbds id) + (fun vbd -> + if + (not !xenopsd_vbd_plug_unplug_legacy) + && vbd.Vbd.can_attach_early + then + serial "VBD.set_active_and_attach" ~id + [ + VBD_set_active (vbd.Vbd.id, true) + ; VBD_attach vbd.Vbd.id + ] + else + [] + ) + in perform_atomics ([VM_create (id, Some memory_limit, Some final_id, no_sharept)] - @ (* Perform as many operations as possible on the destination - domain before pausing the original domain *) - atomics_of_operation (VM_restore_vifs id) + (* Perform as many operations as possible on the destination + domain before pausing the original domain *) + @ atomics_of_operation (VM_restore_vifs id) + @ early_attach ) t ; Handshake.send s Handshake.Success @@ -3142,7 +3177,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ) ; debug "VM.receive_memory: restoring remaining devices and unpausing" ; perform_atomics - (atomics_of_operation (VM_restore_devices (final_id, false)) + (atomics_of_operation (VM_restore_devices (final_id, true)) @ [ VM_unpause final_id ; VM_set_domain_action_request (final_id, None) From 4697489ae095af240c205c662bf36581c270bd81 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Jun 2025 16:05:39 +0100 Subject: [PATCH 323/492] xenopsd/xc: do try to allocate pages to a particular NUMA node This reverts commit 9e6fb15bb069404a64836dc7c6603d41226cc6bb Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index c1561b862a5..c4730673295 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -886,7 +886,7 @@ let numa_placement domid ~vcpus ~memory affinity = Array.map2 NUMAResource.min_memory (Array.of_list nodes) a in numa_resources := Some nodea ; - let _ = + let memory_plan = match Softaffinity.plan ~vm host nodea with | None -> D.debug "NUMA-aware placement failed for domid %d" domid ; @@ -898,9 +898,28 @@ let numa_placement domid ~vcpus ~memory affinity = done ; mem_plan in - (* Neither xenguest nor emu-manager allow allocating pages to a single - NUMA node, don't return any NUMA in any case. Claiming the memory - would be done here, but it conflicts with DMC. *) + (* Xen only allows a single node when using memory claims, or none at all. *) + let* numa_node, node = + match memory_plan with + | [Node node] -> + Some (Xenctrlext.NumaNode.from node, node) + | [] | _ :: _ :: _ -> + D.debug + "%s: domain %d can't fit a single NUMA node, falling back to \ + default behaviour" + __FUNCTION__ domid ; + None + in + let nr_pages = Int64.div memory 4096L |> Int64.to_int in + try + Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; + Some (node, memory) + with Xenctrlext.Unix_error (errno, _) -> + D.info + "%s: unable to claim enough memory, domain %d won't be hosted in a \ + single NUMA node. (error %s)" + __FUNCTION__ domid + Unix.(error_message errno) ; None ) From d09bcec2ec83578832edab73fbb1b193a177e29c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Jun 2025 16:14:23 +0100 Subject: [PATCH 324/492] xenopsd/xc: Expect an error when claiming pages from a single NUMA node On xen versions that don't support this call yet, xenctrlext will simply fail and continue to behave like before. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/domain.ml | 19 ++++++++++++------- ocaml/xenopsd/xc/xenctrlext.ml | 2 ++ ocaml/xenopsd/xc/xenctrlext.mli | 6 +++++- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index c4730673295..287c1c77b27 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -914,13 +914,18 @@ let numa_placement domid ~vcpus ~memory affinity = try Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; Some (node, memory) - with Xenctrlext.Unix_error (errno, _) -> - D.info - "%s: unable to claim enough memory, domain %d won't be hosted in a \ - single NUMA node. (error %s)" - __FUNCTION__ domid - Unix.(error_message errno) ; - None + with + | Xenctrlext.Not_available -> + (* Xen does not provide the interface to claim pages from a single NUMA + node, ignore the error and continue. *) + None + | Xenctrlext.Unix_error (errno, _) -> + D.info + "%s: unable to claim enough memory, domain %d won't be hosted in a \ + single NUMA node. (error %s)" + __FUNCTION__ domid + Unix.(error_message errno) ; + None ) let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = diff --git a/ocaml/xenopsd/xc/xenctrlext.ml b/ocaml/xenopsd/xc/xenctrlext.ml index 4078ee7b945..3760d1ab35d 100644 --- a/ocaml/xenopsd/xc/xenctrlext.ml +++ b/ocaml/xenopsd/xc/xenctrlext.ml @@ -125,5 +125,7 @@ module NumaNode = struct let from = Fun.id end +exception Not_available + let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages = stub_domain_claim_pages handle domid numa_node nr_pages diff --git a/ocaml/xenopsd/xc/xenctrlext.mli b/ocaml/xenopsd/xc/xenctrlext.mli index 2199f42c452..1572a1a8589 100644 --- a/ocaml/xenopsd/xc/xenctrlext.mli +++ b/ocaml/xenopsd/xc/xenctrlext.mli @@ -102,5 +102,9 @@ module NumaNode : sig val from : int -> t end +exception Not_available + val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit -(** Raises {Unix_error} if there's not enough memory to claim in the system *) +(** Raises {Unix_error} if there's not enough memory to claim in the system. + Raises {Not_available} if a single numa node is requested and xen does not + provide page claiming for single numa nodes. *) From 43a7ab2f8c6126b4e3515be055746e4eeaa48b1b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Jun 2025 16:15:58 +0100 Subject: [PATCH 325/492] xenopsd/xc: fail when claiming pages for a single NUMA node This interface is not yet available in xen, so fail before doing the hypercall. This patch is meant to be reverted on system that provide the new interface for easily test it. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/xc/xenctrlext.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/xenopsd/xc/xenctrlext.ml b/ocaml/xenopsd/xc/xenctrlext.ml index 3760d1ab35d..1c983daae26 100644 --- a/ocaml/xenopsd/xc/xenctrlext.ml +++ b/ocaml/xenopsd/xc/xenctrlext.ml @@ -128,4 +128,6 @@ end exception Not_available let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages = + if numa_node <> NumaNode.none then + raise Not_available ; stub_domain_claim_pages handle domid numa_node nr_pages From 24ebb5b1356247da57f667b73852be7d9419a93b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 26 Jun 2025 14:46:55 +0100 Subject: [PATCH 326/492] idl: Remove unused vm_lacks_feature_* errors Most of these have been unused for almost 10 years since c4ccc564e ("CA-217842: Replace instances of vm_lacks_feature_x with vm_lacks_feature") Drop them. Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel_errors.ml | 20 -------------------- ocaml/xapi-consts/api_errors.ml | 9 --------- 2 files changed, 29 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index b22d91f9715..30e185ac192 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -532,26 +532,6 @@ let _ = "You attempted an operation on a VM which requires a more recent version \ of the PV drivers. Please upgrade your PV drivers." () ; - error Api_errors.vm_lacks_feature_shutdown ["vm"] - ~doc: - "You attempted an operation which needs the cooperative shutdown feature \ - on a VM which lacks it." - () ; - error Api_errors.vm_lacks_feature_vcpu_hotplug ["vm"] - ~doc: - "You attempted an operation which needs the VM hotplug-vcpu feature on a \ - VM which lacks it." - () ; - error Api_errors.vm_lacks_feature_suspend ["vm"] - ~doc: - "You attempted an operation which needs the VM cooperative suspend \ - feature on a VM which lacks it." - () ; - error Api_errors.vm_lacks_feature_static_ip_setting ["vm"] - ~doc: - "You attempted an operation which needs the VM static-ip-setting feature \ - on a VM which lacks it." - () ; error Api_errors.vm_lacks_feature ["vm"] ~doc:"You attempted an operation on a VM which lacks the feature." () ; error Api_errors.vm_is_template ["vm"] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index d5927c91bfb..077a8dacbf9 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -440,15 +440,6 @@ let vm_old_pv_drivers = add_error "VM_OLD_PV_DRIVERS" let vm_lacks_feature = add_error "VM_LACKS_FEATURE" -let vm_lacks_feature_shutdown = add_error "VM_LACKS_FEATURE_SHUTDOWN" - -let vm_lacks_feature_suspend = add_error "VM_LACKS_FEATURE_SUSPEND" - -let vm_lacks_feature_vcpu_hotplug = add_error "VM_LACKS_FEATURE_VCPU_HOTPLUG" - -let vm_lacks_feature_static_ip_setting = - add_error "VM_LACKS_FEATURE_STATIC_IP_SETTING" - let vm_cannot_delete_default_template = add_error "VM_CANNOT_DELETE_DEFAULT_TEMPLATE" From d9233f99d3a34c7ad399786f45b4581cdad35279 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 22 Apr 2025 09:30:40 +0100 Subject: [PATCH 327/492] python: Add qcow2-to-stdout.py script Taken from https://github.com/qemu/qemu/blob/a9cd5bc6399a80fcf233ed0fffe6067b731227d8/scripts/qcow2-to-stdout.py > This tool converts a disk image to qcow2, writing the result directly > to stdout. This can be used for example to send the generated file > over the network. > > This is equivalent to using qemu-img to convert a file to qcow2 and > then writing the result to stdout, with the difference that this tool > does not need to create this temporary qcow2 file and therefore does > not need any additional disk space. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_globs.ml | 3 + python3/Makefile | 1 + python3/libexec/qcow2-to-stdout.py | 450 +++++++++++++++++++++++++++++ 3 files changed, 454 insertions(+) create mode 100755 python3/libexec/qcow2-to-stdout.py diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index e3957deea71..2390b93ab01 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -805,6 +805,8 @@ let sparse_dd = ref "sparse_dd" let vhd_tool = ref "vhd-tool" +let qcow_to_stdout = ref "/opt/xensource/libexec/qcow2-to-stdout.py" + let fence = ref "fence" let host_bugreport_upload = ref "host-bugreport-upload" @@ -1799,6 +1801,7 @@ module Resources = struct ) ; ("sparse_dd", sparse_dd, "Path to sparse_dd") ; ("vhd-tool", vhd_tool, "Path to vhd-tool") + ; ("qcow_to_stdout", qcow_to_stdout, "Path to qcow-to-stdout script") ; ("fence", fence, "Path to fence binary, used for HA host fencing") ; ( "host-bugreport-upload" , host_bugreport_upload diff --git a/python3/Makefile b/python3/Makefile index fb13068ca0e..3646ad9f54a 100644 --- a/python3/Makefile +++ b/python3/Makefile @@ -30,6 +30,7 @@ install: $(IPROG) libexec/mail-alarm $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/backup-sr-metadata.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/restore-sr-metadata.py $(DESTDIR)$(LIBEXECDIR) + $(IPROG) libexec/qcow2-to-stdout.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) bin/hfx_filename $(DESTDIR)$(OPTDIR)/bin $(IPROG) bin/xe-reset-networking $(DESTDIR)$(OPTDIR)/bin diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py new file mode 100755 index 00000000000..8109f8fc351 --- /dev/null +++ b/python3/libexec/qcow2-to-stdout.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python3 + +# This tool reads a disk image in any format and converts it to qcow2, +# writing the result directly to stdout. +# +# Copyright (C) 2024 Igalia, S.L. +# +# Authors: Alberto Garcia +# Madeeha Javed +# +# SPDX-License-Identifier: GPL-2.0-or-later +# +# qcow2 files produced by this script are always arranged like this: +# +# - qcow2 header +# - refcount table +# - refcount blocks +# - L1 table +# - L2 tables +# - Data clusters +# +# A note about variable names: in qcow2 there is one refcount table +# and one (active) L1 table, although each can occupy several +# clusters. For the sake of simplicity the code sometimes talks about +# refcount tables and L1 tables when referring to those clusters. + +import argparse +import errno +import math +import os +import signal +import struct +import subprocess +import sys +import tempfile +import time +from contextlib import contextmanager + +QCOW2_DEFAULT_CLUSTER_SIZE = 65536 +QCOW2_DEFAULT_REFCOUNT_BITS = 16 +QCOW2_FEATURE_NAME_TABLE = 0x6803F857 +QCOW2_DATA_FILE_NAME_STRING = 0x44415441 +QCOW2_V3_HEADER_LENGTH = 112 # Header length in QEMU 9.0. Must be a multiple of 8 +QCOW2_INCOMPAT_DATA_FILE_BIT = 2 +QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1 +QCOW_OFLAG_COPIED = 1 << 63 +QEMU_STORAGE_DAEMON = "qemu-storage-daemon" + + +def bitmap_set(bitmap, idx): + bitmap[idx // 8] |= 1 << (idx % 8) + + +def bitmap_is_set(bitmap, idx): + return (bitmap[idx // 8] & (1 << (idx % 8))) != 0 + + +def bitmap_iterator(bitmap, length): + for idx in range(length): + if bitmap_is_set(bitmap, idx): + yield idx + + +def align_up(num, d): + return d * math.ceil(num / d) + + +# Holes in the input file contain only zeroes so we can skip them and +# save time. This function returns the indexes of the clusters that +# are known to contain data. Those are the ones that we need to read. +def clusters_with_data(fd, cluster_size): + data_to = 0 + while True: + try: + data_from = os.lseek(fd, data_to, os.SEEK_DATA) + data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size) + for idx in range(data_from // cluster_size, data_to // cluster_size): + yield idx + except OSError as err: + if err.errno == errno.ENXIO: # End of file reached + break + raise err + + +# write_qcow2_content() expects a raw input file. If we have a different +# format we can use qemu-storage-daemon to make it appear as raw. +@contextmanager +def get_input_as_raw_file(input_file, input_format): + if input_format == "raw": + yield input_file + return + try: + temp_dir = tempfile.mkdtemp() + pid_file = os.path.join(temp_dir, "pid") + raw_file = os.path.join(temp_dir, "raw") + open(raw_file, "wb").close() + ret = subprocess.run( + [ + QEMU_STORAGE_DAEMON, + "--daemonize", + "--pidfile", pid_file, + "--blockdev", f"driver=file,node-name=file0,driver=file,filename={input_file},read-only=on", + "--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on", + "--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={raw_file},writable=off", + ], + capture_output=True, + ) + if ret.returncode != 0: + sys.exit("[Error] Could not start the qemu-storage-daemon:\n" + + ret.stderr.decode().rstrip('\n')) + yield raw_file + finally: + # Kill the storage daemon on exit + # and remove all temporary files + if os.path.exists(pid_file): + with open(pid_file, "r") as f: + pid = int(f.readline()) + os.kill(pid, signal.SIGTERM) + while os.path.exists(pid_file): + time.sleep(0.1) + os.unlink(raw_file) + os.rmdir(temp_dir) + + +def write_features(cluster, offset, data_file_name): + if data_file_name is not None: + encoded_name = data_file_name.encode("utf-8") + padded_name_len = align_up(len(encoded_name), 8) + struct.pack_into(f">II{padded_name_len}s", cluster, offset, + QCOW2_DATA_FILE_NAME_STRING, + len(encoded_name), + encoded_name) + offset += 8 + padded_name_len + + qcow2_features = [ + # Incompatible + (0, 0, "dirty bit"), + (0, 1, "corrupt bit"), + (0, 2, "external data file"), + (0, 3, "compression type"), + (0, 4, "extended L2 entries"), + # Compatible + (1, 0, "lazy refcounts"), + # Autoclear + (2, 0, "bitmaps"), + (2, 1, "raw external data"), + ] + struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE) + struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48) + offset += 8 + for feature_type, feature_bit, feature_name in qcow2_features: + struct.pack_into(">BB46s", cluster, offset, + feature_type, feature_bit, feature_name.encode("ascii")) + offset += 48 + + +def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw): + # Some basic values + l1_entries_per_table = cluster_size // 8 + l2_entries_per_table = cluster_size // 8 + refcounts_per_table = cluster_size // 8 + refcounts_per_block = cluster_size * 8 // refcount_bits + + # Virtual disk size, number of data clusters and L1 entries + disk_size = align_up(os.path.getsize(input_file), 512) + total_data_clusters = math.ceil(disk_size / cluster_size) + l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) + allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) + + # Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h) + if (l1_entries * 8) > (32 * 1024 * 1024): + sys.exit("[Error] The image size is too large. Try using a larger cluster size.") + + # Two bitmaps indicating which L1 and L2 entries are set + l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8) + l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8) + allocated_l2_tables = 0 + allocated_data_clusters = 0 + + if data_file_raw: + # If data_file_raw is set then all clusters are allocated and + # we don't need to read the input file at all. + allocated_l2_tables = l1_entries + for idx in range(l1_entries): + bitmap_set(l1_bitmap, idx) + for idx in range(total_data_clusters): + bitmap_set(l2_bitmap, idx) + else: + # Open the input file for reading + fd = os.open(input_file, os.O_RDONLY) + zero_cluster = bytes(cluster_size) + # Read all the clusters that contain data + for idx in clusters_with_data(fd, cluster_size): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + # If a cluster has non-zero data then it must be allocated + # in the output file and its L2 entry must be set + if cluster != zero_cluster: + bitmap_set(l2_bitmap, idx) + allocated_data_clusters += 1 + # Allocated data clusters also need their corresponding L1 entry and L2 table + l1_idx = math.floor(idx / l2_entries_per_table) + if not bitmap_is_set(l1_bitmap, l1_idx): + bitmap_set(l1_bitmap, l1_idx) + allocated_l2_tables += 1 + + # Total amount of allocated clusters excluding the refcount blocks and table + total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables + if data_file_name is None: + total_allocated_clusters += allocated_data_clusters + + # Clusters allocated for the refcount blocks and table + allocated_refcount_blocks = math.ceil(total_allocated_clusters / refcounts_per_block) + allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table) + + # Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables... + # (a) increase total_allocated_clusters, and + # (b) need to be recalculated when total_allocated_clusters is increased + # So we need to repeat the calculation as long as the numbers change + while True: + new_total_allocated_clusters = total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks + new_allocated_refcount_blocks = math.ceil(new_total_allocated_clusters / refcounts_per_block) + if new_allocated_refcount_blocks > allocated_refcount_blocks: + allocated_refcount_blocks = new_allocated_refcount_blocks + allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table) + else: + break + + # Now that we have the final numbers we can update total_allocated_clusters + total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks + + # At this point we have the exact number of clusters that the output + # image is going to use so we can calculate all the offsets. + current_cluster_idx = 1 + + refcount_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_refcount_tables + + refcount_block_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_refcount_blocks + + l1_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_l1_tables + + l2_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_l2_tables + + data_clusters_offset = current_cluster_idx * cluster_size + + # Calculate some values used in the qcow2 header + if allocated_l1_tables == 0: + l1_table_offset = 0 + + hdr_cluster_bits = int(math.log2(cluster_size)) + hdr_refcount_bits = int(math.log2(refcount_bits)) + hdr_length = QCOW2_V3_HEADER_LENGTH + hdr_incompat_features = 0 + if data_file_name is not None: + hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT + hdr_autoclear_features = 0 + if data_file_raw: + hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT + + ### Write qcow2 header + cluster = bytearray(cluster_size) + struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0, + b"QFI\xfb", # QCOW magic string + 3, # version + 0, # backing file offset + 0, # backing file sizes + hdr_cluster_bits, + disk_size, + 0, # encryption method + l1_entries, + l1_table_offset, + refcount_table_offset, + allocated_refcount_tables, + 0, # number of snapshots + 0, # snapshot table offset + hdr_incompat_features, + 0, # compatible features + hdr_autoclear_features, + hdr_refcount_bits, + hdr_length, + ) + + write_features(cluster, hdr_length, data_file_name) + + sys.stdout.buffer.write(cluster) + + ### Write refcount table + cur_offset = refcount_block_offset + remaining_refcount_table_entries = allocated_refcount_blocks # Each entry is a pointer to a refcount block + while remaining_refcount_table_entries > 0: + cluster = bytearray(cluster_size) + to_write = min(remaining_refcount_table_entries, refcounts_per_table) + remaining_refcount_table_entries -= to_write + for idx in range(to_write): + struct.pack_into(">Q", cluster, idx * 8, cur_offset) + cur_offset += cluster_size + sys.stdout.buffer.write(cluster) + + ### Write refcount blocks + remaining_refcount_block_entries = total_allocated_clusters # One entry for each allocated cluster + for tbl in range(allocated_refcount_blocks): + cluster = bytearray(cluster_size) + to_write = min(remaining_refcount_block_entries, refcounts_per_block) + remaining_refcount_block_entries -= to_write + # All refcount entries contain the number 1. The only difference + # is their bit width, defined when the image is created. + for idx in range(to_write): + if refcount_bits == 64: + struct.pack_into(">Q", cluster, idx * 8, 1) + elif refcount_bits == 32: + struct.pack_into(">L", cluster, idx * 4, 1) + elif refcount_bits == 16: + struct.pack_into(">H", cluster, idx * 2, 1) + elif refcount_bits == 8: + cluster[idx] = 1 + elif refcount_bits == 4: + cluster[idx // 2] |= 1 << ((idx % 2) * 4) + elif refcount_bits == 2: + cluster[idx // 4] |= 1 << ((idx % 4) * 2) + elif refcount_bits == 1: + cluster[idx // 8] |= 1 << (idx % 8) + sys.stdout.buffer.write(cluster) + + ### Write L1 table + cur_offset = l2_table_offset + for tbl in range(allocated_l1_tables): + cluster = bytearray(cluster_size) + for idx in range(l1_entries_per_table): + l1_idx = tbl * l1_entries_per_table + idx + if bitmap_is_set(l1_bitmap, l1_idx): + struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED) + cur_offset += cluster_size + sys.stdout.buffer.write(cluster) + + ### Write L2 tables + cur_offset = data_clusters_offset + for tbl in range(l1_entries): + # Skip the empty L2 tables. We can identify them because + # there is no L1 entry pointing at them. + if bitmap_is_set(l1_bitmap, tbl): + cluster = bytearray(cluster_size) + for idx in range(l2_entries_per_table): + l2_idx = tbl * l2_entries_per_table + idx + if bitmap_is_set(l2_bitmap, l2_idx): + if data_file_name is None: + struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED) + cur_offset += cluster_size + else: + struct.pack_into(">Q", cluster, idx * 8, (l2_idx * cluster_size) | QCOW_OFLAG_COPIED) + sys.stdout.buffer.write(cluster) + + ### Write data clusters + if data_file_name is None: + for idx in bitmap_iterator(l2_bitmap, total_data_clusters): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + sys.stdout.buffer.write(cluster) + + if not data_file_raw: + os.close(fd) + + +def main(): + # Command-line arguments + parser = argparse.ArgumentParser( + description="This program converts a QEMU disk image to qcow2 " + "and writes it to the standard output" + ) + parser.add_argument("input_file", help="name of the input file") + parser.add_argument( + "-f", + dest="input_format", + metavar="input_format", + help="format of the input file (default: raw)", + default="raw", + ) + parser.add_argument( + "-c", + dest="cluster_size", + metavar="cluster_size", + help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})", + default=QCOW2_DEFAULT_CLUSTER_SIZE, + type=int, + choices=[1 << x for x in range(9, 22)], + ) + parser.add_argument( + "-r", + dest="refcount_bits", + metavar="refcount_bits", + help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})", + default=QCOW2_DEFAULT_REFCOUNT_BITS, + type=int, + choices=[1 << x for x in range(7)], + ) + parser.add_argument( + "-d", + dest="data_file", + help="create an image with input_file as an external data file", + action="store_true", + ) + parser.add_argument( + "-R", + dest="data_file_raw", + help="enable data_file_raw on the generated image (implies -d)", + action="store_true", + ) + args = parser.parse_args() + + if args.data_file_raw: + args.data_file = True + + if not os.path.isfile(args.input_file): + sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.") + + if args.data_file and args.input_format != "raw": + sys.exit("[Error] External data files can only be used with raw input images") + + # A 512 byte header is too small for the data file name extension + if args.data_file and args.cluster_size == 512: + sys.exit("[Error] External data files require a larger cluster size") + + if sys.stdout.isatty(): + sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.") + + if args.data_file: + data_file_name = args.input_file + else: + data_file_name = None + + with get_input_as_raw_file(args.input_file, args.input_format) as raw_file: + write_qcow2_content( + raw_file, + args.cluster_size, + args.refcount_bits, + data_file_name, + args.data_file_raw, + ) + + +if __name__ == "__main__": + main() + From 978765bc46b8e9da4c9602ed03e024cbd424bb7f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 26 Jun 2025 15:22:13 +0100 Subject: [PATCH 328/492] python3/qcow2-to-stdout: Remove unused code We are only ever going to use this script with "raw" files. Signed-off-by: Andrii Sultanov --- python3/libexec/qcow2-to-stdout.py | 71 +++--------------------------- 1 file changed, 7 insertions(+), 64 deletions(-) diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index 8109f8fc351..724b0d309d8 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -28,13 +28,8 @@ import errno import math import os -import signal import struct -import subprocess import sys -import tempfile -import time -from contextlib import contextmanager QCOW2_DEFAULT_CLUSTER_SIZE = 65536 QCOW2_DEFAULT_REFCOUNT_BITS = 16 @@ -44,7 +39,6 @@ QCOW2_INCOMPAT_DATA_FILE_BIT = 2 QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1 QCOW_OFLAG_COPIED = 1 << 63 -QEMU_STORAGE_DAEMON = "qemu-storage-daemon" def bitmap_set(bitmap, idx): @@ -82,46 +76,6 @@ def clusters_with_data(fd, cluster_size): raise err -# write_qcow2_content() expects a raw input file. If we have a different -# format we can use qemu-storage-daemon to make it appear as raw. -@contextmanager -def get_input_as_raw_file(input_file, input_format): - if input_format == "raw": - yield input_file - return - try: - temp_dir = tempfile.mkdtemp() - pid_file = os.path.join(temp_dir, "pid") - raw_file = os.path.join(temp_dir, "raw") - open(raw_file, "wb").close() - ret = subprocess.run( - [ - QEMU_STORAGE_DAEMON, - "--daemonize", - "--pidfile", pid_file, - "--blockdev", f"driver=file,node-name=file0,driver=file,filename={input_file},read-only=on", - "--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on", - "--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={raw_file},writable=off", - ], - capture_output=True, - ) - if ret.returncode != 0: - sys.exit("[Error] Could not start the qemu-storage-daemon:\n" + - ret.stderr.decode().rstrip('\n')) - yield raw_file - finally: - # Kill the storage daemon on exit - # and remove all temporary files - if os.path.exists(pid_file): - with open(pid_file, "r") as f: - pid = int(f.readline()) - os.kill(pid, signal.SIGTERM) - while os.path.exists(pid_file): - time.sleep(0.1) - os.unlink(raw_file) - os.rmdir(temp_dir) - - def write_features(cluster, offset, data_file_name): if data_file_name is not None: encoded_name = data_file_name.encode("utf-8") @@ -375,13 +329,6 @@ def main(): "and writes it to the standard output" ) parser.add_argument("input_file", help="name of the input file") - parser.add_argument( - "-f", - dest="input_format", - metavar="input_format", - help="format of the input file (default: raw)", - default="raw", - ) parser.add_argument( "-c", dest="cluster_size", @@ -420,9 +367,6 @@ def main(): if not os.path.isfile(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.") - if args.data_file and args.input_format != "raw": - sys.exit("[Error] External data files can only be used with raw input images") - # A 512 byte header is too small for the data file name extension if args.data_file and args.cluster_size == 512: sys.exit("[Error] External data files require a larger cluster size") @@ -435,14 +379,13 @@ def main(): else: data_file_name = None - with get_input_as_raw_file(args.input_file, args.input_format) as raw_file: - write_qcow2_content( - raw_file, - args.cluster_size, - args.refcount_bits, - data_file_name, - args.data_file_raw, - ) + write_qcow2_content( + args.input_file, + args.cluster_size, + args.refcount_bits, + data_file_name, + args.data_file_raw, + ) if __name__ == "__main__": From 7708c456295989f716473cac020458d4312dded6 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 22 Apr 2025 16:16:53 +0100 Subject: [PATCH 329/492] python3/qcow2-to-stdout: Update to handle block special files The original script is designed to only work on regular files. Adapt it slightly to work on special block devices (since that's what we want to export from). Aside from dropping some unnecessary checks and replacing os.path.getsize with a functional equivalent that works on block special files, these changes remove detection of "holes" in the raw file, since SEEK_DATA and SEEK_HOLE are unimplemented in this case. Signed-off-by: Andrii Sultanov --- python3/libexec/qcow2-to-stdout.py | 33 ++++++++---------------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index 724b0d309d8..8824564e6fa 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -25,7 +25,6 @@ # refcount tables and L1 tables when referring to those clusters. import argparse -import errno import math import os import struct @@ -59,23 +58,6 @@ def align_up(num, d): return d * math.ceil(num / d) -# Holes in the input file contain only zeroes so we can skip them and -# save time. This function returns the indexes of the clusters that -# are known to contain data. Those are the ones that we need to read. -def clusters_with_data(fd, cluster_size): - data_to = 0 - while True: - try: - data_from = os.lseek(fd, data_to, os.SEEK_DATA) - data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size) - for idx in range(data_from // cluster_size, data_to // cluster_size): - yield idx - except OSError as err: - if err.errno == errno.ENXIO: # End of file reached - break - raise err - - def write_features(cluster, offset, data_file_name): if data_file_name is not None: encoded_name = data_file_name.encode("utf-8") @@ -115,8 +97,12 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, refcounts_per_table = cluster_size // 8 refcounts_per_block = cluster_size * 8 // refcount_bits + # Open the input file for reading + fd = os.open(input_file, os.O_RDONLY) + # Virtual disk size, number of data clusters and L1 entries - disk_size = align_up(os.path.getsize(input_file), 512) + block_device_size = os.lseek(fd, 0, os.SEEK_END) + disk_size = align_up(block_device_size, 512) total_data_clusters = math.ceil(disk_size / cluster_size) l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) @@ -140,11 +126,10 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, for idx in range(total_data_clusters): bitmap_set(l2_bitmap, idx) else: - # Open the input file for reading - fd = os.open(input_file, os.O_RDONLY) zero_cluster = bytes(cluster_size) + last_cluster = align_up(block_device_size, cluster_size) // cluster_size # Read all the clusters that contain data - for idx in clusters_with_data(fd, cluster_size): + for idx in range(0, last_cluster): cluster = os.pread(fd, cluster_size, cluster_size * idx) # If the last cluster is smaller than cluster_size pad it with zeroes if len(cluster) < cluster_size: @@ -364,8 +349,8 @@ def main(): if args.data_file_raw: args.data_file = True - if not os.path.isfile(args.input_file): - sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.") + if not os.path.exists(args.input_file): + sys.exit(f"[Error] {args.input_file} does not exist.") # A 512 byte header is too small for the data file name extension if args.data_file and args.cluster_size == 512: From 8d3bf09d662d5d33166c64d08418e99cff79885e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 15 May 2025 11:16:30 +0100 Subject: [PATCH 330/492] python3/qcow2-to-stdout: Add --diff option to export only changed clusters When specified, only the clusters that have changed between diff_file_name and input_file will be allocated and exported in a sparse manner. This is analogous to vhd-tool's --relative-to option. Example usage: ``` Create an empty raw file: $ dd if=/dev/zero of=disk.before.img bs=1M count=100 Fill the first three clusters with random data $ dd if=/dev/random conv=notrunc of=disk.before.img bs=65536 count=3 $ python3/libexec/qcow2-to-stdout.py disk.before.img > disk.before.qcow2 Check that only the first three clusters are allocated (each is 0x10000 in length) $ qemu-img map disk.before.qcow2 Offset Length Mapped to File 0 0x30000 0x50000 disk.before.qcow2 Overwrite the 2nd and 3rd clusters with new data $ cp disk.before.img disk.after.img $ dd if=/dev/random conv=notrunc of=disk.after.img bs=65536 count=2 oseek=1 Export the difference, verifying that only the 2nd and 3rd clusters are allocated $ python3/libexec/qcow2-to-stdout.py disk.after.img --diff disk.before.img > disk.diff.qcow2 $ qemu-img map disk.diff.qcow2 Offset Length Mapped to File 0x10000 0x20000 0x50000 disk.diff.qcow2 The image can be recreated if the base is imported first, with the difference overwriting it $ qemu-img convert -f qcow2 -O raw disk.before.qcow2 disk.test.img $ qemu-img convert -f qcow2 -O raw disk.diff.qcow2 disk.test.img --target-is-zero -n $ diff disk.test.img disk.after.img ``` Signed-off-by: Andrii Sultanov --- python3/libexec/qcow2-to-stdout.py | 62 +++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index 8824564e6fa..b0638bc5904 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -90,7 +90,8 @@ def write_features(cluster, offset, data_file_name): offset += 48 -def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw): +def write_qcow2_content(input_file, cluster_size, refcount_bits, + data_file_name, data_file_raw, diff_file_name): # Some basic values l1_entries_per_table = cluster_size // 8 l2_entries_per_table = cluster_size // 8 @@ -126,17 +127,17 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, for idx in range(total_data_clusters): bitmap_set(l2_bitmap, idx) else: - zero_cluster = bytes(cluster_size) - last_cluster = align_up(block_device_size, cluster_size) // cluster_size - # Read all the clusters that contain data - for idx in range(0, last_cluster): - cluster = os.pread(fd, cluster_size, cluster_size * idx) + # Allocates a cluster in the appropriate bitmaps if it's different + # from cluster_to_compare_with + def check_cluster_allocate(idx, cluster, cluster_to_compare_with): + nonlocal allocated_data_clusters + nonlocal allocated_l2_tables # If the last cluster is smaller than cluster_size pad it with zeroes if len(cluster) < cluster_size: cluster += bytes(cluster_size - len(cluster)) - # If a cluster has non-zero data then it must be allocated - # in the output file and its L2 entry must be set - if cluster != zero_cluster: + # If a cluster has different data from the cluster_to_compare_with then it + # must be allocated in the output file and its L2 entry must be set + if cluster != cluster_to_compare_with: bitmap_set(l2_bitmap, idx) allocated_data_clusters += 1 # Allocated data clusters also need their corresponding L1 entry and L2 table @@ -145,6 +146,36 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, bitmap_set(l1_bitmap, l1_idx) allocated_l2_tables += 1 + zero_cluster = bytes(cluster_size) + last_cluster = align_up(block_device_size, cluster_size) // cluster_size + if diff_file_name: + # Read all the clusters that differ from the diff_file_name + diff_fd = os.open(diff_file_name, os.O_RDONLY) + diff_block_device_size = os.lseek(diff_fd, 0, os.SEEK_END) + last_diff_cluster = align_up(diff_block_device_size, cluster_size) // cluster_size + # In case input_file is bigger than diff_file_name, first check + # if clusters from diff_file_name differ, and then check if the + # rest contain data + for idx in range(0, last_diff_cluster): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * idx) + + # If a cluster has different data from the original_cluster + # then it must be allocated + check_cluster_allocate(idx, cluster, original_cluster) + for idx in range(last_diff_cluster, last_cluster): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + + # If a cluster has different data from the original_cluster + # then it must be allocated + check_cluster_allocate(idx, cluster, zero_cluster) + else: + # Read all the clusters that contain data + for idx in range(0, last_cluster): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + # If a cluster has non-zero data then it must be allocated + check_cluster_allocate(idx, cluster, zero_cluster) + # Total amount of allocated clusters excluding the refcount blocks and table total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables if data_file_name is None: @@ -314,6 +345,15 @@ def main(): "and writes it to the standard output" ) parser.add_argument("input_file", help="name of the input file") + parser.add_argument( + "--diff", + dest="diff_file_name", + metavar="diff_file_name", + help=("name of the original file to compare input_file against. " + "If specified, will only export clusters that are different " + "between the files"), + default=None, + ) parser.add_argument( "-c", dest="cluster_size", @@ -352,6 +392,9 @@ def main(): if not os.path.exists(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist.") + if args.diff_file_name and not os.path.exists(args.diff_file_name): + sys.exit(f"[Error] {args.diff_file_name} does not exist.") + # A 512 byte header is too small for the data file name extension if args.data_file and args.cluster_size == 512: sys.exit("[Error] External data files require a larger cluster size") @@ -370,6 +413,7 @@ def main(): args.refcount_bits, data_file_name, args.data_file_raw, + args.diff_file_name ) From c115a6d81dbf39c649c5f67b21ad3c2659825b64 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 26 Jun 2025 15:19:05 +0100 Subject: [PATCH 331/492] rrdp-squeezed: Don't collect metrics from a domain with missing counters Previously if only some of the values was missing, the arithmetic operations where done and they were taken into account into the host total. When a domain's values are missing, they are not taken into the sum. Also prepare the code to add per-VM metrics by using shared infrastructure and using a record. Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdp-squeezed/dune | 2 +- .../bin/rrdp-squeezed/rrdp_squeezed.ml | 104 +++++++++++------- 2 files changed, 65 insertions(+), 41 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune b/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune index d45dd928de1..ca5b6ae7d88 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune @@ -2,8 +2,8 @@ (modes exe) (name rrdp_squeezed) (libraries - rrdd-plugin + rrdd_plugin_xenctrl rrdd_plugins_libs xapi-stdext-std ezxenstore diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml index 4c0b13cf3e3..455cb03f736 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml @@ -17,10 +17,6 @@ open Rrdd_plugin module Process = Process (struct let name = "xcp-rrdd-squeezed" end) -open Process - -let with_xc f = Xenctrl.with_intf f - module Xs = struct module Client = Xs_client_unix.Client (Xs_transport_unix_client) include Client @@ -38,10 +34,6 @@ module Xs = struct c end -(* Return a list of domids of VMs running on this host *) -let get_running_domains xc = - Xenctrl.domain_getinfolist xc 0 |> List.map (fun di -> di.Xenctrl.domid) - module D = Debug.Make (struct let name = "rrdd-plugins" end) module XSW = Ez_xenstore_watch.Make (D) @@ -106,40 +98,61 @@ end module Watcher = WatchXenstore (MemoryActions) -(* Return a tuple (dynamic-max, dynamic-min, target) for a running VM *) -let get_squeezed_data domid = - let get_current_value ~label current_values = - try IntMap.find domid !current_values - with _ -> - if domid <> 0 then - D.warn "Couldn't find cached %s value for domain %d, using 0" label - domid ; - 0L +type values = { + dynamic_max: int64 option + ; dynamic_min: int64 option + ; target: int64 option +} + +let get_values (_, uuid, domid) = + let get_current_value current_values = + IntMap.find_opt domid !current_values in - ( get_current_value ~label:"dynamic-max" current_dynamic_max_values - , get_current_value ~label:"dynamic-min" current_dynamic_min_values - , get_current_value ~label:"target" current_target_values + ( (uuid, domid) + , { + dynamic_max= get_current_value current_dynamic_max_values + ; dynamic_min= get_current_value current_dynamic_min_values + ; target= get_current_value current_target_values + } ) -let get_datas () = - (* Create a tuple (dynamic-max, dynamic-min, target) for each VM running on the host *) - let domids = with_xc get_running_domains in - List.map get_squeezed_data domids +let get_domain_stats xc = + let _, domains, _ = Xenctrl_lib.domain_snapshot xc in + List.map get_values domains -let generate_squeezed_dss () = +let generate_host_sources counters = let memory_reclaimed, memory_possibly_reclaimed = - get_datas () - (* Calculate metrics - - Host memory reclaimed by squeezed = - sum_across_running_vms(dynamic_max - target) - - Host memory that could be reclaimed by squeezed = - sum_across_running_vms(target - dynamic_min) + (* Calculate host metrics + - Host memory reclaimed by squeezed = + sum_across_running_vms(dynamic_max - target) + - Host memory that could be reclaimed by squeezed = + sum_across_running_vms(target - dynamic_min) *) + let ( let* ) = Option.bind in + counters |> List.fold_left - (fun (acc1, acc2) (max, min, target) -> - ( Int64.add acc1 (Int64.sub max target) - , Int64.add acc2 (Int64.sub target min) - ) + (fun (acc1, acc2) (_, {dynamic_max; dynamic_min; target}) -> + let r = + let* target in + let acc1 = + let* max = dynamic_max in + Some (Int64.add acc1 (Int64.sub max target)) + in + let acc2 = + let* min = dynamic_min in + Some (Int64.add acc2 (Int64.sub target min)) + in + Some (acc1, acc2) + in + match r with + | None | Some (None, None) -> + (acc1, acc2) + | Some (Some acc1, Some acc2) -> + (acc1, acc2) + | Some (Some acc1, None) -> + (acc1, acc2) + | Some (None, Some acc2) -> + (acc1, acc2) ) (Int64.zero, Int64.zero) in @@ -159,11 +172,22 @@ let generate_squeezed_dss () = ) ] +let generate_sources xc () = + let counters = get_domain_stats xc in + generate_host_sources counters + (* This plugin always reports two datasources only, so one page is fine. *) -let shared_page_count = 1 +let host_page_count = 1 + +let vm_page_count = 0 -let _ = - initialise () ; +let shared_page_count = host_page_count + vm_page_count + +let () = Watcher.create_watcher_thread () ; - main_loop ~neg_shift:0.5 ~target:(Reporter.Local shared_page_count) - ~protocol:Rrd_interface.V2 ~dss_f:generate_squeezed_dss + Process.initialise () ; + Xenctrl.with_intf (fun xc -> + Process.main_loop ~neg_shift:0.5 + ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 + ~dss_f:(generate_sources xc) + ) From 165bdec3ab1650beaf0b9ae8c6b0703a632d7ac0 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 26 Jun 2025 15:53:57 +0100 Subject: [PATCH 332/492] rrdp-squeezed: generate per-vm memory target datasources The daemon was already watching the vm memory targets, so it makes sense to make it generate the datasources directly as well. This uses the shared xenctrl infrastructure that's needed to fetch the domains' uuids. This means that xcp-rrdd does not need to collect this anymore, and xenopsd does not need to send it to xcp-rrdd. The function in the idl is safe to delete because the only user was xenopsd, and it was well-protected against errors. This means that if an old version of xenopsd tries to call the function while xcp-rrdd has already been updated, it won't interrupt xneopsd functionality. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-idl/rrd/cli-help.t | 5 --- ocaml/xapi-idl/rrd/rrd_interface.ml | 12 ------ ocaml/xapi/xapi_xenops.ml | 12 ------ ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml | 3 -- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 5 --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 - ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 5 --- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 25 +---------- .../bin/rrdp-squeezed/rrdp_squeezed.ml | 43 ++++++++++++++++--- 9 files changed, 38 insertions(+), 74 deletions(-) diff --git a/ocaml/xapi-idl/rrd/cli-help.t b/ocaml/xapi-idl/rrd/cli-help.t index a503e0b75bb..1a15779d7f7 100644 --- a/ocaml/xapi-idl/rrd/cli-help.t +++ b/ocaml/xapi-idl/rrd/cli-help.t @@ -166,11 +166,6 @@ observed values will be created alongside the standard archive of average values - update_vm_memory_target [OPTION]… domid target - Sets the `memory_target` value for a VM. This is called by xapi - when it is told by xenopsd that squeezed has changed the target - for a VM. - COMMON OPTIONS --help[=FMT] (default=auto) Show this help in format FMT. The value FMT must be one of auto, diff --git a/ocaml/xapi-idl/rrd/rrd_interface.ml b/ocaml/xapi-idl/rrd/rrd_interface.ml index 1cfa1e39a2f..066912eacf2 100644 --- a/ocaml/xapi-idl/rrd/rrd_interface.ml +++ b/ocaml/xapi-idl/rrd/rrd_interface.ml @@ -412,18 +412,6 @@ module RPC_API (R : RPC) = struct ] (value_p @-> returning unit_p rrd_err) - let update_vm_memory_target = - let target_p = - Param.mk ~name:"target" ~description:["VM memory target"] Types.int64 - in - declare "update_vm_memory_target" - [ - "Sets the `memory_target` value for a VM. This is called by xapi when \ - it is told by" - ; "xenopsd that squeezed has changed the target for a VM." - ] - (domid_p @-> target_p @-> returning unit_p rrd_err) - let set_cache_sr = declare "set_cache_sr" [ diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 8ad3c8b9962..213a30c5aff 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2389,18 +2389,6 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = with e -> error "Caught %s: while updating VM %s xsdata" (Printexc.to_string e) id - ) ; - different - (fun x -> x.Vm.memory_target) - Fun.id - (fun memory_target -> - try - debug "xenopsd event: Updating VM %s domid %d memory target" id - domid ; - Rrdd.update_vm_memory_target domid memory_target - with e -> - error "Caught %s: while updating VM %s memory_target" - (Printexc.to_string e) id ) ) state.Vm.domids diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml index f5d977d632c..3ddc24e462f 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml @@ -47,8 +47,6 @@ module type RRDD_IMPLEMENTATION = sig val update_use_min_max : bool -> unit - val update_vm_memory_target : int -> int64 -> unit - val set_cache_sr : string -> unit val unset_cache_sr : unit -> unit @@ -119,7 +117,6 @@ module Make (Impl : RRDD_IMPLEMENTATION) = struct Server.query_possible_sr_dss Impl.query_possible_sr_dss ; Server.query_sr_ds Impl.query_sr_ds ; Server.update_use_min_max Impl.update_use_min_max ; - Server.update_vm_memory_target Impl.update_vm_memory_target ; Server.set_cache_sr Impl.set_cache_sr ; Server.unset_cache_sr Impl.unset_cache_sr ; (* module Plugin*) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 6a1212f178a..15eee76cfe6 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -571,11 +571,6 @@ let update_use_min_max (value : bool) : unit = debug "Updating use_min_max: New value=%b" value ; use_min_max := value -let update_vm_memory_target (domid : int) (target : int64) : unit = - with_lock memory_targets_m (fun _ -> - Hashtbl.replace memory_targets domid target - ) - let set_cache_sr (sr_uuid : string) : unit = with_lock cache_sr_lock (fun () -> cache_sr_uuid := Some sr_uuid) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 000c53de121..bd8ae2e6c99 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -50,8 +50,6 @@ val query_sr_ds : string -> string -> float val update_use_min_max : bool -> unit -val update_vm_memory_target : int -> int64 -> unit - val set_cache_sr : string -> unit val unset_cache_sr : unit -> unit diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 816860e5815..b15e91b50cb 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -30,11 +30,6 @@ let next_iteration_start : Clock.Timer.t ref = (* The mutex that protects the next_iteration_start against data corruption. *) let next_iteration_start_m : Mutex.t = Mutex.create () -(** Cache memory/target values *) -let memory_targets : (int, int64) Hashtbl.t = Hashtbl.create 20 - -let memory_targets_m = Mutex.create () - let cache_sr_uuid : string option ref = ref None let cache_sr_lock = Mutex.create () diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 7f110d7e576..d6850621d0c 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -292,9 +292,6 @@ let domain_snapshot xc = let domains = Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain in - let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in - let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in - Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; domains |> List.to_seq let dss_mem_vms xc = @@ -311,23 +308,6 @@ let dss_mem_vms xc = ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () ) in - let memory_target_opt = - with_lock Rrdd_shared.memory_targets_m (fun _ -> - Hashtbl.find_opt Rrdd_shared.memory_targets domid - ) - in - let mem_target_ds = - Option.map - (fun memory_target -> - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_target" - ~description:"Target of VM balloon driver" ~units:"B" - ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - ) - memory_target_opt - in let other_ds = if domid = 0 then match mem_available () with @@ -359,10 +339,7 @@ let dss_mem_vms xc = ) with Not_found -> None in - let metrics = - List.concat - [main_mem_ds :: Option.to_list other_ds; Option.to_list mem_target_ds] - in + let metrics = List.concat [main_mem_ds :: Option.to_list other_ds] in Some (List.to_seq metrics) in (* CA-34383: Memory updates from paused domains serve no useful purpose. diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml index 455cb03f736..a0a6fab1f65 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml @@ -104,11 +104,11 @@ type values = { ; target: int64 option } -let get_values (_, uuid, domid) = +let get_values ((_, _, domid) as dom) = let get_current_value current_values = IntMap.find_opt domid !current_values in - ( (uuid, domid) + ( dom , { dynamic_max= get_current_value current_dynamic_max_values ; dynamic_min= get_current_value current_dynamic_min_values @@ -172,14 +172,45 @@ let generate_host_sources counters = ) ] +let generate_vm_sources domains = + let metrics_of ((dom, uuid, _), {target; _}) = + let target = + Option.map + (fun target -> + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_target" + ~description:"Target of VM balloon driver" ~units:"B" + ~value:(Rrd.VT_Int64 target) ~ty:Rrd.Gauge ~min:0.0 ~default:true + () + ) + ) + target + in + (* CA-34383: Memory updates from paused domains serve no useful purpose. + During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + if dom.Xenctrl.paused then + [] + else + Option.to_list target + in + + List.concat_map metrics_of domains + let generate_sources xc () = - let counters = get_domain_stats xc in - generate_host_sources counters + let domain_stats = get_domain_stats xc in + generate_host_sources domain_stats @ generate_vm_sources domain_stats + +(** The json-like serialization for 3 dss in dss_mem_vms takes 622 bytes. These + bytes plus some overhead make 1024 bytes an upper bound. *) + +let bytes_per_mem_vm = 1024 -(* This plugin always reports two datasources only, so one page is fine. *) let host_page_count = 1 -let vm_page_count = 0 +let vm_page_count = + ((Rrd_interface.max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 let shared_page_count = host_page_count + vm_page_count From a38ee7f6df17f34e15c131a8de0b4d0223bf6312 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 26 Jun 2025 18:07:43 +0100 Subject: [PATCH 333/492] rrdp-squeezed: collect agent-collected per-vm free memory metrics Since this daemon already uses xenstore to watch other memory keys in xenstore, move another one in here, allows to delete quite a bit of code from xcp-rrdd and drop dependencies as well. Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdd/dune | 4 - ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 144 +----------------- .../bin/rrdp-squeezed/rrdp_squeezed.ml | 92 +++++++++-- 3 files changed, 83 insertions(+), 157 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/dune b/ocaml/xcp-rrdd/bin/rrdd/dune index d84e06e46fd..6ce134dd522 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/dune +++ b/ocaml/xcp-rrdd/bin/rrdd/dune @@ -41,7 +41,6 @@ (libraries astring ezxenstore.core - ezxenstore.watch forkexec http_lib httpsvr @@ -66,9 +65,6 @@ xapi-stdext-threads xapi-stdext-unix xenctrl - xenstore - xenstore.unix - xenstore_transport.unix ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index d6850621d0c..40e5ab34b79 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -119,84 +119,6 @@ let start (xmlrpc_path, http_fwd_path) process = let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -(*****************************************************) -(* xenstore related code *) -(*****************************************************) - -module XSW_Debug = Debug.Make (struct let name = "xenstore_watch" end) - -module Watch = Ez_xenstore_watch.Make (XSW_Debug) - -module Xs = struct - module Client = Xs_client_unix.Client (Xs_transport_unix_client) - - let client = ref None - - (* Initialise the clients on demand - must be done after daemonisation! *) - let get_client () = - match !client with - | Some client -> - client - | None -> - let c = Client.make () in - client := Some c ; - c -end - -(* Map from domid to the latest seen meminfo_free value *) -let current_meminfofree_values = ref Watch.IntMap.empty - -let meminfo_path domid = - Printf.sprintf "/local/domain/%d/data/meminfo_free" domid - -module Meminfo = struct - let watch_token domid = Printf.sprintf "xcp-rrdd:domain-%d" domid - - let interesting_paths_for_domain domid _uuid = [meminfo_path domid] - - let fire_event_on_vm domid domains = - let d = int_of_string domid in - if not (Watch.IntMap.mem d domains) then - info "Ignoring watch on shutdown domain %d" d - else - let path = meminfo_path d in - try - let client = Xs.get_client () in - let meminfo_free_string = - Xs.Client.immediate client (fun xs -> Xs.Client.read xs path) - in - let meminfo_free = Int64.of_string meminfo_free_string in - info "memfree has changed to %Ld in domain %d" meminfo_free d ; - current_meminfofree_values := - Watch.IntMap.add d meminfo_free !current_meminfofree_values - with Xs_protocol.Enoent _hint -> - info - "Couldn't read path %s; forgetting last known memfree value for \ - domain %d" - path d ; - current_meminfofree_values := - Watch.IntMap.remove d !current_meminfofree_values - - let watch_fired _ _xc path domains _ = - match - List.filter (fun x -> x <> "") Astring.String.(cuts ~sep:"/" path) - with - | ["local"; "domain"; domid; "data"; "meminfo_free"] -> - fire_event_on_vm domid domains - | _ -> - debug "Ignoring unexpected watch: %s" path - - let unmanaged_domain _ _ = false - - let found_running_domain _ _ = () - - let domain_appeared _ _ _ = () - - let domain_disappeared _ _ _ = () -end - -module Watcher = Watch.WatchXenstore (Meminfo) - (*****************************************************) (* memory stats *) (*****************************************************) @@ -231,30 +153,6 @@ let bytes_per_mem_vm = 1024 let mem_vm_writer_pages = ((Rrd_interface.max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 -let res_error fmt = Printf.ksprintf Result.error fmt - -let ok x = Result.ok x - -let ( let* ) = Result.bind - -let finally f always = Fun.protect ~finally:always f - -let scanning path f = - let io = Scanf.Scanning.open_in path in - finally (fun () -> f io) (fun () -> Scanf.Scanning.close_in io) - -let scan path = - try - scanning path @@ fun io -> - Scanf.bscanf io {|MemTotal: %_d %_s MemFree: %_d %_s MemAvailable: %Ld %s|} - (fun size kb -> ok (size, kb) - ) - with _ -> res_error "failed to scan %s" path - -let mem_available () = - let* size, kb = scan "/proc/meminfo" in - match kb with "kB" -> ok size | _ -> res_error "unexpected unit: %s" kb - let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] module IntSet = Set.Make (Int) @@ -295,7 +193,7 @@ let domain_snapshot xc = domains |> List.to_seq let dss_mem_vms xc = - let mem_metrics_of (dom, uuid, domid) = + let mem_metrics_of (dom, uuid, _) = let vm_metrics () = let kib = Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages) @@ -308,39 +206,7 @@ let dss_mem_vms xc = ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () ) in - let other_ds = - if domid = 0 then - match mem_available () with - | Ok mem -> - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Dom0 current free memory" - ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - | Error msg -> - let _ = - error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ - msg - in - None - else - try - let mem_free = - Watch.IntMap.find domid !current_meminfofree_values - in - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Memory used as reported by the guest agent" - ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - with Not_found -> None - in - let metrics = List.concat [main_mem_ds :: Option.to_list other_ds] in - Some (List.to_seq metrics) + Some main_mem_ds in (* CA-34383: Memory updates from paused domains serve no useful purpose. During a migrate such updates can also cause undesirable @@ -349,7 +215,7 @@ let dss_mem_vms xc = if dom.Xenctrl.paused then None else vm_metrics () in let domains = domain_snapshot xc in - Seq.filter_map mem_metrics_of domains |> Seq.concat |> List.of_seq + Seq.filter_map mem_metrics_of domains |> List.of_seq (**** Local cache SR stuff *) @@ -786,10 +652,6 @@ let _ = (List.map (fun (name, _) -> writer_basename name) stats_to_write) ; ignore @@ GCLog.start () ; debug "Starting xenstore-watching thread .." ; - let () = - try Watcher.create_watcher_thread () - with _ -> error "xenstore-watching thread has failed" - in let module Daemon = Xapi_stdext_unix.Unixext.Daemon in if Daemon.systemd_booted () then if Daemon.systemd_notify Daemon.State.Ready then diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml index a0a6fab1f65..a3091a90daf 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml @@ -45,12 +45,19 @@ let current_dynamic_min_values = ref IntMap.empty let current_target_values = ref IntMap.empty +let current_free_values = ref IntMap.empty + module MemoryActions = struct let interesting_paths_for_domain domid _ = - let keys = ["dynamic-max"; "dynamic-min"; "target"] in - List.map - (fun key -> Printf.sprintf "/local/domain/%d/memory/%s" domid key) - keys + let keys = + [ + "memory/dynamic-max" + ; "memory/dynamic-min" + ; "memory/target" + ; "data/meminfo_free" + ] + in + List.map (fun key -> Printf.sprintf "/local/domain/%d/%s" domid key) keys let watch_token domid = Printf.sprintf "xcp-rrdd-plugins/squeezed:domain-%d" domid @@ -65,10 +72,7 @@ module MemoryActions = struct try let client = Xs.get_client () in let value = - Xs.immediate client (fun xs -> Xs.read xs path) - |> Int64.of_string - |> Int64.mul 1024L - (* convert from KiB to bytes *) + Xs.immediate client (fun xs -> Xs.read xs path) |> Int64.of_string in current_memory_values := IntMap.add domid value !current_memory_values with Xs_protocol.Enoent _ -> @@ -84,6 +88,8 @@ module MemoryActions = struct read_new_value domid current_dynamic_min_values | ["local"; "domain"; domid; "memory"; "target"] -> read_new_value domid current_target_values + | ["local"; "domain"; domid; "data"; "meminfo_free"] -> + read_new_value domid current_free_values | _ -> D.debug "Ignoring unexpected watch: %s" path @@ -98,10 +104,12 @@ end module Watcher = WatchXenstore (MemoryActions) +(** All these values are reported in KiB *) type values = { dynamic_max: int64 option ; dynamic_min: int64 option ; target: int64 option + ; free: int64 option } let get_values ((_, _, domid) as dom) = @@ -113,6 +121,7 @@ let get_values ((_, _, domid) as dom) = dynamic_max= get_current_value current_dynamic_max_values ; dynamic_min= get_current_value current_dynamic_min_values ; target= get_current_value current_target_values + ; free= get_current_value current_free_values } ) @@ -120,6 +129,8 @@ let get_domain_stats xc = let _, domains, _ = Xenctrl_lib.domain_snapshot xc in List.map get_values domains +let bytes_of_kib kib = Int64.mul 1024L kib + let generate_host_sources counters = let memory_reclaimed, memory_possibly_reclaimed = (* Calculate host metrics @@ -131,7 +142,7 @@ let generate_host_sources counters = let ( let* ) = Option.bind in counters |> List.fold_left - (fun (acc1, acc2) (_, {dynamic_max; dynamic_min; target}) -> + (fun (acc1, acc2) (_, {dynamic_max; dynamic_min; target; _}) -> let r = let* target in let acc1 = @@ -156,6 +167,8 @@ let generate_host_sources counters = ) (Int64.zero, Int64.zero) in + let memory_reclaimed = bytes_of_kib memory_reclaimed in + let memory_possibly_reclaimed = bytes_of_kib memory_possibly_reclaimed in (* Build corresponding Ds.ds values *) [ ( Rrd.Host @@ -172,11 +185,60 @@ let generate_host_sources counters = ) ] +let res_error fmt = Printf.ksprintf Result.error fmt + +let finally f finally = Fun.protect ~finally f + +let scanning path f = + let io = Scanf.Scanning.open_in path in + finally (fun () -> f io) (fun () -> Scanf.Scanning.close_in io) + +let scan path = + try + scanning path @@ fun io -> + Scanf.bscanf io {|MemTotal: %_d %_s MemFree: %_d %_s MemAvailable: %Ld %s|} + (fun size kb -> Ok (size, kb) + ) + with _ -> res_error "failed to scan %s" path + +let free_dom0 uuid = + let result = + match scan "/proc/meminfo" with + | Ok (size, "kB") -> + Ok size + | Ok (_, unit) -> + res_error "unexpected unit: %s" unit + | Error e -> + Error e + in + match result with + | Ok mem -> + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Dom0 current free memory" ~value:(Rrd.VT_Int64 mem) + ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + | Error msg -> + let _ = + D.error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ msg + in + None + +let free_other uuid free = + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Memory used as reported by the guest agent" + ~value:(Rrd.VT_Int64 free) ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + let generate_vm_sources domains = - let metrics_of ((dom, uuid, _), {target; _}) = - let target = + let metrics_of ((dom, uuid, domid), {target; free; _}) = + let target () = Option.map (fun target -> + let target = bytes_of_kib target in ( Rrd.VM uuid , Ds.ds_make ~name:"memory_target" ~description:"Target of VM balloon driver" ~units:"B" @@ -186,6 +248,12 @@ let generate_vm_sources domains = ) target in + let free () = + if domid = 0 then + free_dom0 uuid + else + Option.bind free (free_other uuid) + in (* CA-34383: Memory updates from paused domains serve no useful purpose. During a migrate such updates can also cause undesirable discontinuities in the observed value of memory_actual. Hence, we @@ -193,7 +261,7 @@ let generate_vm_sources domains = if dom.Xenctrl.paused then [] else - Option.to_list target + Option.to_list (target ()) @ Option.to_list (free ()) in List.concat_map metrics_of domains From 0fef829d3e13a7a22ffd637a71a24f7a1c7144d7 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 12 Jun 2025 15:44:03 +0100 Subject: [PATCH 334/492] monitor_mem: Prepare xapi to consolidate memory metrics into a single file Xapi currently reads directly from memory-mapped metrics files. Since we want to move all the memory metrics to rrdp_squeezed, and this will output all the metrics into a single memory-mapped file, move the host and VM memory metrics into a single module, and make them share a bit of code. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/monitor_dbcalls.ml | 3 +- ocaml/xapi/monitor_mem.ml | 177 +++++++++++++++++++++++++++++++++ ocaml/xapi/monitor_mem.mli | 18 ++++ ocaml/xapi/monitor_mem_host.ml | 98 ------------------ ocaml/xapi/monitor_mem_vms.ml | 89 ----------------- quality-gate.sh | 2 +- 6 files changed, 197 insertions(+), 190 deletions(-) create mode 100644 ocaml/xapi/monitor_mem.ml create mode 100644 ocaml/xapi/monitor_mem.mli delete mode 100644 ocaml/xapi/monitor_mem_host.ml delete mode 100644 ocaml/xapi/monitor_mem_vms.ml diff --git a/ocaml/xapi/monitor_dbcalls.ml b/ocaml/xapi/monitor_dbcalls.ml index ab521155d2c..48b96bbd92a 100644 --- a/ocaml/xapi/monitor_dbcalls.ml +++ b/ocaml/xapi/monitor_dbcalls.ml @@ -127,8 +127,7 @@ let monitor_dbcall_thread () = try let rrd_files = Monitor_types.find_rrd_files () in pifs_update_fn () ; - Monitor_mem_host.update rrd_files ; - Monitor_mem_vms.update rrd_files ; + Monitor_mem.update rrd_files ; Monitor_pvs_proxy.update rrd_files ; Thread.delay 5. with e -> diff --git a/ocaml/xapi/monitor_mem.ml b/ocaml/xapi/monitor_mem.ml new file mode 100644 index 00000000000..502f0d6ca7a --- /dev/null +++ b/ocaml/xapi/monitor_mem.ml @@ -0,0 +1,177 @@ +(* + * Copyright (C) Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Mtxext = Xapi_stdext_threads.Threadext.Mutex +module Mcache = Monitor_dbcalls_cache + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D + +let get_datasources ~prefix rrd_files = + List.filter (String.starts_with ~prefix) rrd_files + |> List.map (fun fn -> (fn, Monitor_types.datasources_from_filename fn)) + +module Host = struct + let get_changes datasources = + let named_dss = + List.concat_map + (fun (filename, datasources) -> + try + Mcache.log_errors_from filename ; + datasources + |> List.filter_map (function + | Rrd.Host, ds + when List.mem ds.Ds.ds_name + ["memory_total_kib"; "memory_free_kib"] -> + Some ds + | _ -> + None (* we are only interested in Host memory stats *) + ) + |> List.map (function ds -> + let value = + match ds.Ds.ds_value with + | Rrd.VT_Int64 v -> + Memory.bytes_of_kib v + | Rrd.VT_Float v -> + Memory.bytes_of_kib (Int64.of_float v) + | Rrd.VT_Unknown -> + -1L + in + (ds.Ds.ds_name, value) + ) + with e -> + if not (Mcache.is_ignored filename) then ( + error "Unable to read host memory metrics from %s: %s" filename + (Printexc.to_string e) ; + Mcache.ignore_errors_from filename + ) ; + [] + ) + datasources + in + let free_bytes = List.assoc_opt "memory_free_kib" named_dss in + let total_bytes = List.assoc_opt "memory_total_kib" named_dss in + (* Check if anything has changed since our last reading. *) + match (free_bytes, total_bytes) with + | Some free, Some total + when !Mcache.host_memory_free_cached <> free + || !Mcache.host_memory_total_cached <> total -> + Some (free, total) + | _ -> + None + + let set_changes (free_bytes, total_bytes) = + Mtxext.execute Mcache.host_memory_m (fun _ -> + Mcache.host_memory_free_cached := free_bytes ; + Mcache.host_memory_total_cached := total_bytes + ) + + let update rrd_files = + Server_helpers.exec_with_new_task "Updating host memory metrics" + (fun __context -> + let datasources = + get_datasources ~prefix:Xapi_globs.metrics_prefix_mem_host rrd_files + in + let changes = get_changes datasources in + match changes with + | None -> + () + | Some ((free, total) as c) -> ( + try + let host = Helpers.get_localhost ~__context in + let metrics = Db.Host.get_metrics ~__context ~self:host in + Db.Host_metrics.set_memory_total ~__context ~self:metrics + ~value:total ; + Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; + set_changes c + with e -> + error "Unable to update host memory metrics: %s" + (Printexc.to_string e) + ) + ) +end + +module VMs = struct + let get_changes datasources = + List.iter + (fun (filename, datasources) -> + try + Mcache.log_errors_from filename ; + datasources + |> List.filter_map (function + | Rrd.VM vm_uuid, ds when ds.Ds.ds_name = "memory" -> + Some (vm_uuid, ds) + | _ -> + None (* we are only interested in VM stats *) + ) + |> List.iter (function vm_uuid, ds -> + let value = + match ds.Ds.ds_value with + | Rrd.VT_Int64 v -> + v + | Rrd.VT_Float v -> + Int64.of_float v + | Rrd.VT_Unknown -> + -1L + in + Hashtbl.add Mcache.vm_memory_tmp vm_uuid value + ) + with e -> + if not (Mcache.is_ignored filename) then ( + error "Unable to read memory usage for VM %s: %s" filename + (Printexc.to_string e) ; + Mcache.ignore_errors_from filename + ) + ) + datasources ; + (* Check if anything has changed since our last reading. *) + Mcache.get_updates_map ~before:Mcache.vm_memory_cached + ~after:Mcache.vm_memory_tmp + + let set_changes ?except () = + Mtxext.execute Mcache.vm_memory_cached_m (fun _ -> + Mcache.transfer_map ?except ~source:Mcache.vm_memory_tmp + ~target:Mcache.vm_memory_cached () + ) + + let update rrd_files = + Server_helpers.exec_with_new_task "Updating VM memory usage" + (fun __context -> + let datasources = + get_datasources ~prefix:Xapi_globs.metrics_prefix_mem_vms rrd_files + in + let host = Helpers.get_localhost ~__context in + let keeps = ref [] in + List.iter + (fun (vm_uuid, memory) -> + try + let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in + let vmm = Db.VM.get_metrics ~__context ~self:vm in + if Db.VM.get_resident_on ~__context ~self:vm = host then + Db.VM_metrics.set_memory_actual ~__context ~self:vmm + ~value:memory + else + Mcache.clear_cache_for_vm ~vm_uuid + with e -> + keeps := vm_uuid :: !keeps ; + error "Unable to update memory usage for VM %s: %s" vm_uuid + (Printexc.to_string e) + ) + (get_changes datasources) ; + set_changes ~except:!keeps () + ) +end + +let update rrd_files = Host.update rrd_files ; VMs.update rrd_files diff --git a/ocaml/xapi/monitor_mem.mli b/ocaml/xapi/monitor_mem.mli new file mode 100644 index 00000000000..c2b74b2512f --- /dev/null +++ b/ocaml/xapi/monitor_mem.mli @@ -0,0 +1,18 @@ +(* Copyright (C) Cloud Software Group Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. +*) + +module Mcache = Monitor_dbcalls_cache + +val update : Mcache.StringSet.elt list -> unit +(** [update rrd_files] Reads rrd_files and update the host and VM memory + metrics in xapi's cache. *) diff --git a/ocaml/xapi/monitor_mem_host.ml b/ocaml/xapi/monitor_mem_host.ml deleted file mode 100644 index e4c2f012a24..00000000000 --- a/ocaml/xapi/monitor_mem_host.ml +++ /dev/null @@ -1,98 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -module Mtxext = Xapi_stdext_threads.Threadext.Mutex -module Mcache = Monitor_dbcalls_cache - -module D = Debug.Make (struct let name = "monitor_mem_host" end) - -open D - -let get_changes rrd_files = - let named_dss = - List.concat_map - (fun filename -> - try - let datasources = Monitor_types.datasources_from_filename filename in - Mcache.log_errors_from filename ; - datasources - |> List.filter_map (function - | Rrd.Host, ds - when List.mem ds.Ds.ds_name - ["memory_total_kib"; "memory_free_kib"] -> - Some ds - | _ -> - None (* we are only interested in Host memory stats *) - ) - |> List.map (function ds -> - let value = - match ds.Ds.ds_value with - | Rrd.VT_Int64 v -> - Memory.bytes_of_kib v - | Rrd.VT_Float v -> - Memory.bytes_of_kib (Int64.of_float v) - | Rrd.VT_Unknown -> - -1L - in - (ds.Ds.ds_name, value) - ) - with e -> - if not (Mcache.is_ignored filename) then ( - error "Unable to read host memory metrics from %s: %s" filename - (Printexc.to_string e) ; - Mcache.ignore_errors_from filename - ) ; - [] - ) - rrd_files - in - let free_bytes = List.assoc_opt "memory_free_kib" named_dss in - let total_bytes = List.assoc_opt "memory_total_kib" named_dss in - (* Check if anything has changed since our last reading. *) - match (free_bytes, total_bytes) with - | Some free, Some total - when !Mcache.host_memory_free_cached <> free - || !Mcache.host_memory_total_cached <> total -> - Some (free, total) - | _ -> - None - -let set_changes (free_bytes, total_bytes) = - Mtxext.execute Mcache.host_memory_m (fun _ -> - Mcache.host_memory_free_cached := free_bytes ; - Mcache.host_memory_total_cached := total_bytes - ) - -let update rrd_files = - let is_host_rrd = - Astring.String.is_prefix ~affix:Xapi_globs.metrics_prefix_mem_host - in - let rrd_files = List.filter is_host_rrd rrd_files in - Server_helpers.exec_with_new_task "Updating host memory metrics" - (fun __context -> - let changes = get_changes rrd_files in - match changes with - | None -> - () - | Some ((free, total) as c) -> ( - try - let host = Helpers.get_localhost ~__context in - let metrics = Db.Host.get_metrics ~__context ~self:host in - Db.Host_metrics.set_memory_total ~__context ~self:metrics ~value:total ; - Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; - set_changes c - with e -> - error "Unable to update host memory metrics: %s" (Printexc.to_string e) - ) - ) diff --git a/ocaml/xapi/monitor_mem_vms.ml b/ocaml/xapi/monitor_mem_vms.ml deleted file mode 100644 index 37d737d92df..00000000000 --- a/ocaml/xapi/monitor_mem_vms.ml +++ /dev/null @@ -1,89 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -module Mtxext = Xapi_stdext_threads.Threadext.Mutex -module Mcache = Monitor_dbcalls_cache - -module D = Debug.Make (struct let name = "monitor_mem_vms" end) - -open D - -let get_changes rrd_files = - List.iter - (fun filename -> - try - let datasources = Monitor_types.datasources_from_filename filename in - Mcache.log_errors_from filename ; - datasources - |> List.filter_map (function - | Rrd.VM vm_uuid, ds when ds.Ds.ds_name = "memory" -> - Some (vm_uuid, ds) - | _ -> - None (* we are only interested in VM stats *) - ) - |> List.iter (function vm_uuid, ds -> - let value = - match ds.Ds.ds_value with - | Rrd.VT_Int64 v -> - v - | Rrd.VT_Float v -> - Int64.of_float v - | Rrd.VT_Unknown -> - -1L - in - Hashtbl.add Mcache.vm_memory_tmp vm_uuid value - ) - with e -> - if not (Mcache.is_ignored filename) then ( - error "Unable to read memory usage for VM %s: %s" filename - (Printexc.to_string e) ; - Mcache.ignore_errors_from filename - ) - ) - rrd_files ; - (* Check if anything has changed since our last reading. *) - Mcache.get_updates_map ~before:Mcache.vm_memory_cached - ~after:Mcache.vm_memory_tmp - -let set_changes ?except () = - Mtxext.execute Mcache.vm_memory_cached_m (fun _ -> - Mcache.transfer_map ?except ~source:Mcache.vm_memory_tmp - ~target:Mcache.vm_memory_cached () - ) - -let update rrd_files = - let is_vm_rrd = - Astring.String.is_prefix ~affix:Xapi_globs.metrics_prefix_mem_vms - in - let rrd_files = List.filter is_vm_rrd rrd_files in - Server_helpers.exec_with_new_task "Updating VM memory usage" (fun __context -> - let host = Helpers.get_localhost ~__context in - let keeps = ref [] in - List.iter - (fun (vm_uuid, memory) -> - try - let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in - let vmm = Db.VM.get_metrics ~__context ~self:vm in - if Db.VM.get_resident_on ~__context ~self:vm = host then - Db.VM_metrics.set_memory_actual ~__context ~self:vmm ~value:memory - else - Mcache.clear_cache_for_vm ~vm_uuid - with e -> - keeps := vm_uuid :: !keeps ; - error "Unable to update memory usage for VM %s: %s" vm_uuid - (Printexc.to_string e) - ) - (get_changes rrd_files) ; - set_changes ~except:!keeps () - ) diff --git a/quality-gate.sh b/quality-gate.sh index 6455846d21b..f6540cb2a1f 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=469 + N=467 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From f601c12094810e3c0c5db2f169a64f35e66bb1fa Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 12 Jun 2025 11:36:05 +0100 Subject: [PATCH 335/492] rrdp-squeezed: move remaining memory metrics to this plugin This allows to xcp-rrdd to stop writing to memory-mapped files This needed xapi to be adapted since it read the contents of the memory-mapped files. Now it can get those metric from a single memory-mapped file. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/monitor_mem.ml | 105 +++++++-------- ocaml/xapi/xapi_globs.ml | 4 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 126 ++---------------- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli | 0 .../bin/rrdp-squeezed/rrdp_squeezed.ml | 40 +++++- quality-gate.sh | 2 +- 6 files changed, 101 insertions(+), 176 deletions(-) create mode 100644 ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli diff --git a/ocaml/xapi/monitor_mem.ml b/ocaml/xapi/monitor_mem.ml index 502f0d6ca7a..79cf3cadf9d 100644 --- a/ocaml/xapi/monitor_mem.ml +++ b/ocaml/xapi/monitor_mem.ml @@ -19,9 +19,15 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D -let get_datasources ~prefix rrd_files = - List.filter (String.starts_with ~prefix) rrd_files - |> List.map (fun fn -> (fn, Monitor_types.datasources_from_filename fn)) +let get_datasources rrd_files = + List.filter_map + (fun filename -> + if String.starts_with ~prefix:Xapi_globs.metrics_prefix_mem filename then + Some (filename, Monitor_types.datasources_from_filename filename) + else + None + ) + rrd_files module Host = struct let get_changes datasources = @@ -78,28 +84,19 @@ module Host = struct Mcache.host_memory_total_cached := total_bytes ) - let update rrd_files = - Server_helpers.exec_with_new_task "Updating host memory metrics" - (fun __context -> - let datasources = - get_datasources ~prefix:Xapi_globs.metrics_prefix_mem_host rrd_files - in - let changes = get_changes datasources in - match changes with - | None -> - () - | Some ((free, total) as c) -> ( - try - let host = Helpers.get_localhost ~__context in - let metrics = Db.Host.get_metrics ~__context ~self:host in - Db.Host_metrics.set_memory_total ~__context ~self:metrics - ~value:total ; - Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; - set_changes c - with e -> - error "Unable to update host memory metrics: %s" - (Printexc.to_string e) - ) + let update __context datasources = + match get_changes datasources with + | None -> + () + | Some ((free, total) as c) -> ( + try + let host = Helpers.get_localhost ~__context in + let metrics = Db.Host.get_metrics ~__context ~self:host in + Db.Host_metrics.set_memory_total ~__context ~self:metrics ~value:total ; + Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; + set_changes c + with e -> + error "Unable to update host memory metrics: %s" (Printexc.to_string e) ) end @@ -146,32 +143,36 @@ module VMs = struct ~target:Mcache.vm_memory_cached () ) - let update rrd_files = - Server_helpers.exec_with_new_task "Updating VM memory usage" - (fun __context -> - let datasources = - get_datasources ~prefix:Xapi_globs.metrics_prefix_mem_vms rrd_files - in - let host = Helpers.get_localhost ~__context in - let keeps = ref [] in - List.iter - (fun (vm_uuid, memory) -> - try - let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in - let vmm = Db.VM.get_metrics ~__context ~self:vm in - if Db.VM.get_resident_on ~__context ~self:vm = host then - Db.VM_metrics.set_memory_actual ~__context ~self:vmm - ~value:memory - else - Mcache.clear_cache_for_vm ~vm_uuid - with e -> - keeps := vm_uuid :: !keeps ; - error "Unable to update memory usage for VM %s: %s" vm_uuid - (Printexc.to_string e) - ) - (get_changes datasources) ; - set_changes ~except:!keeps () - ) + let update __context datasources = + let host = Helpers.get_localhost ~__context in + let keeps = ref [] in + List.iter + (fun (vm_uuid, memory) -> + try + let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in + let vmm = Db.VM.get_metrics ~__context ~self:vm in + if Db.VM.get_resident_on ~__context ~self:vm = host then + Db.VM_metrics.set_memory_actual ~__context ~self:vmm ~value:memory + else + Mcache.clear_cache_for_vm ~vm_uuid + with e -> + keeps := vm_uuid :: !keeps ; + error "Unable to update memory usage for VM %s: %s" vm_uuid + (Printexc.to_string e) + ) + (get_changes datasources) ; + set_changes ~except:!keeps () end -let update rrd_files = Host.update rrd_files ; VMs.update rrd_files +let update rrd_files = + let ( let@ ) f x = f x in + let@ __context = + Server_helpers.exec_with_new_task "Updating memory metrics" + in + let datasources = get_datasources rrd_files in + if datasources = [] then + error "%s: no memory datasources found!" __FUNCTION__ + else ( + Host.update __context datasources ; + VMs.update __context datasources + ) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index e3957deea71..ad8914e9de7 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -635,9 +635,7 @@ let event_hook_auth_on_xapi_initialize_succeeded = ref false let metrics_root = "/dev/shm/metrics" -let metrics_prefix_mem_host = "xcp-rrdd-mem_host" - -let metrics_prefix_mem_vms = "xcp-rrdd-mem_vms" +let metrics_prefix_mem = "xcp-rrdd-squeezed" let metrics_prefix_pvs_proxy = "pvsproxy-" diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 40e5ab34b79..588f2de37dd 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -119,40 +119,6 @@ let start (xmlrpc_path, http_fwd_path) process = let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -(*****************************************************) -(* memory stats *) -(*****************************************************) -let dss_mem_host xc = - let physinfo = Xenctrl.physinfo xc in - let total_kib = - Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages) - and free_kib = - Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages) - in - [ - ( Rrd.Host - , Ds.ds_make ~name:"memory_total_kib" - ~description:"Total amount of memory in the host" - ~value:(Rrd.VT_Int64 total_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true - ~units:"KiB" () - ) - ; ( Rrd.Host - , Ds.ds_make ~name:"memory_free_kib" - ~description:"Total amount of free memory" - ~value:(Rrd.VT_Int64 free_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true - ~units:"KiB" () - ) - ] - -(** estimate the space needed to serialize all the dss_mem_vms in a host. the - json-like serialization for the 3 dss in dss_mem_vms takes 622 bytes. these - bytes plus some overhead make 1024 bytes an upper bound. *) - -let bytes_per_mem_vm = 1024 - -let mem_vm_writer_pages = - ((Rrd_interface.max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 - let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] module IntSet = Set.Make (Int) @@ -192,31 +158,6 @@ let domain_snapshot xc = in domains |> List.to_seq -let dss_mem_vms xc = - let mem_metrics_of (dom, uuid, _) = - let vm_metrics () = - let kib = - Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages) - in - let memory = Int64.mul kib 1024L in - let main_mem_ds = - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory" - ~description:"Memory currently allocated to VM" ~units:"B" - ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () - ) - in - Some main_mem_ds - in - (* CA-34383: Memory updates from paused domains serve no useful purpose. - During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. Hence, we - ignore changes from paused domains: *) - if dom.Xenctrl.paused then None else vm_metrics () - in - let domains = domain_snapshot xc in - Seq.filter_map mem_metrics_of domains |> List.of_seq - (**** Local cache SR stuff *) type last_vals = { @@ -323,8 +264,6 @@ let handle_exn log f default = let dom0_stat_generators = [ ("ha", fun _ _ -> Rrdd_ha_stats.all ()) - ; ("mem_host", fun xc _ -> dss_mem_host xc) - ; ("mem_vms", fun xc _ -> dss_mem_vms xc) ; ("cache", fun _ timestamp -> dss_cache timestamp) ] @@ -335,23 +274,9 @@ let generate_all_dom0_stats xc = in List.map handle_generator dom0_stat_generators -let write_dom0_stats writers tagged_dss = - let write_dss (name, writer) = - match List.assoc_opt name tagged_dss with - | None -> - debug - "Could not write stats for \"%s\": no stats were associated with \ - this name" - name - | Some (timestamp, dss) -> - writer.Rrd_writer.write_payload {timestamp; datasources= dss} - in - List.iter write_dss writers - -let do_monitor_write domains_before xc writers = +let do_monitor_write domains_before xc = Rrdd_libs.Stats.time_this "monitor" (fun _ -> let tagged_dom0_stats = generate_all_dom0_stats xc in - write_dom0_stats writers tagged_dom0_stats ; let dom0_stats = tagged_dom0_stats |> List.to_seq @@ -380,14 +305,14 @@ let do_monitor_write domains_before xc writers = domains_after ) -let monitor_write_loop writers = +let monitor_write_loop () = Debug.with_thread_named "monitor_write" (fun () -> Xenctrl.with_intf (fun xc -> let domains = ref Seq.empty in while true do try - domains := do_monitor_write !domains xc writers ; + domains := do_monitor_write !domains xc ; with_lock Rrdd_shared.next_iteration_start_m (fun _ -> Rrdd_shared.next_iteration_start := Clock.Timer.extend_by !Rrdd_shared.timeslice @@ -579,45 +504,15 @@ let doc = the datasources and records historical data in RRD format." ] -(** write memory stats to the filesystem so they can be propagated to xapi, - along with the number of pages they require to be allocated *) -let stats_to_write = [("mem_host", 1); ("mem_vms", mem_vm_writer_pages)] - -let writer_basename = ( ^ ) "xcp-rrdd-" - -let configure_writers () = - List.map - (fun (name, n_pages) -> - let path = Rrdd_server.Plugin.get_path (writer_basename name) in - ignore (Xapi_stdext_unix.Unixext.mkdir_safe (Filename.dirname path) 0o644) ; - let writer = - snd - (Rrd_writer.FileWriter.create - {path; shared_page_count= n_pages} - Rrd_protocol_v2.protocol - ) - in - (name, writer) - ) - stats_to_write - -(** we need to make sure we call exit on fatal signals to make sure profiling - data is dumped *) -let stop err writers signal = - debug "caught signal %a" Debug.Pp.signal signal ; - List.iter (fun (_, writer) -> writer.Rrd_writer.cleanup ()) writers ; - exit err - (* Entry point. *) -let _ = +let () = Rrdd_bindings.Rrd_daemon.bind () ; (* bind PPX-generated server calls to implementation of API *) - let writers = configure_writers () in (* Prevent shutdown due to sigpipe interrupt. This protects against potential stunnel crashes. *) Sys.set_signal Sys.sigpipe Sys.Signal_ignore ; - Sys.set_signal Sys.sigterm (Sys.Signal_handle (stop 1 writers)) ; - Sys.set_signal Sys.sigint (Sys.Signal_handle (stop 0 writers)) ; + Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun _ -> exit 1)) ; + Sys.set_signal Sys.sigint (Sys.Signal_handle (fun _ -> exit 0)) ; (* Enable the new logging library. *) Debug.set_facility Syslog.Local5 ; (* Read configuration file. *) @@ -647,11 +542,8 @@ let _ = start (!Rrd_interface.default_path, !Rrd_interface.forwarded_path) (fun () -> Idl.Exn.server Rrdd_bindings.Server.implementation ) ; - ignore - @@ Discover.start - (List.map (fun (name, _) -> writer_basename name) stats_to_write) ; - ignore @@ GCLog.start () ; - debug "Starting xenstore-watching thread .." ; + let _ : Thread.t = Discover.start [] in + let _ : Thread.t = GCLog.start () in let module Daemon = Xapi_stdext_unix.Unixext.Daemon in if Daemon.systemd_booted () then if Daemon.systemd_notify Daemon.State.Ready then @@ -660,7 +552,7 @@ let _ = warn "Sending systemd notification failed at %s" __LOC__ ; debug "Creating monitoring loop thread .." ; let () = - try Debug.with_thread_associated "main" monitor_write_loop writers + try Debug.with_thread_associated "main" monitor_write_loop () with _ -> error "monitoring loop thread has failed" in while true do diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml index a3091a90daf..df49dca259f 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml @@ -131,7 +131,7 @@ let get_domain_stats xc = let bytes_of_kib kib = Int64.mul 1024L kib -let generate_host_sources counters = +let generate_host_sources xc counters = let memory_reclaimed, memory_possibly_reclaimed = (* Calculate host metrics - Host memory reclaimed by squeezed = @@ -169,6 +169,13 @@ let generate_host_sources counters = in let memory_reclaimed = bytes_of_kib memory_reclaimed in let memory_possibly_reclaimed = bytes_of_kib memory_possibly_reclaimed in + let physinfo = Xenctrl.physinfo xc in + let total_kib = + Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages) + in + let free_kib = + Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages) + in (* Build corresponding Ds.ds values *) [ ( Rrd.Host @@ -183,6 +190,18 @@ let generate_host_sources counters = ~value:(Rrd.VT_Int64 memory_possibly_reclaimed) ~ty:Rrd.Gauge ~default:true ~units:"B" () ) + ; ( Rrd.Host + , Ds.ds_make ~name:"memory_total_kib" + ~description:"Total amount of memory in the host" + ~value:(Rrd.VT_Int64 total_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true + ~units:"KiB" () + ) + ; ( Rrd.Host + , Ds.ds_make ~name:"memory_free_kib" + ~description:"Total amount of free memory" + ~value:(Rrd.VT_Int64 free_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true + ~units:"KiB" () + ) ] let res_error fmt = Printf.ksprintf Result.error fmt @@ -233,6 +252,8 @@ let free_other uuid free = ~value:(Rrd.VT_Int64 free) ~ty:Rrd.Gauge ~min:0.0 ~default:true () ) +let get_list f = Option.to_list (f ()) + let generate_vm_sources domains = let metrics_of ((dom, uuid, domid), {target; free; _}) = let target () = @@ -254,6 +275,19 @@ let generate_vm_sources domains = else Option.bind free (free_other uuid) in + let total () = + let memory = + Int64.of_nativeint dom.Xenctrl.total_memory_pages + |> Xenctrl.pages_to_kib + |> bytes_of_kib + in + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory" + ~description:"Memory currently allocated to VM" ~units:"B" + ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + in (* CA-34383: Memory updates from paused domains serve no useful purpose. During a migrate such updates can also cause undesirable discontinuities in the observed value of memory_actual. Hence, we @@ -261,14 +295,14 @@ let generate_vm_sources domains = if dom.Xenctrl.paused then [] else - Option.to_list (target ()) @ Option.to_list (free ()) + get_list target @ get_list free @ get_list total in List.concat_map metrics_of domains let generate_sources xc () = let domain_stats = get_domain_stats xc in - generate_host_sources domain_stats @ generate_vm_sources domain_stats + generate_host_sources xc domain_stats @ generate_vm_sources domain_stats (** The json-like serialization for 3 dss in dss_mem_vms takes 622 bytes. These bytes plus some overhead make 1024 bytes an upper bound. *) diff --git a/quality-gate.sh b/quality-gate.sh index f6540cb2a1f..ceb82f67f65 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=467 + N=466 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From fa5bc72d44ec181a90e21a6b2175704bfa16c18b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 26 Jun 2025 18:53:55 +0100 Subject: [PATCH 336/492] xcp-rrdd: remove duplicated code to fetch domains Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdd/dune | 1 + ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 42 ++--------------------------- 2 files changed, 3 insertions(+), 40 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/dune b/ocaml/xcp-rrdd/bin/rrdd/dune index 6ce134dd522..2f215e8a7cf 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/dune +++ b/ocaml/xcp-rrdd/bin/rrdd/dune @@ -50,6 +50,7 @@ rpclib.json rpclib.xml rrdd_libs_internal + rrdd_plugin_xenctrl rrd-transport threads.posix uuid diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 588f2de37dd..17ca619440d 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -119,45 +119,6 @@ let start (xmlrpc_path, http_fwd_path) process = let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] - -module IntSet = Set.Make (Int) - -let domain_snapshot xc = - let metadata_of_domain dom = - let ( let* ) = Option.bind in - let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in - let uuid = Uuidx.to_string uuid_raw in - let domid = dom.Xenctrl.domid in - let start = String.sub uuid 0 18 in - (* Actively hide migrating VM uuids, these are temporary and xenops writes - the original and the final uuid to xenstore *) - let uuid_from_key key = - let path = Printf.sprintf "/vm/%s/%s" uuid key in - try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path)) - with Xs_protocol.Enoent _hint -> - info "Couldn't read path %s; falling back to actual uuid" path ; - uuid - in - let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in - if List.mem start uuid_blacklist then - None - else - let key = - if Astring.String.is_suffix ~affix:"000000000000" uuid then - Some "origin-uuid" - else if Astring.String.is_suffix ~affix:"000000000001" uuid then - Some "final-uuid" - else - None - in - Some (dom, stable_uuid key, domid) - in - let domains = - Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain - in - domains |> List.to_seq - (**** Local cache SR stuff *) type last_vals = { @@ -285,7 +246,8 @@ let do_monitor_write domains_before xc = ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in - let domains_after = domain_snapshot xc in + let _, domains_after, _ = Xenctrl_lib.domain_snapshot xc in + let domains_after = List.to_seq domains_after in let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; (* merge the domain ids from the previous iteration and the current one From 85c749c4cf0ab6e8d14c781b3fb2a7687126a12b Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Fri, 27 Jun 2025 10:35:08 +0000 Subject: [PATCH 337/492] CA-412854 Fix ssh_expiry drift after XAPI restart - Move `set_ssh_expiry` outside of the `schedule_disable_ssh_job` to avoid restart XAPI reset host.ssh_expiry - Ensure the SSH service is enabled during auto-mode Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/xapi_host.ml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index e22aa2d7cef..1af2a5d4808 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -3155,6 +3155,8 @@ let set_ssh_auto_mode ~__context ~self ~value = SSH is only enabled during emergency scenarios (e.g., when XAPI is down) to allow administrative access for troubleshooting. *) if value then ( + (* Ensure SSH is always enabled when SSH auto mode is on*) + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_monitor_service ; Xapi_systemctl.start ~wait_until_success:false @@ -3183,8 +3185,7 @@ let disable_ssh_internal ~__context ~self = Helpers.internal_error "Failed to disable SSH access, host: %s" (Ref.string_of self) -let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = - let host_uuid = Helpers.get_localhost_uuid () in +let set_expiry ~__context ~self ~timeout = let expiry_time = match Ptime.add_span (Ptime_clock.now ()) @@ -3201,6 +3202,10 @@ let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = | Some t -> Ptime.to_float_s t |> Date.of_unix_time in + Db.Host.set_ssh_expiry ~__context ~self ~value:expiry_time + +let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = + let host_uuid = Helpers.get_localhost_uuid () in debug "Scheduling SSH disable job for host %s with timeout %Ld seconds" host_uuid timeout ; @@ -3217,9 +3222,7 @@ let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = (* re-enable SSH auto mode if it was enabled before calling host.enable_ssh *) if auto_mode then set_ssh_auto_mode ~__context ~self ~value:true - ) ; - - Db.Host.set_ssh_expiry ~__context ~self ~value:expiry_time + ) let enable_ssh ~__context ~self = try @@ -3239,6 +3242,7 @@ let enable_ssh ~__context ~self = !Xapi_globs.job_for_disable_ssh ; Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch | t -> + set_expiry ~__context ~self ~timeout:t ; schedule_disable_ssh_job ~__context ~self ~timeout:t ~auto_mode:cached_ssh_auto_mode ) ; @@ -3279,6 +3283,7 @@ let set_ssh_enabled_timeout ~__context ~self ~value = !Xapi_globs.job_for_disable_ssh ; Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch | t -> + set_expiry ~__context ~self ~timeout:t ; schedule_disable_ssh_job ~__context ~self ~timeout:t ~auto_mode:false let set_console_idle_timeout ~__context ~self ~value = From 57cbad4fc5f32ecb2aad5caba209713f3c95851a Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Wed, 25 Jun 2025 21:46:39 +0100 Subject: [PATCH 338/492] Move common retry_econnrefused function to xcp_client Signed-off-by: Steven Woods --- ocaml/xapi-idl/lib/observer_helpers.ml | 20 +++----------------- ocaml/xapi-idl/lib/xcp_client.ml | 18 ++++++++++++++++++ ocaml/xapi-idl/rrd/rrd_client.ml | 22 ++++------------------ ocaml/xapi-idl/storage/storage_client.ml | 22 ++++------------------ 4 files changed, 29 insertions(+), 53 deletions(-) diff --git a/ocaml/xapi-idl/lib/observer_helpers.ml b/ocaml/xapi-idl/lib/observer_helpers.ml index 125ba101722..24f7ee3db46 100644 --- a/ocaml/xapi-idl/lib/observer_helpers.ml +++ b/ocaml/xapi-idl/lib/observer_helpers.ml @@ -241,24 +241,10 @@ module Server (Impl : Server_impl) () = struct let process call = Idl.Exn.server S.implementation call end -let rec retry_econnrefused f = - try f () with - | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> - (* debug "Caught ECONNREFUSED; retrying in 5s"; *) - Thread.delay 5. ; retry_econnrefused f - | e -> - (* error "Caught %s: does the observer service need restarting?" - (Printexc.to_string e); *) - raise e - module Client = ObserverAPI (Idl.Exn.GenClient (struct - open Xcp_client - let rpc call = - retry_econnrefused (fun () -> - if !use_switch then - json_switch_rpc queue_name call - else - xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:queue_name uri call + Xcp_client.( + retry_and_switch_rpc call ~use_switch:!use_switch ~queue_name + ~dststr:queue_name ~uri ) end)) diff --git a/ocaml/xapi-idl/lib/xcp_client.ml b/ocaml/xapi-idl/lib/xcp_client.ml index 435a63e3126..a7ebd1f996a 100644 --- a/ocaml/xapi-idl/lib/xcp_client.ml +++ b/ocaml/xapi-idl/lib/xcp_client.ml @@ -190,3 +190,21 @@ let binary_rpc string_of_call response_of_string ?(srcstr = "unset") let json_binary_rpc = binary_rpc Jsonrpc.string_of_call Jsonrpc.response_of_string + +let rec retry_econnrefused f = + try f () with + | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> + (* debug "Caught ECONNREFUSED; retrying in 5s"; *) + Thread.delay 5. ; retry_econnrefused f + | e -> + (* error "Caught %s: does the service need restarting?" + (Printexc.to_string e); *) + raise e + +let retry_and_switch_rpc call ~use_switch ~queue_name ~dststr ~uri = + retry_econnrefused (fun () -> + if use_switch then + json_switch_rpc queue_name call + else + xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr uri call + ) diff --git a/ocaml/xapi-idl/rrd/rrd_client.ml b/ocaml/xapi-idl/rrd/rrd_client.ml index abb12a118de..08a9b731f71 100644 --- a/ocaml/xapi-idl/rrd/rrd_client.ml +++ b/ocaml/xapi-idl/rrd/rrd_client.ml @@ -13,26 +13,12 @@ *) open Rrd_interface -open Xcp_client - -let rec retry_econnrefused f = - try f () with - | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> - (* debug "Caught ECONNREFUSED; retrying in 5s"; *) - Thread.delay 5. ; retry_econnrefused f - | e -> - (* error "Caught %s: does the rrd service need restarting?" - (Printexc.to_string e); *) - raise e +(* TODO: use_switch=false as the message switch doesn't handle raw HTTP very well *) let rpc call = - retry_econnrefused (fun () -> - (* TODO: the message switch doesn't handle raw HTTP very well *) - if (* !use_switch *) false then - json_switch_rpc !queue_name call - else - xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:"rrd" Rrd_interface.uri - call + Xcp_client.( + retry_and_switch_rpc call ~use_switch:false ~queue_name:!queue_name + ~dststr:"rrd" ~uri ) module Client = RPC_API (Idl.Exn.GenClient (struct let rpc = rpc end)) diff --git a/ocaml/xapi-idl/storage/storage_client.ml b/ocaml/xapi-idl/storage/storage_client.ml index b66636daf6a..eeb0e765170 100644 --- a/ocaml/xapi-idl/storage/storage_client.ml +++ b/ocaml/xapi-idl/storage/storage_client.ml @@ -13,25 +13,11 @@ *) open Storage_interface -open Xcp_client -let rec retry_econnrefused f = - try f () with - | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> - (* debug "Caught ECONNREFUSED; retrying in 5s"; *) - Thread.delay 5. ; retry_econnrefused f - | e -> - (* error "Caught %s: does the storage service need restarting?" - (Printexc.to_string e); *) - raise e - -module Client = Storage_interface.StorageAPI (Idl.Exn.GenClient (struct +module Client = StorageAPI (Idl.Exn.GenClient (struct let rpc call = - retry_econnrefused (fun () -> - if !use_switch then - json_switch_rpc !queue_name call - else - xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:"storage" - Storage_interface.uri call + Xcp_client.( + retry_and_switch_rpc call ~use_switch:!use_switch ~queue_name:!queue_name + ~dststr:"storage" ~uri ) end)) From 267c414d5753b4697119d20a363a5170c6444209 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 17 Jun 2025 02:20:45 +0000 Subject: [PATCH 339/492] CA-412636: hostname changed to localhost with static IP and reboot According to https://www.freedesktop.org/software/systemd/man/latest/hostname.html Systemd set hostname with following sequence - kernel parameter, systemd.hostname - static hostname in /etc/hostname - transient hostname like DHCP - localhost at systemd compile time Once the host is configured with static IP and reboot, it would just lost its hostname as no DHCP or static IP available. However, the hostname is critical to AD function as it construct the machine account. The hostname should be persisted as static name during joining AD, this is also what PBIS does. Note: the static hostname is not cleaned during domain leave. This is by intention to avoid losing hostname after reboot with static IP cba2f1d5e tried to resovle the issue and update /etc/resolv.conf However, /etc/resolv.conf does not help and conflict with xcp-networkd, as networkd override the configure every 5 minutes Here we just revert the resolv.conf update. Other parts of that commit can still benifit as it push the hostname to DNS Signed-off-by: Lin Liu --- ocaml/xapi/extauth_plugin_ADwinbind.ml | 27 +++++--------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index a279de5c5c7..6def6c5bb64 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -1434,23 +1434,6 @@ module ConfigHosts = struct |> write_string_to_file path end -module ResolveConfig = struct - let path = "/etc/resolv.conf" - - type t = Add | Remove - - let handle op domain = - let open Xapi_stdext_unix.Unixext in - let config = Printf.sprintf "search %s" domain in - read_lines ~path |> List.filter (fun x -> x <> config) |> fun x -> - (match op with Add -> config :: x | Remove -> x) |> fun x -> - x @ [""] |> String.concat "\n" |> write_string_to_file path - - let join ~domain = handle Add domain - - let leave ~domain = handle Remove domain -end - module DNSSync = struct let task_name = "Sync hostname with DNS" @@ -1827,7 +1810,11 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ClosestKdc.trigger_update ~start:0. ; RotateMachinePassword.trigger_rotate ~start:0. ; ConfigHosts.join ~domain:service_name ~name:netbios_name ; - ResolveConfig.join ~domain:service_name ; + let _, _ = + Forkhelpers.execute_command_get_output !Xapi_globs.set_hostname + [get_localhost_name ()] + in + (* Trigger right now *) DNSSync.trigger_sync ~start:0. ; Winbind.set_machine_account_encryption_type netbios_name ; debug "Succeed to join domain %s" service_name @@ -1836,7 +1823,6 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Join domain: %s error: %s" service_name stdout ; clear_winbind_config () ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; - ResolveConfig.leave ~domain:service_name ; (* The configure is kept for debug purpose with max level *) raise (Auth_service_error (stdout |> tag_from_err_msg, stdout)) | Xapi_systemctl.Systemctl_fail _ -> @@ -1844,7 +1830,6 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Start daemon error: %s" msg ; config_winbind_daemon ~domain:None ~workgroup:None ~netbios_name:None ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; - ResolveConfig.leave ~domain:service_name ; raise (Auth_service_error (E_GENERIC, msg)) | e -> let msg = @@ -1856,7 +1841,6 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct error "Enable extauth error: %s" msg ; clear_winbind_config () ; ConfigHosts.leave ~domain:service_name ~name:netbios_name ; - ResolveConfig.leave ~domain:service_name ; raise (Auth_service_error (E_GENERIC, msg)) (* unit on_disable() @@ -1871,7 +1855,6 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct let user = List.assoc_opt "user" config_params in let pass = List.assoc_opt "pass" config_params in let {service_name; netbios_name; _} = get_domain_info_from_db () in - ResolveConfig.leave ~domain:service_name ; DNSSync.stop_sync () ; ( match netbios_name with | Some netbios -> From 4927eefd44e1940def75c16da231cfdfdca5b842 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 16 Jun 2025 11:15:20 +0100 Subject: [PATCH 340/492] Add mlis for observer_helpers and observer_skeleton Signed-off-by: Steven Woods --- ocaml/xapi-idl/lib/observer_helpers.mli | 227 +++++++++++++++++++++++ ocaml/xapi-idl/lib/observer_skeleton.mli | 46 +++++ quality-gate.sh | 2 +- 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 ocaml/xapi-idl/lib/observer_helpers.mli create mode 100644 ocaml/xapi-idl/lib/observer_skeleton.mli diff --git a/ocaml/xapi-idl/lib/observer_helpers.mli b/ocaml/xapi-idl/lib/observer_helpers.mli new file mode 100644 index 00000000000..cd23d2d1e80 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_helpers.mli @@ -0,0 +1,227 @@ +val queue_name : string + +val default_path : string + +module Errors : sig + type error = + | Internal_error of string + | Unimplemented of string + | Unknown_error + + val typ_of_error : error Rpc.Types.typ + + val error : error Rpc.Types.def +end + +exception Observer_error of Errors.error + +type debug_info = string + +(** ObserverAPI contains the declarations for the RPCs which are sent to + Observer modules when the corresponding function is called on the Observer + see ocaml/libs/tracing/ and ocaml/xapi/xapi_observer.ml *) +module ObserverAPI : functor (R : Idl.RPC) -> sig + val description : Idl.Interface.description + + val implementation : R.implementation + + val create : + ( debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> (unit, Errors.error) R.comp + ) + R.res + (** [create dbg uuid name attributes endpoints enabled] notifies the + forwarder that an Observer with [uuid] has been created. The subsequent + parameters are the fields the Observer was created with. *) + + val destroy : (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [destroy dbg uuid] notifies the forwarder that an Observer with [uuid] + has been destroyed. *) + + val set_enabled : + (debug_info -> string -> bool -> (unit, Errors.error) R.comp) R.res + (** [set_enabled dbg uuid enabled] notifies the fowarder that the Observer + with [uuid] has had its enabled field set to [enabled]. *) + + val set_attributes : + ( debug_info + -> string + -> (string * string) list + -> (unit, Errors.error) R.comp + ) + R.res + (** [set_attributes dbg uuid attributes] notifies the fowarder that the + Observer with [uuid] has had its attributes field set to [attributes]. *) + + val set_endpoints : + (debug_info -> string -> string list -> (unit, Errors.error) R.comp) R.res + (** [set_endpoints dbg uuid endpoints] notifies the fowarder that the Observer + with [uuid] has had its endpoints field set to [endpoints]. *) + + val init : (debug_info -> (unit, Errors.error) R.comp) R.res + (** [init dbg] notifies the forwarder that it should perform any tracing + initialisation. *) + + val set_trace_log_dir : + (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [set_trace_log_dir dbg dir] notifies the fowarder that the trace_log_dir + has been set to [dir]. *) + + val set_export_interval : + (debug_info -> float -> (unit, Errors.error) R.comp) R.res + (** [set_export_interval dbg interval] notifies the fowarder that the interval + between trace exports has been set to [interval]. *) + + val set_max_spans : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_spans dbg spans] notifies the fowarder that the max number of + spans has been set to [spans]. *) + + val set_max_traces : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_traces dbg traces] notifies the fowarder that the max number of + traces has been set to [traces]. *) + + val set_max_file_size : + (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_file_size dbg file_size] notifies the fowarder that the max file + size has been set to [file_size]. *) + + val set_host_id : (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [set_host_id dbg host_id] notifies the fowarder that the host to be traced + has been set to [host_id]. *) + + val set_compress_tracing_files : + (debug_info -> bool -> (unit, Errors.error) R.comp) R.res + (** [set_compress_tracing_files dbg enabled] notifies the fowarder that the + compression of tracing files has been set to [enabled]. *) +end + +(** A Server_impl module will define how the Server responds to ObserverAPI calls *) +module type Server_impl = sig + type context = unit + + val create : + context + -> dbg:debug_info + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:debug_info -> uuid:string -> unit + + val set_enabled : + context -> dbg:debug_info -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:debug_info + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:debug_info -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:debug_info -> unit + + val set_trace_log_dir : context -> dbg:debug_info -> dir:string -> unit + + val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit + + val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit + + val set_host_id : context -> dbg:debug_info -> host_id:string -> unit + + val set_compress_tracing_files : + context -> dbg:debug_info -> enabled:bool -> unit +end + +(** A Server for receiving ObserverAPI calls *) +module Server : functor (_ : Server_impl) () -> sig + module S : sig + val create : + ( debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> unit + ) + -> unit + + val destroy : (debug_info -> string -> unit) -> unit + + val set_enabled : (debug_info -> string -> bool -> unit) -> unit + + val set_attributes : + (debug_info -> string -> (string * string) list -> unit) -> unit + + val set_endpoints : (debug_info -> string -> string list -> unit) -> unit + + val init : (debug_info -> unit) -> unit + + val set_trace_log_dir : (debug_info -> string -> unit) -> unit + + val set_export_interval : (debug_info -> float -> unit) -> unit + + val set_max_spans : (debug_info -> int -> unit) -> unit + + val set_max_traces : (debug_info -> int -> unit) -> unit + + val set_max_file_size : (debug_info -> int -> unit) -> unit + + val set_host_id : (debug_info -> string -> unit) -> unit + + val set_compress_tracing_files : (debug_info -> bool -> unit) -> unit + end + + val process : Rpc.call -> Rpc.response +end + +(** A client for sending ObserverAPI calls to the above queue_name *) +module Client : sig + val create : + debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> unit + + val destroy : debug_info -> string -> unit + + val set_enabled : debug_info -> string -> bool -> unit + + val set_attributes : debug_info -> string -> (string * string) list -> unit + + val set_endpoints : debug_info -> string -> string list -> unit + + val init : debug_info -> unit + + val set_trace_log_dir : debug_info -> string -> unit + + val set_export_interval : debug_info -> float -> unit + + val set_max_spans : debug_info -> int -> unit + + val set_max_traces : debug_info -> int -> unit + + val set_max_file_size : debug_info -> int -> unit + + val set_host_id : debug_info -> string -> unit + + val set_compress_tracing_files : debug_info -> bool -> unit +end diff --git a/ocaml/xapi-idl/lib/observer_skeleton.mli b/ocaml/xapi-idl/lib/observer_skeleton.mli new file mode 100644 index 00000000000..c99b77f9a34 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_skeleton.mli @@ -0,0 +1,46 @@ +(** This module provides dummy implementations for each Observer function. + These are intended to be used to fill in the functions that the module will + not ever use, as they will raise an Unimplemented error if called *) +module Observer : sig + type context = unit + + val create : + context + -> dbg:string + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:string -> uuid:string -> unit + + val set_enabled : context -> dbg:string -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:string + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:string -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:string -> unit + + val set_trace_log_dir : context -> dbg:string -> dir:string -> unit + + val set_export_interval : context -> dbg:string -> interval:float -> unit + + val set_max_spans : context -> dbg:string -> spans:int -> unit + + val set_max_traces : context -> dbg:string -> traces:int -> unit + + val set_max_file_size : context -> dbg:string -> file_size:int -> unit + + val set_host_id : context -> dbg:string -> host_id:string -> unit + + val set_compress_tracing_files : context -> dbg:string -> enabled:bool -> unit +end diff --git a/quality-gate.sh b/quality-gate.sh index ceb82f67f65..7591e3c4ff4 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=466 + N=464 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From fc5f98b80badd2c9bb952b9a6ee7d7367bcb4f70 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 1 Jul 2025 15:56:18 +0800 Subject: [PATCH 341/492] CA-393417: Drop device controller of cgroup v1 For deprivileged qemu, following ops are performed - bind mount /dev/ to qemu chroot, so qemu can access it - cgroup controller deny all devices, except the target usb device However, new XS updated to cgroup v2 and the devices controller available anymore. Instead of bind mount all /dev folder, only the permitted usb devices are created into the chroot. Thus, the cgroup controller is no longer necessary. Besides, there are following updates accordingly - qemu pid is no longer necessary as command line args, as cgroup is dropped. - save and restore system /etc/ devices file ownership is no longer necessary. New file is cloned into chroot instead of bind mount system device file, so only need to set ownership of chroot file directly Signed-off-by: Lin Liu --- python3/libexec/usb_reset.py | 267 ++++++++--------------------------- 1 file changed, 55 insertions(+), 212 deletions(-) diff --git a/python3/libexec/usb_reset.py b/python3/libexec/usb_reset.py index 573936ae1c3..3e5ff849060 100755 --- a/python3/libexec/usb_reset.py +++ b/python3/libexec/usb_reset.py @@ -15,36 +15,30 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # attach -# ./usb_reset.py attach device -d dom-id -p pid [-r] +# ./usb_reset.py attach device -d dom-id [-r] # ./usb_reset.py attach 2-2 -d 12 -p 4130 # ./usb_reset.py attach 2-2 -d 12 -p 4130 -r # 1. reset device -# if without -r, do step 2~4 +# if without -r, do step 2~3 # 2. if it's the first USB device to pass-through -# a) bind mount /dev /sys in chroot directory (/var/xen/qemu/root-) -# b) create new cgroup devices:/qemu-, -# c) blacklist all and add default device whitelist, -# d) join current qemu process to this cgroup -# 3. save device uid/gid to /var/run/nonpersistent/usb/ -# 4. set device file uid/gid to (qemu_base + dom-id) -# 5. add current device to whitelist +# a) bind mount /sys in chroot directory (/var/xen/qemu/root-) +# b) clone (create the device with same major/minor number and mode) in chroot directory with same path +# 3. set device file uid/gid to (qemu_base + dom-id) # # detach # ./usb_reset.py detach device -d dom-id # ./usb_reset.py detach 2-2 -d 12 -# 1. restore device file uid/gid from /var/run/nonpersistent/usb/ -# 2. remove current device from whitelist +# 1. Remove the cloned device file in chroot directory # # cleanup # ./usb_reset.py cleanup -d dom-id # ./usb_reset.py cleanup -d 12 -# 1.remove the cgroup if one has been created. -# 2.umount /dev, /sys from chroot directory if they are mounted. +# 1.umount /sys from chroot directory if they are mounted. +# 2.remove /dev/bus directory in chroot directory if it exists import argparse import ctypes import ctypes.util -import errno import fcntl import grp import xcp.logger as log # pytype: disable=import-error @@ -52,7 +46,7 @@ import os import pwd import re -from stat import S_ISCHR, S_ISBLK +import shutil def parse_arg(): @@ -64,8 +58,6 @@ def parse_arg(): attach.add_argument("device", help="the target usb device") attach.add_argument("-d", dest="domid", type=int, required=True, help="specify the domid of the VM") - attach.add_argument("-p", dest="pid", type=int, required=True, - help="the process id of QEMU") attach.add_argument("-r", dest="reset_only", action="store_true", help="reset device only, for privileged mode") @@ -85,56 +77,6 @@ def get_root_dir(domid): return "/var/xen/qemu/root-{}".format(domid) -def get_cg_dir(domid): - return "/sys/fs/cgroup/devices/qemu-{}".format(domid) - - -def get_ids_path(device): - usb_dir = "/var/run/nonpersistent/usb" - try: - os.makedirs(usb_dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - return os.path.join(usb_dir, device) - - -def save_device_ids(device): - path = dev_path(device) - - try: - stat = os.stat(path) - ids_info = "{} {}".format(stat.st_uid, stat.st_gid) - except OSError as e: - log.error("Failed to stat {}: {}".format(path, str(e))) - exit(1) - - try: - with open(get_ids_path(device), "w") as f: - f.write(ids_info) - except IOError as e: - log.error("Failed to save device ids {}: {}".format(path, str(e))) - exit(1) - - -def load_device_ids(device): - ids_path = get_ids_path(device) - try: - with open(ids_path) as f: - uid, gid = list(map(int, f.readline().split())) - except (IOError, ValueError) as e: - log.error("Failed to load device ids: {}".format(str(e))) - - try: - os.remove(ids_path) - except OSError as e: - # ignore and continue - log.warning("Failed to remove device ids: {}".format(str(e))) - - return uid, gid # pyright: ignore[reportPossiblyUnboundVariable] # pragma: no cover - - # throw IOError, ValueError def read_int(path): with open(path) as f: @@ -157,109 +99,6 @@ def dev_path(device): exit(1) -def get_ctl(path, mode): # type:(str, str) -> str - """get the string to control device access for cgroup - :param path: the device file path - :param mode: either "r" or "rw" - :return: the string to control device access - """ - try: - st = os.stat(path) - except OSError as e: - log.error("Failed to get stat of {}: {}".format(path, str(e))) - raise - - t = "" - if S_ISBLK(st.st_mode): - t = "b" - elif S_ISCHR(st.st_mode): - t = "c" - if t and mode in ("r", "rw"): - return "{} {}:{} {}".format(t, os.major(st.st_rdev), os.minor( - st.st_rdev), mode) - raise RuntimeError("Failed to get control string of {}".format(path)) - - -def _device_ctl(path, domid, allow): - cg_dir = get_cg_dir(domid) - file_name = "/devices.allow" if allow else "/devices.deny" - try: - with open(cg_dir + file_name, "w") as f: - f.write(get_ctl(path, "rw")) - except (IOError, OSError, RuntimeError) as e: - log.error("Failed to {} {}: {}".format( - "allow" if allow else "deny", path, str(e))) - exit(1) - - -def allow_device(path, domid): - _device_ctl(path, domid, True) - - -def deny_device(path, domid): - _device_ctl(path, domid, False) - - -def setup_cgroup(domid, pid): # type:(str, str) -> None - """ - Associate the given process id (pid) with the given Linux kernel control group - and limit it's device access to only /dev/null. - - :param domid (str): The control group ID string (passed on from the command line) - :param pid (str): The process ID string (passed on from the command line) - - If the control group directory does not exist yet, the control group is created. - - - The pid goes into the file "tasks" to associate the process with the cgroup. - - Deny device access by default by writing "a" to devices.deny. - - Grant read-write access to /dev/null, writing it's device IDs to devices.allow. - - If any error occur during the setup process, the error is logged and - the program exits with a status code of 1. - """ - cg_dir = get_cg_dir(domid) - - try: - os.mkdir(cg_dir, 0o755) - except OSError as e: - if e.errno != errno.EEXIST: - log.error("Failed to create cgroup: {}".format(cg_dir)) - exit(1) - - try: - # unbuffered write to ensure each one is flushed immediately - # to the kernel's control group filesystem: - # - # The order of writes is likely not important, but the writes - # may have to be a single write() system call for the entire string. - # - # Using the unbuffered Raw IO mode, we know the write was done - # in exactly this way by the write function call itself, not later. - # - # With small writes like this , splitting them because of overflowing the - # buffer is not expected to happen. To stay safe and keep using unbuffered I/O - # We have to migrate to binary mode in python3,as python3 supports unbuffered - # raw I/O in binary mode. - # - with open(cg_dir + "/tasks", "wb", 0) as tasks, \ - open(cg_dir + "/devices.deny", "wb", 0) as deny, \ - open(cg_dir + "/devices.allow", "wb", 0) as allow: - - # deny all - deny.write(b"a") - - # To write bytes, we've to encode the strings to bytes below: - - # grant rw access to /dev/null by default - allow.write(get_ctl("/dev/null", "rw").encode()) - - tasks.write(str(pid).encode()) - - except (IOError, OSError, RuntimeError) as e: - log.error("Failed to setup cgroup: {}".format(str(e))) - exit(1) - - def mount(source, target, fs, flags=0): if ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True ).mount(source.encode(), target.encode(), fs.encode(), flags, None) < 0: @@ -277,7 +116,43 @@ def umount(target): format(target, os.strerror(ctypes.get_errno()))) -def attach(device, domid, pid, reset_only): +def clone_device(path, root_dir, domid): + """ + Clone the device file into the chroot directory. + + :param path: The source device file under system /dev to clone. + :param root_dir: The root directory of the chroot environment. + :param domid: The domain ID of the VM, used to set the device file's uid/gid. + """ + target_path = os.path.join(root_dir, path.lstrip(os.path.sep)) + if os.path.exists(target_path): + log.info("Device file {} already exists in chroot".format(target_path)) + return + + os.makedirs(os.path.dirname(target_path), exist_ok=True, mode=0o755) + + try: + st = os.stat(path) + except OSError as e: + log.error("Failed to get stat of {}: {}".format(path, str(e))) + exit(1) + + mode = st.st_mode + major = os.major(st.st_rdev) + minor = os.minor(st.st_rdev) + clone_device_id = os.makedev(major, minor) + os.mknod(target_path, mode, clone_device_id) + + # set device file uid/gid + try: + os.chown(target_path, pwd.getpwnam("qemu_base").pw_uid + domid, + grp.getgrnam("qemu_base").gr_gid + domid) + except OSError as e: + log.error("Failed to chown device file {}: {}".format(path, str(e))) + exit(1) + + +def attach(device, domid, reset_only): path = dev_path(device) # reset device @@ -293,27 +168,13 @@ def attach(device, domid, pid, reset_only): if reset_only: return - save_device_ids(device) - - # set device file uid/gid - try: - os.chown(path, pwd.getpwnam("qemu_base").pw_uid + domid, - grp.getgrnam("qemu_base").gr_gid + domid) - except OSError as e: - log.error("Failed to chown device file {}: {}".format(path, str(e))) - exit(1) - root_dir = get_root_dir(domid) dev_dir = root_dir + "/dev" if not os.path.isdir(root_dir) or not os.path.isdir(dev_dir): log.error("Error: The chroot or dev directory doesn't exist") exit(1) - if not os.path.isdir(dev_dir + "/bus"): - # first USB device to pass-through - MS_BIND = 4096 # mount flags, from fs.h - mount("/dev", dev_dir, "", MS_BIND) - setup_cgroup(domid, pid) + clone_device(path, root_dir, domid) sys_dir = root_dir + "/sys" # sys_dir could already be mounted because of PCI pass-through @@ -326,41 +187,23 @@ def attach(device, domid, pid, reset_only): if not os.path.isdir(sys_dir + "/devices"): mount("/sys", sys_dir, "sysfs") - # add device to cgroup allow list - allow_device(path, domid) - def detach(device, domid): path = dev_path(device) - uid, gid = load_device_ids(device) - - # restore uid, gid of the device file. - try: - os.chown(path, uid, gid) - except OSError as e: - log.error("Failed to chown device file {}: {}".format(path, str(e))) - exit(1) - - # remove device from cgroup allow list - deny_device(path, domid) + root_dir = get_root_dir(domid) + target_path = os.path.join(root_dir, path.lstrip(os.path.sep)) + os.remove(target_path) def cleanup(domid): - # remove the cgroup if one has been created. - if os.path.isdir(get_cg_dir(domid)): - try: - os.rmdir(get_cg_dir(domid)) - except OSError as e: - # log and continue - log.error("Failed to remove cgroup qemu-{}: {}" - .format(domid, str(e))) - # umount /dev, /sys from chroot directory if they are mounted. root_dir = get_root_dir(domid) dev_dir = root_dir + "/dev" sys_dir = root_dir + "/sys" - if os.path.isdir(dev_dir + "/bus"): - umount(dev_dir) + bus_dir = dev_dir + "/bus" + if os.path.isdir(bus_dir): + log.info("Removing bus directory: {} for cleanup".format(bus_dir)) + shutil.rmtree(bus_dir) if os.path.isdir(sys_dir + "/devices"): umount(sys_dir) @@ -371,7 +214,7 @@ def cleanup(domid): arg = parse_arg() if "attach" == arg.command: - attach(arg.device, arg.domid, arg.pid, arg.reset_only) + attach(arg.device, arg.domid, arg.reset_only) elif "detach" == arg.command: detach(arg.device, arg.domid) elif "cleanup" == arg.command: From 83a48882655d36daa6f6593603e9ab95a39ee664 Mon Sep 17 00:00:00 2001 From: Anthoine Bourgeois Date: Fri, 27 Jun 2025 10:48:26 +0200 Subject: [PATCH 342/492] xenopsd: set xen-platform-pci-bar-uc key in xenstore This patch add a new parameter named 'xen-platform-pci-bar-uc' in xenopsd config file who has a default value of 'true' to keep the default behavior of hvmloader. Putting 'false' to this parameter will tell xenopsd to add a xenstore key of '0' in: '/local/domain//hvmloader/pci/xen-platform-pci-bar-uc'. Only this key set to 0 will change the behavior of hvmloader. This changeset is link to this xen commit: https://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=22650d6054625be10172fe0c78b9cadd1a39bd63 Signed-off-by: Anthoine Bourgeois --- ocaml/xenopsd/lib/xenopsd.ml | 10 ++++++++++ ocaml/xenopsd/xc/domain.ml | 3 +++ ocaml/xenopsd/xenopsd.conf | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index 5ad6401730b..275cdcb79fa 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -49,6 +49,8 @@ let default_vbd_backend_kind = ref "vbd" let ca_140252_workaround = ref false +let xen_platform_pci_bar_uc = ref true + let action_after_qemu_crash = ref None let additional_ballooning_timeout = ref 120. @@ -207,6 +209,14 @@ let options = , (fun () -> string_of_bool !ca_140252_workaround) , "Workaround for evtchn misalignment for legacy PV tools" ) + ; ( "xen-platform-pci-bar-uc" + , Arg.Bool (fun x -> xen_platform_pci_bar_uc := x) + , (fun () -> string_of_bool !xen_platform_pci_bar_uc) + , "Controls whether, when the VM starts in HVM mode, the Xen PCI MMIO used \ + by grant tables is mapped as Uncached (UC, the default) or WriteBack \ + (WB, the workaround). WB mapping could improve performance of devices \ + using grant tables. This is useful on AMD platform only." + ) ; ( "additional-ballooning-timeout" , Arg.Set_float additional_ballooning_timeout , (fun () -> string_of_float !additional_ballooning_timeout) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 287c1c77b27..5e335874fe8 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -501,6 +501,9 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = xs.Xs.writev (dom_path ^ "/bios-strings") vm_info.bios_strings ; if vm_info.is_uefi then xs.Xs.write (dom_path ^ "/hvmloader/bios") "ovmf" ; + xs.Xs.write + (dom_path ^ "/hvmloader/pci/xen-platform-pci-bar-uc") + (if !Xenopsd.xen_platform_pci_bar_uc then "1" else "0") ; (* If a toolstack sees a domain which it should own in this state then the domain is not completely setup and should be shutdown. *) xs.Xs.write (dom_path ^ "/action-request") "poweroff" ; diff --git a/ocaml/xenopsd/xenopsd.conf b/ocaml/xenopsd/xenopsd.conf index e80194c1f55..447d6cde54a 100644 --- a/ocaml/xenopsd/xenopsd.conf +++ b/ocaml/xenopsd/xenopsd.conf @@ -108,3 +108,9 @@ disable-logging-for=http tracing tracing_export # time to wait for in-guest PV drivers to acknowledge a shutdown request # before we conclude that the drivers have failed # domain_shutdown_ack_timeout = 60 + +# Controls whether, when the VM starts in HVM mode, the Xen PCI MMIO used +# by grant tables is mapped as Uncached (UC, the default) or WriteBack +# (WB, the workaround). WB mapping could improve performance of devices +# using grant tables. This is useful on AMD platform only. +# xen-platform-pci-bar-uc=true From 56bd7c62c3ede7e063fc492a551e3c073877cb56 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 9 Jun 2025 10:20:57 +0100 Subject: [PATCH 343/492] CP-308455 VM.sysprep start with skeleton Add a new API call for VM sysprep and the corresponding XE implementation. This is mostly scaffolding. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_lifecycle.ml | 2 + ocaml/idl/datamodel_vm.ml | 11 +++++ ocaml/idl/schematest.ml | 2 +- ocaml/xapi-cli-server/cli_frontend.ml | 9 ++++ ocaml/xapi-cli-server/cli_operations.ml | 18 ++++++++ ocaml/xapi-cli-server/record_util.ml | 1 + ocaml/xapi/message_forwarding.ml | 11 +++++ ocaml/xapi/vm_sysprep.ml | 61 +++++++++++++++++++++++++ ocaml/xapi/vm_sysprep.mli | 15 ++++++ ocaml/xapi/xapi_vm.ml | 3 ++ ocaml/xapi/xapi_vm.mli | 2 + ocaml/xapi/xapi_vm_lifecycle.ml | 1 + 12 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 ocaml/xapi/vm_sysprep.ml create mode 100644 ocaml/xapi/vm_sysprep.mli diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index bbab96b8f0f..1973f0ed506 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -239,6 +239,8 @@ let prototyped_of_message = function Some "25.2.0" | "host", "set_numa_affinity_policy" -> Some "24.0.0" + | "VM", "sysprep" -> + Some "25.21.0-next" | "VM", "get_secureboot_readiness" -> Some "24.17.0" | "VM", "set_uefi_mode" -> diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index e72721b4ce0..5e4134afd0b 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2211,6 +2211,7 @@ let operations = ; ("reverting", "Reverting the VM to a previous snapshotted state") ; ("destroy", "refers to the act of uninstalling the VM") ; ("create_vtpm", "Creating and adding a VTPM to this VM") + ; ("sysprep", "Performing a Windows sysprep on this VM") ] ) @@ -2369,6 +2370,15 @@ let restart_device_models = ~allowed_roles:(_R_VM_POWER_ADMIN ++ _R_CLIENT_CERT) () +let sysprep = + call ~name:"sysprep" ~lifecycle:[] + ~params: + [ + (Ref _vm, "self", "The VM") + ; (String, "unattend", "XML content passed to sysprep") + ] + ~doc:"Pass unattend.xml to Windows sysprep" ~allowed_roles:_R_VM_ADMIN () + let vm_uefi_mode = Enum ( "vm_uefi_mode" @@ -2571,6 +2581,7 @@ let t = ; set_blocked_operations ; add_to_blocked_operations ; remove_from_blocked_operations + ; sysprep ] ~contents: ([ diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index c8abcb1f999..7bd70cb3aa5 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "4cd835e2557dd7b5cbda6c681730c447" +let last_known_schema_hash = "9cd32d98d092440c36617546a3d995bd" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index b066d7a8dd5..255f2be789e 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -2764,6 +2764,15 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "vm-sysprep" + , { + reqd= ["filename"] + ; optn= [] + ; help= "Pass and execute sysprep configuration file" + ; implementation= With_fd Cli_operations.vm_sysprep + ; flags= [Vm_selectors] + } + ) ; ( "diagnostic-vm-status" , { reqd= ["uuid"] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 65c91a031fc..f51c50851d4 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3588,6 +3588,24 @@ let vm_data_source_forget printer rpc session_id params = params ["data-source"] ) +let vm_sysprep fd printer rpc session_id params = + let filename = List.assoc "filename" params in + let unattend = + match get_client_file fd filename with + | Some xml -> + xml + | None -> + marshal fd (Command (PrintStderr "Failed to read file.\n")) ; + raise (ExitWithError 1) + in + ignore + (do_vm_op printer rpc session_id + (fun vm -> + Client.VM.sysprep ~rpc ~session_id ~self:(vm.getref ()) ~unattend + ) + params ["filename"] + ) + (* APIs to collect SR level RRDs *) let sr_data_source_list printer rpc session_id params = ignore diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml index d28b6b5f763..a11b30decb3 100644 --- a/ocaml/xapi-cli-server/record_util.ml +++ b/ocaml/xapi-cli-server/record_util.ml @@ -75,6 +75,7 @@ let vm_operation_table = ; (`csvm, "csvm") ; (`call_plugin, "call_plugin") ; (`create_vtpm, "create_vtpm") + ; (`sysprep, "sysprep") ] (* Intentional shadowing - data_souces_op, assertoperationinvalid, diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index b52aaaa20ec..15b984ad993 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3115,6 +3115,17 @@ functor (vm_uuid ~__context self) ; Local.VM.remove_from_blocked_operations ~__context ~self ~key ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self + + let sysprep ~__context ~self ~unattend = + info "VM.sysprep: self = '%s'" (vm_uuid ~__context self) ; + let local_fn = Local.VM.sysprep ~self ~unattend in + let remote_fn = Client.VM.sysprep ~self ~unattend in + let policy = Helpers.Policy.fail_immediately in + with_vm_operation ~__context ~self ~doc:"VM.sysprep" ~op:`sysprep + ~policy (fun () -> + forward_vm_op ~local_fn ~__context ~vm:self ~remote_fn + ) ; + Xapi_vm_lifecycle.update_allowed_operations ~__context ~self end module VM_metrics = struct end diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml new file mode 100644 index 00000000000..28be828daab --- /dev/null +++ b/ocaml/xapi/vm_sysprep.ml @@ -0,0 +1,61 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D +open Xapi_stdext_unix + +let ( // ) = Filename.concat + +let finally = Xapi_stdext_pervasives.Pervasiveext.finally + +let tmp_dir = Filename.get_temp_dir_name () + +let sr_dir = "/opt/opt/iso" + +let genisoimage = "/usr/bin/genisoimage" + +(** name of the ISO we will use for a VMi; this is not a path *) +let iso_name ~vm_uuid = + let now = Ptime_clock.now () |> Ptime.to_rfc3339 in + Printf.sprintf "config-%s-%s.iso" vm_uuid now + +(** taken from OCaml 5 stdlib *) +let temp_dir ?(dir = tmp_dir) ?(perms = 0o700) prefix suffix = + let rec try_name counter = + let name = Filename.temp_file ~temp_dir:dir prefix suffix in + try Sys.mkdir name perms ; name + with Sys_error _ as e -> + if counter >= 20 then raise e else try_name (counter + 1) + in + try_name 0 + +(** Crteate a temporary directory, and pass its path to [f]. Once [f] + returns the directory is removed again *) +let with_temp_dir ?(dir = tmp_dir) ?(perms = 0o700) prefix suffix f = + let dir = temp_dir ~dir ~perms prefix suffix in + finally (fun () -> f dir) (fun () -> Unixext.rm_rec dir) + +let make_iso ~vm_uuid ~unattend = + try + let _iso = sr_dir // iso_name ~vm_uuid in + Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 + (* Unixext.write_string_to_file path unattend *) + with e -> + let msg = Printexc.to_string e in + Helpers.internal_error "%s failed: %s" __FUNCTION__ msg + +(* This function is executed on the host where [vm] is running *) +let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli new file mode 100644 index 00000000000..9f6f9ab9724 --- /dev/null +++ b/ocaml/xapi/vm_sysprep.mli @@ -0,0 +1,15 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 8a1ca5e493a..f76c632665f 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1701,3 +1701,6 @@ let get_secureboot_readiness ~__context ~self = ) ) ) + +let sysprep ~__context ~self ~unattend = + Vm_sysprep.sysprep ~__context ~vm:self ~unattend diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 363e68b03d1..005b4cae4ae 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -450,3 +450,5 @@ val add_to_blocked_operations : val remove_from_blocked_operations : __context:Context.t -> self:API.ref_VM -> key:API.vm_operations -> unit + +val sysprep : __context:Context.t -> self:API.ref_VM -> unattend:string -> unit diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 5ec4ca6d792..50d7fcddb58 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -61,6 +61,7 @@ let allowed_power_states ~__context ~vmr ~(op : API.vm_operations) = | `send_sysrq | `send_trigger | `snapshot_with_quiesce + | `sysprep | `suspend -> [`Running] | `changing_dynamic_range -> From 8d35ebc9b61aefe4fe9f7398ac86957eb24f9ccc Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 11 Jun 2025 14:31:42 +0100 Subject: [PATCH 344/492] CP-308455 VM.sysprep implement mkdtemp We want to create a temporary directory that will be used to hold files for creating an ISO. There is no existing function that creates all necessary directories in a predicatble way. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_lifecycle.ml | 2 +- ocaml/xapi/vm_sysprep.ml | 63 +++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 1973f0ed506..ef79f8aec15 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -240,7 +240,7 @@ let prototyped_of_message = function | "host", "set_numa_affinity_policy" -> Some "24.0.0" | "VM", "sysprep" -> - Some "25.21.0-next" + Some "25.22.0" | "VM", "get_secureboot_readiness" -> Some "24.17.0" | "VM", "set_uefi_mode" -> diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 28be828daab..03a8e456af4 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -21,41 +21,68 @@ let ( // ) = Filename.concat let finally = Xapi_stdext_pervasives.Pervasiveext.finally -let tmp_dir = Filename.get_temp_dir_name () +let temp_dir = Filename.get_temp_dir_name () let sr_dir = "/opt/opt/iso" let genisoimage = "/usr/bin/genisoimage" -(** name of the ISO we will use for a VMi; this is not a path *) -let iso_name ~vm_uuid = - let now = Ptime_clock.now () |> Ptime.to_rfc3339 in - Printf.sprintf "config-%s-%s.iso" vm_uuid now +let failwith_fmt fmt = Printf.ksprintf failwith fmt + +let prng = Random.State.make_self_init () + +let temp_name prefix suffix = + let rnd = Random.State.bits prng land 0xFFFFFF in + Printf.sprintf "%s%06x%s" prefix rnd suffix -(** taken from OCaml 5 stdlib *) -let temp_dir ?(dir = tmp_dir) ?(perms = 0o700) prefix suffix = - let rec try_name counter = - let name = Filename.temp_file ~temp_dir:dir prefix suffix in - try Sys.mkdir name perms ; name - with Sys_error _ as e -> - if counter >= 20 then raise e else try_name (counter + 1) +(** [mkdtmp] creates a directory in [dir] and returns its path. If [dir] + does not yet exist it is created. It is a an error if [dir] is not a + directory. *) +let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = + ( match Sys.file_exists dir with + | true when not (Sys.is_directory dir) -> + failwith_fmt "s: %s is not a directory" __FUNCTION__ dir + | true -> + () + | false -> + Unixext.mkdir_rec dir perms + ) ; + let rec loop = function + | n when n >= 20 -> + failwith_fmt "s: can't create directory in %s" __FUNCTION__ dir + | n -> ( + let path = Filename.concat dir (temp_name prefix suffix) in + try Sys.mkdir path perms ; path with Sys_error _ -> loop (n + 1) + ) in - try_name 0 + loop 0 (** Crteate a temporary directory, and pass its path to [f]. Once [f] returns the directory is removed again *) -let with_temp_dir ?(dir = tmp_dir) ?(perms = 0o700) prefix suffix f = - let dir = temp_dir ~dir ~perms prefix suffix in +let with_temp_dir ?(dir = temp_dir) ?(perms = 0o700) prefix suffix f = + let dir = mkdtemp ~dir ~perms prefix suffix in finally (fun () -> f dir) (fun () -> Unixext.rm_rec dir) +(** name of the ISO we will use for a VMi; this is not a path *) +let iso_name ~vm_uuid = + let now = Ptime_clock.now () |> Ptime.to_rfc3339 in + Printf.sprintf "config-%s-%s.iso" vm_uuid now + let make_iso ~vm_uuid ~unattend = try let _iso = sr_dir // iso_name ~vm_uuid in - Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 - (* Unixext.write_string_to_file path unattend *) + Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 ; + with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" @@ fun temp_dir -> + debug "%s: %s = %b" __FUNCTION__ temp_dir (Sys.file_exists temp_dir) ; + let path = temp_dir // "unattend.xml" in + Unixext.write_string_to_file path unattend ; + debug "%s: written to %s" __FUNCTION__ path with e -> let msg = Printexc.to_string e in Helpers.internal_error "%s failed: %s" __FUNCTION__ msg (* This function is executed on the host where [vm] is running *) -let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ +let sysprep ~__context ~vm ~unattend = + debug "%s" __FUNCTION__ ; + let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + make_iso ~vm_uuid ~unattend From c6bee52b7d37f89c3894a44dafed082e5fc0b11b Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 12 Jun 2025 13:19:26 +0100 Subject: [PATCH 345/492] CP-308455 VM.sysprep make iso Implement creating an ISO from a temporary directory Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 03a8e456af4..16764b824a3 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -36,8 +36,8 @@ let temp_name prefix suffix = Printf.sprintf "%s%06x%s" prefix rnd suffix (** [mkdtmp] creates a directory in [dir] and returns its path. If [dir] - does not yet exist it is created. It is a an error if [dir] is not a - directory. *) + does not yet exist it is created. It is a an error if [dir] exists + and is not a directory. *) let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = ( match Sys.file_exists dir with | true when not (Sys.is_directory dir) -> @@ -68,15 +68,18 @@ let iso_name ~vm_uuid = let now = Ptime_clock.now () |> Ptime.to_rfc3339 in Printf.sprintf "config-%s-%s.iso" vm_uuid now +(** Create an ISO in [sr_dir] with content [unattend]. [sr_dir] is + created if it not already exists. *) let make_iso ~vm_uuid ~unattend = try - let _iso = sr_dir // iso_name ~vm_uuid in + let iso = sr_dir // iso_name ~vm_uuid in Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" @@ fun temp_dir -> - debug "%s: %s = %b" __FUNCTION__ temp_dir (Sys.file_exists temp_dir) ; let path = temp_dir // "unattend.xml" in Unixext.write_string_to_file path unattend ; - debug "%s: written to %s" __FUNCTION__ path + debug "%s: written to %s" __FUNCTION__ path ; + let args = ["-r"; "-J"; "-o"; iso; temp_dir] in + Forkhelpers.execute_command_get_output genisoimage args |> ignore with e -> let msg = Printexc.to_string e in Helpers.internal_error "%s failed: %s" __FUNCTION__ msg From 0e6f6ee78b759a7980bdc6297671debc01e6fbad Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 12 Jun 2025 13:59:23 +0100 Subject: [PATCH 346/492] CP-308455 VM.sysprep Add logging Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 16764b824a3..dd6c5906dc3 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -23,7 +23,7 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let temp_dir = Filename.get_temp_dir_name () -let sr_dir = "/opt/opt/iso" +let sr_dir = "/var/opt/iso" let genisoimage = "/usr/bin/genisoimage" @@ -69,17 +69,19 @@ let iso_name ~vm_uuid = Printf.sprintf "config-%s-%s.iso" vm_uuid now (** Create an ISO in [sr_dir] with content [unattend]. [sr_dir] is - created if it not already exists. *) + created if it not already exists. Returns the path of the ISO image *) let make_iso ~vm_uuid ~unattend = try let iso = sr_dir // iso_name ~vm_uuid in Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 ; - with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" @@ fun temp_dir -> - let path = temp_dir // "unattend.xml" in - Unixext.write_string_to_file path unattend ; - debug "%s: written to %s" __FUNCTION__ path ; - let args = ["-r"; "-J"; "-o"; iso; temp_dir] in - Forkhelpers.execute_command_get_output genisoimage args |> ignore + with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> + let path = temp_dir // "unattend.xml" in + Unixext.write_string_to_file path unattend ; + debug "%s: written to %s" __FUNCTION__ path ; + let args = ["-r"; "-J"; "-o"; iso; temp_dir] in + Forkhelpers.execute_command_get_output genisoimage args |> ignore ; + iso + ) with e -> let msg = Printexc.to_string e in Helpers.internal_error "%s failed: %s" __FUNCTION__ msg @@ -88,4 +90,5 @@ let make_iso ~vm_uuid ~unattend = let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in - make_iso ~vm_uuid ~unattend + let iso = make_iso ~vm_uuid ~unattend in + debug "%s: created %s" __FUNCTION__ iso From b88c199d419dc14ee1f805618fbfc80eec38de37 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 12 Jun 2025 16:27:58 +0100 Subject: [PATCH 347/492] CP-308455 VM.sysprep Add SR creation Create a local SR unless it exists. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 43 ++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index dd6c5906dc3..87f2689e0dd 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -23,14 +23,18 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let temp_dir = Filename.get_temp_dir_name () -let sr_dir = "/var/opt/iso" - let genisoimage = "/usr/bin/genisoimage" let failwith_fmt fmt = Printf.ksprintf failwith fmt let prng = Random.State.make_self_init () +module SR = struct + let dir = "/var/opt/iso" + + let name hostname = Printf.sprintf "SYSPREP-%s" hostname +end + let temp_name prefix suffix = let rnd = Random.State.bits prng land 0xFFFFFF in Printf.sprintf "%s%06x%s" prefix rnd suffix @@ -64,16 +68,16 @@ let with_temp_dir ?(dir = temp_dir) ?(perms = 0o700) prefix suffix f = finally (fun () -> f dir) (fun () -> Unixext.rm_rec dir) (** name of the ISO we will use for a VMi; this is not a path *) -let iso_name ~vm_uuid = +let iso_basename ~vm_uuid = let now = Ptime_clock.now () |> Ptime.to_rfc3339 in - Printf.sprintf "config-%s-%s.iso" vm_uuid now + Printf.sprintf "sysprep-%s-%s.iso" vm_uuid now -(** Create an ISO in [sr_dir] with content [unattend]. [sr_dir] is +(** Create an ISO in [SR.dir] with content [unattend]. [SR.dir] is created if it not already exists. Returns the path of the ISO image *) let make_iso ~vm_uuid ~unattend = try - let iso = sr_dir // iso_name ~vm_uuid in - Xapi_stdext_unix.Unixext.mkdir_rec sr_dir 0o755 ; + let iso = SR.dir // iso_basename ~vm_uuid in + Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> let path = temp_dir // "unattend.xml" in Unixext.write_string_to_file path unattend ; @@ -86,9 +90,32 @@ let make_iso ~vm_uuid ~unattend = let msg = Printexc.to_string e in Helpers.internal_error "%s failed: %s" __FUNCTION__ msg +(** create a local ISO SR when necessary and update it such that it + recognises any ISO we added or removed *) +let update_sr ~__context = + let host = Helpers.get_localhost ~__context in + let hostname = Db.Host.get_hostname ~__context ~self:host in + let label = SR.name hostname in + let mib n = Int64.(n * 1024 * 1024 |> of_int) in + let sr = + match Db.SR.get_by_name_label ~__context ~label with + | [sr] -> + sr + | sr :: _ -> + warn "%s: more than one SR with label %s" __FUNCTION__ label ; + sr + | [] -> + let device_config = [("location", SR.dir); ("legcay_mode", "true")] in + Xapi_sr.create ~__context ~host ~name_label:label ~device_config + ~content_type:"iso" ~_type:"iso" ~name_description:"Sysprep ISOs" + ~shared:false ~sm_config:[] ~physical_size:(mib 512) + in + Xapi_sr.scan ~__context ~sr + (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in let iso = make_iso ~vm_uuid ~unattend in - debug "%s: created %s" __FUNCTION__ iso + debug "%s: created ISO %s" __FUNCTION__ iso ; + update_sr ~__context From 2061e2e6f22c89fdf7012878dbc477facb8acc8b Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 16 Jun 2025 13:57:01 +0100 Subject: [PATCH 348/492] CP-308455 VM.sysprep Find VBD for VM's CDR We need to locate the CD drive of the VM. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 56 +++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 87f2689e0dd..84b1336827d 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -21,8 +21,6 @@ let ( // ) = Filename.concat let finally = Xapi_stdext_pervasives.Pervasiveext.finally -let temp_dir = Filename.get_temp_dir_name () - let genisoimage = "/usr/bin/genisoimage" let failwith_fmt fmt = Printf.ksprintf failwith fmt @@ -35,10 +33,13 @@ module SR = struct let name hostname = Printf.sprintf "SYSPREP-%s" hostname end +(** create a name with a random infix *) let temp_name prefix suffix = let rnd = Random.State.bits prng land 0xFFFFFF in Printf.sprintf "%s%06x%s" prefix rnd suffix +let temp_dir = Filename.get_temp_dir_name () + (** [mkdtmp] creates a directory in [dir] and returns its path. If [dir] does not yet exist it is created. It is a an error if [dir] exists and is not a directory. *) @@ -67,7 +68,7 @@ let with_temp_dir ?(dir = temp_dir) ?(perms = 0o700) prefix suffix f = let dir = mkdtemp ~dir ~perms prefix suffix in finally (fun () -> f dir) (fun () -> Unixext.rm_rec dir) -(** name of the ISO we will use for a VMi; this is not a path *) +(** name of the ISO we will use for a VM; this is not a path *) let iso_basename ~vm_uuid = let now = Ptime_clock.now () |> Ptime.to_rfc3339 in Printf.sprintf "sysprep-%s-%s.iso" vm_uuid now @@ -76,7 +77,8 @@ let iso_basename ~vm_uuid = created if it not already exists. Returns the path of the ISO image *) let make_iso ~vm_uuid ~unattend = try - let iso = SR.dir // iso_basename ~vm_uuid in + let basename = iso_basename ~vm_uuid in + let iso = SR.dir // basename in Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> let path = temp_dir // "unattend.xml" in @@ -84,7 +86,7 @@ let make_iso ~vm_uuid ~unattend = debug "%s: written to %s" __FUNCTION__ path ; let args = ["-r"; "-J"; "-o"; iso; temp_dir] in Forkhelpers.execute_command_get_output genisoimage args |> ignore ; - iso + (iso, basename) ) with e -> let msg = Printexc.to_string e in @@ -105,17 +107,53 @@ let update_sr ~__context = warn "%s: more than one SR with label %s" __FUNCTION__ label ; sr | [] -> - let device_config = [("location", SR.dir); ("legcay_mode", "true")] in + let device_config = [("location", SR.dir); ("legacy_mode", "true")] in Xapi_sr.create ~__context ~host ~name_label:label ~device_config ~content_type:"iso" ~_type:"iso" ~name_description:"Sysprep ISOs" ~shared:false ~sm_config:[] ~physical_size:(mib 512) in - Xapi_sr.scan ~__context ~sr + Xapi_sr.scan ~__context ~sr ; + sr + +let find_cdr_vbd ~__context ~vm = + let vbds = Db.VM.get_VBDs ~__context ~self:vm in + let vbds' = + List.map (fun self -> (self, Db.VBD.get_record ~__context ~self)) vbds + in + let is_cd (_rf, rc) = + let open API in + rc.vBD_type = `CD && rc.vBD_empty + in + let uuid = Db.VM.get_uuid ~__context ~self:vm in + match List.filter is_cd vbds' with + | [] -> + failwith_fmt "%s: can't find CDR for VM %s" __FUNCTION__ uuid + | [(rf, rc)] -> + debug "%s: for VM %s using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; + rf + | (rf, rc) :: _ -> + debug "%s: for VM %s using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; + warn "%s: for VM %s found additions VBDs" __FUNCTION__ uuid ; + rf + +let find_vdi ~__context ~label = + match Db.VDI.get_by_name_label ~__context ~label with + | [] -> + failwith_fmt "%s: can't find VDI for %s" __FUNCTION__ label + | [vdi] -> + vdi + | vdi :: _ -> + warn "%s: more than one VDI with label %s" __FUNCTION__ label ; + vdi (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in - let iso = make_iso ~vm_uuid ~unattend in + let iso, label = make_iso ~vm_uuid ~unattend in debug "%s: created ISO %s" __FUNCTION__ iso ; - update_sr ~__context + let _sr = update_sr ~__context in + let vbd = find_cdr_vbd ~__context ~vm in + let vdi = find_vdi ~__context ~label in + debug "%s: inserting Syspep VDI for VM %s" __FUNCTION__ vm_uuid ; + Xapi_vbd.insert ~__context ~vdi ~vbd From b22f441be1dfa0706eec4895e2947deff3a859fc Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 17 Jun 2025 11:03:18 +0100 Subject: [PATCH 349/492] CP-308455 VM.sysprep Implement trigger The VM is notified to perform a sysprep by writing to XenStore. The VM picks this up via its guest agent. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 84b1336827d..ae3acbb030e 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -115,6 +115,7 @@ let update_sr ~__context = Xapi_sr.scan ~__context ~sr ; sr +(** Find the VBD for the CD drive on [vm] *) let find_cdr_vbd ~__context ~vm = let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vbds' = @@ -136,6 +137,8 @@ let find_cdr_vbd ~__context ~vm = warn "%s: for VM %s found additions VBDs" __FUNCTION__ uuid ; rf +(** Find the VDI that contains the unattend.xml based on its name. This + should be unique *) let find_vdi ~__context ~label = match Db.VDI.get_by_name_label ~__context ~label with | [] -> @@ -146,14 +149,29 @@ let find_vdi ~__context ~label = warn "%s: more than one VDI with label %s" __FUNCTION__ label ; vdi +let trigger ~domid = + let open Ezxenstore_core.Xenstore in + let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in + with_xs (fun xs -> + xs.Xs.write (control // "filename") "D://unattend.xml" ; + Thread.delay 5.0 ; + xs.Xs.write (control // "action") "sysprep" + ) ; + debug "%s: notified domain %Ld" __FUNCTION__ domid + (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = debug "%s" __FUNCTION__ ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let domid = Db.VM.get_domid ~__context ~self:vm in + if domid <= 0L then + failwith_fmt "%s: VM %s does not have a domain" __FUNCTION__ vm_uuid ; let iso, label = make_iso ~vm_uuid ~unattend in debug "%s: created ISO %s" __FUNCTION__ iso ; let _sr = update_sr ~__context in let vbd = find_cdr_vbd ~__context ~vm in let vdi = find_vdi ~__context ~label in - debug "%s: inserting Syspep VDI for VM %s" __FUNCTION__ vm_uuid ; - Xapi_vbd.insert ~__context ~vdi ~vbd + debug "%s: inserting Sysppep VDI for VM %s" __FUNCTION__ vm_uuid ; + Xapi_vbd.insert ~__context ~vdi ~vbd ; + Thread.delay 5.0 ; + trigger ~domid From 84e5a47de81be1c3e564752144536530a428c2f1 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 18 Jun 2025 10:28:32 +0100 Subject: [PATCH 350/492] CP-308455 VM.sysprep log sysprep status Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index ae3acbb030e..69459107ced 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -149,15 +149,19 @@ let find_vdi ~__context ~label = warn "%s: more than one VDI with label %s" __FUNCTION__ label ; vdi +(** notify the VM with [domid] to run sysprep and where to find the + file. *) let trigger ~domid = let open Ezxenstore_core.Xenstore in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in - with_xs (fun xs -> - xs.Xs.write (control // "filename") "D://unattend.xml" ; - Thread.delay 5.0 ; - xs.Xs.write (control // "action") "sysprep" - ) ; - debug "%s: notified domain %Ld" __FUNCTION__ domid + with_xs @@ fun xs -> + xs.Xs.write (control // "filename") "D://unattend.xml" ; + Thread.delay 5.0 ; + xs.Xs.write (control // "action") "sysprep" ; + debug "%s: notified domain %Ld" __FUNCTION__ domid ; + Thread.delay 5.0 ; + let action = xs.Xs.read (control // "action") in + debug "%s: sysprep for domain %Ld reports %S" __FUNCTION__ domid action (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = From 637267205c3f7b9c7ab7c0b73c40ce95049fe92f Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 18 Jun 2025 13:33:11 +0100 Subject: [PATCH 351/492] CP-308455 VM.sysprep watch execution, clean up Introduce on_startup(), called on xapi startup, to clean up the local SR to avoid accumulating ISO files. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_errors.ml | 3 + ocaml/xapi-consts/api_errors.ml | 6 ++ ocaml/xapi/vm_sysprep.ml | 102 +++++++++++++++++++++++++------- ocaml/xapi/vm_sysprep.mli | 12 +++- ocaml/xapi/xapi.ml | 4 ++ ocaml/xapi/xapi_vm.ml | 13 +++- 6 files changed, 117 insertions(+), 23 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 30e185ac192..acd470a6f46 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -2048,6 +2048,9 @@ let _ = enable it in XC or run xe pool-enable-tls-verification instead." () ; + error Api_errors.sysprep ["vm"; "message"] + ~doc:"VM.sysprep error with details in the message" () ; + message (fst Api_messages.ha_pool_overcommitted) ~doc: diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 077a8dacbf9..210bebe1b2a 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1429,3 +1429,9 @@ let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = add_error "TLS_VERIFICATION_NOT_ENABLED_IN_POOL" + +(* VM.sysprep *) + +(* Using a single error during development, might want to expand this + later *) +let sysprep = add_error "SYSPREP" diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 69459107ced..52f16473633 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -31,8 +31,36 @@ module SR = struct let dir = "/var/opt/iso" let name hostname = Printf.sprintf "SYSPREP-%s" hostname + + let find_opt ~__context ~label = + match Db.SR.get_by_name_label ~__context ~label with + | [sr] -> + Some sr + | sr :: _ -> + warn "%s: more than one SR with label %s" __FUNCTION__ label ; + Some sr + | [] -> + None end +(** This is called on xapi startup. Opportunity to set up or clean up. + We destroy all VDIs that are unused. *) +let on_startup ~__context = + let host = Helpers.get_localhost ~__context in + let hostname = Db.Host.get_hostname ~__context ~self:host in + match SR.find_opt ~__context ~label:(SR.name hostname) with + | None -> + () + | Some sr -> ( + Db.SR.get_VDIs ~__context ~self:sr + |> List.iter @@ fun self -> + match Db.VDI.get_record ~__context ~self with + | API.{vDI_VBDs= []; vDI_location= _location; _} -> + Xapi_vdi.destroy ~__context ~self + | _ -> + () + ) + (** create a name with a random infix *) let temp_name prefix suffix = let rnd = Random.State.bits prng land 0xFFFFFF in @@ -52,15 +80,16 @@ let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = | false -> Unixext.mkdir_rec dir perms ) ; - let rec loop = function - | n when n >= 20 -> + let rec try_upto = function + | n when n < 0 -> failwith_fmt "s: can't create directory in %s" __FUNCTION__ dir | n -> ( let path = Filename.concat dir (temp_name prefix suffix) in - try Sys.mkdir path perms ; path with Sys_error _ -> loop (n + 1) + try Sys.mkdir path perms ; path + with Sys_error _ -> try_upto (n - 1) ) in - loop 0 + try_upto 20 (** Crteate a temporary directory, and pass its path to [f]. Once [f] returns the directory is removed again *) @@ -100,13 +129,10 @@ let update_sr ~__context = let label = SR.name hostname in let mib n = Int64.(n * 1024 * 1024 |> of_int) in let sr = - match Db.SR.get_by_name_label ~__context ~label with - | [sr] -> - sr - | sr :: _ -> - warn "%s: more than one SR with label %s" __FUNCTION__ label ; + match SR.find_opt ~__context ~label with + | Some sr -> sr - | [] -> + | None -> let device_config = [("location", SR.dir); ("legacy_mode", "true")] in Xapi_sr.create ~__context ~host ~name_label:label ~device_config ~content_type:"iso" ~_type:"iso" ~name_description:"Sysprep ISOs" @@ -154,28 +180,62 @@ let find_vdi ~__context ~label = let trigger ~domid = let open Ezxenstore_core.Xenstore in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in - with_xs @@ fun xs -> - xs.Xs.write (control // "filename") "D://unattend.xml" ; - Thread.delay 5.0 ; - xs.Xs.write (control // "action") "sysprep" ; - debug "%s: notified domain %Ld" __FUNCTION__ domid ; - Thread.delay 5.0 ; - let action = xs.Xs.read (control // "action") in - debug "%s: sysprep for domain %Ld reports %S" __FUNCTION__ domid action + with_xs (fun xs -> + xs.Xs.write (control // "filename") "D://unattend.xml" ; + Thread.delay 5.0 ; + xs.Xs.write (control // "action") "sysprep" ; + debug "%s: notified domain %Ld" __FUNCTION__ domid ; + let rec wait n = + match (n, xs.Xs.read (control // "action")) with + | _, "running" -> + "running" + | n, action when n < 0 -> + action + | _, _ -> + Thread.delay 1.0 ; + wait (n - 1) + in + (* wait up to 5 iterations for runnung to appear or report whatever + is the status at the end *) + wait 5 + ) (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = + let open Ezxenstore_core.Xenstore in debug "%s" __FUNCTION__ ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in let domid = Db.VM.get_domid ~__context ~self:vm in + let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then - failwith_fmt "%s: VM %s does not have a domain" __FUNCTION__ vm_uuid ; + failwith_fmt "%s: VM %s is not running" __FUNCTION__ vm_uuid ; + if String.length unattend > 32 * 1024 then + fail "%s: provided file for %s larger than 32KiB" __FUNCTION__ vm_uuid ; + with_xs (fun xs -> + match xs.Xs.read (control // "feature-sysprep") with + | "1" -> + debug "%s: VM %s supports sysprep" __FUNCTION__ vm_uuid + | _ -> + fail "VM %s does not support sysprep" vm_uuid + | exception _ -> + fail "VM %s does not support sysprep" vm_uuid + ) ; let iso, label = make_iso ~vm_uuid ~unattend in debug "%s: created ISO %s" __FUNCTION__ iso ; let _sr = update_sr ~__context in let vbd = find_cdr_vbd ~__context ~vm in let vdi = find_vdi ~__context ~label in - debug "%s: inserting Sysppep VDI for VM %s" __FUNCTION__ vm_uuid ; + debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; Xapi_vbd.insert ~__context ~vdi ~vbd ; Thread.delay 5.0 ; - trigger ~domid + match trigger ~domid with + | "running" -> + debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; + Xapi_vbd.eject ~__context ~vbd ; + Sys.remove iso ; + Result.ok () + | status -> + debug "%s: sysprep %S, ejecting CD" __FUNCTION__ status ; + Xapi_vbd.eject ~__context ~vbd ; + Sys.remove iso ; + Result.error status diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 9f6f9ab9724..5c04935d27a 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -12,4 +12,14 @@ * GNU Lesser General Public License for more details. *) -val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit +val on_startup : __context:Context.t -> unit +(** clean up on toolstart start up *) + +val sysprep : + __context:Context.t + -> vm:API.ref_VM + -> unattend:string + -> (unit, string) Result.t +(** Execute sysprep on [vm] using script [unattend]. This requires + driver support from the VM and is checked. [unattend:string] must + not exceed 32kb. *) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index a12e3ec0c83..3d908bdec0f 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1170,6 +1170,10 @@ let server_init () = , [Startup.OnThread] , Remote_requests.handle_requests ) + ; ( "Remove local ISO SR" + , [Startup.OnThread] + , fun () -> Vm_sysprep.on_startup ~__context + ) ] ; ( match Pool_role.get_role () with | Pool_role.Master -> diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index f76c632665f..a2cca867c63 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1703,4 +1703,15 @@ let get_secureboot_readiness ~__context ~self = ) let sysprep ~__context ~self ~unattend = - Vm_sysprep.sysprep ~__context ~vm:self ~unattend + match Vm_sysprep.sysprep ~__context ~vm:self ~unattend with + | Ok _ -> + () + | Error msg -> + let uuid = Db.VM.get_uuid ~__context ~self in + raise + Api_errors.( + Server_error (sysprep, [uuid; "Sysprep not found running: " ^ msg]) + ) + | exception Failure msg -> + let uuid = Db.VM.get_uuid ~__context ~self in + raise Api_errors.(Server_error (sysprep, [uuid; msg])) From bf3ef79d9167d0c90e725490c1f5759bf46a16d3 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 19 Jun 2025 14:21:30 +0100 Subject: [PATCH 352/492] CP-308455 VM.sysprep add feature flag For simplicity, add vm-sysprep-enabled = true/false to xapi.conf rather than using a full V6D feature flag. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 5 +++-- ocaml/xapi/xapi_globs.ml | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 52f16473633..660f2e1212f 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -85,8 +85,7 @@ let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = failwith_fmt "s: can't create directory in %s" __FUNCTION__ dir | n -> ( let path = Filename.concat dir (temp_name prefix suffix) in - try Sys.mkdir path perms ; path - with Sys_error _ -> try_upto (n - 1) + try Sys.mkdir path perms ; path with Sys_error _ -> try_upto (n - 1) ) in try_upto 20 @@ -204,6 +203,8 @@ let trigger ~domid = let sysprep ~__context ~vm ~unattend = let open Ezxenstore_core.Xenstore in debug "%s" __FUNCTION__ ; + if not !Xapi_globs.vm_sysprep_enabled then + fail "Experimental VM.sysprep API call is not enabled" ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in let domid = Db.VM.get_domid ~__context ~self:vm in let control = Printf.sprintf "/local/domain/%Ld/control" domid in diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index ad8914e9de7..fa0cc8d3451 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1089,6 +1089,9 @@ let reuse_pool_sessions = ref false let validate_reusable_pool_session = ref false (* Validate a reusable session before each use. This is slower and should not be required *) +let vm_sysprep_enabled = ref false +(* enable VM.sysprep API *) + let test_open = ref 0 let xapi_requests_cgroup = @@ -1751,6 +1754,11 @@ let other_options = , (fun () -> string_of_bool !validate_reusable_pool_session) , "Enable validation of reusable pool sessions before use" ) + ; ( "vm-sysprep-enabled" + , Arg.Set vm_sysprep_enabled + , (fun () -> string_of_bool !vm_sysprep_enabled) + , "Enable VM.sysprep API" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. From 4c3e27ddcdc6ebc638ded75ffce458944e7b04b2 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 19 Jun 2025 16:46:55 +0100 Subject: [PATCH 353/492] CP-308455 VM.syspreo add some comments Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 660f2e1212f..da0e08bb68d 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -27,6 +27,8 @@ let failwith_fmt fmt = Printf.ksprintf failwith fmt let prng = Random.State.make_self_init () +(* A local ISO SR; we create an ISO that holds an unattend.xml file that + is than passed as CD to a VM *) module SR = struct let dir = "/var/opt/iso" @@ -61,7 +63,8 @@ let on_startup ~__context = () ) -(** create a name with a random infix *) +(** create a name with a random infix. We need random names for + temporay directories to avoid collition *) let temp_name prefix suffix = let rnd = Random.State.bits prng land 0xFFFFFF in Printf.sprintf "%s%06x%s" prefix rnd suffix From 20e33e76ae9ffd8f42c1a08173c51cdc8c8bd975 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 20 Jun 2025 10:16:33 +0100 Subject: [PATCH 354/492] CP-308455 VM.sysprep unify error handling Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 24 ++++++++++++++---------- ocaml/xapi/vm_sysprep.mli | 10 ++++------ ocaml/xapi/xapi_vm.ml | 19 ++++++++++--------- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index da0e08bb68d..647d35bffa3 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -23,7 +23,12 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let genisoimage = "/usr/bin/genisoimage" -let failwith_fmt fmt = Printf.ksprintf failwith fmt +(** This will be shown to the user to explain a failure *) +exception Sysprep of string + +let fail fmt = Printf.ksprintf (fun msg -> raise (Sysprep msg)) fmt + +let internal_error = Helpers.internal_error let prng = Random.State.make_self_init () @@ -77,7 +82,7 @@ let temp_dir = Filename.get_temp_dir_name () let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = ( match Sys.file_exists dir with | true when not (Sys.is_directory dir) -> - failwith_fmt "s: %s is not a directory" __FUNCTION__ dir + internal_error "s: %s is not a directory" __FUNCTION__ dir | true -> () | false -> @@ -85,7 +90,7 @@ let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = ) ; let rec try_upto = function | n when n < 0 -> - failwith_fmt "s: can't create directory in %s" __FUNCTION__ dir + internal_error "%s: can't create directory %S" __FUNCTION__ dir | n -> ( let path = Filename.concat dir (temp_name prefix suffix) in try Sys.mkdir path perms ; path with Sys_error _ -> try_upto (n - 1) @@ -156,7 +161,7 @@ let find_cdr_vbd ~__context ~vm = let uuid = Db.VM.get_uuid ~__context ~self:vm in match List.filter is_cd vbds' with | [] -> - failwith_fmt "%s: can't find CDR for VM %s" __FUNCTION__ uuid + fail "can't find CDR for VM %s" uuid | [(rf, rc)] -> debug "%s: for VM %s using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; rf @@ -170,7 +175,7 @@ let find_cdr_vbd ~__context ~vm = let find_vdi ~__context ~label = match Db.VDI.get_by_name_label ~__context ~label with | [] -> - failwith_fmt "%s: can't find VDI for %s" __FUNCTION__ label + internal_error "%s: can't find VDI for %s" __FUNCTION__ label | [vdi] -> vdi | vdi :: _ -> @@ -212,7 +217,7 @@ let sysprep ~__context ~vm ~unattend = let domid = Db.VM.get_domid ~__context ~self:vm in let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then - failwith_fmt "%s: VM %s is not running" __FUNCTION__ vm_uuid ; + fail " VM %s is not running" __FUNCTION__ vm_uuid ; if String.length unattend > 32 * 1024 then fail "%s: provided file for %s larger than 32KiB" __FUNCTION__ vm_uuid ; with_xs (fun xs -> @@ -236,10 +241,9 @@ let sysprep ~__context ~vm ~unattend = | "running" -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; Xapi_vbd.eject ~__context ~vbd ; - Sys.remove iso ; - Result.ok () + Sys.remove iso | status -> debug "%s: sysprep %S, ejecting CD" __FUNCTION__ status ; Xapi_vbd.eject ~__context ~vbd ; - Sys.remove iso ; - Result.error status + fail "VM %s sysprep not found running as expected: %S" vm_uuid status + status diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 5c04935d27a..e577af93a07 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -12,14 +12,12 @@ * GNU Lesser General Public License for more details. *) +exception Failure of string + val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) -val sysprep : - __context:Context.t - -> vm:API.ref_VM - -> unattend:string - -> (unit, string) Result.t +val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit (** Execute sysprep on [vm] using script [unattend]. This requires driver support from the VM and is checked. [unattend:string] must - not exceed 32kb. *) + not exceed 32kb. Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index a2cca867c63..f2a8b5bfe8a 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1703,15 +1703,16 @@ let get_secureboot_readiness ~__context ~self = ) let sysprep ~__context ~self ~unattend = + let uuid = Db.VM.get_uuid ~__context ~self in + debug "%s %S (1/2)" __FUNCTION__ uuid ; match Vm_sysprep.sysprep ~__context ~vm:self ~unattend with - | Ok _ -> + | () -> + debug "%s %S (2/2)" __FUNCTION__ uuid ; () - | Error msg -> - let uuid = Db.VM.get_uuid ~__context ~self in - raise - Api_errors.( - Server_error (sysprep, [uuid; "Sysprep not found running: " ^ msg]) - ) - | exception Failure msg -> - let uuid = Db.VM.get_uuid ~__context ~self in + | exception Vm_sysprep.Sysprep msg -> + debug "%s %S (2/2)" __FUNCTION__ uuid ; + raise Api_errors.(Server_error (sysprep, [uuid; msg])) + | exception e -> + debug "%s %S (2/2)" __FUNCTION__ uuid ; + let msg = Printexc.to_string e in raise Api_errors.(Server_error (sysprep, [uuid; msg])) From 57dbab7513ffa5c09be53dbc3339b36d9bf9c790 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 23 Jun 2025 14:48:11 +0100 Subject: [PATCH 355/492] CP-308455 VM.sysprep list genisoimage as resource Add the paths to genisoimage so xapi_globs and list it as non-essential binary. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 4 ++-- ocaml/xapi/vm_sysprep.mli | 3 ++- ocaml/xapi/xapi_globs.ml | 3 +++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 647d35bffa3..22abb870a35 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -21,7 +21,7 @@ let ( // ) = Filename.concat let finally = Xapi_stdext_pervasives.Pervasiveext.finally -let genisoimage = "/usr/bin/genisoimage" +let genisoimage = !Xapi_globs.genisoimage_path (** This will be shown to the user to explain a failure *) exception Sysprep of string @@ -245,5 +245,5 @@ let sysprep ~__context ~vm ~unattend = | status -> debug "%s: sysprep %S, ejecting CD" __FUNCTION__ status ; Xapi_vbd.eject ~__context ~vbd ; + Sys.remove iso ; fail "VM %s sysprep not found running as expected: %S" vm_uuid status - status diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index e577af93a07..db4a18455e7 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -12,7 +12,8 @@ * GNU Lesser General Public License for more details. *) -exception Failure of string +(** error message that may be passed to API clients *) +exception Sysprep of string val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index fa0cc8d3451..88b957c2f2f 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1097,6 +1097,8 @@ let test_open = ref 0 let xapi_requests_cgroup = "/sys/fs/cgroup/cpu/control.slice/xapi.service/request" +let genisoimage_path = ref "/usr/bin/genisoimage" + (* Event.{from,next} batching delays *) let make_batching name ~delay_before ~delay_between = let name = Printf.sprintf "%s_delay" name in @@ -1950,6 +1952,7 @@ module Resources = struct , pvsproxy_close_cache_vdi , "Path to close-cache-vdi.sh" ) + ; ("genisoimage", genisoimage_path, "Path to genisoimage") ] let essential_files = From b16b6d8f4b593314fcb395b4378b593182ffdba1 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Mon, 23 Jun 2025 14:48:11 +0100 Subject: [PATCH 356/492] CP-308455 VM.sysprep improve error handling, use API.Client - use API.Client to make sure API calls are properly forwarded. - unify error handlign and use a sum type for the error - invoke on_startup later during xapi startup Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_lifecycle.ml | 2 +- ocaml/xapi-consts/api_errors.ml | 4 --- ocaml/xapi/message_forwarding.ml | 3 +- ocaml/xapi/vm_sysprep.ml | 60 +++++++++++++++++++++----------- ocaml/xapi/vm_sysprep.mli | 11 +++++- ocaml/xapi/xapi.ml | 8 ++--- ocaml/xapi/xapi_vm.ml | 32 +++++++++++++---- 7 files changed, 80 insertions(+), 40 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index ef79f8aec15..2880742c9cd 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -240,7 +240,7 @@ let prototyped_of_message = function | "host", "set_numa_affinity_policy" -> Some "24.0.0" | "VM", "sysprep" -> - Some "25.22.0" + Some "25.23.0-next" | "VM", "get_secureboot_readiness" -> Some "24.17.0" | "VM", "set_uefi_mode" -> diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 210bebe1b2a..42722c118d6 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1430,8 +1430,4 @@ let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = add_error "TLS_VERIFICATION_NOT_ENABLED_IN_POOL" -(* VM.sysprep *) - -(* Using a single error during development, might want to expand this - later *) let sysprep = add_error "SYSPREP" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 15b984ad993..4c79f91cf5f 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3124,8 +3124,7 @@ functor with_vm_operation ~__context ~self ~doc:"VM.sysprep" ~op:`sysprep ~policy (fun () -> forward_vm_op ~local_fn ~__context ~vm:self ~remote_fn - ) ; - Xapi_vm_lifecycle.update_allowed_operations ~__context ~self + ) end module VM_metrics = struct end diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 22abb870a35..fd769cc8099 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -15,6 +15,7 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D +open Client open Xapi_stdext_unix let ( // ) = Filename.concat @@ -23,15 +24,27 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let genisoimage = !Xapi_globs.genisoimage_path -(** This will be shown to the user to explain a failure *) -exception Sysprep of string +type error = + | API_not_enabled + | Other of string + | VM_CDR_not_found + | VM_misses_feature + | VM_not_running + | VM_sysprep_timeout + | XML_too_large -let fail fmt = Printf.ksprintf (fun msg -> raise (Sysprep msg)) fmt +exception Sysprep of error + +let _fail_fmt fmt = Printf.ksprintf (fun msg -> raise (Sysprep (Other msg))) fmt + +let fail error = raise (Sysprep error) let internal_error = Helpers.internal_error let prng = Random.State.make_self_init () +let call = Helpers.call_api_functions + (* A local ISO SR; we create an ISO that holds an unattend.xml file that is than passed as CD to a VM *) module SR = struct @@ -62,14 +75,15 @@ let on_startup ~__context = Db.SR.get_VDIs ~__context ~self:sr |> List.iter @@ fun self -> match Db.VDI.get_record ~__context ~self with - | API.{vDI_VBDs= []; vDI_location= _location; _} -> - Xapi_vdi.destroy ~__context ~self + | API.{vDI_VBDs= []; _} -> + call ~__context @@ fun rpc session_id -> + Client.VDI.destroy ~rpc ~session_id ~self | _ -> () ) (** create a name with a random infix. We need random names for - temporay directories to avoid collition *) + temporary directories to avoid collisions of concurrent API calls *) let temp_name prefix suffix = let rnd = Random.State.bits prng land 0xFFFFFF in Printf.sprintf "%s%06x%s" prefix rnd suffix @@ -141,11 +155,13 @@ let update_sr ~__context = sr | None -> let device_config = [("location", SR.dir); ("legacy_mode", "true")] in - Xapi_sr.create ~__context ~host ~name_label:label ~device_config + call ~__context @@ fun rpc session_id -> + Client.SR.create ~rpc ~session_id ~host ~name_label:label ~device_config ~content_type:"iso" ~_type:"iso" ~name_description:"Sysprep ISOs" ~shared:false ~sm_config:[] ~physical_size:(mib 512) in - Xapi_sr.scan ~__context ~sr ; + call ~__context @@ fun rpc session_id -> + Client.SR.scan ~rpc ~session_id ~sr ; sr (** Find the VBD for the CD drive on [vm] *) @@ -161,7 +177,7 @@ let find_cdr_vbd ~__context ~vm = let uuid = Db.VM.get_uuid ~__context ~self:vm in match List.filter is_cd vbds' with | [] -> - fail "can't find CDR for VM %s" uuid + fail VM_CDR_not_found | [(rf, rc)] -> debug "%s: for VM %s using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; rf @@ -189,7 +205,6 @@ let trigger ~domid = let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in with_xs (fun xs -> xs.Xs.write (control // "filename") "D://unattend.xml" ; - Thread.delay 5.0 ; xs.Xs.write (control // "action") "sysprep" ; debug "%s: notified domain %Ld" __FUNCTION__ domid ; let rec wait n = @@ -209,25 +224,27 @@ let trigger ~domid = (* This function is executed on the host where [vm] is running *) let sysprep ~__context ~vm ~unattend = - let open Ezxenstore_core.Xenstore in debug "%s" __FUNCTION__ ; if not !Xapi_globs.vm_sysprep_enabled then - fail "Experimental VM.sysprep API call is not enabled" ; + fail API_not_enabled ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in let domid = Db.VM.get_domid ~__context ~self:vm in let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then - fail " VM %s is not running" __FUNCTION__ vm_uuid ; + fail VM_not_running ; if String.length unattend > 32 * 1024 then - fail "%s: provided file for %s larger than 32KiB" __FUNCTION__ vm_uuid ; - with_xs (fun xs -> + fail XML_too_large ; + Ezxenstore_core.Xenstore.with_xs (fun xs -> + let open Ezxenstore_core.Xenstore in match xs.Xs.read (control // "feature-sysprep") with | "1" -> debug "%s: VM %s supports sysprep" __FUNCTION__ vm_uuid | _ -> - fail "VM %s does not support sysprep" vm_uuid + debug "%s: VM %s does not support sysprep" __FUNCTION__ vm_uuid ; + fail VM_misses_feature | exception _ -> - fail "VM %s does not support sysprep" vm_uuid + debug "%s: VM %s does not support sysprep" __FUNCTION__ vm_uuid ; + fail VM_misses_feature ) ; let iso, label = make_iso ~vm_uuid ~unattend in debug "%s: created ISO %s" __FUNCTION__ iso ; @@ -235,15 +252,16 @@ let sysprep ~__context ~vm ~unattend = let vbd = find_cdr_vbd ~__context ~vm in let vdi = find_vdi ~__context ~label in debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; - Xapi_vbd.insert ~__context ~vdi ~vbd ; + call ~__context @@ fun rpc session_id -> + Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; Thread.delay 5.0 ; match trigger ~domid with | "running" -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; - Xapi_vbd.eject ~__context ~vbd ; + Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso | status -> debug "%s: sysprep %S, ejecting CD" __FUNCTION__ status ; - Xapi_vbd.eject ~__context ~vbd ; + Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso ; - fail "VM %s sysprep not found running as expected: %S" vm_uuid status + fail VM_sysprep_timeout diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index db4a18455e7..80f1874d7e9 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -13,7 +13,16 @@ *) (** error message that may be passed to API clients *) -exception Sysprep of string +type error = + | API_not_enabled + | Other of string + | VM_CDR_not_found + | VM_misses_feature + | VM_not_running + | VM_sysprep_timeout + | XML_too_large + +exception Sysprep of error val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 3d908bdec0f..56561d76e06 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1170,10 +1170,6 @@ let server_init () = , [Startup.OnThread] , Remote_requests.handle_requests ) - ; ( "Remove local ISO SR" - , [Startup.OnThread] - , fun () -> Vm_sysprep.on_startup ~__context - ) ] ; ( match Pool_role.get_role () with | Pool_role.Master -> @@ -1384,6 +1380,10 @@ let server_init () = , cache_metadata_vdis ) ; ("Stats reporting thread", [], Xapi_stats.start) + ; ( "Remove local ISO SR" + , [Startup.OnThread] + , fun () -> Vm_sysprep.on_startup ~__context + ) ] ; if !debug_dummy_data then Startup.run ~__context diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index f2a8b5bfe8a..f53f506e522 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1704,15 +1704,33 @@ let get_secureboot_readiness ~__context ~self = let sysprep ~__context ~self ~unattend = let uuid = Db.VM.get_uuid ~__context ~self in - debug "%s %S (1/2)" __FUNCTION__ uuid ; + debug "%s %S" __FUNCTION__ uuid ; match Vm_sysprep.sysprep ~__context ~vm:self ~unattend with | () -> - debug "%s %S (2/2)" __FUNCTION__ uuid ; + debug "%s %S success" __FUNCTION__ uuid ; () - | exception Vm_sysprep.Sysprep msg -> - debug "%s %S (2/2)" __FUNCTION__ uuid ; + | exception Vm_sysprep.Sysprep API_not_enabled -> + raise Api_errors.(Server_error (sysprep, [uuid; "API call is disabled"])) + | exception Vm_sysprep.Sysprep VM_CDR_not_found -> + raise Api_errors.(Server_error (sysprep, [uuid; "CD-ROM drive not found"])) + | exception Vm_sysprep.Sysprep VM_misses_feature -> + raise + Api_errors.( + Server_error (sysprep, [uuid; "VM driver does not support sysprep"]) + ) + | exception Vm_sysprep.Sysprep VM_not_running -> + raise Api_errors.(Server_error (sysprep, [uuid; "VM is not running"])) + | exception Vm_sysprep.Sysprep VM_sysprep_timeout -> + raise + Api_errors.( + Server_error (sysprep, [uuid; "sysprep not found running - timeout"]) + ) + | exception Vm_sysprep.Sysprep XML_too_large -> + raise + Api_errors.( + Server_error (sysprep, [uuid; "unattend.xml file too large"]) + ) + | exception Vm_sysprep.Sysprep (Other msg) -> raise Api_errors.(Server_error (sysprep, [uuid; msg])) | exception e -> - debug "%s %S (2/2)" __FUNCTION__ uuid ; - let msg = Printexc.to_string e in - raise Api_errors.(Server_error (sysprep, [uuid; msg])) + raise e From c9e3bfd30677416406f860a30288da51ffc00deb Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 1 Jul 2025 15:36:29 +0100 Subject: [PATCH 357/492] CP-308455 VM.sysprep declare XML content as SecretString Desclare the string parameter holding unattend.xml as secret to avoid logging it. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_vm.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 2 +- ocaml/xapi-types/secretString.ml | 2 ++ ocaml/xapi-types/secretString.mli | 2 ++ ocaml/xapi/vm_sysprep.ml | 4 ++-- ocaml/xapi/vm_sysprep.mli | 5 +++-- ocaml/xapi/xapi_vm.mli | 3 ++- 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 5e4134afd0b..886b125659f 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2375,7 +2375,7 @@ let sysprep = ~params: [ (Ref _vm, "self", "The VM") - ; (String, "unattend", "XML content passed to sysprep") + ; (SecretString, "unattend", "XML content passed to sysprep") ] ~doc:"Pass unattend.xml to Windows sysprep" ~allowed_roles:_R_VM_ADMIN () diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index f51c50851d4..a2cf537550e 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3593,7 +3593,7 @@ let vm_sysprep fd printer rpc session_id params = let unattend = match get_client_file fd filename with | Some xml -> - xml + xml |> SecretString.of_string | None -> marshal fd (Command (PrintStderr "Failed to read file.\n")) ; raise (ExitWithError 1) diff --git a/ocaml/xapi-types/secretString.ml b/ocaml/xapi-types/secretString.ml index 781dac86697..b552e46edfd 100644 --- a/ocaml/xapi-types/secretString.ml +++ b/ocaml/xapi-types/secretString.ml @@ -24,6 +24,8 @@ let write_to_channel c s = output_string c s let equal = String.equal +let length = String.length + let pool_secret = "pool_secret" let with_cookie t cookies = (pool_secret, t) :: cookies diff --git a/ocaml/xapi-types/secretString.mli b/ocaml/xapi-types/secretString.mli index 82d97eaaa72..6d85364d04e 100644 --- a/ocaml/xapi-types/secretString.mli +++ b/ocaml/xapi-types/secretString.mli @@ -25,6 +25,8 @@ val of_string : string -> t val equal : t -> t -> bool +val length : t -> int + val json_rpc_of_t : t -> Rpc.t val t_of_rpc : Rpc.t -> t diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index fd769cc8099..6892898e9fb 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -132,7 +132,7 @@ let make_iso ~vm_uuid ~unattend = Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> let path = temp_dir // "unattend.xml" in - Unixext.write_string_to_file path unattend ; + SecretString.write_to_file path unattend ; debug "%s: written to %s" __FUNCTION__ path ; let args = ["-r"; "-J"; "-o"; iso; temp_dir] in Forkhelpers.execute_command_get_output genisoimage args |> ignore ; @@ -232,7 +232,7 @@ let sysprep ~__context ~vm ~unattend = let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then fail VM_not_running ; - if String.length unattend > 32 * 1024 then + if SecretString.length unattend > 32 * 1024 then fail XML_too_large ; Ezxenstore_core.Xenstore.with_xs (fun xs -> let open Ezxenstore_core.Xenstore in diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 80f1874d7e9..5c11ef7dfb7 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -27,7 +27,8 @@ exception Sysprep of error val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) -val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit +val sysprep : + __context:Context.t -> vm:API.ref_VM -> unattend:SecretString.t -> unit (** Execute sysprep on [vm] using script [unattend]. This requires - driver support from the VM and is checked. [unattend:string] must + driver support from the VM and is checked. [unattend] must not exceed 32kb. Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 005b4cae4ae..12515874aeb 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -451,4 +451,5 @@ val add_to_blocked_operations : val remove_from_blocked_operations : __context:Context.t -> self:API.ref_VM -> key:API.vm_operations -> unit -val sysprep : __context:Context.t -> self:API.ref_VM -> unattend:string -> unit +val sysprep : + __context:Context.t -> self:API.ref_VM -> unattend:SecretString.t -> unit From 903737db24ee30cd8cdb8e19d00873b3ba4742f8 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 10:37:30 +0100 Subject: [PATCH 358/492] Revert "CP-308455 VM.sysprep declare XML content as SecretString" This reverts commit d6ee7d150e2d439ba22816bc28ee43edd9390315. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_vm.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 2 +- ocaml/xapi-types/secretString.ml | 2 -- ocaml/xapi-types/secretString.mli | 2 -- ocaml/xapi/vm_sysprep.ml | 4 ++-- ocaml/xapi/vm_sysprep.mli | 5 ++--- ocaml/xapi/xapi_vm.mli | 3 +-- 7 files changed, 7 insertions(+), 13 deletions(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 886b125659f..5e4134afd0b 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2375,7 +2375,7 @@ let sysprep = ~params: [ (Ref _vm, "self", "The VM") - ; (SecretString, "unattend", "XML content passed to sysprep") + ; (String, "unattend", "XML content passed to sysprep") ] ~doc:"Pass unattend.xml to Windows sysprep" ~allowed_roles:_R_VM_ADMIN () diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index a2cf537550e..f51c50851d4 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3593,7 +3593,7 @@ let vm_sysprep fd printer rpc session_id params = let unattend = match get_client_file fd filename with | Some xml -> - xml |> SecretString.of_string + xml | None -> marshal fd (Command (PrintStderr "Failed to read file.\n")) ; raise (ExitWithError 1) diff --git a/ocaml/xapi-types/secretString.ml b/ocaml/xapi-types/secretString.ml index b552e46edfd..781dac86697 100644 --- a/ocaml/xapi-types/secretString.ml +++ b/ocaml/xapi-types/secretString.ml @@ -24,8 +24,6 @@ let write_to_channel c s = output_string c s let equal = String.equal -let length = String.length - let pool_secret = "pool_secret" let with_cookie t cookies = (pool_secret, t) :: cookies diff --git a/ocaml/xapi-types/secretString.mli b/ocaml/xapi-types/secretString.mli index 6d85364d04e..82d97eaaa72 100644 --- a/ocaml/xapi-types/secretString.mli +++ b/ocaml/xapi-types/secretString.mli @@ -25,8 +25,6 @@ val of_string : string -> t val equal : t -> t -> bool -val length : t -> int - val json_rpc_of_t : t -> Rpc.t val t_of_rpc : Rpc.t -> t diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 6892898e9fb..fd769cc8099 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -132,7 +132,7 @@ let make_iso ~vm_uuid ~unattend = Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> let path = temp_dir // "unattend.xml" in - SecretString.write_to_file path unattend ; + Unixext.write_string_to_file path unattend ; debug "%s: written to %s" __FUNCTION__ path ; let args = ["-r"; "-J"; "-o"; iso; temp_dir] in Forkhelpers.execute_command_get_output genisoimage args |> ignore ; @@ -232,7 +232,7 @@ let sysprep ~__context ~vm ~unattend = let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then fail VM_not_running ; - if SecretString.length unattend > 32 * 1024 then + if String.length unattend > 32 * 1024 then fail XML_too_large ; Ezxenstore_core.Xenstore.with_xs (fun xs -> let open Ezxenstore_core.Xenstore in diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 5c11ef7dfb7..80f1874d7e9 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -27,8 +27,7 @@ exception Sysprep of error val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) -val sysprep : - __context:Context.t -> vm:API.ref_VM -> unattend:SecretString.t -> unit +val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit (** Execute sysprep on [vm] using script [unattend]. This requires - driver support from the VM and is checked. [unattend] must + driver support from the VM and is checked. [unattend:string] must not exceed 32kb. Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 12515874aeb..005b4cae4ae 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -451,5 +451,4 @@ val add_to_blocked_operations : val remove_from_blocked_operations : __context:Context.t -> self:API.ref_VM -> key:API.vm_operations -> unit -val sysprep : - __context:Context.t -> self:API.ref_VM -> unattend:SecretString.t -> unit +val sysprep : __context:Context.t -> self:API.ref_VM -> unattend:string -> unit From 6118d47249aa9bd27a7ed5961b8c0bbdf2891437 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 30 Jun 2025 18:26:32 +0100 Subject: [PATCH 359/492] xapi_vm_lifecycle: Stop assuming PV driver's presence implies all features A 10+ year old note justified assuming that feature-suspend and others are always available by referring to a buggy Windows guest agent. It has been correctly writing 1 to control/feature-suspend for at least a decade now (tested on old Windows PV drivers), so there is no reason to maintain this workaround. This allows PV drivers to potentially disable such features. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_lifecycle.ml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 5ec4ca6d792..cfbc8162e53 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -166,10 +166,6 @@ let has_definitely_booted_pv ~vmmr = ) (** Return an error iff vmr is an HVM guest and lacks a needed feature. - * Note: it turned out that the Windows guest agent does not write "feature-suspend" - * on resume (only on startup), so we cannot rely just on that flag. We therefore - * add a clause that enables all features when PV drivers are present using the - * old-style check. * The "strict" param should be true for determining the allowed_operations list * (which is advisory only) and false (more permissive) when we are potentially about * to perform an operation. This makes a difference for ops that require the guest to @@ -180,8 +176,6 @@ let check_op_for_feature ~__context ~vmr:_ ~vmmr ~vmgmr ~power_state ~op ~ref power_state <> `Running (* PV guests offer support implicitly *) || has_definitely_booted_pv ~vmmr - || Xapi_pv_driver_version.(has_pv_drivers (of_guest_metrics vmgmr)) - (* Full PV drivers imply all features *) then None else From a215870ac06f217810fcb21c4509eda13608a219 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 14:49:25 +0100 Subject: [PATCH 360/492] CP-308455 Save backtrace in error case Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index fd769cc8099..8bde41a0d77 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -145,6 +145,7 @@ let make_iso ~vm_uuid ~unattend = (** create a local ISO SR when necessary and update it such that it recognises any ISO we added or removed *) let update_sr ~__context = + Backtrace.is_important e ; let host = Helpers.get_localhost ~__context in let hostname = Db.Host.get_hostname ~__context ~self:host in let label = SR.name hostname in From 01981bd874bfc8f6c03075c35276ec7f47595457 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 14:49:25 +0100 Subject: [PATCH 361/492] CP-308455 VM.sysprep create better SR name label We identify the local SR by name. Make sure it is very unlikely to conflict we a user's SR and check its type, too. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 8bde41a0d77..74632f781c9 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -50,7 +50,13 @@ let call = Helpers.call_api_functions module SR = struct let dir = "/var/opt/iso" - let name hostname = Printf.sprintf "SYSPREP-%s" hostname + (* We create a deterministic unique name label to protect us against a + user using the same name *) + let name hostname = + let digest str = + Digest.(string str |> to_hex) |> fun hex -> String.sub hex 0 4 + in + Printf.sprintf "SYSPREP-%s-%s" hostname (digest hostname) let find_opt ~__context ~label = match Db.SR.get_by_name_label ~__context ~label with From 9ce41c58925866504ace1a7fe06fdd29da5009b6 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 14:49:25 +0100 Subject: [PATCH 362/492] CP-308455 improve locating local ISO SR Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 74632f781c9..20b2333cfcc 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -59,14 +59,14 @@ module SR = struct Printf.sprintf "SYSPREP-%s-%s" hostname (digest hostname) let find_opt ~__context ~label = - match Db.SR.get_by_name_label ~__context ~label with - | [sr] -> - Some sr - | sr :: _ -> - warn "%s: more than one SR with label %s" __FUNCTION__ label ; - Some sr - | [] -> - None + let check sr = + match Db.SR.get_record ~__context ~self:sr with + | API.{sR_type= "iso"; _} -> + true + | _ -> + false + in + Db.SR.get_by_name_label ~__context ~label |> List.find_opt check end (** This is called on xapi startup. Opportunity to set up or clean up. From 191db166bacb01e0add3148196d0951f9ff238c0 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 15:23:31 +0100 Subject: [PATCH 363/492] CP-308455 VM.sysprep save backtrace Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 20b2333cfcc..b360bd56d55 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -145,6 +145,7 @@ let make_iso ~vm_uuid ~unattend = (iso, basename) ) with e -> + Backtrace.is_important e ; let msg = Printexc.to_string e in Helpers.internal_error "%s failed: %s" __FUNCTION__ msg From d736b7359e3122e608841fbcce17c3f08135733e Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 15:23:31 +0100 Subject: [PATCH 364/492] CP-308455 VM.sysprep fix saving backtrace Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index b360bd56d55..c2067cde22e 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -152,7 +152,6 @@ let make_iso ~vm_uuid ~unattend = (** create a local ISO SR when necessary and update it such that it recognises any ISO we added or removed *) let update_sr ~__context = - Backtrace.is_important e ; let host = Helpers.get_localhost ~__context in let hostname = Db.Host.get_hostname ~__context ~self:host in let label = SR.name hostname in From 630ec5776d2b1ab6bb6032b828f054680a4c53a9 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 15:23:31 +0100 Subject: [PATCH 365/492] CP-308455 VM.sysprep write VDI UUID to xenstore In addition to the file name, write the VDI UUID to xenstore for the guest agent to pick up. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index c2067cde22e..1af1db06059 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -207,11 +207,12 @@ let find_vdi ~__context ~label = (** notify the VM with [domid] to run sysprep and where to find the file. *) -let trigger ~domid = +let trigger ~domid ~uuid = let open Ezxenstore_core.Xenstore in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in with_xs (fun xs -> xs.Xs.write (control // "filename") "D://unattend.xml" ; + xs.Xs.write (control // "vdi-uuid") uuid ; xs.Xs.write (control // "action") "sysprep" ; debug "%s: notified domain %Ld" __FUNCTION__ domid ; let rec wait n = @@ -258,11 +259,12 @@ let sysprep ~__context ~vm ~unattend = let _sr = update_sr ~__context in let vbd = find_cdr_vbd ~__context ~vm in let vdi = find_vdi ~__context ~label in + let uuid = Db.VDI.get_uuid ~__context ~self:vdi in debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; call ~__context @@ fun rpc session_id -> Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; Thread.delay 5.0 ; - match trigger ~domid with + match trigger ~domid ~uuid with | "running" -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; Client.VBD.eject ~rpc ~session_id ~vbd ; From eaf2050465f057016b0b141b03a4bc9e534207ff Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 15:23:31 +0100 Subject: [PATCH 366/492] CP-308455 VM.sysprep make delay configurable Mostly for development: when inserting a CD we wait before we expect the VM to have recognised it such that we can start sysprep. Make this configuratble for easier experimentation. Signed-off-by: Christian Lindig --- ocaml/xapi/xapi_globs.ml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 88b957c2f2f..d2c591e4f2c 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1092,6 +1092,8 @@ let validate_reusable_pool_session = ref false let vm_sysprep_enabled = ref false (* enable VM.sysprep API *) +let vm_sysprep_wait = ref 5.0 (* seconds *) + let test_open = ref 0 let xapi_requests_cgroup = @@ -1761,6 +1763,11 @@ let other_options = , (fun () -> string_of_bool !vm_sysprep_enabled) , "Enable VM.sysprep API" ) + ; ( "vm-sysprep-wait" + , Arg.Set_float vm_sysprep_wait + , (fun () -> string_of_float !vm_sysprep_wait) + , "Time in seconds to wait for VM to recognise inserted CD" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. From 9153fb544119128b31899abf64f7e22ef39a7f1b Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 2 Jul 2025 15:23:31 +0100 Subject: [PATCH 367/492] CP-308455 VM.sysprep guard on_startup with feature flag Remove VDIs only when the feature is enabled. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 1af1db06059..16d485025b1 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -75,9 +75,7 @@ let on_startup ~__context = let host = Helpers.get_localhost ~__context in let hostname = Db.Host.get_hostname ~__context ~self:host in match SR.find_opt ~__context ~label:(SR.name hostname) with - | None -> - () - | Some sr -> ( + | Some sr when !Xapi_globs.vm_sysprep_enabled -> ( Db.SR.get_VDIs ~__context ~self:sr |> List.iter @@ fun self -> match Db.VDI.get_record ~__context ~self with @@ -87,6 +85,8 @@ let on_startup ~__context = | _ -> () ) + | _ -> + () (* none found or not enabled *) (** create a name with a random infix. We need random names for temporary directories to avoid collisions of concurrent API calls *) From c895469f5499b0cfb68001e37dca8b1ce294b249 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 2 Jul 2025 15:20:44 +0100 Subject: [PATCH 368/492] xapi_vm_lifecycle: Disallow suspend when cant_suspend_reason is present When data/cant_suspend_reason is present in xenstore (renamed data-cant-suspend-reason in "other" guest metrics), it would exclude operations involving suspend from the allowed operations list, but still allow actually suspending (strict=true during allowed operations checks but could be false otherwise. This was additionally overriden by assuming that PV driver presence guarantees feature-suspend). Suspending in such a situation would always result in a crash, usually with the following error: ``` $ xe vm-suspend uuid=d546c8c2-bcb1-0ed6-7931-d0fc9393ccb2 The server failed to handle your request, due to an internal error. The given message may give details useful for debugging the problem. message: xenopsd internal error: Device_common.QMP_Error(8, "{\"error\": {\"class\":\"GenericError\", \"desc\":\"State blocked by non-migratable device '0000:00:07.0/nvme'\", \"data\":{} }, \"id\":\"qmp-000006-8\"}") ``` So disallow suspending altogether when cant_suspend_reason is present. Log the QEMU error directly in the XAPI error as well: ``` $ xe vm-suspend uuid=d546c8c2-bcb1-0ed6-7931-d0fc9393ccb2 You attempted an operation on a VM which lacks the feature. vm: d546c8c2-bcb1-0ed6-7931-d0fc9393ccb2 (Windows 10 (64-bit)) reason: {"error":{"class":"GenericError","desc":"State blocked by non-migratable device '0000:00:07.0/nvme'","data":{}},"id":"qmp-000012-9"} ``` Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel_errors.ml | 2 ++ ocaml/xapi-consts/api_errors.ml | 2 ++ ocaml/xapi/xapi_vm_lifecycle.ml | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 30e185ac192..b205b670159 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -534,6 +534,8 @@ let _ = () ; error Api_errors.vm_lacks_feature ["vm"] ~doc:"You attempted an operation on a VM which lacks the feature." () ; + error Api_errors.vm_non_suspendable ["vm"; "reason"] + ~doc:"You attempted an operation on a VM which is not suspendable." () ; error Api_errors.vm_is_template ["vm"] ~doc:"The operation attempted is not valid for a template VM" () ; error Api_errors.other_operation_in_progress ["class"; "object"] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 077a8dacbf9..89ab735194b 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -440,6 +440,8 @@ let vm_old_pv_drivers = add_error "VM_OLD_PV_DRIVERS" let vm_lacks_feature = add_error "VM_LACKS_FEATURE" +let vm_non_suspendable = add_error "VM_NON_SUSPENDABLE" + let vm_cannot_delete_default_template = add_error "VM_CANNOT_DELETE_DEFAULT_TEMPLATE" diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index cfbc8162e53..f5a9d7dfd57 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -151,6 +151,12 @@ let has_feature ~vmgmr ~feature = try List.assoc feature other = "1" with Not_found -> false ) +let get_feature ~vmgmr ~feature = + Option.bind vmgmr (fun gmr -> + let other = gmr.Db_actions.vM_guest_metrics_other in + List.assoc_opt feature other + ) + (* Returns `true` only if we are certain that the VM has booted PV (if there * is no metrics record, then we can't tell) *) let has_definitely_booted_pv ~vmmr = @@ -194,9 +200,15 @@ let check_op_for_feature ~__context ~vmr:_ ~vmmr ~vmgmr ~power_state ~op ~ref some_err Api_errors.vm_lacks_feature | `changing_VCPUs_live when lack_feature "feature-vcpu-hotplug" -> some_err Api_errors.vm_lacks_feature - | (`suspend | `checkpoint | `pool_migrate | `migrate_send) - when strict && lack_feature "feature-suspend" -> - some_err Api_errors.vm_lacks_feature + | `suspend | `checkpoint | `pool_migrate | `migrate_send -> ( + match get_feature ~vmgmr ~feature:"data-cant-suspend-reason" with + | Some reason -> + Some (Api_errors.vm_non_suspendable, [Ref.string_of ref; reason]) + | None when strict && lack_feature "feature-suspend" -> + some_err Api_errors.vm_lacks_feature + | None -> + None + ) | _ -> None From 30c0ba15002ce05ce302946acbd03fc5a16eee7e Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Thu, 3 Jul 2025 15:33:40 +0100 Subject: [PATCH 369/492] CA-413304: Restore VBD.unplug function to keep old functionality This is a partial revert of 1a46f33be768. It retains the deactivate and detach functions introduced but restores the original unplug function so that the VBD_unplug atom is completely unchanged when xenops_vbd_plug_unplug_legacy=true instead of running deactivate followed by detach. This will fix the S(Does_not_exist) Xenopsd errors we are seeing in some VBD_unplug calls, until a fix for the split functions is found. Signed-off-by: Steven Woods --- ocaml/xenopsd/lib/xenops_server.ml | 5 +- ocaml/xenopsd/lib/xenops_server_plugin.ml | 2 + ocaml/xenopsd/lib/xenops_server_simulator.ml | 2 + ocaml/xenopsd/lib/xenops_server_skeleton.ml | 2 + ocaml/xenopsd/xc/xenops_server_xen.ml | 121 +++++++++++++++++++ 5 files changed, 128 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 15715ac7ac7..36a2ea92fed 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -2097,10 +2097,7 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) | VBD_unplug (id, force) -> debug "VBD.unplug %s" (VBD_DB.string_of_id id) ; finally - (fun () -> - B.VBD.deactivate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force ; - B.VBD.detach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) - ) + (fun () -> B.VBD.unplug t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force) (fun () -> VBD_DB.signal id) | VBD_deactivate (id, force) -> debug "VBD.deactivate %s" (VBD_DB.string_of_id id) ; diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 4c8c73773f8..19ab155aa92 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -211,6 +211,8 @@ module type S = sig val activate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + val unplug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit + val deactivate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit val detach : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index f8c0afab8ab..13ae583c7da 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -677,6 +677,8 @@ module VBD = struct let activate _ (_vm : Vm.id) (_vbd : Vbd.t) = () + let unplug _ vm vbd _ = with_lock m (remove_vbd vm vbd) + let deactivate _ vm vbd _ = with_lock m (remove_vbd vm vbd) let detach _ _vm _vbd = () diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index b938927a2e4..f5ef9ed027c 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -148,6 +148,8 @@ module VBD = struct let activate _ _ _ = unimplemented __FUNCTION__ + let unplug _ _ _ _ = unimplemented __FUNCTION__ + let deactivate _ _ _ _ = unimplemented __FUNCTION__ let detach _ _ _ = unimplemented __FUNCTION__ diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index ccf3eac9764..61e5d45fb84 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3863,6 +3863,127 @@ module VBD = struct ) (fun () -> cleanup_attached_vdis vm (id_of vbd)) + let unplug task vm vbd force = + with_xc_and_xs (fun xc xs -> + try + (* On destroying the datapath + + 1. if the device has already been shutdown and deactivated (as in + suspend) we must call DP.destroy here to avoid leaks + + 2. if the device is successfully shutdown here then we must call + DP.destroy because no-one else will + + 3. if the device shutdown is rejected then we should leave the DP + alone and rely on the event thread calling us again later. *) + let domid = domid_of_uuid ~xs (uuid_of_string vm) in + (* If the device is gone then we don't need to shut it down but we do + need to free any storage resources. *) + let dev = + try + Some (device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd)) + with + | Xenopsd_error (Does_not_exist (_, _)) -> + debug "VM = %s; VBD = %s; Ignoring missing domain" vm (id_of vbd) ; + None + | Xenopsd_error Device_not_connected -> + debug "VM = %s; VBD = %s; Ignoring missing device" vm (id_of vbd) ; + None + in + let backend = + match dev with + | None -> + None + | Some dv -> ( + match + Rpcmarshal.unmarshal typ_of_backend + (Device.Generic.get_private_key ~xs dv _vdi_id + |> Jsonrpc.of_string + ) + with + | Ok x -> + x + | Error (`Msg m) -> + internal_error "Failed to unmarshal VBD backend: %s" m + ) + in + Option.iter + (fun dev -> + if force && not (Device.can_surprise_remove ~xs dev) then + debug + "VM = %s; VBD = %s; Device is not surprise-removable \ + (ignoring and removing anyway)" + vm (id_of vbd) ; + (* this happens on normal shutdown too *) + (* Case (1): success; Case (2): success; Case (3): an exception is + thrown *) + with_tracing ~task ~name:"VBD_device_shutdown" @@ fun () -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.clean_shutdown %s" (id_of vbd)) + (fun () -> + (if force then Device.hard_shutdown else Device.clean_shutdown) + task ~xs dev + ) + ) + dev ; + (* We now have a shutdown device but an active DP: we should destroy + the DP if the backend is of type VDI *) + finally + (fun () -> + with_tracing ~task ~name:"VBD_device_release" (fun () -> + Option.iter + (fun dev -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.release %s" (id_of vbd)) + (fun () -> Device.Vbd.release task ~xc ~xs dev) + ) + dev + ) ; + (* If we have a qemu frontend, detach this too. *) + with_tracing ~task ~name:"VBD_detach_qemu" @@ fun () -> + let _ = + DB.update vm + (Option.map (fun vm_t -> + let persistent = vm_t.VmExtra.persistent in + if List.mem_assoc vbd.Vbd.id persistent.VmExtra.qemu_vbds + then ( + let _, qemu_vbd = + List.assoc vbd.Vbd.id persistent.VmExtra.qemu_vbds + in + (* destroy_vbd_frontend ignores 'refusing to close' + transients' *) + destroy_vbd_frontend ~xc ~xs task qemu_vbd ; + VmExtra. + { + persistent= + { + persistent with + qemu_vbds= + List.remove_assoc vbd.Vbd.id + persistent.qemu_vbds + } + } + ) else + vm_t + ) + ) + in + () + ) + (fun () -> + with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> + match (domid, backend) with + | Some x, None | Some x, Some (VDI _) -> + Storage.dp_destroy task + (Storage.id_of (string_of_int x) vbd.Vbd.id) + | _ -> + () + ) + with Device_common.Device_error (_, s) -> + debug "Caught Device_error: %s" s ; + raise (Xenopsd_error (Device_detach_rejected ("VBD", id_of vbd, s))) + ) + let deactivate task vm vbd force = with_xc_and_xs (fun xc xs -> try From 8017a4ec3fdc7daef5e21d6e32a70e932ed2ac2d Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 3 Jul 2025 11:57:13 +0000 Subject: [PATCH 370/492] CP-308455 VM.sysprep make delay configurable This was missing from previous commit to control the time waited for a VM to recognise a CD. In the long run we would like to replace this with a protocol that tells us when the guest is ready. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 16d485025b1..3db126dae08 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -263,7 +263,7 @@ let sysprep ~__context ~vm ~unattend = debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; call ~__context @@ fun rpc session_id -> Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; - Thread.delay 5.0 ; + Thread.delay !Xapi_globs.vm_sysprep_wait ; match trigger ~domid ~uuid with | "running" -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; From 0fd510f8d48031d41942637d8cd04480105a2b08 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 4 Jul 2025 15:40:00 +0800 Subject: [PATCH 371/492] CA-393417: Bind mount /proc/ into chroot From strace/gdb, XS9 qemu requires /proc/self/fd/ to work well This is due to systemd/libudev update. Just bind mount /proc/self/ to the chroot to permit qemu access ``` 1047 openat(AT_FDCWD, "/proc/self/fd/46", O_RDONLY|O_NOCTTY|O_CLOEXEC|O_PATH) = -1 ENOENT (No such file or directory) 1048 openat(AT_FDCWD, "/proc/", O_RDONLY|O_NOCTTY|O_CLOEXEC|O_PATH) = -1 ENOENT (No such file or directory) ../sysdeps/unix/sysv/linux/fstatfs64.c:30 out>, dir_fd=) at ../src/basic/stat-util.c:566 magic_value=1650812274) at ../src/basic/stat-util.c:369 fd=) at ../src/basic/stat-util.h:66 verify=) at ../src/libsystemd/sd-device/sd-device.c:221 (ret=0x7ffc67ebba20, syspath=0x7ffc67ebb950 "/sys/bus/usb/devices/usb1", strict=true) at ../src/libsystemd/sd-device/sd-device.c:271 (syspath=0x7ffc67ebb950 "/sys/bus/usb/devices/usb1", ret=0x7ffc67ebba20) at ../src/libsystemd/sd-device/sd-device.c:280 ``` Signed-off-by: Lin Liu --- python3/libexec/usb_reset.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/python3/libexec/usb_reset.py b/python3/libexec/usb_reset.py index 3e5ff849060..010fd134862 100755 --- a/python3/libexec/usb_reset.py +++ b/python3/libexec/usb_reset.py @@ -15,7 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # attach -# ./usb_reset.py attach device -d dom-id [-r] +# ./usb_reset.py attach device -d dom-id -p pid [-r] # ./usb_reset.py attach 2-2 -d 12 -p 4130 # ./usb_reset.py attach 2-2 -d 12 -p 4130 -r # 1. reset device @@ -23,18 +23,21 @@ # 2. if it's the first USB device to pass-through # a) bind mount /sys in chroot directory (/var/xen/qemu/root-) # b) clone (create the device with same major/minor number and mode) in chroot directory with same path +# c) bind mount /proc/ to chroot directory (/var/xen/qemu/root-/proc/self) # 3. set device file uid/gid to (qemu_base + dom-id) # # detach # ./usb_reset.py detach device -d dom-id # ./usb_reset.py detach 2-2 -d 12 # 1. Remove the cloned device file in chroot directory +# 2. Umount /proc/self from chroot directory if it is mounted # # cleanup # ./usb_reset.py cleanup -d dom-id # ./usb_reset.py cleanup -d 12 # 1.umount /sys from chroot directory if they are mounted. -# 2.remove /dev/bus directory in chroot directory if it exists +# 2.umount /proc/self from chroot directory if they are mounted. +# 3.remove /dev/bus directory in chroot directory if it exists import argparse import ctypes @@ -58,6 +61,8 @@ def parse_arg(): attach.add_argument("device", help="the target usb device") attach.add_argument("-d", dest="domid", type=int, required=True, help="specify the domid of the VM") + attach.add_argument("-p", dest="pid", type=int, required=True, + help="the process id of QEMU") attach.add_argument("-r", dest="reset_only", action="store_true", help="reset device only, for privileged mode") @@ -152,7 +157,7 @@ def clone_device(path, root_dir, domid): exit(1) -def attach(device, domid, reset_only): +def attach(device, domid, pid, reset_only): path = dev_path(device) # reset device @@ -177,16 +182,19 @@ def attach(device, domid, reset_only): clone_device(path, root_dir, domid) sys_dir = root_dir + "/sys" + proc_dir = root_dir + "/proc" # sys_dir could already be mounted because of PCI pass-through - if not os.path.isdir(sys_dir): - try: - os.mkdir(sys_dir, 0o755) - except OSError: - log.error("Failed to create sys dir in chroot") - exit(1) + os.makedirs(sys_dir, exist_ok=True, mode=0o755) if not os.path.isdir(sys_dir + "/devices"): mount("/sys", sys_dir, "sysfs") + self_dir = os.path.join(proc_dir, "self") + os.makedirs(self_dir , exist_ok=True, mode=0o755) + fd_dir = os.path.join(self_dir, "fd") + if not os.path.isdir(fd_dir): + MS_BIND = 4096 # mount flags, from fs.h + mount(f"/proc/{pid}/", self_dir, "", MS_BIND) + def detach(device, domid): path = dev_path(device) @@ -201,11 +209,17 @@ def cleanup(domid): dev_dir = root_dir + "/dev" sys_dir = root_dir + "/sys" bus_dir = dev_dir + "/bus" + proc_dir = root_dir + "/proc" + self_dir = proc_dir + "/self" if os.path.isdir(bus_dir): log.info("Removing bus directory: {} for cleanup".format(bus_dir)) shutil.rmtree(bus_dir) if os.path.isdir(sys_dir + "/devices"): umount(sys_dir) + if os.path.exists(sys_dir) and os.path.ismount(self_dir): + umount(self_dir) + log.info("Removing proc directory: {} for cleanup".format(proc_dir)) + shutil.rmtree(proc_dir) if __name__ == "__main__": @@ -214,7 +228,7 @@ def cleanup(domid): arg = parse_arg() if "attach" == arg.command: - attach(arg.device, arg.domid, arg.reset_only) + attach(arg.device, arg.domid, arg.pid, arg.reset_only) elif "detach" == arg.command: detach(arg.device, arg.domid) elif "cleanup" == arg.command: From 3f906d1654371ff0919b55ad79b1a589d39bf70d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 6 Nov 2024 08:42:47 +0000 Subject: [PATCH 372/492] CP-40265 - xenopsd: Drop max_maptrack_frames to 0 by default on domain creation max_maptrack_frames should only be >0 if there are reasons for other domains to grant pages to the domain being created, which should only be happening for Dom0 (not handled by the toolstack), and driver domains (currently none exist). Signed-off-by: Andrii Sultanov Suggested-by: Andrew Cooper --- ocaml/xenopsd/xc/domain.ml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 287c1c77b27..32b1632e80d 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -390,7 +390,13 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = ; max_maptrack_frames= ( try int_of_string (List.assoc "max_maptrack_frames" vm_info.platformdata) - with _ -> 1024 + with _ -> + 0 + (* This should be >0 only for driver domains (Dom0 startup is not + handled by the toolstack), which currently do not exist. + To support these in the future, xenopsd would need to check what + type of domain is being started. + *) ) ; max_grant_version= (if List.mem CAP_Gnttab_v2 host_info.capabilities then 2 else 1) From a88f4ff0d57d0f2994d093da9a538ad7fa475659 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 11 Nov 2024 15:29:08 +0000 Subject: [PATCH 373/492] CP-40265 - xenopsd: Calculate max_grant_frames dynamically Plumb the expected number of VBDs and VIFs to xenopsd's VM_create micro-op, allowing the domain creation code to estimate how big max_grant_frames value needs to be. Previously it was hardcoded to 64 but that was found lacking in situations with a lot of VBDs and VIFs under load, as they'd run out of grant table entries and the VM would crash. Now, when fewer VIFs and VBDs are expected, fewer grant table entries are needed, and the other way around. An expectation of 1 hotplugged VIF and VBD each is accounted for as well, this can possibly be incremented in the future if we decide, say, 2 hotplugged devices are common enough. The VBD calculation is complex and approximate, as explained in the code. Size of the ring was calculated with the following program (important because struct sizes can change, and the code comment also outlines how this can be improved in the future with feature detection, in which case more constants would need to be considered), attaching it here because the BLK_RING_SIZE macro is otherwise hard to decipher by hand: ``` // Can be compiled with just gcc test.c // # ./test // 64 // 112 // 32 \#include \#include \#include struct tester { unsigned int req_prod, req_event; unsigned int rsp_prod, rsp_event; uint8_t __pad[48]; int ring; }; typedef uint32_t grant_ref_t; typedef uint16_t blkif_vdev_t; typedef uint64_t blkif_sector_t; \#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 \#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 struct blkif_request_segment { grant_ref_t gref; uint8_t first_sect, last_sect; }; struct blkif_request_rw { uint8_t nr_segments; blkif_vdev_t handle; uint32_t _pad1; uint64_t id; blkif_sector_t sector_number; struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } __attribute__((__packed__)); struct blkif_request_discard { uint8_t flag; blkif_vdev_t _pad1; uint32_t _pad2; uint64_t id; blkif_sector_t sector_number; uint64_t nr_sectors; uint8_t _pad3; } __attribute__((__packed__)); struct blkif_request_other { uint8_t _pad1; blkif_vdev_t _pad2; uint32_t _pad3; uint64_t id; } __attribute__((__packed__)); struct blkif_request_indirect { uint8_t indirect_op; uint16_t nr_segments; uint32_t _pad1; uint64_t id; blkif_sector_t sector_number; blkif_vdev_t handle; uint16_t _pad2; grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; uint32_t _pad3; } __attribute__((__packed__)); struct blkif_request { uint8_t operation; union { struct blkif_request_rw rw; struct blkif_request_discard discard; struct blkif_request_other other; struct blkif_request_indirect indirect; } u; } __attribute__((__packed__)); union blkif_sring_entry { \ struct blkif_request req; \ struct blkif_request rsp; \ }; \#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) \#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) \#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) \#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) \#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) int main () { size_t offset =offsetof(struct tester, ring); size_t size = sizeof(union blkif_sring_entry); printf("%d\n", offset); printf("%zu\n", size); printf("%d\n", __RD32((4096- offset)/size)); } ``` Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/lib/xenops_server.ml | 11 +++- ocaml/xenopsd/lib/xenops_server_plugin.ml | 2 + ocaml/xenopsd/lib/xenops_server_simulator.ml | 3 +- ocaml/xenopsd/xc/domain.ml | 65 +++++++++++++++++++- ocaml/xenopsd/xc/domain.mli | 2 + ocaml/xenopsd/xc/xenops_server_xen.ml | 6 +- 6 files changed, 82 insertions(+), 7 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 36a2ea92fed..20de6e0f667 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -2297,11 +2297,18 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.destroy %s" id ; B.VM.destroy t (VM_DB.read_exn id) | VM_create (id, memory_upper_bound, final_id, no_sharept) -> - debug "VM.create %s memory_upper_bound = %s" id + let num_of_vbds = List.length (VBD_DB.vbds id) in + let num_of_vifs = List.length (VIF_DB.vifs id) in + debug + "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \ + %d" + id (Option.value ~default:"None" (Option.map Int64.to_string memory_upper_bound) - ) ; + ) + num_of_vbds num_of_vifs ; B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept + num_of_vbds num_of_vifs | VM_build (id, force) -> debug "VM.build %s" id ; let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 19ab155aa92..e4a61bb9ac8 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -84,6 +84,8 @@ module type S = sig -> Vm.t -> Vm.id option -> bool (* no_sharept*) + -> int (* num_of_vbds *) + -> int (* num_of_vifs *) -> unit val build : diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index 13ae583c7da..0c6ac3f606b 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -547,7 +547,8 @@ module VM = struct let remove _vm = () - let create _ memory_limit vm _ _ = with_lock m (create_nolock memory_limit vm) + let create _ memory_limit vm _ _ _ _ = + with_lock m (create_nolock memory_limit vm) let destroy _ vm = with_lock m (destroy_nolock vm) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 32b1632e80d..af3ec71a7c7 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -269,7 +269,8 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib in wait 0 -let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = +let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept + num_of_vbds num_of_vifs = let open Xenctrl in let host_info = Xenctrl.physinfo xc in @@ -385,7 +386,67 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = ; max_evtchn_port= -1 ; max_grant_frames= ( try int_of_string (List.assoc "max_grant_frames" vm_info.platformdata) - with _ -> 64 + with _ -> + let max_per_vif = 8 in + (* 1 VIF takes up (256 rx entries + 256 tx entries) * 8 queues max + * 8 bytes per grant table entry / 4096 bytes size of frame *) + let reasonable_per_vbd = 1 in + (* (1 ring (itself taking up one granted page) + 1 ring * + 32 requests * 11 grant refs contained in each * 8 bytes ) / + 4096 bytes size of frame = 0.6875, rounded up *) + let frames_number = + (max_per_vif * (num_of_vifs + 1)) + + (reasonable_per_vbd * (num_of_vbds + 1)) + in + debug "estimated max_grant_frames = %d" frames_number ; + frames_number + (* max_per_vif * (num_of_vifs + 1 hotplugged future one) + + max_per_vbd * (num_of_vbds + 1 hotplugged future one) *) + + (* NOTE: While the VIF calculation is precise, the VBD one is a + very rough approximation of a reasonable value of + RING_SIZE * MAX_SEGMENTS_PER_REQUEST + PAGES_FOR_RING_ITSELF + The following points should allow for a rough understanding + of the scale of the problem of better estimation: + + 1) The blkfront driver can consume different numbers of grant + pages depending on the features advertised by the back driver + (and negotiated with it). These features can differ per VBD, and + right now aren't even known at the time of domain creation. + These include: + 1.1) indirect segments - these contain + BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST grants at most, and each + of these frames contains GRANTS_PER_INDIRECT_FRAME grants in + turn (stored in blkif_request_segment). + In practice, this means a catastrophic explosion - we should + not really aim to detect if indirect requests feature is on, + but turn it off to get reasonable estimates. + 1.2) persistent grants - these are an optimization, so + shouldn't really change the calculations, worst case is none + of the grants are persistent. + 1.3) multi-page rings - these change the RING_SIZE, but not in + a trivial manner (see ring-page-order) + 1.4) multi-queue - these change the number of rings, adding + another multiplier. + 2) The "8 bytes" multiplier for a grant table entry only applies + to grants_v1. v2 grants take up 16 bytes per entry. And it's + impossible to detect this feature at the moment. + 3) A dynamically-sized grant table itself could be a solution? + Used to exist before, caused a lot of XSAs, hard to get right. + 4) Drivers might need to be more explicitly limited in how many + pages they can consume + 5) VBD backdriver's features should be managed by XAPI on the + object itself and (their max bound) known at the time of domain + creation. + + So for this estimate, there is only 1 ring which is 1 page, with + 32 entries, each entry (request) can have up to 11 pages + (excluding indirect pages and other complications). + + SEE: xen-blkfront.c, blkif.h, and the backdriver to understand + the process of negotiation (visible in xenstore, in kernel + module parameters in the sys filesystem afterwards) + *) ) ; max_maptrack_frames= ( try diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 4fac8ccde5a..a7681827029 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -149,6 +149,8 @@ val make : -> [`VM] Uuidx.t -> string option -> bool (* no_sharept *) + -> int (* num_of_vbds *) + -> int (* num_of_vifs *) -> domid (** Create a fresh (empty) domain with a specific UUID, returning the domain ID *) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 61e5d45fb84..18383a04c00 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1389,7 +1389,8 @@ module VM = struct in (device_id, revision) - let create_exn task memory_upper_bound vm final_id no_sharept = + let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds + num_of_vifs = let k = vm.Vm.id in with_xc_and_xs (fun xc xs -> (* Ensure the DB contains something for this VM - this is to avoid a @@ -1518,7 +1519,8 @@ module VM = struct let create_info = generate_create_info ~xs vm persistent in let domid = Domain.make ~xc ~xs create_info vm.vcpu_max domain_config - (uuid_of_vm vm) final_id no_sharept + (uuid_of_vm vm) final_id no_sharept num_of_vbds + num_of_vifs in Mem.transfer_reservation_to_domain dbg domid reservation_id ; let initial_target = From 0c5d12120e1dafed4de5c58bbcb716b6f6dbc016 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Mon, 7 Jul 2025 02:42:08 +0000 Subject: [PATCH 374/492] CA-413328 Enable auto-mode when XAPI failed for a extend period that exceeds the timeout duration Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/xapi_periodic_scheduler_init.ml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index ff7c3187c20..e238e9daa62 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -91,9 +91,14 @@ let register ~__context = let remaining = Int64.sub expiry_time current_time in Xapi_host.schedule_disable_ssh_job ~__context ~self ~timeout:remaining ~auto_mode:true - (* handle the case where XAPI is not active when the SSH timeout expires *) - else if Fe_systemctl.is_active ~service:!Xapi_globs.ssh_service then - Xapi_host.disable_ssh ~__context ~self + (* Handle the case where XAPI is not active when the SSH timeout expires. + This typically occurs when XAPI has been down for an extended period that + exceeds the timeout duration. In this scenario, we need to enable SSH auto + mode to ensure the SSH service remains continuously available. *) + else if Fe_systemctl.is_active ~service:!Xapi_globs.ssh_service then ( + Xapi_host.disable_ssh ~__context ~self ; + Xapi_host.set_ssh_auto_mode ~__context ~self ~value:true + ) in let update_all_subjects_delay = 10.0 in (* initial delay = 10 seconds *) From 6c6e7b6da912d0232621aa5179222fd3edd44b9f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 14 Nov 2024 09:01:34 +0000 Subject: [PATCH 375/492] Treat 64 max_grant_frames as the lower bound 64 was the old hard-coded value for max_grant_frames, so play it safe here and keep it as the lower bound - users might be used to being able to hotplug several VIFs below 7, for example, which would have been broken otherwise as we only estimate for a single hotplug with the current algorithm. Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/domain.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index af3ec71a7c7..e4bca3a2839 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -395,8 +395,10 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept 32 requests * 11 grant refs contained in each * 8 bytes ) / 4096 bytes size of frame = 0.6875, rounded up *) let frames_number = - (max_per_vif * (num_of_vifs + 1)) - + (reasonable_per_vbd * (num_of_vbds + 1)) + max 64 + ((max_per_vif * (num_of_vifs + 1)) + + (reasonable_per_vbd * (num_of_vbds + 1)) + ) in debug "estimated max_grant_frames = %d" frames_number ; frames_number From 593bd8c4e9317db266bcb736731b95d0007ddeb4 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 19 Nov 2024 12:57:39 +0000 Subject: [PATCH 376/492] xenopsd: Don't iterate over StringMaps twice Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/lib/xenops_server.ml | 18 ++++++++++-------- ocaml/xenopsd/lib/xenops_utils.ml | 25 +++++++++++++------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 20de6e0f667..b47344a30e6 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -848,10 +848,11 @@ module Queues = struct let get tag qs = with_lock qs.m (fun () -> - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () ) let tags qs = @@ -862,10 +863,11 @@ module Queues = struct let push_with_coalesce should_keep tag item qs = with_lock qs.m (fun () -> let q = - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () in push_with_coalesce should_keep item q ; qs.qs <- StringMap.add tag q qs.qs ; diff --git a/ocaml/xenopsd/lib/xenops_utils.ml b/ocaml/xenopsd/lib/xenops_utils.ml index 481ad1b6101..53dc73709a1 100644 --- a/ocaml/xenopsd/lib/xenops_utils.ml +++ b/ocaml/xenopsd/lib/xenops_utils.ml @@ -227,11 +227,13 @@ module MemFS = struct match (path, fs) with | [], Dir d -> d - | p :: ps, Dir d -> - if StringMap.mem p !d then - aux ps (StringMap.find p !d) - else + | p :: ps, Dir d -> ( + match StringMap.find_opt p !d with + | Some x -> + aux ps x + | None -> raise Not_dir + ) | _, Leaf _ -> raise Not_dir in @@ -285,14 +287,13 @@ module MemFS = struct (fun p -> let dir = dir_locked (dirname p) in let deletable = - if StringMap.mem (filename p) !dir then - match StringMap.find (filename p) !dir with - | Dir child -> - StringMap.is_empty !child - | Leaf _ -> - true - else - false + match StringMap.find_opt (filename p) !dir with + | Some (Dir child) -> + StringMap.is_empty !child + | Some (Leaf _) -> + true + | None -> + false in if deletable then dir := StringMap.remove (filename p) !dir ) From 947e4965be47a28b0b549a5dfa10afe6e2b710e4 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 4 Jul 2025 15:54:59 +0100 Subject: [PATCH 377/492] xapi_vm_helpers: Raise allowed_VIF limit from 7 to 16 With the previously hardcoded value of max_grant_frames, we could only support 7 VIFs at most (and fewer if there were also many VBDs), since having 9 and more VIFs would result in grant allocation errors. Now that max_grant_frames is dynamically estimated on domain creation given the number of VIFs and VBDs a VM has, we can easily support 16 VIFs. Given the current behaviour of the XenServer/XCP-ng system (hypervisor+drivers), where more VIFs allow for higher overall networking throughput, this is highly beneficial - in testing overall throughput with 16 VIFs was 18-27% higher than with 8 VIFs (tested with multiple iperf3 instances running on all interfaces simultaneously) Moreover, some users coming from VMWare are used to networking setups with dozens of VIFs, and this is a step towards allowing that without encountering any other bottlenecks in the system. NOTE: We are currently only allocating enough grants for 1 hotplugged VIF above 7. Therefore, technically we shouldn't allow creating more than one VIF when the VM is running (or paused), but there is currently no way in xapi to check how many and which VIFs were hotplugged as far as I know, and allowed_VIFs are honoured by clients on VIF creation, not hotplug. Since this is in keeping with the previous behaviour of this field (if VM has many VBDs, it wouldn't have been able to allocate 7 VIFs before) as an "advice", and not a "guarantee" or "limit", I've decided to keep it as-is. A more detailed technical explanation of the supported limit should be described elsewhere in support statements. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_helpers.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 9daab6113ea..9556096fe4e 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -1304,9 +1304,9 @@ let allowed_VBD_devices_HVM_floppy = (fun x -> Device_number.(make Floppy ~disk:x ~partition:0)) (inclusive_range 0 1) -let allowed_VIF_devices_HVM = vif_inclusive_range 0 6 +let allowed_VIF_devices_HVM = vif_inclusive_range 0 15 -let allowed_VIF_devices_PV = vif_inclusive_range 0 6 +let allowed_VIF_devices_PV = vif_inclusive_range 0 15 (** [possible_VBD_devices_of_string s] returns a list of Device_number.t which represent possible interpretations of [s]. *) From c725281e3a2ae0a60c3b181cc469d163315965bb Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 3 Jul 2025 11:50:38 +0100 Subject: [PATCH 378/492] xapi-stdext-threads: calibrate ratio for delay times On very busy systems, the wait may take much longer than expected. Instead of hard-coding the expected value, wait once to estimate the time aded to the delays, and then use it to compare the times. Also change to use Mtime.Spans instead of using integers. Signed-off-by: Pau Ruiz Safont --- .../lib/xapi-stdext-threads/scheduler_test.ml | 94 +++++++++++-------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml index 0a4a847403f..259a24ee260 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -14,90 +14,106 @@ module Scheduler = Xapi_stdext_threads_scheduler.Scheduler +let calibrated_ratio () = + let expected = Mtime.Span.(100 * ms |> to_float_ns) in + let elapsed = Mtime_clock.counter () in + (* Add a 10% leeway to the ratio calculated *) + Thread.delay 0.11 ; + let actual = Mtime_clock.count elapsed |> Mtime.Span.to_float_ns in + let ratio = actual /. expected in + Alcotest.(check bool) (Printf.sprintf "ratio is %f" ratio) true true ; + ratio + let started = Atomic.make false let start_schedule () = if not (Atomic.exchange started true) then Thread.create Scheduler.loop () |> ignore -let send event data = Event.(send event data |> sync) +let send event data () = Event.(send event data |> sync) let receive event = Event.(receive event |> sync) -let elapsed_ms cnt = - let elapsed_ns = Mtime_clock.count cnt |> Mtime.Span.to_uint64_ns in - Int64.(div elapsed_ns 1000000L |> to_int) +let is_less ratio a b = + let a = + Mtime.Span.to_float_ns a + |> Float.mul ratio + |> Int64.of_float + |> Mtime.Span.of_uint64_ns + in + Mtime.Span.is_shorter ~than:a b -let is_less = Alcotest.(testable (pp int)) Stdlib.( > ) +let mtime_span () = + let cmp = is_less (calibrated_ratio ()) in + Alcotest.(testable Mtime.Span.pp) cmp let test_single () = let finished = Event.new_channel () in - Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (send finished true) ; start_schedule () ; Alcotest.(check bool) "result" true (receive finished) -let test_remove_self () = +let test_remove_self mtime_span () = let which = Event.new_channel () in Scheduler.add_to_queue "self" (Scheduler.Periodic 0.001) 0.001 (fun () -> (* this should remove the periodic scheduling *) Scheduler.remove_from_queue "self" ; (* add an operation to stop the test *) - Scheduler.add_to_queue "stop" Scheduler.OneShot 0.1 (fun () -> - send which "stop" - ) ; - send which "self" + Scheduler.add_to_queue "stop" Scheduler.OneShot 0.1 (send which "stop") ; + send which "self" () ) ; start_schedule () ; - let cnt = Mtime_clock.counter () in + + let from_wait_to_receive = Mtime_clock.counter () in Alcotest.(check string) "same event name" "self" (receive which) ; Alcotest.(check string) "same event name" "stop" (receive which) ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 300 elapsed_ms -let test_empty () = + let elapsed = Mtime_clock.count from_wait_to_receive in + let expected = Mtime.Span.(300 * ms) in + Alcotest.check mtime_span "small time" expected elapsed + +let test_empty mtime_span () = let finished = Event.new_channel () in - Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (send finished true) ; start_schedule () ; Alcotest.(check bool) "finished" true (receive finished) ; (* wait loop to go to wait with no work to do *) Thread.delay 0.1 ; - Scheduler.add_to_queue "two" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; - let cnt = Mtime_clock.counter () in + Scheduler.add_to_queue "two" Scheduler.OneShot 0.001 (send finished true) ; + + let from_wait_to_receive = Mtime_clock.counter () in Alcotest.(check bool) "finished" true (receive finished) ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 100 elapsed_ms -let test_wakeup () = + let elapsed = Mtime_clock.count from_wait_to_receive in + let expected = Mtime.Span.(100 * ms) in + Alcotest.check mtime_span "small time" expected elapsed + +let test_wakeup mtime_span () = let which = Event.new_channel () in (* schedule a long event *) - Scheduler.add_to_queue "long" Scheduler.OneShot 2.0 (fun () -> - send which "long" - ) ; + Scheduler.add_to_queue "long" Scheduler.OneShot 2.0 (send which "long") ; start_schedule () ; (* wait loop to go to wait with no work to do *) Thread.delay 0.1 ; - let cnt = Mtime_clock.counter () in + (* schedule a quick event, should wake up the loop *) - Scheduler.add_to_queue "quick" Scheduler.OneShot 0.1 (fun () -> - send which "quick" - ) ; + Scheduler.add_to_queue "quick" Scheduler.OneShot 0.1 (send which "quick") ; + + let from_wait_to_receive_quick = Mtime_clock.counter () in Alcotest.(check string) "same event name" "quick" (receive which) ; + Scheduler.remove_from_queue "long" ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 150 elapsed_ms + let elapsed = Mtime_clock.count from_wait_to_receive_quick in + let expected = Mtime.Span.(100 * ms) in + Alcotest.check mtime_span "small time" expected elapsed let tests = + let mtime_span = mtime_span () in [ ("test_single", `Quick, test_single) - ; ("test_remove_self", `Quick, test_remove_self) - ; ("test_empty", `Quick, test_empty) - ; ("test_wakeup", `Quick, test_wakeup) + ; ("test_remove_self", `Quick, test_remove_self mtime_span) + ; ("test_empty", `Quick, test_empty mtime_span) + ; ("test_wakeup", `Quick, test_wakeup mtime_span) ] let () = Alcotest.run "Scheduler" [("generic", tests)] From 51a97e7cfb4219a3890ca05e8f55d598b318d457 Mon Sep 17 00:00:00 2001 From: Guillaume Date: Mon, 7 Jul 2025 13:47:45 +0200 Subject: [PATCH 379/492] Downgrade unknown SM.feature errors to warnings Previously, encountering unknown features such as ATOMIC_PAUSE or SR_CACHING in SM.feature would trigger an error in xapi. However, these features can be used internally by SM and are not necessarily indicative of a misconfiguration. This change downgrades such cases from error to warning, allowing normal operation while still notifying the user that an unrecognized feature is present. Signed-off-by: Guillaume --- ocaml/xapi/smint.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index e58340b5239..1b4e4d45e47 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -132,7 +132,7 @@ module Feature = struct Some (feature, 1L) ) | feature :: _ -> - error "SM.feature: unknown feature %s" feature ; + warn "SM.feature: unknown feature %s" feature ; None (** [compat_features features1 features2] finds the compatible features in the input From 03980fc5e423ef93d2db1f81e041400e6e05cf75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Thu, 3 Jul 2025 15:57:59 +0000 Subject: [PATCH 380/492] CP-308455 VM.sysprep use watch to detect sysprep running Replace code that loops waiting for an update in xenstore with a watch. This should eliminate the chance of a race condition. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 3db126dae08..effdecabd83 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -209,25 +209,20 @@ let find_vdi ~__context ~label = file. *) let trigger ~domid ~uuid = let open Ezxenstore_core.Xenstore in + let module Watch = Ezxenstore_core.Watch in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in with_xs (fun xs -> xs.Xs.write (control // "filename") "D://unattend.xml" ; xs.Xs.write (control // "vdi-uuid") uuid ; xs.Xs.write (control // "action") "sysprep" ; debug "%s: notified domain %Ld" __FUNCTION__ domid ; - let rec wait n = - match (n, xs.Xs.read (control // "action")) with - | _, "running" -> - "running" - | n, action when n < 0 -> - action - | _, _ -> - Thread.delay 1.0 ; - wait (n - 1) - in - (* wait up to 5 iterations for runnung to appear or report whatever - is the status at the end *) - wait 5 + try + Watch.( + wait_for ~xs ~timeout:5.0 + (value_to_become (control // "action") "running") + ) ; + "running" + with Watch.Timeout _ -> xs.Xs.read (control // "action") ) (* This function is executed on the host where [vm] is running *) @@ -267,6 +262,7 @@ let sysprep ~__context ~vm ~unattend = match trigger ~domid ~uuid with | "running" -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; + Thread.delay 1.0 ; Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso | status -> From 5416081c9f0ae28d4eaea455c0d1aeda6f8a91f7 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Mon, 7 Jul 2025 10:53:18 +0800 Subject: [PATCH 381/492] CA-393417: Fix CI failure - Add unitest for usb_reset for coverage - Move mock to unittest.mock as python3 only now - exit -> sys.exit Signed-off-by: Lin Liu --- python3/libexec/usb_reset.py | 20 +++--- python3/tests/import_helper.py | 4 +- python3/tests/test_usb_reset.py | 109 ++++++++++++++++++++++++++++++++ python3/tests/test_usb_scan.py | 10 +-- 4 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 python3/tests/test_usb_reset.py diff --git a/python3/libexec/usb_reset.py b/python3/libexec/usb_reset.py index 010fd134862..941259d6182 100755 --- a/python3/libexec/usb_reset.py +++ b/python3/libexec/usb_reset.py @@ -22,7 +22,8 @@ # if without -r, do step 2~3 # 2. if it's the first USB device to pass-through # a) bind mount /sys in chroot directory (/var/xen/qemu/root-) -# b) clone (create the device with same major/minor number and mode) in chroot directory with same path +# b) clone (create the device with same major/minor number and mode) +# in chroot directory with same path # c) bind mount /proc/ to chroot directory (/var/xen/qemu/root-/proc/self) # 3. set device file uid/gid to (qemu_base + dom-id) # @@ -44,13 +45,14 @@ import ctypes.util import fcntl import grp -import xcp.logger as log # pytype: disable=import-error import logging import os import pwd import re import shutil +import sys +import xcp.logger as log # pytype: disable=import-error def parse_arg(): parser = argparse.ArgumentParser( @@ -94,14 +96,14 @@ def dev_path(device): pat = re.compile(r"\d+-\d+(\.\d+)*$") if pat.match(device) is None: log.error("Unexpected device node: {}".format(device)) - exit(1) + sys.exit(1) try: bus = read_int("/sys/bus/usb/devices/{}/busnum".format(device)) dev = read_int("/sys/bus/usb/devices/{}/devnum".format(device)) return "/dev/bus/usb/{0:03d}/{1:03d}".format(bus, dev) except (IOError, ValueError) as e: log.error("Failed to get device path {}: {}".format(device, str(e))) - exit(1) + sys.exit(1) def mount(source, target, fs, flags=0): @@ -110,7 +112,7 @@ def mount(source, target, fs, flags=0): log.error("Failed to mount {} ({}) to {} with flags {}: {}". format(source, fs, target, flags, os.strerror(ctypes.get_errno()))) - exit(1) + sys.exit(1) def umount(target): @@ -140,7 +142,7 @@ def clone_device(path, root_dir, domid): st = os.stat(path) except OSError as e: log.error("Failed to get stat of {}: {}".format(path, str(e))) - exit(1) + sys.exit(1) mode = st.st_mode major = os.major(st.st_rdev) @@ -154,7 +156,7 @@ def clone_device(path, root_dir, domid): grp.getgrnam("qemu_base").gr_gid + domid) except OSError as e: log.error("Failed to chown device file {}: {}".format(path, str(e))) - exit(1) + sys.exit(1) def attach(device, domid, pid, reset_only): @@ -177,7 +179,7 @@ def attach(device, domid, pid, reset_only): dev_dir = root_dir + "/dev" if not os.path.isdir(root_dir) or not os.path.isdir(dev_dir): log.error("Error: The chroot or dev directory doesn't exist") - exit(1) + sys.exit(1) clone_device(path, root_dir, domid) @@ -235,4 +237,4 @@ def cleanup(domid): cleanup(arg.domid) else: log.error("Unexpected command: {}".format(arg.command)) - exit(1) + sys.exit(1) diff --git a/python3/tests/import_helper.py b/python3/tests/import_helper.py index 2fdbd922b95..6e1c5946558 100644 --- a/python3/tests/import_helper.py +++ b/python3/tests/import_helper.py @@ -5,7 +5,7 @@ from types import ModuleType from typing import Generator -from mock import Mock +from unittest.mock import MagicMock @contextmanager @@ -28,7 +28,7 @@ def mocked_modules(*module_names: str) -> Generator[None, None, None]: ``` """ for module_name in module_names: - sys.modules[module_name] = Mock() + sys.modules[module_name] = MagicMock() yield for module_name in module_names: sys.modules.pop(module_name) diff --git a/python3/tests/test_usb_reset.py b/python3/tests/test_usb_reset.py new file mode 100644 index 00000000000..43dae790cb1 --- /dev/null +++ b/python3/tests/test_usb_reset.py @@ -0,0 +1,109 @@ +import unittest +from unittest import mock +from unittest.mock import MagicMock +import sys + +# some mocked arguemtn is not used in the tests, but as side-effects +# disabled pylint warning for unused arguments +# pylint: disable=unused-argument + +from python3.tests.import_helper import import_file_as_module +# mock modules to avoid dependencies +sys.modules["xcp"] = MagicMock() +sys.modules["xcp.logger"] = MagicMock() +usb_reset = import_file_as_module("python3/libexec/usb_reset.py") + + +class TestUsbReset(unittest.TestCase): + @mock.patch("usb_reset.open", new_callable=mock.mock_open, read_data="5\n") + def test_read_int(self, mock_open): + self.assertEqual(usb_reset.read_int("/fake/path"), 5) + mock_open.assert_called_with("/fake/path") + + @mock.patch("usb_reset.read_int", side_effect=[1, 2]) + @mock.patch("usb_reset.log") + def test_dev_path_valid(self, mock_log, mock_read_int): + device = "1-2.3" + path = usb_reset.dev_path(device) + self.assertEqual(path, "/dev/bus/usb/001/002") + mock_log.error.assert_not_called() + + @mock.patch("usb_reset.log") + def test_dev_path_invalid(self, mock_log): + with self.assertRaises(SystemExit): + usb_reset.dev_path("invalid-device") + mock_log.error.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + @mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_mount_success(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.mount.return_value = 0 + usb_reset.mount("src", "tgt", "fs") + mock_cdll.return_value.mount.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + @mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_mount_fail(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.mount.return_value = -1 + with self.assertRaises(SystemExit): + usb_reset.mount("src", "tgt", "fs") + mock_log.error.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + @mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_umount(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.umount.return_value = -1 + usb_reset.umount("tgt") + mock_log.error.assert_called() + + @mock.patch("usb_reset.os") + @mock.patch("usb_reset.pwd.getpwnam") + @mock.patch("usb_reset.grp.getgrnam") + @mock.patch("usb_reset.log") + def test_clone_device(self, mock_log, mock_grp, mock_pwd, mock_os): + mock_os.path.exists.return_value = False + mock_os.path.sep = "/" + mock_os.stat.return_value.st_mode = 0o600 + mock_os.stat.return_value.st_rdev = 0 + mock_os.major.return_value = 1 + mock_os.minor.return_value = 2 + mock_os.makedev.return_value = 1234 + mock_pwd.return_value.pw_uid = 1000 + mock_grp.return_value.gr_gid = 1000 + usb_reset.clone_device("/dev/bus/usb/001/002", "/root", 1) + mock_os.mknod.assert_called() + mock_os.chown.assert_called() + + @mock.patch("usb_reset.dev_path", return_value="/dev/bus/usb/001/002") + @mock.patch("usb_reset.open", new_callable=mock.mock_open) + @mock.patch("usb_reset.fcntl.ioctl") + @mock.patch("usb_reset.log") + def test_attach_reset_only(self, mock_log, mock_ioctl, mock_open, mock_dev_path): + usb_reset.attach("1-2", 1, 123, True) + mock_open.assert_called() + mock_ioctl.assert_called() + + @mock.patch("usb_reset.dev_path", return_value="/dev/bus/usb/001/002") + @mock.patch("usb_reset.os.remove") + @mock.patch("usb_reset.get_root_dir", return_value="/root") + def test_detach(self, mock_get_root_dir, mock_remove, mock_dev_path): + usb_reset.detach("1-2", 1) + mock_remove.assert_called() + + @mock.patch("usb_reset.shutil.rmtree") + @mock.patch("usb_reset.os.path.isdir", return_value=True) + @mock.patch("usb_reset.os.path.exists", return_value=True) + @mock.patch("usb_reset.os.path.ismount", return_value=True) + @mock.patch("usb_reset.umount") + @mock.patch("usb_reset.log") + #pylint: disable=too-many-arguments + def test_cleanup(self, mock_log, mock_umount, mock_ismount, + mock_exists, mock_isdir, mock_rmtree): + usb_reset.cleanup(1) + mock_rmtree.assert_called() + +if __name__ == "__main__": + unittest.main() diff --git a/python3/tests/test_usb_scan.py b/python3/tests/test_usb_scan.py index 8b886194c74..45bfc78e569 100644 --- a/python3/tests/test_usb_scan.py +++ b/python3/tests/test_usb_scan.py @@ -9,14 +9,14 @@ import unittest from collections.abc import Mapping from typing import cast +from unittest.mock import MagicMock -import mock from python3.tests.import_helper import import_file_as_module # mock modules to avoid dependencies -sys.modules["xcp"] = mock.Mock() -sys.modules["xcp.logger"] = mock.Mock() -sys.modules["pyudev"] = mock.Mock() +sys.modules["xcp"] = MagicMock() +sys.modules["xcp.logger"] = MagicMock() +sys.modules["pyudev"] = MagicMock() usb_scan = import_file_as_module("python3/libexec/usb_scan.py") @@ -90,7 +90,7 @@ def mock_setup(mod, devices, interfaces, path): mod.log.error = verify_log mod.log.debug = verify_log mod.Policy._PATH = path - mod.pyudev.Context = mock.Mock( + mod.pyudev.Context = MagicMock( return_value=MocContext(devices, interfaces)) From 41cb7df40230ddc6d13f5729e49bf9f7b848112f Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 8 Jul 2025 09:05:53 +0100 Subject: [PATCH 382/492] datamodel_lifecycle: automatic update Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_lifecycle.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 2880742c9cd..cf4d59eae47 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -240,7 +240,7 @@ let prototyped_of_message = function | "host", "set_numa_affinity_policy" -> Some "24.0.0" | "VM", "sysprep" -> - Some "25.23.0-next" + Some "25.24.0" | "VM", "get_secureboot_readiness" -> Some "24.17.0" | "VM", "set_uefi_mode" -> From d34d581e7e5137217e65b2953fdc4ba4d4103dfd Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Tue, 8 Jul 2025 06:28:38 +0000 Subject: [PATCH 383/492] CA-413319: Ensure console timeout file exists when timeout is configured For the following scenarios, console timeout may not reflect the real status of the database: - XenServer upgrade to a new version - User accidentally deletes the configuration file Add checks to ensure database and real status are matched. Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi/dbsync_slave.ml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 9aff8823ea9..ab8a6a3ef24 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -396,7 +396,17 @@ let update_env __context sync_keys = and user may have disabled monitor service by mistake as well, so we need to check the status. *) if auto_mode_in_db <> ssh_monitor_enabled then Xapi_host.set_ssh_auto_mode ~__context ~self:localhost - ~value:auto_mode_in_db + ~value:auto_mode_in_db ; + let console_timeout = + Db.Host.get_console_idle_timeout ~__context ~self:localhost + in + let console_timeout_file_exists = + Sys.file_exists !Xapi_globs.console_timeout_profile_path + in + (* Ensure the console timeout profile file exists if the timeout is configured *) + if console_timeout > 0L && not console_timeout_file_exists then + Xapi_host.set_console_idle_timeout ~__context ~self:localhost + ~value:console_timeout ) ; remove_pending_guidances ~__context From 12826e8d0b4e5fda224c96e62305294eb76757cc Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 8 Jul 2025 11:30:50 +0100 Subject: [PATCH 384/492] CA-413412: Fail to designate new master The user attempted to designate a new master, but the operation failed. The root cause is as follows: After the new proposed master successfully sent the `commit_new_master` API call to the old master, it attempted to send a `logout` request. However, at this point, the old master was already rebooting its xapi service, causing the `logout` to fail. As a result, the process of designating the new master was marked as failed, and the status changed to `broken`. In high-load environments, there can be a delay in sending the logout request, increasing the likelihood that it is sent after the old master has already started rebooting. If `commit_new_master` has already been successful, the success of the subsequent `logout` operation should not be considered critical. Therefore, the solution is to ignore the result of the `logout` request if `commit_new_master` was successful. Signed-off-by: Bengang Yuan --- ocaml/xapi/helpers.ml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 318ddfecf8d..aff1b815566 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -591,6 +591,7 @@ let call_api_functions ~__context f = call_api_functions_internal ~__context f let call_emergency_mode_functions hostname f = + let __FUN = __FUNCTION__ in let open Xmlrpc_client in let transport = SSL @@ -609,7 +610,13 @@ let call_emergency_mode_functions hostname f = in finally (fun () -> f rpc session_id) - (fun () -> Client.Client.Session.local_logout ~rpc ~session_id) + (fun () -> + try Client.Client.Session.local_logout ~rpc ~session_id + with _ -> + (* This is an emergency mode function, so we don't care about the error + in logout *) + debug "%s: The logout failed in emergency mode function" __FUN + ) let is_domain_zero_with_record ~__context vm_ref vm_rec = let host_ref = vm_rec.API.vM_resident_on in From 97e50cfb2e596660859aaae37ee74d876a78f6e2 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 10 Jul 2025 07:21:40 +0100 Subject: [PATCH 385/492] CA-413424: Enhance xe help output The previous `xe` help is as below: ``` Usage: xe [-s server] [-p port] ([-u username] [-pw password] or [-pwf ]) [--traceparent traceparent] A full list of commands can be obtained by running xe help -s -p ``` The previous `xe` help output lacked debug-related options and did not provide detailed parameter description. The new `xe` help output is as follows: ``` Usage: xe [ -s ] XenServer host [ -p ] XenServer port number [ -u -pw | -pwf ] User authentication (password or file) [ --nossl ] Disable SSL/TLS [ --debug ] Enable debug output [ --debug-on-fail ] Enable debug output only on failure [ --traceparent ] Distributed tracing context [ ... ] Command-specific options A full list of commands can be obtained by running xe help -s -p ``` Signed-off-by: Bengang Yuan --- ocaml/xe-cli/newcli.ml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index c33e32a2e0a..6d32834c524 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -67,13 +67,22 @@ exception Usage let usage () = error - "Usage: %s [-s server] [-p port] ([-u username] [-pw password] or \ - [-pwf ]) [--traceparent traceparent] \n" + "Usage:\n\ + \ %s \n\ + \ [ -s ] XenServer host \n\ + \ [ -p ] XenServer port number \n\ + \ [ -u -pw | -pwf ] \n\ + \ User authentication (password or file) \n\ + \ [ --nossl ] Disable SSL/TLS \n\ + \ [ --debug ] Enable debug output \n\ + \ [ --debug-on-fail ] Enable debug output only on failure \n\ + \ [ --traceparent ] Distributed tracing context \n\ + \ [ ... ] Command-specific options \n" Sys.argv.(0) ; error "\n\ A full list of commands can be obtained by running \n\ - \t%s help -s -p \n" + \ %s help -s -p \n" Sys.argv.(0) let is_localhost ip = ip = "127.0.0.1" From d958124ee6a7724fdc12d6fbb634a3a6bdfb67f7 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 10 Jul 2025 22:02:51 +0800 Subject: [PATCH 386/492] XSI-1954: Only block pool join for clustering on non-management VLAN It is because the known issue is only with non-management VLAN: https://docs.xenserver.com/en-us/xenserver/8/storage/format#thin-provisioned-shared-gfs2-block-storage We recommend not to use a GFS2 SR with a VLAN due to a known issue where you cannot add or remove hosts on a clustered pool if the cluster network is on a non-management VLAN. Signed-off-by: Gang Ji --- ocaml/xapi/xapi_pool.ml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 5425ef05188..4b4240cb7e9 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -118,9 +118,15 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = | pif when pif = Ref.null -> () | pif -> ( - match Client.PIF.get_VLAN ~rpc ~session_id ~self:pif with - | vlan when vlan > 0L -> - error "Cannot join pool whose clustering is enabled on VLAN network" ; + match + ( Client.PIF.get_VLAN ~rpc ~session_id ~self:pif + , Client.PIF.get_management ~rpc ~session_id ~self:pif + ) + with + | vlan, false when vlan > 0L -> + error + "Cannot join pool whose clustering is enabled on a \ + non-management VLAN network" ; raise (Api_errors.Server_error ( Api_errors @@ -128,7 +134,7 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = , [Int64.to_string vlan] ) ) - | 0L | _ -> ( + | _ -> ( let clustering_bridges_in_pool = ( match Client.PIF.get_bond_master_of ~rpc ~session_id ~self:pif From edb0d819424929861dc3f5a74889604a4336c539 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 11 Jul 2025 10:19:55 +0100 Subject: [PATCH 387/492] xapi/nm: Send non-empty dns to networkd when using IPv6 autoconf Because Autoconf is not DHCP, networkd uses the dns value to write to resolv.conf. This is done on ocaml/networkd/bin/network_server.ml line 745 This allows to have non-empty resolv.conf when using IPv6 autoconf. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/nm.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 229b53adbe2..77f8c078ed3 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -634,6 +634,7 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) rc.API.pIF_ip_configuration_mode = `Static | `IPv6 -> rc.API.pIF_ipv6_configuration_mode = `Static + || rc.API.pIF_ipv6_configuration_mode = `Autoconf in let dns = match (static, rc.API.pIF_DNS) with From bdbd975af50bfb0d2660f69101b82cf029ee8c58 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 11 Jul 2025 12:03:05 +0100 Subject: [PATCH 388/492] xapi-idl/network: Remove code duplication for DNS persistence decisions Previously both xapi and networkd had to inspect the IP configuration to decide whether the DNS values should be persistend into /etc/resolv.conf. This actually lead to a mismatch in them. Instead use an option value for DNS that simply means that if there's a value, it must be persisted. Now xapi decides the instances where these values are written. Signed-off-by: Pau Ruiz Safont --- ocaml/networkd/bin/network_server.ml | 16 +++++------- ocaml/networkd/bin_db/networkd_db.ml | 29 ++++++++++++--------- ocaml/networkd/lib/network_config.ml | 15 ++++++----- ocaml/xapi-idl/network/network_interface.ml | 7 +++-- ocaml/xapi/nm.ml | 16 +++++------- 5 files changed, 42 insertions(+), 41 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 59c76e319f3..5e056f73eb7 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -554,7 +554,8 @@ module Interface = struct let set_dns _ dbg ~name ~nameservers ~domains = Debug.with_thread_associated dbg (fun () -> - update_config name {(get_config name) with dns= (nameservers, domains)} ; + update_config name + {(get_config name) with dns= Some (nameservers, domains)} ; debug "Configuring DNS for %s: nameservers: [%s]; domains: [%s]" name (String.concat ", " (List.map Unix.string_of_inet_addr nameservers)) (String.concat ", " domains) ; @@ -727,7 +728,7 @@ module Interface = struct ; ipv6_conf ; ipv6_gateway ; ipv4_routes - ; dns= nameservers, domains + ; dns ; mtu ; ethtool_settings ; ethtool_offload @@ -736,15 +737,10 @@ module Interface = struct ) ) -> update_config name c ; exec (fun () -> - (* We only apply the DNS settings when not in a DHCP mode - to avoid conflicts. The `dns` field - should really be an option type so that we don't have to - derive the intention of the caller by looking at other - fields. *) - match (ipv4_conf, ipv6_conf) with - | Static4 _, _ | _, Static6 _ | _, Autoconf6 -> + match dns with + | Some (nameservers, domains) -> set_dns () dbg ~name ~nameservers ~domains - | _ -> + | None -> () ) ; exec (fun () -> set_ipv4_conf dbg name ipv4_conf) ; diff --git a/ocaml/networkd/bin_db/networkd_db.ml b/ocaml/networkd/bin_db/networkd_db.ml index f62021828fa..bffe93a32bc 100644 --- a/ocaml/networkd/bin_db/networkd_db.ml +++ b/ocaml/networkd/bin_db/networkd_db.ml @@ -74,20 +74,25 @@ let _ = [("gateway", Unix.string_of_inet_addr addr)] in let dns = - let dns' = - List.map Unix.string_of_inet_addr (fst interface_config.dns) - in - if dns' = [] then - [] - else - [("dns", String.concat "," dns')] + interface_config.dns + |> Option.map fst + |> Option.map (List.map Unix.string_of_inet_addr) + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | dns' -> + [("dns", String.concat "," dns')] + ) in let domains = - let domains' = snd interface_config.dns in - if domains' = [] then - [] - else - [("domain", String.concat "," domains')] + interface_config.dns + |> Option.map snd + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | domains' -> + [("domain", String.concat "," domains')] + ) in mode @ addrs @ gateway @ dns @ domains | None4 -> diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index 56eef61ce3d..3d034f05284 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -37,7 +37,6 @@ let bridge_naming_convention (device : string) = let get_list_from ~sep ~key args = List.assoc_opt key args |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) - |> Option.value ~default:[] let parse_ipv4_config args = function | Some "static" -> @@ -73,11 +72,13 @@ let parse_ipv6_config args = function (None6, None) let parse_dns_config args = - let nameservers = - get_list_from ~sep:"," ~key:"DNS" args |> List.map Unix.inet_addr_of_string + let ( let* ) = Option.bind in + let* nameservers = + get_list_from ~sep:"," ~key:"DNS" args + |> Option.map (List.map Unix.inet_addr_of_string) in - let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in - (nameservers, domains) + let* domains = get_list_from ~sep:" " ~key:"DOMAIN" args in + Some (nameservers, domains) let read_management_conf () = try @@ -103,7 +104,7 @@ let read_management_conf () = let device = (* Take 1st member of bond *) match (bond_mode, bond_members) with - | None, _ | _, [] -> ( + | None, _ | _, (None | Some []) -> ( match List.assoc_opt "LABEL" args with | Some x -> x @@ -111,7 +112,7 @@ let read_management_conf () = error "%s: missing LABEL in %s" __FUNCTION__ management_conf ; raise Read_error ) - | _, hd :: _ -> + | _, Some (hd :: _) -> hd in Inventory.reread_inventory () ; diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index 2f3368fc131..06d38ff1a87 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -158,7 +158,10 @@ type interface_config_t = { ; ipv6_conf: ipv6 [@default None6] ; ipv6_gateway: Unix.inet_addr option [@default None] ; ipv4_routes: ipv4_route_t list [@default []] - ; dns: Unix.inet_addr list * string list [@default [], []] + ; dns: (Unix.inet_addr list * string list) option [@default None] + (** the list + of nameservers and domains to persist in /etc/resolv.conf. Must be None when + using a DHCP mode *) ; mtu: int [@default 1500] ; ethtool_settings: (string * string) list [@default []] ; ethtool_offload: (string * string) list [@default [("lro", "off")]] @@ -200,7 +203,7 @@ let default_interface = ; ipv6_conf= None6 ; ipv6_gateway= None ; ipv4_routes= [] - ; dns= ([], []) + ; dns= None ; mtu= 1500 ; ethtool_settings= [] ; ethtool_offload= [("lro", "off")] diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 77f8c078ed3..fbc37a5fedc 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -639,24 +639,20 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) let dns = match (static, rc.API.pIF_DNS) with | false, _ | true, "" -> - ([], []) + None | true, pif_dns -> let nameservers = List.map Unix.inet_addr_of_string - (String.split ',' pif_dns) + (String.split_on_char ',' pif_dns) in let domains = match List.assoc_opt "domain" rc.API.pIF_other_config with - | None -> + | None | Some "" -> [] - | Some domains -> ( - try String.split ',' domains - with _ -> - warn "Invalid DNS search domains: %s" domains ; - [] - ) + | Some domains -> + String.split_on_char ',' domains in - (nameservers, domains) + Some (nameservers, domains) in let mtu = determine_mtu rc net_rc in let ethtool_settings, ethtool_offload = From 761ca78736658fe134c97b9ac431b9be1f819d9c Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 10 Jul 2025 16:59:40 +0100 Subject: [PATCH 389/492] CP-308455 VM.sysprep add timeout parameter The timeout (in seconds) we wait for the current domain to shut down before we return. This is supplied by the user; for the XE CLI we can provide a default. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_vm.ml | 1 + ocaml/xapi-cli-server/cli_frontend.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 12 +++++++++++- ocaml/xapi/message_forwarding.ml | 6 +++--- ocaml/xapi/xapi_vm.ml | 4 ++-- ocaml/xapi/xapi_vm.mli | 7 ++++++- 6 files changed, 24 insertions(+), 8 deletions(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 5e4134afd0b..4774d49390e 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2376,6 +2376,7 @@ let sysprep = [ (Ref _vm, "self", "The VM") ; (String, "unattend", "XML content passed to sysprep") + ; (Float, "timeout", "timeout in seconds") ] ~doc:"Pass unattend.xml to Windows sysprep" ~allowed_roles:_R_VM_ADMIN () diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 255f2be789e..d6b553567e4 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -2767,7 +2767,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "vm-sysprep" , { reqd= ["filename"] - ; optn= [] + ; optn= ["timeout"] ; help= "Pass and execute sysprep configuration file" ; implementation= With_fd Cli_operations.vm_sysprep ; flags= [Vm_selectors] diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index f51c50851d4..9e3a8937f32 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3590,6 +3590,15 @@ let vm_data_source_forget printer rpc session_id params = let vm_sysprep fd printer rpc session_id params = let filename = List.assoc "filename" params in + let timeout = + match List.assoc "timeout" params |> float_of_string with + | exception _ -> + 0.0 + | s when s < 0.0 -> + 0.0 + | s -> + s + in let unattend = match get_client_file fd filename with | Some xml -> @@ -3602,8 +3611,9 @@ let vm_sysprep fd printer rpc session_id params = (do_vm_op printer rpc session_id (fun vm -> Client.VM.sysprep ~rpc ~session_id ~self:(vm.getref ()) ~unattend + ~timeout ) - params ["filename"] + params ["filename"; "timeout"] ) (* APIs to collect SR level RRDs *) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 4c79f91cf5f..ca168ad4d08 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3116,10 +3116,10 @@ functor Local.VM.remove_from_blocked_operations ~__context ~self ~key ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self - let sysprep ~__context ~self ~unattend = + let sysprep ~__context ~self ~unattend ~timeout = info "VM.sysprep: self = '%s'" (vm_uuid ~__context self) ; - let local_fn = Local.VM.sysprep ~self ~unattend in - let remote_fn = Client.VM.sysprep ~self ~unattend in + let local_fn = Local.VM.sysprep ~self ~unattend ~timeout in + let remote_fn = Client.VM.sysprep ~self ~unattend ~timeout in let policy = Helpers.Policy.fail_immediately in with_vm_operation ~__context ~self ~doc:"VM.sysprep" ~op:`sysprep ~policy (fun () -> diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index f53f506e522..0841e043188 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1702,9 +1702,9 @@ let get_secureboot_readiness ~__context ~self = ) ) -let sysprep ~__context ~self ~unattend = +let sysprep ~__context ~self ~unattend ~timeout = let uuid = Db.VM.get_uuid ~__context ~self in - debug "%s %S" __FUNCTION__ uuid ; + debug "%s %S (timeout %f)" __FUNCTION__ uuid timeout ; match Vm_sysprep.sysprep ~__context ~vm:self ~unattend with | () -> debug "%s %S success" __FUNCTION__ uuid ; diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 005b4cae4ae..2e861e8601b 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -451,4 +451,9 @@ val add_to_blocked_operations : val remove_from_blocked_operations : __context:Context.t -> self:API.ref_VM -> key:API.vm_operations -> unit -val sysprep : __context:Context.t -> self:API.ref_VM -> unattend:string -> unit +val sysprep : + __context:Context.t + -> self:API.ref_VM + -> unattend:string + -> timeout:float + -> unit From db1492628cf56597119c575bed7f47e34c536767 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Jul 2025 11:16:15 +0100 Subject: [PATCH 390/492] CP-308455 VM.sysprep wait for shutdown An easy way to wait for the shutdown of a domain is to watch its xenstore tree to disappear. Wait for this after triggering sysprep via the gueat agent. Signed-off-by: Christian Lindig --- ocaml/xapi-cli-server/cli_operations.ml | 2 +- ocaml/xapi/vm_sysprep.ml | 25 +++++++++++++++---------- ocaml/xapi/vm_sysprep.mli | 7 ++++++- ocaml/xapi/xapi_vm.ml | 7 ++++++- 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 9e3a8937f32..40c5b4a9de3 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3593,7 +3593,7 @@ let vm_sysprep fd printer rpc session_id params = let timeout = match List.assoc "timeout" params |> float_of_string with | exception _ -> - 0.0 + 3.0 *. 60.0 (* default in the CLI, no default in the API *) | s when s < 0.0 -> 0.0 | s -> diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index effdecabd83..2d99dace0ed 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -207,27 +207,33 @@ let find_vdi ~__context ~label = (** notify the VM with [domid] to run sysprep and where to find the file. *) -let trigger ~domid ~uuid = +let trigger ~domid ~uuid ~timeout = let open Ezxenstore_core.Xenstore in let module Watch = Ezxenstore_core.Watch in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in + let domain = Printf.sprintf "/local/domain/%Ld" domid in with_xs (fun xs -> xs.Xs.write (control // "filename") "D://unattend.xml" ; xs.Xs.write (control // "vdi-uuid") uuid ; xs.Xs.write (control // "action") "sysprep" ; debug "%s: notified domain %Ld" __FUNCTION__ domid ; try + (* wait for sysprep to start, then domain to dissapear *) Watch.( wait_for ~xs ~timeout:5.0 (value_to_become (control // "action") "running") ) ; - "running" - with Watch.Timeout _ -> xs.Xs.read (control // "action") + debug "%s: sysprep is runnung; waiting for shutdown" __FUNCTION__ ; + Watch.(wait_for ~xs ~timeout (key_to_disappear domain)) ; + true + with Watch.Timeout _ -> + debug "%s: sysprep timeout" __FUNCTION__ ; + false ) (* This function is executed on the host where [vm] is running *) -let sysprep ~__context ~vm ~unattend = - debug "%s" __FUNCTION__ ; +let sysprep ~__context ~vm ~unattend ~timeout = + debug "%s (timeout %f)" __FUNCTION__ timeout ; if not !Xapi_globs.vm_sysprep_enabled then fail API_not_enabled ; let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in @@ -259,14 +265,13 @@ let sysprep ~__context ~vm ~unattend = call ~__context @@ fun rpc session_id -> Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; Thread.delay !Xapi_globs.vm_sysprep_wait ; - match trigger ~domid ~uuid with - | "running" -> + match trigger ~domid ~uuid ~timeout with + | true -> debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; - Thread.delay 1.0 ; Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso - | status -> - debug "%s: sysprep %S, ejecting CD" __FUNCTION__ status ; + | false -> + debug "%s: sysprep timeout, ejecting CD" __FUNCTION__ ; Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso ; fail VM_sysprep_timeout diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 80f1874d7e9..746c260badc 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -27,7 +27,12 @@ exception Sysprep of error val on_startup : __context:Context.t -> unit (** clean up on toolstart start up *) -val sysprep : __context:Context.t -> vm:API.ref_VM -> unattend:string -> unit +val sysprep : + __context:Context.t + -> vm:API.ref_VM + -> unattend:string + -> timeout:float + -> unit (** Execute sysprep on [vm] using script [unattend]. This requires driver support from the VM and is checked. [unattend:string] must not exceed 32kb. Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 0841e043188..eeaa9b99c91 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1705,7 +1705,12 @@ let get_secureboot_readiness ~__context ~self = let sysprep ~__context ~self ~unattend ~timeout = let uuid = Db.VM.get_uuid ~__context ~self in debug "%s %S (timeout %f)" __FUNCTION__ uuid timeout ; - match Vm_sysprep.sysprep ~__context ~vm:self ~unattend with + if timeout < 0.0 then + raise + Api_errors.( + Server_error (invalid_value, ["timeout"; string_of_float timeout]) + ) ; + match Vm_sysprep.sysprep ~__context ~vm:self ~unattend ~timeout with | () -> debug "%s %S success" __FUNCTION__ uuid ; () From 25154ce6e95619a5de94f11092ce4cf27f4f2c15 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Jul 2025 11:16:15 +0100 Subject: [PATCH 391/492] CP-308455 VM.sysprep update documentation Explain the timeout parameter. Signed-off-by: Christian Lindig --- ocaml/idl/datamodel_vm.ml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 4774d49390e..34ffda62d6b 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2376,9 +2376,12 @@ let sysprep = [ (Ref _vm, "self", "The VM") ; (String, "unattend", "XML content passed to sysprep") - ; (Float, "timeout", "timeout in seconds") + ; (Float, "timeout", "timeout in seconds for expected reboot") ] - ~doc:"Pass unattend.xml to Windows sysprep" ~allowed_roles:_R_VM_ADMIN () + ~doc: + "Pass unattend.xml to Windows sysprep and wait for the VM to shut down \ + as part of a reboot." + ~allowed_roles:_R_VM_ADMIN () let vm_uefi_mode = Enum From e75712e2c59b616d2dfc4d44ab3da83236177acb Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Jul 2025 11:16:15 +0100 Subject: [PATCH 392/492] CP-308455 VM.sysprep wait for "action" key to disappear Monitor the VM's reaction more carefully: wait for sysprep to terminate. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 2d99dace0ed..bebffe47edc 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -223,7 +223,10 @@ let trigger ~domid ~uuid ~timeout = wait_for ~xs ~timeout:5.0 (value_to_become (control // "action") "running") ) ; - debug "%s: sysprep is runnung; waiting for shutdown" __FUNCTION__ ; + debug "%s: sysprep is runnung; waiting for sysprep to finish" + __FUNCTION__ ; + Watch.(wait_for ~xs ~timeout (key_to_disappear (control // "action"))) ; + debug "%s sysprep is finished" __FUNCTION__ ; Watch.(wait_for ~xs ~timeout (key_to_disappear domain)) ; true with Watch.Timeout _ -> From 60052319f8f95cb9ad28d6fc8ceaf4a59b27fd4d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 10:58:24 +0100 Subject: [PATCH 393/492] xapi_sr_operations: Report more useful info when raising other_operation_in_progress error Compare before, where it's unclear which operation is precluding progress: ``` Caught exception while marking SR for VDI.clone in message forwarder: OTHER_OPERATION_IN_PROGRESS: [ SR; OpaqueRef:d019f26a-b2e8-529b-bb66-fd4f008d4f82; VDI.clone ] ``` And after, where the ID of the operation makes it easier to navigate the logs: ``` Caught exception while marking SR for VDI.clone in message forwarder: OTHER_OPERATION_IN_PROGRESS: [ SR; OpaqueRef:b0f54a40-485f-f48f-fdc7-6db28a64d3fd; scan; OpaqueRef:832fb22d-2ce5-0d26-8a00-a4165e78b34d ] ``` Also, avoid iterating over all the current_operations when looking for non-parallelisable ops, stop on the first one. Report the non-parallelisable op instead of the first current operation as the reason for the error as well. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_sr_operations.ml | 41 ++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/ocaml/xapi/xapi_sr_operations.ml b/ocaml/xapi/xapi_sr_operations.ml index 75a3c695af4..b08a82c20f2 100644 --- a/ocaml/xapi/xapi_sr_operations.ml +++ b/ocaml/xapi/xapi_sr_operations.ml @@ -200,24 +200,35 @@ let valid_operations ~__context ?op record _ref' : table = let check_parallel_ops ~__context _record = let safe_to_parallelise = [`plug] in let current_ops = - Xapi_stdext_std.Listext.List.setify (List.map snd current_ops) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + current_ops in (* If there are any current operations, all the non_parallelisable operations must definitely be stopped *) - if current_ops <> [] then - set_errors Api_errors.other_operation_in_progress - ["SR"; _ref; sr_operation_to_string (List.hd current_ops)] - (Xapi_stdext_std.Listext.List.set_difference all_ops safe_to_parallelise) ; - let all_are_parallelisable = - List.fold_left ( && ) true - (List.map (fun op -> List.mem op safe_to_parallelise) current_ops) - in - (* If not all are parallelisable (eg a vdi_resize), ban the otherwise - parallelisable operations too *) - if not all_are_parallelisable then - set_errors Api_errors.other_operation_in_progress - ["SR"; _ref; sr_operation_to_string (List.hd current_ops)] - safe_to_parallelise + match current_ops with + | (current_op_ref, current_op_type) :: _ -> + set_errors Api_errors.other_operation_in_progress + ["SR"; _ref; sr_operation_to_string current_op_type; current_op_ref] + (Xapi_stdext_std.Listext.List.set_difference all_ops + safe_to_parallelise + ) ; + let non_parallelisable_op = + List.find_opt + (fun (_, op) -> not (List.mem op safe_to_parallelise)) + current_ops + in + (* If not all are parallelisable (eg a vdi_resize), ban the otherwise + parallelisable operations too *) + Option.iter + (fun (op_ref, op_type) -> + set_errors Api_errors.other_operation_in_progress + ["SR"; _ref; sr_operation_to_string op_type; op_ref] + safe_to_parallelise + ) + non_parallelisable_op + | [] -> + () in let check_cluster_stack_compatible ~__context _record = (* Check whether there are any conflicts with HA that prevent us from From d80df842e18567338838751acd1ac0ed1e68f114 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 14 Jul 2025 15:20:35 +0100 Subject: [PATCH 394/492] CA-413713: Change bash-completion shortcut The shortcut to show required parameters is Ctrl-rq but this conflicts with the default readline shortcut for reverse-search-history. Usually, it finds the first match but subsequent presses of Ctrl-r do not find any further matches. Fix this by switching to Meta-q (aka Alt-q or Esc-q) which is unused. Signed-off-by: Ross Lagerwall --- ocaml/xe-cli/bash-completion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 5ecc3890214..b4568c16b74 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -909,4 +909,4 @@ __autocomplete_reqd_params_names() return 0 } -bind -x '"\C-rq":"__autocomplete_reqd_params_names"' +bind -x '"\eq":"__autocomplete_reqd_params_names"' From f72162953b8b9822358e3f6d483daca113194e40 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 15 Jul 2025 08:05:17 +0100 Subject: [PATCH 395/492] Replace `List.fold_left (||) false (List.map f lst)` with `List.exists f lst` It's an equivalent, but much cleaner construct, which in addition avoids needlessly iterating over the list after the first match is found. Signed-off-by: Andrii Sultanov --- ocaml/rrd2csv/src/rrd2csv.ml | 2 +- ocaml/tests/binpack_test.ml | 5 +---- ocaml/xapi-cli-server/cli_util.ml | 2 +- ocaml/xapi-idl/storage/storage_test.ml | 2 +- ocaml/xapi-idl/storage/vdi_automaton.ml | 10 ++-------- ocaml/xapi/helpers.ml | 2 +- ocaml/xapi/pool_features_helpers.ml | 11 +++++------ ocaml/xapi/xapi_bond.ml | 6 +++--- ocaml/xapi/xapi_globs.ml | 18 +++++++----------- ocaml/xapi/xapi_host.ml | 18 +++++++++--------- 10 files changed, 31 insertions(+), 45 deletions(-) diff --git a/ocaml/rrd2csv/src/rrd2csv.ml b/ocaml/rrd2csv/src/rrd2csv.ml index a6866874ee2..37e00f8148d 100644 --- a/ocaml/rrd2csv/src/rrd2csv.ml +++ b/ocaml/rrd2csv/src/rrd2csv.ml @@ -304,7 +304,7 @@ module Ds_selector = struct if fs = [] then true else - List.fold_left (fun acc f -> acc || filter11 f d) false fs + List.exists (fun f -> filter11 f d) fs (* Returns the d \in ds that passes at least one of the filters fs *) diff --git a/ocaml/tests/binpack_test.ml b/ocaml/tests/binpack_test.ml index 27ab15e9f33..4544d7ffcb8 100644 --- a/ocaml/tests/binpack_test.ml +++ b/ocaml/tests/binpack_test.ml @@ -45,10 +45,7 @@ let check_plan config dead_hosts plan = let memory_remaining = account config.hosts config.vms plan in (* List.iter (fun mem -> Printf.printf "%Ld\n" mem) free; *) (* No host should be overcommitted: *) - if - List.fold_left ( || ) false - (List.map (fun x -> x < 0L) (List.map snd memory_remaining)) - then + if List.exists (fun (_, x) -> x < 0L) memory_remaining then raise BadPlan ; (* All failed VMs should be restarted: *) let failed_vms = get_failed_vms config dead_hosts in diff --git a/ocaml/xapi-cli-server/cli_util.ml b/ocaml/xapi-cli-server/cli_util.ml index 75c4f30360f..b71c9f1f3a3 100644 --- a/ocaml/xapi-cli-server/cli_util.ml +++ b/ocaml/xapi-cli-server/cli_util.ml @@ -91,7 +91,7 @@ let track callback rpc (session_id : API.ref_session) task = | _ -> false in - finished := List.fold_left ( || ) false (List.map matches events) + finished := List.exists matches events done with | Api_errors.Server_error (code, _) diff --git a/ocaml/xapi-idl/storage/storage_test.ml b/ocaml/xapi-idl/storage/storage_test.ml index f4145ceccc2..d86c6b69df5 100644 --- a/ocaml/xapi-idl/storage/storage_test.ml +++ b/ocaml/xapi-idl/storage/storage_test.ml @@ -63,7 +63,7 @@ let names = let vdi_exists sr vdi = let all = Client.SR.scan dbg sr in - List.fold_left (fun acc vdi_info -> acc || vdi_info.vdi = vdi) false all + List.exists (fun vdi_info -> vdi_info.vdi = vdi) all let create sr name_label = let vdi_info = diff --git a/ocaml/xapi-idl/storage/vdi_automaton.ml b/ocaml/xapi-idl/storage/vdi_automaton.ml index e36de90e2ba..3192fd585d9 100644 --- a/ocaml/xapi-idl/storage/vdi_automaton.ml +++ b/ocaml/xapi-idl/storage/vdi_automaton.ml @@ -94,15 +94,9 @@ let ( + ) state operation = let superstate states = let activated = - List.fold_left - (fun acc s -> acc || s = Activated RO || s = Activated RW) - false states - in - let rw = - List.fold_left - (fun acc s -> acc || s = Activated RW || s = Attached RW) - false states + List.exists (fun s -> s = Activated RO || s = Activated RW) states in + let rw = List.exists (fun s -> s = Activated RW || s = Attached RW) states in if states = [] then Detached else if activated then diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index aff1b815566..46f4eb743d4 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -1012,7 +1012,7 @@ let pool_has_different_host_platform_versions ~__context = let is_different_to_me platform_version = platform_version <> Xapi_version.platform_version () in - List.fold_left ( || ) false (List.map is_different_to_me platform_versions) + List.exists is_different_to_me platform_versions (* Checks that a host has a PBD for a particular SR (meaning that the SR is visible to the host) *) diff --git a/ocaml/xapi/pool_features_helpers.ml b/ocaml/xapi/pool_features_helpers.ml index dda8619013c..36e7e7a0252 100644 --- a/ocaml/xapi/pool_features_helpers.ml +++ b/ocaml/xapi/pool_features_helpers.ml @@ -58,17 +58,16 @@ let rec compute_additional_restrictions all_host_params = function [] | flag :: rest -> let switches = - List.map + List.exists (function | params -> - if List.mem_assoc flag params then - bool_of_string (List.assoc flag params) - else - true + List.assoc_opt flag params + |> Fun.flip Option.bind bool_of_string_opt + |> Option.value ~default:true ) all_host_params in - (flag, string_of_bool (List.fold_left ( || ) false switches)) + (flag, string_of_bool switches) :: compute_additional_restrictions all_host_params rest (* Combine the host-level feature restrictions into pool-level ones, and write diff --git a/ocaml/xapi/xapi_bond.ml b/ocaml/xapi/xapi_bond.ml index 72d762ff193..f0265bd50a4 100644 --- a/ocaml/xapi/xapi_bond.ml +++ b/ocaml/xapi/xapi_bond.ml @@ -427,9 +427,9 @@ let create ~__context ~network ~members ~mAC ~mode ~properties = in let disallow_unplug = (* this is always true if one of the PIFs is a cluster_host.PIF *) - List.fold_left - (fun a m -> Db.PIF.get_disallow_unplug ~__context ~self:m || a) - false members + List.exists + (fun m -> Db.PIF.get_disallow_unplug ~__context ~self:m) + members in (* Validate constraints: *) (* 1. Members must not be in a bond already *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 1e803610a34..8a28146eea2 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1334,18 +1334,14 @@ let gen_list_option name desc of_string string_of opt = let sm_plugins = ref [] let accept_sm_plugin name = - List.( - fold_left ( || ) false - (map - (function - | `All -> - true - | `Sm x -> - String.lowercase_ascii x = String.lowercase_ascii name - ) - !sm_plugins + List.exists + (function + | `All -> + true + | `Sm x -> + String.lowercase_ascii x = String.lowercase_ascii name ) - ) + !sm_plugins let nvidia_multi_vgpu_enabled_driver_versions = ref ["430.42"; "430.62"; "440.00+"] diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 92297c2251f..c5501249090 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2185,19 +2185,19 @@ let reset_networking ~__context ~host = (Db.PIF.get_all ~__context) in let bond_is_local bond = - List.fold_left - (fun a pif -> Db.Bond.get_master ~__context ~self:bond = pif || a) - false local_pifs + List.exists + (fun pif -> Db.Bond.get_master ~__context ~self:bond = pif) + local_pifs in let vlan_is_local vlan = - List.fold_left - (fun a pif -> Db.VLAN.get_untagged_PIF ~__context ~self:vlan = pif || a) - false local_pifs + List.exists + (fun pif -> Db.VLAN.get_untagged_PIF ~__context ~self:vlan = pif) + local_pifs in let tunnel_is_local tunnel = - List.fold_left - (fun a pif -> Db.Tunnel.get_access_PIF ~__context ~self:tunnel = pif || a) - false local_pifs + List.exists + (fun pif -> Db.Tunnel.get_access_PIF ~__context ~self:tunnel = pif) + local_pifs in let bonds = List.filter bond_is_local (Db.Bond.get_all ~__context) in List.iter From 37a42e0f4d810026a2cbd477228a5bed2a2c70cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 11 Jul 2025 17:08:29 +0100 Subject: [PATCH 396/492] CP-308875: set Xen PCI MMIO BAR to WB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default for the Xen PCI MMIO BAR is UnCachable. Setting this to WriteBack in the MTRR shows a massive performance improvement on AMD, at least for Linux guests. On Intel this is already set to WriteBack by Xen via another mechanism. To be effective this also requires the corresponding [Xen commit](https://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=22650d6054625be10172fe0c78b9cadd1a39bd63), old versions will just ignore this xenstore key. The optimization is not enabled by default in Xen due to the wide range of guests it supports, but XAPI supports a much narrower set of guest OSes. Setting the cache attribute to WB is done by setting UC=false. Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/xenopsd.ml | 9 ++++++++- ocaml/xenopsd/xenopsd.conf | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index 275cdcb79fa..c127b02f673 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -49,7 +49,14 @@ let default_vbd_backend_kind = ref "vbd" let ca_140252_workaround = ref false -let xen_platform_pci_bar_uc = ref true +(* Optimize performance: set MTRR WB attribute on Xen PCI MMIO BAR. + This is useful for AMD, and mostly a noop on Intel (which achieves a similar + effect using Intel-only features in Xen) + + Turning on WB is done by disabling UC: + UnCached=false -> WriteBack=true +*) +let xen_platform_pci_bar_uc = ref false let action_after_qemu_crash = ref None diff --git a/ocaml/xenopsd/xenopsd.conf b/ocaml/xenopsd/xenopsd.conf index 447d6cde54a..e1c3c87c7cb 100644 --- a/ocaml/xenopsd/xenopsd.conf +++ b/ocaml/xenopsd/xenopsd.conf @@ -113,4 +113,6 @@ disable-logging-for=http tracing tracing_export # by grant tables is mapped as Uncached (UC, the default) or WriteBack # (WB, the workaround). WB mapping could improve performance of devices # using grant tables. This is useful on AMD platform only. -# xen-platform-pci-bar-uc=true +# On Intel a similar effect is already achieved with iPAT in Xen, +# but setting this to 0 works on Intel too. +# xen-platform-pci-bar-uc=false From ac52de9cdbfafd29828e535087780c3700cea5a7 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 11 Jul 2025 11:03:11 +0000 Subject: [PATCH 397/492] Add message argument to LICENSE_CHECKOUT_ERROR The first argument of this API error is used for error codes as defined by v6d. These product-specific error codes can be matched on by clients who know about them (e.g. XenCenter for XenServer). The new, second argument allows v6d to return an error message in English that xe can print directly, while xe remains product agnostic and does not need to know v6d's error definitions. This replaces some awkward API-message handling code in cli_operations. Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_errors.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 27 +++---------------------- ocaml/xapi-idl/v6/v6_interface.ml | 3 ++- ocaml/xapi/xapi_host.ml | 4 ++-- 4 files changed, 8 insertions(+), 28 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 20acb06f60b..1c9646bc902 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -68,7 +68,7 @@ let _ = "The license-server connection details (address or port) were missing or \ incomplete." () ; - error Api_errors.license_checkout_error ["reason"] + error Api_errors.license_checkout_error ["code"; "message"] ~doc:"The license for the edition you requested is not available." () ; error Api_errors.license_file_deprecated [] ~doc: diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 40c5b4a9de3..020eec9f193 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -5338,9 +5338,8 @@ let with_license_server_changes printer rpc session_id params hosts f = ) hosts ) ; - let now = Unix.gettimeofday () in try f rpc session_id with - | Api_errors.Server_error (name, _) as e + | Api_errors.Server_error (name, [_; msg]) when name = Api_errors.license_checkout_error -> (* Put back original license_server_details *) List.iter @@ -5349,28 +5348,8 @@ let with_license_server_changes printer rpc session_id params hosts f = ~value:license_server ) current_license_servers ; - let alerts = - Client.Message.get_since ~rpc ~session_id - ~since:(Date.of_unix_time (now -. 1.)) - in - let print_if_checkout_error (ref, msg) = - if - false - || msg.API.message_name = fst Api_messages.v6_rejected - || msg.API.message_name = fst Api_messages.v6_comm_error - || msg.API.message_name - = fst Api_messages.v6_license_server_version_obsolete - then ( - Client.Message.destroy ~rpc ~session_id ~self:ref ; - printer (Cli_printer.PStderr (msg.API.message_body ^ "\n")) - ) - in - if alerts = [] then - raise e - else ( - List.iter print_if_checkout_error alerts ; - raise (ExitWithError 1) - ) + printer (Cli_printer.PStderr (msg ^ "\n")) ; + raise (ExitWithError 1) | Api_errors.Server_error (name, _) as e when name = Api_errors.invalid_edition -> let host = get_host_from_session rpc session_id in diff --git a/ocaml/xapi-idl/v6/v6_interface.ml b/ocaml/xapi-idl/v6/v6_interface.ml index ba42aa259ec..3098713c598 100644 --- a/ocaml/xapi-idl/v6/v6_interface.ml +++ b/ocaml/xapi-idl/v6/v6_interface.ml @@ -78,7 +78,8 @@ type errors = (** Thrown by license_check when expiry date matches or precedes current date *) | License_processing_error (** License could not be processed *) - | License_checkout_error of string (** License could not be checked out *) + | License_checkout_error of string * string + (** License could not be checked out *) | Missing_connection_details (** Thrown if connection port or address parameter not supplied to check_license *) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index c5501249090..0c80ad2a338 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2079,8 +2079,8 @@ let apply_edition_internal ~__context ~host ~edition ~additional = raise Api_errors.(Server_error (license_processing_error, [])) | V6_interface.(V6_error Missing_connection_details) -> raise Api_errors.(Server_error (missing_connection_details, [])) - | V6_interface.(V6_error (License_checkout_error s)) -> - raise Api_errors.(Server_error (license_checkout_error, [s])) + | V6_interface.(V6_error (License_checkout_error (code, msg))) -> + raise Api_errors.(Server_error (license_checkout_error, [code; msg])) | V6_interface.(V6_error (Internal_error e)) -> Helpers.internal_error "%s" e in From 2ec4461b8e7c7fa8282c8e69928721db00fdc30f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 11:27:54 +0100 Subject: [PATCH 398/492] xapi_cluster_helpers: Correctly report other_operation_in_progress error other_operation_in_progress has separate fields for the main class of the object ("Cluster" or "Cluster_host") and the operation that's blocking progress, do not concatenate these into one string. Also report the task ref that's blocking progress. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_cluster_helpers.ml | 14 +++++++++----- ocaml/xapi/xapi_cluster_host_helpers.ml | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/xapi_cluster_helpers.ml b/ocaml/xapi/xapi_cluster_helpers.ml index a4d30bcedaa..1afdefb2864 100644 --- a/ocaml/xapi/xapi_cluster_helpers.ml +++ b/ocaml/xapi/xapi_cluster_helpers.ml @@ -24,19 +24,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ = false let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = let op_to_str = Record_util.cluster_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - op_to_str cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some ( Api_errors.other_operation_in_progress - , ["Cluster." ^ current_ops_str; ref_str] + , ["Cluster"; ref_str; current_ops_str; current_ops_ref_str] ) (** Take an internal Cluster record and a proposed operation. Return None iff the operation diff --git a/ocaml/xapi/xapi_cluster_host_helpers.ml b/ocaml/xapi/xapi_cluster_host_helpers.ml index 59e5141da73..abdaa58c285 100644 --- a/ocaml/xapi/xapi_cluster_host_helpers.ml +++ b/ocaml/xapi/xapi_cluster_host_helpers.ml @@ -22,19 +22,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ = false let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = let op_to_str = Record_util.cluster_host_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - op_to_str cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some ( Api_errors.other_operation_in_progress - , ["Cluster_host." ^ current_ops_str; ref_str] + , ["Cluster_host"; ref_str; current_ops_str; current_ops_ref_str] ) (** Take an internal Cluster_host record and a proposed operation. Return None iff the operation From b6fb47ce164e702934f6ea1214b2a5d5704d3122 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 14:44:12 +0100 Subject: [PATCH 399/492] xapi_vm_lifecycle: Correctly report other_operation_in_progress error other_operation_in_progress has separate fields for the main class of the object ("VM") and the operation that's blocking progress, do not concatenate these into one string. Also report the task ref that's blocking progress. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_lifecycle.ml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 30a6a4b3307..6db1c70a84c 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -276,20 +276,24 @@ let report_power_state_error ~__context ~vmr ~power_state ~op ~ref_str = Some (Api_errors.vm_bad_power_state, [ref_str; expected; actual]) let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = + let op_to_str = Record_util.vm_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - Record_util.vm_operation_to_string cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" - ^ String.concat "," - (List.map Record_util.vm_operation_to_string (List.map snd l)) - ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some - (Api_errors.other_operation_in_progress, ["VM." ^ current_ops_str; ref_str]) + ( Api_errors.other_operation_in_progress + , ["VM"; ref_str; current_ops_str; current_ops_ref_str] + ) let check_vgpu ~__context ~op ~ref_str ~vgpus ~power_state = let is_migratable vgpu = From 0f57ba8da8d5580a38d3ca7fd64a5e56509cfe18 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 16 Jul 2025 10:10:57 +0100 Subject: [PATCH 400/492] qcow-stream-tool: Add a minimal CLI wrapper for Qcow_stream qcow-stream uses Lwt, which is not thread-safe, so we want to avoid using it in the xapi process. Create a CLI wrapper for calls to qcow-stream. Signed-off-by: Andrii Sultanov --- Makefile | 11 ++++---- dune-project | 9 +++++++ ocaml/qcow-stream-tool/dune | 10 +++++++ ocaml/qcow-stream-tool/qcow_stream_tool.ml | 29 +++++++++++++++++++++ ocaml/qcow-stream-tool/qcow_stream_tool.mli | 0 ocaml/xapi/xapi_globs.ml | 3 +++ opam/qcow-stream-tool.opam | 29 +++++++++++++++++++++ 7 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 ocaml/qcow-stream-tool/dune create mode 100644 ocaml/qcow-stream-tool/qcow_stream_tool.ml create mode 100644 ocaml/qcow-stream-tool/qcow_stream_tool.mli create mode 100644 opam/qcow-stream-tool.opam diff --git a/Makefile b/Makefile index dde13fc24a6..a1d5a628f33 100644 --- a/Makefile +++ b/Makefile @@ -147,7 +147,8 @@ install-extra: DESTDIR=$(DESTDIR) SBINDIR=$(SBINDIR) QEMU_WRAPPER_DIR=$(QEMU_WRAPPER_DIR) XENOPSD_LIBEXECDIR=$(XENOPSD_LIBEXECDIR) ETCDIR=$(ETCDIR) ./ocaml/xenopsd/scripts/make-custom-xenopsd.conf # common flags and packages for 'dune install' and 'dune uninstall' -DUNE_IU_PACKAGES1=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --mandir=$(MANDIR) +DUNE_IU_COMMON=-j $(JOBS) --destdir=$(DESTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) +DUNE_IU_PACKAGES1=$(DUNE_IU_COMMON) --prefix=$(PREFIX) DUNE_IU_PACKAGES1+=--libexecdir=$(XENOPSD_LIBEXECDIR) --datadir=$(SDKDIR) DUNE_IU_PACKAGES1+=xapi-client xapi-schema xapi-consts xapi-cli-protocol xapi-datamodel xapi-types DUNE_IU_PACKAGES1+=xen-api-client xen-api-client-lwt rrdd-plugin rrd-transport @@ -163,17 +164,17 @@ install-dune1: # dune can install libraries and several other files into the right locations dune install $(DUNE_IU_PACKAGES1) -DUNE_IU_PACKAGES2=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) --libexecdir=$(OPTDIR)/libexec --datadir=$(DOCDIR) xapi xe +DUNE_IU_PACKAGES2=$(DUNE_IU_COMMON) --prefix=$(OPTDIR) --libexecdir=$(OPTDIR)/libexec --datadir=$(DOCDIR) xapi xe install-dune2: dune install $(DUNE_IU_PACKAGES2) -DUNE_IU_PACKAGES3=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) --libexecdir=$(OPTDIR)/libexec --bindir=$(OPTDIR)/debug --datadir=$(OPTDIR)/debug xapi-debug +DUNE_IU_PACKAGES3=$(DUNE_IU_COMMON) --prefix=$(OPTDIR) --libexecdir=$(OPTDIR)/libexec --bindir=$(OPTDIR)/debug --datadir=$(OPTDIR)/debug xapi-debug install-dune3: dune install $(DUNE_IU_PACKAGES3) -DUNE_IU_PACKAGES4=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --libexecdir=/usr/libexec --mandir=$(MANDIR) vhd-tool forkexec +DUNE_IU_PACKAGES4=$(DUNE_IU_COMMON) --prefix=$(PREFIX) --libexecdir=/usr/libexec vhd-tool forkexec qcow-stream-tool install-dune4: dune install $(DUNE_IU_PACKAGES4) @@ -186,7 +187,7 @@ install: chmod +x $(DESTDIR)$(DOCDIR)/doc-convert.sh # backward compat with existing specfile, to be removed after it is updated find $(DESTDIR) -name '*.cmxs' -delete - for pkg in xapi-debug xapi xe xapi-tools xapi-sdk vhd-tool; do for f in CHANGELOG LICENSE README.markdown; do rm $(DESTDIR)$(OPTDIR)/doc/$$pkg/$$f $(DESTDIR)$(PREFIX)/doc/$$pkg/$$f -f; done; for f in META dune-package opam; do rm $(DESTDIR)$(LIBDIR)/$$pkg/$$f -f; done; done; + for pkg in xapi-debug xapi xe xapi-tools xapi-sdk vhd-tool qcow-stream-tool; do for f in CHANGELOG LICENSE README.markdown; do rm $(DESTDIR)$(OPTDIR)/doc/$$pkg/$$f $(DESTDIR)$(PREFIX)/doc/$$pkg/$$f -f; done; for f in META dune-package opam; do rm $(DESTDIR)$(LIBDIR)/$$pkg/$$f -f; done; done; uninstall: diff --git a/dune-project b/dune-project index 002f1c481f1..8b720b99442 100644 --- a/dune-project +++ b/dune-project @@ -577,6 +577,15 @@ :with-test (>= "2.4.0"))))) +(package + (name qcow-stream-tool) + (synopsis "Minimal CLI wrapper for qcow-stream") + (depends + qcow-stream + cmdliner + ) +) + (package (name varstored-guard)) diff --git a/ocaml/qcow-stream-tool/dune b/ocaml/qcow-stream-tool/dune new file mode 100644 index 00000000000..2125dea0a0c --- /dev/null +++ b/ocaml/qcow-stream-tool/dune @@ -0,0 +1,10 @@ +(executable + (modes exe) + (name qcow_stream_tool) + (public_name qcow-stream-tool) + (package qcow-stream-tool) + (libraries + qcow-stream + cmdliner + ) +) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml new file mode 100644 index 00000000000..7158867c248 --- /dev/null +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -0,0 +1,29 @@ +module Impl = struct + let stream_decode output = + Qcow_stream.stream_decode Unix.stdin output ; + `Ok () +end + +module Cli = struct + open Cmdliner + + let stream_decode_cmd = + let doc = "decode qcow2 formatted data from stdin and write a raw image" in + let man = + [ + `S "DESCRIPTION" + ; `P "Decode qcow2 formatted data from stdin and write to a raw file." + ] + in + let output default = + let doc = Printf.sprintf "Path to the output file." in + Arg.(value & pos 0 string default & info [] ~doc) + in + Cmd.v + (Cmd.info "stream_decode" ~doc ~man) + Term.(ret (const Impl.stream_decode $ output "test.raw")) + + let main () = Cmd.eval stream_decode_cmd +end + +let () = exit (Cli.main ()) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.mli b/ocaml/qcow-stream-tool/qcow_stream_tool.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 8a28146eea2..7bdd070793f 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -805,6 +805,8 @@ let vhd_tool = ref "vhd-tool" let qcow_to_stdout = ref "/opt/xensource/libexec/qcow2-to-stdout.py" +let qcow_stream_tool = ref "qcow-stream-tool" + let fence = ref "fence" let host_bugreport_upload = ref "host-bugreport-upload" @@ -1813,6 +1815,7 @@ module Resources = struct ; ("sparse_dd", sparse_dd, "Path to sparse_dd") ; ("vhd-tool", vhd_tool, "Path to vhd-tool") ; ("qcow_to_stdout", qcow_to_stdout, "Path to qcow-to-stdout script") + ; ("qcow_stream_tool", qcow_stream_tool, "Path to qcow-stream-tool") ; ("fence", fence, "Path to fence binary, used for HA host fencing") ; ( "host-bugreport-upload" , host_bugreport_upload diff --git a/opam/qcow-stream-tool.opam b/opam/qcow-stream-tool.opam new file mode 100644 index 00000000000..a7c3ab6ef3c --- /dev/null +++ b/opam/qcow-stream-tool.opam @@ -0,0 +1,29 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Minimal CLI wrapper for qcow-stream" +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.15"} + "qcow-stream" + "cmdliner" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" From d5d9999bbac7c0a7d20219071ceb08cea0b69246 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 1 May 2025 10:24:41 +0100 Subject: [PATCH 401/492] {export,import}_raw_vdi: add qcow as supported format This patch allows to pass "qcow2" as a supported format when calling VDI export and import. Qcow_tool_wrapper is added as a helper that calls the Python script for export (conversion from raw to qcow2 stream) and the qcow-tool CLI tool (ocaml-qcow library) for import (conversion from qcow2 stream to raw). Signed-off-by: Guillaume Signed-off-by: Andrii Sultanov --- ocaml/xapi/export_raw_vdi.ml | 20 ++++++++---- ocaml/xapi/import_raw_vdi.ml | 27 ++++++++++++++++ ocaml/xapi/importexport.ml | 16 +++++++-- ocaml/xapi/qcow_tool_wrapper.ml | 57 +++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 ocaml/xapi/qcow_tool_wrapper.ml diff --git a/ocaml/xapi/export_raw_vdi.ml b/ocaml/xapi/export_raw_vdi.ml index cea32fb5533..4a54283cc2b 100644 --- a/ocaml/xapi/export_raw_vdi.ml +++ b/ocaml/xapi/export_raw_vdi.ml @@ -47,12 +47,18 @@ let localhost_handler rpc session_id vdi (req : Http.Request.t) let copy base_path path size = try debug "Copying VDI contents..." ; - Vhd_tool_wrapper.send ?relative_to:base_path - (Vhd_tool_wrapper.update_task_progress __context) - "none" - (Importexport.Format.to_string format) - s path size "" ; - debug "Copying VDI complete." + match format with + | Qcow -> + Qcow_tool_wrapper.send + (Qcow_tool_wrapper.update_task_progress __context) + s path size + | Vhd | Tar | Raw -> + Vhd_tool_wrapper.send ?relative_to:base_path + (Vhd_tool_wrapper.update_task_progress __context) + "none" + (Importexport.Format.to_string format) + s path size "" ; + debug "Copying VDI complete." with Unix.Unix_error (Unix.EIO, _, _) -> raise (Api_errors.Server_error @@ -73,7 +79,7 @@ let localhost_handler rpc session_id vdi (req : Http.Request.t) in Http_svr.headers s headers ; match format with - | Raw | Vhd -> + | Raw | Vhd | Qcow -> let size = Db.VDI.get_virtual_size ~__context ~self:vdi in if format = Vhd && size > Constants.max_vhd_size then raise diff --git a/ocaml/xapi/import_raw_vdi.ml b/ocaml/xapi/import_raw_vdi.ml index 565c29e7d8e..8eacfe0a786 100644 --- a/ocaml/xapi/import_raw_vdi.ml +++ b/ocaml/xapi/import_raw_vdi.ml @@ -106,6 +106,10 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) ) ) | None -> + (* FIXME: Currently, when importing an image with a virtual + size that's bigger than the VDI's virtual size, we fail in + an unhelpful manner on some write. + We could instead parse the header first and fail early. *) let vdi = match ( vdi_opt @@ -122,6 +126,22 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) ~virtual_size:length ~_type:`user ~sharable:false ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:[] ~tags:[] + | None, Importexport.Format.Qcow, _, _ -> + error + "Importing a QCOW2 directly into an SR not yet \ + supported" ; + raise + (HandleError + ( Api_errors.Server_error + ( Api_errors.internal_error + , [ + "Importing a QCOW2 directly into an SR not \ + yet supported" + ] + ) + , Http.http_400_badrequest ~version:"1.0" () + ) + ) | None, Importexport.Format.Vhd, _, _ -> error "Importing a VHD directly into an SR not yet supported" ; @@ -158,6 +178,13 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) in Http_svr.headers s headers ; ( match format with + | Qcow -> + Sm_fs_ops.with_block_attached_device __context rpc + session_id vdi `RW (fun path -> + Qcow_tool_wrapper.receive + (Qcow_tool_wrapper.update_task_progress __context) + s path + ) | Raw | Vhd -> let prezeroed = not diff --git a/ocaml/xapi/importexport.ml b/ocaml/xapi/importexport.ml index a210bda04d6..6ba6769b7ef 100644 --- a/ocaml/xapi/importexport.ml +++ b/ocaml/xapi/importexport.ml @@ -430,9 +430,17 @@ let sr_of_req ~__context (req : Http.Request.t) = None module Format = struct - type t = Raw | Vhd | Tar + type t = Raw | Vhd | Tar | Qcow - let to_string = function Raw -> "raw" | Vhd -> "vhd" | Tar -> "tar" + let to_string = function + | Raw -> + "raw" + | Vhd -> + "vhd" + | Tar -> + "tar" + | Qcow -> + "qcow2" let of_string x = match String.lowercase_ascii x with @@ -442,6 +450,8 @@ module Format = struct Some Vhd | "tar" -> Some Tar + | "qcow2" -> + Some Qcow | _ -> None @@ -457,6 +467,8 @@ module Format = struct "application/vhd" | Tar -> "application/x-tar" + | Qcow -> + "application/x-qemu-disk" let _key = "format" diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml new file mode 100644 index 00000000000..6e28299cfb0 --- /dev/null +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -0,0 +1,57 @@ +(* + * Copyright (C) 2025 Vates. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D + +let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit) + (args : string list) = + info "Executing %s %s" qcow_tool (String.concat " " args) ; + let open Forkhelpers in + match + with_logfile_fd "qcow-tool" (fun log_fd -> + let pid = + safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args + in + let _, status = waitpid pid in + if status <> Unix.WEXITED 0 then ( + error "qcow-tool failed, returning VDI_IO_ERROR" ; + raise + (Api_errors.Server_error + (Api_errors.vdi_io_error, ["Device I/O errors"]) + ) + ) + ) + with + | Success (out, _) -> + debug "qcow-tool successful export (%s)" out + | Failure (out, _e) -> + error "qcow-tool output: %s" out ; + raise (Api_errors.Server_error (Api_errors.vdi_io_error, [out])) + +let update_task_progress (__context : Context.t) (x : int) = + TaskHelper.set_progress ~__context (float_of_int x /. 100.) + +let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) + (path : string) = + let args = [path] in + let qcow_tool = !Xapi_globs.qcow_stream_tool in + run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd + +let send (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) + (_size : Int64.t) = + let args = [path] in + let qcow_tool = !Xapi_globs.qcow_to_stdout in + run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd From c86e2611a3dac742b51cf06717f344c6308ea7cd Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 15 May 2025 14:17:18 +0100 Subject: [PATCH 402/492] export_raw_vdi: Add support for differential QCOW2 export with base Also add an mli file for qcow_tool_wrapper Signed-off-by: Andrii Sultanov --- ocaml/xapi/export_raw_vdi.ml | 2 +- ocaml/xapi/qcow_tool_wrapper.ml | 8 +++++--- ocaml/xapi/qcow_tool_wrapper.mli | 25 +++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 ocaml/xapi/qcow_tool_wrapper.mli diff --git a/ocaml/xapi/export_raw_vdi.ml b/ocaml/xapi/export_raw_vdi.ml index 4a54283cc2b..df3d778d579 100644 --- a/ocaml/xapi/export_raw_vdi.ml +++ b/ocaml/xapi/export_raw_vdi.ml @@ -49,7 +49,7 @@ let localhost_handler rpc session_id vdi (req : Http.Request.t) debug "Copying VDI contents..." ; match format with | Qcow -> - Qcow_tool_wrapper.send + Qcow_tool_wrapper.send ?relative_to:base_path (Qcow_tool_wrapper.update_task_progress __context) s path size | Vhd | Tar | Raw -> diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index 6e28299cfb0..30d0eb63811 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -50,8 +50,10 @@ let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) let qcow_tool = !Xapi_globs.qcow_stream_tool in run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd -let send (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) - (_size : Int64.t) = - let args = [path] in +let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) + (path : string) (_size : Int64.t) = + let args = + [path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi] + in let qcow_tool = !Xapi_globs.qcow_to_stdout in run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli new file mode 100644 index 00000000000..51c3c626567 --- /dev/null +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -0,0 +1,25 @@ +(* + * Copyright (C) 2025 Vates. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val update_task_progress : Context.t -> int -> unit + +val receive : (int -> unit) -> Unix.file_descr -> string -> unit + +val send : + ?relative_to:string + -> (int -> unit) + -> Unix.file_descr + -> string + -> int64 + -> unit From 832eda569a6ecaea92538e3045d7ff1d9b862a18 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 17 Jul 2025 13:53:39 +0100 Subject: [PATCH 403/492] CP-52334 MVD - add -d option to mock driver-tool The API supports a driver deselect call for which Xapi calls the underlying driver-tool. The mock driver is missing the corresponding -d option, which this patch adds. This fixes an internal error that Xapi raises otherwise. An alternative would be to ignore the error from the driver-tool. Signed-off-by: Christian Lindig --- ocaml/xapi/xapi_host_driver_tool.ml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_host_driver_tool.ml b/ocaml/xapi/xapi_host_driver_tool.ml index 80fe5d208b1..0dd837dda4d 100644 --- a/ocaml/xapi/xapi_host_driver_tool.ml +++ b/ocaml/xapi/xapi_host_driver_tool.ml @@ -243,6 +243,15 @@ module Mock = struct set -o errexit set -o pipefail +function deselect { + cat <&2 #>&2 redirects error message to stderr exit 1 @@ -656,6 +670,11 @@ if $s_flag; then selection "$n_value" "$v_value" exit 0 fi + +if [ -n "$d_value" ]; then + deselect "$d_value" + exit 0 +fi |} let install () = From e8ade77f29bf70b5f1ab2b89663614f90a3f5793 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 13:16:56 +0100 Subject: [PATCH 404/492] xapi_vbd_helpers: Fix operation reporting when raising other_operation_in_progress Fix incorrect operations being reported - e.g. when other_operation_in_progress is reported only when there are non-parallelisable operations currently happening, we can't just report the first current op, we need to report the first *non-parallelisable* op. This required some refactoring, moving to options instead of bools, etc. Additionally report the blocking operation's reference when raising the error. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vbd_helpers.ml | 77 +++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index 3e74dfe1f88..d23d161e988 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -42,7 +42,9 @@ type table = (API.vbd_operations, (string * string list) option) Hashtbl.t let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let _ref = Ref.string_of _ref' in let current_ops = - Listext.List.setify (List.map snd record.Db_actions.vBD_current_operations) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + record.Db_actions.vBD_current_operations in (* Policy: * current_ops must be empty [ will make exceptions later for eg eject/unplug of attached vbd ] @@ -74,30 +76,48 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let safe_to_parallelise = [`pause; `unpause] in (* Any current_operations preclude everything that isn't safe to parallelise *) ( if current_ops <> [] then - let concurrent_op = List.hd current_ops in + let concurrent_op_ref, concurrent_op_type = List.hd current_ops in set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string concurrent_op] + [ + "VBD" + ; _ref + ; vbd_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] (Listext.List.set_difference all_ops safe_to_parallelise) ) ; (* If not all operations are parallisable then preclude pause *) - let all_are_parallelisable = - List.fold_left ( && ) true - (List.map (fun op -> List.mem op safe_to_parallelise) current_ops) + let non_parallelisable_op = + List.find_opt + (fun (_, op) -> not (List.mem op safe_to_parallelise)) + current_ops in (* If not all are parallelisable, ban the otherwise parallelisable operations too *) - if not all_are_parallelisable then - set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string (List.hd current_ops)] - [`pause] ; + ( match non_parallelisable_op with + | Some (concurrent_op_ref, concurrent_op_type) -> + set_errors Api_errors.other_operation_in_progress + [ + "VBD" + ; _ref + ; vbd_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] + [`pause] + | None -> + () + ) ; + (* If something other than `pause `unpause *and* `attach (for VM.reboot, see CA-24282) then disallow unpause *) - if - Listext.List.set_difference current_ops (`attach :: safe_to_parallelise) - <> [] - then - set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string (List.hd current_ops)] - [`unpause] ; + let set_difference a b = List.filter (fun (_, x) -> not (List.mem x b)) a in + ( match set_difference current_ops (`attach :: safe_to_parallelise) with + | (op_ref, op_type) :: _ -> + set_errors Api_errors.other_operation_in_progress + ["VBD"; _ref; vbd_operations_to_string op_type; op_ref] + [`unpause] + | [] -> + () + ) ; (* Drives marked as not unpluggable cannot be unplugged *) if not record.Db_actions.vBD_unpluggable then set_errors Api_errors.vbd_not_unpluggable [_ref] [`unplug; `unplug_force] ; @@ -128,7 +148,10 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let bad_ops = [`plug; `unplug; `unplug_force] in (* However allow VBD pause and unpause if the VM is paused: *) let bad_ops' = - if power_state = `Paused then bad_ops else `pause :: `unpause :: bad_ops + if power_state = `Paused then + bad_ops + else + `pause :: `unpause :: bad_ops in set_errors Api_errors.vm_bad_power_state [Ref.string_of vm; expected; actual] @@ -226,17 +249,23 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = | _ -> true in - List.exists + List.find_opt (fun (_, operation) -> is_illegal_operation operation) vdi_record.Db_actions.vDI_current_operations in - ( if vdi_operations_besides_copy then - let concurrent_op = - snd (List.hd vdi_record.Db_actions.vDI_current_operations) - in + + ( match vdi_operations_besides_copy with + | Some (concurrent_op_ref, concurrent_op_type) -> set_errors Api_errors.other_operation_in_progress - ["VDI"; Ref.string_of vdi; vdi_operations_to_string concurrent_op] + [ + "VDI" + ; Ref.string_of vdi + ; vdi_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] [`attach; `plug; `insert] + | None -> + () ) ; if (not record.Db_actions.vBD_currently_attached) && expensive_sharing_checks From d8a24ef54451755e79e18916478f10bcdf5abc9e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 11:41:35 +0100 Subject: [PATCH 405/492] xapi_vdi: Report more useful information when raising other_operation_in_progress Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vdi.ml | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 0f9904d72fb..624875c21e5 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -187,13 +187,18 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) let is_vdi_mirroring_in_progress = op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops in - if - List.exists (fun (_, op) -> op <> `copy) current_ops - && not is_vdi_mirroring_in_progress - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () + match + ( is_vdi_mirroring_in_progress + , List.find_opt (fun (_, op) -> op <> `copy) current_ops + ) + with + | false, Some (op_ref, op_type) -> + Error + ( Api_errors.other_operation_in_progress + , ["VDI"; _ref; API.vdi_operations_to_string op_type; op_ref] + ) + | _ -> + Ok () in let* () = if pbds_attached = [] && op = `resize then @@ -277,7 +282,15 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) mechanism of message forwarding and only use the event loop. *) my_has_current_operation_vbd_records <> [] && op <> `data_destroy then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + let op_ref, op_type = + List.hd + (List.hd my_has_current_operation_vbd_records) + .Db_actions.vBD_current_operations + in + Error + ( Api_errors.other_operation_in_progress + , ["VDI"; _ref; API.vbd_operations_to_string op_type; op_ref] + ) else Ok () in From 8015edbbeac10fe3301adfb8ecaa62d33f6fd113 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 11:51:50 +0100 Subject: [PATCH 406/492] xapi_{vif,vusb}_helpers: Report more useful information when raising other_operation_in_progress Instead of logging all the blocking operations with 'debug', just report them with the error. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vif_helpers.ml | 26 ++++++++++++++------------ ocaml/xapi/xapi_vusb_helpers.ml | 28 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 5ab6f146339..34682f9aa78 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -50,18 +50,20 @@ let valid_operations ~__context record _ref' : table = in let vm = Db.VIF.get_VM ~__context ~self:_ref' in (* Any current_operations preclude everything else *) - if current_ops <> [] then ( - debug "No operations are valid because current-operations = [ %s ]" - (String.concat "; " - (List.map - (fun (task, op) -> task ^ " -> " ^ vif_operations_to_string op) - current_ops - ) - ) ; - let concurrent_op = snd (List.hd current_ops) in - set_errors Api_errors.other_operation_in_progress - ["VIF"; _ref; vif_operations_to_string concurrent_op] - all_ops + ( if current_ops <> [] then + let concurrent_op_refs, concurrent_op_types = + List.fold_left + (fun (refs, types) (ref, op) -> + (ref :: refs, vif_operations_to_string op :: types) + ) + ([], []) current_ops + in + let format x = Printf.sprintf "{%s}" (String.concat "; " x) in + let concurrent_op_refs = format concurrent_op_refs in + let concurrent_op_types = format concurrent_op_types in + set_errors Api_errors.other_operation_in_progress + ["VIF"; _ref; concurrent_op_types; concurrent_op_refs] + all_ops ) ; (* No hotplug on dom0 *) if Helpers.is_domain_zero ~__context vm then diff --git a/ocaml/xapi/xapi_vusb_helpers.ml b/ocaml/xapi/xapi_vusb_helpers.ml index 9b1870cf141..5c17c5e8130 100644 --- a/ocaml/xapi/xapi_vusb_helpers.ml +++ b/ocaml/xapi/xapi_vusb_helpers.ml @@ -16,8 +16,6 @@ open Xapi_stdext_std.Xstringext module D = Debug.Make (struct let name = "xapi_vusb_helpers" end) -open D - (**************************************************************************************) (* current/allowed operations checking *) @@ -48,18 +46,20 @@ let valid_operations ~__context record _ref' : table = ops in (* Any current_operations preclude everything else *) - if current_ops <> [] then ( - debug "No operations are valid because current-operations = [ %s ]" - (String.concat "; " - (List.map - (fun (task, op) -> task ^ " -> " ^ vusb_operations_to_string op) - current_ops - ) - ) ; - let concurrent_op = snd (List.hd current_ops) in - set_errors Api_errors.other_operation_in_progress - ["VUSB"; _ref; vusb_operations_to_string concurrent_op] - all_ops + ( if current_ops <> [] then + let concurrent_op_refs, concurrent_op_types = + List.fold_left + (fun (refs, types) (ref, op) -> + (ref :: refs, vusb_operations_to_string op :: types) + ) + ([], []) current_ops + in + let format x = Printf.sprintf "{%s}" (String.concat "; " x) in + let concurrent_op_refs = format concurrent_op_refs in + let concurrent_op_types = format concurrent_op_types in + set_errors Api_errors.other_operation_in_progress + ["VUSB"; _ref; concurrent_op_types; concurrent_op_refs] + all_ops ) ; let vm = Db.VUSB.get_VM ~__context ~self:_ref' in let power_state = Db.VM.get_power_state ~__context ~self:vm in From cbd5f1745a887e9941b76371d4542879c87b47f3 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 14:01:17 +0100 Subject: [PATCH 407/492] message_forwarding: Report more info when raising other_operation_in_progress Signed-off-by: Andrii Sultanov --- ocaml/xapi/message_forwarding.ml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 4c79f91cf5f..8adf9ea632e 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -5720,14 +5720,21 @@ functor if Helpers.i_am_srmaster ~__context ~sr then List.iter (fun vdi -> - if Db.VDI.get_current_operations ~__context ~self:vdi <> [] - then - raise - (Api_errors.Server_error - ( Api_errors.other_operation_in_progress - , [Datamodel_common._vdi; Ref.string_of vdi] - ) - ) + match Db.VDI.get_current_operations ~__context ~self:vdi with + | (op_ref, op_type) :: _ -> + raise + (Api_errors.Server_error + ( Api_errors.other_operation_in_progress + , [ + Datamodel_common._vdi + ; Ref.string_of vdi + ; API.vdi_operations_to_string op_type + ; op_ref + ] + ) + ) + | [] -> + () ) (Db.SR.get_VDIs ~__context ~self:sr) ; SR.mark_sr ~__context ~sr ~doc ~op From c5773da3bad0da72e51bf46ff4e9e2f688380070 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 14:25:15 +0100 Subject: [PATCH 408/492] xapi_pool_helpers: Report more info when raising other_operation_in_progress error Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_pool_helpers.ml | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/ocaml/xapi/xapi_pool_helpers.ml b/ocaml/xapi/xapi_pool_helpers.ml index 14f4c37d030..bdd4e0454b1 100644 --- a/ocaml/xapi/xapi_pool_helpers.ml +++ b/ocaml/xapi/xapi_pool_helpers.ml @@ -99,7 +99,7 @@ type validity = Unknown | Allowed | Disallowed of string * string list let compute_valid_operations ~__context record pool : API.pool_allowed_operations -> validity = let ref = Ref.string_of pool in - let current_ops = List.map snd record.Db_actions.pool_current_operations in + let current_ops = record.Db_actions.pool_current_operations in let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in let set_validity = Hashtbl.replace table in (* Start by assuming all operations are allowed. *) @@ -118,30 +118,45 @@ let compute_valid_operations ~__context record pool : in List.iter populate ops in - let other_operation_in_progress = - (Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref]) + let other_operation_in_progress waiting_op = + let additional_info = + match waiting_op with + | Some (op_ref, op_type) -> + [API.pool_allowed_operations_to_string op_type; op_ref] + | _ -> + [] + in + ( Api_errors.other_operation_in_progress + , [Datamodel_common._pool; ref] @ additional_info + ) + in + let is_current_op op = + List.exists (fun (_, current_op) -> op = current_op) current_ops in - let is_current_op = Fun.flip List.mem current_ops in let blocking = List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table in - let waiting = List.find_opt is_current_op waiting_ops in + let waiting = + List.find_opt + (fun (_, current_op) -> List.mem current_op waiting_ops) + current_ops + in ( match (blocking, waiting) with - | Some (_, reason), _ -> + | Some (_, reason), waiting_current_op -> (* Mark all potentially blocking operations as invalid due to the specific blocking operation's "in progress" error. *) set_errors blocking_ops (reason, []) ; (* Mark all waiting operations as invalid for the generic "OTHER_OPERATION_IN_PROGRESS" reason. *) - set_errors waiting_ops other_operation_in_progress + set_errors waiting_ops (other_operation_in_progress waiting_current_op) (* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this invalidates all operations (with the reason partitioned between whether the operation is blocking or waiting). *) - | None, Some _ -> + | None, (Some _ as waiting_current_op) -> (* If there's no blocking operation in current operations, but there is a waiting operation, invalidate all operations for the generic reason. Again, this covers every operation. *) - set_errors all_operations other_operation_in_progress + set_errors all_operations (other_operation_in_progress waiting_current_op) | None, None -> ( (* If there's no blocking or waiting operation in current operations (i.e. current operations is empty), we can report From 8923f7a6c6311c675e63af1d1dbb45d26377559d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 15:14:11 +0100 Subject: [PATCH 409/492] xapi_pif: Report more info when raising other_operation_in_progress error Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_pif.ml | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index 163e1f31d57..eaf4b37b8b9 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -926,17 +926,25 @@ let assert_cluster_host_operation_not_in_progress ~__context = match Db.Cluster.get_all ~__context with | [] -> () - | cluster :: _ -> - let ops = - Db.Cluster.get_current_operations ~__context ~self:cluster - |> List.map snd - in - if List.mem `enable ops || List.mem `add ops then - raise - Api_errors.( - Server_error - (other_operation_in_progress, ["Cluster"; Ref.string_of cluster]) - ) + | cluster :: _ -> ( + let ops = Db.Cluster.get_current_operations ~__context ~self:cluster in + match List.find_opt (fun (_, op) -> op = `enable || op = `add) ops with + | Some (op_ref, op_type) -> + raise + Api_errors.( + Server_error + ( other_operation_in_progress + , [ + "Cluster" + ; Ref.string_of cluster + ; API.cluster_operation_to_string op_type + ; op_ref + ] + ) + ) + | None -> + () + ) (* Block allowing unplug if - a cluster host is enabled on this PIF From 443a317c4df32218794444b588dbf77cdbe73c41 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 15:19:09 +0100 Subject: [PATCH 410/492] xapi_vm_appliance_lifecycle: Report more info when raising other_operation_in_progress error Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_appliance_lifecycle.ml | 72 +++++++++++++---------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/ocaml/xapi/xapi_vm_appliance_lifecycle.ml b/ocaml/xapi/xapi_vm_appliance_lifecycle.ml index 330d028cf1c..765fd9c3568 100644 --- a/ocaml/xapi/xapi_vm_appliance_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_appliance_lifecycle.ml @@ -18,38 +18,48 @@ let check_operation_error ~__context record self op = let _ref = Ref.string_of self in let current_ops = record.Db_actions.vM_appliance_current_operations in (* Only allow one operation of [`start | `clean_shutdown | `hard_shutdown | `shutdown ] at a time. *) - if current_ops <> [] then - Some (Api_errors.other_operation_in_progress, ["VM_appliance"; _ref]) - else - let vms = Db.VM_appliance.get_VMs ~__context ~self in - if vms = [] then - Some (Api_errors.operation_not_allowed, ["Appliance has no VMs."]) - else (* Allow the op if any VMs are in a state where the op makes sense. *) - let power_states = - List.map (fun vm -> Db.VM.get_power_state ~__context ~self:vm) vms - in - let predicate, error = - match op with - (* Can start if any are halted. *) - | `start -> - ( (fun power_state -> power_state = `Halted) - , "There are no halted VMs in this appliance." - ) - (* Can clean_shutdown if any are running. *) - | `clean_shutdown -> - ( (fun power_state -> power_state = `Running) - , "There are no running VMs in this appliance." - ) - (* Can hard_shutdown/shutdown if any are not halted. *) - | `hard_shutdown | `shutdown -> - ( (fun power_state -> power_state <> `Halted) - , "All VMs in this appliance are halted." - ) - in - if List.exists predicate power_states then - None + match current_ops with + | (op_ref, op_type) :: _ -> + Some + ( Api_errors.other_operation_in_progress + , [ + "VM_appliance" + ; _ref + ; API.vm_appliance_operation_to_string op_type + ; op_ref + ] + ) + | [] -> + let vms = Db.VM_appliance.get_VMs ~__context ~self in + if vms = [] then + Some (Api_errors.operation_not_allowed, ["Appliance has no VMs."]) else - Some (Api_errors.operation_not_allowed, [error]) + (* Allow the op if any VMs are in a state where the op makes sense. *) + let power_states = + List.map (fun vm -> Db.VM.get_power_state ~__context ~self:vm) vms + in + let predicate, error = + match op with + (* Can start if any are halted. *) + | `start -> + ( (fun power_state -> power_state = `Halted) + , "There are no halted VMs in this appliance." + ) + (* Can clean_shutdown if any are running. *) + | `clean_shutdown -> + ( (fun power_state -> power_state = `Running) + , "There are no running VMs in this appliance." + ) + (* Can hard_shutdown/shutdown if any are not halted. *) + | `hard_shutdown | `shutdown -> + ( (fun power_state -> power_state <> `Halted) + , "All VMs in this appliance are halted." + ) + in + if List.exists predicate power_states then + None + else + Some (Api_errors.operation_not_allowed, [error]) let assert_operation_valid ~__context ~self ~(op : API.vm_appliance_operation) = let all = Db.VM_appliance.get_record_internal ~__context ~self in From 0b843b60c95f76f873679b7a5e5c186a46a96283 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 15:22:11 +0100 Subject: [PATCH 411/492] xapi_vbd: Report more useful info when raising other_operation_in_progress error Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vbd.ml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_vbd.ml b/ocaml/xapi/xapi_vbd.ml index cf7ab173882..331284eb344 100644 --- a/ocaml/xapi/xapi_vbd.ml +++ b/ocaml/xapi/xapi_vbd.ml @@ -184,19 +184,26 @@ let create ~__context ~vM ~vDI ~device ~userdevice ~bootable ~mode ~_type (* CA-75697: Disallow VBD.create on a VM that's in the middle of a migration *) debug "Checking whether there's a migrate in progress..." ; let vm_current_ops = - Xapi_stdext_std.Listext.List.setify - (List.map snd (Db.VM.get_current_operations ~__context ~self:vM)) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + (Db.VM.get_current_operations ~__context ~self:vM) in + let migrate_ops = [`migrate_send; `pool_migrate] in let migrate_ops_in_progress = - List.filter (fun op -> List.mem op vm_current_ops) migrate_ops + List.filter (fun (_, op) -> List.mem op migrate_ops) vm_current_ops in match migrate_ops_in_progress with - | op :: _ -> + | (op_ref, op_type) :: _ -> raise (Api_errors.Server_error ( Api_errors.other_operation_in_progress - , ["VM"; Ref.string_of vM; Record_util.vm_operation_to_string op] + , [ + "VM" + ; Ref.string_of vM + ; Record_util.vm_operation_to_string op_type + ; op_ref + ] ) ) | _ -> From d797d797d3f4dae14682b08bf9c185d817645530 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 14 Jul 2025 09:12:36 +0100 Subject: [PATCH 412/492] xapi_host_helpers: Report more useful info when raising other_operation_in_progress error Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_host_helpers.ml | 65 ++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index 7b9ac9d7a2e..3523ceaefcf 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -31,7 +31,7 @@ let all_operations = API.host_allowed_operations__all (** Returns a table of operations -> API error options (None if the operation would be ok) *) let valid_operations ~__context record _ref' = let _ref = Ref.string_of _ref' in - let current_ops = List.map snd record.Db_actions.host_current_operations in + let current_ops = record.Db_actions.host_current_operations in let table = Hashtbl.create 10 in List.iter (fun x -> Hashtbl.replace table x None) all_operations ; let set_errors (code : string) (params : string list) @@ -49,40 +49,53 @@ let valid_operations ~__context record _ref' = let is_creating_new x = List.mem x [`provision; `vm_resume; `vm_migrate] in let is_removing x = List.mem x [`evacuate; `reboot; `shutdown] in let creating_new = - List.fold_left (fun acc op -> acc || is_creating_new op) false current_ops - in - let removing = - List.fold_left (fun acc op -> acc || is_removing op) false current_ops + List.find_opt (fun (_, op) -> is_creating_new op) current_ops in + let removing = List.find_opt (fun (_, op) -> is_removing op) current_ops in List.iter (fun op -> - if (is_creating_new op && removing) || (is_removing op && creating_new) - then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string (List.hd current_ops)] - [op] + match (is_creating_new op, removing, is_removing op, creating_new) with + | true, Some (op_ref, op_type), _, _ | _, _, true, Some (op_ref, op_type) + -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string op_type; op_ref] + [op] + | _ -> + () ) (List.filter (fun x -> x <> `power_on) all_operations) ; (* reboot, shutdown and apply_updates cannot run concurrently *) - if List.mem `reboot current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `reboot] - [`shutdown; `apply_updates] ; - if List.mem `shutdown current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `shutdown] - [`reboot; `apply_updates] ; - if List.mem `apply_updates current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `apply_updates] - [`reboot; `shutdown; `enable] ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `reboot; op_ref] + [`shutdown; `apply_updates] + ) + (List.find_opt (fun (_, op) -> op = `reboot) current_ops) ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `shutdown; op_ref] + [`reboot; `apply_updates] + ) + (List.find_opt (fun (_, op) -> op = `shutdown) current_ops) ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `apply_updates; op_ref] + [`reboot; `shutdown; `enable] + ) + (List.find_opt (fun (_, op) -> op = `apply_updates) current_ops) ; (* Prevent more than one provision happening at a time to prevent extreme dom0 load (in the case of the debian template). Once the template becomes a 'real' template we can relax this. *) - if List.mem `provision current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `provision] - [`provision] ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `provision; op_ref] + [`provision] + ) + (List.find_opt (fun (_, op) -> op = `provision) current_ops) ; (* The host must be disabled before reboots or shutdowns are permitted *) if record.Db_actions.host_enabled then set_errors Api_errors.host_not_disabled [] From 3500de136e52a3f785520e3e0f612f5caf46c19a Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Jul 2025 15:30:11 +0100 Subject: [PATCH 413/492] xapi/helpers: Fix handling of other_operation_in_progress delays Not all of other_operation_in_progress error contain two objects in the list ([cls; objref]), some contain three or four, but the match would miss them and they would be put into a non-interruptible Thread.sleep instead of the interruptible Delay.sleep that provides a mechanism for the other task to wake us up on its way out. Signed-off-by: Andrii Sultanov --- ocaml/xapi/helpers.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index aff1b815566..ddb957d119a 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -1685,7 +1685,7 @@ module Repeat_with_uniform_backoff : POLICY = struct debug "Waiting for up to %f seconds before retrying..." this_timeout ; let start = Unix.gettimeofday () in ( match e with - | Api_errors.Server_error (code, [cls; objref]) + | Api_errors.Server_error (code, cls :: objref :: _) when code = Api_errors.other_operation_in_progress -> Early_wakeup.wait (cls, objref) this_timeout | _ -> From 615aa21da57bbe974152113b7828c613ba9ccef8 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 14 Jul 2025 09:40:42 +0100 Subject: [PATCH 414/492] idl/datamodel_errors: Add operation_{type,ref} to other_operation_in_progress Most of the users report these when appropriate, so document them here. Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel_errors.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 20acb06f60b..4d019ce0564 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -538,7 +538,8 @@ let _ = ~doc:"You attempted an operation on a VM which is not suspendable." () ; error Api_errors.vm_is_template ["vm"] ~doc:"The operation attempted is not valid for a template VM" () ; - error Api_errors.other_operation_in_progress ["class"; "object"] + error Api_errors.other_operation_in_progress + ["class"; "object"; "operation_type"; "operation_ref"] ~doc:"Another operation involving the object is currently in progress" () ; error Api_errors.vbd_not_removable_media ["vbd"] ~doc:"Media could not be ejected because it is not removable" () ; From 5e895af2aba30e86b4aaaa0febf80c30be5c67fb Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 14 Jul 2025 09:42:49 +0100 Subject: [PATCH 415/492] Adjust tests after other_operation_in_progress refactoring quality-gate: Reduce expected List.hd count test_clustering: test for the blocking operation to be reported Signed-off-by: Andrii Sultanov --- ocaml/tests/test_clustering.ml | 18 ++++++++++++++---- quality-gate.sh | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ocaml/tests/test_clustering.ml b/ocaml/tests/test_clustering.ml index 9be97c5fdb5..edd33cb6025 100644 --- a/ocaml/tests/test_clustering.ml +++ b/ocaml/tests/test_clustering.ml @@ -581,11 +581,21 @@ let test_disallow_unplug_during_cluster_host_create () = let key = Context.get_task_id __context |> Ref.string_of in Db.Cluster.add_to_current_operations ~__context ~self:cluster ~key ~value in - let check_disallow_unplug_false_fails self msg = + let check_disallow_unplug_false_fails self op msg = + let op_ref, _ = + List.hd (Db.Cluster.get_current_operations ~__context ~self:cluster) + in Alcotest.check_raises msg Api_errors.( Server_error - (other_operation_in_progress, ["Cluster"; Ref.string_of cluster]) + ( other_operation_in_progress + , [ + "Cluster" + ; Ref.string_of cluster + ; API.cluster_operation_to_string op + ; op_ref + ] + ) ) (fun () -> Xapi_pif.set_disallow_unplug ~__context ~self ~value:false) in @@ -598,14 +608,14 @@ let test_disallow_unplug_during_cluster_host_create () = let test_with_current op = Xapi_pif.set_disallow_unplug ~__context ~self:pIF ~value:true ; add_op op ; - check_disallow_unplug_false_fails pIF + check_disallow_unplug_false_fails pIF op "disallow_unplug cannot be set to false during cluster_host creation or \ enable on same PIF" ; let other_pif = T.make_pif ~__context ~network ~host () in check_successful_disallow_unplug true other_pif "Should always be able to set disallow_unplug:true regardless of \ clustering operations" ; - check_disallow_unplug_false_fails other_pif + check_disallow_unplug_false_fails other_pif op "disallow_unplug cannot be set to false during cluster_host creation or \ enable on any PIF" ; let key = Context.get_task_id __context |> Ref.string_of in diff --git a/quality-gate.sh b/quality-gate.sh index 7591e3c4ff4..6f3a72b30a1 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=279 + N=274 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From 1a4a3395d8eeba1810e3a8ff4c66281f159b3507 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Fri, 18 Jul 2025 09:07:19 +0000 Subject: [PATCH 416/492] CA-412420: Set vdi-type When Create snapshot on SMAPIv3 SR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the SMAPIv3 migration prepare step, it will create the VDI on the destination based on the source. The source retrieves vdi_info via copy_into_sr → find_vdi → SR.scan2 → sr_scan2_impl → COWVolume.ls, and sr_ls extracts all vdi_info from the database. However, when creating a snapshot with COWVolume.snapshot on V3, the vdi-type is not stored in the database, which causes getting an empty vdi-type and the migration prepare step to fail and result the folllowing error: `\'INTERNAL_ERROR\', \'Failure("Unknown tag/contents")\']]);S()]]]))]))]))])` This change adds a step to set vdi-type from the original VDI to the snapshot VDI to prevent this issue. Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/xapi-storage-script/main.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index e04a93203b3..1eccd3867fd 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1447,6 +1447,9 @@ module VDIImpl (M : META) = struct set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key ~key:_vdi_content_id_key ~value:vdi_info.content_id >>>= fun () -> + set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key + ~key:_vdi_type_key ~value:vdi_info.ty + >>>= fun () -> let response = { (vdi_of_volume response) with From 5d179f9dbd62e501800799da9578c628d1cb404f Mon Sep 17 00:00:00 2001 From: Guillaume Date: Wed, 16 Jul 2025 16:12:45 +0200 Subject: [PATCH 417/492] [doc] add documentation about tracing It is a quick overview of tracing and how to enable it in XAPI. Signed-off-by: Guillaume Signed-off-by: Mathieu Labourier --- .../toolstack/features/Tracing/index.md | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 doc/content/toolstack/features/Tracing/index.md diff --git a/doc/content/toolstack/features/Tracing/index.md b/doc/content/toolstack/features/Tracing/index.md new file mode 100644 index 00000000000..c54441bbb68 --- /dev/null +++ b/doc/content/toolstack/features/Tracing/index.md @@ -0,0 +1,137 @@ ++++ +title = "Tracing" ++++ + +Tracing is a powerful tool for observing system behavior across multiple components, making it especially +useful for debugging and performance analysis in complex environments. + +By integrating OpenTelemetry (a standard that unifies OpenTracing and OpenCensus) and the Zipkin v2 protocol, +XAPI enables efficient tracking and visualization of operations across internal and external systems. +This facilitates detailed analysis and improves collaboration between teams. + +Tracing is commonly used in high-level applications such as web services. As a result, less widely-used or +non-web-oriented languages may lack dedicated libraries for distributed tracing (An OCaml implementation +has been developed specifically for XenAPI). + +# How tracing works in XAPI + +## Spans and Trace Context + +- A *span* is the core unit of a trace, representing a single operation with a defined start and end time. + Spans can contain sub-spans that represent child tasks. This helps identify bottlenecks or areas that + can be parallelized. + - A span can contain several contextual elements such as *tags* (key-value pairs), + *events* (time-based data), and *errors*. +- The *TraceContext* HTTP standard defines how trace IDs and span contexts are propagated across systems, + enabling full traceability of operations. + +This data enables the creation of relationships between tasks and supports visualizations such as +architecture diagrams or execution flows. These help in identifying root causes of issues and bottlenecks, +and also assist newcomers in onboarding to the project. + +## Configuration + +- To enable tracing, you need to create an *Observer* object in XAPI. This can be done using the *xe* CLI: + ```sh + xe observer-create \ + name-label= \ + enabled=true \ + components=xapi,xenopsd \ + ``` +- By default, if you don't specify `enabled=true`, the observer will be disabled. +- To add an HTTP endpoint, make sure the server is up and running, then run: + ```sh + xe observer-param-set uuid= endpoints=bugtool,http://:9411/api/v2/spans + ``` + If you specify an invalid or unreachable HTTP endpoint, the configuration will fail. +- **components**: Specify which internal components (e.g., *xapi*, *xenopsd*) should be traced. + Additional components are expected to be supported in future releases. An experimental *smapi* component + is also available and requires additional configuration (explained below). + +- **endpoints**: The observer can collect traces locally in */var/log/dt* or forward them to external + visualization tools such as [Jaeger](https://www.jaegertracing.io/). Currently, only HTTP/S endpoints + are supported, and they require additional configuration steps (see next section). + +- To disable tracing you just need to set *enabled* to false: + ```sh + xe observer-param-set uuid= enabled=false + ``` + +### Enabling smapi component + +- *smapi* component is currently considered experimental and is filtered by default. To enable it, you must + explicitly configure the following in **xapi.conf**: + ```ini + observer-experimental-components="" + ``` + This tells XAPI that no components are considered experimental, thereby allowing *smapi* to be traced. + A modification to **xapi.conf** requires a restart of the XAPI toolstack. + +### Enabling HTTP/S endpoints + +- By default HTTP and HTTPS endpoints are disabled. To enable them, add the following lines to **xapi.conf**: + ```ini + observer-endpoint-http-enabled=true + observer-endpoint-https-enabled=true + ``` + As with enabling *smapi* component, modifying **xapi.conf** requires a restart of the XAPI toolstack. + *Note*: HTTPS endpoint support is available but not tested and may not work. + +### Sending local trace to endpoint + +By default, traces are generated locally in the `/var/log/dt` directory. You can copy or forward +these traces to another location or endpoint using the `xs-trace` tool. For example, if you have +a *Jaeger* server running locally, you can run: + +```sh +xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans +``` + +You will then be able to visualize the traces in Jaeger. + +### Tagging Trace Sessions for Easier Search + +#### Specific attributes +To make trace logs easier to locate and analyze, it can be helpful to add custom attributes around the +execution of specific commands. For example: + +```sh +# xe observer-param-set uuid= attributes:custom.random=1234 +# xe vm-start ... +# xe observer-param-clear uuid= param-name=attributes param-key=custom.random +``` + +This technique adds a temporary attribute, *custom.random=1234*, which will appear in the generated trace +spans, making it easier to search for specific activity in trace visualisation tools. It may also be possible +to achieve similar tagging using baggage parameters directly in individual *xe* commands, but this approach +is currently undocumented. + +#### Baggage + +*Baggage*, contextual information that resides alongside the context, is supported. This means you can run +the following command: + +```sh +BAGGAGE="mybaggage=apples" xe vm-list +``` + +You will be able to search for tags `mybaggage=apples`. + +#### Traceparent + +Another way to assist in trace searching is to use the `TRACEPARENT` HTTP header. It is an HTTP header field that +identifies the incoming request. It has a [specific format](https://www.w3.org/TR/trace-context/#traceparent-header) +and it is supported by **XAPI**. Once generated you can run command as: + +```sh +TRACEPARENT="00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01" xe vm-list +``` + +And you will be able to look for trace *4bf92f3577b34da6a3ce929d0e0e4736*. + +### Links + +- [Opentelemetry](https://opentelemetry.io/) +- [Trace Context](https://www.w3.org/TR/trace-context/) +- [Baggage](https://opentelemetry.io/docs/concepts/signals/baggage/) +- [Ocaml opentelemetry module](https://ocaml.org/p/opentelemetry/latest) From ea84aaee146299d946e21aec9d93568550537963 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Thu, 10 Jul 2025 07:21:40 +0100 Subject: [PATCH 418/492] CA-413424: Enhance xe help output The previous `xe` help is as below: ``` Usage: xe [-s server] [-p port] ([-u username] [-pw password] or [-pwf ]) [--traceparent traceparent] A full list of commands can be obtained by running xe help -s -p ``` The previous `xe` help output lacked debug-related options and did not provide detailed parameter description. The new `xe` help output is as follows: ``` Usage: xe [ -s ] XenServer host [ -p ] XenServer port number [ -u -pw | -pwf ] User authentication (password or file) [ --nossl ] Disable SSL/TLS [ --debug ] Enable debug output [ --debug-on-fail ] Enable debug output only on failure [ --traceparent ] Distributed tracing context [ ... ] Command-specific options A full list of commands can be obtained by running xe help -s -p ``` Signed-off-by: Bengang Yuan --- ocaml/xapi-cli-server/cli_frontend.ml | 64 +++++++++++++-------------- ocaml/xe-cli/newcli.ml | 38 ++++++++-------- 2 files changed, 52 insertions(+), 50 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index d6b553567e4..2b10741fc9c 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -4047,6 +4047,26 @@ let rio_help printer minimal cmd = let cmds = List.sort (fun (name1, _) (name2, _) -> compare name1 name2) cmds in + let help = + Printf.sprintf + {|Usage: + %s + [ -s ] XenServer host + [ -p ] XenServer port number + [ -u -pw | -pwf ] + User authentication (password or file) + [ --nossl ] Disable SSL/TLS + [ --debug ] Enable debug output + [ --debug-on-fail ] Enable debug output only on failure + [ --traceparent ] Distributed tracing context + [ ... ] Command-specific options + +To get help on a specific command: + %s help + +|} + cmd.argv0 cmd.argv0 + in if List.mem_assoc "all" cmd.params && List.assoc "all" cmd.params = "true" then let cmds = List.map fst cmds in @@ -4056,20 +4076,9 @@ let rio_help printer minimal cmd = let vm_cmds, other = List.partition (fun n -> Astring.String.is_prefix ~affix:"vm-" n) other in - let h = - "Usage: " - ^ cmd.argv0 - ^ " [-s server] [-pw passwd] [-p port] [-u user] [-pwf \ - password-file]\n" - in - let h = h ^ " [command specific arguments]\n\n" in - let h = - h - ^ "To get help on a specific command: " - ^ cmd.argv0 - ^ " help \n\n" - in - let h = h ^ "Full command list\n-----------------" in + let h = help ^ {|Full command list +----------------- +|} in if minimal then printer (Cli_printer.PList cmds) else ( @@ -4086,25 +4095,16 @@ let rio_help printer minimal cmd = in let cmds = List.map fst cmds in let h = - "Usage: " - ^ cmd.argv0 - ^ " [-s server] [-pw passwd] [-p port] [-u user] [-pwf \ - password-file]\n" - in - let h = h ^ " [command specific arguments]\n\n" in - let h = - h - ^ "To get help on a specific command: " - ^ cmd.argv0 - ^ " help \n" - in - let h = - h - ^ "To get a full listing of commands: " - ^ cmd.argv0 - ^ " help --all\n\n" + help + ^ Printf.sprintf + {|To get a full listing of commands: + %s help --all + +Common command list +------------------- +|} + cmd.argv0 in - let h = h ^ "Common command list\n-------------------" in if minimal then printer (Cli_printer.PList cmds) else ( diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index 6d32834c524..60ecce2a47d 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -66,24 +66,26 @@ let debug fmt = exception Usage let usage () = - error - "Usage:\n\ - \ %s \n\ - \ [ -s ] XenServer host \n\ - \ [ -p ] XenServer port number \n\ - \ [ -u -pw | -pwf ] \n\ - \ User authentication (password or file) \n\ - \ [ --nossl ] Disable SSL/TLS \n\ - \ [ --debug ] Enable debug output \n\ - \ [ --debug-on-fail ] Enable debug output only on failure \n\ - \ [ --traceparent ] Distributed tracing context \n\ - \ [ ... ] Command-specific options \n" - Sys.argv.(0) ; - error - "\n\ - A full list of commands can be obtained by running \n\ - \ %s help -s -p \n" - Sys.argv.(0) + let help = + Printf.sprintf + {|Usage: + %s + [ -s ] XenServer host + [ -p ] XenServer port number + [ -u -pw | -pwf ] + User authentication (password or file) + [ --nossl ] Disable SSL/TLS + [ --debug ] Enable debug output + [ --debug-on-fail ] Enable debug output only on failure + [ --traceparent ] Distributed tracing context + [ ... ] Command-specific options + +A full list of commands can be obtained by running + %s help -s -p +|} + Sys.argv.(0) Sys.argv.(0) + in + error "%s" help let is_localhost ip = ip = "127.0.0.1" From ca5a545aeca1258bee2edfedc084715e6dbb832a Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Tue, 22 Jul 2025 03:09:54 +0000 Subject: [PATCH 419/492] CP-54480 Update release number for ssh_auto_mode Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- ocaml/idl/datamodel_host.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 03f6f606230..97d35adf95a 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1339,7 +1339,7 @@ let create_params = param_type= Bool ; param_name= "ssh_auto_mode" ; param_doc= "True if SSH auto mode is enabled for the host" - ; param_release= numbered_release "25.14.0-next" + ; param_release= numbered_release "25.26.0-next" ; param_default= Some (VBool Constants.default_ssh_auto_mode) } ] From 6732c89702908c5700795729f8bb937c71f31fff Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Mon, 21 Jul 2025 17:51:28 +0800 Subject: [PATCH 420/492] CA-413587: Checking feature for old FreeBSD driver The FreeBSD driver used by NetScaler supports all power actions. However, older versions of the FreeBSD driver do not explicitly advertise these support. As a result, xapi does not attempt to signal these power actions. To address this as a workaround, all power actions should be permitted for FreeBSD guests. Additionally, virtual machines with an explicit `data/cant_suspend_reason` set aren't allowed to suspend, which would crash Windows and other UEFI VMs. Signed-off-by: Bengang Yuan --- ocaml/xapi/xapi_vm_lifecycle.ml | 69 ++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 6db1c70a84c..4daa9c3b56b 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -173,45 +173,58 @@ let has_definitely_booted_pv ~vmmr = ) (** Return an error iff vmr is an HVM guest and lacks a needed feature. + + * Note: The FreeBSD driver used by NetScaler supports all power actions. + * However, older versions of the FreeBSD driver do not explicitly advertise + * these support. As a result, xapi does not attempt to signal these power + * actions. To address this as a workaround, all power actions should be + * permitted for FreeBSD guests. + + * Additionally, VMs with an explicit `data/cant_suspend_reason` set aren't + * allowed to suspend, which would crash Windows and other UEFI VMs. + * The "strict" param should be true for determining the allowed_operations list * (which is advisory only) and false (more permissive) when we are potentially about * to perform an operation. This makes a difference for ops that require the guest to * react helpfully. *) let check_op_for_feature ~__context ~vmr:_ ~vmmr ~vmgmr ~power_state ~op ~ref ~strict = - if + let implicit_support = power_state <> `Running (* PV guests offer support implicitly *) || has_definitely_booted_pv ~vmmr - then - None - else - let some_err e = Some (e, [Ref.string_of ref]) in - let lack_feature feature = not (has_feature ~vmgmr ~feature) in - match op with - | `clean_shutdown - when strict - && lack_feature "feature-shutdown" - && lack_feature "feature-poweroff" -> - some_err Api_errors.vm_lacks_feature - | `clean_reboot - when strict - && lack_feature "feature-shutdown" - && lack_feature "feature-reboot" -> - some_err Api_errors.vm_lacks_feature - | `changing_VCPUs_live when lack_feature "feature-vcpu-hotplug" -> + || Xapi_pv_driver_version.(has_pv_drivers (of_guest_metrics vmgmr)) + (* Full PV drivers imply all features *) + in + let some_err e = Some (e, [Ref.string_of ref]) in + let lack_feature feature = not (has_feature ~vmgmr ~feature) in + match op with + | `suspend | `checkpoint | `pool_migrate | `migrate_send -> ( + match get_feature ~vmgmr ~feature:"data-cant-suspend-reason" with + | Some reason -> + Some (Api_errors.vm_non_suspendable, [Ref.string_of ref; reason]) + | None + when (not implicit_support) && strict && lack_feature "feature-suspend" -> some_err Api_errors.vm_lacks_feature - | `suspend | `checkpoint | `pool_migrate | `migrate_send -> ( - match get_feature ~vmgmr ~feature:"data-cant-suspend-reason" with - | Some reason -> - Some (Api_errors.vm_non_suspendable, [Ref.string_of ref; reason]) - | None when strict && lack_feature "feature-suspend" -> - some_err Api_errors.vm_lacks_feature - | None -> - None - ) - | _ -> + | None -> None + ) + | _ when implicit_support -> + None + | `clean_shutdown + when strict + && lack_feature "feature-shutdown" + && lack_feature "feature-poweroff" -> + some_err Api_errors.vm_lacks_feature + | `clean_reboot + when strict + && lack_feature "feature-shutdown" + && lack_feature "feature-reboot" -> + some_err Api_errors.vm_lacks_feature + | `changing_VCPUs_live when lack_feature "feature-vcpu-hotplug" -> + some_err Api_errors.vm_lacks_feature + | _ -> + None (* N.B. In the pattern matching above, "pat1 | pat2 | pat3" counts as * one pattern, and the whole thing can be guarded by a "when" clause. *) From b574b9a93fc11dfeceec7eb034631ffc8d225ea9 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 11 Jul 2025 11:16:15 +0100 Subject: [PATCH 421/492] CP-308455 VM.sysprep CA-414158 wait for "action" key to disappear The code misses the timeout exception becaue of we are using a module alias. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index bebffe47edc..f010e7b1fa4 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -209,7 +209,6 @@ let find_vdi ~__context ~label = file. *) let trigger ~domid ~uuid ~timeout = let open Ezxenstore_core.Xenstore in - let module Watch = Ezxenstore_core.Watch in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in let domain = Printf.sprintf "/local/domain/%Ld" domid in with_xs (fun xs -> @@ -219,17 +218,19 @@ let trigger ~domid ~uuid ~timeout = debug "%s: notified domain %Ld" __FUNCTION__ domid ; try (* wait for sysprep to start, then domain to dissapear *) - Watch.( + Ezxenstore_core.Watch.( wait_for ~xs ~timeout:5.0 (value_to_become (control // "action") "running") ) ; debug "%s: sysprep is runnung; waiting for sysprep to finish" __FUNCTION__ ; - Watch.(wait_for ~xs ~timeout (key_to_disappear (control // "action"))) ; + Ezxenstore_core.Watch.( + wait_for ~xs ~timeout (key_to_disappear (control // "action")) + ) ; debug "%s sysprep is finished" __FUNCTION__ ; - Watch.(wait_for ~xs ~timeout (key_to_disappear domain)) ; + Ezxenstore_core.Watch.(wait_for ~xs ~timeout (key_to_disappear domain)) ; true - with Watch.Timeout _ -> + with Ezxenstore_core.Watch.Timeout _ -> debug "%s: sysprep timeout" __FUNCTION__ ; false ) From fa24a4c0f8f5adca806910c01316ac0b6b2b37f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 24 Jul 2025 14:17:41 +0100 Subject: [PATCH 422/492] Disable SARIF upload for now: they are rejected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- .github/workflows/codechecker.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codechecker.yml b/.github/workflows/codechecker.yml index da8ea12c005..8908822a16e 100644 --- a/.github/workflows/codechecker.yml +++ b/.github/workflows/codechecker.yml @@ -38,7 +38,7 @@ jobs: opam pin add -y dune-compiledb https://github.com/edwintorok/dune-compiledb/releases/download/0.6.0/dune-compiledb-0.6.0.tbz - name: Trim dune cache - run: opam exec -- dune cache trim --size=2GiB + run: opam exec -- dune cache trim --size=2GiB - name: Generate compile_commands.json run: opam exec -- make compile_commands.json @@ -73,7 +73,10 @@ jobs: name: codechecker_sarif path: codechecker.sarif - - name: Upload SARIF report - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: codechecker.sarif + # TODO: reenable after fixing + # https://github.blog/changelog/2025-07-21-code-scanning-will-stop-combining-multiple-sarif-runs-uploaded-in-the-same-sarif-file/ + # + #- name: Upload SARIF report + # uses: github/codeql-action/upload-sarif@v3 + # with: + # sarif_file: codechecker.sarif From b014ce1b16f9b5136ccac486f0beb223521c4b61 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 24 Jul 2025 13:46:19 +0000 Subject: [PATCH 423/492] CP-308455 VM.sysprep CA-414158 wait for "action" key to disappear (#6604) Change when a CD is ejected because ideally the VM is still running at this point: * wait for sysprep no longer being reported as running * eject Make sure we still eject the CD if we hit a timeout before reaching that point. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 47 +++++++++++++++++++++++---------------- ocaml/xapi/vm_sysprep.mli | 1 + ocaml/xapi/xapi_vm.ml | 2 ++ 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index f010e7b1fa4..d8c6e213395 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -28,6 +28,7 @@ type error = | API_not_enabled | Other of string | VM_CDR_not_found + | VM_CDR_eject | VM_misses_feature | VM_not_running | VM_sysprep_timeout @@ -205,9 +206,18 @@ let find_vdi ~__context ~label = warn "%s: more than one VDI with label %s" __FUNCTION__ label ; vdi +(* Ejecting the CD/VDI/ISO may fail with a timeout *) +let eject ~rpc ~session_id ~vbd ~iso = + try + Client.VBD.eject ~rpc ~session_id ~vbd ; + Sys.remove iso + with exn -> + warn "%s: ejecting CD failed: %s" __FUNCTION__ (Printexc.to_string exn) ; + fail VM_CDR_eject + (** notify the VM with [domid] to run sysprep and where to find the file. *) -let trigger ~domid ~uuid ~timeout = +let trigger ~rpc ~session_id ~domid ~uuid ~timeout ~vbd ~iso = let open Ezxenstore_core.Xenstore in let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in let domain = Printf.sprintf "/local/domain/%Ld" domid in @@ -217,17 +227,21 @@ let trigger ~domid ~uuid ~timeout = xs.Xs.write (control // "action") "sysprep" ; debug "%s: notified domain %Ld" __FUNCTION__ domid ; try - (* wait for sysprep to start, then domain to dissapear *) - Ezxenstore_core.Watch.( - wait_for ~xs ~timeout:5.0 - (value_to_become (control // "action") "running") - ) ; - debug "%s: sysprep is runnung; waiting for sysprep to finish" - __FUNCTION__ ; - Ezxenstore_core.Watch.( - wait_for ~xs ~timeout (key_to_disappear (control // "action")) - ) ; - debug "%s sysprep is finished" __FUNCTION__ ; + finally + (fun () -> + (* wait for sysprep to start, then domain to dissapear *) + Ezxenstore_core.Watch.( + wait_for ~xs ~timeout:5.0 + (value_to_become (control // "action") "running") + ) ; + debug "%s: sysprep is running; waiting for sysprep to finish" + __FUNCTION__ ; + Ezxenstore_core.Watch.( + wait_for ~xs ~timeout (key_to_disappear (control // "action")) + ) + ) + (fun () -> eject ~rpc ~session_id ~vbd ~iso) ; + debug "%s waiting for domain to dissapear" __FUNCTION__ ; Ezxenstore_core.Watch.(wait_for ~xs ~timeout (key_to_disappear domain)) ; true with Ezxenstore_core.Watch.Timeout _ -> @@ -269,13 +283,8 @@ let sysprep ~__context ~vm ~unattend ~timeout = call ~__context @@ fun rpc session_id -> Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; Thread.delay !Xapi_globs.vm_sysprep_wait ; - match trigger ~domid ~uuid ~timeout with + match trigger ~rpc ~session_id ~domid ~uuid ~timeout ~vbd ~iso with | true -> - debug "%s: sysprep running, ejecting CD" __FUNCTION__ ; - Client.VBD.eject ~rpc ~session_id ~vbd ; - Sys.remove iso + () | false -> - debug "%s: sysprep timeout, ejecting CD" __FUNCTION__ ; - Client.VBD.eject ~rpc ~session_id ~vbd ; - Sys.remove iso ; fail VM_sysprep_timeout diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index 746c260badc..badac1379e5 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -17,6 +17,7 @@ type error = | API_not_enabled | Other of string | VM_CDR_not_found + | VM_CDR_eject | VM_misses_feature | VM_not_running | VM_sysprep_timeout diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index eeaa9b99c91..d08e28c5fca 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1725,6 +1725,8 @@ let sysprep ~__context ~self ~unattend ~timeout = ) | exception Vm_sysprep.Sysprep VM_not_running -> raise Api_errors.(Server_error (sysprep, [uuid; "VM is not running"])) + | exception Vm_sysprep.Sysprep VM_CDR_eject -> + raise Api_errors.(Server_error (sysprep, [uuid; "VM failed to eject CD"])) | exception Vm_sysprep.Sysprep VM_sysprep_timeout -> raise Api_errors.( From 8d94d0251dd5318bd79aeb669350055ec55efe03 Mon Sep 17 00:00:00 2001 From: "Lunfan Zhang[Lunfan.Zhang]" Date: Tue, 29 Jul 2025 09:20:43 +0000 Subject: [PATCH 424/492] CP-309064 Add SSH Management feature design Signed-off-by: Lunfan Zhang[Lunfan.Zhang] --- doc/content/toolstack/features/SSH/index.md | 249 ++++++++++++++++++ .../features/SSH/ssh-status-trans.png | Bin 0 -> 147400 bytes 2 files changed, 249 insertions(+) create mode 100644 doc/content/toolstack/features/SSH/index.md create mode 100644 doc/content/toolstack/features/SSH/ssh-status-trans.png diff --git a/doc/content/toolstack/features/SSH/index.md b/doc/content/toolstack/features/SSH/index.md new file mode 100644 index 00000000000..a0a7c937706 --- /dev/null +++ b/doc/content/toolstack/features/SSH/index.md @@ -0,0 +1,249 @@ +# SSH Management + +SSH Management enables programmatic control of SSH access to XenServer hosts. This feature +allows administrators to enable/disable SSH services, configure timeout settings, and implement +automatic SSH management based on XAPI health status. + +## Architecture Overview + +The SSH Management feature is built around three core components: + +1. **SSH Service Control**: Direct enable/disable operations for SSH on individual hosts or entire pools +2. **Timeout Management**: Configurable timeouts for both SSH sessions and service duration limits +3. **Auto Mode**: Intelligent SSH management that automatically adjusts based on XAPI health status + +![SSH Status Transition](ssh-status-trans.png) + +## SSH Service Control + +### API Design + +#### Host APIs + +- `host.enable_ssh`: Enables SSH access on the specified host +- `host.disable_ssh`: Disables SSH access on the specified host +- `host.set_ssh_enabled_timeout`: Configures SSH service timeout duration (0-172800 seconds, maximum 2 days) +- `host.set_console_idle_timeout`: Sets idle timeout for SSH/VNC console sessions +- `host.set_ssh_auto_mode`: Controls SSH auto mode behavior (when true, SSH is normally disabled but enabled during XAPI downtime) + +#### Pool APIs + +- `pool.enable_ssh`: Enables SSH access across all hosts in the pool +- `pool.disable_ssh`: Disables SSH access across all hosts in the pool +- `pool.set_ssh_enabled_timeout`: Sets SSH service timeout for all pool hosts +- `pool.set_console_idle_timeout`: Configures console idle timeout for all pool hosts +- `pool.set_ssh_auto_mode`: Applies SSH auto mode configuration to all pool hosts + +### Implementation Details + +The enable/disable operations work by directly managing systemd services. The code starts and enables the sshd systemd service to enable SSH access, or stops and disables it to disable SSH access: + +```ocaml +Xapi_systemctl.start "sshd" +Xapi_systemctl.enable "sshd" + +Xapi_systemctl.stop "sshd" +Xapi_systemctl.disable "sshd" +``` + +#### SSH Timeout Management + +The timeout management uses the scheduler system to automatically disable SSH after a specified period. The function removes any existing disable job from the queue and creates a new one-shot job that will execute the SSH disable operation when the timeout expires. if the XAPI restart during this period, xapi will schedule a new job to disable SSH with remaining time: + +```ocaml +let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + !Xapi_globs.job_for_disable_ssh + Xapi_stdext_threads_scheduler.Scheduler.OneShot (Int64.to_float timeout) + (fun () -> + disable_ssh_internal ~__context ~self + ) +``` + +#### Console Idle Timeout + +The console idle timeout is configured by writing to a profile script that sets the TMOUT environment variable. The function generates appropriate content based on the timeout value and atomically writes it to the profile script file: + +```ocaml +let set_console_idle_timeout ~__context ~self ~value = + let content = match value with + | 0L -> "# Console timeout is disabled\n" + | timeout -> Printf.sprintf "# Console timeout configuration\nexport TMOUT=%Ld\n" timeout + in + Unixext.atomic_write_to_file !Xapi_globs.console_timeout_profile_path 0o0644 + (fun fd -> Unix.write fd (Bytes.of_string content) 0 (String.length content)) +``` + +#### SSH Auto Mode + +The SSH auto mode is configured by managing the monitoring service. The function updates the database with the auto mode setting and then enables or disables the SSH monitoring daemon accordingly. When auto mode is enabled, it starts the monitoring service and enable SSH service (Always enable SSH service for avoid both XAPI and Monitor service are down, user is still able to start SSH service by reboot host); when disabled, it stops and disables the monitoring service: + +```ocaml +let set_ssh_auto_mode ~__context ~self ~value = + Db.Host.set_ssh_auto_mode ~__context ~self ~value ; + if value then ( + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.start ~wait_until_success:false !Xapi_globs.ssh_monitor_service + ) else ( + Xapi_systemctl.stop ~wait_until_success:false !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_monitor_service + ) +``` + +### CLI Commands + +```bash +# Enable/disable SSH on hosts +xe host-enable-ssh host= +xe host-disable-ssh host-uuid= + +# Configure timeouts on individual hosts +xe host-param-set uuid= ssh-enabled-timeout=3600 +xe host-param-set uuid= console-idle-timeout=300 +xe host-param-set uuid= ssh-auto-mode=true + +# Query host SSH parameters +xe host-param-get uuid= param-name=ssh-enabled +xe host-param-get uuid= param-name=ssh-expiry +xe host-param-get uuid= param-name=ssh-enabled-timeout +xe host-param-get uuid= param-name=console-idle-timeout +xe host-param-get uuid= param-name=ssh-auto-mode + +# Enable/disable SSH across pool +xe pool-enable-ssh +xe pool-disable-ssh + +# Configure timeouts across pool +xe pool-param-set uuid= ssh-enabled-timeout=3600 +xe pool-param-set uuid= console-idle-timeout=300 +xe pool-param-set uuid= ssh-auto-mode=true + +# Query pool SSH parameters +xe pool-param-get uuid= param-name=ssh-enabled +xe pool-param-get uuid= param-name=ssh-expiry +xe pool-param-get uuid= param-name=ssh-enabled-timeout +xe pool-param-get uuid= param-name=console-idle-timeout +xe pool-param-get uuid= param-name=ssh-auto-mode +``` + +## Auto Mode + +### Overview + +The auto mode feature intelligently manages SSH access based on XAPI health status: +- SSH is automatically enabled when XAPI becomes unhealthy +- SSH is automatically disabled when XAPI is healthy and running normally + +When the user enables the SSH service with `enable_ssh` API, SSH auto mode will be turned off. +| SSH service | auto mode | +|-------------|-----------| +| enabled | off | + +If SSH auto mode is enabled and XAPI becomes unresponsive, the system will automatically enable the SSH service to allow access. +| auto mode | xapi healthy | SSH service | +|-----------|--------------|-------------| +| on | yes | disable | +| on | no | enable | +| off | NA | NA | + +When SSH is temporarily enabled using the ssh-enabled-timeout setting and enable-ssh command, the system preserves the original SSH auto-mode state in cache. During the timeout period, SSH auto-mode is suspended (set to off) to allow SSH access. Once the timeout expires, the system restores the cached auto-mode state - if auto-mode was originally enabled, it will be reactivated and automatically stop the SSH service again +| auto mode before set enable timeout | SSH service before set enable timeout | auto mode during the limited time period | auto mode after enable timeout | +|-----------------------------------|--------------------------------------|----------------------------------------|-------------------------------| +| on | off | off | on | + +### Service Architecture + +#### Monitoring Daemon + +The monitoring daemon (`/opt/xensource/libexec/xapi-state-monitor`) operates continuously: + +1. Monitors current SSH service status +2. When auto mode is enabled: + - If XAPI is healthy and SSH is active → Stop SSH + - If XAPI is unhealthy and SSH is inactive → Start SSH +3. Implements retry logic with up to 3 attempts for failed operations +4. Pauses for 60 seconds between health check cycles + +### Health Check Integration + +The system leverages the existing `xapi-health-check` script for health monitoring: +- Returns 0 when XAPI is healthy +- Returns 1 when XAPI is unhealthy +- Triggers unhealthy status after 20 consecutive failures + +### Configuration + +#### Default Behavior + +- **XenServer 8**: `ssh_auto_mode=false` (SSH is enabled by default) +- **XenServer 9**: `ssh_auto_mode=true` (SSH is disabled by default) + +#### Configuration Files + +In XS8, the ssh_auto_mode default value will be overridden by the configuration file as below, while in XS9, there is no configuration file, so auto-mode will remain enabled by default. + +```bash +# XS8: /etc/xapi.conf.d/ssh-auto-mode.conf +ssh_auto_mode=false +``` + +## Pool Operations + +### Pool Join + +When a host joins a pool, the following sequence occurs: +1. The host inherits SSH configuration from the pool coordinator +2. SSH settings are applied before metadata updates +3. The xapi-ssh-monitor service is started if auto mode is enabled + +### Pool Eject + +When a host is ejected from a pool: +1. The host resets to its default configuration (e.g., in XS8 SSH enabled, no timeout) +2. Default SSH configuration is applied before the host becomes a coordinator + +## XAPI Restart Handling + +During XAPI startup, the system performs several key operations to handle different restart scenarios: + +#### SSH Status Synchronization +The database is updated to reflect the actual SSH service state, ensuring consistency between the database and the running system. + +#### Short XAPI Downtime Recovery +When `ssh_enabled_timeout > 0` and `ssh_expiry > current_time`, indicating that XAPI restarted during a temporary SSH disable period: +- The system reschedules the disable SSH job with the remaining time +- This ensures that the original timeout period is maintained even after XAPI restart + +#### Extended XAPI Downtime Handling +When a ssh_enabled_timeout is configured, `ssh_expiry < current_time` and the SSH service is currently active, indicating that XAPI was down for an extended period that exceeded the timeout duration: +- SSH is automatically disabled +- SSH auto mode is enabled to ensure continuous SSH availability + +This scenario typically occurs when XAPI is not active when the SSH timeout expires, requiring the system to disable SSH and enable auto mode for remains continuously available. + +## Error Handling + +### Retry Logic + +The system implements robust retry mechanisms: +- SSH disable operations are retried up to 3 times +- 5-second intervals are maintained between retry attempts + +## Integration Points + +### xsconsole Integration + +The xsconsole interface has been updated to use XAPI APIs rather than direct systemd commands for consistent with XAPI db status: +- Enable/Disable operations: Calls `host.enable_ssh`/`host.disable_ssh` +- Auto mode configuration: Calls `host.set_ssh_auto_mode` + +### Answerfile Support + +The following configuration in answerfile can be used, when configure ssh-mode to on, auto-mode will be disabled and SSH will be enabled, when configure ssh-mode to off, auto-mode will be disabled and SSH will be disabled as well, when configure to auto, the auto-mode will be enabled and SSH will be disabled by auto-mode once the XAPI is on: + +```xml +on|off|auto +``` \ No newline at end of file diff --git a/doc/content/toolstack/features/SSH/ssh-status-trans.png b/doc/content/toolstack/features/SSH/ssh-status-trans.png new file mode 100644 index 0000000000000000000000000000000000000000..40cf16255a7e420f05bb4afaf2158ad41db22f77 GIT binary patch literal 147400 zcmeFZc|4U}`#!vz?@B67Dvd}aNrntjDH$q-WF8WkDPuA<;C81JGKQ2P^Gs%?Qkmxv zlFT85%>sr@Z=Q_{hJkH}>?ibFVS+i=}Dhh?N zMoRLO9EGydpF&xt@Yiy@b2)gZ6#rRdA$R5^C83go3Tck69;kAFGg|*@ADi6P$8FVFq1vnlg?GLVwjK zvC;A0$bZ!HMw;Qyige3;Ey8nOHyTzSV@~hTdVb;`?Z@2iD=T&I_MEO6zk)Mt)B@g>wFNZx^WETGOl5}_8`k&wM({9$q zdjI1MXP>|P7yXZS=6(MzQyBzgr zrzg8Zn?)%UMxzFeTk!)OuG>v3*9d%h=CXQ=Vf!1GsqxGLlUCxOC=@NZg^kQ8P%v3$ z_G`S^tpDrlyFyNLxq@_*+IcJMwC+U9mXJlg4}sd76d zv&~U8S*i5tVoKq?PD>#xd|vf-YHVVD6W$^(?)G)uOUMr%*-D&|>N(cE-D2}ub2Hsy zL2!>}g*2y|8h%YU%}>~Ub7%W*DO9FSK1^|5d6(u_v=j!K(q-!E>ZZQkKUBIW2$xnd zs*kH8&$Zq3)rr+xxN7kT=hSZvq-T#e8CjL_HS9ldpd@!Yh4LaYCr4G&e%M8He#XLK zZnAsoSBe_bc(xqN^wh(SN>JTXVmM%Q( z#bmQK(My*v_rLHI<#FR|P`3#-VNZ@t$o_eQsStmxq=rLMA|sF)az zV>3UCy4F!IbXY$ioJy2aRKV1+c?iFs*oY7k>HkYhSbK)P4F8FumZ*$W_4Kj!F zuKGTC@+1Ox!(13l{p#)Frck^UFJJbE!^Ll~XaroP5e8jLV-yuVYwqE<5-K9R8Rwexoq3}0vPNv4K*lBybV5aj>84pa_ z_;b>UL%BciC@UHn9SEjH$m0@W3tzhMyz1p(!MtN?+(2{Y2CgNP-RH;Ue&7o&9c*b*T-UZ#?8FK3nqgKdigd`zozvg z`P4r0lh$!!!h>1eJ5>*giD{acm}G3jjAq}tv)JY29@T;L2925)n>G_%l)O9^oV@d* z7R#D*LN`ak(;Zr@8hU%GUIdCc<(upz-^CWz$Ky|Z_m+)os*aEugDp09EnFZXwv^^b z@-Ukfo4LHhVGWPXu|CYkb2F26!|%43qOHEe|lY^OKWFU66K zrL3%6KaT5#!;dv~(Z1!eX~9eQTuJuFE-^{R?jcsMh&5(58vK%l&6`q(zihhy&Uj%7 zlaCkDk*`0h#+LZ?Zi}W;bKCFt0$%5>mr$ImE?&A6VP@a4y?z|edSVN8u_@eK3y{If z9v*9IvckzU_So>-4S}K#+40!U62-rIYrK6U=J1zPILq*kf~Ya+*1*78Dstf~AZf zS9(Tb{*<8uEGN5b?FE^*m|&5ca&2?tnI@yO%SbG_MsrfWsn;qn z4ROliujnYw*RAU=T{b8SHR}*H2 zyd+lBX4Uzfh_XpYDZbfJ^o1=b%$)jH=c<#m3Dr8hL=Rgd%>*VftUc&m=D<(=K%xFZ z=uWanNXr;7a~uuIFv0AImO7;TXCMo+vw8ID7&fI5SvTr!rj7saw~Yqr{f+z+ev0;V|Gx2vfi@fezHwZ7Vd($9!@giZ|Gx3R7oq(BH79b4 zFoR6xrDX2Uo`VC-FlptvvLC*V#KyMSK^r^7YPhzI^loJaH?q_|m6csOfBxaf$Vi%A z*|yH_-~DS`SsC14Mns%ulMSiuEb+6N`F;Ie$90RqfByN0MJo1kVm%`LB{{h(=NCC& zFLtDP4UZKxw@O*fmT;IU?1JR{Z_oLc@83&U>^CvZR27@5^87w*TN;-3?#khTwgS!R z@d5c;w{irZZgwl$<5~QoEJ$E2$1lBn*C{y#h48K}z0@yJ-@aW*PfPRf^s#MO?Cm!x z?AjnqH7Ul`oR3#he{yBreBYqN_aMjq{fvsQ&U(FRPs-Iu{OA7tO%Ei>+7}_j9n{QM z14I(1_mh$n*?C!AJ@|`hW?z5*0&b;H{CO7Kmh+zb@2+rgaVc6^r3>r7--=!q%dbGBFu==~BXg;g1isnfaPvxd!tl zG*U&r%{V%#XbUXd%l~XjHXLNxUh=z^;ipfZQcOmD*E_hsyP}e$b^F`!D?y*Poi}>n zyfQL`g@u8E{puZ9Ken6qe95x7Ar##8sCI*8Y+M{~3HzkIXj!OOTa&2$?<5r!6=RDj zd3pI4|NOHm)%dWcxC1)}hn$5)s`8-x_3PK0+Sn3RwMKZHihtVCFq_V88r&~@<8Bmw zfRQdToh728TYmRC?vj7rU1`d;itX#`D-HA6wrv|(_7oO&0fEZ{0|R=E?=hGUkC{J} zlpJx}xXLBXads+gu^E@2Om{`7K*{D&zS=FYB-b}HJBs>uP@mglnePXxJyY(EcLkXR zM)+9&_^j>AB6U7W{`T?XTz(}?($R^DTij1Ar}K+u>+$7K+b1~ZuF?RQif`M{Wo||V&do=}-o5<$-IbWcMA^w>o3?C; z&Ck~WY$Qnyc|E$Qw08sfK});$VEBO+*6q_Odynja?bgRB%T=U2&LeQ^Q+att#sLlv z#%G z`Dfp<@)#KzT~byy@b#t}zqbA1Te;ZRTHNuh=WmTp+BfCiOucdA23DkcQcT~-C^ckz zdV2cQM_EUnuM?f-N7IZNxUfwE#E&zK+LY?&wbDYL-9jdmWd3biuQZ#n_tzU)3Mwiw z=|&CXTT8daST50984(eYJz6eoHCiSRwS#jT9ahlqwQJJyQCrXYe*T)pdYC@P0;fVs z!*L+1G>h)@sZUvg=kFqMYO(EjFx5JdFYr}OKD%v2B>(@ow~qT+GG`QSeSsEBZ{O+dIeXI&Wn0T6CSgDYCP!p@A&F{NvX% zjs>|ILrF!tUJ6KyzJ2?qEw+WVSJr7tUgboZ0zrGLTuPBFP;WTcLrVaYDz`4{g$qzg zx$3UGlOeM?e#I{P3rJ_T%3VGTrv>+xDxy#CCce}23`q*5rKL;T)BF>|hPhP|o6q)Y z`6_*~3}5rhA!-AYMV6YOnVCR|!SD&d3`6cn!dy0I|P z$L8j$Vaeg#DprajXmgFlwPCptR%aZx*WNf86R8)oZCP*7}SLP z@CvCSobE{hC7oVvL%SY#Za6H09e@-uQThDG?$PMHJazDpc#_NK&OH!hjerljzSee! znBx~tn#oW){#jrc8NG6IMe=4wN|<-@$jh?5wCb<_NE|2ovcx=M9-iS&bz6pskWsh z9>CCGs5P(4N7TyF(w~OnC|YWNpoh4koZNHfGY>y>-Utw9ex8uPHSkCo4h<$)I`a45 z#T_c?hMV@JUQa{Dnd%e%@};D%uCAW47rYe=+|bv%wythXtWI!ACvk(wMp=ow!DkHI zO3#Og82Hkr)%p7AKTcdmLnGwj4q<){8NC>`pYmi8LnN(#x>Qhllsd&?MT_u)?XvwYeX zLcuXFUsh&Wbd%gXtHspfQ<8$BBFDA|ft@}WXx*1Dgp`=H&tJbL5An*RCA*GO$A?7H z2p{+x9baQ&r*hI!vZ4Gk2Gc|Hvjst{(cOHsyp3}9)B>I>{9B51h3&VuE7LPGjWE^B z_2+&7njVpv{ZvtLdH1?XuoaO=G(L5YKKLLP@>t@qxpwYx-(eXD)_{I30(y_FCC*vckBK|ziK2jnr?2yu1w^^t*rfqPbz zJI?lN21TSj@$uQe?ysJi$x&i@;^IY*mRuXQwO5kaFD-ZR;zc>`*aFWmQ9bX|LM{E( z4j$Dj!xpUZx9p#%@&ABP!=3_zf~rH?5kIwr~CyiB2!XQrUrz-q+PmrF%0HL$8oh@ zm3P}E2Zy|YTfMUmobXZrVI$MCUFs&Z#o-LCxJJJ)xl&6bly+Zms-WQw z=f?}g1%-RRX=XEL1YsS6B771G=Il~^eBa&Qf?wXWn-{dy+JS%Mlj*srstRxsw@3^K ztSpDV8OsIZ2H(iZx^+K-GO#1R3~gcyeg54z(%RZ#%F^rr5u1KY)r8Jn^_2rI%@~0B zH)l+zI)R$;krP_PDitVflc4-&ze@5I7iH<*$}n-gkCD;QA2}m@Ol{ZGF6+PQ&ptV! zoOL4tm)+?Zo0eu-SXrN8(hQ^5ODgNNjBGgG^>gaGwUg+{^h*zxXR| zNgWxvVbYlLP9=nI+k+Tbm_}m5)4;dw=K5e<7HC6?fAzmJJ8COpa7Z>_oo8`fb@g*! z-_39kK}8NXjxwzR1>2Ycx+<0T8FrS#p}XKlp78XzR&`qnjg6EX3hMNM+ed&#hRW(! zg76`2AuSEc^SLk$>`}CKFBF?xzi%n{eOhHlgzJHIfBo7%#m>(DxT5<=33K^?b$`{F zU3?{s;x8th21Y00{q%fkJIa~e^N&X%(Pzg|b1emNy)|prjM$3k2}pIN}=`n)oM<6n(pqW!YC7o%Tj|B~T4G zSG}8`5m7ARB;V+6Wm)&v%djx*Q*Rz18s96SUCP<aFKYksAw8`BgERp&YUVfFdU#`%zCy9*u7)AU3E@0M8Nl*YZbeH= z5L^+k5D#7uQnxAT-pM_A2=j#VEJ>nQdl;kf3zbq>5+y4{%JL5*A`R#w)ojaQRU z84!1)?*(y~9^#ZYA9_oy!8EILGdNw>hnD`2KFd-WA?M)*)=aDYEqJtm`?I{%DPdgu zCw-&M5QXx8R9N}{q$v4c)fs2^_|*P?MDs0`cWL%16i1YJg^RrxGU*6bzx*pI;XvPy zskO1zpj_3*G$Yv^cRv$zx~`n7yOgrq;;OE$l$T!X=*xtuLp8iQ@97*Irg)7+a=NX@ ze-(Iz`{?AEY2F&UJX&fu{>$J~i0C=Z(7DU?1A?h~pS!3**ExEjdUE!-p7RgSDWo#_u zr!sDkJ>FMP{Usz>Fg4{?m;EN^{vD2mc@E)pMS3Hy6}EFhPjDXm3zSMT1HoSV3=Sk!+bUC8h=AXXUnNy z2E-^z{0{lm^zufL)%-xAxyRk&zv#VZ=Q?DqvO8)Huk?-}B$;#Rk;hB)?k3v~8kYq; zIrrBZ|F?S||Gm@x=Lwf0?bt5!qrZN4c*Z_|{)T@GE>-w@A^GUoo&2`5>d3BMOw5jB zqg8*qeoTpyt$C`!ro-CX6g&S(?9_nw$bXeQ(yLTOncIt>Bo&0 zdSyvMVGc&+0<2O8wv>6>y)oeYxbkWSY=7D6&HJ%8#JyOsfk9Pd2N378^&ZXhcswpo zIAGWPT{&;`wQ`6_JUcsrsj8Dmy9_xdP43FX5&Kb8$&SgKNz4;OeD| zv*)Jz$37iMdh1$g8%pa<^x(&VyxlPu$GYkn*CY{Ymd}cZ$cZh1=gD+vpryMa<;gRH2sP-e);J z2SbEaE%bEzRW0(Py&_hh;XCX5>6Ga8HI%}u0-MHbG+oN_SA|*qvXt2#yRNAzV)c__ z4uNFKI-lP>7F?09V#rkZVc1s0(Ae0Ag#=SEu{QqJoFe6z>*UTl3vQpTrs~KR*V+iq zTT{I8O1x%Y-dv47>p~v&tiq3|f|22f62(VOoY~LD@+n^vZ$-61Yr_;mpHYl*k)S!t{V$H9Y_kc3f-svd_Io^k0e&FnbW3s*0MrTplvNwXak z7@Jo;dPrxn?!NPCODK=BKFdZ8^#rV*`Yjhl=66VJP9aKIbjR?|#&21v@w%m3DrO_8 z>$)g1O`n$|v-uzbBFyQ))y?;#bCEPm#$GkUZsjPyZ7~P+py{0IokbZXsK{)xL(y~Z~d$EOjp_- znVGyZJ#>$d6@#Z?krEA+MJ=jR2{|1H2S?mtDU}T40I*SShlFsmKeaw*M@wt?J7BzVZ?3p2REv~ z@a|1duVah>YT?-FDdTlMSSSK9+K98Ng>$E8NxM11+W{bP9^XpKoED+$-|kt7k+tnu zaE{^Tze<-8d%hF)!I^FIhOiX32F`|mPLJ~B%P2b?g()5Hx*~Q=lwr$OOSSB{`OzO| z`dm z#)}{=cwmWUAF{0d83CeX$ymE9W|Nh_xO#BK?0Mz9WQ4)f z*906|FElMy;mG@gl|L138S9aDrCr3g{_Q>F(zTyI?}t?b^#m1pg)cB!(pB-Co9`>% zMkvGT-lw`vXZ8avy4<*cH=CGj#DV$daIxfK43Bv|r;&ld&jtm}GS zD*=>}J)!)mlY7NC>l`ZpKGlQU9()?GUdtY*i27f!uuWj6$9qS+8^3>7>jn@0u3Z(3 zVD(s#)x^k;i^bg4qh8aEM=B;h{*tb41WR;UngY@fetL{)_mPEVVkP$G(0>><-tPQ5 z#jXAer5MK45dh{LofQPfc zzVpe2rKZbl>4UQsH1M-1e!JoRI|Vg0<7M}a3=Pw+cSr~Zp9eB}}wU9o<*(&gla5kah=p|D-o9c(>m7?B+3=9&t} z@gqiArLCajG@NF~e1bnz4=D7SAGQ4c;jvFB!B&MoY3TxYbod6&5|~oRjL4IO8MCD& zbXYd~zx?-juj3XM+EENlFFO6>Na|CstTyQgnhkKuN_JJ0;I=YL%Ugw~A7fhTna&`EeubSFOA(scOOn5bC|a zslai>I}`u~9+lAk>|z8>Ik(fQkXmw*Y)bss>gkTrs+mXJio(o~#vy-b z78$e5QcE{{@@@F{v++G-b*dLO+CAU%Nyfup8adP_+y3&6(B^ph>fx!qQw>hDmv;!> zIOTUpvwpavc#~&wnSq?_hZA0=*}eN_$28~Dk&t)!u%+`fDlIRCM1z#CVD$RDfgAT; zrlvEOV$kNrJ8n+JSLI#XutUu@;RPumOaMQ%g_(01Q$)(H-M z|Di*&pne0)6T7Q;6)OOBigf03y|0Tscw8fFJ`Tq-HKQu{@E?wN*F&I>u zX^T)QClx7n`8XXJ(~!2mD^gHoj!Jyu{wmPtLCsXP*l#oi7fP7Vuf`eu~ideEj@@;J&!6ul<1%oHZ?kgNxKJ%S-~7Vd!|+CS!8=`?y=-4 z(@f!kXR`P@lv0kri*^Nyi`P%5AM-H}U;(%K^4YWFr~-iNVhZ{7zJGFwmVKNWucod` z$=Dm_wz~PbsCc{f$3uprJ2J!5nYx78Dh!RpV@>UhJr&JvX{c^ju>!cU*v4(_M!zHF zjV|~Jl(vv=8|v#nvKw!4ENFYxaG7va(QHfCVm7p{F1Q?RrehYbg~G;wY`c;+ajq7d z{p&T4WZvTay{=l-*j8ZpH-TXfV`oOWsWC-2CLw{}ZTZ1rTT?JTOxv~qy@2lFjOs)z zxCB2Fb18O8%`RQcu|O=|`+3LY?=yQJ@0qt3UvnpbVNw2LI}-tZevnSKpYD~uADvvX z+agXq`@$xh)|jH^UDe>NYv|3iRZ6r&kikyX>tx4vvCcNkXNj-_V-1^mN z;XDi_`R5c6Oql0BL+Tcrkx|d-nHA<*QX9LW@xq(SeIV6mr^{}54cpD=ppcqBJ%|Tj z-m~oiz2MjSPNCRrTn1D3WpU$519N1&* zdP3$$hlPbj*`8>&t6ig$;UNV9NE;(UxRO@dwBC%e5`O(--FEj=kv|w3G5*2}H1zT4 zF++)Vq>3rqdQRco**%xO+&u7_C)~29GA!-new4USmZ^)Dk0Pa)!9i2?Y)dJ;!xt6F zD<|{o+{UmA`{Q-T5Z(+84K2a0LyXd28!Zp4%@-Asrd_l=l{eyT$FS(#`yI2tqqaAp zyiQ1KLiM+{hKx7tl;mbSdw>bVAks^%*%?1BbvbeIySShrpA@Pt)lxklujGb`QT9a= z02^nX)%r5Gt-i(hS;1?ue9M5Ot;@>>tb-z0ZXYjP6@U^wm7x#%(s_d__?d*2qnL(& z`_jKmD5&lwg;_>=Y9?2>blobKDZ1@v`Oj3SRjTtH8qZ=P*yfi+r$fxE(fTysAFpf2 z0DuD{STGM~9x`22Rt`X^E-t&KR{=j-eTxxuTf2*2#+$~@$_d^0{x{MnlyX?5KZXg;6~WRq zw;bbNT!Yq@IQN0pR~lvtt%+EtprYem$9F$NzJrXt7xj#o=;({cF(D*Dos$L157a_d z`v!DXgqrz!F_9wTgPc=r889%EXZ(xU@wLXm2ACXl3C&Lt`xT2b5K={VHS7?!CZ)Yj z7O~l0*|KVXSlYQ&QhWjeQDb?rY`LFN1oQG@h-UkWf^&JAxFWUK)iL*b*D6|=|n z^%<&W`XfNWR~%GIIv#f6gLSAoC*wcAa+td4&BEx-ob(I5FFCU#b%yEzB_mQbh5glH z4nQY3j5l5*b;z4rqOUbRMJPo4(GuDbyzf3Z+<*ulYO(R^b)jGbmpG*S2hXZP#)8)g zC%Q44&JD66+P>9ut*j{iM$ie)eqABPEn|SRP}7Twr>r^X-HKK%4q#A!>&g-A47=X> zlO;LJSdjw;@10=LV=LxFIlBgc7Du?s&RGVA_=OqN2q3kXm4EAwMXmU!VO#E(~;PYUO8yr^> zKb>yZJld2tf4rbX+jkS!W$>VYUsr$|Q9J|7al^tQ{pv@raA&q~fmYihPS)!=Es70^ z`Elww^)LrFv6Qj96xt7xI8V}JV0#zlt3$XoO}g1tCd08l7v6$Jk;OUmj)LPjtuS{|3ZfJWmO0g znc$<>pH~1f;5}rffmC72T^di$xCE4atYSO~!L_xuSLvJW=cn5;T}n^}fLm37r$7z2 zp`R2NcDV zqm}v5eW=Lku}j4ys7G!$%dNtbBkBi496o$O(lD3$s9$f4_0{Qp>{4x;@>P~r6)F)% zh-I@%mKl3o-kR|Cg_C^QmY&*NF#VfXX(ir!a5y%kRDMB@Ggv6Hln44?}Bmu}jUEh_AZM*y5v zX@*`IB_&esE)N#Af;KliA{t4lJlA$FkRxif@YZA41@LkNaAzFWI*N-S*rj9ckrK(k zK9G+p=;-LEgl)BF4Zcu*d=YcR)+;A?cFrdgRevgP*znAvN9+=NjeTRL5AO?wyK;4P z^$9+s&eh=jdJOem)mNn`IDJmmp+d1cMgSi7N-=(CURvOk2&n0;P0=ke=1Xh{;<-8w zLRKkrVz4DB%_k560uU1m7q0;+*6^NGP{T21$Lg;&q#$tjB8Dr`Z(vS9$qiWe0%Aph zOvU2kW`2h@Xh2uYUz^m3kY2Uqmc}LSjfbPOyg-1GmJj-!A}^6t!$VbH9tl#TDS9V|)B+!6qyz!am#!3X_g5}mVwM~qxe=ok~;x<|aWWNnt8uPCua7|iS!TIopzm|Hv;eg6Mqg=;*Pl8T|zSe@@ug*R~=VPA0}- zR8#4s#i}Dkx?oMAuJ!o9GP~LFK%gmO8iKOg`ucq+E7zf1 zE^)7vm@XD|N0QN2?Af%L?`&d+WuyRh5K=AYW^N9A$x3b0wSHfO%($%j6^grjGE&u( zEu&OF6_Ezz7x2`BiH;%b9Fyi~x{Vj6W`~^2psgrxNQ{Y*g8<`idtL#7pisxpD8)lC z!1wixiAns;2JP)b-D@dymxhuW5jwy=Upt@t6j1$ItDPS!Z`4Z&qr%OVA-^djj+dL4 z1^^EG#*8XKI_Y~*RnKH?7e^n6LL~1j>{m0iUN1hqx5MJ3?z044MasZ(Ges{>q&V*>r*|o^?h|;U2K`07m&C&@;R@VG-sCV;JS zl>Nb;b8}8?fdIp-jpOnTTla4id^#&9q*;)jUdI_$Od^UFi_a#m2JHKOf`ca?s`8Sw z@}GBVCkgdzv1;FA!Zk)jyldJ=T^J8e8rD1d6FbQErdEN9)PFi#MGL*zPz#{z>(ZK149%1+6JW3W1$z5jnYC_YdXkLxPCPHIGb3 z!poN*;7gl~5`p1~D9k53EseKRPC~xXR%|Zw%Z>Xc!cP#vA+t*5d{AiA2O$M&{%_iU z`VUU*^BQ)XqZCT2h}dLs4CY=M5}W;7!}fd55{+EBHoLR2na9iyh8g5ve*JdyDi?^? zOiUVTYuBMq1q>J?l|ayXN~8!bZy2CKP~LU{WnFm7YpN%~s>i0JRP&pqAxAthSUmo# zIXw>0S#YUG?A#r0II^O?-Q)3Q4kersgs3fOGqYOrkHibq$_-qORwvsIUrFV?3a@0~ zq5vJiG~MLySw~Xq?_?o~gmU1>`|06=$*v%XJeo61WU^|%)yJ#4jRgXONM%*<9{c_E zE_;FF9f;H*Y=I$O05QDrB1`di38j0^W zvLrYmSJ9lGYQUorC9}TE4U{SoS<2YG!r4OYt2ram4C^?cFKiZ#D(<+5?+Ws{gF}L9 zqlB@$SPWnc!bouFG<8V&E}47#Y9^bO+yps8GkwTgAe>bZb9#PD#nd!m%pnad0t#$! zZX{-bWk4WRps75@x61{+Bsm~OCkzV+$oE0zC@}MkO zdw=Zc=L>$BXYZ8)Lp)>>e4d*-=XZbWLxdrKCFed>Pga+b9u?PNs!Qlt!`8{*NwtV%FQt>CUo1Q$^6qt!D2P~JU1z+wZ#L6Ov!sHUKjTj7V;*15E$kWH z{e2ME--s;6qNd$aXwzRyCHU0p8@&824~p-D^lMkLxnHBUIrmP--`c6X`s2{?22g z>($HlJ%b}O+S-FH4|@uf$CxN|8>oA7w>Zm5J@+LYh_W`4ds3^a+r0cyQm#b!>{W5~ zNTOU)dw35@5#M^5 zA~+PVzh9C<8j9Au>;zCoV<&$J^BZV0u|GIvIjEC2?1RaB@M(IveT7$#;do)lWh%eK zxV7tY!PJvY8aJ3NR8I+oZQwp3ep5T>*RSu0xB%Hz7q#ITC13x$SInCG9 zFY>Fj2aIUBH0lMaETr?u5v3uD703aiXJ#i4;7SvtbN>Ul4-m8<(gte)(YZ}w67)Ti zs{(+wL2Nb~>hz)}tCBY`DeCujrjxnt5>f>K$6Ul*c`V1>Rbg;7z2Ghsm979ncKNWN zRpE$?FS1GChKRhg(7D$n>lDL7L~2`d61|MFbP6Oo`0JYJ15c3CA;X0O7Krv^VLAtX zxx2HIor#{!s^y2w#fy*O^#2x3932_49Q-0kM2LNSrX!Q~MAiuWCCJ-LXekPh6x<&e zwZ6>QMwA{oLj&3woJJY!2*{Ao_)m`qw>=yw$Du<|&I9;uU_0-x8>A3g1VyHS+Y_Qg z=!&=5M0eUN+gDdMG_qxTLruCd!$;CLAnTjy0a^-lV>ObSXvAWq=|*IQz?#=1K%->C z`+SjqtDV!@`1tsLiaY9+FnmJKo4`WE zX1sciA`;n*X^%8ex?nm;`yuICu>v$|?iYDIU|rH{sp9#1q_YItq%s5fYTIK@+(*5w z#MWF51nG!c_;2SuDX?F|fy^!A&5O`yhVI%WG^e=#=kzS{p(7>y?wu3M8R(XSAvH+{ zsXg0wxnSFv_cwRe!(|D&eaZ|L0I>w;3f^1kK_moMxyEY>KQBl-8W;J!S3}5_zC4%d zDEy4TrNUD*ogSk$YymzzEd$jyQmfey9}2XI2H5ZryFRLXEc&D#O@sDgGY^gDoV2RG zG`+wXe((-0_VV{$_ou9Wdw3Z9uHj#&f0e2Z1&T|3TIJW@&L?28r<@T@D?~yc1zO}{ zza+_%&+5`p+j_KDAb#u~}v0E=_UU@Na=M_#NRoaxyY~LDdrZ_CjhQ7R`A|>-=3KP=<9xb zPG0Yn_!OkvFQiiYw>>aKMygnR1B1}wp-F-~9<`PzMb;>DCadY~edPPqI%f>j&GU)_ z3CxfGM%)^U)togvwbtJTa)o-=dLK4POqdb4*d&&&jfL(;kWg)zHrRb6qPfmv;?j#l z!}<1UTpyZJbnnA3%2JSg{OWn(`S5oGi84IPXdvsJt;kL!rP50*51~Lr^4Lx28rf5# zbf~`y)Nj!Eu1c`_O??YX_!4nC{^7^_e+=ii-Ys~s0b$FtXDaNk(l-+PT4ZDzDg0|c znpaR%nEczV)J{{u_mia4G2C{yx0>!|{Fg#k)>$%tGkvO|V9u&p=_FvhFQ;;~8MOZB z_aWk`R1pr6Q1qEpMA7*HLWPiSTjV0zNQv!Uj5zLSoqHrQ>7dD}05~adEaMD@%9zx@ zb$Mc3+*#x>?dC*5irQh9&z?!NL>RV=qa&2y>9gZ|pDkY!DBDuF#QF{g%_u$i@ zu^+P_g^SM0YTO1ZuZ}zyQh@dzQrPNhIo02B1?Tmd+u?JYlDZp!u&=~I zbEA4lWMoIe%g{SL9_Yx2YGch=5mX1#(!=UFn@K@oZDN)(wHnO5P`rhU1QXEp^V!(k z4to&Hs!=JeEoE3K(okYV%-)}lh6fYHriEV?|7@t0-3+U|XQW>f%a}KZ*@&(S7_JR=P3FxiuD=?^iJha{=i4Qo7fCCvI zVOSxfVr5qYHBW3k9Apr(b#nfI-px~1REt$KG0{J_%J0c0E;efRLSZJbz13-IJ%ZwR zBl}cOfM=S){G#~Q@!UIMe+zo-`D^K7lEDLpA^-}0Ax#J25qvT-GV#r%p!7hh`u&6L z@y&^OeEtP! zceZm&Dk{EG6V7$f+iCzaCp~^ZJpbYJ}d_)}uo z!2k5sawt@jLznLpyQBBw09CRgBll_IOWqPa2bW4%3snFHF$mb0M|OEpyFN~g;(o^? zX*z7W-8A)2hHpGCIb))`JXlXK0v_LKw!!IKwpW%-H;)v|X8Yr>O#mvS-Uk~_uDi?? z7^NvXqW(r#5-ox>hpS~6ji?a6CSI@6^;zTB(|a{tCXj@e?ooA1$y_Z;fFnTRdudEyG?aa!wD}jav)R-ylP#kxv3GsEO1d<%cZAWl$#R?8#>Qyia8=v* z#Mtl^{R<99s+~vHF$6WUm416z|H$%|$jd`z16M2(eZa~LCE9cl2)3aDi{ULuIqHUc z=~7yEmsB-MUUIIaJ>B^uP2XARhAbZKSLIDyaCWY1wp-4QHS`0+AO}=$YUlYQom0*@ z%0y2(omToS6JJl;(ATRME%}dSq(^M$ZEiovTf%s+wRrJVHYLtx%SgUZ@K`+>O=_ZK zjfTIyQvr4=xrXha^@gV32w(R=qSHDDRB41LJtJzfC)6Xp;PdB#_#@N|Q_)s{x-rT| zU6s#>qJX$H9<}%G%ju|kPl8}5wH7C;YRvLvDvdXe4PNl&xa!+;qrSoRBs4K-TGh|_ zOh6#$chW3l!i7hF^pWN)*f%anKo#WWy~l62UD(eel?}m}yvAdu`|nUH+=KSw0@9{O zjzD?-{P~GxbyM*DZcu`xkT!ZRBz3*OiQ6all+d&1OX_&r+~Z#OAxQ+ zU(ub)C4?L$?co+lxm&kxeFDl!_D9iPOZ}A|+HxULIe+s9F^ms1ii#q>f1})q6Sr$? zl*yibdS$}{iLufn75f?Kn&_D>{$>Q72r1K?j{I;L`Yf3RV3N*M}Y5aNw<+#6|i-uhQ*N+sVDxHcp+rj^GHZo_(^bZ zJUT_f)i$RJ(2PIYQ)hLa@Y9HGB#`lZyrf6PxK4K6!8XRtaFI||fnU!s4?xNTiW?z^ zNP`jM9snkBFeI-ilsM_so+7|8#7v7~@)wlbZ-A(5ia`Xyc{+b$MO|}vSOaKAt}OemTxx zq3#aXE6`npOdm;m*jSQ(I4uXJeCC z4Q3Tj(LEX))Q47gnFZq#+pd6X8&MshcT2bGSBxFB6pHUy497P;F*IbAn8=bH60jv@ zjFpk@Q|Aq8gC*vr@FlH1f)W0eRMdSBqI3->PpStaBXYZ*`Gu1*7w=wLg^jYD8DtWv z+=6|SEI5&xW2jLLhPn<72th?X;@pgBra@ccOj>i5aUHK;-k~$6(BHgm_42#WS`i(* zN!>Ipm5K;i-A+RNd15URYiMN1GFTLP5&;;M`M|*gRAK_S;ybyV(B7w-dCMuJ{d3wp zg*FR?k4gjh#7RhRA6PrKm6zb=mS3ND!iVy?Jv#Y(TFJn`p!&2vyd04_#ow$D8TBv` zX+I-2^;kKjcL$fe>~2Erq*FO;3HkrZmPJtLB@I}Rks3ke7DczMoyWsvp=AaNW~Lue zi#*auIdQ0RID8^74^xs~!M$y6)FzeI-cBT8X&}81{A~>s3g9LshjLsi8krqEw}Ry- z`?krN>Y*}3$;RQ{ynwVg8Qk^jVlLIc?vRf<&#L}TMCo1$w_@pr2B*mqO)o<@lOIGi z5fl4kwVb*BmO@EE$X|lS=TnJ%-;G7;es_625{7;HzIQ-4>gc?HDM3wah7+eS-_KDd zmLie~U>ZK?$EWD-(~cujZl-o3acj_8&>8O_rF+7uwX3yB!%4 z-H(Z>$AK)NPIFm*4rDPNOkJeKLaH$kzv#PVRx)|95PdCCP=VtmRIi{2EBfKF<)yvQ z<(8nut$)4>XDeN6nM1YTSSQ-Qod+igX>m*9Fc|2LU0HcxqRuCzIi0F@(N1ylUEyNB z^?eb}*0IE?V@A`Z0Y_9`nTMHxbY{Foj%}l!x&!~xhri%39&RrJjg7>Os;8@4`P;7F z2Y+A3DJ$r{x(cOGmgvYxiR}WWKBxjBV3GqJRHH`oxg?H=c}n#BCK#7njpn|VL+%>D zdY{IcxjFpI%pX&#J~4y70#bb1qvm>1*IIlMr=giN!%pn|r>|OsuAwj=T-dZIys!Y~ zUZ$lR~JDAkbaAW9sGL~7N}&b*#ShSQH- ziil-Q;3z5g;3s;Or`-<;ZG$pgG+Jtgx4Eg(1!pfinJDH zFq`a8e0`KKQDG9^H(EO{Ne87I!3xJluzCv-!;c=0B>h`~sbDkExjev}>j!dJJ3U(NRdt+8UEGl-kY zF*84;s^qxKw{Lf!5FDbMI0g%6tXLFG9iV}+7eUZiE z*yOQ-yq!z$zTV?jayBm~lLBk~Or;z`l6bPu9O|6!8GQ)||N4Afk1DCvzZf_tWEPK#+XK{?zi z3o&Fj>W>kQgd~7nTB4OF!w!%N$#|1tp;FF3Ml&>L@dMx)Pmz<}1mP=;)1bUUZ%1-Q z@cAgyoB@8A(+A!Ky{2?yz%*>(SW@>T?8Du?cVa&d68DW851w!(E6;kUOvx_%DFBDqLs0WLAY^H5&ZqUsHiF=u5 zvF4e72Cava^w3C^4iO~$q_LG&ZX7vN%;9U{BI=rM?-Kp|<;;@fk)z+cduO9|UBpaT zNcP?I%!PjxYPnv;6zCIjYfzAAxUwbIvRryaG6!;a9L_4Ep0=w;n#v0c?RI@N(bt#k z|I|*n<)%u*SklV|Uw7se-PX>MLj|=s6(Lp(XR#>`ToaoPAmZRAoQLOJH3|>ZN03NH z!FYNiIbNor?y{yy&Rf1=R zDKy|5!bD)(1L#LPqE}iOY||Q`_}S z+(Vr=_7vJS4Y(GP zT(-p7eSh7%%PR?+37K`s%r8IBr*Sm8AkO3s(Je6^qVmk#?a&;}{4MASqFMMDira`tiRA0z+K57aRrp=8T)wcFW@$WEkZ1&r8i9V!@NMH;cgZ zM?ydmp*U01l;0I-M85=8XWh{pEpv#J4N{38QG#Nh@2%Q-(ACpxMzixu&bzi}NeUu>;hT z4+VGZ*g=j#wA+;(pO8>bq;XhE@7HKK1RE#T+c?|$j*{8I{`NOEY@69abn-aRgiP>2 z>u<4r4l8~F`VBeFPosx2&9GPF3o$g0ebnvc&uypqy5Bmd?(^rDq#qpBgg{FaAJM3% znvOF*s67^D+;(my%Qq<9(yE9hRS?y9u#)ioCVacSaoVFSDsCA~INomi>_FRGJUQgh zbtto7HlDa0)p)p!wPG_#c-x!w5EA1@bMSLPmUFi!YJfg6?sw4^Pkv$XPywKpFD$u$ zLexSRC4%&qUVylQ2TjD+_z{K!sv^}g@6afulqhlO#V!dLj0`fZMk7=F09s|vRNQ#Lzr#9?^eK`t4Q5#5CF}+!P_>Wbs)1OyRlaZ?o zwCPy~Se<_Al+*2G%pv-JmD*Df|C@vkLB~fsgrsDiJd{K&$9eEj+S=3DADCpN7o8o>(`srhdvuZ z*HHsOunyji3CwhZ0sg^PVX+E%MP9Hvl?3wbY9S=0u8#*WDW!WE}*$3$i zUlbi97X&EvOFU8|2?XXhr^$tX6`G({x zpfgx>uU#wYoT|86hRSd1gCJTMWR(IgkvyPQPpkDL5gY@@NPI?}vwI&>r`mru9WYME zk-bR5n~~4$J_d#a^Ostu7^f75qd}__mpDf!(DWOUNay(7NJVCK{(KrW`co5v#necz zRZ5VC%Y}rzag}~+fL`z*CSP!TOLFr6#n+pMQ@ytB8hKq_`ocwH!MLA4-ypBL zo}X4~^{m+09fU~Kb5TBx%&Y7sK5Qo8%+IL4|F6{$c0#X$FD!87)l^7Wm;h;U40o1v z77R{Ir-Ynk_Bb(`MnCU}em}fW07-$9Au;McdI;b(IQ0&o1D@o2?FIao9z1w($M6K4 zTH;o6N)gw#pqDs_!7^b_Zs8SLw%n_hhZ6>pkmFT1)I}%*q zKjF++&vJk?SoG~t0HcSdzzi)RRmEP@l-tN%T;(t$cU9zMX+QLyeh3-lJ)9mYJOXw0 zM#2bCG7}bBkILZvxyNXPHVcA7d@ay=#h5sH?b;ann@%7l=$XBgL9o9J3N|vGMZY2l z|F68fU4t4Hs2{UBe83yzIJMm7RRNDnytWGDk6LD2R5V8RCUHHcC6diH%nQ+S)c|-ymPGkbA^b>@6EtH-Oyt5?dVwDvn7BcQ*3AiDrl6)}U zP(VQJ=wxGE-6_rm_{_aMJwXs$*ynb{0oJ~f%7ONgOWyrBCh^jn=Fxfymq`FS?z^-f zArzqzQRw~WnL3mKDM7k;)40C7&G*CJ1j;kw#RQyGkMbSk-}lF__~LtoGtsq44NAu+ zED>lcY{yT6xdU!VSsz%zAgW7kaU3dGAm~4m09QZ;rCT483@ZbV1$}$8maV@26}sqE zV{k}FOpHokABK*^{fI#qK{cfD4=prh7@iHlMaJg{kQg88ux0JXzuEx1Xwox;r^{6n zzg(wdF5+sJYe4Y8vkq-x=+21U4`56a;1o!bY?Bv%$70zP)6rF@m z+`t+!_#wUQoUtl00bqoIg_N=yuZp6_UF54#I{A$wTIKU*^ zvHF?5QnDa9&(-erXQu?nBU^7(bP<=%x-ca^u1{vr!6l+Ks+uhNO zA-?Bun7#-9^aD57ThB}}$b2Clpb0DLTB6P99n9^dUtF-nrMre5JezwUYJ;SCTd9 zhOvGpCyOamG-+oMFpG=Y!UgI2+p~ud6QQFkGx8?aVJPuJRZgrjL`VoT#r}$0SNg+w zd>s7)J@{Z7AIixkAcmL=;+e#wnlwCVcp%1}R{ClrtmM}L$C8;GBE-PIppRbab+j`b zKgy9W0}oL0Wn%0=PaWIv{4Oj41XYHVzTUz_j&P7gS?}iFqn3Q1$iGZP!n-; z1`#_7ro#Uo27tV9uBLvC)-tdpp}*mXVpB_|B?DiouWXfKBa}sB(7oMr%gjen0I*IO zf5W%l|Ix*Q2oee@J@8gjTzeQ-vn0wEw%itc!JMJ>TKx9kx{1sqq02=P1zIfBRWiGL ze6u){4oqS|@=JWi`g(ev*?{nGg7{IHPlsRiU(#y_%6$M&{grz%fD7R={{OcvL3vN|*^M@)v*NevX+}6P;$vVH2{^aC<;l6pX-^d{*L_kupVS-}q&pDeD6Dou~ z{_nKIq5N$j!f4QM8Xw5x$lvHJ_Z1{&&d6n^UJFKc z`kOOx$}qf&NN?>sO^!exGbQ^oopjy%ZFCvX7N8P1FPGDL@F`{$h+?eOCZd?M;Fb~h z2C$kZEt~K9E9_lvCS#;V6UiqE1*isE>G^UmP)VVZzW>$#edkEf{w0~v#epEXU$$P{ z>>Nxlb{NDd6cF;uq%Z%CAf6+QcN;Y^Zi`tXWY%L4dZUZ}74wAfQ|%394hrnYj94N* zGy!Gkz#OX$UI}PS5lAJ?MJnYepvQha>dI+_w1)6v#CBh-2^1GGWeU!!{SZIzU?&L2 zl;&f`h6oUAn*mVC6l^TZA4`;06aI{9l9U@7!}L_pW1A}6n~8Pvvn|5HwKB8EkV{sC zNlHjivU%VAOt=-t?Cst+zmG4uX#PCR_=hV6pV?wh+=vN(n4%DTvgsViHb{?KSU1(8 zD@Df53ESk1X3_rP2XaYE#-H0H70r$DQjNsL4}FRX!?wJC%|hc(n+>gL|Ge`!Xs65v zxA9V``+Po{>+{+gbJ3=?LjA&}Xw-~hzlY|93j?e0P~+xP`z1`sN($XW#MpWPrkb6% zr2L*_kB!=4T^lJYfOcCFbVO}0!7!W{Bt3o?*B@FR_W{oNE;H?XWNM*$gtJ;<0h^A} z*3&D(t_xYvM4pn02n{uI@4AD~GU-dbw;^PFMFbHiLNn0UyR^h}^2eRXFVN7T(=O8E zqzhX^P}fb4Zp64votx@a>}drF1WaY*J!uYC&hWC(fAtXO@(tVpK2rus{(^T$s|E%m zNqE%_b!Ro2{ml@i3_0!k6X8t$`)-{VNB2XE_NZ8eBqLi#hxkL$jX@y zQFr$K{a@yy`Kp>x&DjtoB9!b63-nZ>L}FqZfVxwR{*;?zxH%XmLo<7)DseXscA01F zQ=wDiV9BJy`!i*UUpYV1t^A-G6?L<7re!`sNJ;v7{h!Q?AEh7N8jAviZ_!&oK-Vkw z+rh#S=@cQsA)O>6#a=>byZ?L|x`#|{kV1cmJo-n7xn!Il*US#Il-%Tn)}k{-x{HJM zQB}ZH&#y4B6O?bJhy zX?qqdJl&Y-C_B+^YQgS%>fy7vp0g`xTFU4=5M4EOc?eNNGAIA>Pag*4hADrO)exAs zbTih&VZx>Q3B>`eIq z22Di5$1b2CKrpb&PCp9a#d;yF+NMg<6l>RBOcvRlb`ntt=<;zGH7r0 zydLf*FtINCEUu(bBMzsl1f^3!9B+IOT6UX2Bq&JCo9DQky(-f}}`4hQ4gUQlbaR&;(V*%na`Zy&eJuibFv!8|uRhE5VDADTnF8l}V zOGxTLNX9})adSx~B0|HB-z2fs&C02|QlaI&4dzQibO>4{W)`FkTMuVGYevqukXzzr zYtZJ027RpUf&$bC{R};3?>SD2BGK4__mc?#oor~$DZMZ`w?>%_At;#u6>JOB)9a)Z zIKYKyGg8LaUPGfq%jh}_STB;6NsiYYY6kLmF`R#l?M=V62v;pWj0#@nzl>Ir4|pV8 zK<}65{8wJt2QCNW$S!%nJ5O_95h7INMLZ%AD|0+_Uy}ESC!tmPn<#;~S=8=WRJd0C3;WNq zBb0d0xZAd&ddbhf@cwfKayE1RfGVWJeVGJ`4#XM9uGakh;#1H*3+L_{QMDKgfRLOg zuLwp$&2j08=seomH@GLZ?~?6)J0vyWZ-x|DjcsAOfjram6-qa2S7B7uZ>I?9&$vf1 zBkO1)i_DbMAV?tafb7u6Ld@#&QkGrwO0 zkqzupJ$Oz(8^d=H9S}8G_c%bk?h?^0y|SH?x9mM*J#l8DL_-&%^~Wu+Y#uG-P46xA z*^yWp$T>h%#Dh0R*l2{pRD70CKqPcc+U-{=k7ZjJ@9f&1U$4Ki7!6t?$;T{Wu*@5R z7$C+@Dt5I}6Aipu9X6L)Xn!upYpyvOCM9@^AoIvVuRT|p_BrVR;1X zx=*0}eeB3YCaeSQX(_)7S(&05xeYwKF2`Er@n>UOM9$WftW|_@C95xV-Wm9ape?GCu?rV*9Bb3QLtN8sE>8T8kHh zbB!Q0E`YdnHAymh_vi`@RP7*(Z10BzEe+T*whk+;$n5;`RIGwi zc-p0lX`eIrgfz3@WNi9#$Rw+W65=yIpkBBCEX>&W(*q(U*n^)+ia*BejKls5NfM1i zx$Ok5Drq7ww$Aq1{4?ESw3rs|f;6!6c+uH9IgFu}*t7bM5r7WZo}05)4cI#<^*0YM z;Ck=Gk)=BuguDsjpPtcPTEkraK?f7fr%DPon1wpLbptR1FQRp|dX2l=PPHE4J2-v5 zHG3&ng>59AVhtQ+GbPC6YZnNaTN~BgJBnQd9mU`u$P5e1&-|->NJ(e_oV3sWjmfg1 zj16ajS`No9Dcg{#vPR#zz8S!s5Zl?2$Z_xFC&EcC5+UR9E2ACLK!xi@9iJcfr{DoaN#v>fxrpZwYV?V+viQ!>fd2e zRV^|wo-$h5ORR0sJqIz!&f-^ww&w}G&6#NRw0Xe@;>*Oe{it#oV0sc>T*@Hohpe(fWvV8}7C7B-0sgsr zUmspPRfQNno@)v)6x$5Y*W3DeXXA=M&t}1XoXC8hHdEQFx0j)2huD+serx*?iXIp! zKZv+a216l;@mh5vGuc!|c|-Xg#^-Z=_8w+hY%JRa1S&Wt&dWir2mP$}PVT_?bz;@E zwap-v?KWrXJLJE{Bn0O3_u`4zNkUD8aN>xya3T?&o=+QcW*EJI|H`e%MfbPB-7N>> zy8oqJGG~K`*N@|2&lp)Bpo|t~ihXn~yj5yQYoK~f9%r%9o1TeexB_~W8I z(Q~yG{Z&T`%P0Be*VSm3EOD}FY^l&0QG?;QL(T_bAtl-Scm4?R9N`jR4w6`3sfO-7 z**>%DH_^)+8g}nki=U6+|4*f3V)NTxpN~{D^!()9gM6p;l^gD8I003xJiqB|-oL%p zoK~RhBbTGH0D+&sq8j5c~11sL9IK?%_ii^d?3MVn_eiDQ*Wz1>qW| zsx9&aS07T_!Vxfjfra?ZM_gpJAa}u?^$-4+=0agfwF^AJW4LvC5oU<^1+=w&9i6mu zhU*3i)shWXe$_;r10%UM8{nti%a+o@$MC;o**CZ@!}O76i&FbHeof1?=R+(WMBwz**#zbVRDzWmx>rNL|JtEJzQ$$u}p$Ye- z!Frm0C+}QW9INM4zGf1>*Hlw=0BT+56P*F~%Uivj*OYc_%eW$0q9Kl5L?V*f>vtZj zowe7~&8ke8SjA$a)g$l9FcR&{|A$NnyP|#9gz!2LxaIC4>*iELq|!PpYe}BN--gng za%vM|IE2C3abaqTOq^QHK}x+&Btg=6OORMZ0mQxB+1e;9+(0D0FKR6-s$Np3T7LVuP@i&qgnhx9Vi{Z3K;Xj5reE`XzGO0&$>sgxK7!W zb-FJpE!h^$H`Mkzg6ArP*9j=im7374YBJWTec9>833{wMV})09KOKGLYTe^}svSp_)9#L#|_EUHkW?2K0cl%1$j8zNgkK0!}e%uR@pz82eHpHHk386ECdKJXjDTg-qlQQG*U&^Rad6K(y|pMhJKO7ZeW{Px}|$Ml57}>FvabSW9kvqg6+BU(%ryIj5g$q5w&ib*^L4 zpF1x)PuwKq&oAHDR|$`;V&ULgfGcWaz9Y0R)3_vvA<}WBxZnLJBiD7X{BVpy2a@dR z^bhttNA0YwMPwv+5{t9p}~RD~*rll%Bv*SLWiHM=Le39PXZU zJ8%;h!NZhix5uO5g7n4cfu;!R#nyVy{H@)p@NK1Ejm1=v^)54TGXAWS98k` zkvzK(INVPjjoe4yyBNJS-)$5bZ*q$mC^PpMiADIIIKm$S1dX;&z?qK@*yBofZDv!&Z{*yMtSlFajodbx*LKzCLkt!lZlxlpr z`9L(lrfGFrl>2yHIR12{{*V+T7d1=99oFr*x!&AWf4-CBz$#G@66>_^gk1!GCezc5 z96IZ9A&yE!;ZC&k%>7j9cZYatt^1ylG@qFy*N)Ck#{r7nrjA*741xys1*N!Rx=1Xd z5!z|oS7HysbyDPBpyI3!h;2PfF?I|@qb)++C?G{!c`iMff=uO$6dEvYk=@oJyK`Qi zo1&53Y~fTN2bm2K?~k-Ul31MBXha|9t;m!quFWF5Obd)KosjM(K*{k80|#}-QnPK?ecljSyX^EO7M~|rz`JS5z{m;fz_d3;^r3@(FQb2b zG>Q@}>r}J0^6x&sWzDj#&2OMuW1Gf13wNgyRQE${ZPafbE}1KC9~!uQdtCU(n(LSG z$nXzxgundCD>Y<>7`{r_JEQ?(fg~@DXa)Jf&tfYB$~W!~PS~V)L^##s{jLSHQqR-F zCKjt+((>GFPlmrZJY6D-V0p;v#P7)QsiD@H-%RSq^wa&PJ$dCCdb6T@W-r(pg4l9E%J2rk{F{5KO5aoGH}&6Kxk#Ya@OaoD{}gv; zL9#H0d%}-Q+06?RYhzQSg;<)`t@6*F>4a&dxV?GZUtB8`LKudsCx@HZS4Dsmt?U{( z7>p_prYv_f;!|djBb|0(Ly@VcBRWMVjEx;rNKsZqMS?G|2awJ)Jbv3P7NWf&V`rW&Tu8b?iikuipJ^SL#;{#Iz&B9h^KJSb9*yM3yRmp6Q=6wg4 z_)ZOB;7j+)l`AApk|jCtrGV7WT#UWn6R`kG2d_ort@`#-A$F5W5ms?fffU0M{xj)v zNQ`viy@o7+*^e0bB-np=_JYl?7X}AtqajFO#_y0)*K&Xlv@BeQpfja2)Ddgx$VBu4 zTnkz53zgKhXiyZ{_2!y-X4x9ws?nmkaq02`ACvbwSTV6<{RW{%%M$NXcbvvtN#GS9 z=kA904OFkV6-`K{88U-~20|puLv*D~RxCA8n=< zdLp9O(ok!wtUcHwyH1c33Ns>Y%L=nRK3DqiqXOfK=4fbPW-;F)+>+f59!+_>^D?Y7 zwd0r5LcR^#MJtvRD1&|5Gqk<3wgJdmw=>$Hr!e*KqX`QMGh7c>?2JJ+O^U%hKABbr z3nheYLvXL3RwGA%i73A$7{iQ%49u0ET)$yMTcH1&UYErGMK%n_CZRmnh;Jyi?;Re_ zO?>AsNe&4psvSC%oN8HRcD8+9?o+41^0&!#EB_5DXIN>zlRX!?{QU3#J#6Q3 ztpB92^gCL(91^PDc)ho+AwDxgjmSF`dr1gKC&KKXPwC7f{JjYqHA-j^vH!0}=t(8O zH2(Vjm&Nul>?CSG!S&F~fC_8KD>MJ>^kF%$$}IcJFxC3mBZ|^5%>fTdlA+F#3-H8h zp&fWJdWc0@iJKH_b%6ZeeK;?Q22-ET_m<>ax%fn_f4U#pFqO*bO8s>LR(&@*6+ZlM z@cRCJm}9IzdNQ4CeJnZm9Ka0>9))OXqvXeV8xj#sAD%;67ut}4l&BqU`#H>2AEvM!1S3VuL(jRj1Nj580}qYbPyDxYxQ^(y?^DH?usu1$d#re2%32%MG1j-ZPsR?)q zQ%8RWC;Lo?Rb2jju3Fk$mWy|Kn0Qcn%nrU(S|X}!0|YV( zGM?uR?KNGA$A9j}DvQ$OZUIFJeCKzq>rmD4V$Lp8w=71n34gFiE@g9O1-FNtLFe_u zalOAfanD5Bm}qNn?ur`$-_&4}7nj_;_|jhV=d*tlJ7s-3nB6fbCZ_-BqziHd_lEuBJL98~RE^rn6>0cygzd4hM z-jv{fLg2D|FUTiPg--htCDKB0`#(^k7vdX?k=Zpep@e^{6_O*4y(Z_dt+VFOkk0uf(I%|MxFBLw9GtGmA?b@vfVD@;n58vaCX9!$N-g zTg@p#1#Fu$;=g?`Ap;6km>9qW$zD-x+d$`w1ApCq(9;b77*tK|qYdI!?pLO_x67pp zY~$qM021}rL-hp2j-(pkp4t!4(iUGVVk4aF;q<2D)k2zy)TC*rt7#FSis?#|z81yk z*23lUX(F5{#zmerXCgM(Meh}WWv@1BVZNvM0EjH!U!r0gtO2*#K8uxf9w8iv$wp>k z4#JDkymZq-rR@0}_$en~T_jY6pMa&1uFlYv;&PT1jX z*)(B^%h5os3EgvxedktsYpYD^L%85JO^$xUKw`4qkt2cR5gdBrpP6r!JY)`dqzs~I za$sMlt6|}phJ+MBu}ea+NxadcvlCpilK8v!>03>$#@eihM zzZ3>Eu9lgj#!!|IsY(ER!ZaSv2vf8xh4@VSq)%Pdvj?#X05&4V#?jyq(xo^4S=bO} zQ`WU`V6%8&g}C(*FO7BNKss64e-q;9XC#H$5}2_VuR+D^)~}5#Q189}-gCrK&MZ`& zzk=cQLFd@hu^!%C4?F#2*sOhrxVwiJ+XoM@ARkg;$L~q-*&N@Y7Icb4I>LELHgVX+f7k>O)Wj zG`Dj=tEO`N!(pX0+cN^H22QJk4PtHWN(#Zad>$GCtU5sVLP&phD@-2fr@;PE*G7#jlA*-~j#eDKr2!5OGs8Y1lHlJYw); zQCo53Yi=P3G-SM{#=<{moI<+{;Ze^A3ea+lp_Y0wOYW<$y3Qa%+j3psPnr}D*s&Vk zZsBW*KgF=U=q}Rc8M*^X_~loK&vWOyWiG-CXrJBzyt@8)z%DWx8X-MNQ%}pT4kHO; zrUh6}B+P%Em2m0m(QEojLns>XMywae2Wd4XP0QG>@4^D!~M#^h0udM*aZ0 z(Zn^h(MzE-Z=Z$fm1YZ$4#+RqqQ#3&25`C&0tpGDoSoKSJGfljBcMI8$_a+VSL`x z(iKBL2enk5o8ed<5itI`)o#xOtPaWrTFq~jr|y{cKwO$?;kiKyE|QlBVfj{Pb7n7F z*d%o8+K`FCqgb>LJ;xrq?z8#)f1>?Hg!iEQ^d=Y#h|e&h#Q6A0`)~ma%YAPneWlqn&~Rq|M({8D(+o*=B9LyPk`Q9 z++tX;Ovch1dTSEh&YgSn*$L%KLSdoDzRp~dkvt;!ka?icT|IQtw+_Nyt1_B*_{4B4C-!^s9ozl&(#FA zs5=#Q5Su%|_l)^grmFD`P;XHY=K5YH>qJN=w2;YtJUz%>}^YaKv+XhnS zDhCW>qzvKYBk-pX!1aHu7nJ1&@EB~(b&MuAig>;NgAj+(?RPmwLdb(>dh>{Xa-17? z3}C?bF@SM?r(wZyvTA)NaXp?J;h8)0>S_Ca>fOf!B&G)RTss~VEnqzPvtkAcd|Q?L zr>wX>V&K!0CK97$$1ZACZz=nn0jQIN)WCTd)&{IR3WA^%EE*Q#sQXq?uIVO=i75@- zR`ei<>pZa`6XRV6gUR%TE8ZPerefbVOi8^Z;@_h+<2#b+rlePR`Z@|H2^*S)24W$- zq$>5JqhdVnnDNFH3ilFXHOHIC_Nji9RT4hU!PZJ23|nw8|80nmwXh%f6tFQhh*&aXt;9;FoxGG zrI3*%o$zJImhP5qYjrzE>=e(+2^4tnuUQ7E%Jz^do$P~rVncqCAXRKU^Cs%D95 zzB}VZUnxsN-znu%ON0@=kME>4G9x$xQ(;?lAq;y89fXhEKfc}d2uP9EB9B~#J5nGS zLLTSEGjRSL>M`ofASVu$tM*Pj2i!&i%4H&@-q~(7$brFXk_SQ0kT?uhoTcl>7kkue zQBgo0)lEO%vGQAmtE=`npc5gk&@c{Gc-09K#gYZ}?l^DGB$}NQCsdU_KLgozWUB!k zyN(eDZD+vFqa5WRz~tZe0f%@(y!Dvzt0F0p<%#D8Tc3c(z`z?>4wRME_;cTmKbNzo zRg;GzQG8IavNj~G-Tnbia%YBLMzywJ9TgY$4C&~a-Kw(7T(qBXEN`?0n^3m|=mB5h zQalo!?=3@2H{owbKwJCJhz}1~l`Iq{gb9m>No*G&La70@eQVQ_ybcF&4d_8|GJqlA zj|CcHFKk#^3tW>xg=O(qyH`j?Ni%*$MGR^;Z)ODiJ%~UiZ3eUw`v(Phl2vl3b52(uH)VX9G%3ng|b7OH{5dGd{}& zDGFP}ZVtRkoU2FLvlR7)z8gn2ibX3>S*5vPO#|Msl8l6U5g94peDLgOXM#K*DePRF zs>_puS`ecNkWTpemj`Js?^u6R6tWmvjT#e`sbltvaE^#m=tvDfYy!H0F*a6{=oF#M zKAH_E{F??X&F!fEm{a6PzdKU|g4mzKxBc|0i#P`FJ}ebMyev@KEd>-dv3U^N$9>;RErB#7E3)^^Em45T6}lpg>lGfTSG{`;uiV z?a=3+;t`Q+_p1vOW%!YSj%xhfhr;7U6viCa(?ni#+ZU(eNk+@sAGCE{== z4SfXQP)?lO(%jP|^v}F3Skh~>xModuPHo=!>JfaUlZXi5^Z9r97tK_TJXjX7(yy9y zW%RcG$sukKsu2&nRI=ebRv0%AOc^ZHk6|f?(Gt9LP6`H;-r=yXX};OdjY1HZ znVfnVZhXtClL{V6-Ft+tfy{;~9GPZz#lrc>JDEHs0!T_-CB+5fmQ*dN4OZmH-?e7) za_KXbXGnL_roleTNtbVpCZ6^g2OZ!-NF2~12q=Ih4(>X!E6~Hng-w5mQ13^IdOsph z226_)@ncCIHa$-r^%D0B+F?s*ZumklRyu)+HrP%9vVqCxoFlmvuom3k+t`3Ky~39) z;Vl)f@LoE13K=BCw4r;DJ^9{GTt*L-sq7BoSr8yz2JRc9S$Dxm>Vet9?Rpm_pG39r z;>^Uh1y=B?BALg9;4_3bQOU#P4O&r&q)^f`Z0WR=e~O+z>A2{2jrB||Q#ZieCv>%O zFtTa?$)$!m2ftomiy0n^<>h@SYAv7huAtHKUUHbT1lybe%)4&BK;_zW@-7K; zrMVw70`3@6;oi|rGZwLUA^&R144I^*O75Cjx5yUH@#qj09hydZotAz>a z8XgdfsEKVKGc0W17{VUV>0t?Gt@|-*zzfwK%7Z8r<=uJ~#7ITr$`qMhujo?N8PEW_ zXL{n)zfVBP=-YCQvM`BzMgFj;F|NsvmSPDEy>!~Z*9(DKv*pGU0J{(8hCikNw%86r z=ze{_3)X4w*dx`t(vYH+EEd(Fp){O-v!!(%h!P(Yr}A+$S;SOzX$k^_uhG<>$jIIiN|D!iy2UG_pWAZ=PW~gK zd}u)z+|L5OhhsC;6Sc79`^BWC_T&+(Ib=&R(lxy>E<*W~?c$OIQd&<~$RNCF z-m=mU%IDUe$u|H=Q+_@PEHj#(2ZL{M%iA=m0MybWZpWkzX-5opevi1>={gP$C=SOB z?UZHsH0+GGehM{Dg&vl59Fyk`B>B57SMtZ`>p<@@UT}(_HJB$=)pGcKX}B;jj04H~eu_(>^s%5;{gVB;hM3*t;`5AS`FLxXP1LmThkVGGoqbY`SryBd- zv;)S`JgYwzC4;hOobaz$$bb6h)s!+&L}urJo9T@6$|n44y7MNklZM378%QwS$Cs=} zf+6Px<*t}bvqO7h-1|ah`#4w(tJZHY4qtVxUE{CY@6)^)J8X+gD_Bht*G+!kPgPd{ zx-+BX?=fj)ny;f{XiH&O(X*kwK$vdUc?(*sAWm`B%7dXv@lMqO&&do|FH{zs9&Tl` z>>oaU-2k~FG$L=^R}CV&h2C*5xaU*OqYF5`5xdSnS?=yLM`P?TN&PJQ$s@x)+xmZse(RdYPm^|w6DtS7?^iLIq?=Duvo02E~A>w<$ii-ghekd4}5K8rOmI(4jEWRIvi7HfPyH2XbE^hT%o4~ENQ-|8TAf+ zCmbXLf(i}!LvGi8E3~h)`)RplO?TN2V1BJrFVJ{&Xt|01T)H3q)}+*r!!lye!6w?A zABzWUrF9qn=6~6;Ar?ALNl`>ZL6^W_Li^hvNRjx1fB!w7F)?9_@nrb-hmq;*Nl%6T zY$zOvXlYzP3y?pwD62EUE8j<--h+eTI6Tp<$$$Q3`2y-Z^Z7|27yfr%~ zXZzhls!FG_@O80x;6lC1>08|;ZRKX=v*h&qUMG#oI{PDR*+i@2)}s^?nuJ;n!*K-E z|835T^D#53IG4%y6wA&M1*mG6qf3(ac4d}l9M0z`Cv`sCL-6yFS+1wPhZ2>C?`;CU z<{D}#NK*9;E5gB zzIu5WPFNzQZD#R~e77;-+r%pc?o4J+h*sl9vTT3j2IaTXEnu@0y4Q6U@Z={~dxmI#<{0!TBYRX*58|in?X&4bjg{^rP}WgR zqV+nV`&Q?FZEYwJ%AL9{J723370bSe>33P|Un7R`ai+}-b(%9>_7&-_G z=kfZyqSO64=aCpZdMQ|f`jHRbMzUAkR9_!YH)R+?O|n0(vOIQ%ndz^Y{yQ8X?k9O z6#pn6^{j140ah@$j5-ArE)7*9xYR|T$7}3%Iu_zxJ9l$S2#2G`Q*{G{mTk4pKieH`A7l)P#v#4K`s&(i9AjpqanciqQH_`Zg z?TS-%fyw+)I#NAId|zv1}0z@#rAS07YBBofmjQWJmwT!s^ZMghbwBtIbs&TQ$j;0dtxLBUH? zZ%4Jw1Owh*@DJ!5~MIV^mlBS+wB_WgQU{16JC|e1fRRI)0ht2Ft4UP_U=mQnyZ)YPS&~>1- z7rwr-f&pX{&`OvN!a|C_9z3^W&Gmyy(OXaeqFN2O9f4c|kI0|x|6tHMK$&QId>=gf zgcp`OZ~%Gf(>dt3jFu?7nCt3h9Ym9H5n+W!n*fp+-}@aoV`BT04rN!IG=H!_gsiqz zAf-H(Z;btOQ@9TSpGWfOIa#P}w3lQZ@ zs7ZY}bM*No#Y-5DhVXFMlIKa7m(Lk^CWNL4`uxrmul1Y&obj_$eMawhvUzmj)T|~K zC;YD^xpopCEw@;Y0y5Qhi(}?b8+(N7L7T%8WGDJler2MMsB(DIA3@M3<8h%`ME2f* z(6>Rsk9J!A;8G0)uEGP<44L}1qW-`6s06&>JR*k*VKgBi6tThIxa=k#MMLZ))6gmITtbU!h!%8QzGL(I(iVGE!Ux= z{y#w^pDMGFKCXoiHpI<@a0f}l!Pc>*v0Vc8Lpi&@_kJt?8A*~+_~W{8khSCus0fWpEbm>yw7w)oD0cc^=G4)q%V_!bZEIS5HqBkwtJ9;Q_0p zDdBK#rN^m`OS;lI{7NW00CWbw3Zn6g5}^I~eEz*Su8K1aitO2Be5Ks|FB`~r=mK*T zps2zd^C$daE6#-kT|5P+4tNOqH1Hashn|eVGNY}UTnUvyy@B7Hb#N@$Sh%x&2*l%R zUq@B+Y`Iopldp+vKeIR)FH|AJO<13>_7b&`{^u@L3PoygN{6G@OhPI_tLSW?@3E^B zPY;VCdFtPfmH*TE?=4i@p(SnJgdS;Tv1QKb4U8DOF&Ot3%9m3;jILeqd?>y zEo+u8Oiq1`2M4`k7yu)RKnM{NR)v5*2En5mOnhjkrEok9nIns5dJ%0SJ|-!rqz()d zG0CW=no4eg-SF)txxYO3Z63g|0^Vpg z#BKXstOqER7@Leis~*E^k&%(wm3GOuUMHPQ3*|!rIE_mCzfX<4C7C3yhNMTK^!o_% zq^^j`@n-y3R-_WwU~S+2)86e0oUzbGzt!^A?tJ^(@7Wkz7~euHPx0IFXq6-JQ3o+` z0mx3&K!p2*4r8@o2q@<#@7}%h=rfY?ryhEdTaJ8$WR5o`1qwizYgQj1Ioy@B5J1D} zaP%ETqrShlmz^Q*?c-BIShBp6%;PGAem$vd_mCVg{{w<E=iubkUfv~hIrBD9BLkS#bZlggSZ2oAl2Y?+ls!hoZ>8JL{-L4b!IGlu=fB?e z^JjpUb1yMQLQXwP?+0fk#^Sis> z4*&fP2v3=8e?WKkD);^iy-&jUP)vT{22QM0xSO7PmMRanU&Nb zLuk^^1G{&JfP}Yki!Dn)(fXI;WQVI1eqG7AQ_s%DuZ>9RLwUa*8W{rciZbq2e$ zHI+?=Mov71OmCO;YCJ+Hn8}*?K3)}z*1`t?&5_C$YsAVt#iAU{@pMC^mXDsJw-wBX zz^!{MfV|txm>eMsg0|m|zdtCnZO4wx)R?-2bm{uaAYI&=6P;KaZ?9MjxeuEAU=xUx zgY=taqS0c;P>~nvl{WJNHv-Umr6#1L zWY6-ShZ&T^^wDe0{gT|rN2X^;)A@|W2d8JmwgAdsdM6vN;s%s;7`i=-syVl0BTk_f zbzz+Jfj^h~Kz$54&x(?{qf)NrIF*Z*4#9Pkj`<+ul1GAq0n zhBxE@g!6X1DbF~UzdLouNkV@@@YQPw?y;tCvap7k50VMu>g_@BnI-;MKHx zf_KieWU9xI_#`86rAar>Ll^H_8_ldDIR}6NaIaWfl;b!rF93_Rh{3b8qqppv3Ke?Zx7_7IljMvWns+}4M^hqw6 zWu##r?n6{DWZl$QTmoX z|8w@sglM@|Jz|pr=V*ub=cuc~OnOm$9 zsxZ1t81q^CkGIU~d}pUxb7x;yVuc)FF?5%M=GOitTfiQ{L1bv#Ca|^L8_i1KH85Qaj;UF%SiF`mrTXNjeeY7DXDl*(;&Ls@AkvW_qK2;k# zJ9LVkg;jig?YfGiADj&b*IT5EFNDX@HM}OxGxc8Q+Ztp^A~;5_Ee3*o^ybYW4_%Ha z11()$>&w-s4o;^-Du>&!nexXe4QC4kEtbnAFMZ`>12DX{J-XXoh;m>DjI_4>6vX~) zXq{VRZ+%AN!kI~B_}gTSgcBiRa=a{HbpJq1KVL7VQ-~}`*QDJJ5zDU96 zu^`6-8*!m>#4!Kl zTgQPmH=Locbw*yGPe|WEJ(U7=Kb$G9;ELj8k>>rwxHuUv&AXOe1(QYCzRylu0kE(- zyLEJW1|Li8-2!}}E7TXtpqPW<6EWKV7F-3S;Oda`b5tOmm~TUbhDQ^UAC7oHMYzLB zxLCmYU^0pJE{qLx$7BE=$2W$XWik5W&TT;)w5L!*pnH`HHGQ^>Cu$9}aB7#I9UmL> zXaDGel49#BAo`G5Z?*vC>{DfnV?62) zatD696=NrH+AOYfSh^1qwe351O4^{K=IAYb_3UW(+wzak5ZP_{24V0ZYT-2c$gjBH z>F4VWYljs85zm^cpPVb*L=I@&3vT14WS+Yb!1v{xjynBa6@ie5pqY#ZV7jRBs;i4j z`w1toP>L>=blmBpgqjXi}Mo6C_<<62Ei;>R&ut%akN4!~yMMx(xq}VF@coT0R#T z3uqY8aG({ZISLg}JI(_nGd^&_N7C>>#<0-EQTdDXw7+U+2C#5Y)$%kG= zeP|P+$|Lgm?Ck1g;qz$SegMq&ia!+NJtZtq6Ty|yK4le;0y@cY5S~D3*@Km=FiQ+S zT5<6dn!8NAjLC3%r2;L6v&D7bOI1ku$Ajprx-*(iK526iE$ry8=)bE{JYAxqB%Pbq z@iVm=2Robtgk!V2{(BF%SX1ritKyGsLr-2!$uJ1Q=K+;+-1XyndN!BIiYUhS8&}JH zu+7nbY7+8;e?R*!45$432VFAZ{%RB`-9*Dd9jS59GS+|zG3U|{D6CyK?1Hr@PVu0M zf9fGn6qp16^HG4&QgVUOM;#@cFjO=IEEF#)wIJQ3<`Ue7>6}wYF{bB4A(^EIba-e& z(B_!mmQV-#H{aefM6a?$Er(~fL$Y%n+(wi8=SJs#DFzg^sq(EM0y?xJZEhcY<$mxr ztdb&YFlOX@a#bn7c0O~VF^j;QQ=LwmqXXjKNw5a}Uv`grJu6OhsaG+DOl!w0Owh>N zwfjEc0==9j+!j)<&+E&(xn=9>AJ7U_@Y zl{lu{IG}$(E?cA`IW|1)&_oEeTjgsrAA5id%k0SQxGBw%o%%6J*GJ$>Fjx&MiR~-B zX4!_VKti{C|F!`s5$Vy=br1KD88+9MH4^a6Y>ZvSkCoq}PX0rP{#ZZAEW|A99mzxo zF;DDIr8BzU-VZ3ceO0ANLQ1MNR}g)m+?5gY*rIWoFr2Jl6*x01 zCK6Q6OU6W@!$gN-f+pH4l$_6gx2Gv59-dj2W<-Ab>j5mTp?{WvqIK4k3!Uf@`@Dhk^q%}n`cgLJFiWj-_wX3p0)4g`rel_-3}Br# z$gg8?Sb?7|c;*z}n|zUS7wQWZJGoKg&Ebflv5zz?aM%g=jz$A0U#y9jeQ!aucWOvZx-{+Q}d z+j0A+lt?{_CG4VO;}vDLHXp%Jkp1F1l(#31y_)m_2W~9>Gz5Ye1vnXP14V&sPb6*3 z?tz@mvl>eI*;D^SjQ}mjK(+06Gr~ynbTyqO{c8MqyaA|DmI#yDSFu9ohBvTk{n4Yz z=v@>?n!`#@`CG=VTer^mRpTqI6XG0yuVhg{1cGigHoK+>F zDmk1pktT{xbE-+VNZGfV8BUCik%c^D9vrQSheQ?Pn0LQaNBRaXtgWH2LEifh3!EnK z2a0E>Y*E;>6r00_7ZH;P$eS(vX-&5kLVt8<4n7*HmnfcM#XCaT|g=H`r?E z#E2b0t2)YA{IK>Cx-2`pjiZgtF${u2f{kkn1WxHnU649s>SJ_DY@X5+=S8!ku*(ff#tA3Hh0_0t+wU7jZGR3kn`rRjo=mb%DGH z=DpzS4a|ubhJsR*j_|*ZR8`(zd^le8F~QO3wpC#(u|G2A9J>>iBS5M2xUPq*aB~1l zjQ>RUAHK3=J4$-g4F_{bH+8$DaA#Gx?oR$w02wop`g9tTQ_t1@!`ZY0U1Z=;2^(?y z=h26iz2CJ!otiFvukIxw2~aNKB%;d^91Fb{Wg#r`PZNGvNcVqZgx%3g&TSMcAAF8J zAEt2HA(JdMcm@=Cplex7PQ}S55GBP(aiNN8Dzd_DCRHvE`w0vs)_x1Q>hN7cQnDqG zmZyeTj@%Oh7Mm<>APwnuY5PHRRe%N{Q|(UP(;|XCGin#`52CV|?C&oahl)WKZP~k$P$Hr^l$80k+p8ekp2dWzM zbdHk}VDTqz%q4qFxdA7;j`e8n*dxP%*_k1lK69+9El1RBqXd7sXr#f5I~B-}Bh9>5xtc?EhTZUglh}=XKJ_${|RN^cqoKr09VeV0D**$X@`#Ce)_VLR+5&8duNaKeOyZW}O zqlF0rJ?P`G>D{_UGu#x>q`Kf~QRn(_2(q+n!Mj6>tJjZ#I^Ah+P}g`%nST$V*Gslw zwCw&|B8y*%;vkeksJ3wC`1YO+Q?qu-)!O84)^HK_Y3RM5ZpR{`ZzI#!twpeK`+t;u z2UL~kw(V9EjXkzVQBc8-fPkPPU@VjfqGDmw71)3@rAs%7ErJx4s-S`(ks@6{Kt%y% zBhsZu5$Pb(1O(n(dy|u#`|fySygTj~C&wIN@Bjb4wdR^@uDLkerM7mrHh6|gI8b%9 z9LCjQ%Gg6LLIMK{lucN7fe-d#Geb4H&u%mSMo0p%i6kO>*J@ddhM&YB0VJUg`jV>X zDLjcCZZ_Su)uhn}6c^F}N}wh4^Wg-Fhy>&SN&8|6km}bsO*`=t5|dgzUJkJWdhXQ8 zN`_6s9bmOE`|{|VKaY~K>h$pGuZpiV5%G}OOvFs6Uj6gWq`Kb`$<*nc(j`hpQ~QL#9J5)B@`kUAC~{1(ZYSoEsGep$8E6>(mnrr4eE@?NHR+&_}@QppqgQFI$?MmlL$lmS23OzB0FU>Vos9iVa&AWs+-u)IrF#AW$%= zXsj7BI{u@pG!Si-DV-?roU_Nf*QfwR1I<;{A`e4V<4tr!@)Bwbd7iVPpwKG zpNq-ki$J#K#L1G)$=|*9-eb@L0IEuz{|j?=)xnY`DK;&?fRwjs{=V>MpmdPTdTEf$ zjYDKzflSf!)!ON!x3&uM=C=nYQ)JI!RLVQvOJh+8&T{%NR8)58=>Icl&6b7%=H!6- zq}_?T;_yvou-&{0@gai>aP6QR6pj-aJx|wB8ooAwW8gEQg>`;fa4GIg4LI^%&RUy) z#_uS*(>g-5uzaQmk#H28RWgnWSUj2r0drhn-(S(~z~5~7+p{}~3VJl&uc_2L+wh|t zeyj7)8AlvIa@}}PEDaKfu-9YxYA>UO#4A+yxKN5{1 z?D(iV*cc4bW9K~-`Q!-UQ(sEGDEZ`G!L46V6sl$;z=KOU7)jbO%@TWVj}jjS73zO| z!0R4g<8S5)|IL$FlKKG-02M$Om6UeQ6e;3e8{N7+5A&vTuA82k;-r zF*fDsq2VswVm8~KXLb4^#{4_`c&IOPcRupJZF|-5fn(?;UtJpef0%4xSG{? zZJ+=ko}$u;CSdSnZf`%k2lW35;l+?rJsQHM*ZG}%yRFV(NDKm%iIp_!;%-!rWm2bg zF_6aV(u}KW((v}$izOWLD;3}P=!v0WMavw!tq5quchPNa{XNeZx=1~6nLD`iG`j%7 zf+|}K$V>wjYw~_>M2aV5=j?XOtg0;}W-*6QinHH&8w6D)+Na*O#1kXgfFFeB6B*(f z+^)LeNwaqGcl$kcY+)4G=^Sfyp}7x>18NR9zHEGHkyHN`7QcZ+a9_x2bzYu~%>}&g zlaLJvXq)s1LnV%J8XT4!+#Z{y+#{COXywjxZ&r-!=pXx2E~i(q zRP918HZ`TjLn=nHbpQ6i`cUHH2?f`&@Fn2_QSz*rfy~x_c|a% zUg&!OrB4;oE%UrKK3%HfT(^=+QI?yVTa`URDdgM#6_NYWL8T{u$9=I6sI9{klJGD9 ztyySKLB+n4N{eS(6vAPmd~v4_NhX1B@m2YpLy(y_2diG|jF~_STeF(P<=%3PjAt;~ zoy@=fDiQ;cFf3~Ss8?F^gy_HIzg8}>SM z{XKK;$z=W0JqfDAn2;$(!#LVD`LV&dbhB)xMuF71wfdPFMGl@0?W7esn>Iz>HK^TZ zRRQf2`etMpQC(RHuej2-ebkgkdESPtn1^BXcT5oI41@RQ>ija6lq!fnFc#~U;p#hF zZQSK)Qug!uTp0mRdLs8_9C=JW{fai;=Iw%WE?AB2R>O1!EMQAtR&wyltL@;RYy>%; z9iLXa3@0TmgC?B87KosQVLl0`tKf!t<#z}=Oyhi@hwqwExCI!S=Sp2|?IyI3NwW)| zE3B%3LdCzNm4BPgCpev!m)o9rpS17PKxrU4!OW9M<7qwDP+D{p2a2);J-#K{uY@$S ztn5_RLRz-|w4Ou27zm6`nd@-^Umoxh2aUal8&p}W=YCP#IwHDIRo?vNK1el;Pp6)) z+yua6*494Bb!GV&gh7$t*%zsQbKoa@M1%zHdOw;YNBVl%VR;zDjh+pl;ub`MeP8J9433?ac= z@?Dbolv|%$?DTvwm=1KD)#Uh`)xqFPbr4hV{tq305Un`<-hRCEWwbqO*4{p7PJ0y5 zyR_!Tg-C@1A^MkNRhG2u2c>6m$FCTv%>Tx$^I0LY$Jo|h1YPC~iS*fW0l<;u z8$fz<>Iz^Ahh(}P83Jf3AP{1NM@$>7BFxfv88ec1PXV%;nwU_T-1)ik-#+Kz$d+M$ z(DwIM#-tH|;v#PAj2LJ#uy;B&@cvdSQ$dIyqqAh;->{sh{smqE;Q&;6se>pVmtrPfzlLZ4tIe-7UcBOOw$g zgN>(b{w)J?C$fUPiJygW3#_9mt(J>I-8uSF%>8WL)@HuZ6$4=Nin`ZHErgbz)A<%J z*@fvFbb?E@%26)+7ESe4nzIl1!y%@-?^y94XLuR!yrZ>)FaG*~Y%givT|0uW`=WIC zFxVyc{v1+>w=t=~80gQ(k z%;e6in+Uu+t6Xj6Dx0I^pHrW!hN6e2o(jJ=yii1H6Hsi6MQ4b2Vy3^D)ozOKt>@ae zA4}z0(x_!DTXq*sFLOCTD^pl9nIrJ_qH9miUBzo5hO2#a@;7{@owBk8UE8I#9$Um7 zG!ZIkv^u(-VBh3I7r=amEvJ-u47M5@BN?mKdoL5!W~N*|!|Tb!*?xHc{(i7@&|g7P zbR1Kjuv;gl9n{w2hX~J#XS{)}9#E9xFmyE0lfB-j%T#(f9>XS}cx+_`Cy8xJoM8&Q zPS{jhR*+v@C(4dMVRTq98CHvifv4G2-lhryi4UW_7 z-gnc%#Z-`hYvl#FTKz|7S+GKA+oFhux9;f2=Kr>iVpL7mO?b5M=_Etm;3} zqCc@8CL3jM-xa|f?!eyniQuQ|+O6?-W44gK?pSi-#1w02^9(Y# zW!~$((EJ5P+9eQvCY}8C$NXH5z?IW(QxCYx7eg+~Nb^*>r^z<^YE3utDD_IHP4f^s z9`G8lBY@Y?qJ*z+u*P-u{M)<)sUMXAQMhvBsQn?A z`T6m@-EZnr%>!{|qvEt{G;vW7_TaPdw zFSYbDveR|W#*C-C&SHXwIEF!VLv6pXF}0mgkm$v zdPdNNq24Cz*m3vC6cpq|rn>BVefz5RuE$G04OOvTTb8_7laJCwL$gDcH@W{&Cl>#4 zQ81s>+=_-(m6f~+ylPNwA@K^g(`saEbR)AkJfuO`BcB4A(Oh)`RWChkRD&rt3$EE7 z6kbg1u5ddJYj~QKA~0cLVh{me@HAeNV?(ec+mmUx6$QRC4!;?vaV?)C=+kOjij9ph z;9jeO;sI&m2sgqdCd~Lg2=8RbjsIPblhuEYE$;y$l&l?Zs(}!b*x>B>Uo}t*L zJ`Jfjh1sl5KCoa> z$4Trqb{3^X?HgUGH*Cz}#a+*P@5^Z>=2-IZN?a#1%_c0T$`3A`=O}hPGCIup^*aFX z;fvHE!Q+Uk?lQ=~tcp`cvOiqEN*JwGv^qbIMPPzNjHLh2;p2R?a9HojvE0e5M3X>x zq5@B_iupiQp`LjDH37T)Kr^7~n<&dlsy<;Hs~(p1;Cg=-k$jTHjmIh`9u@1fLL^B7 z75_-I6(9#3Ta*_j=K{ZX{Qpw0kMF!iAa`geLZT;odj(WbJ${2yBCWWaWNM33VZ==D_t`&_;8-`$BT@+=KLhO3B(S!U5GTO_^H8W6W9cN0Fs- z5OqPocY=J$v}ySRVJGr#$qBwW(G9QQ+5w~|qyl#tDT3zfc)2+8{9HE==_*nKvVVu{ z6=66yfO8Jf^&`Uwu3O?Jo_Sg;e*^d?ZN2fZFn&j3fxvZPEX%llhF4q()DXdVy#&XK z&^uU=1u0|t{6O+2j)=Gjnha%jgo6wio!=94NYmbHh!|{-;Fk2)r*=Qa_wG@01DAQUYxR7_>O5- z4Ld*ehN1IF4tnc_YuEf|tr%K@D2U1&=m;&xO{&3Q;=9%?rs&^dKGO^ z8eTx)OPdgdnuA=39%rSa7mU|uF1O1Hw7hE0_h1h*jm1bLWp&P=V0E)uA;4*9KJWTcLH7P<+qXKbdZ<5i3D}H!{&w4mj(%{(TjFm5Sz#w2gyqWO-Op?~0a6elis1^385Cf4Ij4N_D6uIUWbt2&ra zE6e>>l&kqZ39tC&sl-@MMKrgF^Mv-~p~JVYXEzhdSzl31cl+NbUwynYcjBQv=CM)c znSGOgm)Kkv`U;;Ix%jwSBnIcSsxg=#G1Q|7m%8H7aoI2HWN?SI8g0`1^y&ev0PvdU zSz%Q$KnJ2F`CGUHHmk1TP!6{BB$DX72Y<2oFyAKC#HYQ2UVNgm(rSiVw^amNJl>4O z?~l9HF5xV{>R1f0M5fMk`TkLQwB$>+w2nqBd->iCy^+$yDD z(7R19>MlP{wCa7*Z_LS#CY0~CU5AVWDx%8@trW=bIY?$D@I~w@Wv~pWQ<8>IP7H`m zei0MW+m`vhA5Oh_?e#F1<0rJZ_} za;eCua|rzu%dYvPwNQh4>Mkc`P0)yRXJ%sJ*$PJCBR>U#vn6~lXB)V4_*W4s7VO7? zGk9Gb@^pV%pC?-vCSHY7@;~itXi{HkTRBL(buKlOD43#Ec0I)}Q=_ufj0;V!CHoZW zFb8JP3pUDl>!|BDus0pj*s^uC~zyLk0+e<7x-&!4# zx8I`s6FxDp-i53d{Ou#wES&>ur+W8&R_S$oC)cY$VVXaojM39$c>mP!Xqqlw@~urt zAt_z~MyG})RBo43ElJ-bW*!VX0tgXkt-vk7Pz{a|?b8*k+*i4v=Llp=Ku>Ir=^g4tS7f(XVP4^^ zVxIBbf8c>Vl;3YOgXq&>GJ>NNjM8SBjIgP)e zKT7BNcsI^9K4g`%l%V_9yY}M?CqFOD7hrgS=-pD)dxtbdS6;j`gfn}`M!J}XgH`j1 zdsAy~YsiKkod#ZMwi0-Ioj%B+CoL?c_z|i2S~v}ZsUZ|VeV?V9D8GR*hN+GSL=XkJ z)kq)}LI(JI;+Jk`I8|y+I`wgga!14MU_3&9zQXDECmp8grExvD^=YHRB(Jgu53p1> zO)w*FxXGg_Q(%HOPeu%63`hi}TW`>?V%aZ(pAt)WuIE{xagH+WZj%BiEpQ)I?x=j? zvBI#$CL?z65$(K92%dxXU`fXvNW=SL=}bKRs6Dx)U(PA#!jmSjxb(&-NADqB{WAUY}WJg~E3^5cckOy2}K0J)w0G*&{6u0oq`!gzvRWBrIZf+h|YAb{TNb3ZP zE+YR%hr8Em{aSM0!X^9nl^&;ln_VkVO?yD%K|*5;pQfH7m?V8hZ~8z5~vNPdT*-fXVTE6 z$(mFHT?(yv%ETLVM-LIvbQn|0nQ%UW)w$;@lS%E4kAk1nsin+b zFc(n9zDN)B{?jg*71Mc0Y7360csp*L6Pk#nNVF{P10YKX)9kq#z52xptRKQW)6?ol zhc?(195l)UTEuneCYXSVDdx^!byndB#Tw>V^3vPql(aKu+S7et2ykLtDw=+lv8sok z6CAH!%#dBsUh5rN6_SerEyx)sHwDDr6nM$)w17TNyqp^i4iAY%!2+f<@vmg!rt^jP z)J+k+H|rcvJ9Xunl}i~4#!3eU2XAqsG@uBqGl_z#bz}OsRson8ImT*y1O4PK_ zQQ&9v5PXM%4OP6x&p)4^crQ4yIy(R7n#pUvk#aekXrKW-BJqr_)x9W7Uu%cShA+ZH z`bKpc>>E4wG-0Uv$KQ09H zMh70xdm0J7AWT}wMhD#ILGadSl9$DX)&NQ8&$q^J9XU&Vito8uX%FTT0n&z1c zaLh1YL(=}CB$7mL+cl~Ie_`*+D;8@o8X`M;98(7sSVO&01%NXER(Tj|Klo_g(>lIE`#vdH! z+4ktrWj05crbyl*(t@J~6=#^DjrAKuMX7~L){7DHetvMrw4nPx=ei{v^URHKd4hIL z{UA=kyn-5?ySg{1Oy%LOsqO^{QaZriW4QZD?46%QYXJIysaKOjA_^a{Nhp`SlNsW= zz~z8qaP{rhPJT}IZ=@3>At}aMo_?fp8_ozjlZ;iO5f`_Gt)<{Q*GiUoAYfoQ&wSer_7^oS$mEn;^v>N#~5ln0eGt=AN??4pv8 z5B1DRDnZ4Q4NdRJ3D-JYBdW{b6PK^}t*d$K^5s8KLb^ys%y{`YCTI!+1a^|SA^(aD zi&($lyl1ozEh0==7*#w8RA9b+{XJTeH-)>Ka+`8c`O>(}qa=K6jMM`z_ZZwiNIM*8q~hy&b)H z$XJy95x%K#XZVLRAFU>3Qmaw#xU9kS9tcM)nKgx$487gLFaP)qBAvDtU{>>%45iRy9JdhT~ta z1qn@iS*WZR*a}AZl?6B%4BNLC8eYnz7i>DYf&q3 zWQw>M*~tnUY8Au_$wWsyf*Y7;;KB_phRN!`sH>(Uv{b~(MIyRU3<@SB2k(%gme+ia zJ7TA%hSvB!vStDwz={>_0kpIL)jcHO%V-(F^rw-|SogP9Wo)=N-MV-a1%u4@4P~Xp z zN;5Z?<2%U9wrM#Hl;b0u={;?Ew?}e0>GwT@xy<&p#7jjsW);vokD}{x82?M-Yz3S= za<=nLcO6ySo8NpEEKkR^W)U9@7kIxM7#jx`VxANS7Y(sLrhgE$x<*`cWG`Ylac1|s z#JztX;fJpL2PZNPlaQJ-C{Kqame`XzK^{;NLELr3pMr4b40U}cWpJOw6)=WEk%MC< zkbT$UlM;ckv}c`z9pEXL5t6Vs4pI7#s6i%7M`r#_v>h^XJ;@znxgUa!i`k)3Y$$%j1R>z zX^@Mj^Svj2XV8kFt-({quZ(ie0MOGF@Pq($At*LMVkfp*p3qiIy@{J!{K63))&y;P zV?mZlu<*tTpEnPDwn-$TYSi>8`0P}C64fGHWW}%JrT`w}%#yK9UGf{SHq72FUa#Rm z|K*)*(CzlM28d*=nOul@5m7>X(rd*PGY>($pM9LsGyWx#WF*Sx-2ok{qT5GoiWW6; zT#|$2E3Igp27kXXfGso}SF4N?nrzB62VG}g6-!&`XmLBbf2!KE3KA8KYxB^DHTXbX z8KA{f>Ib0^rZ(qOJFfh%K&MSoHfW2e2o^JVX57bU!Y!`-=F!!nl8*1iu~Ni_o!HOxY9DjF!qe5=PC~-L zh~l`rc(qy$E&nFOFUV1%s!&x0g#9rTMS;C-Sj3eSf2qK7`fky4 z9Wl|u+^}X07et!QqrOi&oYV5binhbp6k`F$;}R45;uRODA4M2_eLP6!DKCB9Y;AKF zkCokH3bTpRQSi5?j#+}fV)4){C(Qpd#p!mB~Q{l#CzR!#LrOwtjnue1Ng+JZ)!lBO2)(8*7lDCkinMbO0C?%D((E7hT2qf?^N^v+{1B9H~T5` zjA`5@RPb&O6}!tY9?Ky`ys{6p1z$ZuZV6A1PSvqGS*6yoCWf>48N1U^amD!sIRL0V zFquqaN-pj{n9mm+jIpf4htJme1ta3n2=pj5Y-y1qV|P3Z!_&uCrn-$ebk>5fg{bA$ zAsHo=(aHpOV_gqS*yJ}JWy0yS%B2ijQcjS^9`0d&0>_HU#F>*5+V@kXWw3xZE@D19 zfSn!L_n_6wUvcG}$k(o0-`qT+aXy7wP`hyT!(Im!AOr%864IN^NXE$hqs*;lnk?W@F_p`v9y+%N9ledaVD zN;K5m-n6i?1zpGSlORnpv5hGK#OXr$uXkvnVL{F|b1LB)un!s-^{D~aQWR|mjLvF^&K$jYv2u`Ku?ZSLq zB(@)Qq0o{Co!^N>1QdgAp13*e&nxtjyt+)Qs;f=9=8wTwr|Z5MjVevu+hczN&gNu? zk;R5?pVkn}dpa;h6ugtIN5BkxA8!m*#slKQvuorROL8_|!qEk5nsm`opr$BNKh1MF z1pxT|LgOBIO(+`I?~-2t2LLi9$jm{TDRI3D1g}uN@pXLO-atgs^wL@K<2t(8HIwPn z8J=~?xzSY%%Tk{rBHjrH(tA!dH?bdwTdM+v-$#3)IY77M-zA z0QfoH7y#etC%{NQiH1ccOD+hNnfS4(v*D5Y{oL}y_ylw34Ne~m7mW>B0*X>b)>2xtC}J}VA^~(< zTv@YdQjeCB(nq!G%@u>+W-ixH`R}Jm+2~&1?=bH!$9PM(Hf%}v$&IB`(;Rp~h~spr zy!U+)j7(zB&=yA6blj*cjs?dKa>_6<2=n8HP18M@C74=VUC$x>ur_8NDUY*_&H))C zeq+hmm0Lf&uR3w_Pm2rZG`^vrd5AQN<$4Umu`JLMqk9Ai4gAfft|oO?)zx<3gdNe) zA^{2RhE_{3)w~7~1WS9mV!O4ZyI>1kWt!$O(dq%}AWhIu*bVbX)cHFlrfzB!MYPQ{ zgM+5z50V)~Hujv|n-9UTpdLigS5>1HD(&smlD@jJC{XNST3XeE5<1iDQGR3a&dC$k zkUb-`&F7l;3xM5;n#)Bjr&4^UGghVB4eI$}Vei(g0D`9%HOZ55!Eo z#OZ$}D=#Y&A=}q&SSXGMqAJZj>s>UD;s8CR%Q8xI}+!9<6M)k`AhArJlOeR80?=9-%kJ003*z99n z1z3t|IAk1S@%PzdoJOhpGngh-A~re`%EyTsFxbT9fP|7}usjTbcgo3^1grym;l<32 zUY)5&BiAZkJGifm@>whJBs2i z;@I(_c7DIv{;^^{1a;mXt*WxVh6jZwk=@(ibAh_zO^wxc?5aqKcY$gz^BU-N9E$_~ zLp=?!sWa<3P)2UYW*a=ElR|4O_-4#vXxxRC2;eAUqGyHLsvi$=S^i283nY{H?Pr=g zxw6l}vG~8j7Wu~}3xwe5}$;2(sWyR}$?W)jsm0s{C9>mjOM zbn0D}V}jID0e@o227B8z{*Fr`i#vo|Rju-~AS8v|D*+j|+G-nJ# zoM{DV{o$cYqoRMwqV~?u`-MX|d!XF<`yBl~zHu_6cG)b3%`vGkU)-p}SpgUbkZR2e zAd_4?Xd|kDV;5OLDbk^r1_R_yuiJ8;4}rZRf%-OvC-gHQ%eswK$}p;b5{jf;@Rvx1 zAjMq>NJ^H&T{0lOeno!`X7)(+$@xGS2Q!GRgAk?)N&h-$tJKpKCl>~}_2!Oy)m#~}B>tPadP%_aSG+;XSm4;Yssxm=VmAVc$&~OW*;?<^7BgLb(HJRQd#M z|02ehYO7OWj2C?|hzJQyR+N?A9*lG%%I52z)GlSJ-z-rQ$}{$l?F;g+ zKnJF4KDr*sZ!Bw5USV}{zaF|^!VVr&EqiW ztYFS)O@Y8h2U#p{^MwYq{{9rnSkR?K5+dLnS?ip)KAf!y;9{0^IAc2jUuP!Pb;FB; zI#nh7yrOe2?7N#OF&j-vPYfn zB@dHOG^!wspIY78+HXda&-jo*LSjm^`WYE7^&rOp6d!d#;JIC4x7s3SFfy#r>KyWy zlA3NCseo&gujLN`Q}&G|I4eh4{NvOu+~3^1Pzv&!YML90?OwQtvJ!W)C;2Vd1}*F+ z$ad9+Ldx?B5q;1wa`4UJu|;b>_&*ChtSz77o; ze^5xjvY@3wl|_saNnB2NYlXz$t^|~j_oIcIm^*(F?g%_8$mq*)Ue=SDRnk}#p`_E$!E6|azP(nu- z98{V9#Fyn{0S9djU@c2z9m8v{S-ayxA6EqB{6;4@P|+UT(QHu_>|cC*cHMHK9r3F; zTOkDIX9q50z$!8aox!)7OP(T8X>c`}BGO@^EJw+MOBkQXwGU%GMaj)?S7rvt$@ZM- z)$vyrM$=z({)HmwEoear=mC&r06D~2cANx ziaGk){bL&GKx2jcl;2DUh;rccF~x55|2Q69P`A5kpgP(072Tbn!E_N+@kW2ob$hn= zT~he=^Kvo03c{vZll@2#TPLv7?O>0~H{)@I$%Xw_Ui#%+5iI|!Md4kM;b`M)&- zJi&=W3g6x@Om!Wx*`lALqcHMHy-RGGCzG~PJ18CWu1!?kt6Rib%3$}^H3@AK$e4fe z$FylIjB3UfsT@Hko`j(g3`F$6Hio0Ecfjh_rws_G)?}y0xY#&JJ;{&~aJnuXQv63x{KMN^T0&vPR5sZ=I(b0Ga56C4RUM0vQviL( zyG#_yC%+0#<{``{=aa=PL?cX@hnH}(BUk$D+P7R;g?cyE*4VbiSKYJ~`IO4sY}dLo zd|>r1dMF&g6{#jgsNGIXtd!OhzwH4hYssO5R#t7Wo$A$d(y`$ z>CLeFaZDA29i-Ji|oj!s=mG&ab{JUigV6M<=clHS#Fo z9UC`Pj;da-5}IaqUAm+|i&;E4;cBxj!)EOX_a|RQQh=mlnx3UR%gh?u)~zs+c>lPb zgc$30R6V_&~ z_Yku>MakP`fnuOcz|99wjKc0?=4|&S*J2TdvNsBp%7-V+yWS;cLN-Ni{+O{~%nLrC zG#s*aydZetB`KY#%>`K}^xcj@wvn5EZRJD!1=}Yxd=o`=GDX4_?(FZ>Khhlb+t%B| z@e>Tjoyv~WUJu-_ippQN~=tz-mw2uanHj~+L9i|H$KI;%hyw>86LArne4X8bY z5v5JtvCbPb zmOX1a!$8<&xNx@Ix99w&Ep;YpAC2P$rC)Sz#7bMuK1HHNswn(~uFws-BHv!)grXHYfj7Whm3es+F=1s+kfOjFYB`!=K}X z-Z_%5vovm$w^t}kG%K9OyW#Dt&z}5h)}Wi!aOA4j8AXE?4BstmPv-l9jkV0w`Z6fj zR?!xejh{oCz9}M9(JJs7VU-Yhw@ey)mF;=F7ji8YuJp#U@5_w0XifUdx9>_AmUgw) z%sFaiZCx$f`y>M5=5{ElSjoa>K%I~uRmlz48%j@m>U-t8BG-xC^&9u(duROeuvcf5 z`r)6e)GzR=!%hPJ1MDQ4MC%`6+mwO#qmF?&c5l~aIa-%Jo5k4O;eDg@we$52Mpb?6 zh!suK7|i$2?-ymZkCp5TEWi(3ukZTLcld3fLPOmQuAeNX|A4o(>t5=oP~Dk4o8h_o z@Nm`GsNvUN!}Jr`7JN1c6}P0V8)FqFM@m@Nh0U!3jyB2t6!bH(kWZdvi|YeLlM>AO+W87Zxds;G|!cYn}bnjOMZ?s zuQiOpxjam__C8XrsnJZfaC_6F*==K}X*kdOw?{~lmQ7aUHzS2xK7ItnN4`INk}R*9 zd5ovN8E4v)`T+~GL? zVm>1MMRksBO1$VWss?bylMh~AT|Ue^Z)n67=2zF_zbo6hbLWk~Kof_e4a-G!s~S@4 z?7O@MFWq%5UBKWeuG-dp7@4cIrP5LKP>XI;8u#-?6@Ev>@J}*2L!a{&KFrR(jY5zy zTlMzNV`zXme|>+cc-_swK+u`R_^>+kgEOP#Y~&|?1W@ zi@S|pv%v{zc04oi-6%P2)r^lVEiqU`N$DJIik&bVTd;B`hJVUUFp&qM9E0`3F*qM& z4pFnV*$YR`;kevVNt>9Fm$G=iui|91q6@#}t@lV0!KsLxj#HuX!-YMV1M~|1$rfQ` z^p(2%sTd%{7w%Y+aT*aKpcY3V)njtPOK|?Q-J?3d2|xs=3nnIGkSs!r%WfLfynfkl zHx_HgS~B|Mf=h$%F9~*l=Jk(+RJb!h+p1mC^+%YN{tZ-AgJ(0^$+aKi-y!pqqjys1wcZzK9llL1LEbr+P{>mQuo(V_ zG&Om~lh!_mni?C9aPApy51|!>yz;)Z$Al~Cl8(&qBs}%B&u7r6JnZ*pNmH2ycNrj>JF}W`X8VdNGLwchBJYUFYviAV-K%_)TL0Q%`{_YM z#9n`Fe#Wymbarb0h?qxB3 zEbG>WD;NYoY?m0nt#P4;dwbGC2+w@@}kl8O}##J+=zG58DJ+?L_7EvcC1&={__^|?ROV4%+9TS}OlOZbrBP>jqXT-q`Vs7pS?yXA_Vs;@_D;PF|>ocn-}}$oH4$URM=#%<{~n_ zYybC|XWDJ-q>|?bPVALjryq?7ojicUsf@+9tIk7-NTXw=Eh^X7k;Wrr=goz;@h@+Z zM=|E>z!`W~v5G_P(}2~NUt7nLH@Kp7GfMhqkSuv4LwVWrbDHbB1eXO3Kzf zybLdS&?DA4BT<+GJ3TvKKWx#ED}q`UU7(K>nFTa(%P058l^G(;=6s#|*GvSA_npD7 zH_(?vjTAgb(H|_(ej4yt{pEJAs}g$4Nfm6vt)8WZ{~S%ZlaFK1j4sVifA0e5ck2vu z4nD&E5{14OOJ?Vt#K$i^6I|4i1Q$|EJPR6eFKvd#%kNK^l0eCe`6#5?t?wm5i={U` zH}*fe-R*RJ%rk*=@2ht@e7ePi1q&z0BCiLv$h%qh={(0QhruD9XY<6V*9Ul0+H&jG z3bHu7zFs|z&OnUqCpNDd5`vt52+TH3Vcizs&QOdWFHkqv)zD7RZ~Wp8y8N;`DlrK&Mq44xz zM`c7$M$X@`jF1P8>%PY;8RBc$?%%$5D?8UHQm_q)+u1sA*v_!W322eA$_(Ys00Ow7 zqAHAq){zOtkD6E5PO2A2S(+u5RAP*QwxX;^~7|he@5FJ!H(AkoxgOA5|b} z*LQXbJm)$Ql^Hl?isf8)v!P`6UzkdDtFt12w%$IoUhgAJ47Q%?`h|?c) z`~~c7QLV7>*zPo;lsMsv&eUj&n1@*m*|r(xdnJ<4_brbkOO<^ih&oe)sP42*GbE}n z90(lj@mrFa<=~Q_|7K`jz}m6%+h} zgGZ&y!R2|3xXZQU@F-0&dbK#Y=s{Xy?C*44Ues!)y%Tx)*sV&c(Gh~Q8&FB<^u3Yo zZKIJ_yK|!h?N|)uZbZe~#VV=~)WYTEGQVNPSUwAg2kZ8ZK3 zRz9Ky{hj)gJDn$bCwh8CECHxfPQCmiRxmDBrf(0+-_EhyVGk^PyoT(USD1fL#JW(< zdmiXBQt_Z8Hghn)OFC?2761JG8M|{$Ic_vmN!V_00t6eFnY}Dp*X;mPZX|c*gu=_l zVySL-fGdSKsc9o8F>?3ob4+=u?y^zgfyg^4w-szg?vaPKGIl1=mcoz$e_@d&yY`!kSXT zI=HS`%qnhiA_)u^S&3tsYC3*kG_wB*l|w5_cD;7712=NyJGSqj`{Y;?N^xds3&;;G zB7Sm)w$1#C{-7=fH8<Ff$L zsQIB9Sb3J9tN2a{raoHTI7de5b;awjH23zt)yY!@h%|ksn%)E1iI{hZbRMm80wq0I(@8k7yV{S`HUOK9+UTMEiDbGd+$5E+(ac&7M9$G*r%Rnw%IWM^@NnM6@-5+0r`5n3VbqN=pbG&U#UgO1ld2LWTT}4TSWHx|DT%Q!L}pxWPR|{WYdqJwlIG25 zh&XgwWMOpzulnP8JZnZqMzk{RVtO7oFKBS6vjivPAZ4J=v%v)x5S|C2KI+DV`36FWEpcE@dB zuuLlfE>xPRKRn;3V=;=`Se-|xO+M!H-rn7dr#Kp|P;QW!yG&zH~S9`cATgawyH z1r^+N%A?dcg?nBvYDUm$&UWX(kM6_<3p|9JK@&bU@AZUoTq4ddRsIVV6Qcb6Am1AA zjAn|4eLLqcEkSC6WG}^3#Yt(Vj@w|R))S{htDF&5gKK{=PEJUCR?ki%TX9h(xRcG3HfKHf#5(SJ#gJ6g<=RRjiroYIK2p zf2U~KQ`^1u{Wy{)>i0`dY8PUYxgJ?g9NxsWyE&oX5;_C(B&Fe4;t=e8>sGZ2k0-Qy0JUa;I$+Aed-{ zS&&-U&6>UcgZUIX4wWY7RshYtkL5=f_y`_YPe!W%z@nn?bXKABr$cqv#dAy*L;v$~ zdwP0;^riaCmR%OBDSa&*1K6FByUp7G31au0U1^qqt`naB1kk4s`u-op<>A(h*06F| z%MZ3HOx9TV3&-i_>X9a%In#3yT;G$zc-J*!Tx|EJzqZd8b^vh6cKP@_h0az}W7Xe) z%EawD8lCk^aBCvaV<6<;$tHuKAGxJ+JY!yRgtgG;-+3SkK74(TO)YA(CStvApnxv6j z&SO**wr6k6ocN>kH6nsU6KGj`N6W4U2PN03SKw6r@Xc@^wFTiFW#4maaJ=yUi^lW% zj^f1E4;X-AzdXxV6Q3J#^X5-KwGA=LzkXSa5^!-`YXv+VV9UC-i0HDx*P9c9 zsHSrI=%+M*9#e=hmN;DUV>7%q{P%bHfW&aR-d4Q*P>t8CYO|IP=Ry?R=0sqT7{llR z_XY@t8iNo#3_2xwp3yBXy&Gh<^*`waAW*prhH)`f3HsJCYPYr@#(WUHAn>EXSfG_8 zpsxdiu#D{o4W+0)ZIrXwqg^LYJ)W^!)BE%5`!;R)mf&?9O>gmEV*y>MU{691%gm73 z_2AI%bsOQgg*6f)+c>7)&IfRD2I3%&M>j5@_S;B5Xf}8Jp~SL(ABKEWLgoN32kCd6 zZx$Tp-E`IKcp7?wAzRO_;e11{-TwiV+faOGkG8pGZgqT8^Ic>EP72A(4|w|#^K{Ee zC|GXb?Otc>=|VMMxbMFnV#LqFN2~CLKK9S@UvLxGmlDJtJbp&RQ_e|@+ZG$KIbmGy z+<^SDgW;#8rA*=BYycdew5(lvbJ;TSKOp^ox`8M`xwLM5`p;yQ{}2>;Ju?;YtO>m&lmA}pNkY}9K8ejMzZMXqYyclGdBl(G}+NbU!?)fHQc8i`4ZF%c_YQpjB%x7PX9_eE%O?C-G^lfi3Dh2%>H=1 zjv2mT)T)1f!@_+;D+B0^uG|!=;P!_=t}Rsp3CK$lS@?3Rin{RJ(d(5d;zJ8G`%9bK zrdw|=ALh}67iuM3OIGOW!958|H-a>~JE-kble9K-m+|YRATnyvKa=?W-v?AN>hkZe zfgSx2;Zez3I9%C^YK{62Q?ca-x`eCp@sZGZ@whS;aRG!sihq-U^Q#kK^ay#aQlPDk z4N0mgSeBvAiF^acXes-SqeZQaXnX*3c+(@D{$G#O?(e*pGd^wQ_KCB(dyIRm)viZB z@T4cp@c#lXwGH$!I|;QXhdO4?JhBOP2pttMqY(7pO9c$>KnsV<%nX0Lu6`OArnJR8 zK0^`xg@6XD^nn?kc0&JtzC7?l)C~aA=7PDf^+$2vJx^+&WOW|uc4ILqRQGS5$YQC6 znMzOFnCLusdi-0ff)9tkc#CEcrJMs8vhj5e#By7Vh*O%|p*2?E@l8o?tScFOkT0fI z{}rk8-UAw*mbcl18gD~S#lAkG_CKkI--9g7yIl;A*H>bSKQ2ysaU+;#jlTQHed@Rq zhAXY5#SY`-1VRkdUQTv}5qopnm+IUJd)+5ahtZ@@#q*7XVCI*jfE(Vy#XHWQSMZQx4Xj9Ib{ZAXhC9dR#j< z#BqR5hCy3{P0<#hK_|Nr7}C9U8*VTGAwd(C|3n49O@6zX9#J_JH`^~yT}#7X*5GB% zdkAv1;&JO8W)*6;c8C{)9P?Z zSLqU7?0X4zHu!3@)Z(o(jK_nAj-1$I3?eORq&$ieJf}|wa)Gfbq9S`RhbVGpfUsQd zsbKKcEZ(-tg&tq`j$2RGP5z{-2T~W~)?C23QB15F!6>4Lbh3A#4-3T0^cDTD^C>sq zuJ*vz!L|7DidPIx-n748?jA?PjT+i4ZFpB~3E#JBG7XUkG5Z6X+{Y}pXy?lVK4ktc z1-qx+e?{z?Zk&Bo*}pEh^;Hw;V%HrrF`@I+#*zGmZn>g2RLyZ zNm?nUek^v--L(e-f^=ubfpCXAQMpTR_IjFZ+?G|yN;d1M&j3yE1ig^8aKeEqESdku zyw?8qxb#LWmO84Wbg5=VLqY$SFO(=6@ORN*lFU+}m4@WSO)f4d=xGO4iqepV3MKG- zZ~kZy7zYH6mR~CuDkm~lrS%*=?}^W^!htfvKU{n8HZbfw(I7-COegrWdwRi}TjhCT z-?YN*!el~;oRQN#eh-=$$xz?>D7+0vZna{puNtGQlr^7z2DA^)!K8*#AQal@(o!+m zPGu%Aaj77rJ9TN6<;m)J{YU&moyKmPu@!h9z?iI>4&{1;KA^PDimt2P8xjGB3$ z-3YWTA=2Iq#NKuXX!AIP@h;0+sg(-uq(3RchBE@hLQDum5Q3F+^Q5o5vyip~aX{^Q zjc`N|_)~Zh&=fg z4P^lOux);sYDgK+ozALxc5B^gX5Yor3oh(|PIkr|8W=POQeg2iU%2Z1wO!7YbO$pE z6smIZxhFec6abkh%#R2rJe+YXMxR}@XeLTnR z*o8`pHlk=#q7AJ|QIU}r)F@ierfGHT71dxWQE642qG{2lEJ-S@WTc|BX_GeE=65|) z&gXo--~a3Xo!2?%PegmbG+@Y9hHE#Nw`C{t(x%W!OLaT>f#>N}{K-=CO~ zGb<+L`vITi+0&T8WIzQvXjDbP>qIIWi1`LjTQEdcMAJ)E?&&5%Zf2k(m~nr1rDcFV^eW3u3X2h8`0jXgpdno_I#~mlTz=4}a$0sg~u1&rCE4Scu zwj1?ISKyaF2`>58+5F1(Lpyv((~V@^IHojSSn0WtyBokyqR6qj2@U+OY`BQdZxHer zchk815S`GdemTRl zrrdU3^ByOj9pv)GN^O{|0fhUomM8Qq3*AYwnapA)`_!F@2PEOZ;el&4s9emtsLKwf zSH2dS*fx*lLedY1M}dU@rRT)C;H8A>VOA}`lrZDH_p@*NN-6u+^+o^dMvGwlJfSc# z`C~xp3YWK+DcJ`lJ?3Lm6_YV7?A3xK(NzoU&{bndP>wQU=&Qq$GQTgL#;n{N=1cME zId9cdJJGZ8wLt5{jRi#=+G?fzSG!W~rPk=#+J55|3&&ZDd#M#l&RU+95xz2lRni74 zM?>YEZoYXnT!(wh&7UyM3T}?CY5Y;a+NnE|p8i4IeMUr12YUo+5a|*fgLf>G0tr@d zg$TrLXyunntjjZ#^Q{Q_byhD{^-FxE2n_WgG{hsnw|BkR3=9>x85x-Z{109EN3~w} z6p$V0l#U;d6-?NS?Owjw?5WJHI2XPW^!Z;@MVwGh$h>oBySc)LBHLGAJLMGJlq`6V znsZSeq7{n@@9Bl!UASUrKKot}t$Frr)t^3DfNxb34z@aPoVO>K_pxjDAy7Qjr>iZ2 z)phMd3NyL{vO1G}`}R323bA}Cjz(6RrD&RPb2Y7il2Z=2J+okC*~A=MW3qK zZPVwx;yG0Bg+*;>gjp@TUd<)5gj8t@E84FWHFpfBUFaL&-)~n;7o|EGis^+$B`0rU zesV;#Aa@_s?AhF&odyMV_pNpnNXXo>O)AW3;9Ox*hojODBc`o7fdS06w-n?7wHHeW?H>ghX8y(C#Zvwk zp3!C%iDXZf-*?z@&d(ldw{Mim(=87Q6~S|<`;mTZLiQYnTwxoxO(os!_+Kol z7O!_`Ww`r|s2)|hBpB&G2@U>dOYXQF3WPxquo6yXr=r0TnWJvVXj|Thx14twQ2O-~W14^es25R}8JQ zf&_FBd3~%%!7>AjweTPH*}^@I`TJ-Y7|MsK93n{V@i~=~jUy^$_nB1#Z7Ncc$D2F4 z<=5tfjV{7zi`VPz@kBeAyiz4Rvryzx#glP#*Gg!xx?M3o>~;$eLZ42zBl%!1 zuD+WgrPcZD>TN)FrNd*}Z{E%!4L-8%!QUo0Gx1{>-)n3fOT6$iGa*oZLuH1U&3cFn zz|k*TG8b%uyW3oL1NhgO={FLCiUreVB#vD{{ZK7Yh&fTDtSDKL**H;~IAin&0EOG6 z(nZSbeRW0|3cj#COLuTSjfe2c3%IzKFHgjff=I8SFxTD&$&}h~dgV(`6^neWTS)XV zL=(cpfJ^UeJ0-!N(s3VoyT?Em5-1S zc*SFpoj?vVec>I<1sK}iMFj=<+Fa_O>l@c+mK73(2T_(${DSr70Byu;4R%%5?(&p_ z@h{4ic2|&)z@@o_2-tVx;K7%vETEv)0H==iw+RY3S>5S|d-q2D`0>N$GvutAK%`G8k7D`!5?qy@Y#YeI3`x-DvYLZ;UaW<=*2r36vmd=Kvu?FA zp$I4{D#8R*YV(PGlMfOsO{TalWFKdg$sE?KO=#4jr$Y>sH=p>@ch>p)=N*s`5ndB{ zX=edi9paU{K{#|S)x(jMpxu(rJ(%a@kBR&4;vz`C62bLOgaDt08uVHOD8M2*mlv*< z+017GJ!ZbvBL8;x)u8aBb9O9iQ?TuFHd@VPxi<52>XV^N2(`AlVl@1a<5e#X=xt5AaT6}aYS0ai9dM`e6e^>5PI zC8B{YAsvzm3)E$7+~(Da61xn%7lM(Z{^e?VRM(Bf1&JXP`S4w0+TnENzk7hW*{SVq zy2S9huA$H87j0PFK5lY1|7YQ5tpA0V&sV;^1frgd?rAcGnOZCqR@nv41}Qnvi^Hqn z5h0UdjV6&EC%WgMW%O92x(|?f)enpxCj?@tBmx(4%_k2#sDP+NZpU2!LIdZ5m2S?D zUSaGbVeJ&b{Nm_NGZ6GK)yYsdb}~){tUCMWVc$BrF_YnfOP z7j`_ypezttnEcJ?Ps>d0S6e6WqCq`83|2QpO$Bz>X+%+jcIZS2`@?-engf6zp!&jV zO(BAZ+8-R=;R}g%#{2ijefo5mtbXW9@elR@8Ee4Wa&KLvXO)Bw{XmMjJ!2{Mi6Mm& zhV>C$dkn)6-jtQ?2Um-w3{p}&PTx@L`RCO~eQm!yzj(sVF5nU$&Ojvr1H<9R)g3oU zVoW}pi_6vo6fGoldY2>Jjax+X0=pUkwo+9m{_08gy0&jJ*31Fv+SgnO)6}Sm1_;B% zg9;|BteOYl<8ib`I)><%p{-hWCQ_(YnAM>uZ0%x z5>Zxeu|z2}{>Pv}ZJPTU9>M<+eufJYa>)~guWbzwR z?%cW4Zj+}b@2512D7NB{RQ8{4hoYkOA^3GSZ`_Ch!E!zKys$E^S@%HxTd3M^6Ip%y z+nf7T^iGJj%W~cxav|Cz99CQgrJHj)-GrMQ;Fu1lmr@7{Fue2LQ^&HH9L-7PeIaqi zB4Hbuvi`Crl>GFpmS8P`U=jSN82`oXrZ|cq(1PBx1JQa5m?&yiBc(p<1Q$~6><8{vC5WfYHXKnqWTkMbc00Zd+pTz4@iBOko&O!mD^{_COU*K0pg#rFD(7cXj*15D- z+`HN3hI|ZUKMtjIeMSF^<5BrE@8D-4%X#KN#APY9UT5UlB5zB1T{;Q+qZH_*SW4fN z#|NtEu@K|K=Yz?sHm_dgBxu)?wGirbJIyOYAgy3J_EwX?TA!X`i>ib$yt-yZtpp;4 zU4%6TDTfjED-_!A;3MLCO9UGf=U&pidpp1vhHY*Idh%Z(#Y!H>Lzg@LmLGSYtWR`g z$qDdOUmD2>!7$N-B5wv}Aofna#j@~oQR)d3*E8;{KgHd9;oi!Hm*HK7k$?R4nN3%_ zOt!mr?cKls)1aiL{%i^Sm;|!TfS@xnV)}L6V$-jOjde?tKK+i?yI&IVv8I!1hB-4dvhw z`C(n)sR)M&Q< z+Y2iW3+F!Iu8zFnh8wXL91kFucois^0zYb^PK}BhUi2x50#FnRNo3OI5T~oOdb@7r ze^Xvw3#rong9k%@crDPLjl#(CUBA2Zs>o%)Odt)2+n8c+N=TRO8s(ei;decE&b7TA zGy%*eeGjgOs16doxjwPPxCSS_4rWRGjYnLzDaO3qe)0TSU^^UlohCIOye;FBUIQBh zptAT2gxo)FTFvP_Ro{OxYA+(Z^g6Fy=tcoVTTq_G#(7;P;mv06S*iyiE`rR<>#f{T z6^ErAy&ktFW(1vmxbScE9pM%JN@86clPOCT+&wrFq+HszN??>r2JSPYxI>geyPC`L zR==zYY3 zAk20mO`Pys(Xo&V*HUV56y_z|bNVTYdg`-eB>D-}iXvZ9uUK7YwKj_dPdqhQYspYV z1t}FZUJ5fhl7}j`bnY6GigY}SDTgya{UjkvjUUW?tejg|ANud)KQeGzHfKgt!>h3| zPeeXs1N}nMBm{&!c@blTqU$voHYZYs=HfHyB@L7o}M zo2>~fS~i^hYkcBn*XCW1&5&r9j(l=(GaTI`6kMc*IA}ep{yx0a9s@8cPf=`bZA(K6 zlzT(8A)3O;ebm8U+}yOh+tNI~ZG>LgIFb0l9JwuWl&V{U)3iz7oFq8i4-X}C0|-!b!&!wUFn$sAo0^KI*-KMyLUt_ zrBRB8dEV1{k0WE9+CAOb=Ooq@pTV{cJCB;$+4jTw|w^`4s=5It4V045xc7HQRI))qu2D^IfD<9(j8btNJtGf#@cv zOiQc#a_Z&b$cTja_;`@JdU{|d)>Bng6*?4=@U$oVim_vem4lX5WG|(2k4)eqo%)5B zV@BaK*;*(`Q}NEUV;z+@?Fnhn$AFC7`*D&DF7lhJnUcK4LxIEj&3EZ6^ZHlvEI@*+ z`D=E`392-Vx0g7&wVBko_%sJc4t1uD{aRh0zMWm(<1_k`eKOfqT`IECHv&qr8?;5s z6y@RiuW5jMSjXakVP+Y%b8yhGe3;AYF2YAk_Sw9KP?fy9rM0WYm_E@)7M;?f{d^*4 zo>%A$DL<(0=vp}Id}RruZpj(-q}t0}M|;+B1$sjwJ=4Ba7b@Bqgt zolGz5{}tWory!|Y9vnxzJ|A&?mGWjCR0%1llO)og?S7Y`{f%>_DlPQM;f4crq7r-F z1@neWG8nP-Ic_JR|2sV#LfZL^O2txpo85!1b1%Q(Ft2Mf+Xlh?avpGxPD_*9Mh~UA z+r!War(jHG$(K_$EpKk!Mri}8;vvF@JCkEC9}a7*2Y>zbQ+QVakzG|BRN&wrvxHb|69R40bM-ft>cbGAN0^47^mzw=xo7jf4?o|NAr00R-XBqqaW-t zE(kZwIRDHcp_%zJOY>VdWa~L*09a#+x zLm~G*Y*N;GuRU-$*k&MIopwFGw(h>Kvfw)XglfC%-?AEqL|Nm~(QWpDhXd6E@AXVm zS|nDj`mS!HP*v@hQ*a%Sxx;?J&hgOg%y(@ZWSSh)+@w3i*^q%x$6VdI-?Eu?>@RHl z3IDXtUt=HYoJ3NMa%)%j>GzqN4087l9?Uhu8g{R|Z>71bv-64FQ}f<#?%s6k3oM1o zt4m5XiQ%W6}bWPXI5rI@s za+}aTl;O`UMA}+fIzSQi-;`pJ-%u5ypZIY_;-NrWgff6Dr?oBt-47{)#2txrF^(lO z!}vCrL0aCSRI96T%FjtTbH6=N6e-*G-&cCBfSZKjlMtd{kq=)5IhMrgU-2LhtHCx#v+dzkWzRY4WLUUQYcc zxJ!{txNr{6o5}(Hg<7BXX$S#Bis|fvRVAE&SsJMIWqvg5?fCZ1@A?pO4PSS1lI0a)Oi+8X3f5Z2X6X=}EO6p@f(6V@0JV6t0$>}q}k4B43t zAz&wguUFl?LUt`gtAp&W|Hvau>pSH=@PmtmD$Wh3X?0UL3)>i_Go- zZp$(;y}g_x{mAnf@Qfy>dRu>A_MZ^haV)M;#0%j240t-;i*_jQZLpI! zeZF|NI=U1CY*%MztJ2v`oM&QSpr0V?@aTR5=80&QBZcxwON%qKnzvhrVY{vwe8_+Z zRk$z2Ng7?)xmI`mqvJm}2*~pPYz=oj^zpLa@SB^=tDi7jsj^=_{O{V&Xn|*Txxu6- zE$!r$3|&)G(}CRTj)1WEnwmQMEA3hOvQ{h0g#(qAD(4d>Ww;0oI^}69mCpMTtLluS zcZt5MdyvrAT%=X*LpVwDT*A811te7pGxH!c0Ky_B#26qK4{ATN)Z#m4T&hE$D5V#o zGaJ(;oYF5$4q^1(lOQM~=6{Ei^MMkP5+b#5wcDq)X6|M-<{_^KF&FtA$N;7O)Qgap z>Dp|p#L&R-m8)ftb)!%wA=LQ5fp}mbs7GF1SeTQYUGwo{5~>|ctr1sHFu`9s?en?v zMq_X0rwprjX3ed{KRqIl9L26Zh*B(!Gujb5M)~H=o9g8&SFSt>6Ry)fg*})xT~!s0 z!ZVZ$Nj%68H30 zlq*JE%>-75rS*h76;e<3I?BC%s$HaEvCh0GOIe*Tx~6?DU#* z>2i5nHDM#+44;OcMYg!~JFm^Z-g;Dq~87Dgv;6XF39PfGkJxm`BF`zlua?W5!WK0lUF zo{5Z4nSf1L(7I^i-0%5&m`LlgM|{Q|IsX~45Yv-miv|sxTx}LMFB?*AIG+$?26b^7 zat`>$Bw)l{M6d7&oH)r8L0jD0%E}BZ#HLG)L0P~Tu#V-UEW@UB-lC&I5W`<5b5zLc zoI>_9GTzH19hBk;(9imEYAZ@it?`x1CnH2WW+2^`J%0U+lT*x&n@GspjW@y5g_l>P zYftLhJb;iIWw$ zz_^GO1?A$c@bFcu_uECOuKxwcEDz?*G>{p2h8Vq@oKivg93vZ*GhX@KMo-n?8&=o2R( zmevV8+|_rMzHrH)2{2$PN|0#p&oX>S)c!6PZxJQg7?j?DPp`i%DKP>y7tUOz$)^dy zndqiPIJSVeMJ96c@*CEzBP|{;{YbE-zqS8@?R!BLHM34q_>5?O=NRA_VwU~n$rFnh zfd#jW?fs4aUT4HP+`YjOM0&<>HS*C)B<{axUUk_VD!#ql&0)Y3QW?W@Md0dcln>AE z3J?Q202zIBo6mTsh3Us8omL-W2!p#xy-N%j2eNpC9A=V|G@rKr9biGAw-R48=vp(#zfXUDHZvROtzkPQ_tx^gHiy$qWka6F zvG@#wTR3cDcTs~ESMHtBAd>j-VL%Xtj|?F|hcW9g5>|zhs#}YXB5$T#udJw8>~-y+ zv9VRygc56UjG{da_fKvHf!eA$*2(bZ^^2J18}ZEd>C4gQ)+n|r`#Wn>GSkAaI;~@w z5yDZTRVI&)J~B*~G)#}ET(fwN$d+ytpsB?B+g27SAHohy3kLZqCB){2*Zw3LLb#1c z8Fua3jVi_x&T z7>O%}#{_MB<|x{DyvnKorF6rSFX+%Nt)PusPaunnZp$-we7^4y?UE_&&jYbLImOov zqV(fdZv6Q1_Nh0ygkgtyy@|FKwCgufhc$bDS%FEf`}W;SO_gDa-y;=j`7l3aV+7Dw z8QN1U%rE2k8xq1ZA(JRc3yj&5l3~PX*++I;tQCtjFBdc& zjBg8+ZK5Dw6(WP_Uuf@^&J1*q9Q4eqt?RxD(81fF9eB0M(FRs=Ix|a0>Fw!Jznl3R*m*idpWcF2@g%Zk2n?#9pS#zJ z$;z%HNTO*PP`^o-aUG5hFWB_-H?4r3ZaoUP)>L{nLTOo#cyN+^u-_p5uGT1qF)|fV z(Ih#x~?5=jK>NP%e=wm%J z66Q)RBTl9s4ktzXwmok-qcJM>akj8wo*4@6k8S~^*oK71UU&C>ObUWqAKy+%f*UP0PGq@Rz%&hrhduHZzW61n-y9J82xNtwDhnDOyp z-zR#?vq>Wk=0-@AHl@_A>c8{FdE0)NEm}%_=+r(;cSfAnD$m5C`q1lKrEi8s%JdOFmkktT zaO8}`l6P*eSPMiI`6$yg1@^Xf4BV_#0V6A+PU)BHC+T8Hi$(_T^i)pTb#?u*3)uW$ z&n&B|{r)I8)jTf0lnh8hE)!jXt$AQQx8EkM_!wZdUK1bI!gd55Y(`>%N41mc{L3$h zm>UwYXpVyddQ6oqK!jrh)BoX}svE7p9T={@)J$cGQ&LYuz^kGYzn^RzZFh-r$|6>w z=hGvq(}L#0uZQz&xzq@&1(WP=C3`CRH(&?i8|nBtD1h`rW)-720W$e%;PNJ}ZnB{+v9YBlIc3UL^__wfh>CNv6%9XUI&+_>hJ)Damv z3p$*Ff~kX?i>u;+;MnKc^jL(zom8(68TO_FV}8pw!n}mbYMayTc6Fk|W^&gqQ21oY zphH8I85(zi9`@Xn9+{!^xX9GiP-2nER z7;E!P+IH&~eHXgrJkee$&FJhU;MxNXKJcA15{f+umx(&?2$a)~H|^HN0k1)_6cdM( zSNAm34Cbn}em}?h%W;4Q*YYlL%~9@nR9EgZ;e>AnyD;!P`13E<;z{B!c+H%Pfy?-X zEVYzSpVrJ(tk?VCFA?&B|Gm`Dac|NZru`^hheF9pe!4jFXBn2XLckW^YUul-Zf}mdY)KTKI@QB|zxdS6D&P-R! z?(<%ica^`Kj;JyC$*H{1)0G#=&!I!Nji2bvRbhj`C@ma57@wAAjBdEJa1fNXuqTvA zEbqa|OG2Q6+;L8uvlI3J#cVeI5Z;fUB-V|)pKwg6q~|RxZkSC1>eQjMOiDw{_hB1n z>=T~foq_PJT~O-lTN4M1DMcN_lP5P;hw!1D zh)sqMCrlW5xXT|^!or}6-@AT&AqobvhCyz?Kj;;Qy!fKM?Qko&JX#WPEm1gp>0bKL!g=V>Gx?7ZBm5j!duQ5vToN(Pd2XGn z37gGR*jcX+%_!lD-MMD5_Q(sFYV593I(pUgiGq@WGDq)f4pT}TD>W846sGVMjSFoJ zN4FEd>|~a!`12~l3?letbKlGHBUX-hZ-N7oSf-^~eXGJCdP$s!EoikmGXrU0_EmOP zMV7N*14CNXGMCPwn-0;71A~lu3x>nHzW#Lpw1zPkbgDq!biW1@FI275gIxYN$MsBx zOI$XzEg0v~ZBY)nQMCKgDu>_PLbZJ2nmWGFb%8oi_LO^(xw*OK(`^!uLQMd#nsDp+ zF<{#aoP3!*@tu};I^jLCiDF^PS!+7gy|ZR;!SP|hmdQ*KUgB`))l4RxwhykqM6Tl ztrz@4@w%;P9em1|0YuON5tKmrZ7oFzo`B zsZg)1q^nr}sTb8R6frxg(p#l$LwM zkTBya_&qv0S7Ca+Nzy!b>z9@rx01Vihern=Sd4F=j)I7Vytkwq!{33LWE@tBIM)@3 zG7i`n>1U!vC|Rqa+>-)gBZ90A6t2QOeV@X=WV7pWreyCys}GC`MA!Z&+yPMfcVQ&cXH;Reis#mzw(ETH-nwC+!aKac#0TXZ zu>mbtxSTl0Rdm_cy>~_{b0cSOVC~hy&G`RPR!G}9`qEkP!QCCW7qCANNiRp#&O?RA zsL%&$M~vuVGyEZz!U)KvJ%8ru)DA7WxD%AZb|;T-Z`j1d4qfB-H)oe3u6a-NM;47+ zCTzz2Fi`nrRmI*IHm)^(wk!7e-bQw@O30w-nMkpm1*1n*datc$?H{(uyUSUX^Vc;v z{=-BEW5ep+sjU*Ag`qbc``1n{TY%jl;eaO_o?3-{w*{0C@5`nf+gQ|(q%R@k;4$K< zLPB9)xwDIlEe^XA4i1du3*HX+W=l|BwKA(#eLJMJNQA;c+F_{^2Y zPwV{kL4fcpz`R0amw?4MaVb5X>s-&C+&tvJQinh%Ow|+drQ6LB1L?~QG0lizY^j=4 zC?{yP@`I*U9=K!g?RNjkX|c=TIDj42UV!Z2kLzt6JmE+NZ_m33riHTuTWyWh=Q@Ro zHKF25KkL3b<^l8$2Y|4c731wF`X;|$nqYydF<8j8-uxli7kl*Mli)Yqt_hxXpyu5t zK;@#GbC*hLj)|n6yO84C@&n+AY>70!Fj(<0!HJmG_|zCh-1LP{2IIlqb;zufI3D}o z-|i#%cG~zRWb7#Ojv;mcMbVFin{FiFaP@vt5*ZXyLWzSnn*=4+Y19!N&JNpMmv^H9 z{pN-B5){0=f~mXP#$ktmSCBqYkNxH?BUB(Z!BFG15H^X<#l1KM0iQ-A=qABxzP`R5 z9P=6yMw3s+o$z}a{JeK~-4@A6Y+mr3P*{<65BbaR6Yg43i>BF8M(S?VM9)7T&Cc{m zy;lZq0J2}4QX6^Zi;d@x!k3yp&jfoLhrfoS54a_Wd#E2vlckXt({IPit*-K7PsZC= zQwXAo8SW%UeLgR4_6jmBo#F>^520}p7v!j@HHR|mx=FJZ!#4*4+YoJJf!spuZqI0f zA$kfH1aMeU9Ryl5c6IEl!4-@wKhC&(mzR!mlNw?Uk~e(2w)34r$yHb2FQ}IU_C22Kt_=TfU^tMPEn((q1YN=QY|3gJX$8lB|V^>QuobGt~IW z#^Bc0*4X$qVo4eEY{QLu2XNQ(DA!R}Eo9zw?Z{?h;Fo0ipWx+GDF~p+(C=wzY%HP! z((2=y8~XqZFk#j}UQ6W2)0CQl9ET!?+Xn+p0=na&?4K+K(Nt(h3PUcc)L)wnX#>0x zUgPXr9MCRzC@nobEba;MXkl{WDc+bz0hbap)wM#<#wGQN7sSkrL9vstK&6UzbZ*iy zPa#9gXRH1^N7;R7TV}opg8PS5{=hP-|E#Ne!TVm$+7RX&UfrR`lk9g8reGAmCG%%3 z(>J=kspIMI$HhDOxDs(}s~rYWZ8G~OWI^CT`(b#u|I1^muHVXz%6<7uv&M(&P-<}O zLitw6hf5yOwgp%PqPu#g^uC`Pcsm*+Ma#RqLK}eE!%r0)SFGBT&xEt2O z16AzYU7&hzPZaNK8fRZIJ{)*uY-~&=VLfb=IRft$Xf1LyYMR*c&bQxMWnL`KH7kCY zkXaYx*KMn^u(`v%PPi&-e9M*2dNhtrK%1jnU*b3_+RrvhOsSLkgbk`Se@NPDcdPdv zXEV*1*F!&F39D`iCr_KJfQx8pT5rXTl&tjmGkkE!9c3SndF${DBCY&G6VVK6odW;{ zNqZI4bb`qzCG7^+mB;ysMEJmRhu~Nsq)5m5;;*YWewcVc&~vKz-%q9<&PWRA_x<^K~G;`c`xUOJ2;_IELGs)>g2Bc%1K(Vm6_E*2&J z{7?sA67`xABL70{X^goE{xlL`GAxa{Z1N}I{ZHJ4%yPcDJZ*^_V0*j1g^e9LGCWhI z`VX(N3_p(kAGMp*6%9HjA(G!wIL(n*VcdpP;Io^ zhHBR&yBXlzBGV-lHFLW|O4$Z+Rv9g)b`OsWN`EK{5)T{QQt85bt+czcu75k{i?CXu zHX~lBOnwsxb3YzD5DhKM4<-;77DS0$e1{e+^@nv}Q#_bjTP~G=# zF?J=D!Ob&-I!B?4%>3hAtiy{_!~lZgSBb7N*ftEw2Crc=AOVt@pOz{&R#h!s#@Xp9lqGRA6uohF(P%IX6bU%ELfE~r|+=K`)cb5uk6jXRP1%S6(Gg8fQ z0T|VQ(1gxT;P(B^=wZwn3?(5L&zW+)Z`g-De46c}4$A_BQ>;>u@6riT5X<`U<9%3Y za8}LL*3=&UokHl$;p`rs4XB+CD*gO?o)JViBEXN3+KFM?9UK{|g{54nA@L-`kwi3A zD4Z!suhnO`*HEl#2W2{#xfNWhUTv5ZasIiaVau*GW^)CobNt>q9sl+krlW}au`5l1%kQG56017>wFHe_Fx}^bkgk=6%BHJK8 z4kmtxU{hiK*6XDRHR6A|iOPyv2!To@r#nvftCQm1ktxXjS)_B(b5=AW8ZsX$uQK+) z%H_*NxA?L4(r~QL9(>ZJi!_d-rkB;+HnkziO#m^1Y=EyYhE&iUM=~3WQVyEQ;6$!H zFcIN7wfGg4*+I!;PvR3n9^~Y#?xA*hA~Zxpf@QD1yLLRV=FNM8J>s?N z>Tn&a2tGc=>mmbw1q6)>ym;2_TI8v65_vQy(@{I{FPZn0?->HNoIof=dNPG278hUjxGi;LXBwI3bUQqJ`NGR(@9tej*4>^_R$d;+ zipPNoUJEE-m^9W}C~Jrii&%+PFi}2aDWl}xhnNlSv-{)8(`g+P49q76p?v9 zoBSE$(J*)-oo~a@JCjoV(N8H&KtLSr>}qarIZ4kZ9B=Gu0*CaUKGIX2hsG<7j3P(b znJu6^*w%MvUU8Stl04@Ma0{R$i!^rv3< zSyZP48f3{?q5#3R!Evf%5f~v9*aq7i@r_z?x@{BHA$ZyxCXGc3IEE$7i@2oH5J~^J z*qSL6ziRD8E7aokPUXtti}nYI;4Nj3ztt}(!(M1D8_`|rbBj4V*tlzkC)!s*?5N5@pxAms2KWtCTfo;Q)#8qKn z<0sxF?qe8sCGhW!z?aCAE>Q z1UPi$)2xn96W9#rv%L}@xpseCo3#)@k0MQ{kPpLnCzrj$-M~ngJ1j8%jEqzG=LQ(V z6&^{zmJ;znX5@IO#5Ec!IxM3oivCpqPRcMQqbhdh5>2r9hP!pDuy!QHr29ee78lt# zKd@3pjuW#j>gL<=b-d?S?<1xIxMwi+4LUkS^CX`owP6fi-cT1Ka^a8wziA3XL1acz zPp#zvIE7-B>TZmQD%vNEw<6BNK?UH*111v+!#y|_sF^$j;n}kPh?2)}4~`UkYb2Ib zLv|gN2`Rkz$l2XPKI@tv7YWGfp2mqlAZXfZG`y3rOPHns1VJ;+d@B*i*YU~cQVE13 zWT*lW1^?%Me83c5iXR#@@2aYVfT{taw^2rEj-7*SjWN;353Pm7wL`_%=b50OLBkd~ zjSd}A7^xWK7vaX*Er_~ux-Z_&C(Mz;uBrq;5H_bC=jACUj_c$FG~Gm^LsFi&Cq%i6 zY_rXb9WujLUPHOApeqe$zN3sPtmO54 zZ`yl$t~b1YI*_aPm3wZex`^zCJ4hRVc-S!FHm;4tjF9ja9<0K$vNAk=*kRQ!r2|;% zHF^CIpMm78^7BzpPYB&Bw7!XWwdfimG2P0qe>RVX&nHoci)Qbrc^xZ6B#MRI4UB$ zzZIvyHA6Gkxkq(8xV_H=6i!aQDk3{`{bcj%_tIL^IpdFM7(w@@xz7e|Gh%vAvUmqd z8F0hppPn>d-(E_;Snpu4!OG?5_ajU>J0$!zjM$B}%Dl)#jx=%#|JYcJYK2wFBU}-3 zmfLaY|K-LeD9VYhI)#EOg5uZ5X^9nTiO348bf zj+@EqlNg$jvClxhM3z5)tqJ#1LEJ9z)c>aVNptKHgu^PEFYR_*o9L`FJd_VEsI}q} zr$hUzIW?3aC(&(!IQ|-M@mnPtw$C58miasvGO z`)vyDV~hfN?Q4-@0X1FAduvWXyK`TY1`)mgdxNJzGlDb&90RFvA`ym3S-Qut14%AP z;qdyzzi-k8CB#3nluJn9B0MnAZ{87?4~Wr;^1X}kPRt;}j?qM7m-s8vWnVf{FjQpu z2q?VaZvrN{NX=YynLub}@&gv#z%!o=3c&UutWTDxJ%Bx62%ITDg~W0mC<4YS$m>e; zD8JoMU*|*k>n-Xd^VXBD3{_klW;U2ti0GK`6=#W6viDx(Iw(=;+u{yo4-0Y?v~!wj zQ+G$$Tm}a#^1gj26Dilpu}RsTg?<6*681<6`vjwE1M5*4W^Nv1NMJ6D;cz*0zcFq8 zett)vs}|ouQ%$0izZkfes4d(JaU~d1po+pDty;uXv}Tg^mo1gC1P~Pf>(Z%D zM&xe-=IlWgzf{tK;V}v&q-Z9D!*%v!Kl^|nV6s>>XR=Y)@d)V>Tg228jH5q2pI*49 zr~^nHkh(MH92)bKRoAt^awL&D#jUTZF^`$XvO=prsl_?r@_ zg7flySOMyxl=hN2Gt%r>GR#k#;k1vFi1bo$W>$8BUWc-N_Hu_#@ms=k$3M?jMZt_N z@xXYmPospcNybT}l9cOU)ytg7uF^x9YGG)2lyTJA*%{iMFl(rB$?Oit@2(WEqN4!_Rm|*vZ{xEZ6O+?`IBbw+Bf9QGi3QGb zA`~~wI^R;#-Hp>o-I0lRYj<70X%mr=0NsEQS)}png!J?++4fFOnAdTDVZ1dC6bKZI z(j|DHb@^yxB>K_=Dy&l3Ih#>Pa|RDze*m>_L7+W2B4l2Q(uP8baxMzGem((^CHuG`R5Q25=v?43c5~~nIHAvbcCF5x2ljaN7BWCVN>Fz^k zF_yCXHW2kis+yvf^%M`%euShOXur5u z2nGInn(<6KqoNAz_-<8YA--v<5HrBmkat8c_CK#>O~kIt3kSD< z24M#r+G3lEuf((^=jG3LDc;tMXpGm61$#wg`S;0se~po2cuj3>?eNL`hE!|aH_xq?;D&*2C-{AiSGLRodvqaS z2^)(?%yc(KAuyebhlO^5#~g~of7`Fx-<~uu#e|g#OrVV^L93-20yg5zjF%c4b94_f zv^q^7DAYz``!UNYK?QnA>}8EQ99Mwg2rh@wri&=AW7}M8;A!z5x`NiXcOT^RJ~c(` z>kXQ<1?A2E^vm7OA=aKm6hw8#IK0UuU(*>~;P(^bqd5P+Jn3}HIt*{#u^`7zyIa@0 z8=WG*{rZ5LMMT8d&zC*(8L|I^xKMITLg>dqf4lSfqdEDVwG+x8DV1`9axcCU|SK z&>|~Y?uF!oI4{d~;yEXq|By`GGO_ToCPPxckP$Ahj4U&h4P+I6fUgF;axmHT_YZMt zFNK*y9&wjOMhgRYV0WC^S$KQ}-^s5y(|tnv`}+s~cJwN#N>B*4WMK$sIuO7~|1!>7 z`HCQy5FzeFskz7o$mOFi^Z0|(fpt3{O;H!|7qun-z}zs5uYQqQItR!{Y>2yreBe8= zzleqL%nd88;a!6xTsQIx<`NRW z)j{43qcG$-PFS7DLk>Z$!LWY10b@;^P7xbLP$-`~=jSct2b+m&{-S;!JAs(OEC2f7 zM@MGJe)hANbcP?rN=`7Sy)5W$!!eR>0PG(Pze}x<#fIW>3{^F!?xWYJkLvb> zWV6A5fIqm@X5no(2Ct_Iq1H`AQ}Y`cahEpaZ!xx$kPkQ(-xcoY(GN5WvbScB4W{^* zAq3O}2eQ>CZny0met- zjcx{u`H`_IpLV^rnYxn^job>~d%$wrkD%<##^Cno5+4vZ&}KZw`CghpK_*a4XoaL? zwepeCfOO&Y>|~kssr{cn)Dg zn}`(B^3{rGM6#7I8y>TGEx^Q2Cld&!yP+Ype8q}y$m1$Mf>Py-l0rA%N|diMV98{1 z_H&h{=CsLSIGn|$UBIu{g1=5{zjv84hIH#9<$)m1M}MBMRR2%Hz9t>{^jvhrl@H~6 z=f$;7l5q~6KugwA!2RnIG9JVpz6RNC5groh=O(+glUE%(Yid?+R)%AvLn4_=#_>vr zuYOQ&*Hk@n3##}fDtzw^I5f}WDdyZf@!*C|Gy)NIX}DW z{O|EN_*pVJ`Vr=|VCqbzAt0p+1D_Ei&VZO6Or){J@ZhGGSEXYv{%5hgo=@&EF{}k7 zOQoQ>{Ky>QGBP%I{?tf>{mh5Jh6%?ChzEJiU-CchY87-m?>>C6!7RcDDM{$AfOsvB zcCc?O8Aki$$ySFa4kCIK1xTkFQO=iEY7a(7`$8HZn`dSn@Osxgx6rHn*8IkT*Q2bI92P-Ntb; zxp*2hD#Y9ZhRpbFNwLFIbDbmqbxBfxNd3htHK!zGbL~)fU0EtgLBV>5Q2QFM>?_Xj zvTWxLI&vB^QhnHDKUq5F9^G4}MhstCA{>K9jJZBA@5D`={~SHS_`%NFaBI{s#OE6h zyGew2${7?zoCm65ltGL?o;aSZO|ihJIC6J<=X3XZDIqp{^WT_ewE+!t!_TDCf)957 z`g#ddhb{-h3~V4hGCC0yD$@SedHrrk^gQBl^#t#mYG-So$0Z#= z7z^0sGx4j9kEp1~G)Qj}*-%@oG84dWsj$bKrmW4?=0RJw9}vq3y#L|L4H<0*G2KO? z>{7Jat^K6Mu{Arm3SRdi^mSk+K_xmcgJfVR_82wfJ_5phSUHon6jlRXYJ6tsXEBBc z{?FlP9A|i`8L(M>2vC#EW24^S1XyGtoe=iF$84U5xOE+H2ChcWb&G4)K{L!lly;LxoFZkqHuow+4f?vC!%FpY2wTeu$ z`jv_iyx$;+Rv!EQg4TRZfj|I65Xs-mx&GIuP;WA=v)XL1FxGgL9!qsEokhlB%$on0 z%#j4h6G`?s5nK8_yM8ovO0J%y!YdKssCq)9p z*bbBvCNIw*JOKuzKfC6XwH{gG&aWRTwd2dF4|a^?06lXsNMR$Vp5lli3DqELi#2T` zGB_5PH_m+`*2WEgy|>jlRM2Z6n9Kl;z3i?hhU3%-1;`~9rjo{!K_R3AM#gJk#J2Ka zjnPPtTmD8AM0ohNRk1o`5xei>;<{8wRsoKPJ>l|u0F^h6_qe4Wc`*x%w*&kU@+RKD zN-Vx*)H%p*o`;Rvhw2n%8mEV^zjY)D-3xfFS&vA)%*3o69Ygq-fk6pt&(~^5|64;? zxS7hhnY{C6`#Kj-=0BYKJ%1*A0HL?mp_jB6>gxKltZQKC`9AvYT-$uH@5DiR zfkJEcTQ`M9)k|Teowszf*QmA@jk~_(Z-dy3s@)J)%HQ&(e=b*jGxpJL1B2ne2`p`v zbw)#>Uflu*%E%i413sO&@=C0hmIvtVwIkX|zn!LL@wt(kbE~)(FTv2+2LPyd(DRw>wrmI;HH(k7r0J=F$%458{3XXzuxA3i} zqsd^77~S%8=YhizIG!BNf~$+y@!ov4x5YRiFGeI-D%?+@mf;#A>YTRpttYoXb<5UCj#skU2Ff-XR`vWfql{@MWFrk1i8^b@iNM` z^t>6aF~gLQ+SZc`2l$>0zBh%l<#ca*h{{oaXXF20>$OG$g+xlPT(f#FGcxzmrSm;+ z%H8Wv1K)nnm!D|tg#!nEj0~3>T|14Ov>}_!=pw`}Gb8ib$?vEEe|;Nn^vOSMkB>U8 zq;Hb{3U>&&)dE44Pg{M2dl#(EdhgQl#-k`b;sLYvEQ-RXqxl$aa!r{_?K?38kskSf zY+VIdl-c+GP*!j)6oZfw6h%R4DZxTQK~N+I7m@BR2^A5Q5F{m)RA7*fArusp?hX;8 z2c(t!p9{D_T<<5xh}7Lv03# zS$gxJNtS&N9lulRvCC-Y@M;c(rZ)LTR_JA_K|{4}uy&6%p`~?TO%#4SEvIvMxL`#u z$1a0u2rjP87SP+;4S+xm{`UA|JCU{4_h1mhb96t7;?W1d|LPcD>Xr04-2P@VhL9?& zrKMHrMz*y>v}XZzL7@Q%A}yyDkZgkZD9fK^UTf>S?Rb9gFaKaH7?%y zWlk5L&8ba(hU?SrRU4(q6&(~_nzI|E%TYl`Pmp2GW2o)ZGYRS1m;TU%Qq+`3%;ld%<#>zt@XSha}jI&6o6u5P^Z$GUolH6Mi@ql6^a zqGBlea;w`eu8pN6#q4#3M7!9snkqf%0;7oa)iH#u1v(l5;}DRm^H~pHhqEB`!PAO- z)bkf#qUOh>9?D>|v)0lL(A)T)G)5;*Klk1S)V+ej`#VyhNB(E;(j`b+ldJCAsX^2? z>J(lK44m@ z_N8W(mmLn`N3{Zszxl_4&YJG_8sxG$I6IuKwy(LnRi(u!Nw5cM2f;zmh&2E;c5iH8 z;O+|CAl|oEhomQOxWd6yOUm^ zs>c*W;|30}C(e4pqwZD!H*X6@kZs+T3Cg*$b0AZ+;U(M}dOgvAYkf_KEH8EnT+p?< zG(A0e6EsrVW_BmnQVTQ0OVLj(y&O(tRb`G4n{@5Qf0~GF)m%vQ6<^KO8Tjm1WCitN zhOc#!MCyHTuJ&@|S6gg@3{zu5)@F9ooO$0fKk{Vk+nP=Gf~J;u6n^U5SJ&*q)HIOP z4_|HRY5h4>&SW=QJ3Bf)Ru5|2NmYFuAXl8U|d)38+n)GeSNX7#cdV#CRH)zrTrrOVhsZ zkKPp&vpS_uaB^NNUY$#I9&1igzCEMYirco6CK}dK1sK`+pl!PkCqmndHVXn;B6K;w zPJRPQ-FX2%DHcptyEh5q5i}E1OPf(dBANqd8V}9DU1*sNwbxcxihPU#mKrP=LEASP z+GF3HYT%p306Wif_uB{qveurI3mb;4|@; zuM#lwxzw^++^gB`>|MA_$jE9H+J#YxngULMdun?V5pw~QgXAt)buUOA7UVn1D(tRh|$qdo6$Q#5aO4d$~ts*8i8Twg_Vnk zoZk@E(q-`2WNjg+QWSy3_FEe;_q?a(R6i$>%Q%UjRgkBy5n zfS)MJvP(9S4}*jf7L_4B*>(}P%M8u1l@%4kHLHdYxQlF}TW`y21oy7a=M*iWwGzA= zmvmeEO7Fu~KqCDFrsMa9%)Y+AFL`zpa7S4YTko)}w9M73eDkL6Yc!ocJ*=SE9^Rol zI;vkW_I+*Wu$W_0+sp!Lr4%i^mUvY(QB<~C;2OEsb_;SLYMYwoeLvW{Xyyv%Q1C%9 zIINnaoZ154FW9>X2#b65@kNaC;6@vOxvI-yD1r{copsi;dUVPH$U)uuViBH#*>M;| zHleos1%2jw3n)B+LEG7*LvAMTy>tIFe@xA~b*Al(wl-0be)ZM=vad=03H6d z`=VKT(F8O%v`?%2EOvo#RHrrk#i%NTyJ%?@_u5fWPjjm#;DB^lTnlyUC-LhqBO*-3 z|FBHKVv>gn?)|b1TcBSWOa*ncO66j{$jp#k(1KkVZ+E)(YOC<|pQ#zku8hzE@dK{{ zO4yrsdW;4M4&ce_dnV?M#?6`%6>PU-R3DL%!{i@r78oWy8H_a7tw258;hrDNsowI& zRhh!A=eP0HMT$AyW4Ox&=Fc8!e*w~cBOEP~08r=_w8bai?Fdfx5eM~AGgM%%)#_(P zeT-h2-iySKCaopfLuiKeF#<58mSY-$Mm*33z5u(LfD<70pD$mVgKowyXZ$dpDjZ#F zJ_kyz(H3>HW;85_gfm6GVNm+?PF82(amuJ=$#Lq`tj(>a=BR5hHxOz6ZM!pwC7J*{ zBCVh3Zp_W64OfMPWW8&v3n&y=`=Z}pfY}~}d+S@EMV@KW zDF9uHpTg!2h>?IzCFoLW!N_OX@!XoO2hw)9XA_Q*f1%?A2ow3TU{h0ndS3}iI)a@02eJGbm4gh@+Imx z&cQ4d9fKL%U}6$3d+pj>w>S)lrnN>w$OB zzSPbYwhflT!uQc1Z!nu30}Fr$ z$la9)&6u$nSp1w{cJJWM1AP#pf`$HmB}!hP6@c@-QS7qPu}pE&70$?N;;j|!SBC+6 znx!vaz7#Ei8)+s8Q=@i%k@}fos6TME)X5O4?kxZuFdF}(p}j^~bCoP^-gEmp6)2*L79x%Q5D{5q znD}$;xe@@%v_n#Q__-{IHH$sZj>(1Vo6$qVZtrSRax&k%(=?qcs40q;5PD(6n171F ztqUgkLFiI?IA+WcBwD4rw~L+QEu;Y}2=;NYTh zOL{~^Pp*P*iY%+jt|NBBWUq7m$?IqEg1AYeu0f`j={*)UWpH$+Rx)9UZfVB$(kv4_ zcq1pY(H+CEIZ%ruktrP*Y@HCAgbm|*MN3QTxH8v@UiRegmt8Jp1$buCm{rybG{Y#t z6_%df$$u6)0ly|vH4D|C$y`>{3M8cWn$%$Lr^?$J6=PGOzKo0`f+CCCH zb8_gZP!@ry9-^fLoK!#6-J5S?^d7GOh4;&|7I)rHITNSWR5eci)xWlgOTQ~kNbQB_ z0mqn?=R^V=`Rq$cl(M@W3@qf*gG-QR8*a#p9r(cyu>xNoa|O^X@BA(L(9qDLs5#JH zLPAp0nQ+W4FBHvs&5&K2wqzS7U0B2Xv8g+>Wj52l14XuP~ z0NyAq9)JUL7jR=*r5;(dz3tttZIx>)^~Ks?G_&Y>-KnW5Np#7fHP2Qo`Wf8G)jrs} zqrbl&Fe(>>xDUzbioj>Mbqdv80FoM{g%LvvTKFBBv*qUIUaw#bq53QO@!_8;Cd;&2 z7^#eMA_3%~yQaOob~=L%F2Z(Yg3I16f9Vq`+tMmv+;#>9&YXoGrF$rLzU%Uw$UoRU zC%W2dwc+z=&L7Xfm7_?B;~xvu0S!yxge5Se-8QYbqNqM7Rx|bMkHp2sCc;z;$b_L= zi2^7}5)i{{7nmeP78yPEFqyhPfj=ABk#`RQep#0Pz>as;v(5*hO%G!x&+)5=k-)mG zO=VC2KUt#QUx1=Nh_O zbpQ`Ie!Wj|&uB?+1`_Enmm(NN`Qhx14$Xz)b<$#Wyftn?X!3U*?l_wWUH$`*C|lC<JqO$B&|El%)lonD^;VnNmD%_QE#wN>JW;kv-oJpxokK`=RSP`b4Ms@(MN z$VzX}K*nzww772yuc`kgDJg@=^d|&bb*NJRaZJ3%3B^D&c)PMdh#Dmespm+%uV) zU^Bl6LXgbELY5zY(xt{_w5G*T;1MwsR^A~vxiE%~EoG6e@R(2;N>J(kW4zS=RobMdA;Dvw^%(O4U z3E035dW(hGWe!|na&9u|cYq6vC&fO99J{HU+6xPmzbw`ZRSKD_o-qO$2~jR@VG#|$ za?7Gpl3G744T12D)+rmT4~~@WbFyg~*@;mJ>()l1GY16vc`7R_VZL&9sdud>aVthO z;*qhS)u%gZ6#RSggB!CpPsSW)0NoK|D#ckI|9avMQ}~!%f6EbO3tJ`V0dgZYb9>fl2YCfsO&{27$%unw?-}y-2Vn{+YMNvKK2D-e7ex zw|7IiQFV6Ww%_MI01 zYAyvPvTDT^!r{;9&S(fgHxEQhB|6vzy1yxBcXU=qARE*PcAX(tGaX0bf@71k6=;38 zV|`;VMb!!kZ9E-BTeIJuoi>E)aXNA>VhUl#MFsr^eIcs;?t7G@F&-o(!)LZZvcVs~8%te#89yHcA`v3VOpB>jU@Ak4=J#UKyS-Xk~&T|8t|Y|)u6zi#4h0>;TpjSH^QKZF8t1G|Kl&D+DD;w7E0gI z_^@G6^HDOat@N&?0K~ik0Tz!=#De3cKb-r?Jq2DPA9*Am8%)(?XqG7$@L&S+@74;l98s>2+OE z(chE+$Nf$A-d&Lk4~7gyAO~YTFhHmr|Srm%8`d!~e>l&YwR2 z#EjuBy1Cf^CVhdIII(8BojLjU>cGZH49k}`I&(_yHbKYj^!~vV zhuX}Z;=F5#$*1{TZ7&2@_GVYIXu+nH4G)ExneJ*LlgQnyWRVfLFI@=XAKIGh;z_XYCXgT}{3ByqvgG*tgcSeA)Z)pG3>3 zk>28B3wrK5?*9J%_tpx)XU8975|=jXH1vq@!h-txhPSkDWEhT)e#aYKzlJM3SKJo; z_T}m3x~5ZhxWbj^=MK;_#DxqW|0LJ}IcuHAD3yL&=gFD0{8o z+LMRhhR+^kp?mpquC4?0?{5Nmd9Q`($z}Uu5>o$KV#*z>=hI(IoK{l0k@PZu8!Q4b zrZuGy3(b*p+xdr1d1k@8s+{;|v~JfyH1*GWQ(3}iqo0AjnEUlMXfy`@G6e)ZH&=*ml>oYPvjq+4rrZB`35qe=hZ= zRbo^d=jQ!>&c#de?q3vBZCO~QJbDf$I)y45TL+w5X+A>OiLu-#wS;J8`zyHox3x*L zA4g1gy|^YLe?-dTT%*DQPpVR$YDFcTNiZ2R2nwNu`~7Ry(<5G&A>~}EyrH4l+3S)a zJ3l1HOz3D@b8n@3OX4Kpr;l<(+NBrZFv4GoO8i;#esU~VJ+6PlC$+X6NVVn=&2t#M zE5r-u!D8h6|5_rh86QG2{7@Xb+?%(9m2!U=8*hL3E#Rd+y%vvq^7ltN9LGg;gSWm$ z+XI)7|9&zc3a<4?geEO6+xQZ^ynPf)qR$HXKR$l6Pay7CPkhry`z0DGp8STws%Kof z*o93NVdJOa*r)sHV`JA3f?G9x?3^XWvRJb1Q82tC+Wm#<)*!Jh;q|l(q3f0Vc?;np z7d)i8=zmROGUJGfwwa3sGWcy3Zw(EJtPIn80=Osk_0=dj%k4?&)(IB*o|V@a5D|G7 zR`fXOin>wi_!bsKN~o4KTLhC=P#M6tu`ACx@GsiTj~RpF9}{@Ay-=Z1C! zfa!`bc-A*dsM%+^N@_0>l+!*%r!195_2+)c`4*uKl$@rDW9uqwlZ_68*R+{@%~@mvo62xo zy!Gr>?A7m^CDxzP-4nhU{4uF*jvcMZ0S@i)qjgTO8T_`NP8Qi1-YMAqy_Tm2lSuqm zBsgCovItJ53C5Q6HooQDzUP8BgZiaz*gEdJ&l4RCFG%I{ug9dC@~DuJ4{uq0dc(4{ zx%0}Fr`|sP5ND_-X&MaEF0RoPRvmiVkg+akh%zOV%7>UM(<8C!Eaf`X(gX*h5u;1o-dY&r?- z+>84t(R`u%CKa>sm+DG2nuiA7o=d&c8ih77r&9`|v-F5`g~jrVecL|m<=A@RV}9@I zdHQ+A`IJL)v@xH0*L1Ir)b1sTj&YGOe#^oMvxhzXh1BbaWY1$jSfjUJAa)DW{f9 zS8zQ-@2EfjQ$gmM7yHd^?&#*r61a?fObCTC97n$+3UsTjOocZ(?K;pa12u%HRoKz^cKRXl3D+iF)lHMhnD>(gB&vFpawMDIkptnQDW{yCEBI@c8icOd-H0e4_&O(8)DRgz=_HqW@G2)hxkfMSg z?lSqPVNUv!1-37tp=Fd;v4~rn{$r%4`%<7b-1vV!qMR6%o)Pzg9X{g32}SAc3{^)| zU>70EWEK2)&rb+Ey7TD`pZ+LV=_jT-7{2mvH_*-#+JjCng3`Uf8Ygx`4_KcUNFt&mBbO-y{|^E$F=v~}Q zPXF`5+KtByK5WUzwus?#bs6g*ni4g#4PMNS-45%nIlO!1*UNt&lhP2Xyv!|TKX#Tnc2 znUsi7gATAJWR}g>Y}fg<^#vWaGR?`fN>G)R=5B5Ef6h#7>!ZD&r}YpHVEaDMmOyn0 zQUrkHtE`7i6wbG&x%HvNfB-oRNVc$WK;gTNH{7BdUE4mXTyUCSm#1PpGT; z9k0O}z@zg4CUDdzazU8vyb_!AAszN`QGZ2)b9L1Yjvk_viMnlkrUmK=FMiCOhty^n zN_*G`!V~yTT0C?SJK6t)S^1a;CkoQVGBU&u;@(y1V&zE z5pF8oFk-O!6*Sy!Z zzv$$Q5dJ=5tZPN6-mU92Z(pV(04}3jJ8vy~F%^}H=i9jt5LVfsGAtzaL-gB-Xp$0J zB|f7p)L%&axtJ#t4S4wsoM86%7m1Y;gLWVOK~4}ZEp`fRom*CF`S37Oe5PDHXd?F_ z$4R550uozzfsSyo{vSM8mIT3!Rp_%qnG9e@m!b&{Br$gFXY z98y`S!m%D*NUKdG zX;osOJyV*4D?5qt(z}ELR+H~C^5yCCXAdiCX?^w4Sy>DnYtM*#uqDHm zlVXodTwLT*?`pQ5@v(dyn7qKaSn})X3W2^qPfs4ds;5hLe+Q;D_WuB?<0LS`Z33Px z^RN3jlR*C?fdQGjLgV6J_VY9BepAr3M!rY`r&*opCg?wdxK}jSDH~tuC)$K?IjLJZ zmd&|W@$eoEPO^!3&b%_4kfL`-JI!3;>asfT~M{Y8uYOj{4_N|2fm+9h?m zVIhh=j*>S81Uf_*xw#u6#Zq3S6^I%v*)H8&I_%T3WY^V~=P+3t)tmo#OcF+?I)$RGG-%rX2yv9WswOX&MtV>pJ@;Hn#tHd+8oo9i4f( zR^u+I-0^Mp4|Gz;HiL*6IvVJim~hC#WvtKhHx&&JH7o|{G=eii)L=5R;)|cl&x>zf zorfqZmQ@+7&UE8OzXM}unRmnTFWUnV*8yMy(tvuQs>h$^(RPgJXj~Zv3-DdtEK*3d zriu^k0V#7ajDv)r?{`zfcrx>)m^#PtcS$BnH>IHYu%BLbD1EFBYQJ4hdy*0h(ytw2 z#O(^wVcS!`BTp5G9X+$H5+4Z=h_nNK7+EEp5E1)=%;fjgYiKMHfFOD?^GK)1Pw?Q&!O>Ke7y2 zNWU+D8|wG091L^$J1avwIC+oH zmXY?8k%@IDccQ?3eS0q9`y&bz!_B1q6{-6J`+u@SiUT4SBg=hTivs+;vCT>y(*D0T zRj>x(&Of`ntNNY`m<2($kGSPt<>&=D9o+I1l=@Dx`5hK|=$UonPY4KC^lZa2WBxZo zmwX@%e8c_^DdL)%-b3`W%!{3LX|=+O%?!zk*h+D8&-;{rd7)@)TV6|c5xhU>P|(N} zry1Cy!$TdWl&B;q58W%F$10a!%-2%jyM}Cvzmos5_(jHkRX|QZBpO8LZ z6~aPnZ?yP4{%eTHacVQQeZH)YDo*&XG8dqj(i>k`A9oEp=(;9Cegyj}+}zYmKjsz7 zQbvcB{6Vtf8c{VKmCKwebrzqEyK?6*>T+|FSK&WwFeP!|G+Em>w!cS{2bWqM5f9Y{ zbTy)Pyfv)at^4Ll4y_SJdFX%ACQD8y@4-fKjf2$vwAv2k3=K?c!?fP*k<0f+^u#lg zQaCtbuQ1J7b2c^)^<UrIf;zI+{Bk##dbja+>wM^Tb&i+-75E4o~ z;9QbgTOr}{9 z+^$d^UB~>uKLFQNhiECYs%_fH?+62d{F2$}P&`yoZ#UPTKHNHtEF%C8wZuwDj~ug( z?Ccic3-Z5%KMD;15MH;~T#2aC;Ho`dHGe_(`SJsu4TQ+;qz}P^rj^?eE?9e@w?oBVo!kcQ*A4`}t#dY%V#Lik1===aK@mtm(gz6|g z$IBvQz4*qWDU_P;86;Cve1dV1f1?fucs@C~G2vrnje%925yr5dh2e+cq~?~ntk~EP zU*GG%q@qy$GDrJ%m_-PzDa4GCm^mw2SOe$Sczllv_3R&16C|Z{v{6f{8Bu0%SMZ2o z4v*`zLt#d#^5>?O8~dIEhU}6@VXn$lZ*TmoH>yA$p_E5P2trt%c(gGK$S0^k>K@DG z1NfwszK8%ZJ*gYKbSF0$l9P|FqURE^ziydVWp!T?H_^@tWgULUpRo)gN6u*sLi~K( zGNFhn1zaWCt`DV*exaKgkd&dq!H@>3H1ji?Tw6c>ff?Qb$I4kDtqI%=-@+5{inkLy;LtS}Cw3P~IUD5`sR^N(iDt#Ap5t z1rPn$r81hWo)}5UKN8y5ZfmV`g^oV%F;kF$3D4N}?U7vi+}yg|K?wR;pYuAn4V6t` z%Q5}l%0Q-ShKZoo$}NbFsN;78VyVf65{O|`+evKi(%XW9_kHQ^tS4_}672;{ z!xIRqPQKM&o+O%wy0)eRd7yn7ax((bQ0G~5ZgrzlfJB>o1JbMn%slNS*&PYCj=USF zwvUUIBG}3D^D8O^S}*7FS-f^AGa9OSe?HBkU(~ld?V{r#Z7rvG?~KOf%lDb6JvK|e zf9yIC@HJisqq+c%C_co~q9mnM41(l?1j^sG;C3cBMBkR#$jthC{I!M$f9RU@OG1~j zWu;6KQVMVFr9^mm=;5eb{vHS)y&2rWU8o))CQ61w?pF(}Q4lY0H2|JI&)vGeKN<7W zZH&+E=e+H@vRI;@6YbiO&m@q0LtT0xfWe5GOZJ*qf)Op|epWby%=0GcepNe7 zqJlPV);c3wPrlQlX8FfO#XC!tg{ zV}M= zT@oP1_+ml_J3`V5b2EpUC}jagzNdmmMjCi)Gbbn->i$v|y@e0RHWM_WW=A`ib*7Ku ziK?PSMUSFpi$3~Dz{DUKkf0HA94$zzk)pc;HqbR^1R{&HXMY<#rIypY6pTx@Z1FC* zdX2yQPRHThwKHsN_FMEs>>Tr7|AH#`?c0t?+k+*r$_Bt%fsPcFPtYhpg#Re!_QM*w zKxOs=5d0|*ZS_1(;uIy-6@d*;52TER(2T4sh(%N`0}iXvWq4GD|NfrLpzADwuAzXH zd#eqn73^M;RRZ^JYV61*aFH^+{ihrsSonluwYvzum%~18{&F+_y6YK(=#!6NdiOh0oZLgCy@^&SR=k&XQ49=D8yj zz58?t4=g|Z95hEZUDI0_!;Z4c(T$S{@T2p%0J;_V_6U~Hs!@)L<&V!o@&PN42W;%^2rC4QmSo@(0iIxF(2=8K4b+C#hu>95 z{eI6#V*Wc}`|C2-0yQ1KP*Y|E203216B>H|ismsk`wP0e9~U+ne%NC0T?U*S@ir@2 z1n6LzH5K;pK=(j_0(&2|^2-l3&UoBBNh*YBK?c#|-@msvXTv&prS05*DkD#W=v#1A z3Y8q;hn*6dpB{+hGSG_N^v!w)NkjnHK%oout`{t9c1%JQ96MNI0ubU~p|t1QE^m`w zE}@8dJfMb}N0kKytfxE2ByZhpa`}Afn55&tUILwE%gX#7wGA(giHwMh%?ZA$3{W;^ zHzvWX#DAO4h@zjH?9y2h$X)pAb;-EYEsaw;da|ZD&JEQ2L>wCMwsKdxKf{TgaQ>gs zI!UJJn6J8r1#lVE3Q@o`7@M6qZIlVCU8T>9NhI8arN*6sr5=!CM7=U85*Qwxkk{Q1 zU_U`;(*q|`W`HozLj>1l>FtJYRokq2?r~hCKJ|RsnU>_;#(g!-J@f$3q z(0?t@DR={7i5Nz}29S86>2L>q(bg>X`J( zVpj2xk^)P07<-mjywb&dfA7F>Ss{IYR zb|iS!f17kI2h`<9QV&R&owct!>}N1Dd2EpBOlM)|Oz!;Ih)9dD`vU{^Wp_MSAjOLn zedk>F9+WEu79J!1s)-EC4T8Xe(*ItKb44y(Q75n?jI2C=_e&g0b1fn<7tNh;?Dm5z*i}vCdPPxesO;_ZTaSt}g2l_N621BP&HLb|#15GdQc} ztH*xA>fPjwNyH$K1iU(mefwhl{ei8~ne=|yY;MnNu89IBzP9traV`*n1Vb2*o0mFCF(bi-pn|7q@XX||LF}$F=*z2aO8-_ zHf&e`oJrWw=1gVW#bek{((JzN^5upyZ`&*-i!|tH=5^2eSpSIT)Aqmx>Q+x%OYh*G z$ae(mkqI+bh1$u5a5o>?;AqE8KSDdvP~UJq1n0 z*N(6Js%;pGaw^fw4hVS9%KK0Q(l6M45Kx_xVvs8lcLS*utsX?!$}q`9A@F>FC@6#a zlPqB_qiK%bsqf3b=4-q=f%Z(hU}&6aC(M5IGRKMSc@sLD)kw@UFE+#nG_N;IFvpL; ztsGYQPH1d{^fBQbCf-Y}`RqxWoiH?K0Zk-Wsd_%D zg!sJ0qY^+Uz#D8f1e5@*8c<1GhQo1KE-~CkQ5Pv~8%DBA!!NtKTFgV?>?NSmTJphV z8!GJ48|n zSTY_`5F_Z7Zw_n^{qzy7;#Xk=;S!raoD$@%R@sV)k|cWTK*76%z1OO2r(^kS4B<jE>nZr z$QJc0xX*Brok71O!BFiT^O1{C0#qX7hc$D&7(Er@Gw=$EUWv_#taZ~}1) z^gUKL>o0dQBtH5GAl`$t}?ZHHZ%oX6=8jNz=y#3YuV6yjFB>wM5bS%-)TyN~);PwwWjuRti??TGI z#li~_6a+P!KYG7RTNuK)5hkJjkAq4KSPFOU3I0AH1p|tCN7>p4DNmmPok|$>du~pA z-u~r<7B!E$rb^2(jlrs`jnK#Uq5~QJeJ~Ak=YOop?el=OV~G}Y>qmZCyh)S-9f(!Z zn$VRv<8B$7){@O89AM{a$x!`iY>iOKmyz&OCidKJI( zh0-+1$~6(OHLB1o+Xsm7>munsYSl)fMo>jvWd#<`rI0qTK} zjQ|A^puekEXfTQE+rfnt9sMDqW%~XT5hz_vb5PbY$-481eZYnjQlhuFt3>WB3SBYEwZ z*&n5mdtVYQ2x8~?)Gw6GQGC>`dLA#Am2@5vmVoDEAx*{Ib;;^QhyD^{J`K#$B=VOSibc`cy>w&Y zzzTr08vOEW2dSk8Mox@eYEk%HbsMx9GpdgHh15Z!ed*4U3x}kk#P>S<0QH`}9+nGd<^7FA z^pg^Qbmd=Mhlda!N1NX53H{HSMihrrVB}(UIOnk4;h^5t0`wI@!cn;6p}{)oqMW~= z334T%+0WJ60LIxE)W@K{0eY?2i1(K9Ow}a1#o67x#4^JPp}dVnik=}^ z*g8Ok?8D7%!>YkSGJC{N9X%#z$lvfcFXePieG*kaT2@=EJfPW0Bjzpj=p${(Feui?)_6n55)7Rhy~D0B3ow8 zgef-b=~x^wr1Jar2cODhb{vv9aA5w)yf;1jdV4uJx6SIStB1F(i5>j93eb_?ymSz#dKHjX5!=2io2+#lkg2DF+ zBj2vWf;ElXIgfry9vYCM7YNCsQiMME(U!Ams{L2KfD8ut`#7>q$CjT8?r~(%^dva+ zRYD<`E}RX-KCYzneS26|t3_2*CufQZOI*B+TmqPg^J|p#6dUMxD!=$WeF}v&s&`=z z1q_jte$#nuC-N5s?vH>j8;FO0Wz@P%hYWE4PY1bX$v*&3WoRgRK0`u_PhC6arp766 zrqepQGGPMk?4qzjdXd{l%vCoUz*)3Mv!Mz!J&n0NzoBAkdm0)LsMiA{{2EOE_$D)7 zT!NFZXS=4ur^9wau0=OBxrjZS*TRQg)8TR?y`6f-$?WfLDQHcwv$@ZzT*WbXk02@r zAa&{OvaQ$!Irz4xI9DD+P!{vFz<~(=aymFK<7onQ`|HcL&AhiVVlN516j(7-ThHZf zZ0)8i?)673PsRN`UN1q=SswyVYz^}l5b%p^z+}?V7-Ki7kKl+l+$XtAJ8i1Q#%>3X zs;}+KZEXpAsp2XvbhfxhH6`d=i;6l}5)7 zk1V|v#}miO@Ngwl`+?fws&V6y0coe9jyrcm8LCiDzfb{nG2*kb4q0aWd}a?LNRXdQ zc&CLn0JZ7Pe*ioc07qy%5L%9&VmbekzY;IQ6)e<{9)qN2de(!ywdA{#q#J4?SeUp_ z>5u3YBoO1i1FxOcP4_Dtcq0G@;C|-XB#!a*D-$N-*wjO8lXuy7mL~hJ^J6QQpLHNa zWM5`t+Q!V!2mGny10zj(L7?>)#W@B*^^m1CsTC6hvTUeZXdqNvxD5LRy)4=d*Q@I$ z{f6gZopjie@7g;%krvHEhyct}v=rVDpvpL{*zjIGCJm|-e*T7uZjdcR+IWWj#}6g7 zw_q2Lgt0#ZlHFmP5Lqz&~;1b6RdNtBwSFg1xX*(8P3bi?*2qaFV>femy&uuJwr77Ywvz8fZe9N zbi5*BPf&>x3$1&e5* zs}dt%v90rYoFHiumS$k7pe ze}z&$nkvXTyj~AmpC(z4$!ifDpFXxJjf4pGq%{JzJG`3!@@|>a1%D}*`VjC%*A~)Y zf2{4u;CWz{;X6KQAqA1WFct+zZIXT#^I7YmkgMxLEPLZ#E~T4o&F`i)v_gMJa0IFV zGDsZnaCyHos+Lat=|nhIS4y(7U#ax34t{yjD&Al@+Vma=a@`(L!ae~oWHc$Dpbni1 z*I&CaN(Wfb4lmc5jqT+lc^$pHZhs^JI6-XJp3J5b@4?%{C6vm-sSv)ol*@0+dIE9N z;%LN&B=4Jf^@3k_;VBU@=?zpBy+o2x#L9{DUPY(4f^H>0XLL~+1#-M9;2>9k?SwJ$ zdeC!bXa$Ntym?DkS=em!<_W|a^6|9=K39g!0wAD>KZctO>wEn+J{wlCz5fb`pd@NZ zSU$&^@%BjWV1Zy+P}v#`UOmAL(=UP3)8jW7ulRNZ;#CXU@KV2iftJFSKNjNB=z2B< za(olaz0xPl(FR9Ee$+dR&HB5*Et{OSdw-kyR}Z0x$QqsQ4}H(!)PAd_`J6b{pA;v= zy3aMATT4BNiWNK|I4XGRYbEqr}-wLxm5L3_$7Gk-Yk!%1uyk$cFe)#;` zb{>_&cPMJ%v>LCDOb`|>^w4LoGjQt!mdWO*5ORY0zh^3dGDAE(z~DM7lAWK(v5cE7 zUJce09b}J!0%rb6BsDc_w{uQ_|B;oIUZ+(d!vO2A5^dg}X4475&}c?VV33Gn?r12; zTLb15G$lRhFh>%hQ{4dA6)qNvPUD@D!jxaBVK$rHcLdbFg>vDi;5zoXC# zWmfaFmG>0uNCqN_+c^@!Dml~RQhcUQO}Ma9$Gh_!C;aH>V*AVEAcTPwIUt~AM#Rp; z22rtS0CYVugRUE$s>^&6;;P|{P;gkNF8t>#301`W^By$IFAF;;zD<0W1WgP+Kp5AV zm6ROv!x_$g-TwN~8;|%w0UQn{U_d@62o;+9+x1ipygH2E@ET%nJtTz)c@&KIKP}g? zV!KNy$+ZgY7||4fWr8XddsP9#1JIyJde;hJ!y+V0^n+GV?&*`Xqz0$S_CQbSM5lHb zpBf(Bf>F}X4{5+`AWoPpdJllNfikCu;=6M4g7V6kuVGyCS zfsyDXsUsgW<`kPKgZR{-mXScm4dyWSXE)FfT2&L8gHnt~u1sG?AR0&VZ03$X&J(^V zK%h7x2?+v#@kV12VBz}8{xPbVZC*hGb}&p@hMUXC2k+ZA{&v6;-bi75@kLrBze~XH zu>KT4HB~r?6MYTz(^8HFCKCu9x_Nl^FF55%^+sg0a>G!7d?0DfQEI}x5pL@2y*9ji z;qR@0H^oY}!MiZqldhA|Rkxa>+}w(WZ*jYf*2&N)K|>)QR{(;TI|o`Ga}t{CfwE(M zfPX{2&!%b4h##0A$8$jhv9^IEvTMo82R*n3>4OiNXZFA)Cb`f0CdbE00N>21&Xi>2hijHBz=6iZp(95u(V>@n0%RV z`IJ0BZy54f;K3+&k#3Q!lxLf{D)g_{Cjg=1_^yK^4g;g%8RGOT&Hm)`P`sCsp-g5rx-IQH0wMql#o-xJJh82rhaDQ}a6plG)d* zn|Dix;FX!v3BAVdUw*;C0v;2pE{ze^&ot9NPIK6A(oV?w{*LEr#{M z+1dkWG$*k{_befxxgRGng<#5(R~a z27UCkqiFS2gTqqwTT-Kr!Hbag+9>K1Bv*WD>q4$b^a^g#F=C5uLL7I+O<9#!h!EJT^$h0{Wda_o2J>t5 z&{J$Ldb;9Xh0RiuxVm%l>_>wQF$qTX5@LPUhfz)7#&4h+`{5OxYIcsZnKynFtHz*b zrN7eK^hox0Tlu~0>YtBpE&uffH{Y(Dc6)94g6-p4G7Q)JAIWL79q^$4QQct7`%j%d zg$jxFe1ydR5RLjZVhQFWJ5uf~tu@1Q&Ihp>ST3?v(=KSo9qloJ5r@+OEs=YtO6SUM zi16FLCxRf}nwG>I7we#F30jm5gJc6yDJ`(iFZo^e_wiKr83q?EeT%g%)yeO8a>}WD zPv#GAJtC*HVow`w$B)m?Mwx7$6&{rq#w~?1{%YRIfVTw=U7Zd|yj!ee=g%tZ@T)=w zpr3w4JN{VNN#L#&o>>R-M3Lj->1YOj-l+I!Qx+Wrhg zkE+Eh-LD_L7;OLVXQA>3sWO1rMg2SM1OC;UJOJc?J-{>QvZNLW)JGSGjUVA!h!#_1 zTkotwNNi5NEza=nMSIBI$35W-`H~;^EHtApPV^L2Q_m3ux03^XI;1@WifDXJoVo?IK}4 zh|rqfjK_OaF@6cr4JP~QLApF!f5)N?ttm#S3n#V=q@RQC#&RSAG-|OB0vlk_QwkVn zI;Q#ZW=0e-k--ra1OyQz2WLPK5fBl{K@btiIR~9l5V}E>ph!?# zMS>tX2NjwOl0lNvoumDthp1v7U#mtxA=!;HtPSyNYEl|1p&BD7&`MiotvYFL;R(#0vQls(JZ$uZIw-6FjIq zlEb6un#nm>giSFiJ8X}*}HVdPW&c-oi#A)i#Gi6s?ZYcs>NBz5t~k%90d0f`#i)DN;Qs0jO(_ZB+UCORm6o%IZ8^*Jdf3{8bLd|uGGa6P ze>P<$Y$1&MR%O<80M#w7!xK2~<=B^jQ*<5s8(Y$@boph zblD&t&MYxzeoyo7X$P~7l6 z^;3hFGyK!+{KpjUAsF*(DjdwqX2j=0VhubLG54JPmqz+3N-b0Oa@Q);%8WkxZ)u1= zjVdT_h6!!~O^E|{%U-{Q^Jh;!AfuJwB?AMkc05)6srd3~L~UoPDippK%?>_(I8bvz z5T*g_p#k4J0tacp5VFHtHi(2#o}A9>@+|9lknGIVIY&@tz56bveR1DJV!KTQG~r~P z=Mf?KE%iusfRCB^IW;a-%6tT2oC}Vjd#TH~kBOfLyITc8h{O&!W*==64anOzJFtbzpip{x;h)c~+03&krh~ z&s_WorF@hH@)(`p&+j_E*|!Cxz}Fxhjt_%<@kDupLh*S&WDM%g23Tx=Z@EgaZUBpQ z^iM(TbiTf2z)PKKQ1n#_|3K@bG`0DrUq?ZDYSPIm4(qSxxSiLqM2Q5Ft76mhGcbBt{I1pS49IBx9~X1qVU^|*(d?Dcg~mv%ETTi4DH_d7CbgPj>JQ~AdH zlplRc4I1ba9|(@-a_QQ3Y3z{beu#`v89}GyvYVKZ;%KrJ+SK$4)C?Xc(G>^f#IWn9YyC5Qj4Ilr>47pzfQ+VtpXUBkZEt3ql zG36I_?BhHv(mJEBbHt@}1vC=ki1)8o`x=~(ND`buW)q5RKfUe%=qJxV(Q6suh17Yl zpv~qrGt+-CvzSy;I%_4=$)Jiy{Tsk>SG86!$TxULeB_Z6 zo+NDm0X2NtT@pv!AzBJnaqJ=7Ei4USA8^f|SP%Z>ytt-ZL z>UUvMKQ-k#!(RbicJxnOgTQEf0nB}`g^kd4XL_QfA4k4PyMh^yW6q}m-D0~LWs_?I z;OpU{vOL~zFT;P$n%SUNDM)@GuDNQXs407$vZi5d>&8^Kohs1`fp4cKW^O4mOgR z>Imb5R}shyMo$f%yja0~f%+qYXWele&|K`N;onaqpgBM^pc#uY+HmAam&yoZLV%zjywL3NX%U?-k=lpPY=d3&obPqL z8E4aVl(iDT!B3vG@J2|l;U}{=&dlF|&iSYdE^M8bK*x17a!DNA3*);Y^sS4Y)2_ZZ zT7w;UH6PoZ0#bZb5sU`IyVSXrNx6GqM+S9i*=!|9eB1s#^7G}nprQb22I??U9*lZ! z4j%yfb{S4{yar`FkGRYeQTjPEcf7NfxA!tsgVWN`p1$XGA&+VI^b*O8oe8>C`xS>W zk3r!4QEAG0?u|`wAZiB|`d^{1YxHKHMCeoMMS)=rX?9c}Ppf^HYM?k+6eN3IG`86` z{4b1UV9uuFkiSvtPX{f@FF_ki7>dfa{3o`uS-66mgfdP=cdJPul3tDn=q7>yYPy>m zwP6sDaWfsjM?aei_Pod`LkY}cUlJjBbb!ntVZ={kd$@lX(@&zVU!S7hoQ2SCcWFGm zwU~kGqtw!BwAqYSHN|Q*&aKZ{G?zcI_j0S}^KmDclQ*-=%f=RVD&Qg~-oNCZ>89sgl(#TY zf(Q$-5~$@O@yA*5Jz{9s)}5X72|kpyNMWl;G*S}PfV@S`t5I8a^v$LDsTBB`Z`ywT z(EKSu8I{{P%E7s+<0r`0$cHpTB%=XNM!r@U1Uh;*4z#9uBput6wAHj0!|2d3LY+eQ z%w)Zhoay&yf+BQWwSb*v+UR-d*lCp=505bleK64g5;U>Qn*qb5p1*u~Rx??zVW2v~ z2^y7p!31J)r_oTASm`&YQ7O#lT7oX1<9>|jw@@EdrHH*tXZleA1{_0g{@5%YBeqeh zd&x4x%+cWp^85!_KAATo@A{2cckSMFB zJ$KNuS>pEPj~v@!3@pb7G;ogDB$i(<$B3QG2op2HPw2jaDHWZ%XWHGMw06B^LOFDB zY{``gg=5Wje2py0&XtSR<#-*<+gJS(^*_xmvjRTP2x?gwvgu0c!m*Ef%wJW-X;7%pL5J|=Pw2q{#LKL_)KkpveL3T^@M@BENg44=Hk6*B z$u>aeMY9m5im&}`VMK!+{=vTZW2l4fx9RDLv?C*E&}2BDe!}*h2LY2{|Ayh2MbJJk z!LaM=fLX32X+4Aa3DBbZ&sreVgKgZZ8So>3S>dR6pnt<7h|<+}_PVz6jm_n_9_{vy zcwk+uR9^3u6kHcc*(VP|o5$n*XsNd=R^=L$)qkgeesc(xUV06UqfH}PQVeC#rbBmH zbw|exIB|ML>*mdyXaYbcj8fdbl-LM12DL5Du0C@PD7*Fko3AKLU&Q&9L96p@lLljMV9A z+@~t}3|irH3kWnoUy4JAo12?&z+#{-%!MzZvpjV0RYC(M3b+4@16+zwGD_JlS!{H^ zczuU3NfV)84a*)WHK%|eLC-I~Z*3pyZ^6fZi(u+Q#TVO*-y?RhNorBkvNc2dUFaKv z9o8{6ex7CBo$wDe?ai;B-w}JDM?bWzc7*=vgx&u>0Jd`DNC3xSy<>`<-o>(DCOQY$ z$n)8|0lyzgYIp}s!ur?y8W1CxLt_>JL!sjJ^PSD1!!tl|jBcVU*CA5cD-R|HPMkQf zfB%iS@eXIk-);ibMt`%1lm&+7X!s zT{9p@)T4<`WkLlem%=H1%g$+_cp!AFe~n-Rkc3y8xhN14m1rUNDAgwe9Ci5RI9G2xL-YR>gJiZ+Hf2Q z_EO#o+QzKsl`~E+jO61sRP{X_JaowDk8tBRDNvIeaI(PLTN1L`=cJsg(33nJok5UuEc`(3J1HK`DLPo~WV{TmF<{3kjO-F|bfr(8) zCyQ$Zi&0Nt67Q#oHCij^4n$c>g_z&}N3`;HI~V}`EX?xibQFXpwQV-=naKhH zKz(jeO>!or^WfRxL<*R$nJ?PYK3O?x98(vvmBd%e;vi*m_(y}~-~NI*P7RCGLtbOh ze;v(vLYb_OUdVvMHx^sL&9y1QG&&(Z1v!9CYXu>Tzt)e}`qvZ0^;3rVLp5EP zU-FzlSQlOIhK2?!`AasRG87HHX2aB|+;49cYDz)4iB1LTYYfVoRB=<-nan*?Gy2*yZnv{HY>~Bu>>BOCbHu)PrhF)SErc zM+r|9A8nO-WuE)m?PIwtMIootPVB{=GXH=ldyHMqxdArC1LHmHHaC>Ax=7;~2gjo} zEWl5tn!&%``sF%jodyt(XGDJFx&Q0~9Yw=mZKR-WKAK&$E|t@hFp(CVTzhsAe72iS z@a}gqYqI^O6}^^iaXVR?4G#X_})VZEa6OPUFyw#SOTbnTz1>(>U#1p7w#Q>rUs zdb7l&DPQTSYo!jd#|Rx=gSy?aT=nxL-0yMaIEttD1d1yw zAFPi(FyC5JUqK8^FA{Yo{dqItzcw!F_xk+$ z&f^uLcL|xJoW=fwb9Fe&rRL-0;S-u=rA^1K^^+>IlULnC=_x5KuJzHEn0lAHdUsoD zEUskLl7mxLcw-jcbCBy@)_9#M6U+@OWeNoHD{5nM0W&F$ap&^_SNSM`FEjm#Y)>5G zLWz+w6lW$XTZXKS22>2x$+?4(uHcDw1Qo+1I3mn3UBrpOV=eW}7tzBc&FK4tf zij86gsO?*Fyp_6IGNd`4NIRYPq=eU(DjZX-s+c}Ilx#@Upw(liqhp@BaWDN3nJAUY zxB2rEu8Xyrqz+9T(#V?=j3$9Y-3)Q63o@h$7w>%%bK|(6$v2E7vUl$IJ$_01(kZGl z$F_rpf}GnK0dfNCD`2}4Xt;8iugG=9`r)tG;%89s;9#E>5>+|Oy&^X%@tfA%(!0hJ zp)6_jx1>w`&~%nw7Th;y=^3+0c}z4N9l6}+Bqv&=k&L7@a#(CxUuO38YZ0|tdn~M1 zRODN8$0EpyJ|0p%?M(+dd%ZbEDKiv&TD(uF0JSei`dL6laClerEd;VS60O&;?AET0 ztTNd*boDUKQ)U)%mVM^?`WozV>^mlVvYWUPGxQG9k?T`z0&FrQ?+e&)`MAI1Y~nJg zN;B5(XqE{`=tTubjH!D~O)+hL2_zmc~g-kPBr6RZ>EATeMJZX>Irg@zwO3bHIz z(VEY(ii+M}e@)vaQvr`pVvG*ud7K6dS8Mc(4k49dW$O1}X0PRnm|dQ9@QG-e<~Upx zXm*XIc(pEOxNEi=N8|hD*~ZYBbs3Cfzd?m@@E5Yj&0B~@wK-4R-a&4d@H^z!H6Q&pyPbfA~)5d&%U zGAm{6K3}{2=ar1JrR)@hr036xpZ=hkQXp9O=;Q*WX|g?C(AuLfNQmtk(T7d%q{m`$ zaily(L$xI@ub)0WgvWg82EA)i+rboh&b?zV)Ai`Btgy*n>3f~bRb4^M>n^l03$5Ng zTlFN`JoHPSc{-_jr?n!tEtQ-jfi58bps59P5xRLxUDOA z3JZ$;{_;_Wz=~d1m5`Wx^O${~>XZ8+!A?qSi-E(#J(0xn%Y0+=arB<24%X)jpx4c> zU)7f^wsK%nkh^A?;s$~el{N6|=i%d9$*+>%Z9!28*?On>s!dgGtQ1M2ChtecEj&|_ zmbsQLQ=O@xz(Q=cKz6bXsT@}(A<0;ps5GC=_@oCqNl6#xN3~&8`x3|i) z_lBDhyz0F4eD=EY_~;A!UEV_5&EOEh&tQK#&EJ1Yub8{;k*zyZIV>*eiOh1RP>{cW zWT3P|4Nu&*`%i8c!I#i_b3P-><6K-7qH}Tj7z1+`q4F5q0beh*cYfuA7h1YnLrCx>o`z>ebd151j@4lK%1;DVA7gS!^cXazWFW*0#>J748ekPZ>lr zKYw4WVQ(l=V4hMWTJ>s8jfpmAOGHHH*gZwUSGpt6(_Hv2YC-BnVbzBZPujR0!tZc& zw81B`=h;;pC<~YTMo*q;4HpVp_Y%y-4~dLOTvle|B3|0b$yxRmnpCvZW1<3^mI}i@ zi}su3@*Xy>jrEbJi@p>|q*_~?Rn}IzzPSxZ;_&H9O*^HC+C{r|a;nOdymsDL)$>cphwvKb@s7u8rz|`tLqI(L8OgY1-=TenUB&ONBS=LI zxo`?uPf%dNLCm)Az|8FI?8`sb71c!PufP5xrpiqGQqumJ&CA}ysx4+={2!Ts?75+^xCB~3PoaC@ySXJ%+%TmAGvEylpO zWzT$RQ??md+6PO!6@pdPOZ`Shwnjb%_DA*vY{+iiQ`Lj6Ri>-3vyEAgFv7Vl^z!w~ zX$+@4*}xfB+u4I-W?N}^YTvN(rgt`zV`%<4)` z5u=o@V>qAh9^bNfJ7)6GW2VxiMb(=Qc`uk9Kwy6ByYq+SCJ3j0#Wwj~4kVt$csM;sgH4XmUOSy5Fls#&L}3U`>n#Tv`JUD(Y1tepGxqQ*Q5=S?WR95A_LEs z)wI3T38ID&Hmvw62oG#nC_D))>^^Wj_lZw86>dQ&)6 z8m&A=GSd~TRzaJEU@LEo3=hZ9aMv{L{8^Gi@C!RVuLcb}N=tPUgRj8P!u#P_db;4J zPoIooY9|>+441;pKZe4>!jX*ENl7?=f4AwuG8i#t^6vICO?I&>aHN3z$s(o9p+Ywg zkNY4tHaQstB)d%HlPBp1JMGmjUD^v6*P+X?Nm=dk<+uEp4^K?s@7}!&^E`uzL?S#Y9**KTviux}R~ z930@OhjIc;<9m8~v|t`AjJkX&>!+cqNe=_9#gdJ+6A}}70A=s1t*sr|gVC7x-ulOG z>{A>8Y>p$BHJ~QfneaqoK(>v4-mQ!^_*yCSixJDk4ZZS}Y-()W3&Rb~B`sk(v^}45 zw_Q1F!^e;Ji634iC-aF*NHj5EJ30y`Y9G1G&-a<>W}B^B2xRVfSDQhJutPgH_7AJz zkdl(Jo5*Z?MSh!@m>38`3L8jSJEYCpZ*hb?Q4beOGh zKu}FzEj)w@qdeI}Ed3!cM|QB_fGNmbl3uR;XEkl@$104MY;0^`9H1~>&~w7VP>GQA z$IhK59ciYFDKN+NNVM>atgNgXHa4mzm_Kq#X6!3hyC_XVCa^AyCMeERZ84;WKOe%q z9{&N`v(&f;?XGVs4B$LOYL9hhJP%#)9G#qA+TDHs{(XCnok?ieE`v8OVfXp+$J3(D z-#mKsXsW|d9YP(Glao>BjUkF@`1FYhW~7c>6cQFD!k|My)1YWth3-xOp-K}p673BJ zESp{_tEs7l5VbS2v&&&*EBp`)`Xj>pz9xnWs>ueEwm~Z!0H_qZj|qUQ1BdDZI{Av* z!yjSysWxrMnV8}6aYS(quqQSW^gAY=K ztEwahpOq_ETN*RpPe%u%dX)$!_K*~OsdfbnM?-gaG!d6pS68Q%c~j%Ere-Gx%oNkgibcjwxWa7M(r%YtR`?qEcr`3Ew6qQ89KKFYR`E-#8gRL&t39I9nFEx6(+4o+I}aV zum%#`PAI6q7mu9wO50!K?(W_fX^~tRU*aSRf=Gsey$f%TR#cE(J{q31u(l3@#Rbcx z97hxX_U%b=aq+vWtABYbqKD(KLDqqy8XF8Cqw(1z2>(}F2IzG&X|SMwPHc3?I;6Vt zj(z1XCf3$Gr?Ka+ULAuJe_xGYJ$k4cmX<8xZ~ll=jg66)9o?{gS~<2?FT_Z?89jMF zIC#C&Ac!M6I-1-N>^+lH4I&I$B|)R1sp%nfSURDTG5l{R0CZ$6NK0Pq zHlT&T&cwi!w*6K;bJuI~mpvMCysN?e&%~?u#V?J8R%1pCo1xViHW+#D54J-{ns3!K z9p3JBVq$gfxI>e&$XPcxHzxK7cn9!r!LY`JRPc3tmh1Y9S`d6c;o)_=g+mWt@T4gLtn_2zsb%P0j)3rS0qN? z!{80RfDF{W6rLJ}N#BE1u$+_A?3pa1ceig?S=Gj0YyJ4qwwY(x6b8*#wdcFo0QfJ1 z@#>LH^Fdd2w(k;^_Dj|YkKEv=68m~07ZJ)DIf>auZJ8+A%&aUTWDpu%wPxn#h>!(! z;D)vw^yQH8SJC?NhCtv*NJt2+k*@@MX#a_6x

*56>(V1pjk|nZ-?_iQr*gHK=K) zUsnc5;P}}oG%W0_g@r}k_6ogZ7o&v<3EPuEa?qwH>j=Z`?K%hOk$jc`;*Sv6fzhyU zxjyyk2>xK@C&JP9?KOQWc_WN9!OUl5Wy!0ntJj&v5P$=tYtwU3W)l3x``g3V7Y&C% zbJslq6#yuE9mR3itax6~e3&91X9tj_?D^S1a1JmQHjd8r_UUXF`9(u11_Z8I14h*2 zO3_m^s(N)HvvT{p;3R_Jw6?akXPB!ux39?V!&FwI$=YCf#`S;=&Rbw zNx8XVaT+zdkwODvNp+d(xs}#oyT6ysYoH`Yw{iLN{Co*~TGh)E$mGum zRV9G;#oM1UA>xF7AwJ#oV5ESsU^D$p%ejnn5@?YzaMf3!9W~t+k;3^H5;UbCza}t< zy#W*!#x0lYiH?Eg0*QQKrxFklu%qhw`ZEGSiic^vEDm3i-n^N3U2Po?`?-tz`mR~D zoJ%tgt6<16kb;4Hd0hA%i=U}w9?6#|UIT-giYVW2#qKAW+H4r@ThbJ4a;sopPX4he zsh^Kr(bHpTX=$OllMdHB3ob)^;w{wlWD1ENgMXyDlKXDG*H%|=Qj6#@@L8P&ghtZ| zm@CJ4fP}0y`(@t1dCe0ZlQH$kRLh5KA*5S?KENj9kdKuFVc)?9Q^|3+Z{G%sp%feW z4E8`rSC^2LCCpl-4d8${mO~1XIAx#&0B8UH{c^DTOU*R!WT}t{1Q@I8hcOHxdpy{^ zT8QpWhFu@Ay?_u}&PW>U^wL5F%t<(joxJ^B4J9ETo_ekF0azq<*Nn`}U6SB)4j=A& z8DVffHnIg)KiGZtt+!v%U9d@7sm5oC79#+R{rwHszD10vQy=H%4saT4eJNm4qHPu( zCa+!i_5DtBp^7Q*`Fx>YzBL2=cMm~kpcl9-Qj>fnYT`yy0wF$i1bTJ*W{h^9`W13Y z+D?v+axN||OlBH7I+5~Xi|yso;qux8IxIYVd@3Bf)h=GV-$VdW@*1_PoSjdz=`9js zI?`r!g@>EF@izue5NrmQ({F;^TzvS^Zf39!0)1Z!LE3YYjdd#PF$^d0{f-a{gvrb5 zK(xYS+5)CuURya@{4~iG>Mt0}WzjoAcXA=NBW-PT3aJSPKdc+k9hEQ7EfCmOL55N_ zxN)i|AkdImEW2L@#8yXBT@ZRXgvP?Rl`w3{vD?$vuepYPsY9p-6bLT}v*T$j6Wxx` z8@OI2k{d$1lc5p=aNz|KP4CK}2#6`TjVR3)V^=-9vIFKq=QK6ju4<}6pl6PHQ^xSuof>Ph3HeMzw3Mc=|wUXTwV1Oq90+zB;Z9i8V1 z3EXilP}QU>{JP>d+zD7Wv!`0Z%~u|+mZb$1=N^SLm8rx9jDd6O8G|LimQa~H$^ocs zXl*@+g6NM|!TRvRYBh=;J{%Ajh{6|hPr20;j)=yFXWn1`VV48n0KNih03w^_lHdta zZNI#SC-z4#1%OMXcnCIl;afy;6tRQ>X{nuN(-Ml&cI)!y1LQmafFf#>d-Rn|1GgKM z9Vf{Kaf$-(s^?<0VO_kdW5G=e3kdbMo7ep24*t;L6ZiCu0|yThAv0q#@E5~B$P!H6 z)M3|5@?3dambP}FI^27d6ac18Wy|HnXhVR*tCPWn_frOJ0xpj&bw>U2<^2GZ6*8@c z8WP^T!M7x8#tCpFK?#wtb(d(;(e&?ORoF|}60QzlM1XrD7UUPZv>SIoE-LN7F|40% z%el&eqbScfI}pMPt7ZUm?>T^IgQ&g?cHuby6IO9P;0PL;n_uV&-vtj|#%xm{z*`Jc z+3W{SF~}}y@~n$_Yo=-Rx?V^VgpUTZvDGwE)gC9;gZ<6PaxQ+Bk{VaE(#)MNXe)ZJK)x z&K(|SKY!uO1A9hXnw}<4p#LMOiSeB5>)w(YKGDXESpT#CUzR%PED#k1U;tCWB)URj zQ(qaqAr9c1qfTfo1Ps=^;0sfZU3!+Y_(0pPAtT-D z*6$c6;}#iYRNxJi-=cLv7hxWZGKvzPWUq?HUDB3A*~);k=@jz7WVz_k7CdcZNUL-k2GpyyCdTSDB!j zPHD2ru;>YVq9O-tMERvpxjneVh}kx88Y1GYFDvle4AOr23iI^uydn=gqY?_NVD7p3rUBhwr>Ea8ACd@f5aIpYE?MjyKw0CO+)=wijdg zeQRNTVVQ8stMkK^n*4#cDYl-bDM+GV1NO5&mK1oU60@HYf5Z3!$jmLh$FQ!u{_{8e zcplBtzn6xf`<9QYFsgA@=|Lp@2z0*xk(vC4G1(ITSPSB$q)nU(3J`VH6Z<`1?N+u`##+ zKm&+B!9kXrImJC%B(_~xzr@z|0FTNUJ2*I)GhHWvfbYm2ntbqyH!LiaNHk$#VZc;H z68j%ZOy+Pv6=ev~&nd^k1YM?o{W7}O)`u3BFf?{`vA_^RHSqV~fksZ2{H~57VAHp* zJ}s%1cbCVuVJF2mrOSxsA}^J^x{{KTkdP1n>vc5tVQm(}7^F7W;$*=7YTwk)P^G9u zhyomZ0)KZEUBZ3q68izwaD{I(Ap-Hx$YlnMjk(acAuZ0nd`%pR1&9|Z{0 z$Y1^^Oa|RiXJu|a^3*{`L*oEI>xTCBN7Jp&gf|e+FflU|06^49PS@aK%^*|;N`Zc2 zVq&Ce$IQ$Og5v;aXvH}ZDnZ+W_}4hrhYg|>2&c8{RVmA6qCQ0*Mr7*28y3y0KGhc#JKmwI`23{K#gB$ zG&D2-oP-nqVVPu!D-N~<=RwHKbo!hFP#GTcp}>%q)u)W(Cc3%Ll5{e;i=fIxUQ<(3 z+))eU$qwAq;EdE=S-zp`;1N?DM;eiTvGJaMl4@KQ;CTK@;K36Cer)bsB*gmNvq&iv zjJ8-RwHW0>i+7wI0#ioEuN0B4EqJpR4i?*3BjxH|ctMk3=fCIz| zgJfB2m)%7B?N?_dx_s#GcHS%UwqKlT4`FdvrkQVJR&9*+I+C&v^9?cDPzX#O@uD@6 zr+I*%2+s3;fQTyyMSzxX^-ro2YISmUmWP0-xvh=eN$lw1=inm|p$=dX0oa6u zs$8fANwx0IgTFJ0CIg~{qZfmowWo!ISZ3WAYNri182QS@A>a=JfpTombubfmtbrs2 za8QznUH4}78}*U#0RlzpxTX+u?HDzNrEMY)qf&SZ#9;)-OWKKox{*gzeU@JEX$Xmk zltZSW9p>uN+o%CqC8X7`&v#Kd5D4-1iZtnTBXMeIvKL@YAFep;|n5c~( zT$E{zP07x7(K4_ppfExCeNensIC^_{c6kss@z8AfX(T$&HjUoNzPlzZY0rTBF8h>e z(X{a5<7RrmQx8Z*ZKM~FZIQQUIv$fTWuuMXb zx$hggpGrVj&Bh%*Y`v!(Fy9IQQJ82GQ+!s(%tPmar6WO-NttxD0a66(EJ>_mjOVvX zA*Pd&L&YGz*2SxHLTqY~jZ(;T8fnM_tTUZ=hI45(G~17%15Xi4U?EE5X8&t={(Q@@=AQb|4Oo+ z*hRfApej__)C-Ecjf4qaYwX?&Uxcbgsbv=aLWYH9-e-!`($yu|pDPAYoH`<|fqvSN z246tX20<#~dQI&s5F~z00pK6Oi2|4F=ZiR_d2EcW}h4oBj%y1IzLn7&TaQtvp9_Uc(|u#HPT$ zfmEd(yn6UBSo`OYCW~wFhO*#jv}cxHn)>=)?TtNQng);MwXJ#z+&=e$g$LrnwmBHQ z2QKAt;@QB1rb*r{kbhvKvjbN5r5bu@y@l%Q8FfB#rBJmVV<#EPlbmxHzoUPtFugMK zv-9d~TU^7TT+6V0FPq_#5nQ&ot*^O$Y|$F7B)7B z{}p2EhdMiD0u8L8SXh#Q_u_yMlK^b0GQoe|(1Q<5zF(AtO;3SPRppIXis9<0Ca@Tb zA7oZO2r*sM(~D|47}(LTWBUZMnM}LBDfrAN0I$Gjnnk{rk-`pD4tF*c(d2I+V8E55 zoEHk0|M=sNJ~*Qgd{imC6g;~<_thxe+C=Z#34GepI*ay?=eP;G65%gO-=j25uqG2ko}^6Chpy~XiiyMHHYe=s54uO|9Zno1VA#_ariAH zBoXC?sCS6GF8Dz)u87 zna=hiUxzQe{J?(Q-VZPC!w50mFCY>=b?)`Ts7 z5Mnz0^#g5OgRf1=8~Opc>#-TO&Y5aMa}*))LLO$wy|^lSw;9!2R-je%WcxLSA!w12pY=#)jDJ z{L-Wh)DF&pfSMz&sH-cAh%SzBThqd8&q;?^*grT}28E&XW8>pESXexkXz0r3_GoPmS*4>=SXPn@qdRD<2#c zaQoJeNG36dK_sD&JAkr!0eqSc6jYQHcXoEBhdTmmg*X>HwKk-Fh`AF0^dQuDB;jJM zGBgY5{2T|dWYMs|)sD$j15{KkQZjPNOH|Xq&2<04gRjVkpx6NA(c0&o0Y3s;T!ylY zK;EI;5h$pDSzQ#@@#B@QGW~(;>iolX3QP~yLb@Nw<#lrcOc;7rlzszn)o5Pc|9&U` z1%g_LMZ=q-BJaWbs(Wvv%z`aooIe+V~M8-p1%oHfcRA9!02ED+= zS3@3#o9pbQ$eLLXc10jsR=_x&BPGGbgBA<*L06R`_>@TnC;tm}3|8@^Tp*Y{P@*&8 z0x=Scu)B*+M9V8!*bbsP=Z~_qF9rMxa=_SOZ1{BVEo{vo!@mDA+ zb2C}DrJ^ib(Dj0yogJdR@WJN+e8BW&0gy*>8GX}@l4zHn$MRx4{QT_UZw7k`Wc&VT*EOm47 z+d#SA-y{5yh)+Ygs4q0Y>B=5Vh5ZkTAHK4Y3Map#^*+feehjdHWO7S;T0A6}z?wN} zUC{=G3$d?KW2&7lhIqh)JD{btb$92eHgIB%0)=;$PtI&%$Y5afl`z zYj|eLmODQRpFUlt#9z1{ws6GOU=yt$A08VUQ<7be*oomFf;f`rVv{vEJj?dCf5y0v z#=FAiK$zuXlmz7oumdIln(Zo&Mnt`RQ28w78DRKB{F^0I$vJ%iIp3;yRBxye0Ocac z>pB`AXS|WC3KK8@rvve~fO+GKM*=4DfU?+H0#pD6r$U*QE1b7Z_g;m!ek5ex_`6IS zXCj1&eg^7(McJq#W^H8=G4T7m^%dkCA+#u7sV*)h;+bI{RZpdB@a&jR%53aEly8P> z)oS{^i`GKP>_I9;J%nx8(k8cyqXOc|H?>lV39E3~EIdRneu!sNIMF}SzSp<@&j)Bd z;OqZtNqp@fZN|S*@=k+AeXkI?wSInJ0jg1IMR#EG2JSK{Q*BU!_Wc~qIhnUT#Xden z0qo?r#$`MA;vHB8^(G!c1^Z4*w}k&UAI+l39A|slxfb$7V5=kEk$Qlv#}1 zu^x}9!u8`y@FP1<4PsdT_mcw%-7%KeOD|iZC9!W@wsnt%CE3Eqq*5>`k#?3Kx(rey zUZRDnRKQ{X2L&G~>1Gdzfrdo}4$LAygpSbM%QSC#l>_LAIw&w8z<8>skP%wVf4c*( zjlPH>j!vPDv-`2nrAkx9BdgHuF?NeU;9RFoaFeQx*XM;^ z%4)AkS-=5!58p4@z|Snq!?~baB=}Sc1={e|*rNO{VfuJemTgkL7k_OE0iP<;UMdd zAlRjG*XddjDEr3xIX3IsaOxx6*AX0!rK}7w0&%_Ky*3>S2R#l~dof^#47Im-{$ubqpg)xc6a%Uwl;44|7yvZ{4C}RP^!ko! zmOTYx@Oz?+?He;z52;G7Esz9Et3q)}5c?G^wWM5!{rZUhvYR_X`SGcvaA=>5qrpC3 zySgbFl4(~z+cC_T79T3DMeW`|0@9W{wb5kga%VNmhU`)GI$H6oYDfX z^_&~d(w7a^K7nFYZ0q?c$;E3+t}Ep7CQ*n_jUfS>YT1z{I+4-TconNqqTbEo4=HO@ zW_Y+$;boh?i#~b~C|X^iAeZsgM8_1VSH_acO?tqH1M=r|AT>wrTm5i;I{?$c^L74?k39^j= z$n?d4A!*OD(iMl(B}lMADa(+II{t=zei64}WNjZTLX^jhR24-HP*0y;C(gqMhzsS6 zk-34hmcpf&k>>(3zV(vR7bKU*)5%cXSBi>Tj#JfvW@=?wYIi}R9;%74sm%g>QAO}X zUWmkz?>2mSqn>j;l_TghK~8Q6Z=o6Be}xi~|`~ ziU_TPOdxtDKj~dLmSxk+28Sm7StXoN#cCgL!Ah~=UuK$AgVD=PWcS8E9oqd(zaV;V z7NR*=L;BvexoncpT0Q0QZmKf|izU?3dkmKvBl?hugUT)_5b)*QG?)JKJs$if5~&cL zJN^NS3DgH;r*tg9w2}Q7wp(_S+xly5BzDdGscja-u<+}|49K(G`n!V@h@c{@dvG{{mJbOY;B# literal 0 HcmV?d00001 From b0e2b5bfd2791dfa38ee9e4c9cd9480f7a01016e Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 31 Jul 2025 06:17:32 +0000 Subject: [PATCH 425/492] CA-414418: Detection of AD account removal does not cause logout For performance, during revalidate existing sessions, xapi query subject details from xapi db first, if the subject is suspend, then goes to AD, to make sure unblocked user can login. There is a backend thread to update xapi db subject information from AD. However, it can not handle the case that the subject is removed. (and should not remove the subject for user until user remove it explictly). Thus, the subject information is not updated and keep alive. In this case, subject revalidate always got session not suspend from xapi db. The issue is fixed by query subject information from AD direclty, and session revalidate thread handle the removed subject properly to kick off the sessions For performance, there is a follow up commit to resovle that Signed-off-by: Lin Liu --- ocaml/xapi/xapi_session.ml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index ad1e1a37a0a..a0bf847ca0b 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -472,15 +472,7 @@ let revalidate_external_session ~__context ~session = (* CP-827: if the user was suspended (disabled,expired,locked-out), then we must destroy the session *) let suspended, _ = - is_subject_suspended ~__context ~cache:true authenticated_user_sid - in - let suspended = - if suspended then - is_subject_suspended ~__context ~cache:false - authenticated_user_sid - |> fst - else - suspended + is_subject_suspended ~__context ~cache:false authenticated_user_sid in if suspended then ( debug From 92377bfe2fffd762ab922a015a93afa314ae000f Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 31 Jul 2025 07:25:30 +0000 Subject: [PATCH 426/492] CA-414418: Perf: save user validate result and apply to sessions For all sessions created by external/AD users, session revalidate will check whether the users are still acitve, and kick off the session accordingly. However, xapi check the user for every session. The problem here is lots of session are created by only a few users. (for the case of CVAD and ControlUP). This would cause lot of duplicated check for the same user again and again, which is slow and waste lots of resources. To fix the issue, [(user_sid, check_result)] is defined for every round of check. The check result is saved so later check for the session with same user can just be applied. Signed-off-by: Lin Liu --- ocaml/xapi/xapi_session.ml | 86 +++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index a0bf847ca0b..002b374a78c 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -420,7 +420,7 @@ let destroy_db_session ~__context ~self = (* CP-703: ensure that activate sessions are invalidated in a bounded time *) (* in response to external authentication/directory services updates, such as *) (* e.g. group membership changes, or even account disabled *) -let revalidate_external_session ~__context ~session = +let revalidate_external_session ~__context acc session = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> try (* guard: we only want to revalidate external sessions, where is_local_superuser is false *) @@ -430,8 +430,7 @@ let revalidate_external_session ~__context ~session = (Db.Session.get_is_local_superuser ~__context ~self:session || Xapi_database.Db_backend.is_session_registered (Ref.string_of session) ) - then ( - (* 1. is the external authentication disabled in the pool? *) + then (* 1. is the external authentication disabled in the pool? *) let master = Helpers.get_master ~__context in let auth_type = Db.Host.get_external_auth_type ~__context ~self:master in if auth_type = "" then ( @@ -442,31 +441,49 @@ let revalidate_external_session ~__context ~session = (trackid session) in debug "%s" msg ; - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + acc ) else (* otherwise, we try to revalidate it against the external authentication service *) let session_lifespan = 60.0 *. 30.0 in (* allowed session lifespan = 30 minutes *) let random_lifespan = Random.float 60.0 *. 10.0 in - (* extra random (up to 10min) lifespan to spread access to external directory *) - (* 2. has the external session expired/does it need revalidation? *) let session_last_validation_time = Date.to_unix_time (Db.Session.get_validation_time ~__context ~self:session) in let now = Date.now () in - let session_needs_revalidation = + let session_timeout = Date.to_unix_time now > session_last_validation_time +. session_lifespan +. random_lifespan in - if session_needs_revalidation then ( + + (* extra random (up to 10min) lifespan to spread access to external directory *) + let authenticated_user_sid = + Db.Session.get_auth_user_sid ~__context ~self:session + in + let user_validated = + (* acc is [(sid, check_result)] , true for check pass, false for check failed *) + match List.assoc_opt authenticated_user_sid acc with + | None -> + false + | Some v -> + if v = false && session_timeout then ( + debug + "Destory session %s as previous check for user %s not pass" + (trackid session) authenticated_user_sid ; + destroy_db_session ~__context ~self:session + ) ; + debug "Skip check session %s as previous check for user %s exists" + (trackid session) authenticated_user_sid ; + true + in + + if session_timeout && not user_validated then ( (* if so, then:*) debug "session %s needs revalidation" (trackid session) ; - let authenticated_user_sid = - Db.Session.get_auth_user_sid ~__context ~self:session - in (* 2a. revalidate external authentication *) @@ -480,7 +497,8 @@ let revalidate_external_session ~__context ~session = %s" authenticated_user_sid (trackid session) ; (* we must destroy the session in this case *) - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + (authenticated_user_sid, false) :: acc ) else try (* if the user is not in the external directory service anymore, this call raises Not_found *) @@ -517,7 +535,8 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* we must destroy the session in this case *) - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + (authenticated_user_sid, false) :: acc ) else ( (* non-empty intersection: externally-authenticated subject still has login rights in the pool *) @@ -544,7 +563,9 @@ let revalidate_external_session ~__context ~session = ~value:subject_in_intersection ; debug "updated subject for session %s, sid %s " (trackid session) authenticated_user_sid - ) + ) ; + debug "end revalidation of session %s " (trackid session) ; + (authenticated_user_sid, true) :: acc with Not_found -> (* subject ref for intersection's sid does not exist in our metadata!!! *) (* this should never happen, it's an internal metadata inconsistency between steps 2b and 2c *) @@ -556,7 +577,8 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* we must destroy the session in this case *) - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + (authenticated_user_sid, false) :: acc ) with Auth_signature.Subject_cannot_be_resolved | Not_found -> (* user was not found in external directory in order to obtain group membership *) @@ -569,15 +591,18 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* user is not in the external directory anymore: we must destroy the session in this case *) - destroy_db_session ~__context ~self:session - ) ; - debug "end revalidation of session %s " (trackid session) - ) + destroy_db_session ~__context ~self:session ; + (authenticated_user_sid, false) :: acc + ) else + acc + else + acc with e -> (*unexpected exception: we absorb it and print out a debug line *) debug "Unexpected exception while revalidating session %s: %s" (trackid session) - (ExnHelper.string_of_exn e) + (ExnHelper.string_of_exn e) ; + acc (* CP-703: ensure that activate sessions are invalidated in a bounded time *) (* in response to external authentication/directory services updates, such as *) @@ -587,21 +612,16 @@ let revalidate_all_sessions ~__context = try debug "revalidating all external sessions in the local host" ; (* obtain all sessions in the pool *) - let sessions = Db.Session.get_all ~__context in + Db.Session.get_all ~__context (* filter out those sessions where is_local_superuser or client_certificate is true *) (* we only want to revalidate the sessions created using the external authentication service *) - let external_sessions = - List.filter - (fun session -> - (not (Db.Session.get_is_local_superuser ~__context ~self:session)) - && not (Db.Session.get_client_certificate ~__context ~self:session) - ) - sessions - in - (* revalidate each external session *) - List.iter - (fun session -> revalidate_external_session ~__context ~session) - external_sessions + |> List.filter (fun session -> + (not (Db.Session.get_is_local_superuser ~__context ~self:session)) + && not (Db.Session.get_client_certificate ~__context ~self:session) + ) + |> (* revalidate each external session *) + List.fold_left (revalidate_external_session ~__context) [] + |> ignore with e -> (*unexpected exception: we absorb it and print out a debug line *) debug "Unexpected exception while revalidating external sessions: %s" From 27780960ad6f9abc3590798db87156e40685fca9 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 1 Aug 2025 07:47:27 +0000 Subject: [PATCH 427/492] CA-414418: Code refine for comments - Rename session_timeout -> session_timed_out - Assoc List -> Map to store the check result - Apply check direclty instead of remembering check status Signed-off-by: Lin Liu --- ocaml/xapi/xapi_session.ml | 44 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index 002b374a78c..bc85146e223 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -32,6 +32,7 @@ module Listext = Xapi_stdext_std.Listext open Client open Auth_signature open Extauth +module SessionValidateMap = Map.Make (String) module AuthFail : sig (* stats are reset each time you query, so if there hasn't @@ -455,7 +456,7 @@ let revalidate_external_session ~__context acc session = (Db.Session.get_validation_time ~__context ~self:session) in let now = Date.now () in - let session_timeout = + let session_timed_out = Date.to_unix_time now > session_last_validation_time +. session_lifespan +. random_lifespan in @@ -464,25 +465,24 @@ let revalidate_external_session ~__context acc session = let authenticated_user_sid = Db.Session.get_auth_user_sid ~__context ~self:session in - let user_validated = - (* acc is [(sid, check_result)] , true for check pass, false for check failed *) - match List.assoc_opt authenticated_user_sid acc with + let validate_with_memo acc f = + match SessionValidateMap.find_opt authenticated_user_sid acc with | None -> - false - | Some v -> - if v = false && session_timeout then ( - debug - "Destory session %s as previous check for user %s not pass" - (trackid session) authenticated_user_sid ; - destroy_db_session ~__context ~self:session - ) ; - debug "Skip check session %s as previous check for user %s exists" + f acc + | Some false -> + debug "Destory session %s as previous check for user %s not pass" + (trackid session) authenticated_user_sid ; + destroy_db_session ~__context ~self:session ; + acc + | Some true -> + debug "Skip check session %s as previous check for user %s pass" (trackid session) authenticated_user_sid ; - true + acc in - if session_timeout && not user_validated then ( + if session_timed_out then ( (* if so, then:*) + validate_with_memo acc @@ fun acc -> debug "session %s needs revalidation" (trackid session) ; (* 2a. revalidate external authentication *) @@ -498,7 +498,7 @@ let revalidate_external_session ~__context acc session = authenticated_user_sid (trackid session) ; (* we must destroy the session in this case *) destroy_db_session ~__context ~self:session ; - (authenticated_user_sid, false) :: acc + SessionValidateMap.add authenticated_user_sid false acc ) else try (* if the user is not in the external directory service anymore, this call raises Not_found *) @@ -536,7 +536,7 @@ let revalidate_external_session ~__context acc session = debug "%s" msg ; (* we must destroy the session in this case *) destroy_db_session ~__context ~self:session ; - (authenticated_user_sid, false) :: acc + SessionValidateMap.add authenticated_user_sid false acc ) else ( (* non-empty intersection: externally-authenticated subject still has login rights in the pool *) @@ -565,7 +565,7 @@ let revalidate_external_session ~__context acc session = (trackid session) authenticated_user_sid ) ; debug "end revalidation of session %s " (trackid session) ; - (authenticated_user_sid, true) :: acc + SessionValidateMap.add authenticated_user_sid true acc with Not_found -> (* subject ref for intersection's sid does not exist in our metadata!!! *) (* this should never happen, it's an internal metadata inconsistency between steps 2b and 2c *) @@ -578,7 +578,7 @@ let revalidate_external_session ~__context acc session = debug "%s" msg ; (* we must destroy the session in this case *) destroy_db_session ~__context ~self:session ; - (authenticated_user_sid, false) :: acc + SessionValidateMap.add authenticated_user_sid false acc ) with Auth_signature.Subject_cannot_be_resolved | Not_found -> (* user was not found in external directory in order to obtain group membership *) @@ -592,7 +592,7 @@ let revalidate_external_session ~__context acc session = debug "%s" msg ; (* user is not in the external directory anymore: we must destroy the session in this case *) destroy_db_session ~__context ~self:session ; - (authenticated_user_sid, false) :: acc + SessionValidateMap.add authenticated_user_sid false acc ) else acc else @@ -620,7 +620,9 @@ let revalidate_all_sessions ~__context = && not (Db.Session.get_client_certificate ~__context ~self:session) ) |> (* revalidate each external session *) - List.fold_left (revalidate_external_session ~__context) [] + List.fold_left + (revalidate_external_session ~__context) + SessionValidateMap.empty |> ignore with e -> (*unexpected exception: we absorb it and print out a debug line *) From 25331abd5aef719fd2cc28f6de3d0a402f6c3ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 1 Aug 2025 16:06:29 +0100 Subject: [PATCH 428/492] CA-414627: increase polling duration for tapdisk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polling mode is a performance optimization in tapdisk, where it would busy poll the PV ring for the next request, instead of going to sleep and waiting for an event channel to wake it up when the next request arrives. When the guest supplies us with a steady sequence of IO requests this is quicker (it avoids a lot of syscalls), at the expense of consuming more Dom0 CPU. Once polling mode is entered if there are no requests for polling-duration microseconds, then polling mode is exited. The wakeup granularity is 4ms on the 4.19 kernel (which also matches CONFIG_HZ=250). The default polling duration is 1ms, but we may not be able to reliably handle such low timeouts. On newer kernels we seem to hit the 1ms polling duration timeout all the time, which results in tapdisk entering and exiting polling mode very often. This is in fact a lot slower than just staying in event mode, or permanently staying in polling mode when measured with a Phoronix SQLite benchmark. Testing showed that a 6ms value results in good benchmark results (~40% faster than the default event based), and a 3ms value in bad benchmark results (50% slower than event based). Increase the default polling duration to be a multiple of the wakeup granularity on 4.19, which should work with both 4.19 kernels and newer. Eventually we should measure what is the optimal value where the overhead of entering/exiting polling mode is no longer greater than staying in event mode. (This is similar to how entering and exiting C modes need to take into account enter and latencies, except in this case those latencies are not known). Signed-off-by: Edwin Török --- ocaml/xapi/xapi_globs.ml | 2 +- scripts/xapi.conf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index ba08ad1132a..490045f871f 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -741,7 +741,7 @@ let ha_default_timeout_base = ref 60. let guest_liveness_timeout = ref 300. (** The default time, in µs, in which tapdisk3 will keep polling the vbd ring buffer in expectation for extra requests from the guest *) -let default_vbd3_polling_duration = ref 1000 +let default_vbd3_polling_duration = ref 8000 (** The default % of idle dom0 cpu above which tapdisk3 will keep polling the vbd ring buffer *) let default_vbd3_polling_idle_threshold = ref 50 diff --git a/scripts/xapi.conf b/scripts/xapi.conf index 91a5ea40f56..8736fed6c0d 100644 --- a/scripts/xapi.conf +++ b/scripts/xapi.conf @@ -329,7 +329,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # ha_monitor_interval = 20 # Unconditionally replan every once in a while just in case the overcommit -# protection is buggy and we don't notice +# protection is buggy and we don't notice # ha_monitor_plan_interval = 1800 # ha_monitor_startup_timeout = 1800 @@ -371,7 +371,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # The default time, in µs, in which tapdisk3 will keep polling the # vbd ring buffer in expectation for extra requests from the guest -# default-vbd3-polling-duration = 1000 +# default-vbd3-polling-duration = 8000 # The default % of idle dom0 cpu above which tapdisk3 will keep polling # the vbd ring buffer @@ -386,7 +386,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # evacuation-batch-size = 10 # number of VMs migrated in parallel in Host.evacuate -# How often tracing will export spans to endpoints +# How often tracing will export spans to endpoints # export-interval = 30. # The file to check if host reboot required From d31de0296ea734d5cd766d771f102f3ace8bc9a3 Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Tue, 5 Aug 2025 10:12:49 +0100 Subject: [PATCH 429/492] Update datamodel lifecycle Signed-off-by: Bengang Yuan --- ocaml/idl/datamodel_host.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 97d35adf95a..27b1bf60410 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1339,7 +1339,7 @@ let create_params = param_type= Bool ; param_name= "ssh_auto_mode" ; param_doc= "True if SSH auto mode is enabled for the host" - ; param_release= numbered_release "25.26.0-next" + ; param_release= numbered_release "25.27.0" ; param_default= Some (VBool Constants.default_ssh_auto_mode) } ] diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index cf4d59eae47..3a644fba8cd 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -97,6 +97,8 @@ let prototyped_of_field = function Some "22.26.0" | "SM", "host_pending_features" -> Some "24.37.0" + | "host", "ssh_auto_mode" -> + Some "25.27.0" | "host", "console_idle_timeout" -> Some "25.21.0" | "host", "ssh_expiry" -> @@ -221,6 +223,8 @@ let prototyped_of_message = function Some "22.26.0" | "VTPM", "create" -> Some "22.26.0" + | "host", "set_ssh_auto_mode" -> + Some "25.27.0" | "host", "set_console_idle_timeout" -> Some "25.21.0" | "host", "set_ssh_enabled_timeout" -> @@ -251,6 +255,8 @@ let prototyped_of_message = function Some "25.22.0" | "VM", "set_groups" -> Some "24.19.1" + | "pool", "set_ssh_auto_mode" -> + Some "25.27.0" | "pool", "set_console_idle_timeout" -> Some "25.21.0" | "pool", "set_ssh_enabled_timeout" -> From 2fcc62daf5e3ad3237ca27a16ee05b59be0b4985 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 6 Aug 2025 10:25:12 +0100 Subject: [PATCH 430/492] CI: update pre-commit config This avoids some warnings, produced by running `pre-commit migrate-config` Signed-off-by: Pau Ruiz Safont --- .github/workflows/other.yml | 2 +- .pre-commit-config.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 7ec6914045d..0a94353560c 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -51,7 +51,7 @@ jobs: - uses: pre-commit/action@v3.0.1 name: Run pre-commit checks (no spaces at end of lines, etc) with: - extra_args: --all-files --verbose --hook-stage commit + extra_args: --all-files --verbose --hook-stage pre-commit env: SKIP: no-commit-to-branch diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e8fb2f37e0e..01bf34b9fba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ ## For manually executing the pre-push hook: # pre-commit run -av --hook-stage pre-push # -default_stages: [commit, push] +default_stages: [pre-commit, pre-push] default_language_version: python: python3.11 repos: @@ -108,7 +108,7 @@ repos: hooks: - id: pylint files: python3/ - stages: [push] + stages: [pre-push] name: check that changes to python3 tree pass pylint entry: diff-quality --violations=pylint --ignore-whitespace --compare-branch=origin/master @@ -134,7 +134,7 @@ repos: entry: python3 pytype_reporter.py pass_filenames: false types: [python] - stages: [push] + stages: [pre-push] verbose: true # This hook runs locally only when Python files change: language: python From ca9e21146a6bbeb4e15634fd3491582e4c3f1821 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 6 Aug 2025 10:30:13 +0100 Subject: [PATCH 431/492] CI: update diff-cover parameters The way to ask for an html report has changed Signed-off-by: Pau Ruiz Safont --- .pre-commit-config.yaml | 2 +- doc/content/python/_index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 01bf34b9fba..008a4e13fb7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,7 +68,7 @@ repos: entry: env PYTHONDEVMODE=yes sh -c 'coverage run && coverage xml && coverage html && coverage report && diff-cover --ignore-whitespace --compare-branch=origin/master - --show-uncovered --html-report .git/coverage-diff.html + --show-uncovered --format html:.git/coverage-diff.html --fail-under 50 .git/coverage3.11.xml' require_serial: true pass_filenames: false diff --git a/doc/content/python/_index.md b/doc/content/python/_index.md index 773f02ce38c..523c2018718 100644 --- a/doc/content/python/_index.md +++ b/doc/content/python/_index.md @@ -52,7 +52,7 @@ in the [pre-commit] configuration file [.pre-commit-config.yaml]. entry: sh -c 'coverage run && coverage xml && coverage html && coverage report && diff-cover --ignore-whitespace --compare-branch=origin/master - --show-uncovered --html-report .git/coverage-diff.html + --show-uncovered --format html:.git/coverage-diff.html --fail-under 50 .git/coverage3.11.xml' require_serial: true pass_filenames: false From 4fabd6a949b233b847f4d2a24db28df9f5070388 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 6 Aug 2025 18:13:07 +0800 Subject: [PATCH 432/492] Minor wording improvement "vcpu", "VCPU" unified to "vCPU". Signed-off-by: Gang Ji --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 8ab1acaba70..24163b98657 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -20,7 +20,7 @@ module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-cpu" end) let xen_flag_complement = Int64.(shift_left 1L 63 |> lognot) -(* This function is used for getting vcpu stats of the VMs present on this host. *) +(* This function is used for getting vCPU stats of the VMs present on this host. *) let dss_vcpus xc doms = List.fold_left (fun dss (dom, uuid, domid) -> @@ -49,7 +49,7 @@ let dss_vcpus xc doms = in cpus (i + 1) (cputime_rrd :: dss) in - (* Runstate info is per-domain rather than per-vcpu *) + (* Runstate info is per-domain rather than per-vCPU *) let dss = let dom_cpu_time = Int64.(to_float @@ logand dom.Xenctrl.cpu_time xen_flag_complement) @@ -62,14 +62,14 @@ let dss_vcpus xc doms = ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) - ~description:"Fraction of time that all VCPUs are running" + ~description:"Fraction of time that all vCPUs are running" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9)) ~description: - "Fraction of time that all VCPUs are runnable (i.e., \ + "Fraction of time that all vCPUs are runnable (i.e., \ waiting for CPU)" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) @@ -78,7 +78,7 @@ let dss_vcpus xc doms = ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time2 /. 1.0e9)) ~description: - "Fraction of time that some VCPUs are running and some are \ + "Fraction of time that some vCPUs are running and some are \ runnable" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) @@ -86,14 +86,14 @@ let dss_vcpus xc doms = , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) ~description: - "Fraction of time that all VCPUs are blocked or offline" + "Fraction of time that all vCPUs are blocked or offline" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 1.0e9)) ~description: - "Fraction of time that some VCPUs are running, and some are \ + "Fraction of time that some vCPUs are running and some are \ blocked" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) @@ -102,7 +102,7 @@ let dss_vcpus xc doms = ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time5 /. 1.0e9)) ~description: - "Fraction of time that some VCPUs are runnable and some are \ + "Fraction of time that some vCPUs are runnable and some are \ blocked" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) From fae641768702dbc09f0d02a5562f312c3b0e8463 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 6 Aug 2025 18:21:38 +0800 Subject: [PATCH 433/492] CP-53858: Domain CPU ready RRD metric - runnable_any Adding a new metric 'runnable_any' as % of time that at least one vCPU of the domain is in the runnable state. It is the sum of the following 3 metrics: - runstate_full_contention - runstate_concurrency_hazard - runstate_partial_contention Naming it 'runnable_any' instead of 'runnable' is to resolve one problem with rrd2csv: if we name it 'runnable', rrd2csv will select both "runnable" and "runnable_vcpus" when the 'runnable' is used: > rrd2csv AVERAGE:vm::runnable > timestamp, AVERAGE:vm::runnable, AVERAGE:vm::runnable_vcpus This is because "runnable" is a prefix of "runnable_vcpus". Naming it 'runnable_any', with rrd2csv: * can select only 'runnable_any' if we use: rrd2csv AVERAGE:vm::runnable_any * can select only 'runnable_vcpus' if we use: rrd2csv AVERAGE:vm::runnable_vcpus * can select both 'runnable_any' and 'runnable_vcpus' if we use: rrd2csv AVERAGE:vm::runnable Naming it 'runnable_any' also makes it clearer as the metric is % of time that at least one vCPU of the domain is in the runnable state. Add max to "runnable_any" metric to follow the fix here: Signed-off-by: Bengang Yuan [Rebase with metric renaming and some fixes] Signed-off-by: Gang Ji --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 24163b98657..30a66a29fbe 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -57,6 +57,7 @@ let dss_vcpus xc doms = let dom_cpu_time = dom_cpu_time /. (1.0e9 *. float_of_int dom.Xenctrl.nr_online_vcpus) in + let ( ++ ) = Int64.add in try let ri = Xenctrl.domain_get_runstate_info xc domid in ( Rrd.VM uuid @@ -106,6 +107,23 @@ let dss_vcpus xc doms = blocked" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runnable_any" ~units:"(fraction)" + ~value: + (Rrd.VT_Float + (Int64.to_float + (ri.Xenctrl.time1 + ++ ri.Xenctrl.time2 + ++ ri.Xenctrl.time5 + ) + /. 1.0e9 + ) + ) + ~description: + "Fraction of time that at least one vCPU is runnable in the \ + domain" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () + ) :: ( Rrd.VM uuid , Ds.ds_make ~name:(Printf.sprintf "cpu_usage") From 2c8c7c797f9dd40e5af61900e6cf1cb7883e6779 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 18 Jun 2025 18:44:59 +0800 Subject: [PATCH 434/492] CP-54087: Domain CPU ready RRD metric - runnable_vcpus Adding a new CPU ready RRD metric: "runnable_vcpus" per domain as time % of vCPUs of the domain that are in the runnable state. Signed-off-by: Gang Ji --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 30a66a29fbe..a86fc8fb590 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -60,6 +60,24 @@ let dss_vcpus xc doms = let ( ++ ) = Int64.add in try let ri = Xenctrl.domain_get_runstate_info xc domid in + let runnable_vcpus_ds = + match ri.Xenctrl.runnable with + | 0L -> + [] + | _ -> + [ + ( Rrd.VM uuid + , Ds.ds_make ~name:"runnable_vcpus" ~units:"(fraction)" + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.runnable /. 1.0e9) + ) + ~description: + "Fraction of time that vCPUs of the domain are runnable" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () + ) + ] + in ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) @@ -133,6 +151,7 @@ let dss_vcpus xc doms = ~min:0.0 ~max:1.0 () ) :: dss + @ runnable_vcpus_ds with _ -> dss in try cpus 0 dss with _ -> dss From d3557250ab9acc43a1fa1eef190f1c647be6b6e0 Mon Sep 17 00:00:00 2001 From: Marcus Granado Date: Fri, 1 Aug 2025 12:42:40 +0000 Subject: [PATCH 435/492] CP-308465: RRD metric "runnable_vcups": rebase on top of xen.spec/PR#481 Query runstate with the new API: Xenctrl.Runstateinfo.V2.domain_get Signed-off-by: Marcus Granado --- ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 44 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index a86fc8fb590..a677fd17465 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -59,9 +59,9 @@ let dss_vcpus xc doms = in let ( ++ ) = Int64.add in try - let ri = Xenctrl.domain_get_runstate_info xc domid in + let ri = Xenctrl.Runstateinfo.V2.domain_get xc domid in let runnable_vcpus_ds = - match ri.Xenctrl.runnable with + match ri.Xenctrl.Runstateinfo.V2.runnable with | 0L -> [] | _ -> @@ -70,7 +70,9 @@ let dss_vcpus xc doms = , Ds.ds_make ~name:"runnable_vcpus" ~units:"(fraction)" ~value: (Rrd.VT_Float - (Int64.to_float ri.Xenctrl.runnable /. 1.0e9) + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.runnable + /. 1.0e9 + ) ) ~description: "Fraction of time that vCPUs of the domain are runnable" @@ -80,13 +82,19 @@ let dss_vcpus xc doms = in ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time0 /. 1.0e9) + ) ~description:"Fraction of time that all vCPUs are running" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time1 /. 1.0e9) + ) ~description: "Fraction of time that all vCPUs are runnable (i.e., \ waiting for CPU)" @@ -95,7 +103,10 @@ let dss_vcpus xc doms = :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_concurrency_hazard" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time2 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time2 /. 1.0e9) + ) ~description: "Fraction of time that some vCPUs are running and some are \ runnable" @@ -103,14 +114,20 @@ let dss_vcpus xc doms = ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time3 /. 1.0e9) + ) ~description: "Fraction of time that all vCPUs are blocked or offline" ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time4 /. 1.0e9) + ) ~description: "Fraction of time that some vCPUs are running and some are \ blocked" @@ -119,7 +136,10 @@ let dss_vcpus xc doms = :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_contention" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time5 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time5 /. 1.0e9) + ) ~description: "Fraction of time that some vCPUs are runnable and some are \ blocked" @@ -130,9 +150,9 @@ let dss_vcpus xc doms = ~value: (Rrd.VT_Float (Int64.to_float - (ri.Xenctrl.time1 - ++ ri.Xenctrl.time2 - ++ ri.Xenctrl.time5 + (ri.Xenctrl.Runstateinfo.V2.time1 + ++ ri.Xenctrl.Runstateinfo.V2.time2 + ++ ri.Xenctrl.Runstateinfo.V2.time5 ) /. 1.0e9 ) From c7986adbb6286cd6becc3f3e3334bd10783c82e2 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 6 Aug 2025 15:10:22 +0200 Subject: [PATCH 436/492] python3/usb_scan: Skip empty lines in usb-policy.conf, add more comments Allow empty lines in usb-policy.conf (it's standard practice in similar configuration files to allow both comments and empty lines, and it can help readability quite a bit). Previously, the script would fail in a rather unhelpful manner: Traceback (most recent call last): File "/opt/xensource/libexec/usb_scan.py", line 681, in pusbs = make_pusbs_list(devices, interfaces) File "/opt/xensource/libexec/usb_scan.py", line 660, in make_pusbs_list policy = Policy() File "/opt/xensource/libexec/usb_scan.py", line 384, in __init__ self.parse_line(line) File "/opt/xensource/libexec/usb_scan.py", line 444, in parse_line if action.lower() == "allow": UnboundLocalError: local variable 'action' referenced before assignment See this forum thread for a user figuring this out on their own: https://xcp-ng.org/forum/topic/11091/usb-passthrough-has-stopped-working-after-update-and-updating-usb-policy.conf/ Add a test verifying a blank line in the configuration is accepted. Add some more comments to usb-policy.conf to help debug cases like the above. Signed-off-by: Andrii Sultanov --- python3/libexec/usb_scan.py | 4 ++++ python3/tests/test_usb_scan.py | 8 ++++++++ scripts/usb-policy.conf | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/python3/libexec/usb_scan.py b/python3/libexec/usb_scan.py index 03d89f7baed..15888a25dff 100755 --- a/python3/libexec/usb_scan.py +++ b/python3/libexec/usb_scan.py @@ -421,6 +421,10 @@ def parse_line(self, line): :param line: (str) single line of policy file :return: None """ + # 0. skip empty lines + if line.strip() == '': + return + # 1. remove comments # ^([^#]*)(#.*)?$ i = line.find("#") diff --git a/python3/tests/test_usb_scan.py b/python3/tests/test_usb_scan.py index 45bfc78e569..9ed8be1faad 100644 --- a/python3/tests/test_usb_scan.py +++ b/python3/tests/test_usb_scan.py @@ -372,3 +372,11 @@ def test_usb_config_error_missing_colon(self): ALLOW # Otherwise allow everything else """ self.verify_usb_config_error_common(content, "to unpack") + + def test_usb_config_empty_line(self): + content = """# empty line +ALLOW:vid=056a pid=0314 class=03 # Wacom Intuos tablet + +ALLOW # Otherwise allow everything else +""" + self.verify_usb_config_error_common(content, "") diff --git a/scripts/usb-policy.conf b/scripts/usb-policy.conf index 777cd96e246..e14a11d68a2 100644 --- a/scripts/usb-policy.conf +++ b/scripts/usb-policy.conf @@ -1,11 +1,16 @@ # When you change this file, run 'xe pusb-scan' to confirm # the file can be parsed correctly. +# You can also run '/opt/xensource/libexec/usb_scan.py -d' to see +# debug output from the script parsing this configuration file. # # Syntax is an ordered list of case insensitive rules where # is line comment # and each rule is (ALLOW | DENY) : ( match )* # and each match is (class|subclass|prot|vid|pid|rel) = hex-number # Maximum hex value for class/subclass/prot is FF, and for vid/pid/rel is FFFF # +# Rules are ordered so that the first matching rule will override +# any other rules for the device below it +# # USB Hubs (class 09) are always denied, independently of the rules in this file DENY: vid=17e9 # All DisplayLink USB displays DENY: class=02 # Communications and CDC-Control From 3dd583aa414f89d1dbe10a03bb41f34a6b5cfd58 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 7 Aug 2025 10:37:10 +0800 Subject: [PATCH 437/492] CA-412983: HA doesn't keep trying to start best-effort VM The issue occurs in a scenario involving a HA-enabled pool. A VM with its VM.ha_restart_priority set to best-effort is running on a host. The VM's disk resides on the host's local storage. When the host goes down, the VM cannot be restarted on other hosts due to the disk's local storage dependency. However, after the host recovers and comes back online, the VM still does not automatically start on the original host. Expected behavior: The VM should automatically start on the original host once it has recovered. Generally, this behavior should be applied to all non-agile VMs. Signed-off-by: Ming Lu --- ocaml/xapi/xapi_ha.ml | 25 ++++--- ocaml/xapi/xapi_ha_vm_failover.ml | 104 ++++++++++++++++++++++++----- ocaml/xapi/xapi_ha_vm_failover.mli | 6 +- 3 files changed, 105 insertions(+), 30 deletions(-) diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index e88ecf13769..fda471b1868 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -508,6 +508,14 @@ module Monitor = struct let liveset_uuids = List.sort compare (uuids_of_liveset liveset) in + let to_refs uuids = + List.map + (fun uuid -> + Db.Host.get_by_uuid ~__context ~uuid:(Uuidx.to_string uuid) + ) + uuids + in + let last_live_set = to_refs !last_liveset_uuids in if !last_liveset_uuids <> liveset_uuids then ( warn "Liveset looks different; assuming we need to rerun the \ @@ -515,17 +523,11 @@ module Monitor = struct plan_out_of_date := true ; last_liveset_uuids := liveset_uuids ) ; - let liveset_refs = - List.map - (fun uuid -> - Db.Host.get_by_uuid ~__context ~uuid:(Uuidx.to_string uuid) - ) - liveset_uuids - in + let live_set = to_refs liveset_uuids in if local_failover_decisions_are_ok () then ( try Xapi_ha_vm_failover.restart_auto_run_vms ~__context - liveset_refs to_tolerate + ~last_live_set ~live_set to_tolerate with e -> log_backtrace e ; error @@ -539,9 +541,7 @@ module Monitor = struct (* Next update the Host_metrics.live value to spot hosts coming back *) let all_hosts = Db.Host.get_all ~__context in let livemap = - List.map - (fun host -> (host, List.mem host liveset_refs)) - all_hosts + List.map (fun host -> (host, List.mem host live_set)) all_hosts in List.iter (fun (host, live) -> @@ -704,8 +704,7 @@ module Monitor = struct in if plan_too_old || !plan_out_of_date then ( let changed = - Xapi_ha_vm_failover.update_pool_status ~__context - ~live_set:liveset_refs () + Xapi_ha_vm_failover.update_pool_status ~__context ~live_set () in (* Extremely bad: something managed to break our careful plan *) if changed && not !plan_out_of_date then diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 5cbb946b150..b5c6b24f022 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -1259,9 +1259,26 @@ let restart_failed : (API.ref_VM, unit) Hashtbl.t = Hashtbl.create 10 (* We also limit the rate we attempt to retry starting the VM. *) let last_start_attempt : (API.ref_VM, float) Hashtbl.t = Hashtbl.create 10 +module VMRefOrd = struct + type t = [`VM] Ref.t + + let compare = Ref.compare +end + +module VMMap = Map.Make (VMRefOrd) + +(* When a host is up, it will be added in the HA live set. But it may be still + in disabled state so that starting best-effort VMs on it would fail. + Meanwhile we don't want to retry on starting them forever. + This data is to remember the best-effort VMs which failed to start due to + this and the key of the map is the VM ref. And its value is the count of the + attempts of starting. This is to avoid retrying for ever and can be adjusted + according to how hong the host becomes enabled since it is in HA live set. *) +let tried_best_eff_vms = ref VMMap.empty + (* Takes the current live_set and number of hosts we're planning to handle, updates the host records in the database and restarts any offline protected VMs *) -let restart_auto_run_vms ~__context live_set n = +let restart_auto_run_vms ~__context ~last_live_set ~live_set n = (* ensure we have live=false on the host_metrics for those hosts not in the live_set; and force state to Halted for all VMs that are "running" or "paused" with resident_on set to one of the hosts that is now dead *) @@ -1566,32 +1583,87 @@ let restart_auto_run_vms ~__context live_set n = ok since this is 'best-effort'). NOTE we do not use the restart_vm function above as this will mark the pool as overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received (although this should never happen it's better safe than sorry) *) - map_parallel - ~order_f:(fun vm -> order_f (vm, Db.VM.get_record ~__context ~self:vm)) - (fun vm -> + let is_best_effort r = + r.API.vM_ha_restart_priority = Constants.ha_restart_best_effort + && r.API.vM_power_state = `Halted + in + let resets = + !reset_vms + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + in + let revalidate_tried m = + let valid, invalid = + VMMap.bindings m + |> List.partition_map (fun (self, _) -> + match Db.VM.get_record ~__context ~self with + | r -> + Left (self, r) + | exception _ -> + Right self + ) + in + let to_retry, to_remove = + List.partition (fun (_, r) -> is_best_effort r) valid + in + let m' = + List.map fst to_remove + |> List.rev_append invalid + |> List.fold_left (fun acc vm -> VMMap.remove vm acc) m + in + (to_retry, m') + in + let best_effort_vms = + (* Carefully decide which best-effort VMs should attempt to start. *) + let all_prot_is_ok = List.for_all (fun (_, r) -> r = Ok ()) started in + let is_better = List.length live_set > List.length last_live_set in + ( match (all_prot_is_ok, is_better, last_live_set = live_set) with + | true, true, _ -> + (* Try to start all the best-effort halted VMs when HA is being + enabled or some hosts are transiting to HA live. + The DB has been updated by Xapi_vm_lifecycle.force_state_reset. + Read again. *) + tried_best_eff_vms := VMMap.empty ; + Db.VM.get_all_records ~__context + | true, false, true -> + (* Retry for best-effort VMs which attepmted but failed last time. *) + let to_retry, m = revalidate_tried !tried_best_eff_vms in + tried_best_eff_vms := m ; + List.rev_append to_retry resets + | true, false, false | false, _, _ -> + (* Try to start only the reset VMs. They were observed as residing + on the non-live hosts in this run. + Give up starting tried VMs as the HA situation changes. *) + tried_best_eff_vms := VMMap.empty ; + resets + ) + |> List.filter (fun (_, r) -> is_best_effort r) + in + map_parallel ~order_f + (fun (vm, _) -> ( vm - , if - Db.VM.get_power_state ~__context ~self:vm = `Halted - && Db.VM.get_ha_restart_priority ~__context ~self:vm - = Constants.ha_restart_best_effort - then - TaskChains.task (fun () -> - Client.Client.Async.VM.start ~rpc ~session_id ~vm - ~start_paused:false ~force:true - ) - else - TaskChains.ok Rpc.Null + , TaskChains.task (fun () -> + Client.Client.Async.VM.start ~rpc ~session_id ~vm + ~start_paused:false ~force:true + ) ) ) - !reset_vms + best_effort_vms |> List.iter (fun (vm, result) -> match result with | Error e -> + tried_best_eff_vms := + VMMap.update vm + (Option.fold ~none:(Some 1) ~some:(fun n -> + if n < 2 then Some (n + 1) else None + ) + ) + !tried_best_eff_vms ; error "Failed to restart best-effort VM %s (%s): %s" (Db.VM.get_uuid ~__context ~self:vm) (Db.VM.get_name_label ~__context ~self:vm) (ExnHelper.string_of_exn e) | Ok _ -> + tried_best_eff_vms := VMMap.remove vm !tried_best_eff_vms ; () ) ) diff --git a/ocaml/xapi/xapi_ha_vm_failover.mli b/ocaml/xapi/xapi_ha_vm_failover.mli index 20eb3b6b844..abf6374822a 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.mli +++ b/ocaml/xapi/xapi_ha_vm_failover.mli @@ -18,7 +18,11 @@ val all_protected_vms : __context:Context.t -> (API.ref_VM * API.vM_t) list val restart_auto_run_vms : - __context:Context.t -> API.ref_host list -> int -> unit + __context:Context.t + -> last_live_set:API.ref_host list + -> live_set:API.ref_host list + -> int + -> unit (** Take a set of live VMs and attempt to restart all protected VMs which have failed *) val compute_evacuation_plan : From ea89a26c69ddfd583bf979a58b9cce0775a1d7fb Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Mon, 11 Aug 2025 13:29:21 +0800 Subject: [PATCH 438/492] Add Xapi_globs.ha_best_effort_max_retries to eliminate hard-coding Signed-off-by: Ming Lu --- ocaml/xapi/xapi_globs.ml | 5 +++++ ocaml/xapi/xapi_ha_vm_failover.ml | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 490045f871f..3688478dce8 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -436,6 +436,10 @@ let xapi_clusterd_port = ref 8896 *) let local_yum_repo_port = ref 8000 +(* The maximum number of start attempts for HA best-effort VMs. Each attempt is + spaced 20 seconds apart. *) +let ha_best_effort_max_retries = ref 2 + (* When a host is known to be shutting down or rebooting, we add it's reference in here. This can be used to force the Host_metrics.live flag to false. *) let hosts_which_are_shutting_down : API.ref_host list ref = ref [] @@ -1238,6 +1242,7 @@ let xapi_globs_spec = ; ("max_observer_file_size", Int max_observer_file_size) ; ("test-open", Int test_open) (* for consistency with xenopsd *) ; ("local_yum_repo_port", Int local_yum_repo_port) + ; ("ha_best_effort_max_retries", Int ha_best_effort_max_retries) ] let xapi_globs_spec_with_descriptions = diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index b5c6b24f022..31dc6e7a2ef 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -1654,7 +1654,10 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n = tried_best_eff_vms := VMMap.update vm (Option.fold ~none:(Some 1) ~some:(fun n -> - if n < 2 then Some (n + 1) else None + if n < !Xapi_globs.ha_best_effort_max_retries then + Some (n + 1) + else + None ) ) !tried_best_eff_vms ; From c7db87d7ab1ab5644edee0b8d919d1d062710f08 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Wed, 2 Jul 2025 11:32:35 +0100 Subject: [PATCH 439/492] Changed the order of operations so that the sources are stored before any CI runs. This makes it handy for inspection in case CI fails. Signed-off-by: Konstantina Chremmou --- .github/workflows/generate-and-build-sdks.yml | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 4083db393c9..70c34162b4c 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -29,17 +29,6 @@ jobs: shell: bash run: opam exec -- make sdk - # sdk-ci runs some Go unit tests. - # This setting ensures that SDK date time - # tests are run on a machine that - # isn't using UTC - - name: Set Timezone to Tokyo for datetime tests - run: | - sudo timedatectl set-timezone Asia/Tokyo - - - name: Run CI for SDKs - uses: ./.github/workflows/sdk-ci - - name: Store C SDK source uses: actions/upload-artifact@v4 with: @@ -60,7 +49,13 @@ jobs: name: SDK_Source_PowerShell path: _build/install/default/share/powershell/* - - name: Store Go SDK Artifacts + - name: Store Java SDK source + uses: actions/upload-artifact@v4 + with: + name: SDK_Source_Java + path: _build/install/default/share/java/* + + - name: Store Go SDK source uses: actions/upload-artifact@v4 with: name: SDK_Artifacts_Go @@ -69,11 +64,16 @@ jobs: !_build/install/default/share/go/dune !_build/install/default/share/go/**/*_test.go - - name: Store Java SDK source - uses: actions/upload-artifact@v4 - with: - name: SDK_Source_Java - path: _build/install/default/share/java/* + # sdk-ci runs some Go unit tests. + # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + + - name: Run CI for SDKs + uses: ./.github/workflows/sdk-ci - name: Trim dune cache run: opam exec -- dune cache trim --size=2GiB From affb5fd2d68e783580f198d26d868fbb91b1b611 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 12 Aug 2025 13:45:14 +0100 Subject: [PATCH 440/492] CA-413254: Sort and remove duplicate serialized types. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/go/gen_go_binding.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/sdk-gen/go/gen_go_binding.ml b/ocaml/sdk-gen/go/gen_go_binding.ml index eb7bc73a96b..bfa541732a3 100644 --- a/ocaml/sdk-gen/go/gen_go_binding.ml +++ b/ocaml/sdk-gen/go/gen_go_binding.ml @@ -105,6 +105,7 @@ let render_converts destdir = let json : Mustache.Json.t = of_json params in render_template template json () ) + |> List.sort_uniq compare |> String.concat "" in let rendered = From 34bdb57dd8a2ecc5ab62ed8c9fc612446fc6eaba Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Wed, 13 Aug 2025 18:49:09 +0800 Subject: [PATCH 441/492] Optimize with List.compare_lengths Signed-off-by: Ming Lu --- ocaml/xapi/xapi_ha_vm_failover.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 31dc6e7a2ef..5c43984541c 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -1615,7 +1615,7 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n = let best_effort_vms = (* Carefully decide which best-effort VMs should attempt to start. *) let all_prot_is_ok = List.for_all (fun (_, r) -> r = Ok ()) started in - let is_better = List.length live_set > List.length last_live_set in + let is_better = List.compare_lengths live_set last_live_set > 0 in ( match (all_prot_is_ok, is_better, last_live_set = live_set) with | true, true, _ -> (* Try to start all the best-effort halted VMs when HA is being From 3897cdb59d959b70c6b4795aaee08efe1cb2623b Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 14 Aug 2025 16:35:24 +0200 Subject: [PATCH 442/492] message_forwarding: Log which operation is added/removed from blocked_ops Otherwise the log is not very helpful. Signed-off-by: Andrii Sultanov --- ocaml/xapi/message_forwarding.ml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 177f959fa52..f25f5bd9431 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3111,14 +3111,16 @@ functor Xapi_vm_lifecycle.update_allowed_operations ~__context ~self let add_to_blocked_operations ~__context ~self ~key ~value = - info "VM.add_to_blocked_operations: self = '%s'" - (vm_uuid ~__context self) ; + info "VM.add_to_blocked_operations: self = '%s', key = '%s'" + (vm_uuid ~__context self) + (API.vm_operations_to_string key) ; Local.VM.add_to_blocked_operations ~__context ~self ~key ~value ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self let remove_from_blocked_operations ~__context ~self ~key = - info "VM.remove_from_blocked_operations: self = '%s'" - (vm_uuid ~__context self) ; + info "VM.remove_from_blocked_operations: self = '%s', key = '%s'" + (vm_uuid ~__context self) + (API.vm_operations_to_string key) ; Local.VM.remove_from_blocked_operations ~__context ~self ~key ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self From 0f5a9f9b79d70d4a9f1ab4f4645a486099e0311e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 14 Aug 2025 15:36:52 +0100 Subject: [PATCH 443/492] xe-cli: Allow floppy to be autocompleted Also add it to the documentation of the parameter of vbd-create Signed-off-by: Pau Ruiz Safont --- ocaml/xapi-cli-server/cli_frontend.ml | 2 +- ocaml/xe-cli/bash-completion | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 2b10741fc9c..d7b1984888a 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -2142,7 +2142,7 @@ let rec cmdtable_data : (string * cmd_spec) list = "Create a VBD. Appropriate values for the device field are listed in \ the parameter 'allowed-VBD-devices' on the VM. If no VDI is \ specified, an empty VBD will be created. The type parameter can be \ - 'Disk' or 'CD', and the mode is 'RO' or 'RW'." + 'Disk', 'CD' or 'Floppy', and the mode is 'RO' or 'RW'." ; implementation= No_fd Cli_operations.vbd_create ; flags= [] } diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index b4568c16b74..d11195c667c 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -342,7 +342,7 @@ _xe() __xe_debug "triggering autocompletion for type, class is '$fst'" if [[ "$fst" == "vbd" ]]; then - set_completions 'Disk,CD' "$value" + set_completions 'Disk,CD,Floppy' "$value" elif [[ "$fst" == "vdi" ]]; then set_completions 'system,user,suspend,crashdump' "$value" elif [[ "$fst" == "sr" ]]; then From 48b9aa347d7914547d96757506382029564d34d8 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 14 Aug 2025 08:26:10 +0000 Subject: [PATCH 444/492] CA-415952: HA can not be enabled https://github.com/xenserver/xha/commit/7ed46d6a871422837aae6a8d62eead0fa362585e Add a new field *HostIndex* into the ha_query_liveset result, Xapi needs to update accordingly to parse the result, Otherwise, xapi can not parse the result and understand the liveset thus cause pool-ha-enable always failed This commit fix the issue and intend to be compatible with old/new xha, to update smoothly Signed-off-by: Lin Liu --- ocaml/xapi/xha_interface.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/xha_interface.ml b/ocaml/xapi/xha_interface.ml index e89d22978ab..0935e06619d 100644 --- a/ocaml/xapi/xha_interface.ml +++ b/ocaml/xapi/xha_interface.ml @@ -493,7 +493,7 @@ module LiveSetInformation = struct ( match first_xml_element_with_name elements "localhost" with | Some (Xml.Element - (_, _, [Xml.Element ("HostID", _, [Xml.PCData local_host_id])]) + (_, _, Xml.Element ("HostID", _, [Xml.PCData local_host_id]) :: _) ) -> ( match Uuidx.of_string local_host_id with | None -> From 420a6907a226038c1e16f8de2df790d4ad72c4d1 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Mon, 18 Aug 2025 18:10:49 +0100 Subject: [PATCH 445/492] Copy dependency libraries to the output folder. Build using the project file (or the build switches in it are ignored). Signed-off-by: Konstantina Chremmou --- .github/workflows/generate-and-build-sdks.yml | 2 +- ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 70c34162b4c..90ca98f1515 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -188,7 +188,7 @@ jobs: - name: Build C# SDK shell: pwsh run: | - dotnet build source/src ` + dotnet build source/src/XenServer.csproj ` --disable-build-servers ` --configuration Release ` -p:Version=${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned ` diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj index 8f36aba76fa..613939e7cae 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj +++ b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj @@ -18,6 +18,7 @@ packageIcon.png git README-NuGet.md + true From 84f38cd33ae050d7c1ca8671497f4b5ae820828f Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 19 Aug 2025 16:51:22 +0100 Subject: [PATCH 446/492] CP-308539 Added preprocessor conditions to compile with .NET 8 Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/Failure.cs | 7 ++++++- ocaml/sdk-gen/csharp/autogen/src/HTTP.cs | 13 +++++++++++-- ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj | 3 ++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Failure.cs b/ocaml/sdk-gen/csharp/autogen/src/Failure.cs index 62cd536afd0..e8b514f20ea 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Failure.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Failure.cs @@ -31,9 +31,11 @@ using System.Collections.Generic; using System.Linq; using System.Resources; +#if !(NET8_0_OR_GREATER) using System.Runtime.Serialization; using System.Text.RegularExpressions; using System.Xml; +#endif using Newtonsoft.Json.Linq; @@ -88,12 +90,14 @@ public Failure(string message, Exception exception) ParseExceptionMessage(); } +#if !(NET8_0_OR_GREATER) protected Failure(SerializationInfo info, StreamingContext context) : base(info, context) { errorDescription = (List)info.GetValue("errorDescription", typeof(List)); errorText = info.GetString("errorText"); } +#endif #endregion @@ -174,7 +178,7 @@ public override string ToString() { return Message; } - +#if !(NET8_0_OR_GREATER) public override void GetObjectData(SerializationInfo info, StreamingContext context) { if (info == null) @@ -185,5 +189,6 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont base.GetObjectData(info, context); } +#endif } } diff --git a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs index 60fe64f4de5..d4dc4d0b004 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs @@ -38,7 +38,10 @@ using System.Security.Authentication; using System.Security.Cryptography; using System.Security.Cryptography.X509Certificates; + +#if !(NET8_0_OR_GREATER) using System.Runtime.Serialization; +#endif namespace XenAPI { @@ -64,6 +67,7 @@ public TooManyRedirectsException(string message) : base(message) { } public TooManyRedirectsException(string message, Exception exception) : base(message, exception) { } +#if !(NET8_0_OR_GREATER) protected TooManyRedirectsException(SerializationInfo info, StreamingContext context) : base(info, context) { @@ -81,6 +85,7 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont base.GetObjectData(info, context); } +#endif } [Serializable] @@ -92,7 +97,9 @@ public BadServerResponseException(string message) : base(message) { } public BadServerResponseException(string message, Exception exception) : base(message, exception) { } +#if !(NET8_0_OR_GREATER) protected BadServerResponseException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } [Serializable] @@ -103,8 +110,9 @@ public CancelledException() : base() { } public CancelledException(string message) : base(message) { } public CancelledException(string message, Exception exception) : base(message, exception) { } - +#if !(NET8_0_OR_GREATER) protected CancelledException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } [Serializable] @@ -115,8 +123,9 @@ public ProxyServerAuthenticationException() : base() { } public ProxyServerAuthenticationException(string message) : base(message) { } public ProxyServerAuthenticationException(string message, Exception exception) : base(message, exception) { } - +#if !(NET8_0_OR_GREATER) protected ProxyServerAuthenticationException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } #endregion diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj index 613939e7cae..22acc1de24a 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj +++ b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj @@ -1,7 +1,7 @@  0.0.0 - netstandard2.0;net45 + net80;netstandard2.0;net45 Library XenAPI True @@ -27,6 +27,7 @@ true + 8981 From 70de0c5bb49c12842ade89e8b6241a5f973de4ad Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 19 Aug 2025 16:58:29 +0100 Subject: [PATCH 447/492] Updated language use. Removed redundant calls and initializers. Use Properties instead of public fields. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/HTTP.cs | 74 ++++++-------- ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs | 27 +++--- ocaml/sdk-gen/csharp/autogen/src/Session.cs | 102 ++++++++++---------- 3 files changed, 94 insertions(+), 109 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs index d4dc4d0b004..35b303855b2 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs @@ -61,7 +61,7 @@ public TooManyRedirectsException(int redirect, Uri uri) this.uri = uri; } - public TooManyRedirectsException() : base() { } + public TooManyRedirectsException() { } public TooManyRedirectsException(string message) : base(message) { } @@ -91,7 +91,7 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont [Serializable] public class BadServerResponseException : Exception { - public BadServerResponseException() : base() { } + public BadServerResponseException() { } public BadServerResponseException(string message) : base(message) { } @@ -105,7 +105,7 @@ protected BadServerResponseException(SerializationInfo info, StreamingContext co [Serializable] public class CancelledException : Exception { - public CancelledException() : base() { } + public CancelledException() { } public CancelledException(string message) : base(message) { } @@ -118,7 +118,7 @@ protected CancelledException(SerializationInfo info, StreamingContext context) : [Serializable] public class ProxyServerAuthenticationException : Exception { - public ProxyServerAuthenticationException() : base() { } + public ProxyServerAuthenticationException() { } public ProxyServerAuthenticationException(string message) : base(message) { } @@ -142,6 +142,9 @@ protected ProxyServerAuthenticationException(SerializationInfo info, StreamingCo public const int DEFAULT_HTTPS_PORT = 443; private const int NONCE_LENGTH = 16; + private const int FILE_MOVE_MAX_RETRIES = 5; + private const int FILE_MOVE_SLEEP_BETWEEN_RETRIES = 100; + public enum ProxyAuthenticationMethod { Basic = 0, @@ -158,7 +161,7 @@ public enum ProxyAuthenticationMethod private static void WriteLine(String txt, Stream stream) { - byte[] bytes = System.Text.Encoding.ASCII.GetBytes(String.Format("{0}\r\n", txt)); + byte[] bytes = Encoding.ASCII.GetBytes($"{txt}\r\n"); stream.Write(bytes, 0, bytes.Length); } @@ -173,7 +176,7 @@ private static void WriteLine(Stream stream) // done here. private static string ReadLine(Stream stream) { - System.Text.StringBuilder result = new StringBuilder(); + StringBuilder result = new StringBuilder(); while (true) { int b = stream.ReadByte(); @@ -217,9 +220,8 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod // read chunk size string chunkSizeStr = ReadLine(stream); chunkSizeStr = chunkSizeStr.TrimEnd('\r', '\n'); - int chunkSize = 0; int.TryParse(chunkSizeStr, System.Globalization.NumberStyles.HexNumber, - System.Globalization.CultureInfo.InvariantCulture, out chunkSize); + System.Globalization.CultureInfo.InvariantCulture, out var chunkSize); // read number of bytes from the stream int totalNumberOfBytesRead = 0; @@ -231,8 +233,8 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod totalNumberOfBytesRead += numberOfBytesRead; } while (numberOfBytesRead > 0 && totalNumberOfBytesRead < chunkSize); - string str = System.Text.Encoding.ASCII.GetString(bytes); - string[] split = str.Split(new string[] {"\r\n"}, StringSplitOptions.RemoveEmptyEntries); + string str = Encoding.ASCII.GetString(bytes); + string[] split = str.Split(new [] {"\r\n"}, StringSplitOptions.RemoveEmptyEntries); headers.AddRange(split); entityBody += str; @@ -276,7 +278,7 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod private static int getResultCode(string line) { - string[] bits = line.Split(new char[] { ' ' }); + string[] bits = line.Split(' '); return (bits.Length < 2 ? 0 : Int32.Parse(bits[1])); } @@ -426,7 +428,7 @@ public static Uri BuildUri(string hostname, string path, params object[] args) private static string GetPartOrNull(string str, int partIndex) { - string[] parts = str.Split(new char[] { ' ' }, partIndex + 2, StringSplitOptions.RemoveEmptyEntries); + string[] parts = str.Split(new [] { ' ' }, partIndex + 2, StringSplitOptions.RemoveEmptyEntries); return partIndex < parts.Length - 1 ? parts[partIndex] : null; } @@ -457,8 +459,7 @@ private static NetworkStream ConnectSocket(Uri uri, bool nodelay, int timeoutMs) /// Timeout, in ms. 0 for no timeout. public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int timeoutMs) { - IMockWebProxy mockProxy = proxy as IMockWebProxy; - if (mockProxy != null) + if (proxy is IMockWebProxy mockProxy) return mockProxy.GetStream(uri); Stream stream; @@ -478,7 +479,7 @@ public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int t { if (useProxy) { - string line = string.Format("CONNECT {0}:{1} HTTP/1.0", uri.Host, uri.Port); + string line = $"CONNECT {uri.Host}:{uri.Port} HTTP/1.0"; WriteLine(line, stream); WriteLine(stream); @@ -490,8 +491,7 @@ public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int t if (UseSSL(uri)) { - SslStream sslStream = new SslStream(stream, false, - new RemoteCertificateValidationCallback(ValidateServerCertificate), null); + SslStream sslStream = new SslStream(stream, false, ValidateServerCertificate, null); sslStream.AuthenticateAsClient("", null, SslProtocols.Tls | SslProtocols.Tls11 | SslProtocols.Tls12, true); stream = sslStream; @@ -523,7 +523,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox } if (proxy.Credentials == null) - throw new BadServerResponseException(string.Format("Received error code {0} from the server", initialResponse[0])); + throw new BadServerResponseException($"Received error code {initialResponse[0]} from the server"); NetworkCredential credentials = proxy.Credentials.GetCredential(uri, null); @@ -535,10 +535,9 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox if (string.IsNullOrEmpty(basicField)) throw new ProxyServerAuthenticationException("Basic authentication scheme is not supported/enabled by the proxy server."); - string authenticationFieldReply = string.Format("Proxy-Authorization: Basic {0}", - Convert.ToBase64String(Encoding.UTF8.GetBytes(credentials.UserName + ":" + credentials.Password))); + var creds = Convert.ToBase64String(Encoding.UTF8.GetBytes(credentials.UserName + ":" + credentials.Password)); WriteLine(header, stream); - WriteLine(authenticationFieldReply, stream); + WriteLine($"Proxy-Authorization: Basic {creds}", stream); WriteLine(stream); } else if (CurrentProxyAuthenticationMethod == ProxyAuthenticationMethod.Digest) @@ -548,9 +547,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox if (string.IsNullOrEmpty(digestField)) throw new ProxyServerAuthenticationException("Digest authentication scheme is not supported/enabled by the proxy server."); - string authenticationFieldReply = string.Format( - "Proxy-Authorization: Digest username=\"{0}\", uri=\"{1}:{2}\"", - credentials.UserName, uri.Host, uri.Port); + string authenticationFieldReply = $"Proxy-Authorization: Digest username=\"{credentials.UserName}\", uri=\"{uri.Host}:{uri.Port}\""; int len = "Proxy-Authorization: Digest".Length; string directiveString = digestField.Substring(len, digestField.Length - len); @@ -571,19 +568,19 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox throw new ProxyServerAuthenticationException("Stale nonce in Digest authentication attempt."); break; case "realm=": - authenticationFieldReply += string.Format(", realm=\"{0}\"", directives[++i]); + authenticationFieldReply += $", realm=\"{directives[++i]}\""; realm = directives[i]; break; case "nonce=": - authenticationFieldReply += string.Format(", nonce=\"{0}\"", directives[++i]); + authenticationFieldReply += $", nonce=\"{directives[++i]}\""; nonce = directives[i]; break; case "opaque=": - authenticationFieldReply += string.Format(", opaque=\"{0}\"", directives[++i]); + authenticationFieldReply += $", opaque=\"{directives[++i]}\""; opaque = directives[i]; break; case "algorithm=": - authenticationFieldReply += string.Format(", algorithm={0}", directives[++i]); //unquoted; see RFC7616-3.4 + authenticationFieldReply += $", algorithm={directives[++i]}"; //unquoted; see RFC7616-3.4 algorithm = directives[i]; break; case "qop=": @@ -593,9 +590,8 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox qop = qops.FirstOrDefault(q => q.ToLowerInvariant() == "auth") ?? qops.FirstOrDefault(q => q.ToLowerInvariant() == "auth-int"); if (qop == null) - throw new ProxyServerAuthenticationException( - "Digest authentication's quality-of-protection directive is not supported."); - authenticationFieldReply += string.Format(", qop={0}", qop); //unquoted; see RFC7616-3.4 + throw new ProxyServerAuthenticationException("Digest authentication's quality-of-protection directive is not supported."); + authenticationFieldReply += $", qop={qop}"; //unquoted; see RFC7616-3.4 } break; } @@ -603,11 +599,11 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox string clientNonce = GenerateNonce(); if (qop != null) - authenticationFieldReply += string.Format(", cnonce=\"{0}\"", clientNonce); + authenticationFieldReply += $", cnonce=\"{clientNonce}\""; string nonceCount = "00000001"; // todo: track nonces and their corresponding nonce counts if (qop != null) - authenticationFieldReply += string.Format(", nc={0}", nonceCount); //unquoted; see RFC7616-3.4 + authenticationFieldReply += $", nc={nonceCount}"; //unquoted; see RFC7616-3.4 Func algFunc; var scratch1 = string.Join(":", credentials.UserName, realm, credentials.Password); @@ -645,7 +641,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox : new[] {HA1, nonce, nonceCount, clientNonce, qop, HA2}; var response = algFunc(string.Join(":", array3)); - authenticationFieldReply += string.Format(", response=\"{0}\"", response); + authenticationFieldReply += $", response=\"{response}\""; WriteLine(header, stream); WriteLine(authenticationFieldReply, stream); @@ -654,8 +650,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox else { string authType = GetPartOrNull(fields[0], 1); - throw new ProxyServerAuthenticationException( - string.Format("Proxy server's {0} authentication method is not supported.", authType ?? "chosen")); + throw new ProxyServerAuthenticationException($"Proxy server's {authType ?? "chosen"} authentication method is not supported."); } // handle authentication attempt response @@ -671,12 +666,10 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox case 407: throw new ProxyServerAuthenticationException("Proxy server denied access due to wrong credentials."); default: - throw new BadServerResponseException(string.Format( - "Received error code {0} from the server", authenticatedResponse[0])); + throw new BadServerResponseException($"Received error code {authenticatedResponse[0]} from the server"); } } - private static Stream DoHttp(Uri uri, IWebProxy proxy, bool noDelay, int timeoutMs, params string[] headers) { Stream stream = ConnectStream(uri, proxy, noDelay, timeoutMs); @@ -838,9 +831,6 @@ public static void Get(DataCopiedDelegate dataCopiedDelegate, FuncBool cancellin } } - private const int FILE_MOVE_MAX_RETRIES = 5; - private const int FILE_MOVE_SLEEP_BETWEEN_RETRIES = 100; - ///

/// Move a file, retrying a few times with a short sleep between retries. /// If it still fails after these retries, then throw the error. diff --git a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs index 519cc430d4e..5cda57b14b7 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs @@ -49,9 +49,9 @@ internal abstract class JsonRequest { protected JsonRequest(int id, string method, JToken parameters) { - this.Id = id; - this.Method = method; - this.Parameters = parameters; + Id = id; + Method = method; + Parameters = parameters; } public static JsonRequest Create(JsonRpcVersion jsonRpcVersion, int id, string method, JToken parameters) @@ -105,18 +105,15 @@ public JsonRequestV2(int id, string method, JToken parameters) } [JsonProperty("jsonrpc", Required = Required.Always)] - public string JsonRPC - { - get { return "2.0"; } - } + public string JsonRPC => "2.0"; } internal abstract class JsonResponse { - [JsonProperty("id", Required = Required.AllowNull)] public int Id = 0; + [JsonProperty("id", Required = Required.AllowNull)] public int Id { get; set; } - [JsonProperty("result", Required = Required.Default)] public T Result = default(T); + [JsonProperty("result", Required = Required.Default)] public T Result { get; set; } public override string ToString() { @@ -126,23 +123,23 @@ public override string ToString() internal class JsonResponseV1 : JsonResponse { - [JsonProperty("error", Required = Required.AllowNull)] public JToken Error = null; + [JsonProperty("error", Required = Required.AllowNull)] public JToken Error { get; set; } } internal class JsonResponseV2 : JsonResponse { - [JsonProperty("error", Required = Required.DisallowNull)] public JsonResponseV2Error Error = null; + [JsonProperty("error", Required = Required.DisallowNull)] public JsonResponseV2Error Error { get; set; } - [JsonProperty("jsonrpc", Required = Required.Always)] public string JsonRpc = null; + [JsonProperty("jsonrpc", Required = Required.Always)] public string JsonRpc { get; set; } } internal class JsonResponseV2Error { - [JsonProperty("code", Required = Required.Always)] public int Code = 0; + [JsonProperty("code", Required = Required.Always)] public int Code { get; set; } - [JsonProperty("message", Required = Required.Always)] public string Message = null; + [JsonProperty("message", Required = Required.Always)] public string Message { get; set; } - [JsonProperty("data", Required = Required.Default)] public JToken Data = null; + [JsonProperty("data", Required = Required.Default)] public JToken Data { get; set; } public override string ToString() { diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index 82db84a8210..7fd9469cbd1 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -44,7 +44,7 @@ public partial class Session : XenObject /// /// This string is used as the HTTP UserAgent for each request. /// - public static string UserAgent = string.Format("XenAPI/{0}", Helper.APIVersionString(API_Version.LATEST)); + public static string UserAgent = $"XenAPI/{Helper.APIVersionString(API_Version.LATEST)}"; /// /// If null, no proxy is used, otherwise this proxy is used for each request. @@ -55,8 +55,6 @@ public partial class Session : XenObject public object Tag; - private List roles = new List(); - #region Constructors public Session(JsonRpcClient client) @@ -124,7 +122,7 @@ public Session(Session session) private static string GetUrl(string hostname, int port) { - return string.Format("{0}://{1}:{2}", port == 8080 || port == 80 ? "http" : "https", hostname, port); + return $"{(port == 8080 || port == 80 ? "http" : "https")}://{hostname}:{port}"; } private void SetupSessionDetails() @@ -159,7 +157,7 @@ private void CopyADFromSession(Session session) IsLocalSuperuser = session.IsLocalSuperuser; SessionSubject = session.SessionSubject; UserSid = session.UserSid; - roles = session.Roles; + Roles = session.Roles; Permissions = session.Permissions; } @@ -208,7 +206,7 @@ private void SetRbacPermissions() if (r.subroles.Count > 0 && r.name_label == s) { r.opaque_ref = xr.opaque_ref; - roles.Add(r); + Roles.Add(r); break; } } @@ -220,7 +218,7 @@ public override void UpdateFrom(Session update) throw new Exception("The method or operation is not implemented."); } - public override string SaveChanges(Session session, string _serverOpaqueRef, Session serverObject) + public override string SaveChanges(Session session, string serverOpaqueRef, Session serverObject) { throw new Exception("The method or operation is not implemented."); } @@ -306,7 +304,7 @@ public Dictionary RequestHeaders /// instead use Permissions. This list should only be used for UI purposes. /// [JsonConverter(typeof(XenRefListConverter))] - public List Roles => roles; + public List Roles { get; private set; } #endregion @@ -315,9 +313,9 @@ public string[] GetSystemMethods() return JsonRpcClient.system_list_methods(); } - public static Session get_record(Session session, string _session) + public static Session get_record(Session session, string sessionOpaqueRef) { - Session newSession = new Session(session.Url) { opaque_ref = _session }; + Session newSession = new Session(session.Url) { opaque_ref = sessionOpaqueRef }; newSession.SetAPIVersion(); return newSession; } @@ -402,13 +400,13 @@ public void logout(Session session2) /// /// Log out of the session with the given reference, using this session for the connection. /// - /// The session to log out - public void logout(string _self) + /// The session to log out + public void logout(string self) { - if (_self == null) + if (self == null) return; - JsonRpcClient.session_logout(_self); + JsonRpcClient.session_logout(self); } public void local_logout() @@ -451,9 +449,9 @@ public string get_this_host() return get_this_host(this, opaque_ref); } - public static string get_this_host(Session session, string _self) + public static string get_this_host(Session session, string self) { - return session.JsonRpcClient.session_get_this_host(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_this_host(session.opaque_ref, self ?? ""); } public string get_this_user() @@ -461,9 +459,9 @@ public string get_this_user() return get_this_user(this, opaque_ref); } - public static string get_this_user(Session session, string _self) + public static string get_this_user(Session session, string self) { - return session.JsonRpcClient.session_get_this_user(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_this_user(session.opaque_ref, self ?? ""); } public bool get_is_local_superuser() @@ -471,14 +469,14 @@ public bool get_is_local_superuser() return get_is_local_superuser(this, opaque_ref); } - public static bool get_is_local_superuser(Session session, string _self) + public static bool get_is_local_superuser(Session session, string self) { - return session.JsonRpcClient.session_get_is_local_superuser(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_is_local_superuser(session.opaque_ref, self ?? ""); } - public static string[] get_rbac_permissions(Session session, string _self) + public static string[] get_rbac_permissions(Session session, string self) { - return session.JsonRpcClient.session_get_rbac_permissions(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_rbac_permissions(session.opaque_ref, self ?? ""); } public DateTime get_last_active() @@ -486,9 +484,9 @@ public DateTime get_last_active() return get_last_active(this, opaque_ref); } - public static DateTime get_last_active(Session session, string _self) + public static DateTime get_last_active(Session session, string self) { - return session.JsonRpcClient.session_get_last_active(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_last_active(session.opaque_ref, self ?? ""); } public bool get_pool() @@ -496,9 +494,9 @@ public bool get_pool() return get_pool(this, opaque_ref); } - public static bool get_pool(Session session, string _self) + public static bool get_pool(Session session, string self) { - return session.JsonRpcClient.session_get_pool(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_pool(session.opaque_ref, self ?? ""); } public XenRef get_subject() @@ -506,9 +504,9 @@ public XenRef get_subject() return get_subject(this, opaque_ref); } - public static XenRef get_subject(Session session, string _self) + public static XenRef get_subject(Session session, string self) { - return session.JsonRpcClient.session_get_subject(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_subject(session.opaque_ref, self ?? ""); } public string get_auth_user_sid() @@ -516,9 +514,9 @@ public string get_auth_user_sid() return get_auth_user_sid(this, opaque_ref); } - public static string get_auth_user_sid(Session session, string _self) + public static string get_auth_user_sid(Session session, string self) { - return session.JsonRpcClient.session_get_auth_user_sid(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_auth_user_sid(session.opaque_ref, self ?? ""); } #region AD SID enumeration and bootout @@ -543,25 +541,25 @@ public static XenRef async_get_all_subject_identifiers(Session session) return session.JsonRpcClient.async_session_get_all_subject_identifiers(session.opaque_ref); } - public string logout_subject_identifier(string subject_identifier) + public string logout_subject_identifier(string subjectIdentifier) { - return logout_subject_identifier(this, subject_identifier); + return logout_subject_identifier(this, subjectIdentifier); } - public static string logout_subject_identifier(Session session, string subject_identifier) + public static string logout_subject_identifier(Session session, string subjectIdentifier) { - session.JsonRpcClient.session_logout_subject_identifier(session.opaque_ref, subject_identifier); + session.JsonRpcClient.session_logout_subject_identifier(session.opaque_ref, subjectIdentifier); return string.Empty; } - public XenRef async_logout_subject_identifier(string subject_identifier) + public XenRef async_logout_subject_identifier(string subjectIdentifier) { - return async_logout_subject_identifier(this, subject_identifier); + return async_logout_subject_identifier(this, subjectIdentifier); } - public static XenRef async_logout_subject_identifier(Session session, string subject_identifier) + public static XenRef async_logout_subject_identifier(Session session, string subjectIdentifier) { - return session.JsonRpcClient.async_session_logout_subject_identifier(session.opaque_ref, subject_identifier); + return session.JsonRpcClient.async_session_logout_subject_identifier(session.opaque_ref, subjectIdentifier); } #endregion @@ -573,39 +571,39 @@ public Dictionary get_other_config() return get_other_config(this, opaque_ref); } - public static Dictionary get_other_config(Session session, string _self) + public static Dictionary get_other_config(Session session, string self) { - return session.JsonRpcClient.session_get_other_config(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_other_config(session.opaque_ref, self ?? ""); } - public void set_other_config(Dictionary _other_config) + public void set_other_config(Dictionary otherConfig) { - set_other_config(this, opaque_ref, _other_config); + set_other_config(this, opaque_ref, otherConfig); } - public static void set_other_config(Session session, string _self, Dictionary _other_config) + public static void set_other_config(Session session, string self, Dictionary otherConfig) { - session.JsonRpcClient.session_set_other_config(session.opaque_ref, _self ?? "", _other_config); + session.JsonRpcClient.session_set_other_config(session.opaque_ref, self ?? "", otherConfig); } - public void add_to_other_config(string _key, string _value) + public void add_to_other_config(string key, string value) { - add_to_other_config(this, opaque_ref, _key, _value); + add_to_other_config(this, opaque_ref, key, value); } - public static void add_to_other_config(Session session, string _self, string _key, string _value) + public static void add_to_other_config(Session session, string self, string key, string value) { - session.JsonRpcClient.session_add_to_other_config(session.opaque_ref, _self ?? "", _key ?? "", _value ?? ""); + session.JsonRpcClient.session_add_to_other_config(session.opaque_ref, self ?? "", key ?? "", value ?? ""); } - public void remove_from_other_config(string _key) + public void remove_from_other_config(string key) { - remove_from_other_config(this, opaque_ref, _key); + remove_from_other_config(this, opaque_ref, key); } - public static void remove_from_other_config(Session session, string _self, string _key) + public static void remove_from_other_config(Session session, string self, string key) { - session.JsonRpcClient.session_remove_from_other_config(session.opaque_ref, _self ?? "", _key ?? ""); + session.JsonRpcClient.session_remove_from_other_config(session.opaque_ref, self ?? "", key ?? ""); } #endregion From 1b705ed4f88367de69fb4df97c739d4cfe501f5b Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 19 Aug 2025 17:00:18 +0100 Subject: [PATCH 448/492] CP-308539 Replaced obsolete code. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/HTTP.cs | 28 +++++++++--------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs index 35b303855b2..732478828f2 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs @@ -38,7 +38,6 @@ using System.Security.Authentication; using System.Security.Cryptography; using System.Security.Cryptography.X509Certificates; - #if !(NET8_0_OR_GREATER) using System.Runtime.Serialization; #endif @@ -303,7 +302,8 @@ private static bool ValidateServerCertificate( /// The secure hash as a hex string. private static string _MD5Hash(string str) { - return ComputeHash(str, "MD5"); + using (var hasher = MD5.Create()) + return ComputeHash(hasher, str); } /// @@ -313,32 +313,24 @@ private static string _MD5Hash(string str) /// The secure hash as a hex string. private static string Sha256Hash(string str) { - return ComputeHash(str, "SHA256"); + using (var hasher = SHA256.Create()) + return ComputeHash(hasher, str); } - private static string ComputeHash(string input, string method) + private static string ComputeHash(HashAlgorithm hasher, string input) { - if (input == null) + if (hasher == null || input == null) return null; var enc = new UTF8Encoding(); byte[] bytes = enc.GetBytes(input); - - using (var hasher = HashAlgorithm.Create(method)) - { - if (hasher != null) - { - byte[] hash = hasher.ComputeHash(bytes); - return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); - } - } - - return null; + byte[] hash = hasher.ComputeHash(bytes); + return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); } private static string GenerateNonce() { - using (var rngCsProvider = new RNGCryptoServiceProvider()) + using (var rngCsProvider = RandomNumberGenerator.Create()) { var nonceBytes = new byte[NONCE_LENGTH]; rngCsProvider.GetBytes(nonceBytes); @@ -492,7 +484,7 @@ public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int t if (UseSSL(uri)) { SslStream sslStream = new SslStream(stream, false, ValidateServerCertificate, null); - sslStream.AuthenticateAsClient("", null, SslProtocols.Tls | SslProtocols.Tls11 | SslProtocols.Tls12, true); + sslStream.AuthenticateAsClient("", null, SslProtocols.Tls12, true); stream = sslStream; } From 383d45f28458554ec882070f734dc1912ec4d729 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Tue, 19 Aug 2025 17:18:15 +0100 Subject: [PATCH 449/492] CP-308539 Use HttpClient for .NET as HttpWebRequest is obsolete. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs | 71 ++++++++++++++++++++- ocaml/sdk-gen/csharp/autogen/src/Session.cs | 12 ++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs index 5cda57b14b7..711da740641 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs @@ -31,6 +31,12 @@ using System.Collections.Generic; using System.IO; using System.Net; +#if (NET8_0_OR_GREATER) +using System.Linq; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Security.Cryptography.X509Certificates; +#endif using System.Net.Security; using System.Threading; using Newtonsoft.Json; @@ -177,7 +183,13 @@ public JsonRpcClient(string baseUrl) public bool AllowAutoRedirect { get; set; } public bool PreAuthenticate { get; set; } public CookieContainer Cookies { get; set; } + +#if (NET8_0_OR_GREATER) + public Func ServerCertificateValidationCallback { get; set; } +#else public RemoteCertificateValidationCallback ServerCertificateValidationCallback { get; set; } +#endif + public Dictionary RequestHeaders { get; set; } public Dictionary ResponseHeaders { get; set; } @@ -264,9 +276,65 @@ protected virtual T Rpc(string callName, JToken parameters, JsonSerializer se } } - protected virtual void PerformPostRequest(Stream postStream, Stream responseStream) { +#if (NET8_0_OR_GREATER) + HttpClient httpClient = null; + HttpClientHandler httpHandler = null; + HttpRequestMessage requestMessage = null; + HttpResponseMessage responseMessage = null; + + try + { + httpHandler = new HttpClientHandler + { + AllowAutoRedirect = AllowAutoRedirect, + PreAuthenticate = PreAuthenticate, + CookieContainer = Cookies ?? new CookieContainer(), + Proxy = WebProxy + }; + + Func callBack = null; + if (ServicePointManager.ServerCertificateValidationCallback != null) + callBack = ServicePointManager.ServerCertificateValidationCallback.Invoke; + + httpHandler.ServerCertificateCustomValidationCallback = ServerCertificateValidationCallback ?? callBack; + + httpClient = new HttpClient(httpHandler) { Timeout = TimeSpan.FromMilliseconds(Timeout) }; + + requestMessage = new HttpRequestMessage(HttpMethod.Post, new Uri(JsonRpcUrl)); + if (ProtocolVersion != null) + requestMessage.Version = ProtocolVersion; + requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + requestMessage.Headers.UserAgent.ParseAdd(UserAgent); + requestMessage.Headers.ConnectionClose = !KeepAlive; + requestMessage.Headers.ExpectContinue = Expect100Continue; + requestMessage.Content = new StreamContent(postStream); + + if (RequestHeaders != null) + { + foreach (var header in RequestHeaders) + requestMessage.Headers.Add(header.Key, header.Value); + } + + responseMessage = httpClient.SendAsync(requestMessage).Result; + responseMessage.EnsureSuccessStatusCode(); + + var str = responseMessage.Content.ReadAsStream(); + str.CopyTo(responseStream); + responseStream.Flush(); + + ResponseHeaders = responseMessage.Headers.ToDictionary(header => header.Key, header => string.Join(",", header.Value)); + } + finally + { + RequestHeaders = null; + responseMessage?.Dispose(); + requestMessage?.Dispose(); + httpClient?.Dispose(); + httpHandler?.Dispose(); + } +#else var webRequest = (HttpWebRequest)WebRequest.Create(JsonRpcUrl); webRequest.Method = "POST"; webRequest.ContentType = "application/json"; @@ -329,6 +397,7 @@ protected virtual void PerformPostRequest(Stream postStream, Stream responseStre RequestHeaders = null; webResponse?.Dispose(); } +#endif } private JsonSerializerSettings CreateSettings(IList converters) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index 7fd9469cbd1..f7b0963880b 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -31,6 +31,10 @@ using System.Collections.Generic; using System.Linq; using System.Net; +#if (NET8_0_OR_GREATER) +using System.Net.Http; +using System.Security.Cryptography.X509Certificates; +#endif using System.Net.Security; using Newtonsoft.Json; @@ -246,11 +250,19 @@ public int Timeout set => JsonRpcClient.Timeout = value; } +#if (NET8_0_OR_GREATER) + public Func ServerCertificateValidationCallback + { + get => JsonRpcClient?.ServerCertificateValidationCallback; + set => JsonRpcClient.ServerCertificateValidationCallback = value; + } +#else public RemoteCertificateValidationCallback ServerCertificateValidationCallback { get => JsonRpcClient?.ServerCertificateValidationCallback; set => JsonRpcClient.ServerCertificateValidationCallback = value; } +#endif public ICredentials Credentials => JsonRpcClient?.WebProxy?.Credentials; From 04f8b87dfd2dfe0dfc8ba17aad332ca3c28df92a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 19 Aug 2025 18:12:50 +0100 Subject: [PATCH 450/492] CP-44752: propagate System.Diagnostics tracing information using W3C traceparent header. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs | 183 +++++++++++++++----- 1 file changed, 143 insertions(+), 40 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs index 711da740641..a790f397320 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs @@ -32,6 +32,7 @@ using System.IO; using System.Net; #if (NET8_0_OR_GREATER) +using System.Diagnostics; using System.Linq; using System.Net.Http; using System.Net.Http.Headers; @@ -71,6 +72,8 @@ public static JsonRequest Create(JsonRpcVersion jsonRpcVersion, int id, string m } } + public abstract string JsonRPC { get;} + /// /// Unique call id. Can be null in JSON_RPC v2.0, but xapi disallows it. /// @@ -101,6 +104,9 @@ public JsonRequestV1(int id, string method, JToken parameters) : base(id, method, parameters) { } + + [JsonIgnore] + public override string JsonRPC => "1.0"; } internal class JsonRequestV2 : JsonRequest @@ -111,7 +117,7 @@ public JsonRequestV2(int id, string method, JToken parameters) } [JsonProperty("jsonrpc", Required = Required.Always)] - public string JsonRPC => "2.0"; + public override string JsonRPC => "2.0"; } @@ -158,6 +164,42 @@ public partial class JsonRpcClient { private int _globalId; +#if (NET8_0_OR_GREATER) + private static readonly Type ClassType = typeof(JsonRpcClient); + private static readonly System.Reflection.AssemblyName ClassAssemblyName = ClassType?.Assembly?.GetName(); + private static readonly ActivitySource source = new ActivitySource(ClassAssemblyName.Name + "." + ClassType?.FullName, ClassAssemblyName.Version?.ToString()); + + // Follow naming conventions from OpenTelemetry.SemanticConventions + // Not yet on NuGet though: + // dotnet add package OpenTelemetry.SemanticConventions + private static class RpcAttributes + { + public const string AttributeRpcMethod = "rpc.method"; + public const string AttributeRpcSystem = "rpc.system"; + public const string AttributeRpcService = "rpc.service"; + public const string AttributeRpcJsonrpcErrorCode = "rpc.jsonrpc.error_code"; + public const string AttributeRpcJsonrpcErrorMessage = "rpc.jsonrpc.error_message"; + public const string AttributeRpcJsonrpcRequestId = "rpc.jsonrpc.request_id"; + public const string AttributeRpcJsonrpcVersion = "rpc.jsonrpc.version"; + public const string AttributeRpcMessageType = "rpc.message.type"; + + public static class RpcMessageTypeValues + { + public const string Sent = "SENT"; + public const string Received = "RECEIVED"; + } + } + + private static class ServerAttributes + { + public const string AttributeServerAddress = "server.address"; + } + + // not part of the SemanticConventions package + private const string ValueJsonRpc = "jsonrpc"; + private const string EventRpcMessage = "rpc.message"; +#endif + public JsonRpcClient(string baseUrl) { Url = baseUrl; @@ -216,63 +258,98 @@ protected virtual T Rpc(string callName, JToken parameters, JsonSerializer se // therefore the latter will be done only in DEBUG mode using (var postStream = new MemoryStream()) { - using (var sw = new StreamWriter(postStream)) +#if (NET8_0_OR_GREATER) + // the semantic convention is $package.$service/$method + using (Activity activity = source.CreateActivity("XenAPI/" + callName, ActivityKind.Client)) { + activity?.Start(); + // Set the fields described in the OpenTelemetry Semantic Conventions: + // https://opentelemetry.io/docs/specs/semconv/rpc/json-rpc/ + // https://opentelemetry.io/docs/specs/semconv/rpc/rpc-spans/ + activity?.SetTag(RpcAttributes.AttributeRpcSystem, ValueJsonRpc); + activity?.SetTag(ServerAttributes.AttributeServerAddress, new Uri(Url).Host); + activity?.SetTag(RpcAttributes.AttributeRpcMethod, callName); + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcRequestId, id.ToString()); +#endif + using (var sw = new StreamWriter(postStream)) + { #if DEBUG - var settings = CreateSettings(serializer.Converters); - string jsonReq = JsonConvert.SerializeObject(request, settings); - if (RequestEvent != null) - RequestEvent(jsonReq); - sw.Write(jsonReq); + var settings = CreateSettings(serializer.Converters); + string jsonReq = JsonConvert.SerializeObject(request, settings); + if (RequestEvent != null) + RequestEvent(jsonReq); + sw.Write(jsonReq); #else - if (RequestEvent != null) - RequestEvent(callName); - serializer.Serialize(sw, request); + if (RequestEvent != null) + RequestEvent(callName); + serializer.Serialize(sw, request); #endif - sw.Flush(); - postStream.Seek(0, SeekOrigin.Begin); + sw.Flush(); + postStream.Seek(0, SeekOrigin.Begin); - using (var responseStream = new MemoryStream()) - { - PerformPostRequest(postStream, responseStream); - responseStream.Position = 0; - - using (var responseReader = new StreamReader(responseStream)) + using (var responseStream = new MemoryStream()) { - switch (JsonRpcVersion) + PerformPostRequest(postStream, responseStream); + responseStream.Position = 0; + + using (var responseReader = new StreamReader(responseStream)) { - case JsonRpcVersion.v2: +#if (NET8_0_OR_GREATER) + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcVersion, request.JsonRPC); +#endif + switch (JsonRpcVersion) + { + case JsonRpcVersion.v2: #if DEBUG - string json2 = responseReader.ReadToEnd(); - var res2 = JsonConvert.DeserializeObject>(json2, settings); + string json2 = responseReader.ReadToEnd(); + var res2 = JsonConvert.DeserializeObject>(json2, settings); #else - var res2 = (JsonResponseV2)serializer.Deserialize(responseReader, typeof(JsonResponseV2)); + var res2 = (JsonResponseV2)serializer.Deserialize(responseReader, typeof(JsonResponseV2)); #endif - if (res2.Error != null) - { - var descr = new List { res2.Error.Message }; - descr.AddRange(res2.Error.Data.ToObject()); - throw new Failure(descr); - } - return res2.Result; - default: + if (res2.Error != null) + { + var descr = new List { res2.Error.Message }; + descr.AddRange(res2.Error.Data.ToObject()); +#if (NET8_0_OR_GREATER) + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorCode, res2.Error.Code); + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorMessage, descr); + activity?.SetStatus(ActivityStatusCode.Error); +#endif + throw new Failure(descr); + } +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Ok); +#endif + return res2.Result; + default: #if DEBUG - string json1 = responseReader.ReadToEnd(); - var res1 = JsonConvert.DeserializeObject>(json1, settings); + string json1 = responseReader.ReadToEnd(); + var res1 = JsonConvert.DeserializeObject>(json1, settings); #else - var res1 = (JsonResponseV1)serializer.Deserialize(responseReader, typeof(JsonResponseV1)); + var res1 = (JsonResponseV1)serializer.Deserialize(responseReader, typeof(JsonResponseV1)); #endif - if (res1.Error != null) - { - var errorArray = res1.Error.ToObject(); + var errorArray = res1.Error?.ToObject(); if (errorArray != null) + { +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Error); + // we can't be sure whether we'll have a Code here + // the exact format of an error object is not specified in JSONRPC v1 + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorMessage, errorArray.ToString()); +#endif throw new Failure(errorArray); - } - return res1.Result; + } +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Ok); +#endif + return res1.Result; + } } } } +#if (NET8_0_OR_GREATER) } +#endif } } @@ -317,6 +394,26 @@ protected virtual void PerformPostRequest(Stream postStream, Stream responseStre requestMessage.Headers.Add(header.Key, header.Value); } + // propagate W3C traceparent and tracestate + // HttpClient would do this automatically on .NET 5, + // and .NET 6 would provide even more control over this: https://blog.ladeak.net/posts/opentelemetry-net6-httpclient + // the caller must ensure that the activity is in W3C format (by inheritance or direct setting) + var activity = Activity.Current; + if (activity != null) + { + if (activity.IdFormat == ActivityIdFormat.W3C) + { + requestMessage.Headers.Add("traceparent", activity.Id); + var state = activity.TraceStateString; + + if (state?.Length > 0) + requestMessage.Headers.Add("tracestate", state); + } + + var tags = new ActivityTagsCollection { { RpcAttributes.AttributeRpcMessageType, RpcAttributes.RpcMessageTypeValues.Sent } }; + activity.AddEvent(new ActivityEvent(EventRpcMessage, DateTimeOffset.Now, tags)); + } + responseMessage = httpClient.SendAsync(requestMessage).Result; responseMessage.EnsureSuccessStatusCode(); @@ -325,10 +422,16 @@ protected virtual void PerformPostRequest(Stream postStream, Stream responseStre responseStream.Flush(); ResponseHeaders = responseMessage.Headers.ToDictionary(header => header.Key, header => string.Join(",", header.Value)); + + if (activity != null) + { + var tags = new ActivityTagsCollection { { RpcAttributes.AttributeRpcMessageType, RpcAttributes.RpcMessageTypeValues.Received } }; + activity.AddEvent(new ActivityEvent(EventRpcMessage, DateTimeOffset.Now, tags)); + } } finally { - RequestHeaders = null; + RequestHeaders = null; responseMessage?.Dispose(); requestMessage?.Dispose(); httpClient?.Dispose(); From c2f49ea4d983977e2e7ebed290c32225c13e440e Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Wed, 20 Aug 2025 15:40:26 +0100 Subject: [PATCH 451/492] Action from CA-408836: Deprecate the method SaveChanges. It is a XenCenterism and not always correct. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/Event.cs | 1 + ocaml/sdk-gen/csharp/autogen/src/Session.cs | 1 + ocaml/sdk-gen/csharp/autogen/src/XenObject.cs | 10 +--------- ocaml/sdk-gen/csharp/gen_csharp_binding.ml | 2 ++ 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Event.cs b/ocaml/sdk-gen/csharp/autogen/src/Event.cs index 62bb7d16ae8..1eed4e3ef10 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Event.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Event.cs @@ -45,6 +45,7 @@ public override void UpdateFrom(Event update) id = update.id; } + [Obsolete("Use the calls setting individual fields of the API object instead.")] public override string SaveChanges(Session session, string opaqueRef, Event serverObject) { if (opaqueRef == null) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index f7b0963880b..1b15037a736 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -222,6 +222,7 @@ public override void UpdateFrom(Session update) throw new Exception("The method or operation is not implemented."); } + [Obsolete("Use the calls setting individual fields of the API object instead.")] public override string SaveChanges(Session session, string serverOpaqueRef, Session serverObject) { throw new Exception("The method or operation is not implemented."); diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs b/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs index 3d372799771..10f238a2b04 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs @@ -42,15 +42,7 @@ public abstract partial class XenObject : IXenObject where S : XenObject /// public abstract void UpdateFrom(S record); - /// - /// Save any changed fields to the server. - /// This method is usually invoked on a thread pool thread. - /// - /// - /// - /// Changes are sent to the server if the field in "this" - /// is different from serverObject. Can be the object in the cache, or another reference - /// object that we want to save changes to. + [Obsolete("Use the calls setting individual fields of the API object instead.")] public abstract string SaveChanges(Session session, string serverOpaqueRef, S serverObject); public string opaque_ref { get; set; } diff --git a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml index 14b6af5e225..45ee61f46c7 100644 --- a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml +++ b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml @@ -353,6 +353,8 @@ and gen_class out_chan cls = print ";\n\ \ }\n\n\ + \ [Obsolete(\"Use the calls setting individual fields of the API \ + object instead.\")]\n\ \ public override string SaveChanges(Session session, string \ opaqueRef, %s server)\n\ \ {\n\ From 4f8834b593b9e7633b136b78ed21ec6cf3f8e1bd Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 20 Aug 2025 10:10:01 +0100 Subject: [PATCH 452/492] libs/log: adapt backtrace test to pass on aarch64 Unfortunately, the backtrace in aarch64 has one location less than on x86_64, at least on ocaml 4.14.2, so we have to change the test to check for some repetitions instead of just matching the whole output. Signed-off-by: Pau Ruiz Safont --- ocaml/libs/log/test/log_test.t | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t index 20d41233f8a..ae296392b86 100644 --- a/ocaml/libs/log/test/log_test.t +++ b/ocaml/libs/log/test/log_test.t @@ -1,9 +1,23 @@ - $ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' - [|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") - [|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 - [|error||0 |main|backtrace] 2/4 log_test.exe Called from file fun.ml, line 33 - [|error||0 |main|backtrace] 3/4 log_test.exe Called from file fun.ml, line 38 - [|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 - [|error||0 |main|backtrace] - [| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") +The log_test executable produces a backtrace on purpose, on x86_64, and with +the datetimes removed, it looks like this: +$ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' +[|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") +[|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 +[|error||0 |main|backtrace] 2/4 log_test.exe Called from file fun.ml, line 33 +[|error||0 |main|backtrace] 3/4 log_test.exe Called from file fun.ml, line 38 +[|error||0 |main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 +[|error||0 |main|backtrace] +[| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") +and on aarch64: +[|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") +[|error||0 |main|backtrace] 1/3 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 +[|error||0 |main|backtrace] 2/3 log_test.exe Called from file fun.ml, line 38 +[|error||0 |main|backtrace] 3/3 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 +[|error||0 |main|backtrace] +[| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") + + $ ./log_test.exe | grep "main|backtrace" -c | xargs -I _ sh -c "test 5 -eq _ || test 6 -eq _" + $ ./log_test.exe | grep "log_test.exe" -c | xargs -I _ sh -c "test 3 -eq _ || test 4 -eq _" + $ ./log_test.exe | grep "ocaml/libs/log/test/log_test.ml" -c + 2 From 920372441435249e46f1dda903564a091a77fb73 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 19 Aug 2025 10:29:44 +0100 Subject: [PATCH 453/492] ocaml/util: delete module xapi_host_driver_helpers and tests The tests for the module needed to be modified to be made more robust against failures and make it work under aarch64, but the code is unused, so simply remove all of it. Signed-off-by: Pau Ruiz Safont --- ocaml/tests/dune | 31 +---- ocaml/tests/test_data/buildid.s | 9 -- ocaml/tests/test_data/gen_notes.sh | 22 ---- ocaml/tests/test_data/linux.s | 9 -- ocaml/tests/test_data/xenserver.s | 9 -- ocaml/tests/test_data/xenserver_two_notes.s | 20 --- ocaml/tests/test_host_driver_helpers.ml | 89 ------------- ocaml/tests/test_host_driver_helpers.mli | 0 ocaml/util/dune | 7 -- ocaml/util/xapi_host_driver_helpers.ml | 131 -------------------- ocaml/util/xapi_host_driver_helpers.mli | 28 ----- ocaml/xapi/dune | 2 +- 12 files changed, 5 insertions(+), 352 deletions(-) delete mode 100644 ocaml/tests/test_data/buildid.s delete mode 100755 ocaml/tests/test_data/gen_notes.sh delete mode 100644 ocaml/tests/test_data/linux.s delete mode 100644 ocaml/tests/test_data/xenserver.s delete mode 100644 ocaml/tests/test_data/xenserver_two_notes.s delete mode 100644 ocaml/tests/test_host_driver_helpers.ml delete mode 100644 ocaml/tests/test_host_driver_helpers.mli delete mode 100644 ocaml/util/xapi_host_driver_helpers.ml delete mode 100644 ocaml/util/xapi_host_driver_helpers.mli diff --git a/ocaml/tests/dune b/ocaml/tests/dune index 7a3620fb6c3..9f4ad45b9d9 100644 --- a/ocaml/tests/dune +++ b/ocaml/tests/dune @@ -7,7 +7,7 @@ test_cluster_host test_cluster test_pusb test_network_sriov test_client test_valid_ref_list suite_alcotest_server test_vm_placement test_vm_helpers test_repository test_repository_helpers - test_ref test_xapi_helpers test_vm_group test_host_driver_helpers + test_ref test_xapi_helpers test_vm_group test_livepatch test_rpm test_updateinfo test_storage_smapiv1_wrapper test_storage_quicktest test_observer test_pool_periodic_update_sync test_pkg_mgr test_tar_ext test_pool_repository)) (libraries @@ -50,7 +50,6 @@ xapi_xenopsd xapi_cli_server xapi_database - xapi_host_driver_helpers xapi_internal xml-light2 ) @@ -78,19 +77,18 @@ ) ) - (tests (names test_vm_helpers test_vm_placement test_network_sriov test_vdi_cbt test_bounded_psq test_auth_cache test_clustering test_pusb test_daemon_manager test_repository test_repository_helpers test_livepatch test_rpm test_updateinfo test_pool_periodic_update_sync test_pkg_mgr - test_xapi_helpers test_tar_ext test_pool_repository test_host_driver_helpers) + test_xapi_helpers test_tar_ext test_pool_repository) (package xapi) (modes exe) (modules test_vm_helpers test_vm_placement test_network_sriov test_vdi_cbt test_bounded_psq test_auth_cache test_event test_clustering test_cluster_host test_cluster test_pusb test_daemon_manager test_repository test_repository_helpers test_livepatch test_rpm test_updateinfo test_pool_periodic_update_sync test_pkg_mgr - test_xapi_helpers test_tar_ext test_pool_repository test_host_driver_helpers) + test_xapi_helpers test_tar_ext test_pool_repository) (libraries alcotest bos @@ -121,13 +119,13 @@ xapi-types xapi_cli_server xapi_database - xapi_host_driver_helpers xapi_internal xml-light2 yojson ) (preprocess (per_module ((pps ppx_deriving_rpc) Test_cluster_host))) ) + (test (name test_storage_smapiv1_wrapper) (modes exe) @@ -171,27 +169,6 @@ (action (run ./check-no-xenctrl %{x})) ) -(rule - (alias runtest) - (package xapi) - (targets - .note.XenServer - .note.Linux - .note.gnu.build-id - .note.XenServerTwo - ) - (deps - (:asm - test_data/xenserver.s - test_data/xenserver_two_notes.s - test_data/linux.s - test_data/buildid.s - ) - (:script test_data/gen_notes.sh) - ) - (action (bash "%{script} %{asm}")) -) - (env (_ (env-vars (XAPI_TEST 1)))) ; disassemble, but without sources diff --git a/ocaml/tests/test_data/buildid.s b/ocaml/tests/test_data/buildid.s deleted file mode 100644 index 75f77766980..00000000000 --- a/ocaml/tests/test_data/buildid.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.gnu.build-id", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "gnu.build-id" # name -1: .p2align 2 -2: .long 0x000000 # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/gen_notes.sh b/ocaml/tests/test_data/gen_notes.sh deleted file mode 100755 index 9b173bd31da..00000000000 --- a/ocaml/tests/test_data/gen_notes.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# -# Copyright (c) Cloud Software Group, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; version 2.1 only. with the special -# exception on linking described in file LICENSE. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. - -elf_file=test_data/xenserver_elf_file -as "$@" -o $elf_file - -sections=$(readelf -n $elf_file | grep -Po "(?<=Displaying notes found in: ).*") -for dep in $sections; do - objcopy "$elf_file" "$dep" --only-section="$dep" -O binary -done - diff --git a/ocaml/tests/test_data/linux.s b/ocaml/tests/test_data/linux.s deleted file mode 100644 index ca106e94af7..00000000000 --- a/ocaml/tests/test_data/linux.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.Linux", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x257 # type -0: .asciz "Linux" # name -1: .p2align 2 -2: .asciz "4.19.0+1" # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/xenserver.s b/ocaml/tests/test_data/xenserver.s deleted file mode 100644 index f44575ce5eb..00000000000 --- a/ocaml/tests/test_data/xenserver.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.XenServer", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "v2.1.3+0.1fix" # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/xenserver_two_notes.s b/ocaml/tests/test_data/xenserver_two_notes.s deleted file mode 100644 index cbde4916dd5..00000000000 --- a/ocaml/tests/test_data/xenserver_two_notes.s +++ /dev/null @@ -1,20 +0,0 @@ -.section ".note.XenServerTwo", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x2 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "Built on December 25th" # desc -3: .p2align 2 - -.section ".note.XenServerTwo", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "2.0.0-rc.2" # desc -3: .p2align 2 - diff --git a/ocaml/tests/test_host_driver_helpers.ml b/ocaml/tests/test_host_driver_helpers.ml deleted file mode 100644 index bb1a49050b1..00000000000 --- a/ocaml/tests/test_host_driver_helpers.ml +++ /dev/null @@ -1,89 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - *) - -open Xapi_host_driver_helpers - -let note = - Alcotest.testable - (Fmt.of_to_string (fun n -> - Printf.sprintf "{typ=%d; name=%s; desc=%s}" (Int32.to_int n.typ) n.name - n.desc - ) - ) - ( = ) - -let versions = - [ - (".note.XenServer", Some "v2.1.3+0.1fix") - ; (".note.XenServerTwo", Some "2.0.0-rc.2") - ; (".note.Linux", None) - ; (".note.gnu.build-id", None) - ] - -let get_version_test = - List.map - (fun (filename, expected) -> - let test_version () = - let parsed_ver = Result.to_option (get_version filename) in - Printf.printf "%s\n" filename ; - Alcotest.(check (option string)) - "ELF notes should be parsed properly" expected parsed_ver - in - ( Printf.sprintf {|Validation of ELF note parsing: "%s"|} filename - , `Quick - , test_version - ) - ) - versions - -let notes = - [ - (".note.XenServer", [{typ= 1l; name= "XenServer"; desc= "v2.1.3+0.1fix"}]) - ; ( ".note.XenServerTwo" - , [ - {typ= 2l; name= "XenServer"; desc= "Built on December 25th"} - ; {typ= 1l; name= "XenServer"; desc= "2.0.0-rc.2"} - ] - ) - ; (".note.Linux", [{typ= 599l; name= "Linux"; desc= "4.19.0+1"}]) - ; ( ".note.gnu.build-id" - , [{typ= 1l; name= "gnu.build-id"; desc= "\x00\x00\x00"}] - ) - ] - -let note_parsing_test = - List.map - (fun (filename, expected) -> - let test_note () = - let parsed = - match get_notes filename with Ok res -> res | Error e -> failwith e - in - Printf.printf "%s\n" filename ; - Alcotest.(check (list note)) - "ELF notes should be parsed properly" expected parsed - in - ( Printf.sprintf {|Validation of ELF note parsing: "%s"|} filename - , `Quick - , test_note - ) - ) - notes - -let () = - Suite_init.harness_init () ; - Alcotest.run "Test Host Driver Helpers suite" - [ - ("Test_host_driver_helpers.get_note", note_parsing_test) - ; ("Test_host_driver_helpers.get_version", get_version_test) - ] diff --git a/ocaml/tests/test_host_driver_helpers.mli b/ocaml/tests/test_host_driver_helpers.mli deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ocaml/util/dune b/ocaml/util/dune index 488cf4f444f..6bd1ec5719f 100644 --- a/ocaml/util/dune +++ b/ocaml/util/dune @@ -16,10 +16,3 @@ ) (wrapped false) ) - -(library - (name xapi_host_driver_helpers) - (modules xapi_host_driver_helpers) - (libraries yojson angstrom xapi-stdext-unix) - (wrapped false) -) diff --git a/ocaml/util/xapi_host_driver_helpers.ml b/ocaml/util/xapi_host_driver_helpers.ml deleted file mode 100644 index 4910ed8d11f..00000000000 --- a/ocaml/util/xapi_host_driver_helpers.ml +++ /dev/null @@ -1,131 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - *) - -module J = Yojson -open Angstrom - -let int n = Int32.to_int n - -let ( // ) = Filename.concat - -(** Read a (small) file into a string *) -let read path = Xapi_stdext_unix.Unixext.string_of_file path - -type note = {typ: int32; name: string; desc: string} - -module JSON = struct - let note l = - let l = - List.map - (fun d -> - `Assoc - [ - ("type", `Int (int d.typ)) - ; ("name", `String d.name) - ; ("desc", `String d.desc) - ] - ) - l - in - `List l - - let emit json = J.pretty_to_channel stdout json -end - -(** return the smallest k >= n such that k is divisible by 4 *) -let align4 n = - let ( & ) = Int.logand in - n + (-n & 3) - -(** advance the cursor to position n *) -let advance_to n = - let* pos in - advance (max 0 (n - pos)) - -(** align the cursor to a multiple of 4 *) -let align = - let* pos in - advance_to (align4 pos) - -(** parse an ELF note entry; it assumes that name and desc are null - terminated strings. This should be always true for name but desc - depends on the entry. We don't capture the terminating zero for - strings. *) -let note = - let* name_length = LE.any_int32 in - let* desc_length = LE.any_int32 in - let* typ = LE.any_int32 in - let* name = take (int name_length - 1) in - (* skip over terminating null and re-align cursor *) - let* _ = char '\000' in - let* () = align in - let* desc = take (int desc_length - 1) in - (* skip over terminating null and re-align cursor *) - let* _ = char '\000' in - let* () = align in - return {typ; name; desc} - -(** parser for a sequence of note entries *) -let notes = many note - -(** parse a sequence of note entries from a string *) -let parse str = - let consume = Consume.Prefix in - parse_string ~consume notes str - -let get_version path = - let version = - read path - |> parse - |> Result.map - @@ List.filter_map (fun note -> - match (note.typ, note.name) with - | 1l, "XenServer" -> - Some note.desc - | _ -> - None - ) - in - match version with - | Ok (v :: _) -> - Ok v - | _ -> - Error - (Format.sprintf - "Failed to parse %s, didn't find a XenServer driver version notes \ - section" - path - ) - -let get_notes path = - let version = read path |> parse in - match version with - | Ok (_ :: _) as v -> - v - | _ -> - Error - (Format.sprintf "Failed to parse %s, didn't find a notes section" path) - -let dump_notes prefix = - let notes_dir = prefix // "notes" in - try - let lst = - Sys.readdir notes_dir - |> Array.to_list - |> List.map (fun n -> read (notes_dir // n)) - |> List.filter_map (fun note_str -> Result.to_option (parse note_str)) - |> List.map (fun note -> (prefix, JSON.note note)) - in - JSON.emit (`Assoc lst) - with _ -> () diff --git a/ocaml/util/xapi_host_driver_helpers.mli b/ocaml/util/xapi_host_driver_helpers.mli deleted file mode 100644 index 6528d6bec94..00000000000 --- a/ocaml/util/xapi_host_driver_helpers.mli +++ /dev/null @@ -1,28 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - *) - -type note = {typ: int32; name: string; desc: string} - -(* Parse an ELF notes section, returning the specially-encoded driver version. - - The kernel does not reveal the location from where it loaded an active - driver. Hence the name is not sufficient to observe the currently active - version. For this, XS uses ELF notes, with the kernel presenting a particular - note section in `/sys/module//notes/.note.XenServer` *) -val get_version : string -> (string, string) result - -val get_notes : string -> (note list, string) result - -(* Dumps JSON-formatted parsed ELF notes of a driver *) -val dump_notes : string -> unit diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 88213955afc..3b0f0e1b843 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -239,7 +239,7 @@ xxhash yojson zstd - xapi_host_driver_helpers) + ) (preprocess (per_module ((pps ppx_sexp_conv) From 47f1300bfe8410ad30b3f2da62f34d6907d5da8f Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 22 Aug 2025 12:10:19 +0100 Subject: [PATCH 454/492] Updated dependencies for PS 5.1. Signed-off-by: Konstantina Chremmou --- .../powershell/autogen/src/XenServerPowerShell.csproj | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj index 1fb6483bd34..3d952212447 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj +++ b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj @@ -16,10 +16,7 @@ - - False - $(MSBuildProgramFiles32)\Reference Assemblies\Microsoft\WindowsPowerShell\3.0\System.Management.Automation.dll - + From ea688ff8897403c1a5d61677dd15c50fcc0a167a Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 22 Aug 2025 15:25:09 +0100 Subject: [PATCH 455/492] I forgot to initialize the Roles. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/Session.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index 1b15037a736..5d999136833 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -317,7 +317,7 @@ public Dictionary RequestHeaders /// instead use Permissions. This list should only be used for UI purposes. /// [JsonConverter(typeof(XenRefListConverter))] - public List Roles { get; private set; } + public List Roles { get; private set; } = new List(); #endregion From 722aa8f7320f5b2676d6ccca092aaf07cfc1e6ba Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 18 Aug 2025 16:56:54 +0100 Subject: [PATCH 456/492] ci: enable experimental ocaml workflow on aarch64 This allows to see any regressions happening while not blocking any merges. This is useful to have as a base to make xapi work on arm-based hosts, which is an objective for xcp-ng. Signed-off-by: Pau Ruiz Safont --- .github/workflows/main.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 92f5101d189..e4971839e16 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,7 +21,17 @@ concurrency: # On new push, cancel old workflows from the same PR, branch or tag jobs: ocaml-tests: name: Run OCaml tests - runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + runs-on: ["ubuntu-22.04"] + experimental: [false] + include: + - runs-on: "ubuntu-22.04-arm" + experimental: true + + continue-on-error: ${{ matrix.experimental }} + runs-on: ${{ matrix.runs-on }} permissions: contents: read env: @@ -29,6 +39,7 @@ jobs: # when changing this value, to keep builds # consistent XAPI_VERSION: "v0.0.0" + steps: - name: Checkout code uses: actions/checkout@v4 From c97250d71c87d7912bf45e9f1d00d439c67c5281 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 19 Aug 2025 16:13:33 +0100 Subject: [PATCH 457/492] CP-308455 VM.sysprep if CD insert fails, remove ISO If inserting the CD fails, remove the ISO to protect its contents. Likewise, remove it when eject fails. Signed-off-by: Christian Lindig --- ocaml/xapi/vm_sysprep.ml | 11 ++++++++++- ocaml/xapi/vm_sysprep.mli | 1 + ocaml/xapi/xapi_vm.ml | 5 ++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index d8c6e213395..449eaf8e2c2 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -29,6 +29,7 @@ type error = | Other of string | VM_CDR_not_found | VM_CDR_eject + | VM_CDR_insert | VM_misses_feature | VM_not_running | VM_sysprep_timeout @@ -212,6 +213,8 @@ let eject ~rpc ~session_id ~vbd ~iso = Client.VBD.eject ~rpc ~session_id ~vbd ; Sys.remove iso with exn -> + Sys.remove iso ; + (* still remove ISO to protect it *) warn "%s: ejecting CD failed: %s" __FUNCTION__ (Printexc.to_string exn) ; fail VM_CDR_eject @@ -281,7 +284,13 @@ let sysprep ~__context ~vm ~unattend ~timeout = let uuid = Db.VDI.get_uuid ~__context ~self:vdi in debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; call ~__context @@ fun rpc session_id -> - Client.VBD.insert ~rpc ~session_id ~vdi ~vbd ; + ( try Client.VBD.insert ~rpc ~session_id ~vdi ~vbd + with e -> + debug "%s: failed to insert CD, removing ISO %s: %s" __FUNCTION__ iso + (Printexc.to_string e) ; + Sys.remove iso ; + fail VM_CDR_insert + ) ; Thread.delay !Xapi_globs.vm_sysprep_wait ; match trigger ~rpc ~session_id ~domid ~uuid ~timeout ~vbd ~iso with | true -> diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index badac1379e5..ace6cf184a4 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -18,6 +18,7 @@ type error = | Other of string | VM_CDR_not_found | VM_CDR_eject + | VM_CDR_insert | VM_misses_feature | VM_not_running | VM_sysprep_timeout diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index d08e28c5fca..68d07bfac11 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1727,10 +1727,13 @@ let sysprep ~__context ~self ~unattend ~timeout = raise Api_errors.(Server_error (sysprep, [uuid; "VM is not running"])) | exception Vm_sysprep.Sysprep VM_CDR_eject -> raise Api_errors.(Server_error (sysprep, [uuid; "VM failed to eject CD"])) + | exception Vm_sysprep.Sysprep VM_CDR_insert -> + raise Api_errors.(Server_error (sysprep, [uuid; "VM failed to insert CD"])) | exception Vm_sysprep.Sysprep VM_sysprep_timeout -> raise Api_errors.( - Server_error (sysprep, [uuid; "sysprep not found running - timeout"]) + Server_error + (sysprep, [uuid; "No response from sysprep within allocated time"]) ) | exception Vm_sysprep.Sysprep XML_too_large -> raise From 8c5053a8c9c2771a3afe8e1ec43df268b48c4578 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 1 Jul 2025 15:36:29 +0100 Subject: [PATCH 458/492] CP-308455 VM.sysprep declare XML content as SecretString Desclare the string parameter holding unattend.xml as secret to avoid logging it. Signed-off-by: Christian Lindig # Conflicts: # ocaml/idl/datamodel_vm.ml # ocaml/xapi/vm_sysprep.mli # ocaml/xapi/xapi_vm.mli --- ocaml/idl/datamodel_vm.ml | 2 +- ocaml/xapi-cli-server/cli_operations.ml | 2 +- ocaml/xapi-types/secretString.ml | 2 ++ ocaml/xapi-types/secretString.mli | 2 ++ ocaml/xapi/vm_sysprep.ml | 4 ++-- ocaml/xapi/vm_sysprep.mli | 4 ++-- ocaml/xapi/xapi_vm.mli | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 34ffda62d6b..84b329890b8 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2375,7 +2375,7 @@ let sysprep = ~params: [ (Ref _vm, "self", "The VM") - ; (String, "unattend", "XML content passed to sysprep") + ; (SecretString, "unattend", "XML content passed to sysprep") ; (Float, "timeout", "timeout in seconds for expected reboot") ] ~doc: diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index 020eec9f193..d7993d4d577 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -3602,7 +3602,7 @@ let vm_sysprep fd printer rpc session_id params = let unattend = match get_client_file fd filename with | Some xml -> - xml + xml |> SecretString.of_string | None -> marshal fd (Command (PrintStderr "Failed to read file.\n")) ; raise (ExitWithError 1) diff --git a/ocaml/xapi-types/secretString.ml b/ocaml/xapi-types/secretString.ml index 781dac86697..b552e46edfd 100644 --- a/ocaml/xapi-types/secretString.ml +++ b/ocaml/xapi-types/secretString.ml @@ -24,6 +24,8 @@ let write_to_channel c s = output_string c s let equal = String.equal +let length = String.length + let pool_secret = "pool_secret" let with_cookie t cookies = (pool_secret, t) :: cookies diff --git a/ocaml/xapi-types/secretString.mli b/ocaml/xapi-types/secretString.mli index 82d97eaaa72..6d85364d04e 100644 --- a/ocaml/xapi-types/secretString.mli +++ b/ocaml/xapi-types/secretString.mli @@ -25,6 +25,8 @@ val of_string : string -> t val equal : t -> t -> bool +val length : t -> int + val json_rpc_of_t : t -> Rpc.t val t_of_rpc : Rpc.t -> t diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml index 449eaf8e2c2..abc9a2f2742 100644 --- a/ocaml/xapi/vm_sysprep.ml +++ b/ocaml/xapi/vm_sysprep.ml @@ -140,7 +140,7 @@ let make_iso ~vm_uuid ~unattend = Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> let path = temp_dir // "unattend.xml" in - Unixext.write_string_to_file path unattend ; + SecretString.write_to_file path unattend ; debug "%s: written to %s" __FUNCTION__ path ; let args = ["-r"; "-J"; "-o"; iso; temp_dir] in Forkhelpers.execute_command_get_output genisoimage args |> ignore ; @@ -262,7 +262,7 @@ let sysprep ~__context ~vm ~unattend ~timeout = let control = Printf.sprintf "/local/domain/%Ld/control" domid in if domid <= 0L then fail VM_not_running ; - if String.length unattend > 32 * 1024 then + if SecretString.length unattend > 32 * 1024 then fail XML_too_large ; Ezxenstore_core.Xenstore.with_xs (fun xs -> let open Ezxenstore_core.Xenstore in diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli index ace6cf184a4..76cdfb7f621 100644 --- a/ocaml/xapi/vm_sysprep.mli +++ b/ocaml/xapi/vm_sysprep.mli @@ -32,9 +32,9 @@ val on_startup : __context:Context.t -> unit val sysprep : __context:Context.t -> vm:API.ref_VM - -> unattend:string + -> unattend:SecretString.t -> timeout:float -> unit (** Execute sysprep on [vm] using script [unattend]. This requires - driver support from the VM and is checked. [unattend:string] must + driver support from the VM and is checked. [unattend] must not exceed 32kb. Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index 2e861e8601b..b3f07d38a9d 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -454,6 +454,6 @@ val remove_from_blocked_operations : val sysprep : __context:Context.t -> self:API.ref_VM - -> unattend:string + -> unattend:SecretString.t -> timeout:float -> unit From ddaf4ce9bdb0495926122dc95c4d108f6529ebb9 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 29 Aug 2025 19:37:05 +0100 Subject: [PATCH 459/492] CP-308539: Updated certificate validation to support .NET 8.0 in PowerShell. Signed-off-by: Konstantina Chremmou --- .github/workflows/generate-and-build-sdks.yml | 2 +- .github/workflows/release.yml | 2 +- .../powershell/autogen/src/Connect-XenServer.cs | 16 ++++++++++++---- .../autogen/src/XenServerPowerShell.csproj | 5 +---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 90ca98f1515..46e2457aed7 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -205,7 +205,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ["6", "8"] + dotnet: ["8"] needs: build-csharp-sdk runs-on: windows-2022 permissions: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9c892846e1e..5dc14425102 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -86,7 +86,7 @@ jobs: - name: Retrieve PowerShell 7.x SDK distribution artifacts uses: actions/download-artifact@v4 with: - name: SDK_Binaries_XenServerPowerShell_NET6 + name: SDK_Binaries_XenServerPowerShell_NET8 path: sdk_powershell_7x/ - name: Package C SDK artifacts for deployment diff --git a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs index a1dc4ecf964..52cb8e21e54 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs +++ b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs @@ -32,6 +32,9 @@ using System.IO; using System.Management.Automation; using System.Net; +#if NET8_0_OR_GREATER +using System.Net.Http; +#endif using System.Net.Security; using System.Runtime.InteropServices; using System.Security; @@ -159,7 +162,7 @@ protected override void ProcessRecord() } ServicePointManager.ServerCertificateValidationCallback = ValidateServerCertificate; - ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12; + ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12; if (Url == null || Url.Length == 0) { @@ -209,7 +212,7 @@ protected override void ProcessRecord() throw; } } - catch (WebException e) + catch (Exception e) { var inner = e.InnerException?.InnerException ?? //.NET case e.InnerException; //.NET Framework case @@ -271,8 +274,13 @@ private bool ValidateServerCertificate(object sender, X509Certificate certificat bool ignoreChanged = Force || NoWarnCertificates || (bool)GetVariableValue("NoWarnCertificates", false); bool ignoreNew = Force || NoWarnNewCertificates || (bool)GetVariableValue("NoWarnNewCertificates", false); - HttpWebRequest webreq = (HttpWebRequest)sender; - string hostname = webreq.Address.Host; +#if NET8_0_OR_GREATER + var requestMessage = sender as HttpRequestMessage; + string hostname = requestMessage?.RequestUri?.Host ?? string.Empty; +#else + var webreq = sender as HttpWebRequest; + string hostname = webreq?.Address?.Host ?? string.Empty; +#endif string fingerprint = CommonCmdletFunctions.FingerprintPrettyString(certificate.GetCertHashString()); bool trusted = VerifyInAllStores(new X509Certificate2(certificate)); diff --git a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj index 3d952212447..35c2fc8fa42 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj +++ b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj @@ -1,7 +1,7 @@ 0.0.0 - net8.0;net6.0;net45 + net8.0;net45 Library True @@ -12,9 +12,6 @@ - - - From 48701044297544edec2cd18ccf3b10a82a554b82 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 18 Jul 2025 16:58:56 +0100 Subject: [PATCH 460/492] CP-308811: Add an option to limit the span depth in tracing Adds a new span.depth key to the trace context baggage, and a configurable max_span_depth. This defaults to 100 and so will not limit traces, but is useful when wanting to analyse large traces which can often become slow if all the traces are recorded. Signed-off-by: Steven Woods --- ocaml/libs/tracing/tracing.ml | 98 +++++++++++++++++++++++++++++----- ocaml/libs/tracing/tracing.mli | 2 + ocaml/tests/test_observer.ml | 1 + ocaml/xapi/xapi_globs.ml | 7 +++ ocaml/xapi/xapi_observer.ml | 1 + 5 files changed, 95 insertions(+), 14 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index d320fd6061b..389c5bafaa3 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -222,6 +222,8 @@ module TraceContext = struct let empty = {traceparent= None; baggage= None} + let depth_key = "span.depth" + let with_traceparent traceparent ctx = {ctx with traceparent} let with_baggage baggage ctx = {ctx with baggage} @@ -230,6 +232,20 @@ module TraceContext = struct let baggage_of ctx = ctx.baggage + let baggage_depth_of ctx = + Option.bind (baggage_of ctx) (List.assoc_opt depth_key) + |> Option.value ~default:"1" + |> int_of_string + + let update_with_baggage k v ctx = + let new_baggage = + baggage_of ctx + |> Option.value ~default:[] + |> List.remove_assoc k + |> List.cons (k, v) + in + with_baggage (Some new_baggage) ctx + let parse input = let open Astring.String in let trim_pair (key, value) = (trim key, trim value) in @@ -322,22 +338,36 @@ module Span = struct let start ?(attributes = Attributes.empty) ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = - let trace_id, extra_context = + let trace_id, extra_context, depth = match parent with | None -> - (Trace_id.make (), TraceContext.empty) + (Trace_id.make (), TraceContext.empty, 1) | Some span_parent -> - (span_parent.context.trace_id, span_parent.context.trace_context) + ( span_parent.context.trace_id + , span_parent.context.trace_context + , TraceContext.baggage_depth_of span_parent.context.trace_context + 1 + ) in let span_id = Span_id.make () in + let extra_context_with_depth = + TraceContext.( + with_added_baggage depth_key (string_of_int depth) extra_context + ) + in let context : SpanContext.t = - {trace_id; span_id; trace_context= extra_context} + {trace_id; span_id; trace_context= extra_context_with_depth} in let context = - (* If trace_context is provided to the call, override any inherited trace context. *) - trace_context - |> Option.fold ~none:context - ~some:(Fun.flip SpanContext.with_trace_context context) + (* If trace_context is provided to the call, override any inherited trace + context except span.depth which should still be maintained. *) + match trace_context with + | Some tc -> + let tc_with_depth = + TraceContext.(with_added_baggage depth_key (string_of_int depth) tc) + in + SpanContext.with_trace_context tc_with_depth context + | None -> + context in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in @@ -473,6 +503,11 @@ module Spans = struct let set_max_traces x = Atomic.set max_traces x + (* Default is much larger than the largest current traces, so effectively off *) + let max_depth = Atomic.make 100 + + let set_max_depth x = Atomic.set max_depth x + let finished_spans = Atomic.make ([], 0) let span_hashtbl_is_empty () = TraceMap.is_empty (Atomic.get spans) @@ -713,12 +748,18 @@ module Tracer = struct let get_tracer ~name:_ = TracerProvider.get_current () let span_of_span_context context name : Span.t = + let tc = SpanContext.context_of_span_context context in + let new_depth = TraceContext.baggage_depth_of tc in + let new_tc = + TraceContext.(with_added_baggage depth_key (string_of_int new_depth) tc) + in + let context = SpanContext.with_trace_context new_tc context in { context ; status= {status_code= Status.Unset; _description= None} ; name ; parent= None - ; span_kind= SpanKind.Client (* This will be the span of the client call*) + ; span_kind= SpanKind.Client (* This will be the span of the client call *) ; begin_time= Unix.gettimeofday () ; end_time= None ; links= [] @@ -730,10 +771,23 @@ module Tracer = struct ?(span_kind = SpanKind.Internal) ~name ~parent () : (Span.t option, exn) result = let open TracerProvider in - (* Do not start span if the TracerProvider is disabled*) + let parent_depth = + Option.fold ~none:1 + ~some:(fun parent -> + parent.Span.context + |> SpanContext.context_of_span_context + |> TraceContext.baggage_depth_of + ) + parent + in + (* Do not start span if the TracerProvider is disabled *) if not t.enabled then + ok_none (* Do not start span if the max depth has been reached *) + else if parent_depth >= Atomic.get Spans.max_depth then ( + let parent_trace_id = Option.fold ~none:"None" ~some:(fun p -> p.Span.context |> SpanContext.span_id_of_span_context |> Span_id.to_string) parent in + debug "Max_span_depth limit reached, not creating span %s (parent %s)" name parent_trace_id ; ok_none - else + ) else let attributes = Attributes.merge_into t.attributes attributes in let span = Span.start ~attributes ?trace_context ~name ~parent ~span_kind () @@ -750,8 +804,17 @@ module Tracer = struct |> Spans.remove_from_spans |> Option.map (fun existing_span -> let old_context = Span.get_context existing_span in + let parent_trace_context = Span.get_trace_context parent in + let new_depth = + TraceContext.baggage_depth_of parent_trace_context + 1 + in let new_context : SpanContext.t = - let trace_context = span.Span.context.trace_context in + let trace_context = + TraceContext.( + with_added_baggage depth_key (string_of_int new_depth) + span.Span.context.trace_context + ) + in SpanContext.context (SpanContext.trace_id_of_span_context parent.context) old_context.span_id @@ -759,7 +822,6 @@ module Tracer = struct in let updated_span = {existing_span with parent= Some parent} in let updated_span = {updated_span with context= new_context} in - let () = Spans.add_to_spans ~span:updated_span in updated_span ) @@ -926,7 +988,15 @@ module Propagator = struct let trace_context' = TraceContext.with_traceparent (Some traceparent) trace_context in - let carrier' = P.inject_into trace_context' carrier in + let new_depth = + TraceContext.baggage_depth_of trace_context' + 1 |> string_of_int + in + let trace_context'' = + TraceContext.( + with_added_baggage depth_key new_depth trace_context' + ) + in + let carrier' = P.inject_into trace_context'' carrier in f carrier' | _ -> f carrier diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 8323346a443..ec33f4ac5ff 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -165,6 +165,8 @@ module Spans : sig val set_max_traces : int -> unit + val set_max_depth : int -> unit + val span_count : unit -> int val since : unit -> Span.t list * int diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 2e2f8e6aa29..07d746e81c0 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -305,6 +305,7 @@ let verify_json_fields_and_values ~json = ; ("xs.host.uuid", `String _) ; ("xs.host.name", `String _) ; ("service.name", `String _) + ; ("span.depth", `String _) ] ) ; ("annotations", `List _) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 3688478dce8..f32d61443bc 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1059,6 +1059,8 @@ let max_spans = ref 10000 let max_traces = ref 10000 +let max_span_depth = ref 100 + let use_xmlrpc = ref true let compress_tracing_files = ref true @@ -1783,6 +1785,11 @@ let other_options = , (fun () -> string_of_float !vm_sysprep_wait) , "Time in seconds to wait for VM to recognise inserted CD" ) + ; ( "max-span-depth" + , Arg.Set_int max_span_depth + , (fun () -> string_of_int !max_span_depth) + , "The maximum depth to which spans are recorded in a trace in Tracing" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 62d3ea4359c..a0f1f453b7d 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -599,6 +599,7 @@ let initialise_observer ~__context component = initialise_observer_component ~__context component let initialise ~__context = + Tracing.Spans.set_max_depth !Xapi_globs.max_span_depth ; List.iter (initialise_observer_meta ~__context) (startup_components ()) ; Db.Observer.get_all ~__context |> List.iter (fun self -> From 38767739e4fdc6a15daf0f3bd5207efbcbe1cc2d Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Fri, 1 Aug 2025 17:02:59 +0100 Subject: [PATCH 461/492] CP-309305: Split Spans.since into chunks for exporting Http exporting appears to get overwhelmed when too many spans are exported at the same time. This adds the option to export a smaller chunk of spans at a time. This also reduces the size of the file exports as we only check for max file size after exporting all of the finished spans. Signed-off-by: Steven Woods --- ocaml/libs/tracing/tracing.ml | 23 ++++++++++++------ ocaml/libs/tracing/tracing_export.ml | 35 ++++++++++++++++++++++++--- ocaml/libs/tracing/tracing_export.mli | 7 ++++++ ocaml/xapi/xapi_globs.ml | 7 ++++++ ocaml/xapi/xapi_observer.ml | 1 + 5 files changed, 62 insertions(+), 11 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 389c5bafaa3..78ba3bc3ab6 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -351,7 +351,7 @@ module Span = struct let span_id = Span_id.make () in let extra_context_with_depth = TraceContext.( - with_added_baggage depth_key (string_of_int depth) extra_context + update_with_baggage depth_key (string_of_int depth) extra_context ) in let context : SpanContext.t = @@ -363,7 +363,7 @@ module Span = struct match trace_context with | Some tc -> let tc_with_depth = - TraceContext.(with_added_baggage depth_key (string_of_int depth) tc) + TraceContext.(update_with_baggage depth_key (string_of_int depth) tc) in SpanContext.with_trace_context tc_with_depth context | None -> @@ -751,7 +751,7 @@ module Tracer = struct let tc = SpanContext.context_of_span_context context in let new_depth = TraceContext.baggage_depth_of tc in let new_tc = - TraceContext.(with_added_baggage depth_key (string_of_int new_depth) tc) + TraceContext.(update_with_baggage depth_key (string_of_int new_depth) tc) in let context = SpanContext.with_trace_context new_tc context in { @@ -784,8 +784,17 @@ module Tracer = struct if not t.enabled then ok_none (* Do not start span if the max depth has been reached *) else if parent_depth >= Atomic.get Spans.max_depth then ( - let parent_trace_id = Option.fold ~none:"None" ~some:(fun p -> p.Span.context |> SpanContext.span_id_of_span_context |> Span_id.to_string) parent in - debug "Max_span_depth limit reached, not creating span %s (parent %s)" name parent_trace_id ; + let parent_trace_id = + Option.fold ~none:"None" + ~some:(fun p -> + p.Span.context + |> SpanContext.span_id_of_span_context + |> Span_id.to_string + ) + parent + in + debug "Max_span_depth limit reached, not creating span %s (parent %s)" + name parent_trace_id ; ok_none ) else let attributes = Attributes.merge_into t.attributes attributes in @@ -811,7 +820,7 @@ module Tracer = struct let new_context : SpanContext.t = let trace_context = TraceContext.( - with_added_baggage depth_key (string_of_int new_depth) + update_with_baggage depth_key (string_of_int new_depth) span.Span.context.trace_context ) in @@ -993,7 +1002,7 @@ module Propagator = struct in let trace_context'' = TraceContext.( - with_added_baggage depth_key new_depth trace_context' + update_with_baggage depth_key new_depth trace_context' ) in let carrier' = P.inject_into trace_context'' carrier in diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 1162202b611..352d5d488e4 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -24,6 +24,10 @@ let export_interval = ref 30. let set_export_interval t = export_interval := t +let export_chunk_size = Atomic.make 10000 + +let set_export_chunk_size x = Atomic.set export_chunk_size x + let host_id = ref "localhost" let set_host_id id = host_id := id @@ -289,6 +293,22 @@ module Destination = struct with exn -> debug "Tracing: unable to export span : %s" (Printexc.to_string exn) + let rec span_info_chunks span_info batch_size = + let rec list_to_chunks_inner l n curr chunks = + if n = 0 then + if l <> [] then + list_to_chunks_inner l batch_size [] ((curr, batch_size) :: chunks) + else + (curr, batch_size) :: chunks + else + match l with + | [] -> + (curr, List.length curr) :: chunks + | h :: t -> + list_to_chunks_inner t (n - 1) (h :: curr) chunks + in + list_to_chunks_inner (fst span_info) batch_size [] [] + let flush_spans () = let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in @@ -296,10 +316,17 @@ module Destination = struct with_tracing ~span_kind:Server ~trace_context:TraceContext.empty ~parent:None ~attributes ~name:"Tracing.flush_spans" in - TracerProvider.get_tracer_providers () - |> List.filter TracerProvider.get_enabled - |> List.concat_map TracerProvider.get_endpoints - |> List.iter (export_to_endpoint parent span_info) + let endpoints = + TracerProvider.get_tracer_providers () + |> List.filter TracerProvider.get_enabled + |> List.concat_map TracerProvider.get_endpoints + in + let span_info_chunks = + span_info_chunks span_info (Atomic.get export_chunk_size) + in + List.iter + (fun s_i -> List.iter (export_to_endpoint parent s_i) endpoints) + span_info_chunks let delay = Delay.make () diff --git a/ocaml/libs/tracing/tracing_export.mli b/ocaml/libs/tracing/tracing_export.mli index f322bd2404c..a857a4f523d 100644 --- a/ocaml/libs/tracing/tracing_export.mli +++ b/ocaml/libs/tracing/tracing_export.mli @@ -23,6 +23,13 @@ val set_export_interval : float -> unit Default is every [30.] seconds. *) +val set_export_chunk_size : int -> unit +(** [set_export_chunk_size size] sets the maximum number of finished spans that + can be exported in one chunk to [size]. + + Default is 10000 spans. + *) + val set_host_id : string -> unit (** [set_host_id id] sets the id of the host to [id]. diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index f32d61443bc..de2d68829fb 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1055,6 +1055,8 @@ let trace_log_dir = ref "/var/log/dt/zipkinv2/json" let export_interval = ref 30. +let export_chunk_size = ref 10000 + let max_spans = ref 10000 let max_traces = ref 10000 @@ -1678,6 +1680,11 @@ let other_options = , (fun () -> string_of_float !export_interval) , "The interval for exports in Tracing" ) + ; ( "export-chunk-size" + , Arg.Set_int export_chunk_size + , (fun () -> string_of_int !export_chunk_size) + , "The span chunk size for exports in Tracing" + ) ; ( "max-spans" , Arg.Set_int max_spans , (fun () -> string_of_int !max_spans) diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index a0f1f453b7d..7a7163ff42f 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -600,6 +600,7 @@ let initialise_observer ~__context component = let initialise ~__context = Tracing.Spans.set_max_depth !Xapi_globs.max_span_depth ; + Tracing_export.set_export_chunk_size !Xapi_globs.export_chunk_size ; List.iter (initialise_observer_meta ~__context) (startup_components ()) ; Db.Observer.get_all ~__context |> List.iter (fun self -> From ec61f8e025e48683a3f7a9926b2cba116b247ac3 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Fri, 29 Aug 2025 18:08:46 +0100 Subject: [PATCH 462/492] Revert "xapi/nm: Send non-empty dns to networkd when using IPv6 autoconf (#6586)" This reverts commit 05e63170ec3aaaeb0afb05ac181d3badc982602a, reversing changes made to 1fbdaae34ea3f32248397e072ea24b9107fd3788. Signed-off-by: Gabriel Buica --- ocaml/networkd/bin/network_server.ml | 16 +++++++----- ocaml/networkd/bin_db/networkd_db.ml | 29 +++++++++------------ ocaml/networkd/lib/network_config.ml | 15 +++++------ ocaml/xapi-idl/network/network_interface.ml | 7 ++--- ocaml/xapi/nm.ml | 17 +++++++----- 5 files changed, 41 insertions(+), 43 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 5e056f73eb7..59c76e319f3 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -554,8 +554,7 @@ module Interface = struct let set_dns _ dbg ~name ~nameservers ~domains = Debug.with_thread_associated dbg (fun () -> - update_config name - {(get_config name) with dns= Some (nameservers, domains)} ; + update_config name {(get_config name) with dns= (nameservers, domains)} ; debug "Configuring DNS for %s: nameservers: [%s]; domains: [%s]" name (String.concat ", " (List.map Unix.string_of_inet_addr nameservers)) (String.concat ", " domains) ; @@ -728,7 +727,7 @@ module Interface = struct ; ipv6_conf ; ipv6_gateway ; ipv4_routes - ; dns + ; dns= nameservers, domains ; mtu ; ethtool_settings ; ethtool_offload @@ -737,10 +736,15 @@ module Interface = struct ) ) -> update_config name c ; exec (fun () -> - match dns with - | Some (nameservers, domains) -> + (* We only apply the DNS settings when not in a DHCP mode + to avoid conflicts. The `dns` field + should really be an option type so that we don't have to + derive the intention of the caller by looking at other + fields. *) + match (ipv4_conf, ipv6_conf) with + | Static4 _, _ | _, Static6 _ | _, Autoconf6 -> set_dns () dbg ~name ~nameservers ~domains - | None -> + | _ -> () ) ; exec (fun () -> set_ipv4_conf dbg name ipv4_conf) ; diff --git a/ocaml/networkd/bin_db/networkd_db.ml b/ocaml/networkd/bin_db/networkd_db.ml index bffe93a32bc..f62021828fa 100644 --- a/ocaml/networkd/bin_db/networkd_db.ml +++ b/ocaml/networkd/bin_db/networkd_db.ml @@ -74,25 +74,20 @@ let _ = [("gateway", Unix.string_of_inet_addr addr)] in let dns = - interface_config.dns - |> Option.map fst - |> Option.map (List.map Unix.string_of_inet_addr) - |> Option.fold ~none:[] ~some:(function - | [] -> - [] - | dns' -> - [("dns", String.concat "," dns')] - ) + let dns' = + List.map Unix.string_of_inet_addr (fst interface_config.dns) + in + if dns' = [] then + [] + else + [("dns", String.concat "," dns')] in let domains = - interface_config.dns - |> Option.map snd - |> Option.fold ~none:[] ~some:(function - | [] -> - [] - | domains' -> - [("domain", String.concat "," domains')] - ) + let domains' = snd interface_config.dns in + if domains' = [] then + [] + else + [("domain", String.concat "," domains')] in mode @ addrs @ gateway @ dns @ domains | None4 -> diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index 3d034f05284..56eef61ce3d 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -37,6 +37,7 @@ let bridge_naming_convention (device : string) = let get_list_from ~sep ~key args = List.assoc_opt key args |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) + |> Option.value ~default:[] let parse_ipv4_config args = function | Some "static" -> @@ -72,13 +73,11 @@ let parse_ipv6_config args = function (None6, None) let parse_dns_config args = - let ( let* ) = Option.bind in - let* nameservers = - get_list_from ~sep:"," ~key:"DNS" args - |> Option.map (List.map Unix.inet_addr_of_string) + let nameservers = + get_list_from ~sep:"," ~key:"DNS" args |> List.map Unix.inet_addr_of_string in - let* domains = get_list_from ~sep:" " ~key:"DOMAIN" args in - Some (nameservers, domains) + let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in + (nameservers, domains) let read_management_conf () = try @@ -104,7 +103,7 @@ let read_management_conf () = let device = (* Take 1st member of bond *) match (bond_mode, bond_members) with - | None, _ | _, (None | Some []) -> ( + | None, _ | _, [] -> ( match List.assoc_opt "LABEL" args with | Some x -> x @@ -112,7 +111,7 @@ let read_management_conf () = error "%s: missing LABEL in %s" __FUNCTION__ management_conf ; raise Read_error ) - | _, Some (hd :: _) -> + | _, hd :: _ -> hd in Inventory.reread_inventory () ; diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index 06d38ff1a87..2f3368fc131 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -158,10 +158,7 @@ type interface_config_t = { ; ipv6_conf: ipv6 [@default None6] ; ipv6_gateway: Unix.inet_addr option [@default None] ; ipv4_routes: ipv4_route_t list [@default []] - ; dns: (Unix.inet_addr list * string list) option [@default None] - (** the list - of nameservers and domains to persist in /etc/resolv.conf. Must be None when - using a DHCP mode *) + ; dns: Unix.inet_addr list * string list [@default [], []] ; mtu: int [@default 1500] ; ethtool_settings: (string * string) list [@default []] ; ethtool_offload: (string * string) list [@default [("lro", "off")]] @@ -203,7 +200,7 @@ let default_interface = ; ipv6_conf= None6 ; ipv6_gateway= None ; ipv4_routes= [] - ; dns= None + ; dns= ([], []) ; mtu= 1500 ; ethtool_settings= [] ; ethtool_offload= [("lro", "off")] diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index fbc37a5fedc..229b53adbe2 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -634,25 +634,28 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) rc.API.pIF_ip_configuration_mode = `Static | `IPv6 -> rc.API.pIF_ipv6_configuration_mode = `Static - || rc.API.pIF_ipv6_configuration_mode = `Autoconf in let dns = match (static, rc.API.pIF_DNS) with | false, _ | true, "" -> - None + ([], []) | true, pif_dns -> let nameservers = List.map Unix.inet_addr_of_string - (String.split_on_char ',' pif_dns) + (String.split ',' pif_dns) in let domains = match List.assoc_opt "domain" rc.API.pIF_other_config with - | None | Some "" -> + | None -> [] - | Some domains -> - String.split_on_char ',' domains + | Some domains -> ( + try String.split ',' domains + with _ -> + warn "Invalid DNS search domains: %s" domains ; + [] + ) in - Some (nameservers, domains) + (nameservers, domains) in let mtu = determine_mtu rc net_rc in let ethtool_settings, ethtool_offload = From 78df7443fbb2c4ad5ac32b575586594ab8100620 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 1 Sep 2025 14:12:10 +0100 Subject: [PATCH 463/492] Use a forwarder so each component updates their depth and chunk size The other Observer components e.g. xenops need a forwarder to notify them of any changes to the export_chunk_size and max_depth as they do not have access to xapi_globs/xapi.conf Signed-off-by: Steven Woods --- ocaml/tests/test_cluster.ml | 2 ++ ocaml/xapi-idl/cluster/cli-help.t | 4 +++ ocaml/xapi-idl/lib/observer_helpers.ml | 16 ++++++++++ ocaml/xapi-idl/lib/observer_helpers.mli | 21 +++++++++++++ ocaml/xapi-idl/lib/observer_skeleton.ml | 4 +++ ocaml/xapi-idl/lib/observer_skeleton.mli | 4 +++ ocaml/xapi/xapi_observer.ml | 40 ++++++++++++++++++++++-- ocaml/xapi/xapi_xenops.ml | 10 ++++++ ocaml/xenopsd/lib/xenops_server.ml | 14 +++++++++ 9 files changed, 113 insertions(+), 2 deletions(-) diff --git a/ocaml/tests/test_cluster.ml b/ocaml/tests/test_cluster.ml index d24e36fe72c..9c945776cd9 100644 --- a/ocaml/tests/test_cluster.ml +++ b/ocaml/tests/test_cluster.ml @@ -34,9 +34,11 @@ let test_clusterd_rpc ~__context call = | "Observer.init" | "Observer.set_trace_log_dir" | "Observer.set_export_interval" + | "Observer.set_export_chunk_size" | "Observer.set_host_id" | "Observer.set_max_traces" | "Observer.set_max_spans" + | "Observer.set_max_depth" | "Observer.set_max_file_size" | "Observer.set_compress_tracing_files" ) , _ ) -> diff --git a/ocaml/xapi-idl/cluster/cli-help.t b/ocaml/xapi-idl/cluster/cli-help.t index 5b9362aa648..abe729544da 100644 --- a/ocaml/xapi-idl/cluster/cli-help.t +++ b/ocaml/xapi-idl/cluster/cli-help.t @@ -21,10 +21,14 @@ Observer.set_endpoints [OPTION]… dbg uuid endpoints + Observer.set_export_chunk_size [OPTION]… dbg int + Observer.set_export_interval [OPTION]… dbg float Observer.set_host_id [OPTION]… dbg string + Observer.set_max_depth [OPTION]… dbg int + Observer.set_max_file_size [OPTION]… dbg int Observer.set_max_spans [OPTION]… dbg int diff --git a/ocaml/xapi-idl/lib/observer_helpers.ml b/ocaml/xapi-idl/lib/observer_helpers.ml index 24f7ee3db46..c2ea58bb8d3 100644 --- a/ocaml/xapi-idl/lib/observer_helpers.ml +++ b/ocaml/xapi-idl/lib/observer_helpers.ml @@ -138,6 +138,10 @@ module ObserverAPI (R : RPC) = struct declare "Observer.set_export_interval" [] (dbg_p @-> float_p @-> returning unit_p err) + let set_export_chunk_size = + declare "Observer.set_export_chunk_size" [] + (dbg_p @-> int_p @-> returning unit_p err) + let set_max_spans = declare "Observer.set_max_spans" [] (dbg_p @-> int_p @-> returning unit_p err) @@ -146,6 +150,10 @@ module ObserverAPI (R : RPC) = struct declare "Observer.set_max_traces" [] (dbg_p @-> int_p @-> returning unit_p err) + let set_max_depth = + declare "Observer.set_max_depth" [] + (dbg_p @-> int_p @-> returning unit_p err) + let set_max_file_size = declare "Observer.set_max_file_size" [] (dbg_p @-> int_p @-> returning unit_p err) @@ -193,10 +201,14 @@ module type Server_impl = sig val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + val set_export_chunk_size : context -> dbg:debug_info -> size:int -> unit + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + val set_max_depth : context -> dbg:debug_info -> depth:int -> unit + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit val set_host_id : context -> dbg:debug_info -> host_id:string -> unit @@ -227,8 +239,12 @@ module Server (Impl : Server_impl) () = struct S.set_export_interval (fun dbg interval -> Impl.set_export_interval () ~dbg ~interval ) ; + S.set_export_chunk_size (fun dbg size -> + Impl.set_export_chunk_size () ~dbg ~size + ) ; S.set_max_spans (fun dbg spans -> Impl.set_max_spans () ~dbg ~spans) ; S.set_max_traces (fun dbg traces -> Impl.set_max_traces () ~dbg ~traces) ; + S.set_max_depth (fun dbg depth -> Impl.set_max_depth () ~dbg ~depth) ; S.set_max_file_size (fun dbg file_size -> Impl.set_max_file_size () ~dbg ~file_size ) ; diff --git a/ocaml/xapi-idl/lib/observer_helpers.mli b/ocaml/xapi-idl/lib/observer_helpers.mli index cd23d2d1e80..489310a0847 100644 --- a/ocaml/xapi-idl/lib/observer_helpers.mli +++ b/ocaml/xapi-idl/lib/observer_helpers.mli @@ -77,6 +77,11 @@ module ObserverAPI : functor (R : Idl.RPC) -> sig (** [set_export_interval dbg interval] notifies the fowarder that the interval between trace exports has been set to [interval]. *) + val set_export_chunk_size : + (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_export_chunk_size dbg size] notifies the fowarder that the max size + of each chunk of finished spans exported has been set to [size]. *) + val set_max_spans : (debug_info -> int -> (unit, Errors.error) R.comp) R.res (** [set_max_spans dbg spans] notifies the fowarder that the max number of spans has been set to [spans]. *) @@ -85,6 +90,10 @@ module ObserverAPI : functor (R : Idl.RPC) -> sig (** [set_max_traces dbg traces] notifies the fowarder that the max number of traces has been set to [traces]. *) + val set_max_depth : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_depth dbg depth] notifies the fowarder that the max depth of + a span in a trace has been set to [depth]. *) + val set_max_file_size : (debug_info -> int -> (unit, Errors.error) R.comp) R.res (** [set_max_file_size dbg file_size] notifies the fowarder that the max file @@ -135,10 +144,14 @@ module type Server_impl = sig val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + val set_export_chunk_size : context -> dbg:debug_info -> size:int -> unit + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + val set_max_depth : context -> dbg:debug_info -> depth:int -> unit + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit val set_host_id : context -> dbg:debug_info -> host_id:string -> unit @@ -176,10 +189,14 @@ module Server : functor (_ : Server_impl) () -> sig val set_export_interval : (debug_info -> float -> unit) -> unit + val set_export_chunk_size : (debug_info -> int -> unit) -> unit + val set_max_spans : (debug_info -> int -> unit) -> unit val set_max_traces : (debug_info -> int -> unit) -> unit + val set_max_depth : (debug_info -> int -> unit) -> unit + val set_max_file_size : (debug_info -> int -> unit) -> unit val set_host_id : (debug_info -> string -> unit) -> unit @@ -215,10 +232,14 @@ module Client : sig val set_export_interval : debug_info -> float -> unit + val set_export_chunk_size : debug_info -> int -> unit + val set_max_spans : debug_info -> int -> unit val set_max_traces : debug_info -> int -> unit + val set_max_depth : debug_info -> int -> unit + val set_max_file_size : debug_info -> int -> unit val set_host_id : debug_info -> string -> unit diff --git a/ocaml/xapi-idl/lib/observer_skeleton.ml b/ocaml/xapi-idl/lib/observer_skeleton.ml index e53a45f958c..59df66d246e 100644 --- a/ocaml/xapi-idl/lib/observer_skeleton.ml +++ b/ocaml/xapi-idl/lib/observer_skeleton.ml @@ -36,10 +36,14 @@ module Observer = struct let set_export_interval ctx ~dbg ~interval = unimplemented __FUNCTION__ + let set_export_chunk_size ctx ~dbg ~size = unimplemented __FUNCTION__ + let set_max_spans ctx ~dbg ~spans = unimplemented __FUNCTION__ let set_max_traces ctx ~dbg ~traces = unimplemented __FUNCTION__ + let set_max_depth ctx ~dbg ~depth = unimplemented __FUNCTION__ + let set_max_file_size ctx ~dbg ~file_size = unimplemented __FUNCTION__ let set_host_id ctx ~dbg ~host_id = unimplemented __FUNCTION__ diff --git a/ocaml/xapi-idl/lib/observer_skeleton.mli b/ocaml/xapi-idl/lib/observer_skeleton.mli index c99b77f9a34..2b914ada71d 100644 --- a/ocaml/xapi-idl/lib/observer_skeleton.mli +++ b/ocaml/xapi-idl/lib/observer_skeleton.mli @@ -34,10 +34,14 @@ module Observer : sig val set_export_interval : context -> dbg:string -> interval:float -> unit + val set_export_chunk_size : context -> dbg:string -> size:int -> unit + val set_max_spans : context -> dbg:string -> spans:int -> unit val set_max_traces : context -> dbg:string -> traces:int -> unit + val set_max_depth : context -> dbg:string -> depth:int -> unit + val set_max_file_size : context -> dbg:string -> file_size:int -> unit val set_host_id : context -> dbg:string -> host_id:string -> unit diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 7a7163ff42f..073e920cba2 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -48,10 +48,14 @@ module type ObserverInterface = sig val set_export_interval : __context:Context.t -> interval:float -> unit + val set_export_chunk_size : __context:Context.t -> size:int -> unit + val set_max_spans : __context:Context.t -> spans:int -> unit val set_max_traces : __context:Context.t -> traces:int -> unit + val set_max_depth : __context:Context.t -> depth:int -> unit + val set_max_file_size : __context:Context.t -> file_size:int -> unit val set_host_id : __context:Context.t -> host_id:string -> unit @@ -93,6 +97,10 @@ module Observer : ObserverInterface = struct debug "xapi Observer.set_export_interval" ; Tracing_export.set_export_interval interval + let set_export_chunk_size ~__context ~size = + debug "xapi Observer.set_export_chunk_size" ; + Tracing_export.set_export_chunk_size size + let set_max_spans ~__context ~spans = debug "xapi Observer.set_max_spans" ; Tracing.Spans.set_max_spans spans @@ -101,6 +109,10 @@ module Observer : ObserverInterface = struct debug "xapi Observer.set_max_traces" ; Tracing.Spans.set_max_traces traces + let set_max_depth ~__context ~depth = + debug "xapi Observer.set_max_depth" ; + Tracing.Spans.set_max_depth depth + let set_max_file_size ~__context ~file_size = debug "xapi Observer.set_max_file_size" ; Tracing_export.Destination.File.set_max_file_size file_size @@ -189,6 +201,12 @@ module Xapi_cluster = struct let dbg = Context.string_of_task __context in S.Observer.set_export_interval dbg interval + let set_export_chunk_size ~__context ~size = + debug "xapi_cluster Observer.set_export_chunk_size" ; + let module S = (val local_client ~__context : XAPI_CLUSTER) in + let dbg = Context.string_of_task __context in + S.Observer.set_export_chunk_size dbg size + let set_max_spans ~__context ~spans = debug "xapi_cluster Observer.set_max_spans" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in @@ -201,6 +219,12 @@ module Xapi_cluster = struct let dbg = Context.string_of_task __context in S.Observer.set_max_traces dbg traces + let set_max_depth ~__context ~depth = + debug "xapi_cluster Observer.set_max_depth" ; + let module S = (val local_client ~__context : XAPI_CLUSTER) in + let dbg = Context.string_of_task __context in + S.Observer.set_max_depth dbg depth + let set_max_file_size ~__context ~file_size = debug "xapi_cluster Observer.set_max_file_size" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in @@ -370,10 +394,14 @@ module Dom0ObserverConfig (ObserverComponent : OBSERVER_COMPONENT) : let set_export_interval ~__context:_ ~interval:_ = () + let set_export_chunk_size ~__context:_ ~size:_ = () + let set_max_spans ~__context:_ ~spans:_ = () let set_max_traces ~__context:_ ~traces:_ = () + let set_max_depth ~__context:_ ~depth:_ = () + let set_max_file_size ~__context:_ ~file_size:_ = () let set_host_id ~__context:_ ~host_id:_ = () @@ -542,6 +570,10 @@ let set_export_interval ~__context interval component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_export_interval ~__context ~interval +let set_export_chunk_size ~__context size component = + let module Forwarder = (val get_forwarder component : ObserverInterface) in + Forwarder.set_export_chunk_size ~__context ~size + let set_max_spans ~__context spans component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_spans ~__context ~spans @@ -550,6 +582,10 @@ let set_max_traces ~__context traces component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_traces ~__context ~traces +let set_max_depth ~__context depth component = + let module Forwarder = (val get_forwarder component : ObserverInterface) in + Forwarder.set_max_depth ~__context ~depth + let set_max_file_size ~__context file_size component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_file_size ~__context ~file_size @@ -585,8 +621,10 @@ let initialise_observer_component ~__context component = let initialise_observer_meta ~__context component = set_trace_log_dir ~__context !Xapi_globs.trace_log_dir component ; set_export_interval ~__context !Xapi_globs.export_interval component ; + set_export_chunk_size ~__context !Xapi_globs.export_chunk_size component ; set_max_spans ~__context !Xapi_globs.max_spans component ; set_max_traces ~__context !Xapi_globs.max_traces component ; + set_max_depth ~__context !Xapi_globs.max_span_depth component ; set_max_file_size ~__context !Xapi_globs.max_observer_file_size component ; set_host_id ~__context (Helpers.get_localhost_uuid ()) component ; set_compress_tracing_files ~__context @@ -599,8 +637,6 @@ let initialise_observer ~__context component = initialise_observer_component ~__context component let initialise ~__context = - Tracing.Spans.set_max_depth !Xapi_globs.max_span_depth ; - Tracing_export.set_export_chunk_size !Xapi_globs.export_chunk_size ; List.iter (initialise_observer_meta ~__context) (startup_components ()) ; Db.Observer.get_all ~__context |> List.iter (fun self -> diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 7487d723ab3..9b12bcec5a6 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -4493,6 +4493,11 @@ module Observer = struct let dbg = Context.string_of_task __context in Client.Observer.set_export_interval dbg interval + let set_export_chunk_size ~__context ~size = + let module Client = (val make_client (default_xenopsd ()) : XENOPS) in + let dbg = Context.string_of_task __context in + Client.Observer.set_export_chunk_size dbg size + let set_max_spans ~__context ~spans = let module Client = (val make_client (default_xenopsd ()) : XENOPS) in let dbg = Context.string_of_task __context in @@ -4503,6 +4508,11 @@ module Observer = struct let dbg = Context.string_of_task __context in Client.Observer.set_max_traces dbg traces + let set_max_depth ~__context ~depth = + let module Client = (val make_client (default_xenopsd ()) : XENOPS) in + let dbg = Context.string_of_task __context in + Client.Observer.set_max_depth dbg depth + let set_max_file_size ~__context ~file_size = let module Client = (val make_client (default_xenopsd ()) : XENOPS) in let dbg = Context.string_of_task __context in diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index b47344a30e6..6a06b36ba14 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -4311,6 +4311,12 @@ module Observer = struct (fun () -> Tracing_export.set_export_interval interval) () + let set_export_chunk_size _ dbg size = + debug "Observer.set_export_chunk_size : dbg=%s" dbg ; + Debug.with_thread_associated dbg + (fun () -> Tracing_export.set_export_chunk_size size) + () + let set_max_spans _ dbg spans = debug "Observer.set_max_spans : dbg=%s" dbg ; Debug.with_thread_associated dbg @@ -4323,6 +4329,12 @@ module Observer = struct (fun () -> Tracing.Spans.set_max_traces traces) () + let set_max_depth _ dbg depth = + debug "Observer.set_max_depth : dbg=%s" dbg ; + Debug.with_thread_associated dbg + (fun () -> Tracing.Spans.set_max_depth depth) + () + let set_max_file_size _ dbg file_size = debug "Observer.set_max_file_size : dbg=%s" dbg ; Debug.with_thread_associated dbg @@ -4446,8 +4458,10 @@ let _ = Server.Observer.init (Observer.init ()) ; Server.Observer.set_trace_log_dir (Observer.set_trace_log_dir ()) ; Server.Observer.set_export_interval (Observer.set_export_interval ()) ; + Server.Observer.set_export_chunk_size (Observer.set_export_chunk_size ()) ; Server.Observer.set_max_spans (Observer.set_max_spans ()) ; Server.Observer.set_max_traces (Observer.set_max_traces ()) ; + Server.Observer.set_max_depth (Observer.set_max_depth ()) ; Server.Observer.set_max_file_size (Observer.set_max_file_size ()) ; Server.Observer.set_host_id (Observer.set_host_id ()) ; Server.Observer.set_compress_tracing_files From a57d5811ebde9e589102eed8fb5c378e5580a25d Mon Sep 17 00:00:00 2001 From: Bengang Yuan Date: Fri, 29 Aug 2025 07:50:02 +0000 Subject: [PATCH 464/492] CA-416351: Slave shutdown timeout Before shutting down, xapi calls `xapi_pre_shutdown` to execute several pre-shutdown scripts, which require the current host's UUID as a parameter. Currently, xapi obtains this UUID in a redundant manner: 1. It retrieves the UUID from the local inventory file. 2. It queries the database for the host's reference using the UUID. 3. It queries the database again for the host's UUID using the reference obtained in step 2. Steps 2 and 3 are unnecessary since the UUID is already available from step 1. Moreover, when the master stops, the slave fails to query the database, increasing xapi shutdown times on the slave. The solution is to directly use the UUID obtained in step 1, eliminating the redundant database queries. Signed-off-by: Bengang Yuan --- ocaml/xapi/xapi_hooks.ml | 23 +++++++++++------------ ocaml/xapi/xapi_host.ml | 6 +++--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ocaml/xapi/xapi_hooks.ml b/ocaml/xapi/xapi_hooks.ml index 2f9edaff073..a7ba2d75548 100644 --- a/ocaml/xapi/xapi_hooks.ml +++ b/ocaml/xapi/xapi_hooks.ml @@ -102,13 +102,11 @@ let execute_hook ~__context ~script_name ~args ~reason = ) scripts -let execute_vm_hook ~__context ~reason ~vm = - let vmuuid = Db.VM.get_uuid ~__context ~self:vm in - execute_hook ~__context ~args:["-vmuuid"; vmuuid] ~reason +let execute_vm_hook ~__context ~reason ~vm_uuid = + execute_hook ~__context ~args:["-vmuuid"; vm_uuid] ~reason -let execute_host_hook ~__context ~reason ~host = - let uuid = Db.Host.get_uuid ~__context ~self:host in - execute_hook ~__context ~args:["-hostuuid"; uuid] ~reason +let execute_host_hook ~__context ~reason ~host_uuid = + execute_hook ~__context ~args:["-hostuuid"; host_uuid] ~reason let execute_pool_hook ~__context ~reason = execute_hook ~__context ~args:[] ~reason @@ -116,8 +114,9 @@ let execute_pool_hook ~__context ~reason = let host_pre_declare_dead ~__context ~host ~reason = info "Running host pre declare dead hook for %s" (Ref.string_of host) ; (* this could use power fencing *) + let host_uuid = Db.Host.get_uuid ~__context ~self:host in execute_host_hook ~__context ~script_name:scriptname__host_pre_declare_dead - ~reason ~host ; + ~reason ~host_uuid ; if String.equal reason reason__dbdestroy then log_and_ignore_exn (fun () -> (* declare it as dead to the clustering daemon if any *) @@ -132,11 +131,10 @@ let host_pre_declare_dead ~__context ~host ~reason = () ) -let xapi_pre_shutdown ~__context ~host ~reason = - info "%s Running xapi pre shutdown hooks for %s" __FUNCTION__ - (Ref.string_of host) ; +let xapi_pre_shutdown ~__context ~host_uuid ~reason = + info "%s Running xapi pre shutdown hooks for %s" __FUNCTION__ host_uuid ; execute_host_hook ~__context ~script_name:scriptname__xapi_pre_shutdown - ~reason ~host + ~reason ~host_uuid (* Called when host died -- !! hook code in here to abort outstanding forwarded ops *) let internal_host_dead_hook __context host = @@ -159,8 +157,9 @@ let internal_host_dead_hook __context host = let host_post_declare_dead ~__context ~host ~reason = (* Cancel outstanding tasks first-- should release necessary locks *) internal_host_dead_hook __context host ; + let host_uuid = Db.Host.get_uuid ~__context ~self:host in execute_host_hook ~__context ~script_name:scriptname__host_post_declare_dead - ~reason ~host + ~reason ~host_uuid let pool_ha_overcommitted_hook ~__context = execute_pool_hook ~__context ~script_name:scriptname__pool_ha_overcommitted diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 1bf3e4d9b6a..8b904661bf8 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -793,9 +793,9 @@ let restart_agent ~__context ~host:_ = ) let shutdown_agent ~__context = - debug "Host.restart_agent: Host agent will shutdown in 1s!!!!" ; - let localhost = Helpers.get_localhost ~__context in - Xapi_hooks.xapi_pre_shutdown ~__context ~host:localhost + debug "Host.shutdown_agent: Host agent will shutdown in 1s!!!!" ; + let host_uuid = Helpers.get_localhost_uuid () in + Xapi_hooks.xapi_pre_shutdown ~__context ~host_uuid ~reason:Xapi_hooks.reason__clean_shutdown ; Xapi_fuse.light_fuse_and_dont_restart ~fuse_length:1. () From 422600239a01e38f55b352e6aa72db7aa3c8b8de Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 2 Sep 2025 09:23:10 +0100 Subject: [PATCH 465/492] rrd: Fix absolute rate calculations Absolute metric should work as follows: Timestamp = 300, 600, 900, 1200 Step = 300 seconds ABSOLUTE DS = 1, 2, 3, 4 But they do not seem to have ever worked correctly - they were previously divided by the interval but incorrectly handled NaNs, resulting in wrong behaviour. Then the refactoring (including 73ca3cc ("CA-404597: rrd - Pass Gauge and Absolute data source values as-is")) broke them further. Divide absolute metrics by the interval in process_ds_value. Fix the unit test to expect the right behaviour - it passes with this fix. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 10 +++--- ocaml/libs/xapi-rrd/lib_test/unit_tests.ml | 42 +++++----------------- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index bb516ea6a28..4f4c4d3cec1 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -342,9 +342,9 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = (* We assume that the data being given is of the form of a rate; that is, it's dependent on the time interval between updates. - Gauge and Absolute data sources are simply kept as is without any - time-based calculations, while Derive data sources will be changed according - to the time passed since the last measurement. (see CA-404597) *) + Gauge data sources are simply kept as is without any time-based + calculations, while Absolute and Derive data sources will be changed + according to the time passed since the last measurement. (see CA-404597) *) let process_ds_value ds value interval new_rrd = if interval > ds.ds_mrhb then nan @@ -361,8 +361,10 @@ let process_ds_value ds value interval new_rrd = let rate = match (ds.ds_ty, new_rrd) with - | Absolute, _ | Derive, true | Gauge, _ -> + | Derive, true | Gauge, _ -> value_raw + | Absolute, _ -> + value_raw /. interval | Derive, false -> ( match (ds.ds_last, value) with | VT_Int64 x, VT_Int64 y -> diff --git a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml index f016605848c..5f84e76f194 100644 --- a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml @@ -147,46 +147,22 @@ let absolute_rrd = let absolute_rrd_CA_404597 () = let rra = rra_create CF_Average 100 1 0.5 in - let rra2 = rra_create CF_Average 100 10 0.5 in - let rra3 = rra_create CF_Average 100 100 0.5 in - let rra4 = rra_create CF_Average 100 1000 0.5 in - let ts = 1000000000.0 in + let ts = 0.0 in let ds = - ds_create "foo" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) + ds_create "foo" Absolute ~mrhb:1000.0 ~min:0. ~max:infinity (VT_Float 0.0) in - let ds2 = - ds_create "bar" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) - in - let ds3 = - ds_create "baz" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) - in - let ds4 = - ds_create "boo" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) - in - let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in + let rrd = rrd_create [|ds|] [|rra|] 1L ts in let id = Identity in for i = 1 to 100000 do - let t = 1000000.0 +. (0.7 *. float_of_int i) in + let t = 300. *. float_of_int i in let ((_, val1) as v1) = - (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + (0, {value= VT_Float (300. *. float_of_int i); transform= id}) in - let ((_, val2) as v2) = - (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) - in - let ((_, val3) as v3) = - (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) - in - let ((_, val4) as v4) = - (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) - in - ds_update rrd t [|v1; v2; v3; v4|] false ; + ds_update rrd t [|v1|] false ; - Array.iter2 - (fun ds value -> - compare_float __LOC__ ds.ds_value - (float_of_string (ds_value_to_string value.value)) - ) - rrd.rrd_dss [|val1; val2; val3; val4|] + compare_float __LOC__ + (float_of_string (ds_value_to_string val1.value) /. 300.) + rrd.rrd_dss.(0).ds_value done (** Verify that Gauge data soruce values are correctly handled by the RRD lib From 31b846153ffa34499e2a7dc1c40757b2be86bebc Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 22 Aug 2025 15:27:32 +0800 Subject: [PATCH 466/492] CP-309523: Make networkd_db utility return bridge MAC address This is a simplified version of commit 846ce82ba3. The feature introduced that commit is to remove the network device renaming functionality. When the feature is merged, there will be 3 states of releases: 1. without the feature. 2. with the feature but renaming is still working. 3. with the feature and renaming has been removed. Originally, the upgrade path was intended to support transitions from state 2 to state 3 only. However, it has become necessary to also support upgrades directly from state 1 to state 3. This commit is to enable the release in state 1 to upgrade to state 3. The change is kept extremely small so it can be merged independently without waiting for the full feature to be merged. In details, during the upgrade, the host-installer can't know the management interface as the eth will not present in the environment the host-installer running. So it needs the MAC address to find out the management interface for setting up network. Signed-off-by: Ming Lu --- ocaml/networkd/bin_db/networkd_db.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/networkd/bin_db/networkd_db.ml b/ocaml/networkd/bin_db/networkd_db.ml index f62021828fa..b6194718d37 100644 --- a/ocaml/networkd/bin_db/networkd_db.ml +++ b/ocaml/networkd/bin_db/networkd_db.ml @@ -38,6 +38,8 @@ let _ = List.concat_map (fun (_, port) -> port.interfaces) bridge_config.ports in Printf.printf "interfaces=%s\n" (String.concat "," ifaces) ; + Printf.printf "hwaddrs=%s\n" + (Option.value ~default:"" bridge_config.bridge_mac) ; match bridge_config.vlan with | None -> () From afe8e1b9f3aad2d654a2eebb92ad87f1db0c8640 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 3 Sep 2025 13:38:42 +0100 Subject: [PATCH 467/492] xapi_vm_migrate: Fix reservations not being cleared on halted VMs Cross-pool migrations set scheduled_to_be_resident_on to the destination host, reserving memory and vGPUs. If the VM is still halted when migration is finished, the field is not cleared on the destination, preserving the reservations even though they're not necessarily going to be used anytime soon. Call force_state_reset_keep_current_operations in pool_migrate_complete on the destination to clear the reservations among other things at the end of the migration. This fixes an issue when VMs migrated across pools in a halted state would take up memory in xapi's view (but not in RRD's view), which is not intuitive and could prevent further migrations from claiming enough free memory on the host. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vm_migrate.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index e5eca21283d..07406014afb 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -489,6 +489,11 @@ let pool_migrate_complete ~__context ~vm ~host:_ = ~value:`restart_device_model ; let dbg = Context.string_of_task __context in let queue_name = Xapi_xenops_queue.queue_of_vm ~__context ~self:vm in + (* Reset the state, which will update allowed operations, clear reservations + for halted VMs, disconnect devices *) + let power_state = Db.VM.get_power_state ~__context ~self:vm in + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:power_state ; if Xapi_xenops.vm_exists_in_xenopsd queue_name dbg id then ( remove_stale_pcis ~__context ~vm ; Xapi_xenops.set_resident_on ~__context ~self:vm ; From b657c62d0506d02fe00860b2b519f943bb9686cb Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Wed, 27 Aug 2025 14:59:45 +0800 Subject: [PATCH 468/492] CP-308863: Count vGPU migrations This metric represents the rate of VM migrations with vGPUs per second. The total count can be calculated as AVERAGE * seconds. For example, for one-day data granularity, the total count for one day is AVERAGE * 86400, in which 86400 is the seconds of one day. Signed-off-by: Ming Lu --- ocaml/xapi/message_forwarding.ml | 4 ++++ ocaml/xapi/xapi_stats.ml | 22 +++++++++++++++++++++- ocaml/xapi/xapi_stats.mli | 3 +++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index f25f5bd9431..4a027a2da5e 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -2501,6 +2501,8 @@ functor let snapshot = Db.VM.get_record ~__context ~self:vm in reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_migrate (fun () -> + if Db.VM.get_VGPUs ~__context ~self:vm <> [] then + Xapi_stats.incr_pool_vgpu_migration_count () ; forward_vm_op ~local_fn ~__context ~vm ~remote_fn ) ) ; @@ -2622,6 +2624,8 @@ functor assert_can_migrate ~__context ~vm ~dest ~live ~vdi_map ~vif_map ~vgpu_map ~options ) ; + if vgpu_map <> [] then + Xapi_stats.incr_pool_vgpu_migration_count () ; forward_migrate_send () ) in diff --git a/ocaml/xapi/xapi_stats.ml b/ocaml/xapi/xapi_stats.ml index 2c94ca64974..f39b5ae88fa 100644 --- a/ocaml/xapi/xapi_stats.ml +++ b/ocaml/xapi/xapi_stats.ml @@ -16,6 +16,10 @@ module D = Debug.Make (struct let name = "xapi_stats" end) let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +let pool_vgpu_migration_count : int Atomic.t = Atomic.make 0 + +let incr_pool_vgpu_migration_count () = Atomic.incr pool_vgpu_migration_count + let generate_master_stats ~__context = let session_count = Db.Session.get_all ~__context |> List.length |> Int64.of_int @@ -44,7 +48,23 @@ let generate_master_stats ~__context = ~min:0.0 ~units:"sessions/s" () ) in - [session_count_ds; task_count_ds; session_count_change_ds] + let vgpu_migration_count = + Atomic.exchange pool_vgpu_migration_count 0 |> Int64.of_int + in + let vgpu_migration_count_ds = + ( Rrd.Host + , Ds.ds_make ~name:"pool_vgpu_migration_rate" + ~description:"Number of vGPU migrations occurred per second" + ~value:(Rrd.VT_Int64 vgpu_migration_count) ~ty:Rrd.Absolute + ~default:true ~min:0. ~units:"migrations/s" () + ) + in + [ + session_count_ds + ; task_count_ds + ; session_count_change_ds + ; vgpu_migration_count_ds + ] let gc_debug = ref true diff --git a/ocaml/xapi/xapi_stats.mli b/ocaml/xapi/xapi_stats.mli index 5282dca6db7..4e1b20750d9 100644 --- a/ocaml/xapi/xapi_stats.mli +++ b/ocaml/xapi/xapi_stats.mli @@ -18,3 +18,6 @@ val start : unit -> unit val stop : unit -> unit (** Stop the stats reporting thread. *) + +val incr_pool_vgpu_migration_count : unit -> unit +(** Increments the pool_vgpu_migration_count by 1 . *) From e343dfe6a502c5a1e45f7051f3b96eb0c6312f5d Mon Sep 17 00:00:00 2001 From: Chunjie Zhu Date: Fri, 5 Sep 2025 14:31:07 +0800 Subject: [PATCH 469/492] CA-416516: vm.slice/cgroup.procs write operation gets EBUSY see cgroups v2 "no internal processes" rule if cgroup.subtree_control is not empty, and we attach a pid to cgroup.procs, kernel would return EBUSY Signed-off-by: Chunjie Zhu --- ocaml/xenopsd/scripts/qemu-wrapper | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index c8acefbd3f6..b1d811e7126 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -102,7 +102,15 @@ def prepare_exec(): g = open("/sys/fs/cgroup/cpu/%s/cgroup.procs" % cgroup_slice, 'w') except FileNotFoundError: # cgroup-v2 path: - g = open("/sys/fs/cgroup/%s/cgroup.procs" % cgroup_slice, 'w') + # Note cgroups v2 "no internal processes" rule + # if cgroup.subtree_control is not empty, and we attach a pid + # into cgroup.procs, kernel would return EBUSY + cgroup_slice_dir = os.path.join("/sys/fs/cgroup", cgroup_slice) + qemu_dm_dir = os.path.join(cgroup_slice_dir, "qemu-dm") + if not os.path.exists(qemu_dm_dir): + os.mkdir(qemu_dm_dir) + procs_file = os.path.join(qemu_dm_dir, "cgroup.procs") + g = open(procs_file, 'w') g.write(str(os.getpid())) g.close() except IOError as e: From ec8f3d65e93f05a3544183753f32bd0f5c0ff1b2 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 5 Sep 2025 15:41:21 +0000 Subject: [PATCH 470/492] CA-367765: remove reference to obsolete default URL Signed-off-by: Rob Hoes --- ocaml/xapi/xapi_host.ml | 2 ++ ocaml/xapi/xapi_host_crashdump.ml | 3 ++- ocaml/xapi/xapi_support.ml | 8 -------- scripts/host-bugreport-upload | 4 +--- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 1bf3e4d9b6a..0df031ca019 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -119,6 +119,8 @@ let pool_size_is_restricted ~__context = not (Pool_features.is_enabled ~__context Features.Pool_size) let bugreport_upload ~__context ~host:_ ~url ~options = + if url = "" then + raise Api_errors.(Server_error (invalid_value, ["url"; ""])) ; let proxy = if List.mem_assoc "http_proxy" options then List.assoc "http_proxy" options diff --git a/ocaml/xapi/xapi_host_crashdump.ml b/ocaml/xapi/xapi_host_crashdump.ml index 645e1e6fc33..02e8303d777 100644 --- a/ocaml/xapi/xapi_host_crashdump.ml +++ b/ocaml/xapi/xapi_host_crashdump.ml @@ -149,5 +149,6 @@ let destroy ~__context ~self = let upload ~__context ~self ~url ~options = let filename = Db.Host_crashdump.get_filename ~__context ~self in - let url = if url = "" then upload_url filename else url in + if url = "" then + raise Api_errors.(Server_error (invalid_value, ["url"; ""])) ; do_upload "host-crash-upload" (crash_dir ^ "/" ^ filename) url options diff --git a/ocaml/xapi/xapi_support.ml b/ocaml/xapi/xapi_support.ml index 5e65d586776..13666dc09e0 100644 --- a/ocaml/xapi/xapi_support.ml +++ b/ocaml/xapi/xapi_support.ml @@ -14,14 +14,6 @@ module D = Debug.Make (struct let name = "xapi_support" end) open D - -let support_url = "ftp://support.xensource.com/uploads/" - -(* URL to which the crashdump/whatever will be uploaded *) -let upload_url name = - let uuid = Xapi_inventory.lookup Xapi_inventory._installation_uuid in - Printf.sprintf "%s%s-%s" support_url uuid name - open Forkhelpers let do_upload label file url options = diff --git a/scripts/host-bugreport-upload b/scripts/host-bugreport-upload index 766b6964f2e..545b7d561d3 100755 --- a/scripts/host-bugreport-upload +++ b/scripts/host-bugreport-upload @@ -4,8 +4,6 @@ # # Upload a bugreport to the support website -DEFAULT_BASE_URL="ftp://support.xensource.com/uploads/" - # If the user supplies a bare filename without a URI scheme, # we ignore it -- if they _really_ want to upload named files # to our support server, they can specify the URI scheme. @@ -19,7 +17,7 @@ if [ -z "$FILENAME" ]; then . @INVENTORY@ FILENAME=${INSTALLATION_UUID}-${now} fi -[ ! -z "${BASE_URL}" ] || BASE_URL="${DEFAULT_BASE_URL}" +[ -n "${BASE_URL}" ] || exit 1 URL="${BASE_URL}${FILENAME}" From 3fecb75ce52c90b678e01ca43c0bd8d7d9575dd5 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 5 Sep 2025 15:41:52 +0000 Subject: [PATCH 471/492] Remove obsolete test script Signed-off-by: Rob Hoes --- scripts/runtests | 57 ------------------------------------------------ 1 file changed, 57 deletions(-) delete mode 100755 scripts/runtests diff --git a/scripts/runtests b/scripts/runtests deleted file mode 100755 index cfdc95cf867..00000000000 --- a/scripts/runtests +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# It appears that this file is NOT installed by OMakefile. If installing it in -# the future, make sure that variable BASE_PATH is set correctly. - -if [ -z $1 ] -then - echo "Need to specify the current network interface (e.g. eth0) as a parameter - to this script" - exit 1 -fi - -IF=$1 - -export PATH=$PATH:@OPTDIR@/bin - -# generic stuff, necessary for xenrt too - -#install necessary packages -yum -y install nc -yum -y install rsync -yum -y install wget - -# mount the iso directory -mount bespin:/scratch/images/autoinstall /var/opt/xen/iso_import - -# make a lv for import/export tests, mount it on /mnt -VG=`vgs --noheadings -o size,name,size --separator=, | cut -d, -f2` -lvcreate -n importexport -L 10G $VG -mke2fs /dev/$VG/importexport -mount /dev/$VG/importexport /mnt - -#tmp dir for logging output -mkdir -p /tmp/rt -cd /tmp/rt -wget http://snoosnoo.uk.xensource.com/~jludlam/test.css -wget http://snoosnoo.uk.xensource.com/~jludlam/test_log.js - -# post this line is non-xenrt only. xenrt should provide the vms -# rather than having to import them. - -# mount the volume with the images on -mkdir -p /tmp/vms -mount bespin:/scratch2/jludlam /tmp/vms - -# import them - -cd /tmp/vms/ -./debian.sh $IF -./debian-pv.sh $IF -./windowsxp.sh $IF - -cp /tmp/vms/vncsnapshot /usr/bin/ -cd /tmp/rt - -test_host -a -v debian,debian-pv,windowsxp -i $IF - From cf5be62226e0af827d8b3713d77d14c8fe3de7b7 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 8 Sep 2025 08:54:13 +0100 Subject: [PATCH 472/492] host.disable: Add auto_enabled parameter for persistency Currently a manually disabled host will be re-enabled on toolstack restarts and host reboots, which will provoke VM migrations in an HA cluster. If maintenance requires many restarts, that could be painful. To allow for keeping a host persistently disabled across toolstack restarts and host reboots, add a new localdb flag "host_auto_enable" (set through the parameter on Host.disable). This coexists with the internal flag of host_disabled_until_reboot, which is only set on host poweroff internally and cannot be controlled by the user directly. With host_auto_enable set to false, xapi will not re-enable a host on its own no matter what: toolstack restarts, host reboots, calls to consider_enabling_host (triggered by PBD plugs etc.) will have no effect. Only a manual call to Host.enable will re-enable the host. Expose the new parameter in the CLI. Also fix up the comment in xapi_host_helpers.mli. Signed-off-by: Andrii Sultanov --- ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_errors.ml | 5 +++ ocaml/idl/datamodel_host.ml | 27 ++++++++++++- ocaml/xapi-cli-server/cli_frontend.ml | 10 +++-- ocaml/xapi-cli-server/cli_operations.ml | 7 +++- ocaml/xapi-consts/api_errors.ml | 2 + ocaml/xapi-consts/constants.ml | 5 +++ ocaml/xapi/message_forwarding.ml | 10 +++-- ocaml/xapi/xapi_host.ml | 21 ++++++++--- ocaml/xapi/xapi_host.mli | 3 +- ocaml/xapi/xapi_host_helpers.ml | 50 ++++++++++++++++++------- ocaml/xapi/xapi_host_helpers.mli | 3 +- 12 files changed, 114 insertions(+), 31 deletions(-) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 819b7c61141..4786d2d329e 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 789 +let schema_minor_vsn = 790 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 33d8f32e153..62cf8d8452a 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -648,6 +648,11 @@ let _ = "The specified server is disabled and cannot be re-enabled until after \ it has rebooted." () ; + error Api_errors.host_disabled_indefinitely ["host"] + ~doc: + "The specified server is disabled and can only be re-enabled manually \ + with Host.enable." + () ; error Api_errors.no_hosts_available [] ~doc:"There were no servers available to complete the specified operation." () ; diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 27b1bf60410..ed2aef4bfa8 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -625,12 +625,37 @@ let disable = , "Puts the host into a state in which no new VMs can be started. \ Currently active VMs on the host continue to execute." ) + ; ( Changed + , "25.31.0" + , "Added auto_enable option to allow persisting the state across \ + toolstack restarts and host reboots." + ) ] ~name:"disable" ~doc: "Puts the host into a state in which no new VMs can be started. \ Currently active VMs on the host continue to execute." - ~params:[(Ref _host, "host", "The Host to disable")] + ~versioned_params: + [ + { + param_type= Ref _host + ; param_name= "host" + ; param_doc= "The Host to disable" + ; param_release= rio_release + ; param_default= None + } + ; { + param_type= Bool + ; param_name= "auto_enable" + ; param_doc= + "If true (default), the host will be re-enabled after a toolstack \ + restart automatically. If false, the host will be disabled \ + indefinitely, across toolstack restarts and host reboots, until \ + re-enabled explicitly with Host.enable." + ; param_release= numbered_release "25.31.0" + ; param_default= Some (VBool true) + } + ] ~allowed_roles:(_R_POOL_OP ++ _R_CLIENT_CERT) () diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index d7b1984888a..8f8117fde0d 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -579,8 +579,10 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-disable" , { reqd= [] - ; optn= [] - ; help= "Disable the XE host." + ; optn= ["auto-enable"] + ; help= + "Disable the XE host. Setting auto-enable=false will keep the host \ + persistently disabled until manually re-enabled with Host.enable." ; implementation= No_fd Cli_operations.host_disable ; flags= [Host_selectors] } @@ -4049,7 +4051,7 @@ let rio_help printer minimal cmd = in let help = Printf.sprintf - {|Usage: + {|Usage: %s [ -s ] XenServer host [ -p ] XenServer port number @@ -4061,7 +4063,7 @@ let rio_help printer minimal cmd = [ --traceparent ] Distributed tracing context [ ... ] Command-specific options -To get help on a specific command: +To get help on a specific command: %s help |} diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index d7993d4d577..ad17ca837dd 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -2240,6 +2240,9 @@ let print_assert_exception e = "VM requires access to SR: " ^ Cli_util.ref_convert (get_arg 2 params) | Api_errors.Server_error (code, _) when code = Api_errors.host_disabled -> "Host disabled (use 'xe host-enable' to re-enable)" + | Api_errors.Server_error (code, _) + when code = Api_errors.host_disabled_indefinitely -> + "Host disabled indefinitely (use 'xe host-enable' to re-enable)" | Api_errors.Server_error (code, _) when code = Api_errors.host_not_live -> "Host down" | Api_errors.Server_error (code, _) @@ -6565,12 +6568,14 @@ let bond_set_mode _printer rpc session_id params = Client.Bond.set_mode ~rpc ~session_id ~self:bond ~value:mode let host_disable _printer rpc session_id params = + let auto_enable = get_bool_param ~default:true params "auto-enable" in ignore (do_host_op rpc session_id (fun _ host -> Client.Host.disable ~rpc ~session_id ~host:(host.getref ()) + ~auto_enable ) - params [] + params ["auto-enable"] ) let host_sync_data _printer rpc session_id params = diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 28418e2e9ed..2a1b9b58b72 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -113,6 +113,8 @@ let host_disabled = add_error "HOST_DISABLED" let host_disabled_until_reboot = add_error "HOST_DISABLED_UNTIL_REBOOT" +let host_disabled_indefinitely = add_error "HOST_DISABLED_INDEFINITELY" + let host_not_disabled = add_error "HOST_NOT_DISABLED" let host_not_live = add_error "HOST_NOT_LIVE" diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 353c4606f7c..524d7ab07a5 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -233,6 +233,11 @@ let master_scripts = "master_scripts" This will prevent anyone from re-enabling the host and starting VMs on it during shutdown. *) let host_disabled_until_reboot = "host_disabled_until_reboot" +(* This flag is set to false when the host is forcibly disabled in a + persistent way - it will not be re-enabled on startup (even after reboots) + until manually directed by the user *) +let host_auto_enable = "host_auto_enable" + (* Set when shutting down and rebooting. If we come up and finds no new crashdump and HA is enabled, we assume the host was fenced. *) let host_restarted_cleanly = "host_restarted_cleanly" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index f25f5bd9431..a6697fa621e 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -3345,13 +3345,15 @@ functor (host_uuid ~__context host) ; Local.Host.get_management_interface ~__context ~host - let disable ~__context ~host = - info "Host.disable: host = '%s'" (host_uuid ~__context host) ; + let disable ~__context ~host ~auto_enable = + info "Host.disable: host = '%s', auto_enable = '%b'" + (host_uuid ~__context host) + auto_enable ; (* Block call if this would break our VM restart plan *) Xapi_ha_vm_failover.assert_host_disable_preserves_ha_plan ~__context host ; - let local_fn = Local.Host.disable ~host in - let remote_fn = Client.Host.disable ~host in + let local_fn = Local.Host.disable ~host ~auto_enable in + let remote_fn = Client.Host.disable ~host ~auto_enable in do_op_on ~local_fn ~__context ~host ~remote_fn ; Xapi_host_helpers.update_allowed_operations ~__context ~self:host diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 1bf3e4d9b6a..7193ae64f69 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -74,7 +74,7 @@ let set_power_on_mode ~__context ~self ~power_on_mode ~power_on_config = + HA is enabled and this host has broken storage or networking which would cause protected VMs to become non-agile *) -let assert_safe_to_reenable ~__context ~self = +let assert_safe_to_reenable ~__context ~self ~user_request = assert_startup_complete () ; Repository_helpers.assert_no_host_pending_mandatory_guidance ~__context ~host:self ; @@ -87,6 +87,14 @@ let assert_safe_to_reenable ~__context ~self = (Api_errors.Server_error (Api_errors.host_disabled_until_reboot, [Ref.string_of self]) ) ; + let host_auto_enable = + try bool_of_string (Localdb.get Constants.host_auto_enable) with _ -> true + in + if (not host_auto_enable) && not user_request then + raise + (Api_errors.Server_error + (Api_errors.host_disabled_indefinitely, [Ref.string_of self]) + ) ; if Db.Pool.get_ha_enabled ~__context ~self:(Helpers.get_pool ~__context) then ( let pbds = Db.Host.get_PBDs ~__context ~self in let unplugged_pbds = @@ -799,20 +807,23 @@ let shutdown_agent ~__context = ~reason:Xapi_hooks.reason__clean_shutdown ; Xapi_fuse.light_fuse_and_dont_restart ~fuse_length:1. () -let disable ~__context ~host = +let disable ~__context ~host ~auto_enable = if Db.Host.get_enabled ~__context ~self:host then ( info "Host.enabled: setting host %s (%s) to disabled because of user request" (Ref.string_of host) (Db.Host.get_hostname ~__context ~self:host) ; Db.Host.set_enabled ~__context ~self:host ~value:false ; - Xapi_host_helpers.user_requested_host_disable := true + Xapi_host_helpers.user_requested_host_disable := true ; + if not auto_enable then + Localdb.put Constants.host_auto_enable "false" ) let enable ~__context ~host = if not (Db.Host.get_enabled ~__context ~self:host) then ( - assert_safe_to_reenable ~__context ~self:host ; + assert_safe_to_reenable ~__context ~self:host ~user_request:true ; Xapi_host_helpers.user_requested_host_disable := false ; + Localdb.put Constants.host_auto_enable "true" ; info "Host.enabled: setting host %s (%s) to enabled because of user request" (Ref.string_of host) (Db.Host.get_hostname ~__context ~self:host) ; @@ -3087,7 +3098,7 @@ let apply_updates ~__context ~self ~hash = if Db.Pool.get_ha_enabled ~__context ~self:pool then raise Api_errors.(Server_error (ha_is_enabled, [])) ; if Db.Host.get_enabled ~__context ~self then ( - disable ~__context ~host:self ; + disable ~__context ~host:self ~auto_enable:true ; Xapi_host_helpers.update_allowed_operations ~__context ~self ) ; Xapi_host_helpers.with_host_operation ~__context ~self diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 481b4699d57..19d25361528 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -79,7 +79,8 @@ val restart_agent : __context:'a -> host:'b -> unit val shutdown_agent : __context:Context.t -> unit -val disable : __context:Context.t -> host:[`host] Ref.t -> unit +val disable : + __context:Context.t -> host:[`host] Ref.t -> auto_enable:bool -> unit val enable : __context:Context.t -> host:[`host] Ref.t -> unit diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index 3523ceaefcf..da837f9329a 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -422,17 +422,33 @@ let consider_enabling_host_nolock ~__context = else f () in + let host_auto_enable = + try bool_of_string (Localdb.get Constants.host_auto_enable) + with _ -> true + in if !Xapi_globs.on_system_boot then ( debug "Host.enabled: system has just restarted" ; if_no_pending_guidances (fun () -> debug "Host.enabled: system has just restarted and no pending mandatory \ - guidances: setting localhost to enabled" ; - Db.Host.set_enabled ~__context ~self:localhost ~value:true ; - update_allowed_operations ~__context ~self:localhost ; + guidances: clearing host_disabled_until_reboot" ; Localdb.put Constants.host_disabled_until_reboot "false" ; - (* Start processing pending VM powercycle events *) - Local_work_queue.start_vm_lifecycle_queue () + + (* If the host was persistently disabled, honour it *) + if host_auto_enable then ( + debug + "Host.enabled: system has just restarted, no pending mandatory \ + guidances and host_auto_enable=true: setting localhost to \ + enabled" ; + Db.Host.set_enabled ~__context ~self:localhost ~value:true ; + update_allowed_operations ~__context ~self:localhost ; + (* Start processing pending VM powercycle events *) + Local_work_queue.start_vm_lifecycle_queue () + ) else + debug + "Host.enabled: system has just restarted, no pending mandatory \ + guidances, but host_auto_enable=false: Leaving host disabled \ + until manually re-enabled by the user" ) ) else if try bool_of_string (Localdb.get Constants.host_disabled_until_reboot) @@ -446,14 +462,22 @@ let consider_enabling_host_nolock ~__context = "Host.enabled: system not just rebooted && host_disabled_until_reboot \ not set" ; if_no_pending_guidances (fun () -> - debug - "Host.enabled: system not just rebooted && \ - host_disabled_until_reboot not set and no pending mandatory \ - guidances: setting localhost to enabled" ; - Db.Host.set_enabled ~__context ~self:localhost ~value:true ; - update_allowed_operations ~__context ~self:localhost ; - (* Start processing pending VM powercycle events *) - Local_work_queue.start_vm_lifecycle_queue () + if host_auto_enable then ( + debug + "Host.enabled: system not just rebooted && \ + host_disabled_until_reboot not set and no pending mandatory \ + guidances and host_auto_enable=true: setting localhost to \ + enabled" ; + Db.Host.set_enabled ~__context ~self:localhost ~value:true ; + update_allowed_operations ~__context ~self:localhost ; + (* Start processing pending VM powercycle events *) + Local_work_queue.start_vm_lifecycle_queue () + ) else + debug + "Host.enabled: system not just rebooted && \ + host_disabled_until_reboot not set and no pending mandatory \ + guidances but host_auto_enable=false: Leaving host disabled \ + until manually re-enabled by the user" ) ) ; (* If Host has been enabled and HA is also enabled then tell the master to recompute its plan *) diff --git a/ocaml/xapi/xapi_host_helpers.mli b/ocaml/xapi/xapi_host_helpers.mli index 519aa34a560..84cc271c652 100644 --- a/ocaml/xapi/xapi_host_helpers.mli +++ b/ocaml/xapi/xapi_host_helpers.mli @@ -79,7 +79,8 @@ val consider_enabling_host : __context:Context.t -> unit {ul {- the user asked the host to be disabled and there was a problem} {- HA is enabled and one-or-more PBDs failed to plug} - {- `disabled_until_next_reboot` is set in the local DB}} + {- `host_disabled_until_reboot` is set in the local DB and the system + hasn't just booted up}} *) val consider_enabling_host_request : __context:Context.t -> unit From fb83a6b325a76020fb201505cf8e78de4b7ae9a1 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 9 Sep 2025 13:22:16 +0100 Subject: [PATCH 473/492] Simplify UTF-8 decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Use the decoder from the OCaml standard library instead of our own implementation, which this patch removes. * Validate UTF-8/XML conformance for maps and sets, in addition to strings. This is XSA-474 / CVE-2025-58146. Signed-off-by: Christian Lindig Reviewed-by: Edwin Török --- ocaml/database/db_cache_impl.ml | 42 +- ocaml/database/string_marshall_helper.ml | 4 +- ocaml/idl/ocaml_backend/gen_server.ml | 2 +- .../bench/bench_encodings.ml | 4 +- .../lib/xapi-stdext-encodings/dune | 8 +- .../lib/xapi-stdext-encodings/encodings.ml | 167 ------ .../lib/xapi-stdext-encodings/encodings.mli | 84 --- .../lib/xapi-stdext-encodings/test.ml | 533 ------------------ .../lib/xapi-stdext-encodings/utf8.ml | 74 +++ .../lib/xapi-stdext-encodings/utf8.mli | 31 + ocaml/xapi/xapi_message.ml | 4 +- 11 files changed, 138 insertions(+), 815 deletions(-) delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index e9745749ada..050d43f0504 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -67,9 +67,7 @@ let read_field t tblname fldname objref = occurs. *) let ensure_utf8_xml string = let length = String.length string in - let prefix = - Xapi_stdext_encodings.Encodings.UTF8_XML.longest_valid_prefix string - in + let prefix = Xapi_stdext_encodings.Utf8.XML.longest_valid_prefix string in if length > String.length prefix then warn "string truncated to: '%s'." prefix ; prefix @@ -86,20 +84,32 @@ let write_field_locked t tblname objref fldname newval = (get_database t) ) +(** Ensure a value is conforming to UTF-8 with XML restrictions *) +let is_valid v = + let valid = Xapi_stdext_encodings.Utf8.XML.is_valid in + let valid_pair (x, y) = valid x && valid y in + match v with + | Schema.Value.String s -> + valid s + | Schema.Value.Set ss -> + List.for_all valid ss + | Schema.Value.Pairs pairs -> + List.for_all valid_pair pairs + +let share_string = function + | Schema.Value.String s -> + Schema.Value.String (Share.merge s) + | v -> + (* we assume strings in the tree have been shared already *) + v + let write_field t tblname objref fldname newval = - let newval = - match newval with - | Schema.Value.String s -> - (* the other caller of write_field_locked only uses sets and maps, - so we only need to check for String here - *) - if not (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then - raise Invalid_value ; - Schema.Value.String (Share.merge s) - | _ -> - newval - in - with_lock (fun () -> write_field_locked t tblname objref fldname newval) + if not @@ is_valid newval then + raise Invalid_value + else + with_lock (fun () -> + write_field_locked t tblname objref fldname (share_string newval) + ) let touch_row t tblname objref = update_database t (touch tblname objref) ; diff --git a/ocaml/database/string_marshall_helper.ml b/ocaml/database/string_marshall_helper.ml index ba003bee96a..1add3aef7b0 100644 --- a/ocaml/database/string_marshall_helper.ml +++ b/ocaml/database/string_marshall_helper.ml @@ -22,9 +22,7 @@ module D = Debug.Make (struct let name = __MODULE__ end) let ensure_utf8_xml string = let length = String.length string in - let prefix = - Xapi_stdext_encodings.Encodings.UTF8_XML.longest_valid_prefix string - in + let prefix = Xapi_stdext_encodings.Utf8.XML.longest_valid_prefix string in if length > String.length prefix then D.warn "Whilst doing 'set' of structured field, string truncated to: '%s'." prefix ; diff --git a/ocaml/idl/ocaml_backend/gen_server.ml b/ocaml/idl/ocaml_backend/gen_server.ml index 19145021264..f95f5f6d962 100644 --- a/ocaml/idl/ocaml_backend/gen_server.ml +++ b/ocaml/idl/ocaml_backend/gen_server.ml @@ -457,7 +457,7 @@ let gen_module api : O.Module.t = ([ "let __call, __params = call.Rpc.name, call.Rpc.params in" ; "List.iter (fun p -> let s = Rpc.to_string p in if not \ - (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then" + (Xapi_stdext_encodings.Utf8.is_valid s) then" ; "raise (Api_errors.Server_error(Api_errors.invalid_value, \ [\"Invalid UTF-8 string in parameter\"; s]))) __params;" ; "let __label = __call in" diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml index 7308c756d8b..bb20eed4f4e 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml @@ -1,5 +1,5 @@ open Bechamel -open Xapi_stdext_encodings.Encodings +open Xapi_stdext_encodings let test name f = Test.make_indexed_with_resource ~name ~args:[10; 1000; 10000] @@ -10,6 +10,6 @@ let test name f = let benchmarks = Test.make_grouped ~name:"Encodings.validate" - [test "UTF8_XML" UTF8_XML.validate] + [test "UTF8.XML" Utf8.XML.is_valid] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune index 742dd212f1e..839346e35ce 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune @@ -1,12 +1,6 @@ (library (name xapi_stdext_encodings) (public_name xapi-stdext-encodings) - (modules :standard \ test) + (modules :standard) ) -(test - (name test) - (package xapi-stdext-encodings) - (modules test) - (libraries alcotest xapi-stdext-encodings) -) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml deleted file mode 100644 index 2dfd45a7d18..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml +++ /dev/null @@ -1,167 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -exception UCS_value_out_of_range - -exception UCS_value_prohibited_in_UTF8 - -exception UCS_value_prohibited_in_XML - -exception UTF8_character_incomplete - -exception UTF8_header_byte_invalid - -exception UTF8_continuation_byte_invalid - -exception UTF8_encoding_not_canonical - -exception String_incomplete - -(* === Unicode Functions === *) - -module UCS = struct - let is_non_character value = - false - || (0xfdd0 <= value && value <= 0xfdef) (* case 1 *) - || Int.logand 0xfffe value = 0xfffe - (* case 2 *) - [@@inline] -end - -module XML = struct - let is_illegal_control_character value = - let value = Uchar.to_int value in - value < 0x20 && value <> 0x09 && value <> 0x0a && value <> 0x0d - [@@inline] -end - -(* === UCS Validators === *) - -module type UCS_VALIDATOR = sig - val validate : Uchar.t -> unit -end - -module UTF8_UCS_validator = struct - let validate value = - if (UCS.is_non_character [@inlined]) (Uchar.to_int value) then - raise UCS_value_prohibited_in_UTF8 - [@@inline] -end - -module XML_UTF8_UCS_validator = struct - let validate value = - (UTF8_UCS_validator.validate [@inlined]) value ; - if (XML.is_illegal_control_character [@inlined]) value then - raise UCS_value_prohibited_in_XML -end - -(* === String Validators === *) - -module type STRING_VALIDATOR = sig - val is_valid : string -> bool - - val validate : string -> unit - - val longest_valid_prefix : string -> string -end - -exception Validation_error of int * exn - -module UTF8_XML : STRING_VALIDATOR = struct - let decode_continuation_byte byte = - if byte land 0b11000000 = 0b10000000 then - byte land 0b00111111 - else - raise UTF8_continuation_byte_invalid - - let rec decode_continuation_bytes string last value index = - if index <= last then - let chunk = decode_continuation_byte (Char.code string.[index]) in - let value = (value lsl 6) lor chunk in - decode_continuation_bytes string last value (index + 1) - else - value - - let validate_character_utf8 string byte index = - let value, width = - if byte land 0b10000000 = 0b00000000 then - (byte, 1) - else if byte land 0b11100000 = 0b11000000 then - (byte land 0b0011111, 2) - else if byte land 0b11110000 = 0b11100000 then - (byte land 0b0001111, 3) - else if byte land 0b11111000 = 0b11110000 then - (byte land 0b0000111, 4) - else - raise UTF8_header_byte_invalid - in - let value = - if width = 1 then - value - else - decode_continuation_bytes string (index + width - 1) value (index + 1) - in - XML_UTF8_UCS_validator.validate (Uchar.unsafe_of_int value) ; - width - - let rec validate_aux string length index = - if index = length then - () - else - let width = - try - let byte = string.[index] |> Char.code in - validate_character_utf8 string byte index - with - | Invalid_argument _ -> - raise String_incomplete - | error -> - raise (Validation_error (index, error)) - in - validate_aux string length (index + width) - - let validate string = validate_aux string (String.length string) 0 - - let rec validate_with_fastpath string stop pos = - if pos < stop then - (* the compiler is smart enough to optimize the 'int32' away here, - and not allocate *) - let i32 = String.get_int32_ne string pos |> Int32.to_int in - (* test that for all bytes 0x20 <= byte < 0x80. - If any is <0x20 it would cause a negative value to appear in that byte, - which we can detect if we use 0x80 as a mask. - Byte >= 0x80 can be similarly detected with a mask of 0x80 on each byte. - We don't want to see a 0x80 from either of these, hence we bitwise or the 2 values together. - *) - if i32 lor (i32 - 0x20_20_20_20) land 0x80_80_80_80 = 0 then - validate_with_fastpath string stop (pos + 4) - else (* when the condition doesn't hold fall back to full UTF8 decoder *) - validate_aux string (String.length string) pos - else - validate_aux string (String.length string) pos - - let validate_with_fastpath string = - validate_with_fastpath string (String.length string - 3) 0 - - let validate = - if Sys.word_size = 64 then - validate_with_fastpath - else - validate - - let is_valid string = try validate string ; true with _ -> false - - let longest_valid_prefix string = - try validate string ; string - with Validation_error (index, _) -> String.sub string 0 index -end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli deleted file mode 100644 index 2a139ae3786..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli +++ /dev/null @@ -1,84 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -(** Encoding helper modules *) - -(** {2 Exceptions} *) - -exception UCS_value_out_of_range - -exception UCS_value_prohibited_in_UTF8 - -exception UCS_value_prohibited_in_XML - -exception UTF8_character_incomplete - -exception UTF8_header_byte_invalid - -exception UTF8_continuation_byte_invalid - -exception UTF8_encoding_not_canonical - -exception String_incomplete - -(** {2 UCS Validators} *) - -(** Validates UCS character values. *) -module type UCS_VALIDATOR = sig - val validate : Uchar.t -> unit -end - -(** Accepts all values within the UCS character value range except - * those which are invalid for all UTF-8-encoded XML documents. *) -module XML_UTF8_UCS_validator : UCS_VALIDATOR - -module XML : sig - val is_illegal_control_character : Uchar.t -> bool - (** Returns true if and only if the given value corresponds to - * a illegal control character as defined in section 2.2 of - * the XML specification, version 1.0. *) -end - -(** {2 String Validators} *) - -(** Provides functionality for validating and processing - * strings according to a particular character encoding. *) -module type STRING_VALIDATOR = sig - val is_valid : string -> bool - (** Returns true if and only if the given string is validly-encoded. *) - - val validate : string -> unit - (** Raises an encoding error if the given string is not validly-encoded. *) - - val longest_valid_prefix : string -> string - (** Returns the longest validly-encoded prefix of the given string. *) -end - -(** Represents a validation error as a tuple [(i,e)], where: - * [i] = the index of the first non-compliant character; - * [e] = the reason for non-compliance. *) -exception Validation_error of int * exn - -(** Provides functions for validating and processing - * strings according to the UTF-8 character encoding, - * with certain additional restrictions on UCS values - * imposed by the XML specification. - * - * Validly-encoded strings must satisfy both RFC 3629 - * and section 2.2 of the XML specification. - * - * For further information, see: - * http://www.rfc.net/rfc3629.html - * http://www.w3.org/TR/REC-xml/#charsets *) -module UTF8_XML : STRING_VALIDATOR diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml deleted file mode 100644 index 9cc75b297d0..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml +++ /dev/null @@ -1,533 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -module E = Xapi_stdext_encodings.Encodings - -(* Pull in the infix operators from Encodings used in this test *) -let ( --- ), ( +++ ), ( <<< ) = (Int.sub, Int.add, Int.shift_left) - -(* === Mock exceptions ==================================================== *) - -(** Simulates a decoding error. *) -exception Decode_error - -(* === Mock UCS validators ================================================= *) - -(** A validator that always succeeds. *) -module Lenient_UCS_validator : E.UCS_VALIDATOR = struct - let validate _ = () -end - -(* === Mock character validators ============================================= *) - -(** A validator that succeeds for all characters. *) -module Universal_character_validator = struct - let validate _ = () -end - -(** A validator that fails for all characters. *) -module Failing_character_validator = struct - let validate _ = raise Decode_error -end - -(** A validator that succeeds for all characters except the letter 'F'. *) -module Selective_character_validator = struct - let validate uchar = - if Uchar.equal uchar (Uchar.of_char 'F') then raise Decode_error -end - -(* === Test helpers ======================================================== *) - -let assert_true = Alcotest.(check bool) "true" true - -let assert_false = Alcotest.(check bool) "false" false - -let assert_raises_match exception_match fn = - try - fn () ; - Alcotest.fail "assert_raises_match: failure expected" - with failure -> - if not (exception_match failure) then - raise failure - else - () - -(* === Mock codecs ========================================================= *) - -module UCS = struct - (* === Unicode Functions === *) - let min_value = 0x000000 - - let max_value = 0x10ffff - (* used to be 0x1fffff, but this changed and Unicode won't allocate larger than 0x10ffff *) - - let is_non_character value = - false - || (0xfdd0 <= value && value <= 0xfdef) (* case 1 *) - || Int.logand 0xfffe value = 0xfffe - (* case 2 *) - - let is_out_of_range value = value < min_value || value > max_value - - let is_surrogate value = 0xd800 <= value && value <= 0xdfff - - (** A list of UCS non-characters values, including: - a. non-characters within the basic multilingual plane; - b. non-characters at the end of the basic multilingual plane; - c. non-characters at the end of the private use area. *) - let non_characters = - [ - 0x00fdd0 - ; 0x00fdef - ; (* case a. *) - 0x00fffe - ; 0x00ffff - ; (* case b. *) - 0x1ffffe - ; 0x1fffff (* case c. *) - ] - - (** A list of UCS character values located immediately before or - after UCS non-character values, including: - a. non-characters within the basic multilingual plane; - b. non-characters at the end of the basic multilingual plane; - c. non-characters at the end of the private use area. *) - let valid_characters_next_to_non_characters = - [ - 0x00fdcf - ; 0x00fdf0 - ; (* case a. *) - 0x00fffd - ; 0x010000 - ; (* case b. *) - 0x1ffffd - ; 0x200000 (* case c. *) - ] - - let test_is_non_character () = - List.iter (fun value -> assert_true (is_non_character value)) non_characters ; - List.iter - (fun value -> assert_false (is_non_character value)) - valid_characters_next_to_non_characters - - let test_is_out_of_range () = - assert_true (is_out_of_range (min_value --- 1)) ; - assert_false (is_out_of_range min_value) ; - assert_false (is_out_of_range max_value) ; - assert_true (is_out_of_range (max_value +++ 1)) - - let test_is_surrogate () = - assert_false (is_surrogate 0xd7ff) ; - assert_true (is_surrogate 0xd800) ; - assert_true (is_surrogate 0xdfff) ; - assert_false (is_surrogate 0xe000) - - let tests = - [ - ("test_is_non_character", `Quick, test_is_non_character) - ; ("test_is_out_of_range", `Quick, test_is_out_of_range) - ; ("test_is_surrogate", `Quick, test_is_surrogate) - ] -end - -module Lenient_UTF8_codec = struct - let decode_header_byte byte = - if byte land 0b10000000 = 0b00000000 then - (byte, 1) - else if byte land 0b11100000 = 0b11000000 then - (byte land 0b0011111, 2) - else if byte land 0b11110000 = 0b11100000 then - (byte land 0b0001111, 3) - else if byte land 0b11111000 = 0b11110000 then - (byte land 0b0000111, 4) - else - raise E.UTF8_header_byte_invalid - - let decode_continuation_byte byte = - if byte land 0b11000000 = 0b10000000 then - byte land 0b00111111 - else - raise E.UTF8_continuation_byte_invalid - - let width_required_for_ucs_value value = - if value < 0x000080 (* 1 lsl 7 *) then - 1 - else if value < 0x000800 (* 1 lsl 11 *) then - 2 - else if value < 0x010000 (* 1 lsl 16 *) then - 3 - else - 4 - - let decode_character string index = - let value, width = decode_header_byte (Char.code string.[index]) in - let value = - if width = 1 then - value - else - let value = ref value in - for index = index + 1 to index + width - 1 do - let chunk = decode_continuation_byte (Char.code string.[index]) in - value := (!value lsl 6) lor chunk - done ; - if width > width_required_for_ucs_value !value then - raise E.UTF8_encoding_not_canonical ; - !value - in - (value, width) -end - -(* === Mock string validators ============================================== *) -module Mock_String_validator (Validator : E.UCS_VALIDATOR) : - E.STRING_VALIDATOR = struct - (* no longer a functor in Encodings for performance reasons, - so modify the original string passed as argument instead replacing - characters that would be invalid with a known invalid XML char: 0x0B. - *) - - let transform str = - let b = Buffer.create (String.length str) in - let rec loop pos = - if pos < String.length str then - let value, width = Lenient_UTF8_codec.decode_character str pos in - let () = - try - let u = Uchar.of_int value in - Validator.validate u ; Buffer.add_utf_8_uchar b u - with _ -> Buffer.add_char b '\x0B' - in - loop (pos + width) - in - loop 0 ; Buffer.contents b - - let is_valid str = E.UTF8_XML.is_valid (transform str) - - let validate str = - try E.UTF8_XML.validate (transform str) - with E.Validation_error (pos, _) -> - raise (E.Validation_error (pos, Decode_error)) - - let longest_valid_prefix str = E.UTF8_XML.longest_valid_prefix (transform str) -end - -(** A validator that accepts all strings. *) -module Universal_string_validator = - Mock_String_validator (Universal_character_validator) - -(** A validator that rejects all strings. *) -module Failing_string_validator = - Mock_String_validator (Failing_character_validator) - -(** A validator that rejects strings containing the character 'F'. *) -module Selective_string_validator = - Mock_String_validator (Selective_character_validator) - -(* === Tests =============================================================== *) - -module String_validator = struct - let test_is_valid () = - assert_true (Universal_string_validator.is_valid "") ; - assert_true (Universal_string_validator.is_valid "123456789") ; - assert_true (Selective_string_validator.is_valid "") ; - assert_true (Selective_string_validator.is_valid "123456789") ; - assert_false (Selective_string_validator.is_valid "F23456789") ; - assert_false (Selective_string_validator.is_valid "1234F6789") ; - assert_false (Selective_string_validator.is_valid "12345678F") ; - assert_false (Selective_string_validator.is_valid "FFFFFFFFF") - - let test_longest_valid_prefix () = - Alcotest.(check string) - "prefix" - (Universal_string_validator.longest_valid_prefix "") - "" ; - Alcotest.(check string) - "prefix" - (Universal_string_validator.longest_valid_prefix "123456789") - "123456789" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "") - "" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "123456789") - "123456789" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "F23456789") - "" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "1234F6789") - "1234" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "12345678F") - "12345678" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "FFFFFFFFF") - "" - - (** Tests that validation does not fail for an empty string. *) - let test_validate_with_empty_string () = E.UTF8_XML.validate "" - - let test_validate_with_incomplete_string () = - Alcotest.check_raises "Validation fails correctly for an incomplete string" - E.String_incomplete (fun () -> E.UTF8_XML.validate "\xc2" - ) - - let test_validate_with_failing_decoders () = - Failing_string_validator.validate "" ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F") ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F12345678") ; - assert_raises_match - (function E.Validation_error (4, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "0123F5678") ; - assert_raises_match - (function E.Validation_error (8, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "01234567F") ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "FFFFFFFFF") - - let tests = - [ - ("test_is_valid", `Quick, test_is_valid) - ; ("test_longest_valid_prefix", `Quick, test_longest_valid_prefix) - ; ( "test_validate_with_empty_string" - , `Quick - , test_validate_with_empty_string - ) - ; ( "test_validate_with_incomplete_string" - , `Quick - , test_validate_with_incomplete_string - ) - ; ( "test_validate_with_failing_decoders" - , `Quick - , test_validate_with_failing_decoders - ) - ] -end - -module XML = struct - include E.XML - - let test_is_illegal_control_character () = - assert_true (is_illegal_control_character (Uchar.of_int 0x00)) ; - assert_true (is_illegal_control_character (Uchar.of_int 0x19)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x09)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x0a)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x0d)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x20)) - - let tests = - [ - ( "test_is_illegal_control_character" - , `Quick - , test_is_illegal_control_character - ) - ] -end - -(** Tests the XML-specific UTF-8 UCS validation function. *) -module XML_UTF8_UCS_validator = struct - include E.XML_UTF8_UCS_validator - - let validate uchar = - if Uchar.is_valid uchar then - validate @@ Uchar.of_int uchar - else if uchar < Uchar.to_int Uchar.min || uchar > Uchar.to_int Uchar.max - then - raise E.UCS_value_out_of_range - else - raise E.UCS_value_prohibited_in_UTF8 - - let test_validate () = - let value = ref (UCS.min_value --- 1) in - while !value <= UCS.max_value +++ 1 do - if UCS.is_out_of_range !value then - Alcotest.check_raises "should fail" E.UCS_value_out_of_range (fun () -> - validate !value - ) - else if UCS.is_non_character !value || UCS.is_surrogate !value then - Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_UTF8 - (fun () -> validate !value - ) - else if - Uchar.is_valid !value - && XML.is_illegal_control_character (Uchar.of_int !value) - then - Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_XML - (fun () -> validate !value - ) - else - validate !value ; - value := !value +++ 1 - done - - let tests = [("test_validate", `Quick, test_validate)] -end - -module UTF8_codec = struct - (** A list of canonical encoding widths of UCS values, - represented by tuples of the form (v, w), where: - v = the UCS character value to be encoded; and - w = the width of the encoded character, in bytes. *) - let valid_ucs_value_widths = - [ - (1, 1) - ; ((1 <<< 7) --- 1, 1) - ; (1 <<< 7, 2) - ; ((1 <<< 11) --- 1, 2) - ; (1 <<< 11, 3) - ; ((1 <<< 16) --- 1, 3) - ; (1 <<< 16, 4) - ; ((1 <<< 21) --- 1, 4) - ] - - let width_required_for_ucs_value value = - if value < 0x000080 (* 1 lsl 7 *) then - 1 - else if value < 0x000800 (* 1 lsl 11 *) then - 2 - else if value < 0x010000 (* 1 lsl 16 *) then - 3 - else - 4 - - let test_width_required_for_ucs_value () = - List.iter - (fun (value, width) -> - Alcotest.(check int) - "same ints" - (width_required_for_ucs_value value) - width - ) - valid_ucs_value_widths - - (** A list of valid character decodings represented by - tuples of the form (s, (v, w)), where: - - s = a validly-encoded UTF-8 string; - v = the UCS value represented by the string; - (which may or may not be valid in its own right) - w = the width of the encoded string, in bytes. - - For each byte length b in [1...4], the list contains - decodings for: - - v_min = the smallest UCS value encodable in b bytes. - v_max = the greatest UCS value encodable in b bytes. *) - let valid_character_decodings = - [ - (* 7654321 *) - (* 0b0xxxxxxx *) - (* 00000000000000xxxxxxx *) - ( "\x00" (* 0b00000000 *) - , (0b000000000000000000000, 1) - ) - ; ( "\x7f" (* 0b01111111 *) - , (0b000000000000001111111, 1) - ) - ; (* 10987654321 *) - (* 0b110xxxsx 0b10xxxxxx *) - (* 0000000000xxxsxxxxxxx *) - ( "\xc2\x80" (* 0b11000010 0b10000000 *) - , (0b000000000000010000000, 2) - ) - ; ( "\xdf\xbf" (* 0b11011111 0b10111111 *) - , (0b000000000011111111111, 2) - ) - ; (* 6543210987654321 *) - (* 0b1110xxxx 0b10sxxxxx 0b10xxxxxx *) - (* xxxxsxxxxxxxxxxx *) - ( "\xe0\xa0\x80" (* 0b11100000 0b10100000 0b10000000 *) - , (0b000000000100000000000, 3) - ) - ; ( "\xef\xbf\xbf" (* 0b11101111 0b10111111 0b10111111 *) - , (0b000001111111111111111, 3) - ) - ; (* 109876543210987654321 *) - (* 0b11110xxx 0b10xsxxxx 0b10xxxxxx 0b10xxxxxx *) - (* xxxxsxxxxxxxxxxxxxxxx *) - ( "\xf0\x90\x80\x80" (* 0b11110000 0b10010000 0b10000000 0b10000000 *) - , (0b000010000000000000000, 4) - ) - ; ( "\xf7\xbf\xbf\xbf" (* 0b11110111 0b10111111 0b10111111 0b10111111 *) - , (0b111111111111111111111, 4) - ) - ] - - let uchar = Alcotest.int - - let test_decode_character_when_valid () = - List.iter - (fun (string, (value, width)) -> - Alcotest.(check (pair uchar int)) - "same pair" - (Lenient_UTF8_codec.decode_character string 0) - (value, width) - ) - valid_character_decodings - - (** A list of strings containing overlong character encodings. - For each byte length b in [2...4], this list contains the - overlong encoding e (v), where v is the UCS value one less - than the smallest UCS value validly-encodable in b bytes. *) - let overlong_character_encodings = - [ - "\xc1\xbf" (* 0b11000001 0b10111111 *) - ; "\xe0\x9f\xbf" (* 0b11100000 0b10011111 0b10111111 *) - ; "\xf0\x8f\xbf\xbf" (* 0b11110000 0b10001111 0b10111111 0b10111111 *) - ] - - let test_decode_character_when_overlong () = - List.iter - (fun string -> - Alcotest.check_raises "should fail" E.UTF8_encoding_not_canonical - (fun () -> Lenient_UTF8_codec.decode_character string 0 |> ignore - ) - ) - overlong_character_encodings - - let tests = - [ - ( "test_width_required_for_ucs_value" - , `Quick - , test_width_required_for_ucs_value - ) - ; ( "test_decode_character_when_valid" - , `Quick - , test_decode_character_when_valid - ) - ; ( "test_decode_character_when_overlong" - , `Quick - , test_decode_character_when_overlong - ) - ] -end - -let () = - Alcotest.run "Encodings" - [ - ("UCS", UCS.tests) - ; ("XML", XML.tests) - ; ("String_validator", String_validator.tests) - ; ("XML_UTF8_UCS_validator", XML_UTF8_UCS_validator.tests) - ; ("UTF8_codec", UTF8_codec.tests) - ] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml new file mode 100644 index 00000000000..d17d85b3b37 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml @@ -0,0 +1,74 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let is_valid = String.is_valid_utf_8 + +(* deprecated - reject invalid UTF-8 *) +let longest_valid_prefix str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + if Uchar.utf_decode_is_valid dec then + loop (i + Uchar.utf_decode_length dec) + else + String.sub str 0 i + | i when i = len -> + str + | i -> + String.sub str 0 i (* never reached *) + in + loop 0 + +module XML = struct + (** some UTF-8 characters are not legal in XML. Assuming uchar is + legal UTF-8, further check that it is legal in XML *) + let is_legal uchar = + let uchar = Uchar.to_int uchar in + uchar >= 0x20 || uchar = 0x09 || uchar = 0x0a || uchar = 0x0d + [@@inline] + + let is_valid str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + Uchar.utf_decode_is_valid dec + && is_legal (Uchar.utf_decode_uchar dec) + && loop (i + Uchar.utf_decode_length dec) + | _ -> + true + in + loop 0 + + (* deprecated - reject invalid UTF-8 *) + let longest_valid_prefix str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + if + Uchar.utf_decode_is_valid dec + && is_legal (Uchar.utf_decode_uchar dec) + then + loop (i + Uchar.utf_decode_length dec) + else + String.sub str 0 i + | i when i = len -> + str (* avoid copy *) + | i -> + String.sub str 0 i (* never reached *) + in + loop 0 +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli new file mode 100644 index 00000000000..6d8949e2f8f --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli @@ -0,0 +1,31 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val is_valid : string -> bool +(** true, if a string is a proper UTF-8 string *) + +val longest_valid_prefix : string -> string +(** Deprecated. Longest prefix of a string that is proper UTF-8 *) + +(* strings in XML are more restricted than UTF-8 in general. The must be + valid UTF-8 and must not contain certain characters *) + +module XML : sig + val is_valid : string -> bool + (** true, if a string is a proper UTF-8 string in XML *) + + val longest_valid_prefix : string -> string + (** Deprecated. longest prefix of a string that is proper UTF-8. + Better reject invalid UTF-8. *) +end diff --git a/ocaml/xapi/xapi_message.ml b/ocaml/xapi/xapi_message.ml index 408ba7acf07..4c08648dc66 100644 --- a/ocaml/xapi/xapi_message.ml +++ b/ocaml/xapi/xapi_message.ml @@ -28,7 +28,7 @@ *) module Date = Clock.Date -module Encodings = Xapi_stdext_encodings.Encodings +module Encodings = Xapi_stdext_encodings module Listext = Xapi_stdext_std.Listext module Pervasiveext = Xapi_stdext_pervasives.Pervasiveext module Unixext = Xapi_stdext_unix.Unixext @@ -414,7 +414,7 @@ let create ~__context ~name ~priority ~cls ~obj_uuid ~body = debug "Message.create %s %Ld %s %s" name priority (Record_util.cls_to_string cls) obj_uuid ; - if not (Encodings.UTF8_XML.is_valid body) then + if not (Encodings.Utf8.is_valid body) then raise (Api_errors.Server_error (Api_errors.invalid_value, ["UTF8 expected"])) ; if not (check_uuid ~__context ~cls ~uuid:obj_uuid) then raise From 6aa075a4175446bdd816efda0d5937c29ad062a3 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 9 Sep 2025 13:23:37 +0100 Subject: [PATCH 474/492] Adjust quality-gate.sh Signed-off-by: Andrii Sultanov --- quality-gate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quality-gate.sh b/quality-gate.sh index 6f3a72b30a1..cd87c1252b5 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=464 + N=463 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From 9a968ba7f1e7328ae20b612c864adb9c8145fa51 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 11 Jul 2025 10:19:55 +0100 Subject: [PATCH 475/492] xapi/nm: Send non-empty dns to networkd when using IPv6 autoconf Because Autoconf is not DHCP, networkd uses the dns value to write to resolv.conf. This is done on ocaml/networkd/bin/network_server.ml line 745 This allows to have non-empty resolv.conf when using IPv6 autoconf. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/nm.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 229b53adbe2..77f8c078ed3 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -634,6 +634,7 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) rc.API.pIF_ip_configuration_mode = `Static | `IPv6 -> rc.API.pIF_ipv6_configuration_mode = `Static + || rc.API.pIF_ipv6_configuration_mode = `Autoconf in let dns = match (static, rc.API.pIF_DNS) with From f65336766d83a8d4bbfde6e006c57c882b484041 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 10 Sep 2025 13:56:40 +0000 Subject: [PATCH 476/492] CP-53479: Add xapi-ssh-monitor script and service This adds the monitor service required for the SSH auto-mode, as described in `doc/content/toolstack/features/SSH`. Signed-off-by: Lunfan Zhang Signed-off-by: Rob Hoes --- scripts/Makefile | 2 + scripts/xapi-ssh-monitor | 285 +++++++++++++++++++++++++++++++ scripts/xapi-ssh-monitor.service | 14 ++ 3 files changed, 301 insertions(+) create mode 100644 scripts/xapi-ssh-monitor create mode 100644 scripts/xapi-ssh-monitor.service diff --git a/scripts/Makefile b/scripts/Makefile index 5751d8628ca..ad603ac37d8 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -137,6 +137,8 @@ install: mkdir -p $(DESTDIR)/etc/cron.d $(IDATA) xapi-tracing-log-trim.cron $(DESTDIR)/etc/cron.d/xapi-tracing-log-trim.cron mkdir -p $(DESTDIR)/opt/xensource/gpg + $(IPROG) xapi-ssh-monitor $(DESTDIR)$(OPTDIR)/bin + $(IDATA) xapi-ssh-monitor.service $(DESTDIR)/usr/lib/systemd/system/xapi-ssh-monitor.service # host-backup-restore $(IPROG) host-backup-restore/host-backup $(DESTDIR)$(LIBEXECDIR) $(IPROG) host-backup-restore/host-restore $(DESTDIR)$(LIBEXECDIR) diff --git a/scripts/xapi-ssh-monitor b/scripts/xapi-ssh-monitor new file mode 100644 index 00000000000..b67fd2b5ab0 --- /dev/null +++ b/scripts/xapi-ssh-monitor @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 + +import time +import subprocess +import logging +import os.path +import signal +import sys +import re +import XenAPI +import threading +from enum import Enum, auto +from typing import Tuple, List, Optional, Dict, Any +import traceback + +# Configure logging +log_format = '%(asctime)s - %(levelname)s - %(message)s' +log_level = logging.INFO + +logging.basicConfig( + level=log_level, + format=log_format, + handlers=[ + logging.StreamHandler(), + logging.FileHandler('/var/log/daemon.log') + ] +) + +logger = logging.getLogger(__name__) + +# Constants +class SshState(Enum): + DOWN = auto() + ACTIVE = auto() + UNKNOWN = auto() + +INSTALLATION_UUID_REGEX = re.compile("^INSTALLATION_UUID") + +def match_host_id(s): + return INSTALLATION_UUID_REGEX.search(s, 0) + +class XapiMonitor: + XAPI_HEALTH_CHECK = '/opt/xensource/libexec/xapi-health-check' + + def __init__(self): + self.logger = logging.getLogger(__name__) + self.running = True + self.session = None + self.localhost_uuid = self.get_localhost_uuid() + # Create event for graceful exit + self.exit_event = threading.Event() + signal.signal(signal.SIGTERM, self._handle_signal) + signal.signal(signal.SIGINT, self._handle_signal) + signal.signal(signal.SIGHUP, self._handle_signal) + + def _handle_signal(self, signum, frame): + """Handle termination signals""" + signal_names = { + signal.SIGTERM: "SIGTERM", + signal.SIGINT: "SIGINT", + signal.SIGHUP: "SIGHUP" + } + signal_name = signal_names.get(signum, f"Signal {signum}") + self.logger.info(f"Received {signal_name}, preparing to exit...") + self.running = False + # Set event to interrupt any waiting + self.exit_event.set() + + def _create_session(self) -> Optional[Any]: + """Create a session with local XAPI""" + try: + session = XenAPI.xapi_local() + session.login_with_password("", "") + return session + except Exception as e: + self.logger.error(f"Create XAPI session failed: {e}") + return None + + def _logout_session(self) -> None: + """Logout from XAPI session""" + try: + if self.session: + self.session.logout() + self.logger.debug("XAPI session logged out") + except Exception as e: + self.logger.warning(f"Error during session logout: {e}") + + @staticmethod + def get_localhost_uuid() -> str: + """Get the UUID of the local host from inventory file""" + filename = '/etc/xensource-inventory' + try: + with open(filename, 'r') as f: + for line in filter(match_host_id, f.readlines()): + return line.split("'")[1] + except Exception as e: + error_msg = f"Unable to open inventory file [{filename}]: {e}" + logging.getLogger(__name__).error(error_msg) + raise RuntimeError(error_msg) + + # If we get here, we didn't find the UUID + error_msg = f"Could not find INSTALLATION_UUID in {filename}" + logging.getLogger(__name__).error(error_msg) + raise RuntimeError(error_msg) + + def _run_command(self, command: List[str], timeout: int = 10) -> Tuple[int, str, str]: + """Execute command and return results + + Args: + command: Command to execute as list of strings + timeout: Command execution timeout in seconds (default: 10) + + Returns: + Tuple of (return_code, stdout, stderr) + """ + self.logger.debug(f"Running command: {' '.join(command)}") + try: + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True + ) + try: + stdout, stderr = process.communicate(timeout=timeout) + self.logger.debug(f"Command returned: {process.returncode}") + return process.returncode, stdout, stderr + except subprocess.TimeoutExpired: + process.kill() + process.communicate() + self.logger.error(f"Command execution timeout after {timeout}s: {' '.join(command)}") + return -1, "", "Timeout" + except Exception as e: + self.logger.error(f"Error executing command: {e}") + return -1, "", str(e) + + def _check_xapi_health(self) -> bool: + """Check XAPI health status with extended timeout""" + self.logger.debug("Performing XAPI health check") + returncode, stdout, stderr = self._run_command([self.XAPI_HEALTH_CHECK], timeout=120) + + if returncode != 0: + self.logger.warning(f"XAPI health check failed: {stderr}") + + return returncode == 0 + + def _get_ssh_state(self) -> SshState: + """Get SSH service status""" + returncode, stdout, stderr = self._run_command(['systemctl', 'is-active', 'sshd']) + status = stdout.strip() + + if status == 'active': + return SshState.ACTIVE + if status in ('inactive', 'failed', 'unknown'): + return SshState.DOWN + + self.logger.warning(f"Unexpected SSH status: {status}, stderr: {stderr}") + return SshState.UNKNOWN + + def _control_ssh_service(self, enable: bool) -> bool: + """Control SSH service + + Returns: + bool: True if operation was successful, False otherwise + """ + action = "starting" if enable else "stopping" + try: + if enable: + ret1, _, stderr1 = self._run_command(['systemctl', 'enable', 'sshd']) + ret2, _, stderr2 = self._run_command(['systemctl', 'start', 'sshd']) + success = (ret1 == 0 and ret2 == 0) + else: + ret2, _, stderr2 = self._run_command(['systemctl', 'stop', 'sshd']) + ret1, _, stderr1 = self._run_command(['systemctl', 'disable', 'sshd']) + success = (ret1 == 0 and ret2 == 0) + + if success: + self.logger.info(f"SSH service {action} successful") + else: + self.logger.error(f"SSH service {action} failed: enable/disable stderr: {stderr1}, start/stop stderr: {stderr2}") + + return success + except Exception as e: + self.logger.error(f"SSH service {action} failed with exception: {e}") + self.logger.debug(traceback.format_exc()) + return False + + def _disable_ssh_via_api(self) -> bool: + """Disable SSH via XAPI, max retries 3 times""" + if not self.session: + self.session = self._create_session() + if not self.session: + return False + + retry_count = 0 + max_retries = 3 + retry_interval = 5 + + while retry_count < max_retries and self.running: + try: + host = self.session.xenapi.host.get_by_uuid(self.localhost_uuid) + self.session.xenapi.host.disable_ssh(host) + self.logger.info("Successfully disabled SSH via XAPI") + return True + except Exception as e: + retry_count += 1 + self.logger.warning(f"Disable SSH via API failed ({retry_count}/{max_retries}): {e}") + if retry_count < max_retries and self.running: + # Use interruptible sleep + if self.exit_event.wait(retry_interval): + return False + self._logout_session() + self.session = self._create_session() + + if not self.running: + return False + + self.logger.error(f"Disable SSH via API failed, max retries reached ({max_retries})") + return False + + def run(self): + """Main monitoring loop""" + self.logger.info("Starting XAPI and SSH service monitoring...") + + self.session = self._create_session() + if not self.session: + self.logger.warning("Initial session creation failed, will retry later") + + while self.running: + try: + # Check XAPI health - always perform the check + xapi_healthy = self._check_xapi_health() + + # Get current SSH state + current_ssh_state = self._get_ssh_state() + self.logger.debug(f"Current SSH state: {current_ssh_state}") + + if xapi_healthy: + if current_ssh_state == SshState.ACTIVE: + self.logger.info("XAPI healthy: Stopping SSH service") + if not self._disable_ssh_via_api(): + self.logger.warning("Disable SSH via API failed, keeping SSH service running") + else: + if current_ssh_state != SshState.ACTIVE: + self.logger.info("XAPI unhealthy: Starting SSH service") + self._control_ssh_service(True) + + except Exception as e: + self.logger.error(f"Runtime error: {e}") + self.logger.debug(traceback.format_exc()) + + self._logout_session() + + self.session = None + + # Use interruptible sleep with a fixed interval when there is an error + if self.exit_event.wait(5): + break + + continue + + # Use interruptible sleep for main loop + if self.exit_event.wait(60): + break + + self._logout_session() + + self.logger.info("Monitoring service stopped") + +def main(): + logger.info(f"SSH Control Service starting (PID: {os.getpid()})") + + try: + monitor = XapiMonitor() + monitor.run() + except Exception as e: + logger.critical(f"Fatal error in main process: {e}") + logger.critical(traceback.format_exc()) + sys.exit(1) + + logger.info("SSH Control Service exited normally") + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/scripts/xapi-ssh-monitor.service b/scripts/xapi-ssh-monitor.service new file mode 100644 index 00000000000..f38685e7082 --- /dev/null +++ b/scripts/xapi-ssh-monitor.service @@ -0,0 +1,14 @@ +[Unit] +Description=XAPI SSH monitor service +After=network.target +After=xapi.service +OnFailure=sshd.service + +[Service] +Type=simple +RemainAfterExit=true +ExecStart=/opt/xensource/bin/xapi-ssh-monitor +ExecStop=/bin/true + +[Install] +WantedBy=multi-user.target From cfee0d846d34e2d3a5e445948313aa4c68b4387d Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 10 Sep 2025 15:32:04 +0000 Subject: [PATCH 477/492] CP-308800: Dynamically control ssh firewalld service in xapi-ssh-monitor Signed-off-by: Bengang Yuan Signed-off-by: Rob Hoes --- scripts/xapi-ssh-monitor | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/scripts/xapi-ssh-monitor b/scripts/xapi-ssh-monitor index b67fd2b5ab0..d3c35658b47 100644 --- a/scripts/xapi-ssh-monitor +++ b/scripts/xapi-ssh-monitor @@ -165,20 +165,32 @@ class XapiMonitor: """ action = "starting" if enable else "stopping" try: + firewall_cmd = '/usr/bin/firewall-cmd' + use_firewalld = os.path.exists(firewall_cmd) if enable: + if use_firewalld: + ret0, _, stderr0 = self._run_command([firewall_cmd, '--add-service', 'ssh']) + else: + ret0, stderr0 = 0, "n/a" ret1, _, stderr1 = self._run_command(['systemctl', 'enable', 'sshd']) ret2, _, stderr2 = self._run_command(['systemctl', 'start', 'sshd']) - success = (ret1 == 0 and ret2 == 0) + success = (ret0 == 0 and ret1 == 0 and ret2 == 0) else: ret2, _, stderr2 = self._run_command(['systemctl', 'stop', 'sshd']) ret1, _, stderr1 = self._run_command(['systemctl', 'disable', 'sshd']) - success = (ret1 == 0 and ret2 == 0) + if use_firewalld: + ret0, _, stderr0 = self._run_command([firewall_cmd, '--remove-service', 'ssh']) + else: + ret0, stderr0 = 0, "n/a" + success = (ret0 == 0 and ret1 == 0 and ret2 == 0) if success: self.logger.info(f"SSH service {action} successful") else: - self.logger.error(f"SSH service {action} failed: enable/disable stderr: {stderr1}, start/stop stderr: {stderr2}") - + err_msg = f"""SSH service {action} failed: enable/disable firewalld service stderr: {stderr0}, + enable/disable sshd stderr: {stderr1}, start/stop sshd stderr: {stderr2} """ + self.logger.error(err_msg) + return success except Exception as e: self.logger.error(f"SSH service {action} failed with exception: {e}") From 4d127013033f0e418c4048b622c40b8f4b294da9 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 11 Jul 2025 12:03:05 +0100 Subject: [PATCH 478/492] xapi-idl/network: Remove code duplication for DNS persistence decisions Previously both xapi and networkd had to inspect the IP configuration to decide whether the DNS values should be persistend into /etc/resolv.conf. This actually lead to a mismatch in them. Instead use an option value for DNS that simply means that if there's a value, it must be persisted. Now xapi decides the instances where these values are written. Treat a couple of empty lists as a lack of value to avoid writing empty resolv.conf files. This can happen when updating a host from previous versions, which use empty lists when using DHCP. Tested manually by installing a version with this change and restarting the toolstack. The file is kept intact, unlike the previous version of the change that did not take into account the update behaviour. Signed-off-by: Pau Ruiz Safont --- ocaml/networkd/bin/network_server.ml | 20 +++++++------- ocaml/networkd/bin_db/networkd_db.ml | 29 ++++++++++++--------- ocaml/networkd/lib/network_config.ml | 15 ++++++----- ocaml/xapi-idl/network/network_interface.ml | 7 +++-- ocaml/xapi/nm.ml | 16 +++++------- 5 files changed, 45 insertions(+), 42 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 59c76e319f3..3cc2df776b5 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -554,7 +554,8 @@ module Interface = struct let set_dns _ dbg ~name ~nameservers ~domains = Debug.with_thread_associated dbg (fun () -> - update_config name {(get_config name) with dns= (nameservers, domains)} ; + update_config name + {(get_config name) with dns= Some (nameservers, domains)} ; debug "Configuring DNS for %s: nameservers: [%s]; domains: [%s]" name (String.concat ", " (List.map Unix.string_of_inet_addr nameservers)) (String.concat ", " domains) ; @@ -727,7 +728,7 @@ module Interface = struct ; ipv6_conf ; ipv6_gateway ; ipv4_routes - ; dns= nameservers, domains + ; dns ; mtu ; ethtool_settings ; ethtool_offload @@ -736,16 +737,13 @@ module Interface = struct ) ) -> update_config name c ; exec (fun () -> - (* We only apply the DNS settings when not in a DHCP mode - to avoid conflicts. The `dns` field - should really be an option type so that we don't have to - derive the intention of the caller by looking at other - fields. *) - match (ipv4_conf, ipv6_conf) with - | Static4 _, _ | _, Static6 _ | _, Autoconf6 -> - set_dns () dbg ~name ~nameservers ~domains - | _ -> + (* Old configs used empty dns lists to mean none, keep that + behaviour instead of writing an empty resolv.conf *) + match dns with + | None | Some ([], []) -> () + | Some (nameservers, domains) -> + set_dns () dbg ~name ~nameservers ~domains ) ; exec (fun () -> set_ipv4_conf dbg name ipv4_conf) ; exec (fun () -> diff --git a/ocaml/networkd/bin_db/networkd_db.ml b/ocaml/networkd/bin_db/networkd_db.ml index b6194718d37..e0fea0cca11 100644 --- a/ocaml/networkd/bin_db/networkd_db.ml +++ b/ocaml/networkd/bin_db/networkd_db.ml @@ -76,20 +76,25 @@ let _ = [("gateway", Unix.string_of_inet_addr addr)] in let dns = - let dns' = - List.map Unix.string_of_inet_addr (fst interface_config.dns) - in - if dns' = [] then - [] - else - [("dns", String.concat "," dns')] + interface_config.dns + |> Option.map fst + |> Option.map (List.map Unix.string_of_inet_addr) + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | dns' -> + [("dns", String.concat "," dns')] + ) in let domains = - let domains' = snd interface_config.dns in - if domains' = [] then - [] - else - [("domain", String.concat "," domains')] + interface_config.dns + |> Option.map snd + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | domains' -> + [("domain", String.concat "," domains')] + ) in mode @ addrs @ gateway @ dns @ domains | None4 -> diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index 56eef61ce3d..3d034f05284 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -37,7 +37,6 @@ let bridge_naming_convention (device : string) = let get_list_from ~sep ~key args = List.assoc_opt key args |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) - |> Option.value ~default:[] let parse_ipv4_config args = function | Some "static" -> @@ -73,11 +72,13 @@ let parse_ipv6_config args = function (None6, None) let parse_dns_config args = - let nameservers = - get_list_from ~sep:"," ~key:"DNS" args |> List.map Unix.inet_addr_of_string + let ( let* ) = Option.bind in + let* nameservers = + get_list_from ~sep:"," ~key:"DNS" args + |> Option.map (List.map Unix.inet_addr_of_string) in - let domains = get_list_from ~sep:" " ~key:"DOMAIN" args in - (nameservers, domains) + let* domains = get_list_from ~sep:" " ~key:"DOMAIN" args in + Some (nameservers, domains) let read_management_conf () = try @@ -103,7 +104,7 @@ let read_management_conf () = let device = (* Take 1st member of bond *) match (bond_mode, bond_members) with - | None, _ | _, [] -> ( + | None, _ | _, (None | Some []) -> ( match List.assoc_opt "LABEL" args with | Some x -> x @@ -111,7 +112,7 @@ let read_management_conf () = error "%s: missing LABEL in %s" __FUNCTION__ management_conf ; raise Read_error ) - | _, hd :: _ -> + | _, Some (hd :: _) -> hd in Inventory.reread_inventory () ; diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index 2f3368fc131..06d38ff1a87 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -158,7 +158,10 @@ type interface_config_t = { ; ipv6_conf: ipv6 [@default None6] ; ipv6_gateway: Unix.inet_addr option [@default None] ; ipv4_routes: ipv4_route_t list [@default []] - ; dns: Unix.inet_addr list * string list [@default [], []] + ; dns: (Unix.inet_addr list * string list) option [@default None] + (** the list + of nameservers and domains to persist in /etc/resolv.conf. Must be None when + using a DHCP mode *) ; mtu: int [@default 1500] ; ethtool_settings: (string * string) list [@default []] ; ethtool_offload: (string * string) list [@default [("lro", "off")]] @@ -200,7 +203,7 @@ let default_interface = ; ipv6_conf= None6 ; ipv6_gateway= None ; ipv4_routes= [] - ; dns= ([], []) + ; dns= None ; mtu= 1500 ; ethtool_settings= [] ; ethtool_offload= [("lro", "off")] diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 77f8c078ed3..fbc37a5fedc 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -639,24 +639,20 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) let dns = match (static, rc.API.pIF_DNS) with | false, _ | true, "" -> - ([], []) + None | true, pif_dns -> let nameservers = List.map Unix.inet_addr_of_string - (String.split ',' pif_dns) + (String.split_on_char ',' pif_dns) in let domains = match List.assoc_opt "domain" rc.API.pIF_other_config with - | None -> + | None | Some "" -> [] - | Some domains -> ( - try String.split ',' domains - with _ -> - warn "Invalid DNS search domains: %s" domains ; - [] - ) + | Some domains -> + String.split_on_char ',' domains in - (nameservers, domains) + Some (nameservers, domains) in let mtu = determine_mtu rc net_rc in let ethtool_settings, ethtool_offload = From ecad30de2c055daf476054a881348fbefc36a0cf Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Thu, 11 Sep 2025 14:09:27 +0100 Subject: [PATCH 479/492] Remove redundant check Signed-off-by: Gabriel Buica --- ocaml/xapi/helpers.ml | 2 -- ocaml/xapi/xapi_vm_lifecycle.ml | 24 +----------------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 121c1c179c4..35909e5d5ed 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -913,8 +913,6 @@ let sort_by_schwarzian ?(descending = false) f list = |> List.sort (fun (_, x') (_, y') -> comp x' y') |> List.map (fun (x, _) -> x) -let platform_version_inverness = [2; 4; 0] - let version_string_of : __context:Context.t -> [`host] api_object -> string = fun ~__context host -> try diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 4daa9c3b56b..b9cc6b884b4 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -309,26 +309,6 @@ let report_concurrent_operations_error ~current_ops ~ref_str = ) let check_vgpu ~__context ~op ~ref_str ~vgpus ~power_state = - let is_migratable vgpu = - try - (* Prevent VMs with VGPU from being migrated from pre-Jura to Jura and later hosts during RPU *) - let host_from = - Db.VGPU.get_VM ~__context ~self:vgpu |> fun vm -> - Db.VM.get_resident_on ~__context ~self:vm |> fun host -> - Helpers.LocalObject host - in - (* true if platform version of host_from more than inverness' 2.4.0 *) - Helpers.( - compare_int_lists - (version_of ~__context host_from) - platform_version_inverness - ) - > 0 - with e -> - debug "is_migratable: %s" (ExnHelper.string_of_exn e) ; - (* best effort: yes if not possible to decide *) - true - in let is_suspendable vgpu = Db.VGPU.get_type ~__context ~self:vgpu |> fun self -> Db.VGPU_type.get_implementation ~__context ~self |> function @@ -343,9 +323,7 @@ let check_vgpu ~__context ~op ~ref_str ~vgpus ~power_state = match op with | `migrate_send when power_state = `Halted -> None - | (`pool_migrate | `migrate_send) - when List.for_all is_migratable vgpus && List.for_all is_suspendable vgpus - -> + | (`pool_migrate | `migrate_send) when List.for_all is_suspendable vgpus -> None | `checkpoint when power_state = `Suspended -> None From b92f0c405f09e5721523d58926308a4a95cd8797 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 12 Sep 2025 17:03:13 +0100 Subject: [PATCH 480/492] xapi_pbd: use HA shared SR constraint violation when plugging and unplugging When unplugging a pbd, enabling a host, or adding a vbd, the shared SR constraint violation could be violated, but the error used in these cases was that the operation blocked the failover planning. This was confusing because the main reason was not mentioned in the error. Instead use the SR constraint violation error, and log a more descriptive message in the logs as info, because these can happen during normal operation and there's nothing dodgy going on. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_pbd.ml | 17 +++++++++++------ ocaml/xapi/xapi_vbd_helpers.ml | 17 +++++------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/xapi_pbd.ml b/ocaml/xapi/xapi_pbd.ml index a9625dc3c62..86d9b7fabc0 100644 --- a/ocaml/xapi/xapi_pbd.ml +++ b/ocaml/xapi/xapi_pbd.ml @@ -114,13 +114,18 @@ let abort_if_storage_attached_to_protected_vms ~__context ~self = (fun vbd -> let vdi = Db.VBD.get_VDI ~__context ~self:vbd in if List.mem vdi vdis then ( - warn - "PBD.unplug will make protected VM %s not agile since it has a \ - VBD attached to VDI %s" - (Ref.string_of vm_ref) (Ref.string_of vdi) ; + let vm = Ref.string_of vm_ref in + let pbd = Ref.string_of self in + let sr = Ref.string_of sr in + info + "The protected VM %s must remain agile and blocked the \ + operation. The PBD %s of must be plugged to ensure this. This \ + happened because the SR %s is used by both the VM and the \ + PBD." + vm pbd sr ; raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) + Api_errors.( + Server_error (ha_constraint_violation_sr_not_shared, [sr]) ) ) ) diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index d23d161e988..07d6b012da2 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -358,24 +358,17 @@ let assert_attachable ~__context ~self = let assert_doesnt_make_vm_non_agile ~__context ~vm ~vdi = let pool = Helpers.get_pool ~__context in - let properly_shared = - Agility.is_sr_properly_shared ~__context - ~self:(Db.VDI.get_SR ~__context ~self:vdi) - in + let sr = Db.VDI.get_SR ~__context ~self:vdi in + let properly_shared = Agility.is_sr_properly_shared ~__context ~self:sr in if true && Db.Pool.get_ha_enabled ~__context ~self:pool && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) && Helpers.is_xha_protected ~__context ~self:vm && not properly_shared - then ( - warn "Attaching VDI %s makes VM %s not agile" (Ref.string_of vdi) - (Ref.string_of vm) ; - raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) - ) - ) + then + let sr = Ref.string_of sr in + raise Api_errors.(Server_error (ha_constraint_violation_sr_not_shared, [sr])) let update_allowed_operations ~__context ~self : unit = let all = Db.VBD.get_record_internal ~__context ~self in From bcfe7981bf383fb6bb1011711755d620350135c4 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 12 Sep 2025 17:08:33 +0100 Subject: [PATCH 481/492] xapi_pif: use HA shared network constraint violation when plugging and unplugging When unplugging a PIF, enabling a host, or adding a VIF, the shared network constraint violation could be violated, but the error used in these cases was that the operation blocked the failover planning. This was confusing because the main reason was not mentioned in the error. Instead use the network constraint violation error, and log a more descriptive message in the logs as info, because these can happen during normal operation and there's nothing dodgy going on. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_pif.ml | 16 ++++++++++------ ocaml/xapi/xapi_vif_helpers.ml | 25 ++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index eaf4b37b8b9..b618edade44 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -271,13 +271,17 @@ let abort_if_network_attached_to_protected_vms ~__context ~self = List.iter (fun vm -> if Helpers.is_xha_protected ~__context ~self:vm then ( - warn - "PIF.unplug will make protected VM %s not agile since it has a VIF \ - attached to network %s" - (Ref.string_of vm) (Ref.string_of net) ; + let vm = Ref.string_of vm in + let pif = Ref.string_of self in + let net = Ref.string_of net in + info + "The protected VM %s must remain agile and blocked the operation. \ + PIF %s must be plugged this. This happened because network %s is \ + used by both the VM and the PIF" + vm pif net ; raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) + Api_errors.( + Server_error (ha_constraint_violation_network_not_shared, [net]) ) ) ) diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 34682f9aa78..982a09b1923 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -267,19 +267,18 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config raise (Api_errors.Server_error (Api_errors.mac_invalid, [mAC])) ; (* Make people aware that non-shared networks being added to VMs makes them not agile *) let pool = Helpers.get_pool ~__context in - if - true - && Db.Pool.get_ha_enabled ~__context ~self:pool - && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) - && Helpers.is_xha_protected ~__context ~self:vM - && not (Agility.is_network_properly_shared ~__context ~self:network) - then ( - warn "Creating VIF %s makes VM %s not agile" (Ref.string_of ref) - (Ref.string_of vM) ; - raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) - ) + ( if + true + && Db.Pool.get_ha_enabled ~__context ~self:pool + && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) + && Helpers.is_xha_protected ~__context ~self:vM + && not (Agility.is_network_properly_shared ~__context ~self:network) + then + let net = Ref.string_of network in + raise + Api_errors.( + Server_error (ha_constraint_violation_network_not_shared, [net]) + ) ) ; (* Check to make sure the device is unique *) Xapi_stdext_threads.Threadext.Mutex.execute m (fun () -> From b81aa1d3fef0b08f387d17cb797483d90812c96f Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 12 Sep 2025 11:39:53 +0100 Subject: [PATCH 482/492] xapi_ha_vm_failover: remove superfluous debug message It didn't add any useful information to the error. Also cleaned up the formatting of some comments found during the patch series. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_ha_vm_failover.ml | 17 ++++++++--------- ocaml/xapi/xapi_vif_helpers.ml | 3 +-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 5c43984541c..19c84c4b7ed 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -1223,9 +1223,6 @@ let assert_configuration_change_preserves_ha_plan ~__context c = "assert_configuration_change_preserves_ha_plan: plan exists after \ change" | Plan_exists_excluding_non_agile_VMs | No_plan_exists -> - debug - "assert_configuration_change_preserves_ha_plan: proposed change \ - breaks plan" ; raise (Api_errors.Server_error (Api_errors.ha_operation_would_break_failover_plan, []) @@ -1413,8 +1410,9 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n = let open TaskChains.Infix in (* execute the plan *) Helpers.call_api_functions ~__context (fun rpc session_id -> - (* Helper function to start a VM somewhere. If the HA overcommit protection stops us then disable it and try once more. - Returns true if the VM was restarted and false otherwise. *) + (* Helper function to start a VM somewhere. If the HA overcommit + protection stops us then disable it and try once more. Returns true if + the VM was restarted and false otherwise. *) let restart_vm vm ?host () = let go () = ( if Xapi_fist.simulate_restart_failure () then @@ -1579,10 +1577,11 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n = in gc_table last_start_attempt ; gc_table restart_failed ; - (* Consider restarting the best-effort VMs we *think* have failed (but we might get this wrong -- - ok since this is 'best-effort'). NOTE we do not use the restart_vm function above as this will mark the - pool as overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received (although this should never - happen it's better safe than sorry) *) + (* Consider restarting the best-effort VMs we *think* have failed (but we + might get this wrong -- ok since this is 'best-effort'). NOTE we do + not use the restart_vm function above as this will mark the pool as + overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received + (although this should never happen it's better safe than sorry) *) let is_best_effort r = r.API.vM_ha_restart_priority = Constants.ha_restart_best_effort && r.API.vM_power_state = `Halted diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 982a09b1923..37de1b77770 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -290,8 +290,7 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config in let new_device = int_of_string device in if List.exists (fun (_, d) -> d = new_device) all_vifs_with_devices then - raise - (Api_errors.Server_error (Api_errors.device_already_exists, [device])) ; + raise Api_errors.(Server_error (device_already_exists, [device])) ; (* If the VM uses a PVS_proxy, then the proxy _must_ be associated with the VIF that has the lowest device number. Check that the new VIF From 9ff831356ee73a447673a032ced2c184e52b6545 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 16 Sep 2025 15:07:04 +0100 Subject: [PATCH 483/492] xenopsd: Drop unused variables in domain.ml Some of these were passed through several layers of functions only to be unused in the end. Drop them, improving the legibility of the code. Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/domain.ml | 33 ++++++++++++--------------- ocaml/xenopsd/xc/domain.mli | 3 --- ocaml/xenopsd/xc/xenops_server_xen.ml | 6 ++--- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 424b0085c45..32dcb1f7d39 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1214,8 +1214,8 @@ let correct_shadow_allocation xc domid uuid shadow_mib = ) (* puts value in store after the domain build succeed *) -let build_post ~xc ~xs ~vcpus:_ ~static_max_mib ~target_mib domid domain_type - store_mfn store_port ents vments = +let build_post ~xc ~xs ~static_max_mib ~target_mib domid domain_type store_mfn + store_port ents vments = let uuid = get_uuid ~xc domid in let dom_path = xs.Xs.getdomainpath domid in (* Unit conversion. *) @@ -1350,8 +1350,8 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid ) in let local_stuff = console_keys console_port console_mfn in - build_post ~xc ~xs ~vcpus ~target_mib ~static_max_mib domid domain_type - store_mfn store_port local_stuff vm_stuff + build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn + store_port local_stuff vm_stuff type suspend_flag = Live | Debug @@ -1452,8 +1452,7 @@ let consume_qemu_record fd limit domid uuid = (fun () -> Unix.close fd2) let restore_common (task : Xenops_task.task_handle) ~xc ~xs - ~(dm : Device.Profile.t) ~domain_type ~store_port ~store_domid:_ - ~console_port ~console_domid:_ ~no_incr_generationid:_ ~vcpus:_ ~extras + ~(dm : Device.Profile.t) ~domain_type ~store_port ~console_port ~extras ~vtpm ~numa_placements manager_path domid main_fd vgpu_fd = let module DD = Debug.Make (struct let name = "mig64" end) in let open DD in @@ -1719,9 +1718,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs (Uuidx.to_string uuid) domid e ; raise Suspend_image_failure -let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid - ~console_domid ~no_incr_generationid ~timeoffset ~extras info ~manager_path - ~vtpm domid fd vgpu_fd = +let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~timeoffset ~extras + info ~manager_path ~vtpm domid fd vgpu_fd = let static_max_kib = info.memory_max in let target_kib = info.memory_target in let vcpus = info.vcpus in @@ -1766,16 +1764,15 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity:info.hard_affinity domid in let store_mfn, console_mfn = - restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~store_domid - ~console_port ~console_domid ~no_incr_generationid ~vcpus ~extras ~vtpm - ~numa_placements manager_path domid fd vgpu_fd + restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~console_port + ~extras ~vtpm ~numa_placements manager_path domid fd vgpu_fd in let local_stuff = console_keys console_port console_mfn in (* And finish domain's building *) - build_post ~xc ~xs ~vcpus ~target_mib ~static_max_mib domid domain_type - store_mfn store_port local_stuff vm_stuff + build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn + store_port local_stuff vm_stuff -let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type +let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xs ~domain_type ~is_uefi ~vtpm ~dm ~manager_path ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback ~qemu_domid ~do_suspend_callback = let open Suspend_image in @@ -1987,9 +1984,9 @@ let suspend (task : Xenops_task.task_handle) ~xc ~xs ~domain_type ~is_uefi ~dm write_header main_fd (Xenops, Int64.of_int xenops_rec_len) >>= fun () -> debug "Writing Xenops record contents" ; Io.write main_fd xenops_record ; - suspend_emu_manager ~task ~xc ~xs ~domain_type ~is_uefi ~vtpm ~dm - ~manager_path ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback - ~qemu_domid ~do_suspend_callback + suspend_emu_manager ~task ~xs ~domain_type ~is_uefi ~vtpm ~dm ~manager_path + ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback ~qemu_domid + ~do_suspend_callback >>= fun () -> ( if is_uefi then write_varstored_record task ~xs domid main_fd >>= fun () -> diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index a7681827029..40f154561a3 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -247,9 +247,6 @@ val restore : -> xc:Xenctrl.handle -> xs:Ezxenstore_core.Xenstore.Xs.xsh -> dm:Device.Profile.t - -> store_domid:int - -> console_domid:int - -> no_incr_generationid:bool -> timeoffset:string -> extras:string list -> build_info diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 18383a04c00..3b348399a36 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -2628,7 +2628,6 @@ module VM = struct in ({x with Domain.memory_target= initial_target}, timeoffset) in - let no_incr_generationid = false in let vtpm = vtpm_of ~vm in ( try with_data ~xc ~xs task data false @@ fun fd -> @@ -2644,9 +2643,8 @@ module VM = struct None in let manager_path = choose_emu_manager vm.Vm.platformdata in - Domain.restore task ~xc ~xs ~dm:(dm_of ~vm) ~store_domid - ~console_domid ~no_incr_generationid ~timeoffset ~extras build_info - ~manager_path ~vtpm domid fd vgpu_fd + Domain.restore task ~xc ~xs ~dm:(dm_of ~vm) ~timeoffset ~extras + build_info ~manager_path ~vtpm domid fd vgpu_fd with e -> error "VM %s: restore failed: %s" vm.Vm.id (Printexc.to_string e) ; (* As of xen-unstable.hg 779c0ef9682 libxenguest will destroy From 0980515200695a693ac66fd7093b24c168d0007c Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Wed, 17 Sep 2025 11:31:38 +0100 Subject: [PATCH 484/492] docs: Update add-function.md to fix example When performing the changes described in add-function.md for adding a host-price-of function to xapi, a type error would arise from the message-forwarding.ml file - this is fixed by explicitly giving the remote_fn named argument. Signed-off-by: Christian Pardillo Laursen --- doc/content/xapi/guides/howtos/add-function.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/content/xapi/guides/howtos/add-function.md b/doc/content/xapi/guides/howtos/add-function.md index 8aeedfb27fb..cbde59a991e 100644 --- a/doc/content/xapi/guides/howtos/add-function.md +++ b/doc/content/xapi/guides/howtos/add-function.md @@ -172,8 +172,8 @@ the Host module: let price_of ~__context ~host ~item = info "Host.price_of for item %s" item; let local_fn = Local.Host.price_of ~host ~item in - do_op_on ~local_fn ~__context ~host - (fun session_id rpc -> Client.Host.price_of ~rpc ~session_id ~host ~item) + let remote_fn = Client.Host.price_of ~host ~item in + do_op_on ~local_fn ~__context ~host ~remote_fn After the ~__context parameter, the parameters of this new function should match the parameters we specified for the message. In this case, that is the From b988528411801de575c53bb6e65126cd762d8739 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 18 Sep 2025 16:25:34 +0100 Subject: [PATCH 485/492] ocaml: allow xapi to compile under OCaml 5.3 This removes using effect as a label because now it's a reserved word, and changes how the memprof is used to be compatible both with 4.14 and 5.3, by ignoring the value it returns. Signed-off-by: Pau Ruiz Safont --- ocaml/idl/datamodel_common.ml | 12 ++++++------ ocaml/idl/datamodel_vm.ml | 4 ++-- ocaml/libs/timeslice/timeslice.ml | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 4786d2d329e..c5b3376624d 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -578,8 +578,8 @@ let get_deprecated lifecycle = with Not_found -> None let call ~name ?(doc = "") ?(in_oss_since = Some "3.0.3") ?result - ?(flags = [`Session; `Async]) ?(effect = true) ?(tag = Custom) ?(errs = []) - ?(custom_marshaller = false) ?(db_only = false) + ?(flags = [`Session; `Async]) ?(has_effect = true) ?(tag = Custom) + ?(errs = []) ?(custom_marshaller = false) ?(db_only = false) ?(no_current_operations = false) ?(secret = false) ?(hide_from_docs = false) ?(pool_internal = false) ~allowed_roles ?(map_keys_roles = []) ?(params = []) ?versioned_params ?lifecycle ?(doc_tags = []) ?forward_to () @@ -633,7 +633,7 @@ let call ~name ?(doc = "") ?(in_oss_since = Some "3.0.3") ?result ; msg_db_only= db_only ; msg_release= call_release ; msg_lifecycle= Lifecycle.from lifecycle - ; msg_has_effect= effect + ; msg_has_effect= has_effect ; msg_tag= tag ; msg_obj_name= "" ; msg_force_custom= None @@ -659,8 +659,8 @@ let operation_enum x = (** Make an object field record *) let field ?(in_oss_since = Some "3.0.3") ?(internal_only = false) ?(ignore_foreign_key = false) ?(writer_roles = None) ?(reader_roles = None) - ?(qualifier = RW) ?(ty = String) ?(effect = false) ?(default_value = None) - ?(persist = true) ?(map_keys_roles = []) + ?(qualifier = RW) ?(ty = String) ?(has_effect = false) + ?(default_value = None) ?(persist = true) ?(map_keys_roles = []) ?(* list of (key_name,(writer_roles)) for a map field *) lifecycle ?(doc_tags = []) name desc = let lifecycle = @@ -695,7 +695,7 @@ let field ?(in_oss_since = Some "3.0.3") ?(internal_only = false) ; full_name= [name] ; field_description= desc ; field_persist= persist - ; field_has_effect= effect + ; field_has_effect= has_effect ; field_ignore_foreign_key= ignore_foreign_key ; field_setter_roles= writer_roles ; field_getter_roles= reader_roles diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 84b329890b8..f0d0856f9e4 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2620,7 +2620,7 @@ let t = ) ] "Creators of VMs and templates may store version information here." - ; field ~effect:true ~ty:Bool "is_a_template" + ; field ~has_effect:true ~ty:Bool "is_a_template" ~lifecycle: [ ( Published @@ -2815,7 +2815,7 @@ let t = ~ty:String "recommendations" "An XML specification of recommended values and ranges for \ properties of this VM" - ; field ~effect:true ~in_oss_since:None + ; field ~has_effect:true ~in_oss_since:None ~ty:(Map (String, String)) ~lifecycle: [ diff --git a/ocaml/libs/timeslice/timeslice.ml b/ocaml/libs/timeslice/timeslice.ml index c414b321d64..55b888871df 100644 --- a/ocaml/libs/timeslice/timeslice.ml +++ b/ocaml/libs/timeslice/timeslice.ml @@ -65,7 +65,8 @@ let periodic = let set ?(sampling_rate = 1e-4) interval = Atomic.set yield_interval (Mtime.Span.of_float_ns @@ (interval *. 1e9) |> Option.get) ; - Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic + let _ = Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic in + () let clear () = Gc.Memprof.stop () ; From cb8fad42ecd8241da31a030d46524c4b68af3846 Mon Sep 17 00:00:00 2001 From: Gabriel Buica Date: Mon, 8 Sep 2025 14:34:04 +0100 Subject: [PATCH 486/492] XSI-1987 & CA-416462: Fix RPU host evacuation version check Rolling pool upgrades should only allow VMs to be evacuated to hosts that have the same or higher versions of xapi build or xen. Previously, the check was done only for platform version. Signed-off-by: Gabriel Buica --- ocaml/tests/test_helpers.ml | 133 +++++++++++++++++++++++++++++++++ ocaml/xapi/db_gc.ml | 8 +- ocaml/xapi/helpers.ml | 144 +++++++++++++++++++++++------------- ocaml/xapi/xapi_globs.ml | 4 + ocaml/xapi/xapi_host.ml | 9 ++- ocaml/xapi/xapi_vm.ml | 2 +- 6 files changed, 244 insertions(+), 56 deletions(-) diff --git a/ocaml/tests/test_helpers.ml b/ocaml/tests/test_helpers.ml index b856bb363e3..aecd8d4b15a 100644 --- a/ocaml/tests/test_helpers.ml +++ b/ocaml/tests/test_helpers.ml @@ -466,6 +466,138 @@ module RunInParallel = Generic.MakeStateless (struct ] end) +module Version = struct + let test_compare_int_list () = + let test_cases = + [ + ("Equal Lists", [1; 2; 3], [1; 2; 3], 0) + ; ("Empty Lists", [], [], 0) + ; ("'a' is smaller (first element)", [1; 10; 100], [2; 0; 0], -1) + ; ("'a' is smaller (later element)", [1; 2; 3], [1; 2; 4], -1) + ; ("'a' is greater (first element)", [5; 1; 1], [2; 10; 10], 1) + ; ("'a' is greater (later element)", [1; 3; 3], [1; 2; 4], 1) + ; ("Lists with negative numbers", [0; -5; 10], [0; -2; -10], -1) + ; ("Single element lists (equal)", [42], [42], 0) + ; ("Single element lists (unequal)", [42], [43], -1) + ; ("Different number of element in lists", [25; 27], [25; 27; 1], -1) + ] + in + let test_compare (description, list1, list2, expected) = + let actual = Helpers.compare_int_lists list1 list2 in + let description = Printf.sprintf "compate_int_lists: %s" description in + Alcotest.(check int) description expected actual + in + List.iter test_compare test_cases + + let test_version_numbers_of_string () = + let test_cases = + [ + ( "Standard major.minor.patch version, e.g. xapi build version stored \ + in the database" + , "25.30.0" + , [25; 30; 0] + ) + ; ( "Dev build version, e.g. xapi build version stored in the database" + , "25.30.0.6.gb239bd75a" + , [25; 30; 0; 6] + ) + ; ( "Version with a patch identifier e.g. xen versions stored in the \ + database" + , "25.15.0-13" + , [25; 15; 0; 13] + ) + ; ("Default version", "0.0.0", [0; 0; 0]) + ] + in + let test_version_numbers (description, version_string, expected) = + let actual = Helpers.version_numbers_of_string version_string in + let description = + Printf.sprintf "version_numbers_of_string: %s" description + in + Alcotest.(check @@ list int) description expected actual + in + List.iter test_version_numbers test_cases + + let test_compare_versions () = + let sw_vers_a = + Xapi_globs.[(_platform_version, "2.4.0"); (_xen_version, "4.14.0-13")] + in + let sw_vers_b = Xapi_globs.[(_xen_version, "4.13.0-13")] in + let test_cases = + Xapi_globs. + [ + ( "Software versions 'b' are missing platform version" + , _platform_version + , sw_vers_a + , sw_vers_b + , 1 + ) + ; ( "Software versions 'a' are missing platform version" + , _platform_version + , sw_vers_b + , sw_vers_a + , -1 + ) + ; ( "xen version exists in both (`a` is greater)" + , _xen_version + , sw_vers_a + , sw_vers_b + , 1 + ) + ; ( "xapi build version is missing from both (equal)" + , _xapi_build_version + , sw_vers_a + , sw_vers_b + , 0 + ) + ] + in + let test_compare (description, key, value_a, value_b, expected) = + let actual = Helpers.compare_versions ~version_key:key value_a value_b in + let description = Printf.sprintf "compare_versions: %s" description in + Alcotest.(check int) description expected actual + in + List.iter test_compare test_cases + + let test_compare_all_versions () = + let current = + Xapi_globs.[(_platform_version, "8.1.0"); (_xen_version, "4.13.0-15")] + in + let newer = + Xapi_globs.[(_platform_version, "8.2.0"); (_xen_version, "4.13.0-15")] + in + let mixed = + Xapi_globs.[(_platform_version, "8.2.0"); (_xen_version, "4.12.0-15")] + in + let test_cases = + [ + ("Newer is greater or equal than Current", newer, current, true) + ; ("Current is greater or equal than Current", current, current, true) + ; ("Current is not greater or equal than Newer", current, newer, false) + ; ("Mixed is not greater or equal then Current", mixed, current, false) + ; ("Current is not greater or equal than Mixed", current, mixed, false) + ] + in + let test_compare (description, vers_a, vers_b, expected) = + let actual = + Helpers.compare_all_versions ~is_greater_or_equal:vers_a ~than:vers_b + in + let description = Printf.sprintf "compare_all_versions: %s" description in + Alcotest.(check bool) description expected actual + in + List.iter test_compare test_cases + + let test = + [ + ("Compare int list", `Quick, test_compare_int_list) + ; ("Version numbers from string", `Quick, test_version_numbers_of_string) + ; ("Compare versions", `Quick, test_compare_versions) + ; ("Compare all versions", `Quick, test_compare_all_versions) + ] + + let tests = [("Version compare tests", test)] +end + let tests = make_suite "helpers_" [ @@ -476,3 +608,4 @@ let tests = ; ("assert_is_valid_cidr", CIDRCheckers.tests) ; ("run_in_parallel", RunInParallel.tests) ] + @ Version.tests diff --git a/ocaml/xapi/db_gc.ml b/ocaml/xapi/db_gc.ml index c8c68309369..50e2a23a6a1 100644 --- a/ocaml/xapi/db_gc.ml +++ b/ocaml/xapi/db_gc.ml @@ -165,16 +165,18 @@ let detect_rolling_upgrade ~__context = in (* Resynchronise *) if actually_in_progress <> pool_says_in_progress then ( - let platform_versions = + let host_versions = List.map (fun host -> - Helpers.version_string_of ~__context (Helpers.LocalObject host) + Helpers.get_software_versions ~__context + (Helpers.LocalObject host) + |> Helpers.versions_string_of ) (Db.Host.get_all ~__context) in debug "xapi platform version = %s; host platform versions = [ %s ]" (Xapi_version.platform_version ()) - (String.concat "; " platform_versions) ; + (String.concat "; " host_versions) ; warn "Pool thinks rolling upgrade%s in progress but Host version \ numbers indicate otherwise; correcting" diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 35909e5d5ed..9807c4540d4 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -867,12 +867,18 @@ let assert_we_are_master ~__context = ) (* Host version compare helpers *) -let compare_int_lists : int list -> int list -> int = +let rec compare_int_lists : int list -> int list -> int = fun a b -> - let first_non_zero is = - List.fold_left (fun a b -> if a <> 0 then a else b) 0 is - in - first_non_zero (List.map2 compare a b) + match (a, b) with + | [], [] -> + 0 + | [], _ -> + -1 + | _, [] -> + 1 + | x :: xs, y :: ys -> + let r = compare x y in + if r <> 0 then r else compare_int_lists xs ys let group_by f list = let evaluated_list = List.map (fun x -> (x, f x)) list in @@ -913,39 +919,69 @@ let sort_by_schwarzian ?(descending = false) f list = |> List.sort (fun (_, x') (_, y') -> comp x' y') |> List.map (fun (x, _) -> x) -let version_string_of : __context:Context.t -> [`host] api_object -> string = - fun ~__context host -> - try - let software_version = - match host with - | LocalObject host_ref -> - Db.Host.get_software_version ~__context ~self:host_ref - | RemoteObject (rpc, session_id, host_ref) -> - Client.Client.Host.get_software_version ~rpc ~session_id - ~self:host_ref - in - List.assoc Xapi_globs._platform_version software_version - with Not_found -> Xapi_globs.default_platform_version +let version_keys_list = + Xapi_globs.[_platform_version; _xapi_build_version; _xen_version] -let version_of : __context:Context.t -> [`host] api_object -> int list = - fun ~__context host -> - let vs = version_string_of ~__context host in - List.map int_of_string (String.split_on_char '.' vs) +let get_software_versions ~__context host = + ( match host with + | LocalObject self -> + Db.Host.get_software_version ~__context ~self + | RemoteObject (rpc, session_id, self) -> + Client.Client.Host.get_software_version ~rpc ~session_id ~self + ) + |> List.filter (fun (k, _) -> List.mem k version_keys_list) + +let versions_string_of : (string * string) list -> string = + fun ver_list -> + ver_list + |> List.map (fun (k, v) -> Printf.sprintf "%s: %s" k v) + |> String.concat "," + +let version_numbers_of_string version_string = + ( match String.split_on_char '-' version_string with + | [standard_version; patch] -> + String.split_on_char '.' standard_version @ [patch] + | [standard_version] -> + String.split_on_char '.' standard_version + | _ -> + ["0"; "0"; "0"] + ) + |> List.filter_map int_of_string_opt -(* Compares host versions, analogous to Stdlib.compare. *) -let compare_host_platform_versions : - __context:Context.t -> [`host] api_object -> [`host] api_object -> int = - fun ~__context host_a host_b -> - let version_of = version_of ~__context in - compare_int_lists (version_of host_a) (version_of host_b) +let version_of : version_key:string -> (string * string) list -> int list = + fun ~version_key versions_list -> + List.assoc_opt version_key versions_list + |> Option.value ~default:"0.0.0" + |> version_numbers_of_string -let max_version_in_pool : __context:Context.t -> int list = +(* Compares host versions, analogous to Stdlib.compare. *) +let compare_versions : + version_key:string + -> (string * string) list + -> (string * string) list + -> int = + fun ~version_key sw_ver_a sw_ver_b -> + let version_a = version_of ~version_key sw_ver_a in + let version_b = version_of ~version_key sw_ver_b in + compare_int_lists version_a version_b + +let compare_all_versions ~is_greater_or_equal:a ~than:b = + List.for_all + (fun version_key -> compare_versions ~version_key a b >= 0) + version_keys_list + +let max_version_in_pool : __context:Context.t -> (string * string) list = fun ~__context -> let max_version a b = - if a = [] then b else if compare_int_lists a b > 0 then a else b + if a = [] then + b + else if compare_all_versions ~is_greater_or_equal:a ~than:b then + a + else + b and versions = List.map - (fun host_ref -> version_of ~__context (LocalObject host_ref)) + (fun host_ref -> get_software_versions ~__context (LocalObject host_ref)) (Db.Host.get_all ~__context) in List.fold_left max_version [] versions @@ -953,21 +989,30 @@ let max_version_in_pool : __context:Context.t -> int list = let host_has_highest_version_in_pool : __context:Context.t -> host:[`host] api_object -> bool = fun ~__context ~host -> - let host_version = version_of ~__context host + let host_versions = get_software_versions ~__context host and max_version = max_version_in_pool ~__context in - compare_int_lists host_version max_version >= 0 + compare_all_versions ~is_greater_or_equal:host_versions ~than:max_version let host_versions_not_decreasing ~__context ~host_from ~host_to = - compare_host_platform_versions ~__context host_from host_to <= 0 + let sw_vers_from = get_software_versions ~__context host_from in + let sw_vers_to = get_software_versions ~__context host_to in + compare_all_versions ~is_greater_or_equal:sw_vers_to ~than:sw_vers_from -let is_platform_version_same_on_master ~__context ~host = +let are_host_versions_same_on_master_inner ~__context ~host ~master = if is_pool_master ~__context ~host then true else - let master = get_master ~__context in - compare_host_platform_versions ~__context (LocalObject master) - (LocalObject host) - = 0 + let sw_ver_master = get_software_versions ~__context (LocalObject master) in + let sw_ver_host = get_software_versions ~__context (LocalObject host) in + List.for_all + (fun version_key -> + compare_versions ~version_key sw_ver_master sw_ver_host = 0 + ) + version_keys_list + +let are_host_versions_same_on_master ~__context ~host = + let master = get_master ~__context in + are_host_versions_same_on_master_inner ~__context ~host ~master let maybe_raise_vtpm_unimplemented func message = if not !ignore_vtpm_unimplemented then ( @@ -975,8 +1020,8 @@ let maybe_raise_vtpm_unimplemented func message = raise Api_errors.(Server_error (not_implemented, [message])) ) -let assert_platform_version_is_same_on_master ~__context ~host ~self = - if not (is_platform_version_same_on_master ~__context ~host) then +let assert_host_versions_are_same_on_master ~__context ~host ~self = + if not (are_host_versions_same_on_master ~__context ~host) then raise (Api_errors.Server_error ( Api_errors.vm_host_incompatible_version @@ -1002,15 +1047,14 @@ let assert_host_has_highest_version_in_pool : let pool_has_different_host_platform_versions ~__context = let all_hosts = Db.Host.get_all ~__context in - let platform_versions = - List.map - (fun host -> version_string_of ~__context (LocalObject host)) - all_hosts - in - let is_different_to_me platform_version = - platform_version <> Xapi_version.platform_version () - in - List.exists is_different_to_me platform_versions + let master = get_master ~__context in + not + (List.for_all + (fun host -> + are_host_versions_same_on_master_inner ~__context ~host ~master + ) + all_hosts + ) (* Checks that a host has a PBD for a particular SR (meaning that the SR is visible to the host) *) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 3688478dce8..ce2f9aeb0f4 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -151,6 +151,10 @@ let _dbv = "dbv" let _db_schema = "db_schema" +let _xapi_build_version = "xapi_build" + +let _xen_version = "xen" + (* When comparing two host versions, always treat a host that has platform_version defined as newer * than any host that does not have platform_version defined. * Substituting this default when a host does not have platform_version defined will be acceptable, diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 1bf3e4d9b6a..6c714f24f1b 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -285,13 +285,18 @@ let compute_evacuation_plan_no_wlb ~__context ~host ?(ignore_ha = false) () = the source host. So as long as host versions aren't decreasing, we're allowed to migrate VMs between hosts. *) debug "evacuating host version: %s" - (Helpers.version_string_of ~__context (Helpers.LocalObject host)) ; + (Helpers.get_software_versions ~__context (Helpers.LocalObject host) + |> Helpers.versions_string_of + ) ; let target_hosts = List.filter (fun target -> debug "host %s version: %s" (Db.Host.get_hostname ~__context ~self:target) - (Helpers.version_string_of ~__context (Helpers.LocalObject target)) ; + Helpers.( + get_software_versions ~__context (LocalObject target) + |> versions_string_of + ) ; Helpers.host_versions_not_decreasing ~__context ~host_from:(Helpers.LocalObject host) ~host_to:(Helpers.LocalObject target) diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 68d07bfac11..069e840731a 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -65,7 +65,7 @@ let update_allowed_operations ~__context ~self = let assert_can_boot_here ~__context ~self ~host = let snapshot = Db.VM.get_record ~__context ~self in if Helpers.rolling_upgrade_in_progress ~__context then - Helpers.assert_platform_version_is_same_on_master ~__context ~host ~self ; + Helpers.assert_host_versions_are_same_on_master ~__context ~host ~self ; assert_can_boot_here ~__context ~self ~host ~snapshot ~do_cpuid_check:true () let retrieve_wlb_recommendations ~__context ~vm = From 0e4ce01aa8e63f1ce60b0f6c8926a1b77f3ff7e5 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Mon, 22 Sep 2025 13:21:25 +0800 Subject: [PATCH 487/492] CA-417390: No RRD metric for vGPU migration with local storage The "vgpu_map" can be empty for an intra-pool migration. This was missed previously because XenCenter indeed prepares non-empty "vgpu_map" for intra-pool migration with shared storage. But it prepares empty "vgpu_map" for intra-pool migration with local storage. Signed-off-by: Ming Lu --- ocaml/xapi/message_forwarding.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 5b3fb078d47..4cc2dd70a50 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -2624,7 +2624,7 @@ functor assert_can_migrate ~__context ~vm ~dest ~live ~vdi_map ~vif_map ~vgpu_map ~options ) ; - if vgpu_map <> [] then + if Db.VM.get_VGPUs ~__context ~self:vm <> [] then Xapi_stats.incr_pool_vgpu_migration_count () ; forward_migrate_send () ) From bdf06bca7534fbc0c4fc3cee3408a51a22615226 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 19 Sep 2025 16:05:36 +0100 Subject: [PATCH 488/492] ocaml: prepare formatting for ocamlformat 0.27.0 This changes comments as well as the location of some comments,otherwise ocamlformat is unable to process files Signed-off-by: Pau Ruiz Safont --- ocaml/database/db_cache_impl.ml | 32 +++++++++---------- ocaml/database/db_secret_string.mli | 8 ++--- ocaml/forkexecd/lib/fe_systemctl.ml | 10 +++--- ocaml/forkexecd/src/child.ml | 2 +- ocaml/libs/pciutil/pciutil.ml | 7 ++-- ocaml/libs/tracing/tracing_export.mli | 24 +++++++------- ocaml/networkd/bin/network_monitor_thread.ml | 2 +- ocaml/xapi-aux/kerberos_encryption_types.ml | 14 ++++---- ocaml/xapi-idl/lib/coverage/enabled.ml | 8 ++--- ocaml/xapi-types/secretString.mli | 2 +- ocaml/xapi/certificates.ml | 16 +++++----- ocaml/xapi/extauth_plugin_ADpbis.ml | 7 ++-- ocaml/xapi/extauth_plugin_ADwinbind.ml | 10 +++--- ocaml/xapi/message_forwarding.ml | 4 +-- ocaml/xapi/vm_evacuation.ml | 2 +- ocaml/xapi/xapi_clustering.ml | 2 +- ocaml/xapi/xapi_guest_agent.ml | 4 +-- ocaml/xapi/xapi_ha_vm_failover.ml | 2 +- ocaml/xapi/xapi_host.ml | 6 ++-- ocaml/xapi/xapi_pif.ml | 2 +- ocaml/xapi/xapi_pool.ml | 6 ++-- ocaml/xapi/xapi_session.ml | 3 +- ocaml/xapi/xapi_vm.ml | 12 +++---- ocaml/xapi/xapi_vm_helpers.ml | 8 +++-- ocaml/xapi/xapi_vmss.ml | 2 +- ocaml/xapi/xapi_vusb_helpers.ml | 10 +++--- ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 2 +- ocaml/xenopsd/xc/domain.ml | 2 +- unixpwd/src/unixpwd.mli | 2 +- 29 files changed, 108 insertions(+), 103 deletions(-) diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 050d43f0504..97e1def4acb 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -482,24 +482,24 @@ let spawn_db_flush_threads () = try Thread.delay Db_backend.db_FLUSH_TIMER ; (* If I have some writing capacity left in this write period then consider doing a write; or - if the connection is not write-limited then consider doing a write too. - We also have to consider doing a write if exit_on_next_flush is set: because when this is - set (by a signal handler) we want to do a flush whether or not our write limit has been - exceeded. + if the connection is not write-limited then consider doing a write too. + We also have to consider doing a write if exit_on_next_flush is set: because when this is + set (by a signal handler) we want to do a flush whether or not our write limit has been + exceeded. *) + (* always flush straight away; this request is urgent + otherwise, we only write if + (i) "coalesscing period has come to an end"; and + (ii) "write limiting requirements are met": *) ( if !Db_connections.exit_on_next_flush - (* always flush straight away; this request is urgent *) - || (* otherwise, we only write if (i) "coalesscing period has come to an end"; and (ii) "write limiting requirements are met": *) - (not (in_coallescing_period ())) - (* see (i) above *) - && (!my_writes_this_period - < dbconn.Parse_db_conf.write_limit_write_cycles - || dbconn.Parse_db_conf.mode - = Parse_db_conf.No_limit - (* (ii) above *) - ) - then (* debug "[%s] considering flush" db_path; *) + || (not (in_coallescing_period ())) + && (!my_writes_this_period + < dbconn.Parse_db_conf.write_limit_write_cycles + || dbconn.Parse_db_conf.mode + = Parse_db_conf.No_limit + ) + then let was_anything_flushed = Xapi_stdext_threads.Threadext.Mutex.execute Db_lock.global_flush_mutex (fun () -> @@ -509,7 +509,7 @@ let spawn_db_flush_threads () = if was_anything_flushed then ( my_writes_this_period := !my_writes_this_period + 1 ; (* when we do a write, reset the coallesce_period_start to now -- recall that this - variable tracks the time since last write *) + variable tracks the time since last write *) coallesce_period_start := Unix.gettimeofday () ) ) ; diff --git a/ocaml/database/db_secret_string.mli b/ocaml/database/db_secret_string.mli index f04812ebad3..e0587875a47 100644 --- a/ocaml/database/db_secret_string.mli +++ b/ocaml/database/db_secret_string.mli @@ -13,10 +13,10 @@ *) (* Prevent direct conversions to string to avoid accidental misuse. - * It is still possible to convert it to Rpc.t and recover it that way, - * it is not a protection against willfully recovering the protected string - * (we do need to send these as parameters in RPCs). - * *) + It is still possible to convert it to Rpc.t and recover it that way, + it is not a protection against willfully recovering the protected string + (we do need to send these as parameters in RPCs). +*) (** a type with no direct conversions to string *) type t diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index 84350c167a4..00b4371445d 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -107,10 +107,10 @@ let show ~service = let stop ~service = action ~service "stop" ; (* Stopping shouldn't fail because it should fall back to SIGKILL which should almost always work, - * unless there is a kernel bug that keeps a process stuck. - * In the unlikely scenario that this does fail we leave the transient service file behind - * so that the failure can be investigated. - * *) + unless there is a kernel bug that keeps a process stuck. + In the unlikely scenario that this does fail we leave the transient service file behind + so that the failure can be investigated. + *) let status = show ~service in (* allow systemd to garbage-collect the status and the unit, preventing leaks. * See CollectMode in systemd.unit(5) for details. *) @@ -162,7 +162,7 @@ let start_transient ?env ?properties ?(exec_ty = Type.Simple) ~service cmd args (* If start failed we do not know what state the service is in: * try to stop it and clean up. * Stopping could fail as well, in which case report the original exception. - * *) + *) ( try let (_ : status) = stop ~service in () diff --git a/ocaml/forkexecd/src/child.ml b/ocaml/forkexecd/src/child.ml index 5f79f2fb6c9..76a1611ee86 100644 --- a/ocaml/forkexecd/src/child.ml +++ b/ocaml/forkexecd/src/child.ml @@ -274,7 +274,7 @@ let run state comms_sock fd_sock fd_sock_path = let (_ : int list) = Unix.sigprocmask Unix.SIG_BLOCK [Sys.sigchld] in (* First test whether the child has exited - if it has then report this - * via the socket and exit. *) + * via the socket and exit. *) match Unix.waitpid [Unix.WNOHANG] result with | pid, status when pid = result -> report_child_exit comms_sock args result status ; diff --git a/ocaml/libs/pciutil/pciutil.ml b/ocaml/libs/pciutil/pciutil.ml index ca63fb5aa13..757ea0ce0e4 100644 --- a/ocaml/libs/pciutil/pciutil.ml +++ b/ocaml/libs/pciutil/pciutil.ml @@ -25,13 +25,14 @@ let parse_from file vendor device = let vendor_str = ref (unknown_vendor vendor) and device_str = ref (unknown_device device) in (* CA-26771: As we parse the file we keep track of the current vendor. - When we find a device match we only accept it if it's from the right vendor; it doesn't make - sense to pair vendor 2's device with vendor 1. *) + When we find a device match we only accept it if it's from the right + vendor; it doesn't make sense to pair vendor 2's device with vendor 1. *) let current_xvendor = ref "" in Unixext.readfile_line (fun line -> + (* ignore subvendors/subdevices, blank lines and comments *) if line = "" || line.[0] = '#' || (line.[0] = '\t' && line.[1] = '\t') - then (* ignore subvendors/subdevices, blank lines and comments *) + then () else if line.[0] = '\t' then ( if diff --git a/ocaml/libs/tracing/tracing_export.mli b/ocaml/libs/tracing/tracing_export.mli index a857a4f523d..0714b7107a9 100644 --- a/ocaml/libs/tracing/tracing_export.mli +++ b/ocaml/libs/tracing/tracing_export.mli @@ -1,16 +1,16 @@ (* -* Copyright (C) 2024 Cloud Software Group -* -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published -* by the Free Software Foundation; version 2.1 only. with the special -* exception on linking described in file LICENSE. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -*) + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) (** [Tracing_export] is a module dedicated for the creation and management of threads that export the tracing data. diff --git a/ocaml/networkd/bin/network_monitor_thread.ml b/ocaml/networkd/bin/network_monitor_thread.ml index 1b15dbe2a42..540468dfdcb 100644 --- a/ocaml/networkd/bin/network_monitor_thread.ml +++ b/ocaml/networkd/bin/network_monitor_thread.ml @@ -125,7 +125,7 @@ let get_link_stats () = in List.map (fun link -> standardise_name (Link.get_name link)) links |> (* Only keep interfaces with prefixes on the whitelist, and exclude VLAN - devices (ethx.y). *) + devices (ethx.y). *) List.filter (fun name -> is_whitelisted name && not (is_vlan name)) in Cache.free cache ; Socket.close s ; Socket.free s ; links diff --git a/ocaml/xapi-aux/kerberos_encryption_types.ml b/ocaml/xapi-aux/kerberos_encryption_types.ml index 8bb63004677..d88c0905b7e 100644 --- a/ocaml/xapi-aux/kerberos_encryption_types.ml +++ b/ocaml/xapi-aux/kerberos_encryption_types.ml @@ -15,18 +15,18 @@ (* Kerberos support several different encrytion types * winbind support it as strong, legacy and all * details, https://www.samba.org/samba/docs/current/man-html/smb.conf.5.html - * *) + *) module Winbind = struct type t = Strong | Legacy | All (* - * [X] 0x00000001 DES-CBC-CRC - * [X] 0x00000002 DES-CBC-MD5 - * [X] 0x00000004 RC4-HMAC - * [X] 0x00000008 AES128-CTS-HMAC-SHA1-96 - * [X] 0x00000010 AES256-CTS-HMAC-SHA1-96 - * *) + * [X] 0x00000001 DES-CBC-CRC + * [X] 0x00000002 DES-CBC-MD5 + * [X] 0x00000004 RC4-HMAC + * [X] 0x00000008 AES128-CTS-HMAC-SHA1-96 + * [X] 0x00000010 AES256-CTS-HMAC-SHA1-96 + *) let des_cbc_crc = 0x1 diff --git a/ocaml/xapi-idl/lib/coverage/enabled.ml b/ocaml/xapi-idl/lib/coverage/enabled.ml index 461221db512..11ac510f2d3 100644 --- a/ocaml/xapi-idl/lib/coverage/enabled.ml +++ b/ocaml/xapi-idl/lib/coverage/enabled.ml @@ -103,12 +103,12 @@ module Dispatcher = struct |> (* filter out ourselves *) List.filter (fun q -> self <> q) |> (* best-effort: collect and return all non-failed results, log - errors *) + errors *) List.rev_map (rpc_ignore_err ~t ~body) |> (* multiple return values converted to a single string, suitable - for use in a command like: mv $(message-cli call - org.xen.xapi.coverage.dispatch --timeout 60 --body 'dump - {jobid}') /tmp/coverage/ *) + for use in a command like: mv $(message-cli call + org.xen.xapi.coverage.dispatch --timeout 60 --body 'dump + {jobid}') /tmp/coverage/ *) String.concat " " |> ok ) diff --git a/ocaml/xapi-types/secretString.mli b/ocaml/xapi-types/secretString.mli index 6d85364d04e..1f14b6a9e95 100644 --- a/ocaml/xapi-types/secretString.mli +++ b/ocaml/xapi-types/secretString.mli @@ -16,7 +16,7 @@ * It is still possible to convert it to Rpc.t and recover it that way, * it is not a protection against willfully recovering the protected string * (we do need to send these as parameters in RPCs). - * *) + *) (** a type with no direct conversions to string *) type t diff --git a/ocaml/xapi/certificates.ml b/ocaml/xapi/certificates.ml index f69497ce118..f0a75779642 100644 --- a/ocaml/xapi/certificates.ml +++ b/ocaml/xapi/certificates.ml @@ -20,14 +20,14 @@ module D = Debug.Make (struct let name = "certificates" end) open D (* Certificate locations: - * a) stunnel external = /etc/xensource/xapi-ssl.pem - * b) stunnel SNI (internal) = /etc/xensource/xapi-pool-tls.pem - * c) user trusted cert folder = /etc/stunnel/certs/ - * d) internal trusted cert folder = /etc/stunnel/certs-pool/ - * e) appliance trusted bundle = /etc/stunnel/xapi-stunnel-ca-bundle.pem - * f) host-in-pool trusted bundle = /etc/stunnel/xapi-pool-ca-bundle.pem - * - * Note that the bundles (e) and (f) are generated automatically using the contents of (c) and (d) respectively *) + * a) stunnel external = /etc/xensource/xapi-ssl.pem + * b) stunnel SNI (internal) = /etc/xensource/xapi-pool-tls.pem + * c) user trusted cert folder = /etc/stunnel/certs/ + * d) internal trusted cert folder = /etc/stunnel/certs-pool/ + * e) appliance trusted bundle = /etc/stunnel/xapi-stunnel-ca-bundle.pem + * f) host-in-pool trusted bundle = /etc/stunnel/xapi-pool-ca-bundle.pem + * + * Note that the bundles (e) and (f) are generated automatically using the contents of (c) and (d) respectively *) type t_trusted = CA_Certificate | CRL diff --git a/ocaml/xapi/extauth_plugin_ADpbis.ml b/ocaml/xapi/extauth_plugin_ADpbis.ml index 56f723ff6f3..ea2dedfccc6 100644 --- a/ocaml/xapi/extauth_plugin_ADpbis.ml +++ b/ocaml/xapi/extauth_plugin_ADpbis.ml @@ -942,10 +942,9 @@ module AuthADlw : Auth_signature.AUTH_MODULE = struct Db.Host.get_external_auth_service_name ~__context ~self:host ) in - if - List.mem_assoc "domain" config_params - (* legacy test: do we have domain name in config? *) - then (* then config:domain must match service-name *) + (* legacy test: do we have domain name in config? + then config:domain must match service-name *) + if List.mem_assoc "domain" config_params then let _domain = List.assoc "domain" config_params in if service_name <> _domain then raise diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index 6def6c5bb64..214a941fe07 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -208,7 +208,7 @@ module Ldap = struct (* * Escape characters according to * https://docs.microsoft.com/en-gb/windows/win32/adsi/search-filter-syntax?redirectedfrom=MSDN#special-characters - * *) + *) let reg_star = {|*|} |> Re.str |> Re.compile @@ -402,7 +402,7 @@ module Ldap = struct let* stdout = try (* Query KDC instead of use domain here - * Just in case cannot resolve domain name from DNS *) + * Just in case cannot resolve domain name from DNS *) let args = [ "ads" @@ -553,7 +553,7 @@ module Wbinfo = struct * Name : UCC * Alt_Name : ucc.local * SID : S-1-5-21-2850064427-2368465266-4270348630 - * *) + *) let args = ["--domain-info"; from_name] in let* stdout = call_wbinfo args in let key = @@ -614,7 +614,7 @@ module Wbinfo = struct * CHILD1 * GRANDCHILD * UDDCHILD1 - * *) + *) let args = ["--all-domains"] in let* stdout = call_wbinfo args in Ok @@ -1655,7 +1655,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct * the joined domain (with 1 way trust ) , just return the default value * This is NOT a regression issue of PBIS * PBIS cannot handle such case neither - * *) + *) debug "Fallback to default value as no DC info in xapi database" ; Ok default_account in diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 5b3fb078d47..6ddb32b64bc 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -2579,7 +2579,7 @@ functor forward_vm_op ~local_fn ~__context ~vm ~remote_fn:(fun ~rpc ~session_id -> (* try InternalAsync.VM.migrate_send first to avoid long running idle stunnel connection - * fall back on Async.VM.migrate_send if slave doesn't support InternalAsync *) + * fall back on Async.VM.migrate_send if slave doesn't support InternalAsync *) Helpers.try_internal_async ~__context API.ref_VM_of_rpc (fun () -> Client.InternalAsync.VM.migrate_send ~rpc ~session_id ~vm @@ -6410,7 +6410,7 @@ functor let remote_fn = Client.Cluster_host.forget ~self in (* We need to ask another host that has a cluster host to mark it as dead. * We might've run force destroy and this host would no longer have a cluster host - * *) + *) let other_hosts = Db.Cluster.get_cluster_hosts ~__context ~self:cluster |> List.filter (( <> ) self) diff --git a/ocaml/xapi/vm_evacuation.ml b/ocaml/xapi/vm_evacuation.ml index 11a7560af8a..080da5a01ea 100644 --- a/ocaml/xapi/vm_evacuation.ml +++ b/ocaml/xapi/vm_evacuation.ml @@ -117,7 +117,7 @@ let ensure_no_vms ~__context ~rpc ~session_id ~evacuate_timeout = let shutdown vms = log_and_ignore_exn (fun () -> clean_shutdown vms) ; (* We can unplug the PBD if a VM is suspended or halted, but not if - * it is running or paused, i.e. "live" *) + * it is running or paused, i.e. "live" *) vms |> List.filter (fun self -> Xapi_vm_lifecycle_helpers.is_live ~__context ~self diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index c17b5eaf394..2553bc40202 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -338,7 +338,7 @@ let assert_cluster_host_quorate ~__context ~self = * achieved quorum yet if we have just booted and haven't seen enough hosts. * Do this via an API call rather than reading a field in the database, because the field in the * database could be out of date. - * *) + *) let result = Cluster_client.LocalClient.diagnostics (rpc ~__context) "assert_cluster_host_quorate" diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index edb56d64995..0de1ab163aa 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -94,7 +94,7 @@ let ( // ) = Filename.concat * * Add support for SR-IOV VF, so there are two kinds of vif_type, either to be * `vif` or `net-sriov-vf` - * *) + *) let networks path vif_type (list : string -> string list) = (* Find all ipv6 addresses under a path. *) let find_ipv6 path prefix = @@ -488,7 +488,7 @@ let all (lookup : string -> string option) (list : string -> string list) || guest_metrics_cached.can_use_hotplug_vif <> can_use_hotplug_vif (* Nb. we're ignoring the memory updates as far as the VM_guest_metrics API object is concerned. We are putting them into an RRD instead *) (* || - guest_metrics_cached.memory <> memory)*) + guest_metrics_cached.memory <> memory)*) then ( let gm = let existing = Db.VM.get_guest_metrics ~__context ~self in diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 19c84c4b7ed..998088d8f59 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -112,7 +112,7 @@ end = struct k x | Task (task, next) -> (* similar reasoning as above, when we get the result we need to chain the computations, - * refer to http://okmij.org/ftp/Computation/free-monad.html for a deeper theoretical explanation *) + * refer to http://okmij.org/ftp/Computation/free-monad.html for a deeper theoretical explanation *) Task (task, fun x -> next x >>= k) end diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 48730e26367..fee8d831619 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -497,7 +497,8 @@ let compute_evacuation_plan_wlb ~__context ~self = if Db.Host.get_control_domain ~__context ~self:target_host <> v && Db.Host.get_uuid ~__context ~self:resident_h = target_uuid - then (* resident host and migration host are the same. Reject this plan *) + (* resident host and migration host are the same. Reject this plan *) + then raise (Api_errors.Server_error ( Api_errors.wlb_malformed_response @@ -1782,7 +1783,6 @@ let enable_external_auth ~__context ~host ~config ~service_name ~auth_type = raise (Api_errors.Server_error (Api_errors.auth_unknown_type, [msg])) ) else (* if no auth_type is currently defined (it is an empty string), then we can set up a new one *) - (* we try to use the configuration to set up the new external authentication service *) (* we persist as much set up configuration now as we can *) @@ -2847,7 +2847,7 @@ let set_iscsi_iqn ~__context ~host ~value = * when you update the `iscsi_iqn` field we want to update `other_config`, * but when updating `other_config` we want to update `iscsi_iqn` too. * we have to be careful not to introduce an infinite loop of updates. - * *) + *) Db.Host.set_iscsi_iqn ~__context ~self:host ~value ; Db.Host.add_to_other_config ~__context ~self:host ~key:"iscsi_iqn" ~value ; Xapi_host_helpers.Configuration.set_initiator_name value diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index b618edade44..f5584fa8634 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -1138,7 +1138,7 @@ let start_of_day_best_effort_bring_up ~__context () = debug "Configured network backend: %s" (Network_interface.string_of_kind (Net.Bridge.get_kind dbg ())) ; (* Clear the state of the network daemon, before refreshing it by plugging - * the most important PIFs (see above). *) + * the most important PIFs (see above). *) Net.clear_state () ; List.iter (fun (pif, pifr) -> diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index fdbce5a595b..771f3a68243 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3066,8 +3066,10 @@ let disable_external_auth ~__context ~pool:_ ~config = debug "Failed to disable the external authentication of at least one \ host in the pool" ; - if String.starts_with ~prefix:Api_errors.auth_disable_failed err - then (* tagged exception *) + if + String.starts_with ~prefix:Api_errors.auth_disable_failed err + (* tagged exception *) + then raise (Api_errors.Server_error (Api_errors.pool_auth_prefix ^ err, [Ref.string_of host; msg]) diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index bc85146e223..e4b5a495a5f 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -426,12 +426,13 @@ let revalidate_external_session ~__context acc session = try (* guard: we only want to revalidate external sessions, where is_local_superuser is false *) (* Neither do we want to revalidate the special read-only external database sessions, since they can exist independent of external authentication. *) + (* 1. is the external authentication disabled in the pool? *) if not (Db.Session.get_is_local_superuser ~__context ~self:session || Xapi_database.Db_backend.is_session_registered (Ref.string_of session) ) - then (* 1. is the external authentication disabled in the pool? *) + then let master = Helpers.get_master ~__context in let auth_type = Db.Host.get_external_auth_type ~__context ~self:master in if auth_type = "" then ( diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 68d07bfac11..cff0e2d8338 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -1351,11 +1351,11 @@ let set_suspend_VDI ~__context ~self ~value = let dst_vdi = value in if src_vdi <> dst_vdi then ( (* - * We don't care if the future host can see current suspend VDI or not, but - * we want to make sure there's at least a host can see all the VDIs of the - * VM + the new suspend VDI. We raise an exception if there's no suitable - * host. - *) + * We don't care if the future host can see current suspend VDI or not, but + * we want to make sure there's at least a host can see all the VDIs of the + * VM + the new suspend VDI. We raise an exception if there's no suitable + * host. + *) let vbds = Db.VM.get_VBDs ~__context ~self in let vbds = List.filter (fun self -> not (Db.VBD.get_empty ~__context ~self)) vbds @@ -1615,7 +1615,7 @@ let nvram = Mutex.create () let set_NVRAM_EFI_variables ~__context ~self ~value = with_lock nvram (fun () -> (* do not use remove_from_NVRAM: we do not want to - * temporarily end up with an empty NVRAM in HA *) + * temporarily end up with an empty NVRAM in HA *) let key = "EFI-variables" in let nvram = Db.VM.get_NVRAM ~__context ~self in let value = (key, value) :: List.remove_assoc key nvram in diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 9556096fe4e..551a292f396 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -497,7 +497,7 @@ let has_non_allocated_vgpus ~__context ~self = * 4. Remove the list head from the remainding vGPU list of the VM * 5. Repeat step 2-4 until fail or the remainding list is empty * 6. Return success - * *) + *) let assert_gpus_available ~__context ~self ~host = let vgpus = Db.VM.get_VGPUs ~__context ~self in let vGPU_structs = List.map (Vgpuops.vgpu_of_ref ~__context) vgpus in @@ -1672,8 +1672,10 @@ let ensure_device_model_profile_present ~__context ~domain_type ~is_a_template let trad = Vm_platform.(device_model, fallback_device_model_stage_1) in if is_a_template then platform - else if (not needs_qemu) || List.mem_assoc Vm_platform.device_model platform - then (* upgrade existing Device Model entry *) + else if + (not needs_qemu) || List.mem_assoc Vm_platform.device_model platform + (* upgrade existing Device Model entry *) + then platform |> List.map (fun entry -> if entry = trad then default else entry) else (* only add device-model to an HVM VM platform if it is not already there *) diff --git a/ocaml/xapi/xapi_vmss.ml b/ocaml/xapi/xapi_vmss.ml index 611b729f6c4..ff32580365d 100644 --- a/ocaml/xapi/xapi_vmss.ml +++ b/ocaml/xapi/xapi_vmss.ml @@ -174,7 +174,7 @@ let set_type ~__context ~self ~value = (* Workaround for `param-set` calling `remove_from_schedule` first then `add_to_schedule` * In case `value` supplied is invalid for `add_to_schedule` it must not remove the key * We need the cache the original value before removing the key - * *) + *) let schedule_backup = ref [] let remove_from_schedule ~__context ~self ~key = diff --git a/ocaml/xapi/xapi_vusb_helpers.ml b/ocaml/xapi/xapi_vusb_helpers.ml index 5c17c5e8130..19298735a06 100644 --- a/ocaml/xapi/xapi_vusb_helpers.ml +++ b/ocaml/xapi/xapi_vusb_helpers.ml @@ -29,11 +29,11 @@ let valid_operations ~__context record _ref' : table = let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vUSB_current_operations in (* Policy: - * one operation at a time - * a running VM can do plug depending on whether the VUSB is already attached to VM. - * a running VM can do unplug depending on whether the VUSB is already attached to VM. - * - *) + * one operation at a time + * a running VM can do plug depending on whether the VUSB is already attached to VM. + * a running VM can do unplug depending on whether the VUSB is already attached to VM. + * + *) let table : table = Hashtbl.create 10 in List.iter (fun x -> Hashtbl.replace table x None) all_ops ; let set_errors (code : string) (params : string list) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index bd31674a03a..72a840f9f15 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -69,7 +69,7 @@ let get_link_stats () = in List.map (fun link -> (standardise_name (Link.get_name link), link)) links |> (* Only keep interfaces with prefixes on the whitelist, and exclude VLAN - devices (ethx.y). *) + devices (ethx.y). *) List.filter (fun (name, _) -> is_whitelisted name && not (is_vlan name)) in let devs = diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 32dcb1f7d39..4af94d7b96c 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -389,7 +389,7 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept with _ -> let max_per_vif = 8 in (* 1 VIF takes up (256 rx entries + 256 tx entries) * 8 queues max - * 8 bytes per grant table entry / 4096 bytes size of frame *) + * 8 bytes per grant table entry / 4096 bytes size of frame *) let reasonable_per_vbd = 1 in (* (1 ring (itself taking up one granted page) + 1 ring * 32 requests * 11 grant refs contained in each * 8 bytes ) / diff --git a/unixpwd/src/unixpwd.mli b/unixpwd/src/unixpwd.mli index c191870388f..f613bf01d5d 100644 --- a/unixpwd/src/unixpwd.mli +++ b/unixpwd/src/unixpwd.mli @@ -30,7 +30,7 @@ val get : string -> string * /etc/shadow database if an entry exists, otherwise it tries to * obtain the password from the /etc/passwd database. It raises [Error] * if that fails. - * *) + *) val setpwd : string -> string -> unit From 1596ea956ff8c9f1a5e0602b796ea253fdedc0aa Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 22 Sep 2025 13:20:19 +0100 Subject: [PATCH 489/492] git-blame-ignore-revs: ignore previous, formatting commit Signed-off-by: Pau Ruiz Safont --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 721e2c63221..0b898836157 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -32,6 +32,7 @@ d6ab15362548b8fe270bd14d5153b8d94e1b15c0 b12cf444edea15da6274975e1b2ca6a7fce2a090 364c27f5d18ab9dd31825e67a93efabecad06823 d8b4de9076531dd13bdffa20cc10c72290a52356 +bdf06bca7534fbc0c4fc3cee3408a51a22615226 # ocp-indent d018d26d6acd4707a23288b327b49e44f732725e From 7af60c7877b6449280f9039516342ce5b7583bc6 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Wed, 17 Sep 2025 16:25:49 +0100 Subject: [PATCH 490/492] networkd: Remove usage of ovs-vlan-bug-workaround networkd had code to call ovs-vlan-bug-workaround, but this has been made obsolete as the bug in question only applies to kernels before 2.6.37, which is long out of support. We can safely remove the functionality associated with applying the workaround. Signed-off-by: Christian Pardillo Laursen --- ocaml/networkd/bin/network_server.ml | 8 +----- ocaml/networkd/lib/network_utils.ml | 43 +--------------------------- 2 files changed, 2 insertions(+), 49 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 3cc2df776b5..dee5edb0985 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -933,12 +933,6 @@ module Bridge = struct "standalone" ) in - let vlan_bug_workaround = - if List.mem_assoc "vlan-bug-workaround" other_config then - Some (List.assoc "vlan-bug-workaround" other_config = "true") - else - None - in let external_id = if List.mem_assoc "network-uuids" other_config then Some @@ -966,7 +960,7 @@ module Bridge = struct Option.iter (destroy_existing_vlan_ovs_bridge dbg name) vlan ; ignore (Ovs.create_bridge ?mac ~fail_mode ?external_id ?disable_in_band - ?igmp_snooping vlan vlan_bug_workaround name + ?igmp_snooping vlan name ) ; if igmp_snooping = Some true && not old_igmp_snooping then Ovs.inject_igmp_query ~name diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 2c3cdab9fb8..94ac2c35c5d 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -51,8 +51,6 @@ let ovs_ofctl = "/usr/bin/ovs-ofctl" let ovs_appctl = "/usr/bin/ovs-appctl" -let ovs_vlan_bug_workaround = "/usr/sbin/ovs-vlan-bug-workaround" - let brctl = ref "/sbin/brctl" let modprobe = "/sbin/modprobe" @@ -1352,44 +1350,6 @@ module Ovs = struct ) with _ -> warn "Failed to set max-idle=%d on OVS" t - let handle_vlan_bug_workaround override bridge = - (* This is a list of drivers that do support VLAN tx or rx acceleration, - but to which the VLAN bug workaround should not be applied. This could - be because these are known-good drivers (that is, they do not have any - of the bugs that the workaround avoids) or because the VLAN bug - workaround will not work for them and may cause other problems. - - This is a very short list because few drivers have been tested. *) - let no_vlan_workaround_drivers = ["bonding"] in - let phy_interfaces = - try - let interfaces = bridge_to_interfaces bridge in - List.filter Sysfs.is_physical interfaces - with _ -> [] - in - List.iter - (fun interface -> - let do_workaround = - match override with - | Some value -> - value - | None -> ( - match Sysfs.get_driver_name interface with - | None -> - Sysfs.has_vlan_accel interface - | Some driver -> - if List.mem driver no_vlan_workaround_drivers then - false - else - Sysfs.has_vlan_accel interface - ) - in - let setting = if do_workaround then "on" else "off" in - try ignore (call_script ovs_vlan_bug_workaround [interface; setting]) - with _ -> () - ) - phy_interfaces - let get_vlans name = try let vlans_with_uuid = @@ -1486,13 +1446,12 @@ module Ovs = struct ["--"; "--may-exist"; "add-port"; bridge; name] @ type_args let create_bridge ?mac ?external_id ?disable_in_band ?igmp_snooping - ~fail_mode vlan vlan_bug_workaround name = + ~fail_mode vlan name = let vlan_arg = match vlan with | None -> [] | Some (parent, tag) -> - handle_vlan_bug_workaround vlan_bug_workaround parent ; [parent; string_of_int tag] in let mac_arg = From 53d23bbacf66f71b31100f5bc99c54f86dc544d6 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 23 Sep 2025 15:33:26 +0100 Subject: [PATCH 491/492] networkd: Remove has_vlan_accel from network_utils Now that we have removed handle_vlan_bug_workaround, has_vlan_accel is no longer used anywhere, and it is no longer relevant. Therefore, we can safely remove it, and the function get_features that it calls. Signed-off-by: Christian Pardillo Laursen --- ocaml/networkd/lib/network_utils.ml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 94ac2c35c5d..ca6153dae5a 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -272,25 +272,6 @@ module Sysfs = struct Result.Error (Fail_to_get_driver_name, "Failed to get driver name for: " ^ dev) - (** Returns the features bitmap for the driver for [dev]. The features bitmap - is a set of NETIF_F_ flags supported by its driver. *) - let get_features dev = - try Some (int_of_string (read_one_line (getpath dev "features"))) - with _ -> None - - (** Returns [true] if [dev] supports VLAN acceleration, [false] otherwise. *) - let has_vlan_accel dev = - let flag_NETIF_F_HW_VLAN_TX = 128 in - let flag_NETIF_F_HW_VLAN_RX = 256 in - let flag_NETIF_F_VLAN = - flag_NETIF_F_HW_VLAN_TX lor flag_NETIF_F_HW_VLAN_RX - in - match get_features dev with - | None -> - false - | Some features -> - features land flag_NETIF_F_VLAN <> 0 - let set_multicast_snooping bridge value = try let path = getpath bridge "bridge/multicast_snooping" in From c93e95712c34855da5275cf06965de332d4479bc Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Tue, 11 Feb 2025 12:00:00 +0100 Subject: [PATCH 492/492] Hugo docs: Add NUMA design docs Signed-off-by: Bernhard Kaindl --- .../features/NUMA/{index.md => _index.md} | 0 .../toolstack/features/NUMA/lazy-reclaim.md | 252 ++++++++++++++++++ .../features/NUMA/parallel-VM.build.md | 52 ++++ 3 files changed, 304 insertions(+) rename doc/content/toolstack/features/NUMA/{index.md => _index.md} (100%) create mode 100644 doc/content/toolstack/features/NUMA/lazy-reclaim.md create mode 100644 doc/content/toolstack/features/NUMA/parallel-VM.build.md diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/_index.md similarity index 100% rename from doc/content/toolstack/features/NUMA/index.md rename to doc/content/toolstack/features/NUMA/_index.md diff --git a/doc/content/toolstack/features/NUMA/lazy-reclaim.md b/doc/content/toolstack/features/NUMA/lazy-reclaim.md new file mode 100644 index 00000000000..b99c4dfdd1f --- /dev/null +++ b/doc/content/toolstack/features/NUMA/lazy-reclaim.md @@ -0,0 +1,252 @@ +--- +title: "Lazy memory reclaim" +weight: 10 +categories: + NUMA +--- +## Xen host memory scrubbing + +Xen does not immediately reclaim deallocated memory. +Instead, Xen has a host memory scrubber that runs lazily in +the background to reclaim recently deallocated memory. + +Thus, there is no guarantee that Xen has finished scrubbing +when `xenopsd` is being asked to build a domain. + +## Waiting for enough free host memory + +> [!info] +> In case reclaimed memory is not sufficient yet, before building +> a VM, `xenopsd` waits by polling until enough memory is reclaimed. +> See the +> [walk-through of Domain.build](../../../xenopsd/walkthroughs/VM.build/Domain.build.md#build_pre-prepare-building-the-vm) +> for more context. + +For this, with CA-39743, the implementation of Xenopsd's +[build_pre](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L899-L964) +function was updated to +[call](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L904) +[wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L236-L272) +to wait for to Xen reclaim a sufficient amount of host memory +host-wide: + +```ml +let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = + let open Memory in + let uuid = get_uuid ~xc domid in + debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" + (Uuidx.to_string uuid) domid memory.required_host_free_mib ; + (* CA-39743: Wait, if necessary, for the Xen scrubber to catch up. *) + if + not (wait_xen_free_mem ~xc (Memory.kib_of_mib memory.required_host_free_mib)) + then ( + error "VM = %s; domid = %d; Failed waiting for Xen to free %Ld MiB" + (Uuidx.to_string uuid) domid memory.required_host_free_mib ; + raise (Not_enough_memory (Memory.bytes_of_mib memory.required_host_free_mib)) + ) ; +``` + +This is the implementation of the function: + +```ml +let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib + : bool = + let open Memory in + let rec wait accumulated_wait_time_seconds = + let host_info = Xenctrl.physinfo xc in + let free_memory_kib = + kib_of_pages (Int64.of_nativeint host_info.Xenctrl.free_pages) + in + let scrub_memory_kib = + kib_of_pages (Int64.of_nativeint host_info.Xenctrl.scrub_pages) + in + (* At exponentially increasing intervals, write *) + (* a debug message saying how long we've waited: *) + if is_power_of_2 accumulated_wait_time_seconds then + debug + "Waited %i second(s) for memory to become available: %Ld KiB free, %Ld \ + KiB scrub, %Ld KiB required" + accumulated_wait_time_seconds free_memory_kib scrub_memory_kib + required_memory_kib ; + if + free_memory_kib >= required_memory_kib + (* We already have enough memory. *) + then + true + else if scrub_memory_kib = 0L (* We'll never have enough memory. *) then + false + else if + accumulated_wait_time_seconds >= maximum_wait_time_seconds + (* We've waited long enough. *) + then + false + else ( + Thread.delay 1.0 ; + wait (accumulated_wait_time_seconds + 1) + ) + in + wait 0 +``` + +## Waiting for enough free memory on NUMA nodes + +For NUMA, to improve the chances to make the new domain affine +to a single NUMA node, a similar algorighm could be utilized. + +This should be done directly before the NUMA placement algorithm +runs, or even as part of an improvement for it: + +The NUMA placement algorigthm calls the `numainfo` hypercall to +obtain the table of NUMA nodes along the available memory on each +node and the distance matrix between the NUMA nodes. + +If the reported free memory of the host is lower than would be +expected at that moment, this might be an indidation that some +memory might not be scrubbed yet. Another indication might be +if the amount of free memory is increasing betwen two checks. + +In such cases, when Xen has yet to reclaim more memory, or if +other domains are in the process of being shut down, it might +likewise make sense to wait until more memory is free to obtain +NUMA placement or a better NUMA placement for the new domain. + +In such situations, assuming for example that the NUMA placement +failed to obtain a suitable NUMA node affinity for the new domain, +the smallest possible change would be to simply re-run the NUMA +placement algorithm (essentially to poll for more memory) like +implemented with CA-39743 by calling `wait_xen_free_mem`, but +this time not by polling for system-wide memory, but for per-NUMA +node memory. + +As the NUMA placement algorithm does exactly that, simply +re-starting it in a defined way would be the smallest change +to handle cases of having to poll for free memory on a per-NUMA +node basis, if lazy memory reclaim is in progress on the host. + +## An example scenario + +This is an example scenario where not waiting for memory scrubbing +in a NUMA-aware way could fragment the VM across many NUMA nodes: + +In this example, a relatively large VM is rebooted: + +Fictional machine with 4 NUMA nodes, 25 GB each (for layout reasons): +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-24: "free: 6 GB" + 25-44: "VM before restart: 20 GB" + 45-49: "free: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "free: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "free: 5GB" +``` +VM is destroyed: +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-24: "free: 6 GB" + 25-44: "VM memory to be reclaimed, but not yet scrubbed" + 45-49: "free: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "free: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "free: 5GB" +``` + +NUMA placement runs, and sees that no NUMA node has enough memory +for the VM. Therefore: +1. NUMA placement does not return a NUMA placement solution. +2. As a result, vCPU soft pinning it not set up +3. As a result, the domain does not get a NUMA node affinity +4. When `xenguest` allocates the VM's memory, Xen falls back to + round-robin memory allocation across all NUMA nodes. + +Even if Xen has already scrubbed the memory by the time the +NUMA placement function returns, the decision to not select +a NUMA placement has already been done and the domain is +built in this way: +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-23: "VM: 5GB" + 24-24: "" + 25-44: "scrubbed/reclaimed free memory: 20 GB" + 45-49: "VM: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "VM: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "VM: 5GB" +``` + +In case the reclaimed 20 GB of memory is not partially allocated +for other VMs in the meantime: +After scrubbing and memory reclaim is complete. the 20 GB +of NUMA-node memory is available for the VM again. + +When the 20 GB VM is rebooted and the memory is still available, +the rebooted VM might become NUMA-affine to the 2nd NUMA node again. Of course, this unpredictability is what we need to fix. + +## Starting VMs when not enough reclaim is possible + +But, when no NUMA node has enough memory to run a new VM, +waiting will not help. + +But, it might be good to inform the caller that perfect NUMA placement +could not be achieved. + +However, if a CPU socket with multiple NUMA nodes with a very low +internode distance has enough free memory, that could be seen as +a fallback that would have relatively low performance impact. + +In the end, in such situations, it depends on the caller what to do: +Whether to start the VM anyway despite being not NUMA-aligned, +or to inform the caller of an expected performance degradation of the VM. + +### Example scenario when not waiting for free NUMA node memory: + +Note: This uses round numbers for easy checking and is purely theoretical: + +| Node | RAM | used | free | +| ----:| ---:| ----:| ----:| +| 1 | 50 | 35 | 15 | +| 2 | 50 | 45 | 5 | +| 3 | 50 | 35 | 15 | +| 4 | 50 | 35 | 15 | +| all | 200 | 150 | 50 | + +Action: A 45 GB VM on Node 2 is shut down and started again. +1. When the new `VM.start` runs, the 45 GB may not have been scrubbed yet. +2. The free memory check still finds 50 GB free, enough to start the VM. +3. NUMA placement picks one of the other nodes as they have more memory. +4. For example, assume it picks node 0, and sets the node-affinity to it. +5. The Xen buddy allocator will run out of 1GB superpages on node 0 after + having exhausted the 15 GB free memory on it. +6. This leaves 30 GB to be allocated elsewhere. +7. Meanwhile, some memory might have been scrubbed and reclaimed on Node 2. +8. The Xen buddy allocator then falls back to allocating in a round-robin + fashion from the other NUMA nodes, assume 10 GB on each of the 3 nodes. + +New memory situation after the restart: + +| Node | RAM | used | free | Dom1 | +| ----:| ---:| ----:| ----:| ----:| +| 1 | 50 | 50 | 0 | 15 | +| 2 | 50 | 10 | 40 | 10 | +| 3 | 50 | 45 | 5 | 10 | +| 4 | 50 | 45 | 5 | 10 | +| all | 200 | 150 | 50 | 45 | + +Thus, a single VM restart may cause the VM's memory to be spread over +all NUMA nodes. As a result, most memory accesses would be remote. + +`xenguest` populates the Guest memory in the process of the build step. + +But as the `VM.build` micro-ops running in parallel, this can happen: +The free memory reported by Xen may not yet reflect memory that will +be allocated by other concurrently running `VM.build` micro-ops when +the `xenguest` processes started by them populate the VM memory. diff --git a/doc/content/toolstack/features/NUMA/parallel-VM.build.md b/doc/content/toolstack/features/NUMA/parallel-VM.build.md new file mode 100644 index 00000000000..fe08dce2102 --- /dev/null +++ b/doc/content/toolstack/features/NUMA/parallel-VM.build.md @@ -0,0 +1,52 @@ +--- +title: "Parallel VM build" +categories: + - NUMA +weight: 50 +mermaid: + force: true +--- + +## Introduction + +When the `xenopsd` server receives a `VM.start` request, it: +1. splits the request it into micro-ops and +2. dispatches the micro-ops in one queue per VM. + +When `VM.start` requests arrive faster than the thread pool +finishes them, the thread pool will run multiple +micro-ops for different VMs in parallel. This includes the +VM.build micro-op that does NUMA placement and VM memory allocation. + +The [Xenopsd architecture](xenopsd/architecture/_index) and the +[walkthrough of VM.start](VM.start) provide more details. + +This walkthrough dives deeper into the `VM_create` and `VM_build` micro-ops +and focusses on allocating the memory allocation for different VMs in +parallel with respect to the NUMA placement of the starting VMs. + +## Architecture + +This diagram shows the [architecture](../../../xenopsd/architecture/_index) of Xenopsd: + +At the top of the diagram, two client RPCs have been sent: +One to start a VM and the other to fetch the latest events. +The `Xenops_server` module splits them into "micro-ops" (labelled "μ op" here). +These micro-ops are enqueued in queues, one queue per VM. The thread pool pulls +from the VM queues and runs the micro-ops: + +![Inside xenopsd](../../../../xenopsd/architecture/xenopsd.svg) +
Image 1: Xenopsd architecture
+ +Overview of the micro-ops for creating a new VM: + +- `VM.create`: create an empty Xen domain in the Hypervisor and the Xenstore +- `VM.build`: build a Xen domain: Allocate guest memory and load the firmware and `hvmloader` +- Several micro-ops to attach devices launch the device model. +- `VM.unpause`: unpause the domain + +## Flowchart: Parallel VM start + +When multiple `VM.start` run concurrently, an example could look like this: + +{{% include "snippets/vm-build-parallel" %}}