From 5e4b0bd75e05aa7bddac6720cd8166670038dfdd Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 26 Feb 2025 12:00:00 +0100 Subject: [PATCH] (docs) xenguest: Add a walk-through on populating VM memory Signed-off-by: Bernhard Kaindl --- .../lib/xenctrl/get_free_buddy-flowchart.md | 34 +++++ .../lib/xenctrl/populate_physmap-dataflow.md | 100 +++++++++++++ .../xenctrl/struct/xen_memory_reservation.md | 53 +++++++ .../lib/xenctrl/xc_domain_node_setaffinity.md | 32 +++-- .../lib/xenctrl/xc_domain_populate_physmap.md | 135 ++++++++++++++++++ .../xenopsd/walkthroughs/VM.build/xenguest.md | 86 ++++++----- .../VM.build/xenguest/do_hvm_build.md | 57 ++++++++ .../VM.build/xenguest/mode_vm_build.md | 36 +++++ 8 files changed, 487 insertions(+), 46 deletions(-) create mode 100644 doc/content/lib/xenctrl/get_free_buddy-flowchart.md create mode 100644 doc/content/lib/xenctrl/populate_physmap-dataflow.md create mode 100644 doc/content/lib/xenctrl/struct/xen_memory_reservation.md create mode 100644 doc/content/lib/xenctrl/xc_domain_populate_physmap.md create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/xenguest/do_hvm_build.md create mode 100644 doc/content/xenopsd/walkthroughs/VM.build/xenguest/mode_vm_build.md diff --git a/doc/content/lib/xenctrl/get_free_buddy-flowchart.md b/doc/content/lib/xenctrl/get_free_buddy-flowchart.md new file mode 100644 index 00000000000..5d7d7dcd534 --- /dev/null +++ b/doc/content/lib/xenctrl/get_free_buddy-flowchart.md @@ -0,0 +1,34 @@ +--- +title: Flowchart of get_free_buddy() of the Xen Buddy allocator +hidden: true +--- +```mermaid +flowchart TD +alloc_round_robin--No free memory on the host-->Failure +node_affinity_exact--No free memory
on the Domain's +node_affinity nodes:
Abort exact allocation-->Failure + +get_free_buddy["get_free_buddy()"] +-->MEMF_node{memflags
&
MEMF_node?} +--Yes--> + try_MEMF_node{Alloc
from
node}--Success: page-->Success + try_MEMF_node--No free memory on the node + -->MEMF_exact{memflags
&
MEMF_exact?} + MEMF_exact--"No"-->node_affinity_set{NUMA affinity set?} + node_affinity_set + --Domain->node_affinity is
not set: Fall back to
round-robin allocation + -->alloc_round_robin + MEMF_exact--No free memory on
the requested NUMA node: + Abort exact allocation-->Failure + MEMF_node--No NUMA node in memflags + -->node_affinity_set{domain->
node_affinity
set?} + --Set-->node_affinity{Alloc from
node_affinity
nodes} + --No free memory on
the node_affinity nodes
Check if exact request + -->node_affinity_exact{memflags
&
MEMF_exact?} + --Not exact: Fall back to
round-robin allocation-->alloc_round_robin + node_affinity--Success: page-->Success + alloc_round_robin{"Fall back to
round-robin + allocation"}--Success: page-->Success(Success: Return the page) +click get_free_buddy +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L855" _blank +``` diff --git a/doc/content/lib/xenctrl/populate_physmap-dataflow.md b/doc/content/lib/xenctrl/populate_physmap-dataflow.md new file mode 100644 index 00000000000..e03f1d406a5 --- /dev/null +++ b/doc/content/lib/xenctrl/populate_physmap-dataflow.md @@ -0,0 +1,100 @@ +--- +title: Flowchart for the populate_physmap hypercall +hidden: true +--- +```mermaid +flowchart + +subgraph XenCtrl +xc_domain_populate_physmap["xc_domain_populate_physmap()"] +xc_domain_populate_physmap_exact["xc_domain_populate_physmap_exact()"] +end + +subgraph Xen + +%% sub-subgraph from memory_op() to populate_node() and back + +xc_domain_populate_physmap & xc_domain_populate_physmap_exact +<--reservation,
and for preempt:
nr_start/nr_done--> +memory_op("memory_op(XENMEM_populate_physmap)") + +memory_op + --struct xen_memory_reservation--> + construct_memop_from_reservation("construct_memop_from_reservation()") + --struct
xen_memory_reservation->mem_flags--> + propagate_node("propagate_node()") + --_struct
memop_args->memflags_--> + construct_memop_from_reservation + --_struct memop_args_ +-->memory_op<--struct memop_args *: + struct domain *, + List of extent base addrs, + Number of extents, + Size of each extent (extent_order), + Allocation flags(memflags)--> + populate_physmap[["populate_physmap()"]] + <-.domain, extent base addrs, extent size, memflags, nr_start and nr_done.-> + populate_physmap_loop--if memflags & MEMF_populate_on_demand -->guest_physmap_mark_populate_on_demand(" + guest_physmap_mark_populate_on_demand()") + populate_physmap_loop@{ label: "While extents to populate, + and not asked to preempt, + for each extent left to do:", shape: notch-pent } + --domain, order, memflags--> + alloc_domheap_pages("alloc_domheap_pages()") + --zone_lo, zone_hi, order, memflags, domain--> + alloc_heap_pages + --zone_lo, zone_hi, order, memflags, domain--> + get_free_buddy("get_free_buddy()") + --_page_info_ + -->alloc_heap_pages + --if no page--> + no_scrub("get_free_buddy(MEMF_no_scrub) + (honored only when order==0)") + --_dirty 4k page_ + -->alloc_heap_pages + <--_dirty 4k page_--> + scrub_one_page("scrub_one_page()") + alloc_heap_pages("alloc_heap_pages() + (also splits higher-order pages + into smaller buddies if needed)") + --_page_info_ + -->alloc_domheap_pages + --page_info, order, domain, memflags-->assign_page("assign_page()") + assign_page + --page_info, nr_mfns, domain, memflags--> + assign_pages("assign_pages()") + --domain, nr_mfns--> + domain_adjust_tot_pages("domain_adjust_tot_pages()") + alloc_domheap_pages + --_page_info_--> + populate_physmap_loop + --page(gpfn, mfn, extent_order)--> + guest_physmap_add_page("guest_physmap_add_page()") + +populate_physmap--nr_done, preempted-->memory_op +end +click memory_op +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1409-L1425" _blank +click construct_memop_from_reservation +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1022-L1071" _blank +click propagate_node +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L524-L547" _blank +click populate_physmap +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L159-L314" _blank +click populate_physmap_loop +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L197-L304" _blank +click guest_physmap_mark_populate_on_demand +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L210-220" _blank +click guest_physmap_add_page +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L296" _blank +click get_free_buddy +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L855-L958" _blank +click alloc_heap_pages +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L967-L1116" _blank +click assign_page +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L2540-L2633" _blank +click assign_pages +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L2635-L2639" _blank +click alloc_domheap_pages +"https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L2641-L2697" _blank +``` diff --git a/doc/content/lib/xenctrl/struct/xen_memory_reservation.md b/doc/content/lib/xenctrl/struct/xen_memory_reservation.md new file mode 100644 index 00000000000..b9772ed3520 --- /dev/null +++ b/doc/content/lib/xenctrl/struct/xen_memory_reservation.md @@ -0,0 +1,53 @@ +--- +title: xen_memory_reservation +description: xen_memory_reservation for memory-related hypercalls +hidden: 
true
+---
+[struct xen_memory_reservation](https://github.com/xen-project/xen/blob/96970b46/xen/include/public/memory.h#L46-80)
+is used by
+[these XENMEM hypercall commands](https://github.com/xen-project/xen/blob/96970b46/xen/include/public/memory.h#L48-59):
+
+- `XENMEM_increase_reservation`: Returns the first MFN of the allocated extents
+- `XENMEM_decrease_reservation`: Passes the first GPFN of the extents to free
+- [XENMEM_populate_physmap](../xc_domain_populate_physmap):
+  - In: The first GPFN to populate with memory
+  - Out: Returns the first GMFN base of the extents that were allocated
+    (NB. This command also updates the mach_to_phys translation table)
+- `XENMEM_claim_pages`: Not used, must be passed as 0
+  (This is explicitly checked: Otherwise, it returns `-EINVAL`)
+
+[struct xen_memory_reservation](https://github.com/xen-project/xen/blob/96970b46/xen/include/public/memory.h#L46-80)
+is defined as:
+
+```c
+struct xen_memory_reservation {
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* PFN of the starting extent */
+    xen_ulong_t  nr_extents;   /* number of extents of size extent_order */
+    unsigned int extent_order; /* an order of 0 means: 4k pages, 1: 8k, etc. */
+    unsigned int mem_flags;
+    domid_t      domid;        /* integer ID of the domain */
+};
+```
+
+The `mem_flags` bit field is accessed using:
+
+```c
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+/* Flag to indicate the node specified is virtual node */
+#define XENMEMF_vnode (1<<18)
+```
diff --git a/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md b/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md
index 82cf2e36f08..d046f5c9f35 100644
--- a/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md
+++ b/doc/content/lib/xenctrl/xc_domain_node_setaffinity.md
@@ -62,16 +62,30 @@ https://github.com/xen-project/xen/blob/master/xen/common/domain.c#L943-L970"
 This function implements the functionality of `xc_domain_node_setaffinity`
 to set the NUMA affinity of a domain as described above.
 
-If the new_affinity does not intersect the `node_online_map`,
-it returns `-EINVAL`, otherwise on success `0`.
-
-When the `new_affinity` is a specific set of NUMA nodes, it updates the NUMA
-`node_affinity` of the domain to these nodes and disables `auto_node_affinity`
-for this domain. It also notifies the Xen scheduler of the change.
-
-This sets the preference the memory allocator to the new NUMA nodes,
-and in theory, it could also alter the behaviour of the scheduler.
-This of course depends on the scheduler and its configuration.
+- If `new_affinity` does not intersect the `node_online_map`,
+  it returns `-EINVAL`. Otherwise, it succeeds and returns `0`.
+- When the `new_affinity` is a specific set of NUMA nodes,
+  it sets `d->node_affinity` of the domain to these nodes
+  and disables `auto_node_affinity` for this domain.
+- If `new_affinity` has all bits set, it re-enables `auto_node_affinity`
+  for this domain and calls
+  [domain_update_node_aff()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/sched/core.c#L1809-L1876)
+  to re-derive the domain's `node_affinity` mask from the NUMA nodes of the
+  hard and soft affinity of the domain's online vCPUs.
+
+Changing the domain's node affinity changes the preference
+of the memory allocator to the new NUMA nodes.
+
+Currently, the only scheduling change is that if set before vCPU creation,
+the initial pCPU of a new vCPU is the first pCPU of the first NUMA node
+in the domain's `node_affinity`. This changes further when one or more
+`cpupools` are set up.
+
+When done early, before vCPU creation, domain-related data structures
+could be allocated on the NUMA nodes in the domain's `node_affinity` mask.
+With further changes in Xen, the vCPU structs could also be allocated
+using it.
 
 ## Notes on future design improvements
 
diff --git a/doc/content/lib/xenctrl/xc_domain_populate_physmap.md b/doc/content/lib/xenctrl/xc_domain_populate_physmap.md
new file mode 100644
index 00000000000..9d051ca9696
--- /dev/null
+++ b/doc/content/lib/xenctrl/xc_domain_populate_physmap.md
@@ -0,0 +1,135 @@
+---
+title: xc_domain_populate_physmap()
+description: Populate a Xen domain's physical memory map
+mermaid:
+  force: true
+---
+`xc_domain_populate_physmap()` and `xc_domain_populate_physmap_exact()`
+populate a Xen domain's physical memory map:
+Both call the `populate_physmap` hypercall, and
+`xc_domain_populate_physmap_exact()` also sets the flag
+for allocating memory only on the given NUMA node.
+
+As an overview, it
+[constructs](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1022-L1071)
+a `struct memop_args` from the requested
+[reservation](struct/xen_memory_reservation)
+(start address, page size, how many of them, optionally on which NUMA node) and
+[passes](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1459)
+it to
+[populate_physmap()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L159-L314)
+to
+[allocate](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L197)
+the requested number of pages:
+
+{{% include "populate_physmap-dataflow.md" %}}
+
+## construct_memop_from_reservation()
+
+[construct_memop_from_reservation()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1022-L1071)
+populates `struct memop_args` using the
+[hypercall arguments](struct/xen_memory_reservation). It:
+
+- Copies `extent_start`, `nr_extents`, and `extent_order`.
+- Populates `memop_args->memflags` using the given `mem_flags`.
+
+### Converting a vNODE to a pNODE for vNUMA
+
+When a vNUMA vnode is passed using `XENMEMF_vnode`, and `domain->vnuma` and
+`domain->vnuma->nr_vnodes` are set, and the vnode maps to a pnode,
+it also (see the sketch below):
+
+- Populates the `pnode` in the `memflags` of the `struct memop_args`
+- Sets `XENMEMF_exact_node_request` in them as well.
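+
+The following is a simplified sketch of this vnode-to-pnode translation in
+`construct_memop_from_reservation()` (condensed, not the verbatim Xen source:
+locking of `d->vnuma_rwlock` and error handling are omitted):
+
+```c
+/* Sketch: translate the caller's virtual NUMA node to the physical
+ * node backing it, and make the allocation exact for that pnode. */
+if ( r->mem_flags & XENMEMF_vnode )
+{
+    struct domain *d = a->domain;
+    nodeid_t vnode = XENMEMF_get_node(r->mem_flags);
+
+    if ( d->vnuma && vnode < d->vnuma->nr_vnodes )
+    {
+        nodeid_t pnode = d->vnuma->vnode_to_pnode[vnode];
+
+        if ( pnode != NUMA_NO_NODE )
+        {
+            a->memflags |= MEMF_node(pnode); /* allocate on this pnode */
+            a->memflags |= MEMF_exact_node;  /* and only on this pnode */
+        }
+    }
+}
+```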
+
+### Using propagate_node() to pass a pNODE
+
+If no vNUMA node is passed, `construct_memop_from_reservation`
+[calls](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1067)
+[propagate_node()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L524-L547)
+to propagate the NUMA node and `XENMEMF_exact_node_request` for use in Xen.
+
+## Allocate pages for the domain
+
+`memory_op()`
+[passes](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L1459)
+the populated `struct memop_args` to
+[populate_physmap()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L159-L314),
+which
+[loops over the extents](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L197)
+to populate:
+
+For each extent in the reservation,
+[it calls](https://github.com/xen-project/xen/blob/e16acd80/xen/common/memory.c#L275)
+[alloc_domheap_pages()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L2641),
+which
+[calls](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L2673)
+[alloc_heap_pages()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L968),
+which in turn
+[calls](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L1005)
+[get_free_buddy()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L855)
+to allocate the requested memory page.
+
+## Find a page using the buddy allocator
+
+[get_free_buddy()](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L855-L1116)
+is the main function of the Xen buddy allocator.
+If possible, it tries to find the best NUMA node and memory zone to allocate from.
+
+This flowchart shows an overview of the effects of the decisions described below:
+
+{{% include "get_free_buddy-flowchart.md" %}}
+
+Input parameters:
+- `struct domain`
+- Zones to allocate from (from `zone_hi` down to `zone_lo`)
+- Page order (the size of the page)
+  - populate_physmap() starts with 1GB pages and falls back to 2MB and 4k pages.
+
+Its first attempt is to find a free page of the matching page order
+on the requested NUMA node(s).
+
+If this is not successful, it tries breaking pages of higher page orders,
+and if that fails too, it lowers the zone until `zone_lo`.
+
+It does not normally attempt to use unscrubbed pages, but when `memflags`
+contains `MEMF_no_scrub`, it uses `check_and_stop_scrub(pg)` on dirty 4k
+pages instead of breaking higher-order pages.
+
+If this fails, it checks whether other NUMA nodes should be tried.
+
+### Exact NUMA allocation (on request, e.g. for vNUMA)
+
+For vNUMA domains, for example, the calling functions pass one specific
+NUMA node and also set `MEMF_exact_node` to make sure that
+memory is allocated only from this NUMA node.
+
+If no NUMA node was passed, or the allocation from it failed, and
+`MEMF_exact_node` was not set in `memflags`, the function falls
+back to NUMA-affine allocation.
+
+### NUMA-affine allocation
+
+For local NUMA memory allocation, the domain should have one or more NUMA nodes
+in its `struct domain->node_affinity` field when this function is called.
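+
+A hedged sketch of how `get_free_buddy()` chooses the NUMA node to search
+first (condensed from Xen's `page_alloc.c`; the zone handling and the
+search loop itself are omitted):
+
+```c
+/* Sketch: pick the node to start the search from. */
+nodeid_t node = MEMF_get_node(memflags);  /* node requested in memflags */
+nodemask_t nodemask = d ? d->node_affinity : node_online_map;
+
+if ( node == NUMA_NO_NODE )   /* no explicitly requested node */
+{
+    if ( d != NULL )
+        /* NUMA-affine round-robin: continue on the node after the
+         * one that the domain last allocated memory from. */
+        node = cycle_node(d->last_alloc_node, nodemask);
+
+    if ( node >= MAX_NUMNODES )
+        node = cpu_to_node(smp_processor_id()); /* default: local node */
+}
+```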
+
+Setting this field happens as part of
+[NUMA placement](../../../xenopsd/walkthroughs/VM.build/Domain.build/#numa-placement),
+which writes the planned vCPU affinity of the domain's vCPUs to the XenStore.
+[xenguest](../../../xenopsd/walkthroughs/VM.build/xenguest) reads it to
+update the vCPU affinities of the domain's vCPUs in Xen, which in turn,
+by default (when `domain->auto_node_affinity` is active), also updates the
+`struct domain->node_affinity` field.
+
+Note: When it contains multiple NUMA nodes, this step allocates round-robin:
+each allocation uses the next NUMA node after the previous
+NUMA node that the domain allocated from.
+
+Otherwise, the function falls back to host-wide round-robin allocation.
+
+### Host-wide round-robin allocation
+
+When the domain's `node_affinity` is not defined, or allocating from its
+nodes did not succeed, and `MEMF_exact_node` was not passed in `memflags`,
+all remaining NUMA nodes are attempted in a round-robin way: each subsequent
+call uses the next NUMA node after the previous node that the domain
+allocated memory from.
diff --git a/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md
index 70908d556fb..231642e6413 100644
--- a/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md
+++ b/doc/content/xenopsd/walkthroughs/VM.build/xenguest.md
@@ -2,6 +2,8 @@
 title: xenguest
 description: "Perform building VMs: Allocate and populate the domain's system
 memory."
+mermaid:
+  force: true
 ---
 As part of starting a new domain in VM_build, `xenopsd` calls `xenguest`.
 When multiple domain build threads run in parallel,
@@ -83,38 +85,30 @@ Xenstore[Xenstore platform data] --> xenguest
 When called to build a domain, `xenguest` reads those and builds the VM
 accordingly.
 
-## Walkthrough of the xenguest build mode
+## Walk-through of the xenguest build mode
 
-```mermaid
-flowchart
-subgraph xenguest[xenguest #8209;#8209;mode hvm_build domid]
-direction LR
-stub_xc_hvm_build[stub_xc_hvm_build#40;#41;] --> get_flags[
-    get_flags#40;#41; <#8209; Xenstore platform data
-]
-stub_xc_hvm_build --> configure_vcpus[
-    configure_vcpus#40;#41; #8209;> Xen hypercall
-]
-stub_xc_hvm_build --> setup_mem[
-    setup_mem#40;#41; #8209;> Xen hypercalls to setup domain memory
-]
-end
-```
+{{% include "xenguest/mode_vm_build.md" %}}
 
-Based on the given domain type, the `xenguest` program calls dedicated
-functions for the build process of the given domain type.
+The domain build functions
+[stub_xc_hvm_build()](https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L2329-L2436)
+and `stub_xc_pv_build()` call these functions:
 
-These are:
+1. `get_flags()` to get the platform data from the Xenstore
+   for filling out the fields of `struct flags` and `struct xc_dom_image`.
+2. `configure_vcpus()`, which uses the platform data from the Xenstore to
+   configure (see the sketch after this list):
+   - If `platform/vcpu/%d/affinity` is set, the vCPU affinity.
 
-- `stub_xc_hvm_build()` for HVM,
-- `stub_xc_pvh_build()` for PVH, and
-- `stub_xc_pv_build()` for PV domains.
+   By default, this sets the domain's `node_affinity` mask (NUMA nodes) as well.
+   This configures
+   [`get_free_buddy()`](https://github.com/xen-project/xen/blob/e16acd80/xen/common/page_alloc.c#L855-L958)
+   to prefer memory allocations from this NUMA node_affinity mask.
+   - If `platform/vcpu/weight` is set, the domain's scheduling weight
+   - If `platform/vcpu/cap` is set, the domain's scheduling cap (% pCPU time)
+3. The `_build_setup_mem` function for the given domain type
+   (e.g. `hvm_build_setup_mem()` for HVM).
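+
+A hedged sketch of the XenCtrl calls that `configure_vcpus()` conceptually
+makes for these platform settings (illustrative only: the helper
+`apply_vcpu_config()` and its parameters are not xenguest's actual code):
+
+```c
+#include <xenctrl.h>
+
+/* Illustrative helper: apply one vCPU's hard affinity and the domain's
+ * credit-scheduler weight/cap, as read from the XenStore platform data. */
+static int apply_vcpu_config(xc_interface *xch, uint32_t domid, int vcpu,
+                             xc_cpumap_t hard_affinity, int weight, int cap)
+{
+    int rc = 0;
+
+    if ( hard_affinity ) /* platform/vcpu/%d/affinity was set */
+        rc = xc_vcpu_setaffinity(xch, domid, vcpu, hard_affinity, NULL,
+                                 XEN_VCPUAFFINITY_HARD);
+
+    if ( !rc && (weight || cap) ) /* platform/vcpu/weight or vcpu/cap set */
+    {
+        struct xen_domctl_sched_credit sdom = {
+            .weight = weight ? weight : 256, /* 256: credit default weight */
+            .cap = cap, /* in % of one pCPU; 0 means no cap */
+        };
+        rc = xc_sched_credit_domain_set(xch, domid, &sdom);
+    }
+    return rc;
+}
+```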
-These domain build functions call these functions:
+Call graph of `do_hvm_build()` with emphasis on information flow:
 
-1. `get_flags()` to get the platform data from the Xenstore
-2. `configure_vcpus()` which uses the platform data from the Xenstore to configure vCPU affinity and the credit scheduler parameters vCPU weight and vCPU cap (max % pCPU time for throttling)
-3. The `setup_mem` function for the given VM type.
+{{% include "xenguest/do_hvm_build.md" %}}
 
 ## The function hvm_build_setup_mem()
 
@@ -129,26 +123,41 @@ new domain. It must:
 4. Call the `libxenguest` function `xc_dom_boot_mem_init()` (see below)
 5. Call `construct_cpuid_policy()` to apply the CPUID `featureset` policy
 
+It starts this by:
+- Getting `struct xc_dom_image`, `max_mem_mib`, and `max_start_mib`.
+- Calculating the start and size of the lower ranges of the domain's memory
+  maps, taking memory holes for I/O into account, e.g. `mmio_size` and
+  `mmio_start`.
+- Calculating `lowmem_end` and `highmem_end`.
+
+It then calls `xc_dom_boot_mem_init()`:
+
 ## The function xc_dom_boot_mem_init()
 
+`hvm_build_setup_mem()` calls
+[xc_dom_boot_mem_init()](https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_boot.c#L110-L126)
+to allocate and populate the domain's system memory:
+
 ```mermaid
 flowchart LR
 subgraph xenguest
 hvm_build_setup_mem[hvm_build_setup_mem#40;#41;]
 end
 subgraph libxenguest
-hvm_build_setup_mem --> xc_dom_boot_mem_init[xc_dom_boot_mem_init#40;#41;]
+hvm_build_setup_mem --vmemranges--> xc_dom_boot_mem_init[xc_dom_boot_mem_init#40;#41;]
 xc_dom_boot_mem_init -->|vmemranges| meminit_hvm[meminit_hvm#40;#41;]
 click xc_dom_boot_mem_init
 "https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_boot.c#L110-L126" _blank
 click meminit_hvm
 "https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_x86.c#L1348-L1648" _blank
 end
 ```
 
-`hvm_build_setup_mem()` calls
-[xc_dom_boot_mem_init()](https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_boot.c#L110-L126)
-to allocate and populate the domain's system memory.
+Except for error handling and tracing, it is only a wrapper that calls the
+architecture-specific `meminit()` hook for the domain type:
+
+```c
+rc = dom->arch_hooks->meminit(dom);
+```
 
-It calls
+For HVM domains, it calls
 [meminit_hvm()](https://github.com/xen-project/xen/blob/39c45c/tools/libs/guest/xg_dom_x86.c#L1348-L1648)
 to loop over the `vmemranges` of the domain for mapping the system RAM
 of the guest from the Xen hypervisor heap. Its goals are:
@@ -157,22 +166,21 @@ of the guest from the Xen hypervisor heap. Its goals are:
 - Fall back to 2MB pages when 1GB allocation failed
 - Fall back to 4k pages when both failed
 
-It uses the hypercall
-[XENMEM_populate_physmap](https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1408-L1477)
+It uses [xc_domain_populate_physmap()](../../../lib/xenctrl/xc_domain_populate_physmap.md)
 to perform memory allocation and to map the allocated memory
 to the system RAM ranges of the domain.
 
-https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1022-L1071
+## Overview of `XENMEM_populate_physmap`
 
-`XENMEM_populate_physmap`:
+The hypercall:
 
 1. Uses
 [construct_memop_from_reservation](https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1022-L1071)
 to convert the arguments for allocating a page from
 [struct xen_memory_reservation](https://github.com/xen-project/xen/blob/master/xen/include/public/memory.h#L46-L80)
 to `struct memop_args`.
-2. Sets flags and calls functions according to the arguments
-3. 
Allocates the requested page at the most suitable place +2. Passes the `struct domain` and the given `memflags` to `get_free_buddy()`. +3. This allocates the requested page at the most suitable place - depending on passed flags, allocate on a specific NUMA node - else, if the domain has node affinity, on the affine nodes - also in the most suitable memory zone within the NUMA node @@ -181,5 +189,9 @@ https://github.com/xen-project/xen/blob/39c45c/xen/common/memory.c#L1022-L1071 5. When no pages of the requested size are free, it splits larger superpages into pages of the requested size. +For a more detailed walk-through of the inner workings of this hypercall, +see the reference on +[xc_domain_populate_physmap()](../../../lib/xenctrl/xc_domain_populate_physmap.md). + For more details on the VM build step involving `xenguest` and Xen side see: https://wiki.xenproject.org/wiki/Walkthrough:_VM_build_using_xenguest diff --git a/doc/content/xenopsd/walkthroughs/VM.build/xenguest/do_hvm_build.md b/doc/content/xenopsd/walkthroughs/VM.build/xenguest/do_hvm_build.md new file mode 100644 index 00000000000..f20f94d1c0f --- /dev/null +++ b/doc/content/xenopsd/walkthroughs/VM.build/xenguest/do_hvm_build.md @@ -0,0 +1,57 @@ +--- +title: Call graph of xenguest/do_hvm_build() +description: Call graph of xenguest/do_hvm_build() with emphasis on information flow +--- +```mermaid +flowchart TD +do_hvm_build("do_hvm_build() for HVM") +--> stub_xc_hvm_build +get_flags("get_flags()") --"VM platform_data from XenStore" +--> stub_xc_hvm_build("stub_xc_hvm_build()") +stub_xc_hvm_build --> configure_vcpus(configure_vcpus#40;#41;) +configure_vcpus --"When
platform/ + vcpu/%d/affinity
is set"--> xc_vcpu_setaffinity +configure_vcpus --"When
platform/ + vcpu/cap
or + vcpu/weight
is set"--> xc_sched_credit_domain_set
+stub_xc_hvm_build --"struct xc_dom_image, mem_start_mib, mem_max_mib"
+--> hvm_build_setup_mem("hvm_build_setup_mem()")
+--"struct xc_dom_image
+    with
+    optional vmemranges"--> xc_dom_boot_mem_init
+  subgraph libxenguest
+    xc_dom_boot_mem_init("xc_dom_boot_mem_init()")
+    -- "struct xc_dom_image
+    with
+    optional vmemranges" -->
+    meminit_hvm("meminit_hvm()") -- "page_size (1GB, 2MB, 4k), memflags: e.g. exact" -->
+    xc_domain_populate_physmap("xc_domain_populate_physmap()")
+  end
+subgraph set_affinity[XenCtrl Hypercalls]
+direction TB
+  xc_sched_credit_domain_set("xc_sched_credit_domain_set()")
+  xc_vcpu_setaffinity("xc_vcpu_setaffinity()")
+  --> vcpu_set_affinity("vcpu_set_affinity()")
+  --> domain_update_node_aff("domain_update_node_aff()")
+  -- "if auto_node_affinity
+    is on (default)"--> auto_node_affinity("Update dom->node_affinity")
+end
+click vcpu_set_affinity
+"https://github.com/xen-project/xen/blob/e16acd806/xen/common/sched/core.c#L1353-L1393" _blank
+click domain_update_node_aff
+"https://github.com/xen-project/xen/blob/e16acd806/xen/common/sched/core.c#L1809-L1876" _blank
+click stub_xc_hvm_build
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L2329-L2436" _blank
+click hvm_build_setup_mem
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L2002-L2219" _blank
+click get_flags
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L1164-L1288" _blank
+click configure_vcpus
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L1297" _blank
+click xc_dom_boot_mem_init
+"https://github.com/xen-project/xen/blob/e16acd806/tools/libs/guest/xg_dom_boot.c#L110-L125" _blank
+click meminit_hvm
+"https://github.com/xen-project/xen/blob/e16acd806/tools/libs/guest/xg_dom_x86.c#L1348-L1648" _blank
+click xc_domain_populate_physmap
+"../../../../lib/xenctrl/xc_domain_populate_physmap/index.html" _blank
+```
diff --git a/doc/content/xenopsd/walkthroughs/VM.build/xenguest/mode_vm_build.md b/doc/content/xenopsd/walkthroughs/VM.build/xenguest/mode_vm_build.md
new file mode 100644
index 00000000000..572308833da
--- /dev/null
+++ b/doc/content/xenopsd/walkthroughs/VM.build/xenguest/mode_vm_build.md
@@ -0,0 +1,36 @@
+---
+hidden: true
+title: Call graph of the xenguest hvm/pvh/pv build functions
+description: Call graph of xenguest for calling the hvm/pvh/pv build functions
+---
+```mermaid
+flowchart LR
+xenguest_main["
+    xenguest
+    --mode hvm_build
+    /
+    --mode pvh_build
+    /
+    --mode pv_build
+"] --> do_hvm_build["
+    do_hvm_build() for HVM
+"] & do_pvh_build["do_pvh_build() for PVH"] -- "`**Arguments:**
+    domid
+    mem_max_mib
+    mem_start_mib
+    image
+    store_port
+    store_domid
+    console_port
+    console_domid`" --> stub_xc_hvm_build["stub_xc_hvm_build()"]
+xenguest_main --> do_pv_build["do_pv_build() for PV"] -->
+    stub_xc_pv_build["stub_xc_pv_build()"]
+click do_pv_build
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L575-L594" _blank
+click do_hvm_build
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L596-L615" _blank
+click do_pvh_build
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L617-L640" _blank
+click stub_xc_hvm_build
+"https://github.com/xenserver/xen.pg/blob/65c0438b/patches/xenguest.patch#L2329-L2436" _blank
+```