Understanding KVM Hyp Mode Memory Mapping Initialization
This article walks through the step‑by‑step initialization of KVM's hyp mode memory mapping on ARM64, covering EL2 register setup, identity mapping, page‑table allocation, stack preparation, and code section mappings required for a functional hypervisor environment.
3.1 hyp mode page tables and identity mapping
The article assumes vHE is disabled and defines the operating modes: EL1 runs the Linux host in host mode and the guest in guest mode, while EL2 runs in hyp mode.
On ARM64 each exception level has its own system control registers. EL2 uses SCTLR_EL2 to enable the MMU (bit 0). The code in arch/arm64/kernel/head.S sets SCTLR_EL2 to SCTLR_EL2_RES1 | ENDIAN_SET_EL2:
/* arch/arm64/kernel/head.S */
SYM_FUNC_START(el2_setup)
...
1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
msr sctlr_el2, x0
...
SYM_FUNC_END(el2_setup)
The macros are defined in arch/arm64/include/asm/sysreg.h:
#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
(BIT(29)))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
#else
#define ENDIAN_SET_EL2 0
#endif
These values keep the MMU disabled at early boot (bit 0 = 0). Later, when KVM is initialized, the MMU is turned on with code from arch/arm64/kvm/hyp/nvhe/hyp-init.S that writes ttbr0_el2 and updates SCTLR_EL2 to enable the MMU.
/* arch/arm64/kvm/hyp/nvhe/hyp-init.S */
phys_to_ttbr x4, x0
...
msr ttbr0_el2, x4
...
mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
msr sctlr_el2, x4
isb
/* Set the stack and new vectors */
kern_hyp_va x1
mov sp, x1
msr vbar_el2, x2
...
Because code runs both before and after the MMU is enabled, the early‑boot code that must execute with the MMU off is placed in an identity‑mapped region delimited by __hyp_idmap_text_start and __hyp_idmap_text_end. The linker script (arch/arm64/kernel/vmlinux.lds.S) reserves this region:
/* arch/arm64/kernel/vmlinux.lds.S */
__hyp_idmap_text_start = .;
*(.hyp.idmap.text)
__hyp_idmap_text_end = .;
3.2 hyp mode page tables and identity‑mapping setup
The function kvm_mmu_init (in arch/arm64/kvm/mmu.c) performs the following steps:
Obtain the physical addresses of the identity‑mapped region and align them to page boundaries.
Record the address of the hyp‑mode exception‑vector entry point (__kvm_hyp_init).
int kvm_mmu_init(void)
{
int err;
hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
...
}
A BUG_ON checks that the identity‑map does not cross a page boundary. Debug prints show the calculated addresses. If the region intersects the hyp VA space, the function aborts with -EINVAL.
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
...
if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
err = -EINVAL;
goto out;
}
The function then allocates a page‑aligned PGD for hyp mode:
hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (!hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
err = -ENOMEM;
goto out;
}
If the CPU does not use an extended id‑map, kvm_map_idmap_text is called to map the identity‑mapped region into the newly allocated PGD:
err = kvm_map_idmap_text(hyp_pgd);
if (err)
goto out;
io_map_base = hyp_idmap_start;
return 0;
kvm_map_idmap_text creates the mappings via __create_hyp_mappings:
/*
 * kvm_map_idmap_text - install the hyp identity mapping.
 *
 * Maps the page-aligned physical range [hyp_idmap_start, hyp_idmap_end)
 * into @pgd at virtual address == physical address (identity mapping),
 * with executable hyp permissions, so early EL2 code can run with the
 * MMU off and then turn it on without faulting.
 *
 * Returns 0 on success or a negative errno from __create_hyp_mappings.
 */
static int kvm_map_idmap_text(pgd_t *pgd)
{
	int err;

	/* VA == PA: the PFN is derived from the start address itself. */
	err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}
3.3 hyp mode stack and code mapping
The init_hyp_mode function (in arch/arm64/kvm/arm.c) orchestrates the overall hyp‑mode setup:
static int init_hyp_mode(void)
{
int cpu, err = 0;
err = kvm_mmu_init();
if (err)
goto out_err;
/* Allocate stack pages for each CPU */
for_each_possible_cpu(cpu) {
unsigned long stack_page = __get_free_page(GFP_KERNEL);
if (!stack_page) {
err = -ENOMEM;
goto out_err;
}
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
}
/* Map the Hyp code that is called directly from the host */
err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
if (err) { kvm_err("Cannot map world-switch code
"); goto out_err; }
err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
if (err) { kvm_err("Cannot map rodata section
"); goto out_err; }
err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
if (err) { kvm_err("Cannot map bss section
"); goto out_err; }
/* Map the Hyp stack pages */
for_each_possible_cpu(cpu) {
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, PAGE_HYP);
if (err) { kvm_err("Cannot map hyp stack
"); goto out_err; }
}
/* Map per‑CPU host data structures */
for_each_possible_cpu(cpu) {
kvm_host_data_t *cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
if (err) { kvm_err("Cannot map host CPU state: %d
", err); goto out_err; }
}
/* Vector mapping is omitted in this scenario */
err = kvm_map_vectors(); /* assumed to return 0 */
if (err) { kvm_err("Cannot map vectors
"); goto out_err; }
/* Auxiliary data mapping is also omitted */
err = hyp_map_aux_data(); /* returns 0 */
return 0;
out_err:
/* Cleanup omitted for brevity */
return err;
}The function allocates a PGD, performs identity mapping, sets up per‑CPU stacks, and maps the essential code sections ( __hyp_text, __start_rodata, __bss) into the hyp address space. Vector and auxiliary‑data mappings are assumed to succeed trivially in this scenario.
After these steps, hyp mode memory mapping is fully initialized, allowing code to run correctly both before and after the MMU is enabled, and enabling seamless calls between host and hyp code compiled into the same binary.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
