From 5788afc72de4d9e7a172763f06f40de8fad530f7 Mon Sep 17 00:00:00 2001 From: Heyang Zhou Date: Tue, 21 Oct 2025 19:28:00 +0000 Subject: [PATCH] feat: add config option to enable transparent huge pages Add a new machine-config field, `enable_thp`, that controls whether transparent huge pages (THP) is enabled for the microVM. Signed-off-by: Heyang Zhou --- docs/device-api.md | 2 ++ docs/hugepages.md | 28 +++++++++------ .../request/machine_configuration.rs | 5 +++ src/firecracker/swagger/firecracker.yaml | 7 ++++ src/vmm/src/persist.rs | 1 + src/vmm/src/resources.rs | 34 ++++++++++++++++++- src/vmm/src/vmm_config/machine_config.rs | 11 ++++++ src/vmm/src/vstate/memory.rs | 4 +++ tests/framework/microvm.py | 2 ++ .../integration_tests/functional/test_api.py | 32 +++++++++++++++++ 10 files changed, 115 insertions(+), 11 deletions(-) diff --git a/docs/device-api.md b/docs/device-api.md index f622c2137af..d028f00d49e 100644 --- a/docs/device-api.md +++ b/docs/device-api.md @@ -69,6 +69,8 @@ specification: | | show_level | O | O | O | O | O | O | O | O | | | show_log_origin | O | O | O | O | O | O | O | O | | `MachineConfiguration` | cpu_template | O | O | O | O | O | O | O | O | +| | huge_pages | O | O | O | O | O | O | O | O | +| | enable_thp | O | O | O | O | O | O | O | O | | | smt | O | O | O | O | O | O | O | O | | | mem_size_mib | O | O | O | O | O | O | O | O | | | track_dirty_pages | O | O | O | O | O | O | O | O | diff --git a/docs/hugepages.md b/docs/hugepages.md index 17cd5cc9b43..ad0e27bbbbb 100644 --- a/docs/hugepages.md +++ b/docs/hugepages.md @@ -31,6 +31,24 @@ size (in KiB) for each memory region as part of the initial handshake, as described in our documentation on [UFFD-assisted snapshot-restore](snapshotting/handling-page-faults-on-snapshot-resume.md). +## Transparent huge pages (THP) + +Firecracker supports enabling transparent huge pages on guest memory via the +`enable_thp` field under `/machine-config`. 
When `enable_thp` is set to `true`, +Firecracker uses `madvise(MADV_HUGEPAGE)` to request THP for the guest memory +regions it allocates. + +Limitations: +- THP is only attempted when explicit hugetlbfs pages are not in use (i.e., + `huge_pages` is `None`). +- THP is not supported for memfd-backed guest memory (e.g., when using + vhost-user-blk); in this case Firecracker will return an error if + `enable_thp` is set. +- THP does not integrate with UFFD; no transparent huge pages will be + allocated during userfault-handling while resuming from a snapshot. + +Please refer to the [Linux Documentation][thp_docs] for more information. + ## Known Limitations Currently, hugetlbfs support is mutually exclusive with the following @@ -43,15 +61,5 @@ performance benefits of using huge pages. This is because KVM will unconditionally establish guest page tables at 4K granularity if dirty page tracking is enabled, even if the host users huge mappings. -## FAQ - -### Why does Firecracker not offer a transparent huge pages (THP) setting? - -Firecracker's guest memory can be memfd based. Linux (as of 6.1) does not offer -a way to dynamically enable THP for such memory regions. Additionally, UFFD does -not integrate with THP (no transparent huge pages will be allocated during -userfaulting). Please refer to the [Linux Documentation][thp_docs] for more -information. 
- [hugetlbfs_docs]: https://docs.kernel.org/admin-guide/mm/hugetlbpage.html [thp_docs]: https://www.kernel.org/doc/html/next/admin-guide/mm/transhuge.html#hugepages-in-tmpfs-shmem diff --git a/src/firecracker/src/api_server/request/machine_configuration.rs b/src/firecracker/src/api_server/request/machine_configuration.rs index 2e8addffb74..91d70babf14 100644 --- a/src/firecracker/src/api_server/request/machine_configuration.rs +++ b/src/firecracker/src/api_server/request/machine_configuration.rs @@ -123,6 +123,7 @@ mod tests { cpu_template: None, track_dirty_pages: Some(false), huge_pages: Some(expected), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; @@ -144,6 +145,7 @@ mod tests { cpu_template: Some(StaticCpuTemplate::None), track_dirty_pages: Some(false), huge_pages: Some(HugePageConfig::None), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; @@ -165,6 +167,7 @@ mod tests { cpu_template: None, track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; @@ -190,6 +193,7 @@ mod tests { cpu_template: Some(StaticCpuTemplate::T2), track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; @@ -217,6 +221,7 @@ mod tests { cpu_template: None, track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 5bf55108b09..99619df41c1 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -1140,6 +1140,13 @@ definitions: - None - 2M description: Which huge pages configuration (if any) should be used to back guest memory. 
+ enable_thp: + type: boolean + description: >- + Enable transparent huge pages via madvise(MADV_HUGEPAGE) for guest memory. + Effective only for anonymous memory (non-memfd) and when not using explicit hugetlbfs pages. + If guest memory is memfd-backed (e.g., due to vhost-user-blk), setting this will cause an error. + default: false MemoryBackend: type: object diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index ee76bf6800b..a402c2b8284 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -356,6 +356,7 @@ pub fn restore_from_snapshot( cpu_template: Some(microvm_state.vm_info.cpu_template), track_dirty_pages: Some(track_dirty_pages), huge_pages: Some(microvm_state.vm_info.huge_pages), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }) diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 066fe3524be..c0525d6476f 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -489,7 +489,38 @@ impl VmResources { pub fn allocate_guest_memory(&self) -> Result, MemoryError> { let regions = crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib)); - self.allocate_memory_regions(&regions) + + // Determine whether memfd-backed memory would be used. + let vhost_user_device_used = self + .block + .devices + .iter() + .any(|b| b.lock().expect("Poisoned lock").is_vhost_user()); + + // If THP is requested but guest memory would be memfd-backed, return an error. + if self.machine_config.enable_thp && vhost_user_device_used { + return Err(MemoryError::ThpUnsupportedMemfd); + } + + let mut guest_regions = self.allocate_memory_regions(&regions)?; + + // If requested, enable transparent hugepages via madvise on anonymous memory only + // (skip if using explicit hugetlbfs pages). + if self.machine_config.enable_thp && self.machine_config.huge_pages == HugePageConfig::None + { + for region in &guest_regions { + // SAFETY: Address and size refer to a valid guest memory mapping we created. 
+ #[allow(deprecated)] + let ret = unsafe { + libc::madvise(region.as_ptr().cast(), region.size() as usize, libc::MADV_HUGEPAGE) + }; + if ret != 0 { + return Err(MemoryError::Madvise(std::io::Error::last_os_error())); + } + } + } + + Ok(guest_regions) } } @@ -1381,6 +1412,7 @@ mod tests { cpu_template: Some(StaticCpuTemplate::V1N1), track_dirty_pages: Some(false), huge_pages: Some(HugePageConfig::None), + enable_thp: Some(false), #[cfg(feature = "gdb")] gdb_socket_path: None, }; diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index cfe7105fdf8..6105fe2d5bc 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -115,6 +115,11 @@ pub struct MachineConfig { /// Configures what page size Firecracker should use to back guest memory. #[serde(default)] pub huge_pages: HugePageConfig, + /// Enables or disables transparent huge pages (THP) on guest memory via madvise. + /// Only effective when guest memory is backed by anonymous memory (non-memfd) and + /// not using explicit hugetlbfs pages. + #[serde(default)] + pub enable_thp: bool, /// GDB socket address. #[cfg(feature = "gdb")] #[serde(default, skip_serializing_if = "Option::is_none")] @@ -157,6 +162,7 @@ impl Default for MachineConfig { cpu_template: None, track_dirty_pages: false, huge_pages: HugePageConfig::None, + enable_thp: false, #[cfg(feature = "gdb")] gdb_socket_path: None, } @@ -190,6 +196,9 @@ pub struct MachineConfigUpdate { /// Configures what page size Firecracker should use to back guest memory. #[serde(default)] pub huge_pages: Option<HugePageConfig>, + /// Enables or disables transparent huge pages (THP) on guest memory via madvise. + #[serde(default)] + pub enable_thp: Option<bool>, /// GDB socket address. 
#[cfg(feature = "gdb")] #[serde(default)] @@ -214,6 +223,7 @@ impl From for MachineConfigUpdate { cpu_template: cfg.static_template(), track_dirty_pages: Some(cfg.track_dirty_pages), huge_pages: Some(cfg.huge_pages), + enable_thp: Some(cfg.enable_thp), #[cfg(feature = "gdb")] gdb_socket_path: cfg.gdb_socket_path, } @@ -281,6 +291,7 @@ impl MachineConfig { cpu_template, track_dirty_pages: update.track_dirty_pages.unwrap_or(self.track_dirty_pages), huge_pages: page_config, + enable_thp: update.enable_thp.unwrap_or(self.enable_thp), #[cfg(feature = "gdb")] gdb_socket_path: update.gdb_socket_path.clone(), }) diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 2d3f6a1b724..bcfa5ca2e97 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -53,6 +53,10 @@ pub enum MemoryError { OffsetTooLarge, /// Cannot retrieve snapshot file metadata: {0} FileMetadata(std::io::Error), + /// Cannot apply madvise: {0} + Madvise(std::io::Error), + /// Transparent huge pages are unsupported for memfd-backed guest memory + ThpUnsupportedMemfd, } /// Type of the guest region diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 74ae180950c..83ca4cb66ad 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -798,6 +798,7 @@ def basic_config( rootfs_io_engine=None, cpu_template: Optional[str] = None, enable_entropy_device=False, + enable_thp: Optional[bool] = None, ): """Shortcut for quickly configuring a microVM. 
@@ -821,6 +822,7 @@ def basic_config( mem_size_mib=mem_size_mib, track_dirty_pages=track_dirty_pages, huge_pages=huge_pages, + enable_thp=enable_thp, ) self.vcpus_count = vcpu_count self.mem_size_bytes = mem_size_mib * 2**20 diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 7dab0e14e6d..e99289c07a2 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -439,6 +439,36 @@ def test_negative_machine_config_api(uvm_plain): ) +def test_machine_config_enable_thp(uvm_plain): + """ + Enabling transparent huge pages via machine-config should succeed. + """ + vm = uvm_plain + vm.spawn() + vm.basic_config(enable_thp=True) + vm.start() + + vm_config = vm.api.vm_config.get().json() + assert vm_config["machine-config"]["enable_thp"] is True + + +def test_machine_config_enable_thp_with_vhost_user_fails( + uvm_vhost_user_plain_any, rootfs +): + """ + Enabling THP alongside memfd-backed memory (vhost-user block) should fail. + """ + vm = uvm_vhost_user_plain_any + vm.ssh_key = rootfs.with_suffix(".id_rsa") + vm.spawn() + vm.basic_config(add_root_device=False, enable_thp=True) + vm.add_vhost_user_drive("rootfs", rootfs, is_root_device=True, is_read_only=True) + + err_msg = "Transparent huge pages are unsupported for memfd-backed guest memory" + with pytest.raises(RuntimeError, match=err_msg): + vm.start() + + def test_api_cpu_config(uvm_plain, custom_cpu_template): """ Test /cpu-config PUT scenarios. @@ -1120,6 +1150,7 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): "smt": True, "track_dirty_pages": False, "huge_pages": "None", + "enable_thp": False, } if cpu_vendor == utils_cpuid.CpuVendor.ARM: @@ -1251,6 +1282,7 @@ def test_get_full_config(uvm_plain): "smt": False, "track_dirty_pages": False, "huge_pages": "None", + "enable_thp": False, } expected_cfg["cpu-config"] = None expected_cfg["boot-source"] = {