Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/device-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ specification:
| | show_level | O | O | O | O | O | O | O | O |
| | show_log_origin | O | O | O | O | O | O | O | O |
| `MachineConfiguration` | cpu_template | O | O | O | O | O | O | O | O |
| | huge_pages | O | O | O | O | O | O | O | O |
| | enable_thp | O | O | O | O | O | O | O | O |
| | smt | O | O | O | O | O | O | O | O |
| | mem_size_mib | O | O | O | O | O | O | O | O |
| | track_dirty_pages | O | O | O | O | O | O | O | O |
Expand Down
28 changes: 18 additions & 10 deletions docs/hugepages.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,24 @@ size (in KiB) for each memory region as part of the initial handshake, as
described in our documentation on
[UFFD-assisted snapshot-restore](snapshotting/handling-page-faults-on-snapshot-resume.md).

## Transparent huge pages (THP)

Firecracker supports enabling transparent huge pages on guest memory via the
`enable_thp` field under `/machine-config`. When `enable_thp` is set to `true`,
Firecracker uses `madvise(MADV_HUGEPAGE)` to request THP for the guest memory
regions it allocates.

Limitations:
- THP is only attempted when explicit hugetlbfs pages are not in use (i.e.,
`huge_pages` is `None`).
- THP is not supported for memfd-backed guest memory (e.g., when using
vhost-user-blk); in this case Firecracker will return an error if
`enable_thp` is set.
- THP does not integrate with UFFD; no transparent huge pages will be
allocated during userfault-handling while resuming from a snapshot.

Please refer to the [Linux Documentation][thp_docs] for more information.

## Known Limitations

Currently, hugetlbfs support is mutually exclusive with the following
Expand All @@ -43,15 +61,5 @@ performance benefits of using huge pages. This is because KVM will
unconditionally establish guest page tables at 4K granularity if dirty page
tracking is enabled, even if the host uses huge mappings.

## FAQ

### Why does Firecracker not offer a transparent huge pages (THP) setting?

Firecracker's guest memory can be memfd based. Linux (as of 6.1) does not offer
a way to dynamically enable THP for such memory regions. Additionally, UFFD does
not integrate with THP (no transparent huge pages will be allocated during
userfaulting). Please refer to the [Linux Documentation][thp_docs] for more
information.

[hugetlbfs_docs]: https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
[thp_docs]: https://www.kernel.org/doc/html/next/admin-guide/mm/transhuge.html#hugepages-in-tmpfs-shmem
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(false),
huge_pages: Some(expected),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -144,6 +145,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::None),
track_dirty_pages: Some(false),
huge_pages: Some(HugePageConfig::None),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -165,6 +167,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -190,6 +193,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::T2),
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand Down Expand Up @@ -217,6 +221,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand Down
7 changes: 7 additions & 0 deletions src/firecracker/swagger/firecracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,13 @@ definitions:
- None
- 2M
description: Which huge pages configuration (if any) should be used to back guest memory.
enable_thp:
type: boolean
description: >-
Enable transparent huge pages via madvise(MADV_HUGEPAGE) for guest memory.
Effective only for anonymous memory (non-memfd) and when not using explicit hugetlbfs pages.
If guest memory is memfd-backed (e.g., due to vhost-user-blk), setting this will cause an error.
default: false

MemoryBackend:
type: object
Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ pub fn restore_from_snapshot(
cpu_template: Some(microvm_state.vm_info.cpu_template),
track_dirty_pages: Some(track_dirty_pages),
huge_pages: Some(microvm_state.vm_info.huge_pages),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
})
Expand Down
34 changes: 33 additions & 1 deletion src/vmm/src/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,38 @@ impl VmResources {
/// Allocates the guest memory regions for the configured memory size and, when
/// `enable_thp` is set, asks the kernel to back them with transparent huge pages.
///
/// # Errors
///
/// * [`MemoryError::ThpUnsupportedMemfd`] if `enable_thp` is set while a vhost-user block
///   device is configured (guest memory would then be memfd-backed).
/// * [`MemoryError::Madvise`] if `madvise(MADV_HUGEPAGE)` fails on any region.
/// * Any error returned by `allocate_memory_regions`.
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
    let regions =
        crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib));

    let thp_requested = self.machine_config.enable_thp;

    // THP cannot be combined with memfd-backed memory, which is what a vhost-user block
    // device implies. Reject the combination up front, before anything is allocated. The
    // device scan (which takes each device lock) only runs when THP was actually requested.
    if thp_requested
        && self
            .block
            .devices
            .iter()
            .any(|b| b.lock().expect("Poisoned lock").is_vhost_user())
    {
        return Err(MemoryError::ThpUnsupportedMemfd);
    }

    let guest_regions = self.allocate_memory_regions(&regions)?;

    // The madvise hint is only applied when explicit hugetlbfs pages are not in use;
    // with hugetlbfs backing the request is skipped.
    if thp_requested && self.machine_config.huge_pages == HugePageConfig::None {
        for region in &guest_regions {
            // NOTE(review): `allow(deprecated)` is carried over from the original code;
            // confirm which call is deprecated and whether the allow is still needed.
            // SAFETY: Address and size refer to a valid guest memory mapping we created.
            #[allow(deprecated)]
            let ret = unsafe {
                libc::madvise(
                    region.as_ptr().cast(),
                    region.size() as usize,
                    libc::MADV_HUGEPAGE,
                )
            };
            if ret != 0 {
                return Err(MemoryError::Madvise(std::io::Error::last_os_error()));
            }
        }
    }

    Ok(guest_regions)
}
}

Expand Down Expand Up @@ -1381,6 +1412,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::V1N1),
track_dirty_pages: Some(false),
huge_pages: Some(HugePageConfig::None),
enable_thp: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand Down
11 changes: 11 additions & 0 deletions src/vmm/src/vmm_config/machine_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ pub struct MachineConfig {
/// Configures what page size Firecracker should use to back guest memory.
#[serde(default)]
pub huge_pages: HugePageConfig,
/// Enables or disables transparent huge pages (THP) on guest memory via madvise.
/// Only effective when guest memory is backed by anonymous memory (non-memfd) and
/// not using explicit hugetlbfs pages.
#[serde(default)]
pub enable_thp: bool,
/// GDB socket address.
#[cfg(feature = "gdb")]
#[serde(default, skip_serializing_if = "Option::is_none")]
Expand Down Expand Up @@ -157,6 +162,7 @@ impl Default for MachineConfig {
cpu_template: None,
track_dirty_pages: false,
huge_pages: HugePageConfig::None,
enable_thp: false,
#[cfg(feature = "gdb")]
gdb_socket_path: None,
}
Expand Down Expand Up @@ -190,6 +196,9 @@ pub struct MachineConfigUpdate {
/// Configures what page size Firecracker should use to back guest memory.
#[serde(default)]
pub huge_pages: Option<HugePageConfig>,
/// Enables or disables transparent huge pages (THP) on guest memory via madvise.
#[serde(default)]
pub enable_thp: Option<bool>,
/// GDB socket address.
#[cfg(feature = "gdb")]
#[serde(default)]
Expand All @@ -214,6 +223,7 @@ impl From<MachineConfig> for MachineConfigUpdate {
cpu_template: cfg.static_template(),
track_dirty_pages: Some(cfg.track_dirty_pages),
huge_pages: Some(cfg.huge_pages),
enable_thp: Some(cfg.enable_thp),
#[cfg(feature = "gdb")]
gdb_socket_path: cfg.gdb_socket_path,
}
Expand Down Expand Up @@ -281,6 +291,7 @@ impl MachineConfig {
cpu_template,
track_dirty_pages: update.track_dirty_pages.unwrap_or(self.track_dirty_pages),
huge_pages: page_config,
enable_thp: update.enable_thp.unwrap_or(self.enable_thp),
#[cfg(feature = "gdb")]
gdb_socket_path: update.gdb_socket_path.clone(),
})
Expand Down
4 changes: 4 additions & 0 deletions src/vmm/src/vstate/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ pub enum MemoryError {
OffsetTooLarge,
/// Cannot retrieve snapshot file metadata: {0}
FileMetadata(std::io::Error),
/// Cannot apply madvise: {0}
Madvise(std::io::Error),
/// Transparent huge pages are unsupported for memfd-backed guest memory
ThpUnsupportedMemfd,
}

/// Type of the guest region
Expand Down
2 changes: 2 additions & 0 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ def basic_config(
rootfs_io_engine=None,
cpu_template: Optional[str] = None,
enable_entropy_device=False,
enable_thp: Optional[bool] = None,
):
"""Shortcut for quickly configuring a microVM.
Expand All @@ -821,6 +822,7 @@ def basic_config(
mem_size_mib=mem_size_mib,
track_dirty_pages=track_dirty_pages,
huge_pages=huge_pages,
enable_thp=enable_thp,
)
self.vcpus_count = vcpu_count
self.mem_size_bytes = mem_size_mib * 2**20
Expand Down
32 changes: 32 additions & 0 deletions tests/integration_tests/functional/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,36 @@ def test_negative_machine_config_api(uvm_plain):
)


def test_machine_config_enable_thp(uvm_plain):
    """
    A microVM configured with `enable_thp=True` boots, and the flag is
    reported back as `True` in the machine-config section of the full VM config.
    """
    microvm = uvm_plain
    microvm.spawn()
    microvm.basic_config(enable_thp=True)
    microvm.start()

    # Read the setting back through the API to confirm it was persisted.
    machine_cfg = microvm.api.vm_config.get().json()["machine-config"]
    assert machine_cfg["enable_thp"] is True


def test_machine_config_enable_thp_with_vhost_user_fails(
    uvm_vhost_user_plain_any, rootfs
):
    """
    Starting a microVM with THP enabled and a vhost-user block root device
    (memfd-backed guest memory) must be rejected with a descriptive error.
    """
    microvm = uvm_vhost_user_plain_any
    microvm.ssh_key = rootfs.with_suffix(".id_rsa")
    microvm.spawn()
    microvm.basic_config(add_root_device=False, enable_thp=True)
    microvm.add_vhost_user_drive(
        "rootfs", rootfs, is_root_device=True, is_read_only=True
    )

    # The conflict is only detected when guest memory is allocated at start.
    with pytest.raises(
        RuntimeError,
        match="Transparent huge pages are unsupported for memfd-backed guest memory",
    ):
        microvm.start()


def test_api_cpu_config(uvm_plain, custom_cpu_template):
"""
Test /cpu-config PUT scenarios.
Expand Down Expand Up @@ -1120,6 +1150,7 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano):
"smt": True,
"track_dirty_pages": False,
"huge_pages": "None",
"enable_thp": False,
}

if cpu_vendor == utils_cpuid.CpuVendor.ARM:
Expand Down Expand Up @@ -1251,6 +1282,7 @@ def test_get_full_config(uvm_plain):
"smt": False,
"track_dirty_pages": False,
"huge_pages": "None",
"enable_thp": False,
}
expected_cfg["cpu-config"] = None
expected_cfg["boot-source"] = {
Expand Down