Skip to content

Commit 8ef7916

Browse files
committed
virtio-balloon: Drop huge page restriction
While the traditional balloon device would not be able to reclaim memory when backed by huge pages, it could still technically be used to restrict memory usage in the guest. With the addition of hinting and reporting, these features report ranges in bigger sizes (4 MB by default). Because of this, it is possible for the host to reclaim huge pages backing the guest. Update the performance tests for the balloon when backed by huge pages, and add variants to the size reduction tests to ensure hinting and reporting can reduce the RSS of the guest. Move the inflation test to performance to ensure it runs sequentially in CI; otherwise the host can be exhausted of huge pages. Signed-off-by: Jack Thomson <[email protected]>
1 parent f5ac6ca commit 8ef7916

File tree

10 files changed

+184
-218
lines changed

10 files changed

+184
-218
lines changed

src/vmm/src/devices/virtio/balloon/metrics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,4 +113,4 @@ pub mod tests {
113113
balloon_metrics.inflate_count.inc();
114114
assert_eq!(balloon_metrics.inflate_count.count(), 1);
115115
}
116-
116+
}

src/vmm/src/resources.rs

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::vmm_config::drive::*;
2424
use crate::vmm_config::entropy::*;
2525
use crate::vmm_config::instance_info::InstanceInfo;
2626
use crate::vmm_config::machine_config::{
27-
HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate,
27+
MachineConfig, MachineConfigError, MachineConfigUpdate,
2828
};
2929
use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics};
3030
use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
@@ -245,10 +245,6 @@ impl VmResources {
245245
}
246246
SharedDeviceType::Balloon(balloon) => {
247247
self.balloon.set_device(balloon);
248-
249-
if self.machine_config.huge_pages != HugePageConfig::None {
250-
return Err(ResourcesError::BalloonDevice(BalloonConfigError::HugePages));
251-
}
252248
}
253249
SharedDeviceType::Vsock(vsock) => {
254250
self.vsock.set_device(vsock);
@@ -290,9 +286,6 @@ impl VmResources {
290286
return Err(MachineConfigError::IncompatibleBalloonSize);
291287
}
292288

293-
if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None {
294-
return Err(MachineConfigError::BalloonAndHugePages);
295-
}
296289
self.machine_config = updated;
297290

298291
Ok(())
@@ -349,10 +342,6 @@ impl VmResources {
349342
return Err(BalloonConfigError::TooManyPagesRequested);
350343
}
351344

352-
if self.machine_config.huge_pages != HugePageConfig::None {
353-
return Err(BalloonConfigError::HugePages);
354-
}
355-
356345
self.balloon.set(config)
357346
}
358347

@@ -563,7 +552,6 @@ mod tests {
563552
use crate::HTTP_MAX_PAYLOAD_SIZE;
564553
use crate::cpu_config::templates::test_utils::TEST_TEMPLATE_JSON;
565554
use crate::cpu_config::templates::{CpuTemplateType, StaticCpuTemplate};
566-
use crate::devices::virtio::balloon::Balloon;
567555
use crate::devices::virtio::block::virtio::VirtioBlockError;
568556
use crate::devices::virtio::block::{BlockError, CacheType};
569557
use crate::devices::virtio::vsock::VSOCK_DEV_ID;
@@ -1543,31 +1531,6 @@ mod tests {
15431531
.unwrap_err();
15441532
}
15451533

1546-
#[test]
1547-
fn test_negative_restore_balloon_device_with_huge_pages() {
1548-
let mut vm_resources = default_vm_resources();
1549-
vm_resources.balloon = BalloonBuilder::new();
1550-
vm_resources
1551-
.update_machine_config(&MachineConfigUpdate {
1552-
huge_pages: Some(HugePageConfig::Hugetlbfs2M),
1553-
..Default::default()
1554-
})
1555-
.unwrap();
1556-
let err = vm_resources
1557-
.update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new(
1558-
Balloon::new(128, false, 0, false, false).unwrap(),
1559-
))))
1560-
.unwrap_err();
1561-
assert!(
1562-
matches!(
1563-
err,
1564-
ResourcesError::BalloonDevice(BalloonConfigError::HugePages)
1565-
),
1566-
"{:?}",
1567-
err
1568-
);
1569-
}
1570-
15711534
#[test]
15721535
fn test_set_entropy_device() {
15731536
let mut vm_resources = default_vm_resources();

src/vmm/src/vmm_config/balloon.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ pub enum BalloonConfigError {
2020
TooManyPagesRequested,
2121
/// Error creating the balloon device: {0}
2222
CreateFailure(crate::devices::virtio::balloon::BalloonError),
23-
/// Firecracker's huge pages support is incompatible with memory ballooning.
24-
HugePages,
2523
}
2624

2725
/// This struct represents the strongly typed equivalent of the json body

src/vmm/src/vmm_config/machine_config.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ pub enum MachineConfigError {
2929
SmtNotSupported,
3030
/// Could not determine host kernel version when checking hugetlbfs compatibility
3131
KernelVersion,
32-
/// Firecracker's huge pages support is incompatible with memory ballooning.
33-
BalloonAndHugePages,
3432
}
3533

3634
/// Describes the possible (huge)page configurations for a microVM's memory.

tests/conftest.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from framework import defs, utils
3535
from framework.artifacts import disks, kernel_params
3636
from framework.defs import DEFAULT_BINARY_DIR
37-
from framework.microvm import MicroVMFactory, SnapshotType
37+
from framework.microvm import HugePagesConfig, MicroVMFactory, SnapshotType
3838
from framework.properties import global_props
3939
from framework.utils_cpu_templates import (
4040
custom_cpu_templates_params,
@@ -593,6 +593,10 @@ def pci_enabled(request):
593593
"""Fixture that allows configuring whether a microVM will have PCI enabled or not"""
594594
yield request.param
595595

596+
@pytest.fixture(params=[HugePagesConfig.NONE, HugePagesConfig.HUGETLBFS_2MB], ids=["NO_HUGE_PAGES", "2M_HUGE_PAGES"])
597+
def huge_pages(request):
598+
"""Fixture that allows configuring whether a microVM will have huge pages enabled or not"""
599+
yield request.param
596600

597601
def uvm_booted(
598602
microvm_factory,

tests/framework/microvm.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
from collections import namedtuple
2424
from dataclasses import dataclass
2525
from enum import Enum, auto
26-
from functools import lru_cache
26+
from functools import cached_property, lru_cache
2727
from pathlib import Path
2828
from typing import Optional
2929

30+
import psutil
3031
from tenacity import Retrying, retry, stop_after_attempt, wait_fixed
3132

3233
import host_tools.cargo_build as build_tools
@@ -472,7 +473,7 @@ def state(self):
472473
"""Get the InstanceInfo property and return the state field."""
473474
return self.api.describe.get().json()["state"]
474475

475-
@property
476+
@cached_property
476477
def firecracker_pid(self):
477478
"""Return Firecracker's PID
478479
@@ -491,6 +492,11 @@ def firecracker_pid(self):
491492
with attempt:
492493
return int(self.jailer.pid_file.read_text(encoding="ascii"))
493494

495+
@cached_property
496+
def ps(self):
497+
"""Returns a handle to the psutil.Process for this VM"""
498+
return psutil.Process(self.firecracker_pid)
499+
494500
@property
495501
def dimensions(self):
496502
"""Gets a default set of cloudwatch dimensions describing the configuration of this microvm"""

tests/framework/utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from collections import defaultdict, namedtuple
1616
from contextlib import contextmanager
1717
from typing import Dict
18+
from pathlib import Path
1819

1920
import psutil
2021
import semver
@@ -259,6 +260,48 @@ def get_free_mem_ssh(ssh_connection):
259260
raise Exception("Available memory not found in `/proc/meminfo")
260261

261262

263+
def get_stable_rss_mem_by_pid(process, percentage_delta=1):
264+
"""
265+
Get the RSS memory that a guest uses, given the pid of the guest.
266+
267+
Wait till the fluctuations in RSS drop below percentage_delta.
268+
Or print a warning if this does not happen.
269+
"""
270+
271+
# All values are reported as KiB
272+
273+
def get_rss_from_pmap():
274+
"""Returns current memory utilization in KiB, including used HugeTLBFS"""
275+
276+
proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")
277+
for line in proc_status.splitlines():
278+
if line.startswith("HugetlbPages:"): # entry is in KiB
279+
hugetlbfs_usage = int(line.split()[1])
280+
break
281+
else:
282+
assert False, f"HugetlbPages not found in {str(proc_status)}"
283+
return hugetlbfs_usage + process.memory_info().rss // 1024
284+
285+
first_rss = 0
286+
second_rss = 0
287+
for _ in range(5):
288+
first_rss = get_rss_from_pmap()
289+
time.sleep(1)
290+
second_rss = get_rss_from_pmap()
291+
abs_diff = abs(first_rss - second_rss)
292+
abs_delta = abs_diff / first_rss * 100
293+
print(
294+
f"RSS readings: old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}"
295+
)
296+
if abs_delta < percentage_delta:
297+
return second_rss
298+
299+
time.sleep(1)
300+
301+
print("WARNING: RSS readings did not stabilize")
302+
return second_rss
303+
304+
262305
def _format_output_message(proc, stdout, stderr):
263306
output_message = f"\n[{proc.pid}] Command:\n{proc.args}"
264307
# Append stdout/stderr to the output message

0 commit comments

Comments
 (0)