1use std::sync::{Arc, Mutex};
2use std::{io, thread};
3
4use anyhow::Result;
5
6use event_manager::SubscriberOps;
7use vmm::Vcpu;
8use vmm::Vmm;
9use vmm::builder::StartMicrovmError;
10use vmm::cpu_config::templates::GetCpuTemplate;
11use vmm::initrd::InitrdConfig;
12use vmm::resources::VmResources;
13use vmm::vmm_config::instance_info::InstanceInfo;
14use vmm::vstate::memory;
15use vmm::{EventManager, VcpuHandle};
16
17use kvm_bindings::KVM_CAP_NESTED_STATE;
18use kvm_bindings::{KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_SW_BP, kvm_guest_debug};
19
20#[cfg(target_arch = "x86_64")]
21use vmm::cpu_config::templates::KvmCapability;
22#[cfg(target_arch = "x86_64")]
23use vmm::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host;
24#[cfg(target_arch = "x86_64")]
25use vmm::cpu_config::x86_64::cpuid::{
26 CpuidKey, CpuidTrait, KvmCpuidFlags, VENDOR_ID_AMD, VENDOR_ID_INTEL,
27};
28#[cfg(target_arch = "x86_64")]
29use vmm::cpu_config::x86_64::custom_cpu_template::{
30 CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, CustomCpuTemplate,
31};
32
33#[derive(Debug, thiserror::Error, displaydoc::Display)]
34pub enum ResizeFdTableError {
35 GetRlimit,
37 Dup2(io::Error),
39 Close(io::Error),
41}
42
43#[cfg(target_arch = "x86_64")]
44fn ensure_nested_kvm_caps(template: &mut CustomCpuTemplate) {
45 if !template
46 .kvm_capabilities
47 .iter()
48 .any(|cap| matches!(cap, KvmCapability::Add(value) if *value == KVM_CAP_NESTED_STATE))
49 {
50 template
51 .kvm_capabilities
52 .push(KvmCapability::Add(KVM_CAP_NESTED_STATE));
53 }
54}
55
56#[cfg(target_arch = "x86_64")]
57fn set_cpuid_bit(
58 template: &mut CustomCpuTemplate,
59 leaf: u32,
60 subleaf: u32,
61 register: CpuidRegister,
62 bit: u8,
63) {
64 let mask = 1u32 << bit;
65 if let Some(leaf_mod) = template
66 .cpuid_modifiers
67 .iter_mut()
68 .find(|entry| entry.leaf == leaf && entry.subleaf == subleaf)
69 {
70 if let Some(reg_mod) = leaf_mod
71 .modifiers
72 .iter_mut()
73 .find(|entry| entry.register == register)
74 {
75 reg_mod.bitmap.filter |= mask;
76 reg_mod.bitmap.value |= mask;
77 } else {
78 leaf_mod.modifiers.push(CpuidRegisterModifier {
79 register,
80 bitmap: vmm::cpu_config::templates::RegisterValueFilter {
81 filter: mask,
82 value: mask,
83 },
84 });
85 }
86 } else {
87 template.cpuid_modifiers.push(CpuidLeafModifier {
88 leaf,
89 subleaf,
90 flags: KvmCpuidFlags::EMPTY,
91 modifiers: vec![CpuidRegisterModifier {
92 register,
93 bitmap: vmm::cpu_config::templates::RegisterValueFilter {
94 filter: mask,
95 value: mask,
96 },
97 }],
98 });
99 }
100}
101
102#[cfg(target_arch = "x86_64")]
103fn ensure_nested_virt_supported(
104 kvm: &vmm::arch::x86_64::kvm::Kvm,
105 template: &mut CustomCpuTemplate,
106) -> Result<(), StartMicrovmError> {
107 if kvm.fd.check_extension_raw(u64::from(KVM_CAP_NESTED_STATE)) == 0 {
108 return Err(StartMicrovmError::NestedVirtUnsupported(
109 "KVM_CAP_NESTED_STATE not supported by host".to_string(),
110 ));
111 }
112
113 let vendor = get_vendor_id_from_host().map_err(|err| {
114 StartMicrovmError::NestedVirtUnsupported(format!("unable to read CPUID vendor: {err}"))
115 })?;
116
117 if &vendor == VENDOR_ID_INTEL {
118 let key = CpuidKey {
119 leaf: 0x1,
120 subleaf: 0,
121 };
122 let entry = kvm.supported_cpuid.get(&key).ok_or_else(|| {
123 StartMicrovmError::NestedVirtUnsupported("missing CPUID leaf 0x1".to_string())
124 })?;
125 if entry.result.ecx & (1 << 5) == 0 {
126 return Err(StartMicrovmError::NestedVirtUnsupported(
127 "host CPUID does not advertise VMX support".to_string(),
128 ));
129 }
130 set_cpuid_bit(template, 0x1, 0x0, CpuidRegister::Ecx, 5);
131 Ok(())
132 } else if &vendor == VENDOR_ID_AMD {
133 let key = CpuidKey {
134 leaf: 0x8000_0001,
135 subleaf: 0,
136 };
137 let entry = kvm.supported_cpuid.get(&key).ok_or_else(|| {
138 StartMicrovmError::NestedVirtUnsupported("missing CPUID leaf 0x80000001".to_string())
139 })?;
140 if entry.result.ecx & (1 << 2) == 0 {
141 return Err(StartMicrovmError::NestedVirtUnsupported(
142 "host CPUID does not advertise SVM support".to_string(),
143 ));
144 }
145 set_cpuid_bit(template, 0x8000_0001, 0x0, CpuidRegister::Ecx, 2);
146 Ok(())
147 } else {
148 Err(StartMicrovmError::NestedVirtUnsupported(
149 "unsupported CPU vendor for nested virtualization".to_string(),
150 ))
151 }
152}
153
154pub fn resize_fdtable() -> Result<(), ResizeFdTableError> {
163 let mut rlimit = libc::rlimit {
164 rlim_cur: 0,
165 rlim_max: 0,
166 };
167
168 if unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut rlimit as *mut libc::rlimit) } < 0 {
170 return Err(ResizeFdTableError::GetRlimit);
171 }
172
173 let limit: libc::c_int = if rlimit.rlim_cur == libc::RLIM_INFINITY {
176 2048
177 } else {
178 rlimit.rlim_cur.try_into().unwrap_or(2048)
179 };
180
181 if limit > 3 {
188 if unsafe { libc::dup2(0, limit - 1) } < 0 {
190 return Err(ResizeFdTableError::Dup2(io::Error::last_os_error()));
191 }
192
193 if unsafe { libc::close(limit - 1) } < 0 {
195 return Err(ResizeFdTableError::Close(io::Error::last_os_error()));
196 }
197 }
198
199 Ok(())
200}
201
202pub fn build_microvm_for_boot(
208 instance_info: &InstanceInfo,
209 vm_resources: &VmResources,
210 event_manager: &mut EventManager,
211) -> Result<(Arc<Mutex<Vmm>>, Vcpu), StartMicrovmError> {
212 use self::StartMicrovmError::*;
213
214 let boot_config = vm_resources
215 .boot_source
216 .builder
217 .as_ref()
218 .ok_or(MissingKernelConfig)?;
219
220 let track_dirty_pages = vm_resources.machine_config.track_dirty_pages;
221
222 let vhost_user_device_used = vm_resources
223 .block
224 .devices
225 .iter()
226 .any(|b| b.lock().expect("Poisoned lock").is_vhost_user());
227
228 let regions = vmm::arch::arch_memory_regions(vm_resources.machine_config.mem_size_mib << 20);
238 let guest_regions = if vhost_user_device_used {
239 memory::memfd_backed(
240 ®ions,
241 track_dirty_pages,
242 vm_resources.machine_config.huge_pages,
243 )
244 .map_err(StartMicrovmError::GuestMemory)?
245 } else {
246 memory::anonymous(
247 regions.iter().copied(),
248 track_dirty_pages,
249 vm_resources.machine_config.huge_pages,
250 )
251 .map_err(StartMicrovmError::GuestMemory)?
252 };
253 #[allow(unused_mut)]
255 let mut boot_cmdline = boot_config.cmdline.clone();
256
257 let mut cpu_template = vm_resources
258 .machine_config
259 .cpu_template
260 .get_cpu_template()?
261 .into_owned();
262 if vm_resources.machine_config.enable_nested_virt {
263 #[cfg(target_arch = "x86_64")]
264 {
265 ensure_nested_kvm_caps(&mut cpu_template);
266 }
267 #[cfg(not(target_arch = "x86_64"))]
268 {
269 return Err(StartMicrovmError::NestedVirtUnsupported(
270 "nested virtualization is only supported on x86_64".to_string(),
271 ));
272 }
273 }
274
275 let (mut vmm, mut vcpus) = vmm::builder::create_vmm_and_vcpus(
276 instance_info,
277 event_manager,
278 guest_regions,
279 None,
280 track_dirty_pages,
281 vm_resources.machine_config.vcpu_count,
282 cpu_template.kvm_capabilities.clone(),
283 )?;
284
285 if vm_resources.machine_config.enable_nested_virt {
286 #[cfg(target_arch = "x86_64")]
287 {
288 ensure_nested_virt_supported(vmm.kvm(), &mut cpu_template)?;
289 }
290 }
291
292 let entry_addr = vmm::arch::load_kernel(&boot_config.kernel_file, vmm.vm.guest_memory())?;
293 let initrd = InitrdConfig::from_config(boot_config, vmm.vm.guest_memory())?;
294
295 if vm_resources.pci_enabled {
296 vmm.device_manager.enable_pci(&vmm.vm)?;
297 } else {
298 boot_cmdline.insert("pci", "off")?;
299 }
300
301 assert_eq!(vcpus.len(), 1);
303 let debug_struct = kvm_guest_debug {
304 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
307 pad: 0,
308 arch: Default::default(),
310 };
311
312 vcpus[0].kvm_vcpu.fd.set_guest_debug(&debug_struct).unwrap();
313 vmm::builder::attach_block_devices(
321 &mut vmm.device_manager,
322 &vmm.vm,
323 &mut boot_cmdline,
324 vm_resources.block.devices.iter(),
325 event_manager,
326 )?;
327 vmm::builder::attach_net_devices(
328 &mut vmm.device_manager,
329 &vmm.vm,
330 &mut boot_cmdline,
331 vm_resources.net_builder.iter(),
332 event_manager,
333 )?;
334
335 let vm_arc = vmm.vm.clone();
340 let kvm_ptr = vmm.kvm() as *const _;
341 unsafe {
344 vmm::arch::configure_system_for_boot(
345 &*kvm_ptr,
346 vm_arc.as_ref(),
347 &mut vmm.device_manager,
348 vcpus.as_mut(),
349 &vm_resources.machine_config,
350 &cpu_template,
351 entry_addr,
352 &initrd,
353 boot_cmdline,
354 )?;
355 }
356
357 let mut vcpu = vcpus.into_iter().next().unwrap();
358 let event_sender = vcpu.event_sender.take().expect("vCPU already started");
359 let response_receiver = vcpu.response_receiver.take().unwrap();
360 let vcpu_fd = vcpu
361 .copy_kvm_vcpu_fd(vmm.vm.as_ref())
362 .map_err(StartMicrovmError::VcpuFdCloneError)?;
363 let vcpu_join_handle = thread::Builder::new()
364 .name(format!("fake vcpu thread"))
365 .spawn(|| {})
366 .unwrap();
367 let handle = VcpuHandle::new(event_sender, response_receiver, vcpu_fd, vcpu_join_handle);
368
369 vmm.vcpus_handles.push(handle);
371 let vmm = Arc::new(Mutex::new(vmm));
372 event_manager.add_subscriber(vmm.clone());
373
374 vcpu.set_mmio_bus(vmm.lock().unwrap().vm.common.mmio_bus.clone());
375 vcpu.kvm_vcpu
376 .set_pio_bus(vmm.lock().unwrap().vm.pio_bus.clone());
377 Ok((vmm, vcpu))
378}