vmm/arch/x86_64/
regs.rs

1// Copyright © 2020, Oracle and/or its affiliates.
2// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3// SPDX-License-Identifier: Apache-2.0
4//
5// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
6// Use of this source code is governed by a BSD-style license that can be
7// found in the THIRD-PARTY file.
8
9use std::mem;
10
11use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs};
12use kvm_ioctls::VcpuFd;
13
14use super::super::{BootProtocol, EntryPoint};
15use super::gdt::{gdt_entry, kvm_segment_from_gdt};
16use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap};
17
// Initial pagetables: three contiguous 4 KiB pages (PML4, PDPT, PD) placed
// right after the zero page. Together they identity-map the first 1 GiB of
// guest physical memory (see `setup_page_tables`).
const PML4_START: u64 = 0x9000;
const PDPTE_START: u64 = 0xa000;
const PDE_START: u64 = 0xb000;
22
23/// Errors thrown while setting up x86_64 registers.
24#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)]
25pub enum RegsError {
26    /// Failed to get SREGs for this CPU: {0}
27    GetStatusRegisters(kvm_ioctls::Error),
28    /// Failed to set base registers for this CPU: {0}
29    SetBaseRegisters(kvm_ioctls::Error),
30    /// Failed to configure the FPU: {0}
31    SetFPURegisters(kvm_ioctls::Error),
32    /// Failed to set SREGs for this CPU: {0}
33    SetStatusRegisters(kvm_ioctls::Error),
34    /// Writing the GDT to RAM failed.
35    WriteGDT,
36    /// Writing the IDT to RAM failed
37    WriteIDT,
38    /// WritePDPTEAddress
39    WritePDPTEAddress,
40    /// WritePDEAddress
41    WritePDEAddress,
42    /// WritePML4Address
43    WritePML4Address,
44}
45
/// Error type for [`setup_fpu`].
// `derive_more::From` provides the `From<vmm_sys_util::errno::Error>`
// conversion, and the `#[error]` attribute below is the `Display`/`Error`
// message — it is runtime behavior, not just documentation.
#[derive(Debug, derive_more::From, PartialEq, Eq, thiserror::Error)]
#[error("Failed to setup FPU: {0}")]
pub struct SetupFpuError(vmm_sys_util::errno::Error);
50
51/// Configure Floating-Point Unit (FPU) registers for a given CPU.
52///
53/// # Arguments
54///
55/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
56///
57/// # Errors
58///
59/// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_fpu`] errors.
60pub fn setup_fpu(vcpu: &VcpuFd) -> Result<(), SetupFpuError> {
61    let fpu: kvm_fpu = kvm_fpu {
62        fcw: 0x37f,
63        mxcsr: 0x1f80,
64        ..Default::default()
65    };
66
67    vcpu.set_fpu(&fpu).map_err(SetupFpuError)
68}
69
/// Error type of [`setup_regs`].
// As with `SetupFpuError`, `derive_more::From` provides the conversion from
// the underlying errno error, and `#[error]` defines the `Display` message.
#[derive(Debug, derive_more::From, PartialEq, Eq, thiserror::Error)]
#[error("Failed to setup registers: {0}")]
pub struct SetupRegistersError(vmm_sys_util::errno::Error);
74
75/// Configure base registers for a given CPU.
76///
77/// # Arguments
78///
79/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
80/// * `boot_ip` - Starting instruction pointer.
81///
82/// # Errors
83///
84/// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_regs`] errors.
85pub fn setup_regs(vcpu: &VcpuFd, entry_point: EntryPoint) -> Result<(), SetupRegistersError> {
86    let regs: kvm_regs = match entry_point.protocol {
87        BootProtocol::PvhBoot => kvm_regs {
88            // Configure regs as required by PVH boot protocol.
89            rflags: 0x0000_0000_0000_0002u64,
90            rbx: super::layout::PVH_INFO_START,
91            rip: entry_point.entry_addr.raw_value(),
92            ..Default::default()
93        },
94        BootProtocol::LinuxBoot => kvm_regs {
95            // Configure regs as required by Linux 64-bit boot protocol.
96            rflags: 0x0000_0000_0000_0002u64,
97            rip: entry_point.entry_addr.raw_value(),
98            // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments
99            // are made to rsp (i.e. reserving space for local variables or pushing
100            // values on to the stack), local variables and function parameters are
101            // still accessible from a constant offset from rbp.
102            rsp: super::layout::BOOT_STACK_POINTER,
103            // Starting stack pointer.
104            rbp: super::layout::BOOT_STACK_POINTER,
105            // Must point to zero page address per Linux ABI. This is x86_64 specific.
106            rsi: super::layout::ZERO_PAGE_START,
107            ..Default::default()
108        },
109    };
110
111    vcpu.set_regs(&regs).map_err(SetupRegistersError)
112}
113
/// Error type for [`setup_sregs`].
// The `///` doc on each variant doubles as its `Display` message via
// `displaydoc::Display` — editing them changes user-visible error output.
#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)]
pub enum SetupSpecialRegistersError {
    /// Failed to get special registers: {0}
    GetSpecialRegisters(vmm_sys_util::errno::Error),
    /// Failed to configure segments and special registers: {0}
    ConfigureSegmentsAndSpecialRegisters(RegsError),
    /// Failed to setup page tables: {0}
    SetupPageTables(RegsError),
    /// Failed to set special registers: {0}
    SetSpecialRegisters(vmm_sys_util::errno::Error),
}
126
127/// Configures the special registers and system page tables for a given CPU.
128///
129/// # Arguments
130///
131/// * `mem` - The memory that will be passed to the guest.
132/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
133/// * `boot_prot` - The boot protocol being used.
134///
135/// # Errors
136///
137/// When:
138/// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::get_sregs`] errors.
139/// - [`configure_segments_and_sregs`] errors.
140/// - [`setup_page_tables`] errors
141/// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_sregs`] errors.
142pub fn setup_sregs(
143    mem: &GuestMemoryMmap,
144    vcpu: &VcpuFd,
145    boot_prot: BootProtocol,
146) -> Result<(), SetupSpecialRegistersError> {
147    let mut sregs: kvm_sregs = vcpu
148        .get_sregs()
149        .map_err(SetupSpecialRegistersError::GetSpecialRegisters)?;
150
151    configure_segments_and_sregs(mem, &mut sregs, boot_prot)
152        .map_err(SetupSpecialRegistersError::ConfigureSegmentsAndSpecialRegisters)?;
153    if let BootProtocol::LinuxBoot = boot_prot {
154        setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?;
155        // TODO(dgreid) - Can this be done once per system instead?
156    }
157
158    vcpu.set_sregs(&sregs)
159        .map_err(SetupSpecialRegistersError::SetSpecialRegisters)
160}
161
// Guest-physical addresses where the boot GDT and IDT are written.
const BOOT_GDT_OFFSET: u64 = 0x500;
const BOOT_IDT_OFFSET: u64 = 0x520;

// Number of entries in the boot GDT (null, code, data, TSS).
const BOOT_GDT_MAX: usize = 4;

// EFER bits: Long Mode Active and Long Mode Enable.
const EFER_LMA: u64 = 0x400;
const EFER_LME: u64 = 0x100;

// CR0 bits: Protection Enable, Extension Type, Paging.
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_ET: u64 = 0x10;
const X86_CR0_PG: u64 = 0x8000_0000;
// CR4 bit: Physical Address Extension.
const X86_CR4_PAE: u64 = 0x20;
174
175fn write_gdt_table(table: &[u64], guest_mem: &GuestMemoryMmap) -> Result<(), RegsError> {
176    let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
177    for (index, entry) in table.iter().enumerate() {
178        let addr = guest_mem
179            .checked_offset(boot_gdt_addr, index * mem::size_of::<u64>())
180            .ok_or(RegsError::WriteGDT)?;
181        guest_mem
182            .write_obj(*entry, addr)
183            .map_err(|_| RegsError::WriteGDT)?;
184    }
185    Ok(())
186}
187
188fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<(), RegsError> {
189    let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
190    guest_mem
191        .write_obj(val, boot_idt_addr)
192        .map_err(|_| RegsError::WriteIDT)
193}
194
195fn configure_segments_and_sregs(
196    mem: &GuestMemoryMmap,
197    sregs: &mut kvm_sregs,
198    boot_prot: BootProtocol,
199) -> Result<(), RegsError> {
200    let gdt_table: [u64; BOOT_GDT_MAX] = match boot_prot {
201        BootProtocol::PvhBoot => {
202            // Configure GDT entries as specified by PVH boot protocol
203            [
204                gdt_entry(0, 0, 0),                // NULL
205                gdt_entry(0xc09b, 0, 0xffff_ffff), // CODE
206                gdt_entry(0xc093, 0, 0xffff_ffff), // DATA
207                gdt_entry(0x008b, 0, 0x67),        // TSS
208            ]
209        }
210        BootProtocol::LinuxBoot => {
211            // Configure GDT entries as specified by Linux 64bit boot protocol
212            [
213                gdt_entry(0, 0, 0),            // NULL
214                gdt_entry(0xa09b, 0, 0xfffff), // CODE
215                gdt_entry(0xc093, 0, 0xfffff), // DATA
216                gdt_entry(0x808b, 0, 0xfffff), // TSS
217            ]
218        }
219    };
220
221    let code_seg = kvm_segment_from_gdt(gdt_table[1], 1);
222    let data_seg = kvm_segment_from_gdt(gdt_table[2], 2);
223    let tss_seg = kvm_segment_from_gdt(gdt_table[3], 3);
224
225    // Write segments
226    write_gdt_table(&gdt_table[..], mem)?;
227    sregs.gdt.base = BOOT_GDT_OFFSET;
228    sregs.gdt.limit = u16::try_from(mem::size_of_val(&gdt_table)).unwrap() - 1;
229
230    write_idt_value(0, mem)?;
231    sregs.idt.base = BOOT_IDT_OFFSET;
232    sregs.idt.limit = u16::try_from(mem::size_of::<u64>()).unwrap() - 1;
233
234    sregs.cs = code_seg;
235    sregs.ds = data_seg;
236    sregs.es = data_seg;
237    sregs.fs = data_seg;
238    sregs.gs = data_seg;
239    sregs.ss = data_seg;
240    sregs.tr = tss_seg;
241
242    match boot_prot {
243        BootProtocol::PvhBoot => {
244            sregs.cr0 = X86_CR0_PE | X86_CR0_ET;
245            sregs.cr4 = 0;
246        }
247        BootProtocol::LinuxBoot => {
248            // 64-bit protected mode
249            sregs.cr0 |= X86_CR0_PE;
250            sregs.efer |= EFER_LME | EFER_LMA;
251        }
252    }
253
254    Ok(())
255}
256
257fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<(), RegsError> {
258    // Puts PML4 right after zero page but aligned to 4k.
259    let boot_pml4_addr = GuestAddress(PML4_START);
260    let boot_pdpte_addr = GuestAddress(PDPTE_START);
261    let boot_pde_addr = GuestAddress(PDE_START);
262
263    // Entry covering VA [0..512GB)
264    mem.write_obj(boot_pdpte_addr.raw_value() | 0x03, boot_pml4_addr)
265        .map_err(|_| RegsError::WritePML4Address)?;
266
267    // Entry covering VA [0..1GB)
268    mem.write_obj(boot_pde_addr.raw_value() | 0x03, boot_pdpte_addr)
269        .map_err(|_| RegsError::WritePDPTEAddress)?;
270    // 512 2MB entries together covering VA [0..1GB). Note we are assuming
271    // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
272    for i in 0..512 {
273        mem.write_obj((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8))
274            .map_err(|_| RegsError::WritePDEAddress)?;
275    }
276
277    sregs.cr3 = boot_pml4_addr.raw_value();
278    sregs.cr4 |= X86_CR4_PAE;
279    sregs.cr0 |= X86_CR0_PG;
280    Ok(())
281}
282
#[cfg(test)]
mod tests {
    #![allow(clippy::cast_possible_truncation)]

    use kvm_ioctls::Kvm;

    use super::*;
    use crate::test_utils::single_region_mem;
    use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap};

    /// Reads back a `u64` that the code under test wrote at `offset` in
    /// guest memory.
    fn read_u64(gm: &GuestMemoryMmap, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj(read_addr).unwrap()
    }

    /// Asserts that the GDT/IDT written to guest memory and the segment and
    /// control registers in `sregs` match what
    /// `configure_segments_and_sregs` produces for `boot_prot`.
    fn validate_segments_and_sregs(
        gm: &GuestMemoryMmap,
        sregs: &kvm_sregs,
        boot_prot: BootProtocol,
    ) {
        if let BootProtocol::LinuxBoot = boot_prot {
            // Validate values that are specific to the Linux boot protocol:
            // raw CODE/DATA/TSS descriptors at GDT slots 1..3.
            assert_eq!(0xaf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8));
            assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16));
            assert_eq!(0x8f_8b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 24));

            assert_eq!(0xffff_ffff, sregs.tr.limit);

            // Protected mode on, long mode enabled and active.
            assert!(sregs.cr0 & X86_CR0_PE != 0);
            assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0);
        } else {
            // Validate values that are specific to PVH boot protocol
            assert_eq!(0xcf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8));
            assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16));
            assert_eq!(0x00_8b00_0000_0067, read_u64(gm, BOOT_GDT_OFFSET + 24));

            assert_eq!(0x67, sregs.tr.limit);
            assert_eq!(0, sregs.tr.g);

            assert!(sregs.cr0 & X86_CR0_PE != 0 && sregs.cr0 & X86_CR0_ET != 0);
            assert_eq!(0, sregs.cr4);
        }

        // Common settings for both PVH and Linux boot protocol
        assert_eq!(0x0, read_u64(gm, BOOT_GDT_OFFSET));
        assert_eq!(0x0, read_u64(gm, BOOT_IDT_OFFSET));

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xffff_ffff, sregs.ds.limit);
        assert_eq!(0x10, sregs.es.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0, sregs.tr.avl);
    }

    /// Asserts that `setup_page_tables` built the expected identity map:
    /// one PML4 entry, one PDPTE entry and 512 2 MiB PDE entries, with
    /// CR3/CR4/CR0 updated accordingly.
    fn validate_page_tables(gm: &GuestMemoryMmap, sregs: &kvm_sregs) {
        assert_eq!(0xa003, read_u64(gm, PML4_START));
        assert_eq!(0xb003, read_u64(gm, PDPTE_START));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(gm, PDE_START + (i * 8)));
        }

        assert_eq!(PML4_START, sregs.cr3);
        assert!(sregs.cr4 & X86_CR4_PAE != 0);
        assert!(sregs.cr0 & X86_CR0_PG != 0);
    }

    #[test]
    fn test_setup_fpu() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();
        setup_fpu(&vcpu).unwrap();

        let expected_fpu: kvm_fpu = kvm_fpu {
            fcw: 0x37f,
            mxcsr: 0x1f80,
            ..Default::default()
        };
        let actual_fpu: kvm_fpu = vcpu.get_fpu().unwrap();
        // TODO: auto-generate kvm related structures with PartialEq on.
        assert_eq!(expected_fpu.fcw, actual_fpu.fcw);
        // Setting the mxcsr register from kvm_fpu inside setup_fpu does not influence anything.
        // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c.
        // The mxcsr will stay 0 and the assert below fails. Decide whether or not we should
        // remove it at all.
        // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr);
    }

    #[test]
    fn test_setup_regs() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();

        let expected_regs: kvm_regs = kvm_regs {
            rflags: 0x0000_0000_0000_0002u64,
            rip: 1,
            rsp: super::super::layout::BOOT_STACK_POINTER,
            rbp: super::super::layout::BOOT_STACK_POINTER,
            rsi: super::super::layout::ZERO_PAGE_START,
            ..Default::default()
        };

        let entry_point: EntryPoint = EntryPoint {
            entry_addr: GuestAddress(expected_regs.rip),
            protocol: BootProtocol::LinuxBoot,
        };

        setup_regs(&vcpu, entry_point).unwrap();

        let actual_regs: kvm_regs = vcpu.get_regs().unwrap();
        assert_eq!(actual_regs, expected_regs);
    }

    #[test]
    fn test_setup_sregs() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();
        let gm = single_region_mem(0x10000);

        // Exercise both supported boot protocols against the same vCPU,
        // resetting sregs between runs.
        [BootProtocol::LinuxBoot, BootProtocol::PvhBoot]
            .iter()
            .for_each(|boot_prot| {
                vcpu.set_sregs(&Default::default()).unwrap();
                setup_sregs(&gm, &vcpu, *boot_prot).unwrap();

                let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap();
                // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment.
                // We set it to 1, otherwise the test will fail.
                sregs.gs.g = 1;

                validate_segments_and_sregs(&gm, &sregs, *boot_prot);
                if let BootProtocol::LinuxBoot = *boot_prot {
                    validate_page_tables(&gm, &sregs);
                }
            });
    }

    #[test]
    fn test_write_gdt_table() {
        // Not enough memory for the gdt table to be written.
        let gm = single_region_mem(BOOT_GDT_OFFSET as usize);
        let gdt_table: [u64; BOOT_GDT_MAX] = [
            gdt_entry(0, 0, 0),            // NULL
            gdt_entry(0xa09b, 0, 0xfffff), // CODE
            gdt_entry(0xc093, 0, 0xfffff), // DATA
            gdt_entry(0x808b, 0, 0xfffff), // TSS
        ];
        write_gdt_table(&gdt_table, &gm).unwrap_err();

        // We allocate exactly the amount needed to write four u64 to `BOOT_GDT_OFFSET`.
        let gm =
            single_region_mem(BOOT_GDT_OFFSET as usize + (mem::size_of::<u64>() * BOOT_GDT_MAX));

        let gdt_table: [u64; BOOT_GDT_MAX] = [
            gdt_entry(0, 0, 0),            // NULL
            gdt_entry(0xa09b, 0, 0xfffff), // CODE
            gdt_entry(0xc093, 0, 0xfffff), // DATA
            gdt_entry(0x808b, 0, 0xfffff), // TSS
        ];
        write_gdt_table(&gdt_table, &gm).unwrap();
    }

    #[test]
    fn test_write_idt_table() {
        // Not enough memory for a u64 value to fit.
        let gm = single_region_mem(BOOT_IDT_OFFSET as usize);
        let val = 0x100;
        write_idt_value(val, &gm).unwrap_err();

        let gm = single_region_mem(BOOT_IDT_OFFSET as usize + mem::size_of::<u64>());
        // We have allocated exactly the amount needed to write an u64 to `BOOT_IDT_OFFSET`.
        write_idt_value(val, &gm).unwrap();
    }

    #[test]
    fn test_configure_segments_and_sregs() {
        let mut sregs: kvm_sregs = Default::default();
        let gm = single_region_mem(0x10000);
        configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::LinuxBoot).unwrap();

        validate_segments_and_sregs(&gm, &sregs, BootProtocol::LinuxBoot);

        configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::PvhBoot).unwrap();

        validate_segments_and_sregs(&gm, &sregs, BootProtocol::PvhBoot);
    }

    #[test]
    fn test_setup_page_tables() {
        // Each truncated region is too small for the next table level, so
        // every call must fail until the full 0x10000 region is provided.
        let mut sregs: kvm_sregs = Default::default();
        let gm = single_region_mem(PML4_START as usize);
        setup_page_tables(&gm, &mut sregs).unwrap_err();

        let gm = single_region_mem(PDPTE_START as usize);
        setup_page_tables(&gm, &mut sregs).unwrap_err();

        let gm = single_region_mem(PDE_START as usize);
        setup_page_tables(&gm, &mut sregs).unwrap_err();

        let gm = single_region_mem(0x10000);
        setup_page_tables(&gm, &mut sregs).unwrap();

        validate_page_tables(&gm, &sregs);
    }
}
491}