vmm/arch/x86_64/
xstate.rs

1// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use vmm_sys_util::syscall::SyscallReturnCode;
5
6use crate::arch::x86_64::generated::arch_prctl;
7use crate::logger::info;
8
9const INTEL_AMX_MASK: u64 = 1u64 << arch_prctl::ARCH_XCOMP_TILEDATA;
10
/// Errors associated with x86_64's dynamic XSAVE state features.
// NOTE: the `///` line on each variant doubles as its `Display` message (derived by
// `displaydoc`), with `{0}` / `{1}` referring to the variant's positional fields. Treat those
// lines as user-facing strings rather than free-form documentation.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum XstateError {
    // Carries the OS error returned while querying the supported feature bitmap.
    /// Failed to get supported XSTATE features: {0}
    GetSupportedFeatures(std::io::Error),
    // Carries the feature number that was requested and the OS error returned for it.
    /// Failed to request permission for XSTATE feature ({0}): {1}
    RequestFeaturePermission(u32, std::io::Error),
}
19
20/// Request permission for all dynamic XSTATE features.
21///
22/// Some XSTATE features are not permitted by default, because they may require a larger area to
23/// save their states than the tranditional 4096-byte area. Instead, the permission for them can be
24/// requested via arch_prctl().
25/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
26///
27/// Firecracker requests permission for them by default if available in order to retrieve the
28/// full supported feature set via KVM_GET_SUPPORTED_CPUID.
29/// https://docs.kernel.org/virt/kvm/api.html#kvm-get-supported-cpuid
30///
31/// Note that requested features can be masked by a CPU template.
32pub fn request_dynamic_xstate_features() -> Result<(), XstateError> {
33    let supported_xfeatures =
34        match get_supported_xfeatures().map_err(XstateError::GetSupportedFeatures)? {
35            Some(supported_xfeatures) => supported_xfeatures,
36            // Exit early if dynamic XSTATE feature enabling is not supported on the kernel.
37            None => return Ok(()),
38        };
39
40    // Intel AMX's TILEDATA
41    //
42    // Unless requested, on kernels prior to v6.4, KVM_GET_SUPPORTED_CPUID returns an
43    // inconsistent state where TILECFG is set but TILEDATA isn't. Such a half-enabled state
44    // causes guest crash during boot because a guest calls XSETBV instruction with all
45    // XSAVE feature bits enumerated on CPUID and XSETBV only accepts either of both Intel
46    // AMX bits enabled or disabled; otherwise resulting in general protection fault.
47    // https://lore.kernel.org/all/20230405004520.421768-1-seanjc@google.com/
48    if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK {
49        request_xfeature_permission(arch_prctl::ARCH_XCOMP_TILEDATA).map_err(|err| {
50            XstateError::RequestFeaturePermission(arch_prctl::ARCH_XCOMP_TILEDATA, err)
51        })?;
52    }
53
54    Ok(())
55}
56
57/// Get supported XSTATE features
58///
59/// Returns Ok(None) if dynamic XSTATE feature enabling is not supported.
60fn get_supported_xfeatures() -> Result<Option<u64>, std::io::Error> {
61    let mut supported_xfeatures: u64 = 0;
62
63    // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer.
64    // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
65    match SyscallReturnCode(unsafe {
66        libc::syscall(
67            libc::SYS_arch_prctl,
68            arch_prctl::ARCH_GET_XCOMP_SUPP,
69            &mut supported_xfeatures as *mut libc::c_ulong,
70        )
71    })
72    .into_empty_result()
73    {
74        Ok(()) => Ok(Some(supported_xfeatures)),
75        // EINVAL is returned if the dynamic XSTATE feature enabling is not supported (e.g. kernel
76        // version prior to v5.16).
77        // https://github.com/torvalds/linux/commit/db8268df0983adc2bb1fb48c9e5f7bfbb5f617f3
78        Err(err) if err.raw_os_error() == Some(libc::EINVAL) => {
79            info!("Dynamic XSTATE feature enabling is not supported.");
80            Ok(None)
81        }
82        Err(err) => Err(err),
83    }
84}
85
86/// Request permission for a dynamic XSTATE feature.
87///
88/// This should be called after `get_supported_xfeatures()` that retrieves supported dynamic XSTATE
89/// features.
90///
91/// Returns Ok(()) if the permission request succeeded or dynamic XSTATE feature enabling for
92/// "guest" is not supported.
93fn request_xfeature_permission(xfeature: u32) -> Result<(), std::io::Error> {
94    // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` value.
95    // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
96    match SyscallReturnCode(unsafe {
97        libc::syscall(
98            libc::SYS_arch_prctl,
99            arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM as libc::c_ulong,
100            xfeature as libc::c_ulong,
101        )
102    })
103    .into_empty_result()
104    {
105        Ok(()) => Ok(()),
106        // EINVAL is returned if the dynamic XSTATE feature enabling for "guest" is not supported
107        // although that for "userspace application" is supported (e.g. kernel versions >= 5.16 and
108        // < 5.17).
109        // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
110        //
111        // Note that XFEATURE_MASK_XTILE (= XFEATURE_MASK_XTILE_DATA | XFEATURE_MASK_XTILE_CFG) was
112        // also added to KVM_SUPPORTED_XCR0 in kernel v5.17. KVM_SUPPORTED_XCR0 is used to
113        // initialize the guest-supported XCR0. Thus, KVM_GET_SUPPORTED_CPUID doesn't
114        // return AMX-half-enabled state, where XTILE_CFG is set but XTILE_DATA is unset, on such
115        // kernels.
116        // https://github.com/torvalds/linux/commit/86aff7a4799286635efd94dab17b513544703cad
117        // https://github.com/torvalds/linux/blame/f443e374ae131c168a065ea1748feac6b2e76613/arch/x86/kvm/x86.c#L8850-L8853
118        // https://github.com/firecracker-microvm/firecracker/pull/5065
119        Err(err) if err.raw_os_error() == Some(libc::EINVAL) => {
120            info!("Dynamic XSTATE feature enabling is not supported for guest.");
121            Ok(())
122        }
123        Err(err) => Err(err),
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    // Read the bitmap of permitted XSTATE features via ARCH_GET_XCOMP_GUEST_PERM.
    fn get_permitted_xstate_features() -> Result<u64, std::io::Error> {
        let mut permitted: u64 = 0;

        // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer.
        let ret = unsafe {
            libc::syscall(
                libc::SYS_arch_prctl,
                arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
                &mut permitted as *mut libc::c_ulong,
            )
        };

        SyscallReturnCode(ret).into_empty_result()?;
        Ok(permitted)
    }

    #[test]
    fn test_request_xstate_feature_permission() {
        request_dynamic_xstate_features().unwrap();

        // Without kernel support for dynamic XSTATE feature enabling there is nothing to verify.
        let Some(supported) = get_supported_xfeatures().unwrap() else {
            return;
        };

        // Only Intel AMX TILEDATA is a dynamic feature today; skip the check when the host
        // does not report it as supported.
        if supported & INTEL_AMX_MASK != INTEL_AMX_MASK {
            return;
        }

        let permitted = get_permitted_xstate_features().unwrap();
        assert_eq!(permitted & INTEL_AMX_MASK, INTEL_AMX_MASK);
    }
}