vmm/arch/x86_64/xstate.rs
1// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use vmm_sys_util::syscall::SyscallReturnCode;
5
6use crate::arch::x86_64::generated::arch_prctl;
7use crate::logger::info;
8
9const INTEL_AMX_MASK: u64 = 1u64 << arch_prctl::ARCH_XCOMP_TILEDATA;
10
11/// Errors assocaited with x86_64's dynamic XSAVE state features.
12#[derive(Debug, thiserror::Error, displaydoc::Display)]
13pub enum XstateError {
14 /// Failed to get supported XSTATE features: {0}
15 GetSupportedFeatures(std::io::Error),
16 /// Failed to request permission for XSTATE feature ({0}): {1}
17 RequestFeaturePermission(u32, std::io::Error),
18}
19
20/// Request permission for all dynamic XSTATE features.
21///
22/// Some XSTATE features are not permitted by default, because they may require a larger area to
23/// save their states than the tranditional 4096-byte area. Instead, the permission for them can be
24/// requested via arch_prctl().
25/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
26///
27/// Firecracker requests permission for them by default if available in order to retrieve the
28/// full supported feature set via KVM_GET_SUPPORTED_CPUID.
29/// https://docs.kernel.org/virt/kvm/api.html#kvm-get-supported-cpuid
30///
31/// Note that requested features can be masked by a CPU template.
32pub fn request_dynamic_xstate_features() -> Result<(), XstateError> {
33 let supported_xfeatures =
34 match get_supported_xfeatures().map_err(XstateError::GetSupportedFeatures)? {
35 Some(supported_xfeatures) => supported_xfeatures,
36 // Exit early if dynamic XSTATE feature enabling is not supported on the kernel.
37 None => return Ok(()),
38 };
39
40 // Intel AMX's TILEDATA
41 //
42 // Unless requested, on kernels prior to v6.4, KVM_GET_SUPPORTED_CPUID returns an
43 // inconsistent state where TILECFG is set but TILEDATA isn't. Such a half-enabled state
44 // causes guest crash during boot because a guest calls XSETBV instruction with all
45 // XSAVE feature bits enumerated on CPUID and XSETBV only accepts either of both Intel
46 // AMX bits enabled or disabled; otherwise resulting in general protection fault.
47 // https://lore.kernel.org/all/20230405004520.421768-1-seanjc@google.com/
48 if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK {
49 request_xfeature_permission(arch_prctl::ARCH_XCOMP_TILEDATA).map_err(|err| {
50 XstateError::RequestFeaturePermission(arch_prctl::ARCH_XCOMP_TILEDATA, err)
51 })?;
52 }
53
54 Ok(())
55}
56
57/// Get supported XSTATE features
58///
59/// Returns Ok(None) if dynamic XSTATE feature enabling is not supported.
60fn get_supported_xfeatures() -> Result<Option<u64>, std::io::Error> {
61 let mut supported_xfeatures: u64 = 0;
62
63 // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer.
64 // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
65 match SyscallReturnCode(unsafe {
66 libc::syscall(
67 libc::SYS_arch_prctl,
68 arch_prctl::ARCH_GET_XCOMP_SUPP,
69 &mut supported_xfeatures as *mut libc::c_ulong,
70 )
71 })
72 .into_empty_result()
73 {
74 Ok(()) => Ok(Some(supported_xfeatures)),
75 // EINVAL is returned if the dynamic XSTATE feature enabling is not supported (e.g. kernel
76 // version prior to v5.16).
77 // https://github.com/torvalds/linux/commit/db8268df0983adc2bb1fb48c9e5f7bfbb5f617f3
78 Err(err) if err.raw_os_error() == Some(libc::EINVAL) => {
79 info!("Dynamic XSTATE feature enabling is not supported.");
80 Ok(None)
81 }
82 Err(err) => Err(err),
83 }
84}
85
86/// Request permission for a dynamic XSTATE feature.
87///
88/// This should be called after `get_supported_xfeatures()` that retrieves supported dynamic XSTATE
89/// features.
90///
91/// Returns Ok(()) if the permission request succeeded or dynamic XSTATE feature enabling for
92/// "guest" is not supported.
93fn request_xfeature_permission(xfeature: u32) -> Result<(), std::io::Error> {
94 // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` value.
95 // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
96 match SyscallReturnCode(unsafe {
97 libc::syscall(
98 libc::SYS_arch_prctl,
99 arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM as libc::c_ulong,
100 xfeature as libc::c_ulong,
101 )
102 })
103 .into_empty_result()
104 {
105 Ok(()) => Ok(()),
106 // EINVAL is returned if the dynamic XSTATE feature enabling for "guest" is not supported
107 // although that for "userspace application" is supported (e.g. kernel versions >= 5.16 and
108 // < 5.17).
109 // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
110 //
111 // Note that XFEATURE_MASK_XTILE (= XFEATURE_MASK_XTILE_DATA | XFEATURE_MASK_XTILE_CFG) was
112 // also added to KVM_SUPPORTED_XCR0 in kernel v5.17. KVM_SUPPORTED_XCR0 is used to
113 // initialize the guest-supported XCR0. Thus, KVM_GET_SUPPORTED_CPUID doesn't
114 // return AMX-half-enabled state, where XTILE_CFG is set but XTILE_DATA is unset, on such
115 // kernels.
116 // https://github.com/torvalds/linux/commit/86aff7a4799286635efd94dab17b513544703cad
117 // https://github.com/torvalds/linux/blame/f443e374ae131c168a065ea1748feac6b2e76613/arch/x86/kvm/x86.c#L8850-L8853
118 // https://github.com/firecracker-microvm/firecracker/pull/5065
119 Err(err) if err.raw_os_error() == Some(libc::EINVAL) => {
120 info!("Dynamic XSTATE feature enabling is not supported for guest.");
121 Ok(())
122 }
123 Err(err) => Err(err),
124 }
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the permitted XSTATE feature bitmap via `ARCH_GET_XCOMP_GUEST_PERM`.
    fn get_permitted_xstate_features() -> Result<u64, std::io::Error> {
        let mut permitted_xfeatures: u64 = 0;
        // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer to a live
        // local variable.
        let ret = unsafe {
            libc::syscall(
                libc::SYS_arch_prctl,
                arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
                &mut permitted_xfeatures as *mut libc::c_ulong,
            )
        };

        SyscallReturnCode(ret)
            .into_empty_result()
            .map(|()| permitted_xfeatures)
    }

    #[test]
    fn test_request_xstate_feature_permission() {
        request_dynamic_xstate_features().unwrap();

        // Nothing to test if dynamic XSTATE feature enabling is not supported on the kernel.
        let Some(supported_xfeatures) = get_supported_xfeatures().unwrap() else {
            return;
        };

        // Every supported dynamic feature must now be permitted (currently only Intel AMX
        // TILEDATA).
        let amx_supported = (supported_xfeatures & INTEL_AMX_MASK) == INTEL_AMX_MASK;
        if amx_supported {
            let permitted_xfeatures = get_permitted_xstate_features().unwrap();
            assert_eq!(permitted_xfeatures & INTEL_AMX_MASK, INTEL_AMX_MASK);
        }
    }
}