// extended_pstate/x86_64.rs
1// Copyright 2023 The Fuchsia Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use static_assertions::const_assert_eq;
6use std::sync::LazyLock;
7
/// Saved extended processor state (x87/SSE/AVX registers) for one thread,
/// plus the strategy used to save and restore it.
///
/// The field offsets are part of the ABI shared with `x86_64_asm.S` (asserted
/// below), so the field order must not change.
#[derive(Clone, Copy)]
#[repr(C)]
pub(crate) struct State {
    // Raw XSAVE buffer that the assembly save/restore routines read and write.
    pub(crate) buffer: XSaveArea,
    // Which save/restore instruction family to use; see `Strategy`.
    strategy: Strategy,
}
14
// Ensure ABI compatibility with assembly routines in `x86_64_asm.S`.
// These offsets are hard-coded in the assembly; the LINT markers tie the two
// files together so a change in one flags the other.
static_assertions::assert_eq_align!(State, XSaveArea);
// LINT.IfChange(x86_64_state_offsets)
const_assert_eq!(std::mem::offset_of!(State, buffer), 0);
const_assert_eq!(std::mem::offset_of!(State, strategy), 832);
// LINT.ThenChange(x86_64_asm.S:x86_64_state_offsets)

// Size of the XSAVE area: 512-byte legacy FXSAVE image + 64-byte XSAVE header
// + 256 bytes of AVX state (see `XSaveArea` below).
pub const XSAVE_AREA_SIZE: usize = 832;
24
// Bits of the XSAVE state-component bitmap (the mask passed to the XSAVE
// instruction family). See [intel/vol1] 13.1.
const XSAVE_FEATURE_X87: u64 = 1 << 0;
const XSAVE_FEATURE_SSE: u64 = 1 << 1;
const XSAVE_FEATURE_AVX: u64 = 1 << 2;

// Save FPU, SSE and AVX registers. This matches the set of features supported by Zircon (see
// zircon/kernel/arch/x86/registers.cc ).
pub const SUPPORTED_XSAVE_FEATURES: u64 = XSAVE_FEATURE_X87 | XSAVE_FEATURE_SSE | XSAVE_FEATURE_AVX;

// Number of XMM registers available in 64-bit mode (xmm0-xmm15).
const NUM_XMM_REGS: usize = 16;
34
// One x87/MMX register slot in the FXSAVE image. The registers are 80 bits
// wide but each slot occupies 16 bytes (see [intel/vol1] Table 10-2).
#[derive(Clone, Copy, Default)]
#[repr(C)]
struct X87MMXState {
    low: u64,
    high: u64,
}
41
// One 128-bit XMM register, stored as two 64-bit halves.
#[derive(Clone, Copy, Default)]
#[repr(C)]
struct SSERegister {
    low: u64,
    high: u64,
}
48
// [intel/vol1] Table 10-2. Format of an FXSAVE Area
#[derive(Clone, Copy)]
#[repr(C)]
struct X86LegacySaveArea {
    // x87 FPU control word.
    fcw: u16,
    // x87 FPU status word.
    fsw: u16,
    // Abbreviated x87 tag word (one bit per register).
    ftw: u8,
    _reserved: u8,

    // Last executed x87 opcode.
    fop: u16,
    // Last x87 instruction pointer.
    fip: u64,
    // Last x87 data-operand pointer.
    fdp: u64,

    // SSE control/status register.
    mxcsr: u32,
    // Mask of writable MXCSR bits.
    mxcsr_mask: u32,

    // x87/MMX registers st0-st7.
    st: [X87MMXState; 8],

    // SSE registers xmm0-xmm15.
    xmm: [SSERegister; NUM_XMM_REGS],
}

// 32 bytes of control/status fields + 8 * 16 bytes of st + 16 * 16 bytes of xmm.
const_assert_eq!(std::mem::size_of::<X86LegacySaveArea>(), 416);
71
// The full 512-byte FXSAVE image: the legacy save area followed by an unused
// tail. The FXSAVE/FXRSTOR instructions require 16-byte alignment.
#[derive(Clone, Copy)]
#[repr(C, align(16))]
struct FXSaveArea {
    x86_legacy_save_area: X86LegacySaveArea,
    // Unused tail of the FXSAVE image; kept zeroed (see `set_xsave_area`).
    _reserved: [u8; 96],
}
const_assert_eq!(std::mem::size_of::<FXSaveArea>(), 512);
79
80impl Default for FXSaveArea {
81    fn default() -> Self {
82        Self {
83            x86_legacy_save_area: X86LegacySaveArea {
84                fcw: 0x37f, // All exceptions masked, no exceptions raised.
85                fsw: 0,
86                // The ftw field stores an abbreviated version where all zero bits match the default.
87                // See [intel/vol1] 10.5.1.1 x87 State for details.
88                ftw: 0,
89                _reserved: Default::default(),
90                fop: 0,
91                fip: 0,
92                fdp: 0,
93                mxcsr: 0x3f << 7, // All exceptions masked, no exceptions raised.
94                mxcsr_mask: 0,
95                st: Default::default(),
96                xmm: Default::default(),
97            },
98            _reserved: [0; 96],
99        }
100    }
101}
102
// Layout of the buffer written by the XSAVE instruction family: the legacy
// FXSAVE image, then the 64-byte XSAVE header, then the extended region.
// The XSAVE instructions require 64-byte alignment.
#[derive(Clone, Copy)]
#[repr(C, align(64))]
pub(crate) struct XSaveArea {
    fxsave_area: FXSaveArea,
    xsave_header: [u8; 64],
    // High 128 bits of ymm0-15 registers
    avx_state: [u8; 256],
    // TODO: Size of the extended region is dynamic depending on which features are enabled.
    // See [intel/vol1] 13.5 XSAVE-MANAGED STATE
}

// 512 (FXSAVE image) + 64 (header) + 256 (AVX state) = 832 bytes.
const_assert_eq!(std::mem::size_of::<XSaveArea>(), XSAVE_AREA_SIZE);
115
116impl Default for XSaveArea {
117    fn default() -> Self {
118        Self { fxsave_area: Default::default(), xsave_header: [0; 64], avx_state: [0; 256] }
119    }
120}
121
/// Instruction family used to save and restore extended processor state.
///
/// The discriminant values are hard-coded in `x86_64_asm.S` (see the LINT
/// markers below), so they must not change.
// `Eq` added alongside the derived `PartialEq`: equality over these fieldless
// variants is total (clippy::derive_partial_eq_without_eq).
#[derive(PartialEq, Eq, Debug, Copy, Clone, PartialOrd)]
#[repr(u32)]
pub enum Strategy {
    // LINT.IfChange(strategy_discriminants)
    XSaveOpt = 0,
    XSave = 1,
    FXSave = 2,
    // LINT.ThenChange(x86_64_asm.S:strategy_discriminants)
}
131
/// The best save/restore strategy supported by the current CPU, detected once
/// on first use. Preference order: XSAVEOPT, then XSAVE, then FXSAVE.
pub static PREFERRED_STRATEGY: LazyLock<Strategy> = LazyLock::new(|| {
    if is_x86_feature_detected!("xsaveopt") {
        Strategy::XSaveOpt
    } else if is_x86_feature_detected!("xsave") {
        Strategy::XSave
    } else {
        // The FXSave strategy does not preserve the high 128 bits of the YMM
        // register. If we find hardware that requires this, we need to add
        // support for saving and restoring these through load/store
        // instructions with the VEX.256 prefix and remove this assertion.
        // [intel/vol1]: 14.8 ACCESSING YMM REGISTERS
        assert!(!is_x86_feature_detected!("avx"));
        Strategy::FXSave
    }
});
147
148impl State {
149    pub fn with_strategy(strategy: Strategy) -> Self {
150        Self { buffer: XSaveArea::default(), strategy }
151    }
152
153    pub fn reset(&mut self) {
154        self.initialize_saved_area()
155    }
156
157    fn initialize_saved_area(&mut self) {
158        *self = Default::default()
159    }
160
161    pub(crate) fn set_xsave_area(&mut self, xsave_area: [u8; XSAVE_AREA_SIZE]) {
162        self.buffer = {
163            #[allow(
164                clippy::undocumented_unsafe_blocks,
165                reason = "Force documented unsafe blocks in Starnix"
166            )]
167            unsafe {
168                std::mem::transmute(xsave_area)
169            }
170        };
171
172        // The tail of the FXSAVE are is unused and is ignored. It may be modified when returning
173        // from a signal handler. Reset it to zeros.
174        self.buffer.fxsave_area._reserved = [0u8; 96];
175    }
176}
177
178impl Default for State {
179    fn default() -> Self {
180        Self { buffer: XSaveArea::default(), strategy: *PREFERRED_STRATEGY }
181    }
182}
183
184#[cfg(test)]
185mod test {
186    use super::*;
187    use core::arch::asm;
188
189    const XMM_REG_SIZE: usize = std::mem::size_of::<u128>();
190
    /// Round-trips the XMM registers through the assembly save/restore
    /// routines: loads a distinct sentinel into each of xmm0-15, calls the
    /// save routine, zeroes the registers, calls the restore routine, then
    /// verifies both the restored register values and the XMM section of the
    /// saved XSAVE buffer.
    #[fuchsia::test]
    fn test_save_restore_x86_64() {
        let mut state = crate::ExtendedPstateState::default();
        let mut pstate_ptr_struct = crate::ExtendedPstatePointer { extended_pstate: &mut state };
        let pstate_ptr = &mut pstate_ptr_struct as *mut crate::ExtendedPstatePointer;

        // Buffer the restored register values are written into (step 5 below).
        let mut restored_regs = [0u128; NUM_XMM_REGS];
        let restored_regs_ptr = restored_regs.as_mut_ptr() as *mut u8;

        // A different sentinel per register so a swap between registers is detected.
        let base_sentinel: u128 = 0x01234567_89ABCDEF_FEDCBA98_76543210_u128;
        let mut sentinels_xmm = [0u128; NUM_XMM_REGS];
        for i in 0..NUM_XMM_REGS {
            sentinels_xmm[i] = base_sentinel + i as u128;
        }

        // SAFETY: all memory accesses are to mutable variables on the stack and all clobbers are
        // specified.
        unsafe {
            asm!(
                // 1. Load sentinels into registers
                "movdqu xmm0, [{sentinels_xmm}]",
                "movdqu xmm1, [{sentinels_xmm} + 16]",
                "movdqu xmm2, [{sentinels_xmm} + 32]",
                "movdqu xmm3, [{sentinels_xmm} + 48]",
                "movdqu xmm4, [{sentinels_xmm} + 64]",
                "movdqu xmm5, [{sentinels_xmm} + 80]",
                "movdqu xmm6, [{sentinels_xmm} + 96]",
                "movdqu xmm7, [{sentinels_xmm} + 112]",
                "movdqu xmm8, [{sentinels_xmm} + 128]",
                "movdqu xmm9, [{sentinels_xmm} + 144]",
                "movdqu xmm10, [{sentinels_xmm} + 160]",
                "movdqu xmm11, [{sentinels_xmm} + 176]",
                "movdqu xmm12, [{sentinels_xmm} + 192]",
                "movdqu xmm13, [{sentinels_xmm} + 208]",
                "movdqu xmm14, [{sentinels_xmm} + 224]",
                "movdqu xmm15, [{sentinels_xmm} + 240]",

                // 2. Call save routine
                // The pointers live in r12/r13, which are callee-saved in the
                // System V ABI, so they survive the `call`s below.
                "mov rdi, r12",
                "call {save_fn}",

                // 3. Zero registers
                "pxor xmm0, xmm0",
                "pxor xmm1, xmm1",
                "pxor xmm2, xmm2",
                "pxor xmm3, xmm3",
                "pxor xmm4, xmm4",
                "pxor xmm5, xmm5",
                "pxor xmm6, xmm6",
                "pxor xmm7, xmm7",
                "pxor xmm8, xmm8",
                "pxor xmm9, xmm9",
                "pxor xmm10, xmm10",
                "pxor xmm11, xmm11",
                "pxor xmm12, xmm12",
                "pxor xmm13, xmm13",
                "pxor xmm14, xmm14",
                "pxor xmm15, xmm15",

                // 4. Call restore routine
                "mov rdi, r12",
                "call {restore_fn}",

                // 5. Save registers to buffer
                "movdqu [r13], xmm0",
                "movdqu [r13 + 16], xmm1",
                "movdqu [r13 + 32], xmm2",
                "movdqu [r13 + 48], xmm3",
                "movdqu [r13 + 64], xmm4",
                "movdqu [r13 + 80], xmm5",
                "movdqu [r13 + 96], xmm6",
                "movdqu [r13 + 112], xmm7",
                "movdqu [r13 + 128], xmm8",
                "movdqu [r13 + 144], xmm9",
                "movdqu [r13 + 160], xmm10",
                "movdqu [r13 + 176], xmm11",
                "movdqu [r13 + 192], xmm12",
                "movdqu [r13 + 208], xmm13",
                "movdqu [r13 + 224], xmm14",
                "movdqu [r13 + 240], xmm15",

                sentinels_xmm = in(reg) &sentinels_xmm,
                in("r12") pstate_ptr,
                in("r13") restored_regs_ptr,
                save_fn = sym crate::save_extended_pstate,
                restore_fn = sym crate::restore_extended_pstate,
                clobber_abi("C"),
                out("rdi") _,
                out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
                out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
                out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
                out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
            );
        }

        // Assertions
        // The restore routine must have brought every sentinel back into the registers.
        for i in 0..NUM_XMM_REGS {
            assert_eq!(restored_regs[i], sentinels_xmm[i], "restored_regs[{}] mismatch", i);
        }

        // The xmm registers start at byte 160 of the legacy FXSAVE region:
        // 32 bytes of control/status fields + 8 * 16-byte st slots.
        let saved_xsave = state.get_x64_xsave_area();
        for i in 0..NUM_XMM_REGS {
            let offset = 160 + i * XMM_REG_SIZE;
            let val =
                u128::from_le_bytes(saved_xsave[offset..offset + XMM_REG_SIZE].try_into().unwrap());
            assert_eq!(val, sentinels_xmm[i], "saved_xsave.xmm[{}] mismatch", i);
        }
    }
299}