extended_pstate/
x86_64.rs

1// Copyright 2023 The Fuchsia Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use static_assertions::const_assert_eq;
6use std::sync::LazyLock;
7
8#[derive(Clone, Copy)]
9pub(crate) struct State {
10    pub(crate) buffer: XSaveArea,
11    strategy: Strategy,
12}
13
/// Size in bytes of the XSAVE area: the 512-byte legacy FXSAVE region, the 64-byte XSAVE header
/// and 256 bytes of AVX state.
pub const XSAVE_AREA_SIZE: usize = 512 + 64 + 256;

// Individual state-component bits of the XSAVE feature mask.
const XSAVE_FEATURE_X87: u64 = 0b001;
const XSAVE_FEATURE_SSE: u64 = 0b010;
const XSAVE_FEATURE_AVX: u64 = 0b100;

/// Feature mask passed to the XSAVE family of instructions: FPU, SSE and AVX registers. This
/// matches the set of features supported by Zircon (see zircon/kernel/arch/x86/registers.cc ).
pub const SUPPORTED_XSAVE_FEATURES: u64 = XSAVE_FEATURE_AVX | XSAVE_FEATURE_SSE | XSAVE_FEATURE_X87;
24
/// One 16-byte x87/MMX register slot of the legacy save area, stored as two 64-bit halves.
#[derive(Default, Copy, Clone)]
#[repr(C)]
struct X87MMXState {
    /// First eight bytes of the slot.
    low: u64,
    /// Last eight bytes of the slot.
    high: u64,
}
31
/// One 128-bit XMM register slot of the legacy save area, stored as two 64-bit halves.
#[derive(Default, Copy, Clone)]
#[repr(C)]
struct SSERegister {
    /// First eight bytes of the register image.
    low: u64,
    /// Last eight bytes of the register image.
    high: u64,
}
38
39// [intel/vol1] Table 10-2. Format of an FXSAVE Area
40#[derive(Clone, Copy)]
41#[repr(C)]
42struct X86LegacySaveArea {
43    fcw: u16,
44    fsw: u16,
45    ftw: u8,
46    _reserved: u8,
47
48    fop: u16,
49    fip: u64,
50    fdp: u64,
51
52    mxcsr: u32,
53    mxcsr_mask: u32,
54
55    st: [X87MMXState; 8],
56
57    xmm: [SSERegister; 16],
58}
59
60const_assert_eq!(std::mem::size_of::<X86LegacySaveArea>(), 416);
61
62#[derive(Clone, Copy)]
63#[repr(C, align(16))]
64struct FXSaveArea {
65    x86_legacy_save_area: X86LegacySaveArea,
66    _reserved: [u8; 96],
67}
68const_assert_eq!(std::mem::size_of::<FXSaveArea>(), 512);
69
70impl Default for FXSaveArea {
71    fn default() -> Self {
72        Self {
73            x86_legacy_save_area: X86LegacySaveArea {
74                fcw: 0x37f, // All exceptions masked, no exceptions raised.
75                fsw: 0,
76                // The ftw field stores an abbreviated version where all zero bits match the default.
77                // See [intel/vol1] 10.5.1.1 x87 State for details.
78                ftw: 0,
79                _reserved: Default::default(),
80                fop: 0,
81                fip: 0,
82                fdp: 0,
83                mxcsr: 0x3f << 7, // All exceptions masked, no exceptions raised.
84                mxcsr_mask: 0,
85                st: Default::default(),
86                xmm: Default::default(),
87            },
88            _reserved: [0; 96],
89        }
90    }
91}
92
93#[derive(Clone, Copy)]
94#[repr(C, align(64))]
95pub(crate) struct XSaveArea {
96    fxsave_area: FXSaveArea,
97    xsave_header: [u8; 64],
98    // High 128 bits of ymm0-15 registers
99    avx_state: [u8; 256],
100    // TODO: Size of the extended region is dynamic depending on which features are enabled.
101    // See [intel/vol1] 13.5 XSAVE-MANAGED STATE
102}
103
104const_assert_eq!(std::mem::size_of::<XSaveArea>(), XSAVE_AREA_SIZE);
105
106impl XSaveArea {
107    fn addr(&self) -> *const u8 {
108        self as *const _ as *const u8
109    }
110
111    fn addr_mut(&mut self) -> *mut u8 {
112        self as *mut _ as *mut u8
113    }
114}
115
116impl Default for XSaveArea {
117    fn default() -> Self {
118        Self { fxsave_area: Default::default(), xsave_header: [0; 64], avx_state: [0; 256] }
119    }
120}
121
/// Instruction pair used to save and restore the extended processor state.
///
/// The variants are listed from most to least preferred; the derived `PartialOrd` follows this
/// declaration order.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum Strategy {
    /// Save with XSAVEOPT, restore with XRSTOR.
    XSaveOpt,
    /// Save with XSAVE, restore with XRSTOR.
    XSave,
    /// Save with FXSAVE, restore with FXRSTOR.
    FXSave,
}
128
129pub static PREFERRED_STRATEGY: LazyLock<Strategy> = LazyLock::new(|| {
130    if is_x86_feature_detected!("xsaveopt") {
131        Strategy::XSaveOpt
132    } else if is_x86_feature_detected!("xsave") {
133        Strategy::XSave
134    } else {
135        // The FXSave strategy does not preserve the high 128 bits of the YMM
136        // register. If we find hardware that requires this, we need to add
137        // support for saving and restoring these through load/store
138        // instructions with the VEX.256 prefix and remove this assertion.
139        // [intel/vol1]: 14.8 ACCESSING YMM REGISTERS
140        assert!(!is_x86_feature_detected!("avx"));
141        Strategy::FXSave
142    }
143});
144
145impl State {
146    pub fn with_strategy(strategy: Strategy) -> Self {
147        Self { buffer: XSaveArea::default(), strategy }
148    }
149
150    #[inline(always)]
151    pub(crate) fn save(&mut self) {
152        #[allow(
153            clippy::undocumented_unsafe_blocks,
154            reason = "Force documented unsafe blocks in Starnix"
155        )]
156        match self.strategy {
157            Strategy::XSaveOpt => unsafe {
158                std::arch::x86_64::_xsaveopt(self.buffer.addr_mut(), SUPPORTED_XSAVE_FEATURES);
159            },
160            Strategy::XSave => unsafe {
161                std::arch::x86_64::_xsave(self.buffer.addr_mut(), SUPPORTED_XSAVE_FEATURES);
162            },
163            Strategy::FXSave => unsafe {
164                std::arch::x86_64::_fxsave(self.buffer.addr_mut());
165            },
166        }
167    }
168
169    #[inline(always)]
170    // Safety: See comment in lib.rs.
171    pub(crate) unsafe fn restore(&self) {
172        #[allow(clippy::undocumented_unsafe_blocks, reason = "2024 edition migration")]
173        match self.strategy {
174            Strategy::XSave | Strategy::XSaveOpt => unsafe {
175                std::arch::x86_64::_xrstor(self.buffer.addr(), SUPPORTED_XSAVE_FEATURES)
176            },
177            Strategy::FXSave => unsafe { std::arch::x86_64::_fxrstor(self.buffer.addr()) },
178        }
179    }
180
181    pub fn reset(&mut self) {
182        self.initialize_saved_area()
183    }
184
185    fn initialize_saved_area(&mut self) {
186        *self = Default::default()
187    }
188
189    pub(crate) fn set_xsave_area(&mut self, xsave_area: [u8; XSAVE_AREA_SIZE]) {
190        self.buffer = {
191            #[allow(
192                clippy::undocumented_unsafe_blocks,
193                reason = "Force documented unsafe blocks in Starnix"
194            )]
195            unsafe {
196                std::mem::transmute(xsave_area)
197            }
198        };
199
200        // The tail of the FXSAVE are is unused and is ignored. It may be modified when returning
201        // from a signal handler. Reset it to zeros.
202        self.buffer.fxsave_area._reserved = [0u8; 96];
203    }
204}
205
206impl Default for State {
207    fn default() -> Self {
208        Self { buffer: XSaveArea::default(), strategy: *PREFERRED_STRATEGY }
209    }
210}
211
#[cfg(test)]
mod test {
    use super::*;
    use core::arch::asm;
    use std::mem::MaybeUninit;

    // Byte patterns loaded into xmm0, xmm1 and xmm2 respectively.
    const XMM_PATTERNS: [u8; 3] = [0x42, 0x43, 0x44];

    /// Returns the current x87 FPU status word.
    fn read_x87_status_word() -> u16 {
        let mut fpust = 0u16;
        // SAFETY: `fnstsw` stores two bytes through the pointer, which refers to a live,
        // writable `u16`.
        unsafe {
            asm!("fnstsw [{fpust}]", fpust = in(reg) &mut fpust);
        }
        fpust
    }

    /// Returns the current x87 FPU control word.
    fn read_x87_control_word() -> u16 {
        let mut fpucw = 0u16;
        // SAFETY: `fnstcw` stores two bytes through the pointer, which refers to a live,
        // writable `u16`.
        unsafe {
            asm!("fnstcw [{fpucw}]", fpucw = in(reg) &mut fpucw);
        }
        fpucw
    }

    /// Loads `fpucw` into the x87 FPU control word.
    fn write_x87_control_word(fpucw: u16) {
        // SAFETY: `fldcw` reads two bytes through the pointer, which refers to a live `u16`.
        // Changing the floating-point environment is the purpose of this test.
        unsafe {
            asm!("fldcw [{fpucw}]", fpucw = in(reg) &fpucw);
        }
    }

    /// Returns the current value of the MXCSR register.
    fn read_mxcsr() -> u32 {
        let mut mxcsr = 0u32;
        // SAFETY: `stmxcsr` stores four bytes through the pointer, which refers to a live,
        // writable `u32`.
        unsafe {
            asm!("stmxcsr [{mxcsr}]", mxcsr = in(reg) &mut mxcsr);
        }
        mxcsr
    }

    /// Loads `mxcsr` into the MXCSR register.
    fn write_mxcsr(mxcsr: u32) {
        // SAFETY: `ldmxcsr` reads four bytes through the pointer, which refers to a live `u32`.
        // Changing the floating-point environment is the purpose of this test.
        unsafe {
            asm!("ldmxcsr [{mxcsr}]", mxcsr = in(reg) &mxcsr);
        }
    }

    /// Returns the contents of xmm0, xmm1 and xmm2.
    ///
    /// All three registers are captured by a single asm block, and the destination is left
    /// uninitialized beforehand, so that no compiler-generated code (such as vectorized zeroing or
    /// comparisons) gets a chance to overwrite a register before it has been read.
    fn read_xmm0_to_xmm2() -> [[u8; 16]; 3] {
        let mut regs = MaybeUninit::<[[u8; 16]; 3]>::uninit();
        // SAFETY: the three `movups` stores write 48 bytes in total, exactly the size of the
        // buffer behind the pointer.
        unsafe {
            asm!(
                "movups [{dest}], xmm0",
                "movups [{dest} + 16], xmm1",
                "movups [{dest} + 32], xmm2",
                dest = in(reg) regs.as_mut_ptr(),
            );
        }
        // SAFETY: the asm block above initialized every byte of `regs`.
        unsafe { regs.assume_init() }
    }

    /// Puts recognizable, non-default values into the x87, MXCSR and SSE register state.
    fn write_custom_state() {
        // x87 FPU status word
        //   x87 FPU Status Word: FSTSW/FNSTSW, FSTENV/FNSTENV
        // Exception state lives in the status word
        // The exception flags are "sticky" bits (once set, they remain set until explicitly
        // cleared). They can be cleared by executing the FCLEX/FNCLEX (clear exceptions)
        // instructions, by reinitializing the x87 FPU with the FINIT/FNINIT or FSAVE/FNSAVE
        // instructions, or by overwriting the flags with an FRSTOR or FLDENV instruction.

        // We expect the FPU stack to be empty. Pop a value to generate a stack underflow exception
        let mut flt = [0u8; 8];
        // SAFETY: `fstp dword ptr` stores four bytes through the pointer, which refers to a live,
        // writable 8-byte buffer.
        unsafe {
            asm!("fstp dword ptr [{flt}]", flt = in(reg) flt.as_mut_ptr());
        }
        // Check that the IE and SF bits are 1 and the C1 flag is 0.
        // [intel/vol1] 8.5.1.1 Stack Overflow or Underflow Exception (#IS)
        let fpust = read_x87_status_word();
        assert_eq!(fpust & 1 << 0, 0x1); // IE flag, bit 0
        assert_eq!(fpust & 1 << 6, 1 << 6); // SF flag, bit 6
        assert_eq!(fpust & 1 << 9, 0); // C1 flag, bit 9.

        // x87 FPU control word: unmask all 6 x87 exceptions.
        write_x87_control_word(read_x87_control_word() & !0x3f);

        // MXCSR: unmask the lowest 3 exceptions.
        write_mxcsr(read_mxcsr() & !(0x7 << 7));

        // Populate SSE registers. This is done last so that nothing else in this function runs
        // between loading the registers and returning.
        let vals_a = [XMM_PATTERNS[0]; 16];
        let vals_b = [XMM_PATTERNS[1]; 16];
        let vals_c = [XMM_PATTERNS[2]; 16];
        // SAFETY: each `movups` reads 16 bytes through a pointer to a live 16-byte array, and
        // the written registers are declared as clobbered.
        unsafe {
            asm!(
                "movups xmm0, [{vals_a}]",
                "movups xmm1, [{vals_b}]",
                "movups xmm2, [{vals_c}]",
                vals_a = in(reg) &vals_a,
                vals_b = in(reg) &vals_b,
                vals_c = in(reg) &vals_c,
                out("xmm0") _,
                out("xmm1") _,
                out("xmm2") _,
            );
        }
    }

    /// Returns the x87, MXCSR and xmm0-xmm2 state to known initial values.
    fn clear_state() {
        // Reinitialize x87 FPU
        // SAFETY: `fninit` takes no memory operand.
        unsafe {
            asm!("fninit");
        }
        // Reset SSE control state to all exceptions masked, no exceptions detected
        write_mxcsr(0x3f << 7);
        // Clear SSE registers
        // SAFETY: the instructions take no memory operands, and the written registers are
        // declared as clobbered.
        unsafe {
            asm!(
                "xorps xmm0, xmm0",
                "xorps xmm1, xmm1",
                "xorps xmm2, xmm2",
                out("xmm0") _,
                out("xmm1") _,
                out("xmm2") _,
            );
        }
    }

    /// Asserts that the register state matches what `clear_state` sets up.
    fn validate_state_cleared() {
        // Capture everything before asserting anything.
        let xmm = read_xmm0_to_xmm2();
        let fpust = read_x87_status_word();
        let fpucw = read_x87_control_word();
        let mxcsr = read_mxcsr();

        assert_eq!(fpust, 0);
        assert_eq!(fpucw, 0x37f); // Initial FPU state per [intel/vol1] 8.1.5 x87 FPU Control Word

        assert_eq!(mxcsr & 0x1f, 0); // No exceptions raised.
        assert_eq!((mxcsr >> 7) & 0x3f, 0x3f); // All exceptions masked.

        assert_eq!(xmm, [[0u8; 16]; 3]);
    }

    /// Asserts that the register state matches what `write_custom_state` set up.
    fn validate_state_restored() {
        // Capture everything before asserting anything.
        let xmm = read_xmm0_to_xmm2();
        let fpust = read_x87_status_word();
        let fpucw = read_x87_control_word();
        let mxcsr = read_mxcsr();

        // x87 FPU status word
        // Check that the IE and SF bits are 1 and the C1 flag is 0.
        // [intel/vol1] 8.5.1.1 Stack Overflow or Underflow Exception (#IS)
        assert_eq!(fpust & 1 << 0, 0x1); // IE flag, bit 0
        assert_eq!(fpust & 1 << 6, 1 << 6); // SF flag, bit 6
        assert_eq!(fpust & 1 << 9, 0); // C1 flag, bit 9.

        // x87 FPU control word
        assert_eq!(fpucw, 0x340); // All exceptions unmasked, 64 bit precision, round to nearest.

        assert_eq!(mxcsr & 0x1f, 0); // No exceptions raised.
        assert_eq!((mxcsr >> 7) & 0x3f, 0x38); // First 3 exceptions unmasked, rest masked.

        // SSE registers
        assert_eq!(
            xmm,
            [[XMM_PATTERNS[0]; 16], [XMM_PATTERNS[1]; 16], [XMM_PATTERNS[2]; 16]]
        );
    }

    #[::fuchsia::test]
    fn save_restore_sse_registers() {
        let mut state = State::default();
        write_custom_state();
        state.save();
        clear_state();
        validate_state_cleared();
        // SAFETY: `state.buffer` was filled by `state.save()` above, on this thread and with the
        // same strategy. NOTE(review): confirm this satisfies the contract that `restore` refers
        // to in lib.rs.
        unsafe {
            state.restore();
        }
        validate_state_restored();
    }
}