Skip to main content

starnix_core/arch/x64/
signal_handling.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::signals::{SignalInfo, SignalState};
6use crate::task::{ArchExtendedPstateStorage, CurrentTask, Task};
7use starnix_logging::log_debug;
8use starnix_registers::{RegisterState, RegisterStorageEnum};
9use starnix_types::arch::ArchWidth;
10use starnix_uapi::errors::Errno;
11use starnix_uapi::signals::SigSet;
12use starnix_uapi::user_address::UserAddress;
13use starnix_uapi::{
14    self as uapi, error, sigaction_t, sigaltstack, sigcontext, siginfo_t, ucontext,
15};
16use static_assertions::const_assert_eq;
17
/// The size of the red zone, in bytes.
///
/// From the AMD64 ABI:
///   > The 128-byte area beyond the location pointed to
///   > by %rsp is considered to be reserved and shall not be modified by signal or
///   > interrupt handlers. Therefore, functions may use this area for temporary
///   > data that is not needed across function calls. In particular, leaf functions
///   > may use this area for their entire stack frame, rather than adjusting the
///   > stack pointer in the prologue and epilogue. This area is known as the red
///   > zone.
pub const RED_ZONE_SIZE: u64 = 128;
29
/// A `SignalStackFrame` contains all the state that is stored on the stack prior
/// to executing a signal handler.
///
/// The ordering of the fields is significant, as it is part of the syscall ABI. In particular,
/// `restorer_address` must be the first field, since that is where the signal handler will return
/// after it finishes executing.
#[repr(C)]
pub struct SignalStackFrame {
    /// The address of the signal restorer (the action's `sa_restorer`), i.e. the code the signal
    /// handler returns to. This is not the address of the handler itself.
    ///
    /// Must be the first field, to be positioned to serve as the return address.
    restorer_address: u64,

    /// Information about the signal, stored as the raw bytes of a `siginfo_t`.
    pub siginfo_bytes: [u8; std::mem::size_of::<siginfo_t>()],

    /// The state of the thread at the time the signal was handled.
    pub context: ucontext,

    /// Extended CPU state, i.e., FPU, SSE & AVX registers.
    xstate: XState,
}
52
/// CPU state that needs to be restored when returning from the signal handler and that is not
/// included in the `ucontext`. Currently it contains just a `uapi::_xstate`, which stores the
/// X87, SSE and AVX registers. This matches the set of extensions supported by Zircon. In the
/// future it may be extended with a buffer for other extensions (e.g. AVX-512). That buffer
/// should be added between `base_xstate` and `xstate_magic2`.
/// See https://github.com/google/gvisor/blob/master/pkg/sentry/arch/fpu/fpu_amd64_unsafe.go
/// for the corresponding code in gVisor.
#[repr(C, packed)]
struct XState {
    // The XSAVE-formatted register state, including the software-reserved header bytes.
    base_xstate: uapi::_xstate,

    // Magic value marking the end of the `xstate`. Should be set to `FP_XSTATE_MAGIC2`.
    xstate_magic2: u32,
}
67
// There should be no padding in front of `xstate_magic2`.
const_assert_eq!(
    std::mem::size_of::<XState>(),
    std::mem::size_of::<uapi::_xstate>() + std::mem::size_of::<u32>()
);

/// The size, in bytes, of a `SignalStackFrame`; also the length of its byte-array form used by
/// `SignalStackFrame::as_bytes` and `SignalStackFrame::from_bytes`.
pub const SIG_STACK_SIZE: usize = std::mem::size_of::<SignalStackFrame>();
75
76impl SignalStackFrame {
77    pub fn new(
78        _task: &Task,
79        arch_width: ArchWidth,
80        registers: &RegisterState<RegisterStorageEnum>,
81        extended_pstate: &ArchExtendedPstateStorage,
82        signal_state: &SignalState,
83        siginfo: &SignalInfo,
84        action: sigaction_t,
85        stack_pointer: UserAddress,
86    ) -> Result<SignalStackFrame, Errno> {
87        let fpstate_addr = (uapi::uaddr {
88            addr: stack_pointer.ptr() as u64
89                + memoffset::offset_of!(SignalStackFrame, xstate) as u64,
90        })
91        .into();
92        let context = ucontext {
93            uc_mcontext: sigcontext {
94                r8: registers.r8,
95                r9: registers.r9,
96                r10: registers.r10,
97                r11: registers.r11,
98                r12: registers.r12,
99                r13: registers.r13,
100                r14: registers.r14,
101                r15: registers.r15,
102                rdi: registers.rdi,
103                rsi: registers.rsi,
104                rbp: registers.rbp,
105                rbx: registers.rbx,
106                rdx: registers.rdx,
107                rax: registers.rax,
108                rcx: registers.rcx,
109                rsp: registers.rsp,
110                rip: registers.ip,
111                eflags: registers.flags,
112                oldmask: signal_state.mask().into(),
113                fpstate: fpstate_addr,
114                ..Default::default()
115            },
116            uc_stack: signal_state
117                .alt_stack
118                .map(|stack| sigaltstack {
119                    ss_sp: stack.ss_sp.into(),
120                    ss_flags: stack.ss_flags as i32,
121                    ss_size: stack.ss_size as u64,
122                    ..Default::default()
123                })
124                .unwrap_or_default(),
125            uc_sigmask: signal_state.mask().into(),
126            ..Default::default()
127        };
128        Ok(SignalStackFrame {
129            context,
130            siginfo_bytes: siginfo.as_siginfo_bytes(arch_width)?,
131            restorer_address: action.sa_restorer.addr,
132            xstate: get_xstate(extended_pstate),
133        })
134    }
135
136    pub fn as_bytes(&self) -> &[u8; SIG_STACK_SIZE] {
137        #[allow(
138            clippy::undocumented_unsafe_blocks,
139            reason = "Force documented unsafe blocks in Starnix"
140        )]
141        unsafe {
142            std::mem::transmute(self)
143        }
144    }
145
146    pub fn from_bytes(bytes: [u8; SIG_STACK_SIZE]) -> SignalStackFrame {
147        #[allow(
148            clippy::undocumented_unsafe_blocks,
149            reason = "Force documented unsafe blocks in Starnix"
150        )]
151        unsafe {
152            std::mem::transmute(bytes)
153        }
154    }
155
156    pub fn get_signal_mask(&self, _is_arch32: bool) -> SigSet {
157        self.context.uc_sigmask.into()
158    }
159}
160
/// Aligns the stack pointer to be 16 byte aligned, and then misaligns it by 8 bytes.
///
/// This is done because x86-64 functions expect the stack to be misaligned by 8 bytes,
/// as if the stack was 16 byte aligned and then someone used a call instruction. This
/// is due to alignment-requiring SSE instructions.
///
/// The result is always strictly below `pointer` (by 8 to 23 bytes) and always satisfies
/// `result % 16 == 8`.
///
/// A `pointer` below 16 has no valid result in the non-negative range. Rather than panicking
/// on arithmetic overflow, the subtraction wraps around the 64-bit address space. The wrapped
/// value still satisfies the alignment contract, so callers are expected to reject it as an
/// invalid address when they try to use it.
pub fn align_stack_pointer(pointer: u64) -> u64 {
    // Clearing the low four bits rounds down to a 16-byte boundary; stepping back another
    // 8 bytes mimics the return address pushed by a `call`.
    (pointer & !0xF).wrapping_sub(8)
}
169
170fn get_xstate(extended_pstate: &ArchExtendedPstateStorage) -> XState {
171    let extended_pstate = match extended_pstate {
172        ArchExtendedPstateStorage::State64(extended_pstate) => extended_pstate,
173    };
174    const_assert_eq!(std::mem::size_of::<uapi::_xstate>(), extended_pstate::X64_XSAVE_AREA_SIZE);
175
176    #[allow(
177        clippy::undocumented_unsafe_blocks,
178        reason = "Force documented unsafe blocks in Starnix"
179    )]
180    let mut xstate = XState {
181        // `_xstate` layout matches the layout of the XSAVE area.
182        base_xstate: unsafe { std::mem::transmute(extended_pstate.get_x64_xsave_area()) },
183        xstate_magic2: uapi::FP_XSTATE_MAGIC2,
184    };
185
186    xstate.base_xstate.fpstate.__bindgen_anon_1.sw_reserved = uapi::_fpx_sw_bytes {
187        // `FP_XSTATE_MAGIC1` is used to indicate that the signal stack contains the `xstate`,
188        // which includes not just the default X87 registers (included in `fpstate`), but also
189        // other extensions, such as SSE and AVX. The end of the `xstate` buffer is marked with
190        // `FP_XSTATE_MAGIC2`.
191        magic1: uapi::FP_XSTATE_MAGIC1,
192        extended_size: std::mem::size_of::<XState>() as u32,
193        // TODO: CPU features should be detected dynamically.
194        xfeatures: extended_pstate::X64_SUPPORTED_XSAVE_FEATURES,
195        xstate_size: std::mem::size_of::<uapi::_xstate>() as u32,
196        ..Default::default()
197    };
198
199    xstate
200}
201
202pub fn restore_registers(
203    current_task: &mut CurrentTask,
204    signal_stack_frame: &SignalStackFrame,
205    _stack_pointer: UserAddress,
206) -> Result<(), Errno> {
207    let uctx = &signal_stack_frame.context.uc_mcontext;
208    // Restore the register state from before executing the signal handler.
209    let restored_regs = zx::sys::zx_restricted_state_t {
210        r8: uctx.r8,
211        r9: uctx.r9,
212        r10: uctx.r10,
213        r11: uctx.r11,
214        r12: uctx.r12,
215        r13: uctx.r13,
216        r14: uctx.r14,
217        r15: uctx.r15,
218        rax: uctx.rax,
219        rbx: uctx.rbx,
220        rcx: uctx.rcx,
221        rdx: uctx.rdx,
222        rsi: uctx.rsi,
223        rdi: uctx.rdi,
224        rbp: uctx.rbp,
225        rsp: uctx.rsp,
226        ip: uctx.rip,
227        flags: uctx.eflags,
228        fs_base: current_task.thread_state.registers.fs_base,
229        gs_base: current_task.thread_state.registers.gs_base,
230    };
231    current_task.thread_state.registers.load(restored_regs);
232
233    let xstate = &signal_stack_frame.xstate;
234    #[allow(
235        clippy::undocumented_unsafe_blocks,
236        reason = "Force documented unsafe blocks in Starnix"
237    )]
238    let fpx_sw_bytes = unsafe { xstate.base_xstate.fpstate.__bindgen_anon_1.sw_reserved };
239    if fpx_sw_bytes.magic1 != uapi::FP_XSTATE_MAGIC1
240        || fpx_sw_bytes.extended_size != std::mem::size_of::<XState>() as u32
241        || fpx_sw_bytes.xfeatures != extended_pstate::X64_SUPPORTED_XSAVE_FEATURES
242        || fpx_sw_bytes.xstate_size != std::mem::size_of::<uapi::_xstate>() as u32
243        || xstate.xstate_magic2 != uapi::FP_XSTATE_MAGIC2
244    {
245        log_debug!("Invalid xstate found in signal stack frame.");
246        return error!(EINVAL);
247    }
248
249    let extended_pstate = match &mut current_task.thread_state.extended_pstate {
250        ArchExtendedPstateStorage::State64(state) => state,
251    };
252    #[allow(
253        clippy::undocumented_unsafe_blocks,
254        reason = "Force documented unsafe blocks in Starnix"
255    )]
256    extended_pstate.set_x64_xsave_area(unsafe { std::mem::transmute(xstate.base_xstate) });
257
258    Ok(())
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::mm::{DesiredAddress, MappingName, MappingOptions, ProtectionFlags};
    use crate::signals::{SignalDetail, dequeue_signal, restore_from_signal_handler};
    use crate::task::CurrentTask;
    use crate::testing::spawn_kernel_and_run;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::{EINTR, ERESTARTSYS};
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::signals::{SIGUSR1, SIGUSR2, Signal};
    use starnix_uapi::{__NR_rt_sigreturn, SA_RESTART, SA_RESTORER, SA_SIGINFO, SI_USER};
    use std::future::Future;

    /// A syscall that a test pretends was interrupted by a signal.
    struct FakeSyscall {
        /// Address of the (two-byte) syscall instruction.
        instruction_address: UserAddress,
        /// The syscall number, as found in `orig_rax`.
        number: u64,
        /// The arguments, in `rdi`, `rsi`, `rdx`, `r10`, `r8`, `r9` order.
        args: (u64, u64, u64, u64, u64, u64),
    }

    impl FakeSyscall {
        /// The instruction pointer right after the syscall instruction, i.e. where execution
        /// continues when the syscall is not restarted.
        fn address_after_instruction(&self) -> u64 {
            (self.instruction_address + 2u64).unwrap().ptr() as u64
        }
    }

    const SYSCALL: FakeSyscall = FakeSyscall {
        instruction_address: UserAddress::const_from(100),
        number: 42,
        args: (20, 21, 22, 23, 24, 25),
    };
    const SYSCALL2: FakeSyscall = FakeSyscall {
        instruction_address: UserAddress::const_from(200),
        number: 84,
        args: (30, 31, 32, 33, 34, 35),
    };

    const SA_RESTORER_ADDRESS: UserAddress = UserAddress::const_from(0xDEADBEEF);
    const SA_HANDLER_ADDRESS: UserAddress = UserAddress::const_from(0x00BADDAD);
    const SA_HANDLER2_ADDRESS: UserAddress = UserAddress::const_from(0xBADDAD00);

    /// Flags for a handler that lets interrupted syscalls restart.
    const RESTARTING_FLAGS: u64 = (SA_RESTORER | SA_RESTART | SA_SIGINFO) as u64;
    /// Flags for a handler that makes interrupted syscalls fail with `EINTR`.
    const NON_RESTARTING_FLAGS: u64 = (SA_RESTORER | SA_SIGINFO) as u64;

    /// Registers `handler` as the action for `signal`, using the shared restorer address.
    fn register_handler(
        current_task: &mut CurrentTask,
        signal: Signal,
        sa_flags: u64,
        handler: UserAddress,
    ) {
        current_task.thread_group().signal_actions.set(
            signal,
            sigaction_t {
                sa_flags,
                sa_handler: handler.into(),
                sa_restorer: SA_RESTORER_ADDRESS.into(),
                ..sigaction_t::default()
            },
        );
    }

    /// Pretends that `syscall` was just interrupted, then queues and delivers `signal`.
    ///
    /// The register state is set up to what it would be after the interrupted syscall: `rax`
    /// holds the return value (-ERESTARTSYS); `rdi`, `rsi`, `rdx`, `r10`, `r8`, `r9` hold the
    /// syscall arguments; `orig_rax` holds the syscall number; and the instruction pointer is
    /// 2 bytes after the syscall instruction.
    ///
    /// After delivery, checks that execution has been redirected to `expected_handler` and that
    /// the first three argument registers now carry the handler's arguments.
    #[track_caller]
    fn interrupt_syscall_with_signal(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        syscall: &FakeSyscall,
        signal: Signal,
        expected_handler: UserAddress,
    ) {
        current_task.thread_state.restart_code = Some(ERESTARTSYS);
        current_task.thread_state.registers.rax = ERESTARTSYS.return_value();
        current_task.thread_state.registers.rdi = syscall.args.0;
        current_task.thread_state.registers.rsi = syscall.args.1;
        current_task.thread_state.registers.rdx = syscall.args.2;
        current_task.thread_state.registers.r10 = syscall.args.3;
        current_task.thread_state.registers.r8 = syscall.args.4;
        current_task.thread_state.registers.r9 = syscall.args.5;
        current_task.thread_state.registers.orig_rax = syscall.number;
        current_task.thread_state.registers.ip = syscall.address_after_instruction();

        // Queue the signal that interrupted the syscall.
        current_task.write().enqueue_signal(SignalInfo::with_detail(
            signal,
            SI_USER as i32,
            SignalDetail::None,
        ));

        // Process the signal.
        dequeue_signal(locked, current_task);

        // The instruction pointer should have changed to the signal handling address.
        assert_eq!(current_task.thread_state.registers.ip, expected_handler.ptr() as u64);

        // The syscall arguments should be overwritten with signal handling args.
        assert_ne!(current_task.thread_state.registers.rdi, syscall.args.0);
        assert_ne!(current_task.thread_state.registers.rsi, syscall.args.1);
        assert_ne!(current_task.thread_state.registers.rdx, syscall.args.2);
    }

    /// Pretends the innermost signal handler finished by calling `sys_rt_sigreturn`, which
    /// would set `rax` to that syscall number, and restores the saved state.
    #[track_caller]
    fn return_from_signal_handler(current_task: &mut CurrentTask) {
        current_task.thread_state.registers.rax = __NR_rt_sigreturn as u64;
        // The stack was popped returning from the signal handler.
        current_task.thread_state.registers.rsp += 8;

        restore_from_signal_handler(current_task).expect("failed to restore state");
    }

    /// Checks that, when switching back to userspace, the instruction pointer will point at the
    /// original syscall instruction, with the syscall number and arguments correctly restored
    /// into the registers.
    #[track_caller]
    fn assert_syscall_will_restart(current_task: &mut CurrentTask, syscall: &FakeSyscall) {
        assert_eq!(current_task.thread_state.registers.rax, syscall.number);
        assert_eq!(current_task.thread_state.registers.rdi, syscall.args.0);
        assert_eq!(current_task.thread_state.registers.rsi, syscall.args.1);
        assert_eq!(current_task.thread_state.registers.rdx, syscall.args.2);
        assert_eq!(current_task.thread_state.registers.r10, syscall.args.3);
        assert_eq!(current_task.thread_state.registers.r8, syscall.args.4);
        assert_eq!(current_task.thread_state.registers.r9, syscall.args.5);
        assert_eq!(
            current_task.thread_state.registers.ip,
            syscall.instruction_address.ptr() as u64
        );
    }

    #[fuchsia::test]
    async fn syscall_restart_adjusts_instruction_pointer_and_rax() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            register_handler(current_task, SIGUSR1, RESTARTING_FLAGS, SA_HANDLER_ADDRESS);

            interrupt_syscall_with_signal(
                locked,
                current_task,
                &SYSCALL,
                SIGUSR1,
                SA_HANDLER_ADDRESS,
            );

            return_from_signal_handler(current_task);
            assert_syscall_will_restart(current_task, &SYSCALL);
        })
        .await;
    }

    #[fuchsia::test]
    async fn syscall_nested_restart() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            register_handler(current_task, SIGUSR1, RESTARTING_FLAGS, SA_HANDLER_ADDRESS);
            register_handler(current_task, SIGUSR2, RESTARTING_FLAGS, SA_HANDLER2_ADDRESS);

            // The first syscall is interrupted by SIGUSR1.
            interrupt_syscall_with_signal(
                locked,
                current_task,
                &SYSCALL,
                SIGUSR1,
                SA_HANDLER_ADDRESS,
            );

            // While the first handler runs, a second syscall is interrupted by SIGUSR2.
            interrupt_syscall_with_signal(
                locked,
                current_task,
                &SYSCALL2,
                SIGUSR2,
                SA_HANDLER2_ADDRESS,
            );

            // Returning from the second handler restarts the second syscall.
            return_from_signal_handler(current_task);
            assert_syscall_will_restart(current_task, &SYSCALL2);

            // Returning from the first handler restarts the first syscall.
            return_from_signal_handler(current_task);
            assert_syscall_will_restart(current_task, &SYSCALL);
        })
        .await;
    }

    #[fuchsia::test]
    async fn syscall_does_not_restart_if_signal_action_has_no_sa_restart_flag() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            register_handler(current_task, SIGUSR1, NON_RESTARTING_FLAGS, SA_HANDLER_ADDRESS);

            interrupt_syscall_with_signal(
                locked,
                current_task,
                &SYSCALL,
                SIGUSR1,
                SA_HANDLER_ADDRESS,
            );

            return_from_signal_handler(current_task);

            // Without SA_RESTART the syscall is not restarted: it reports EINTR and execution
            // continues right after the syscall instruction.
            assert_eq!(current_task.thread_state.registers.rax, EINTR.return_value());
            assert_eq!(
                current_task.thread_state.registers.ip,
                SYSCALL.address_after_instruction()
            );
        })
        .await;
    }

    /// Creates a kernel and initial task, giving the task a stack.
    ///
    /// The stack is a freshly mapped, readable and writable region of `STACK_SIZE` bytes; `rsp`
    /// starts 8 bytes below its top.
    fn spawn_kernel_and_run_with_stack<F>(callback: F) -> impl Future<Output = ()>
    where
        F: FnOnce(&mut Locked<Unlocked>, &mut CurrentTask) + Send + Sync + 'static,
    {
        spawn_kernel_and_run(async |locked, current_task| {
            const STACK_SIZE: usize = 0x1000;

            // Give the task a stack.
            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
            let stack_base = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    MemoryObject::from(
                        zx::Vmo::create(STACK_SIZE as u64).expect("failed to create stack VMO"),
                    )
                    .into(),
                    0,
                    STACK_SIZE,
                    prot_flags,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::Stack,
                )
                .expect("failed to map stack VMO");
            let stack_address = (stack_base + (STACK_SIZE - 8)).expect("OOB memory access.");
            current_task.thread_state.registers.rsp = stack_address.ptr() as u64;

            callback(locked, current_task);
        })
    }
}