starnix_core/arch/x64/
signal_handling.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::signals::{SignalInfo, SignalState};
6use crate::task::{CurrentTask, Task};
7use extended_pstate::ExtendedPstateState;
8use starnix_logging::log_debug;
9use starnix_registers::RegisterState;
10use starnix_types::arch::ArchWidth;
11use starnix_uapi::errors::Errno;
12use starnix_uapi::signals::SigSet;
13use starnix_uapi::user_address::UserAddress;
14use starnix_uapi::{
15    self as uapi, error, sigaction_t, sigaltstack, sigcontext, siginfo_t, ucontext,
16};
17use static_assertions::const_assert_eq;
18
/// The size of the red zone, in bytes.
///
/// From the AMD64 ABI:
///   > The 128-byte area beyond the location pointed to
///   > by %rsp is considered to be reserved and shall not be modified by signal or
///   > interrupt handlers. Therefore, functions may use this area for temporary
///   > data that is not needed across function calls. In particular, leaf functions
///   > may use this area for their entire stack frame, rather than adjusting the
///   > stack pointer in the prologue and epilogue. This area is known as the red
///   > zone.
pub const RED_ZONE_SIZE: u64 = 128;
30
/// A `SignalStackFrame` contains all the state that is stored on the stack prior
/// to executing a signal handler.
///
/// The ordering of the fields is significant, as it is part of the syscall ABI. In particular,
/// `restorer_address` must be the first field, since that is where the signal handler will return
/// after it finishes executing.
#[repr(C)]
pub struct SignalStackFrame {
    /// The address of the signal restorer, taken from the `sa_restorer` field of the signal
    /// action. This is where the signal handler returns to; it is not the handler's own address.
    ///
    /// Must be the first field, to be positioned to serve as the return address.
    restorer_address: u64,

    /// Information about the signal, stored as the raw bytes of a `siginfo_t`.
    pub siginfo_bytes: [u8; std::mem::size_of::<siginfo_t>()],

    /// The state of the thread at the time the signal was handled.
    pub context: ucontext,

    /// Extended CPU state, i.e. FPU, SSE & AVX registers.
    ///
    /// `context.uc_mcontext.fpstate` holds the user address of this field.
    xstate: XState,
}
53
/// CPU state that needs to be restored when returning from the signal handler and that is not
/// included in the `ucontext`. Currently it contains just `uapi::_xstate`, which stores X87, SSE
/// and AVX registers. This matches the set of extensions supported by Zircon. In the future it
/// may be extended with a buffer for other extensions (e.g. AVX-512). That buffer should be added
/// between `base_xstate` and `xstate_magic2`.
/// See https://github.com/google/gvisor/blob/master/pkg/sentry/arch/fpu/fpu_amd64_unsafe.go
/// for the corresponding code in GVisor.
///
/// The struct is `packed` so that `xstate_magic2` immediately follows `base_xstate` with no
/// padding in between.
#[repr(C, packed)]
struct XState {
    // The saved extended register state. Its software-reserved header records the sizes and
    // feature mask that are checked when the frame is restored.
    base_xstate: uapi::_xstate,

    // Magic value marking the end of the `xstate`. Should be set to `FP_XSTATE_MAGIC2`.
    xstate_magic2: u32,
}
68
69// There should be no padding in front of `xstate_magic2`.
70const_assert_eq!(
71    std::mem::size_of::<XState>(),
72    std::mem::size_of::<uapi::_xstate>() + std::mem::size_of::<u32>()
73);
74
/// The size, in bytes, of a [`SignalStackFrame`]. This is also the length of the byte array
/// that `SignalStackFrame::as_bytes` and `SignalStackFrame::from_bytes` convert to and from.
pub const SIG_STACK_SIZE: usize = std::mem::size_of::<SignalStackFrame>();
76
77impl SignalStackFrame {
78    pub fn new(
79        _task: &Task,
80        arch_width: ArchWidth,
81        registers: &RegisterState,
82        extended_pstate: &ExtendedPstateState,
83        signal_state: &SignalState,
84        siginfo: &SignalInfo,
85        action: sigaction_t,
86        stack_pointer: UserAddress,
87    ) -> Result<SignalStackFrame, Errno> {
88        let fpstate_addr = (uapi::uaddr {
89            addr: stack_pointer.ptr() as u64
90                + memoffset::offset_of!(SignalStackFrame, xstate) as u64,
91        })
92        .into();
93        let context = ucontext {
94            uc_mcontext: sigcontext {
95                r8: registers.r8,
96                r9: registers.r9,
97                r10: registers.r10,
98                r11: registers.r11,
99                r12: registers.r12,
100                r13: registers.r13,
101                r14: registers.r14,
102                r15: registers.r15,
103                rdi: registers.rdi,
104                rsi: registers.rsi,
105                rbp: registers.rbp,
106                rbx: registers.rbx,
107                rdx: registers.rdx,
108                rax: registers.rax,
109                rcx: registers.rcx,
110                rsp: registers.rsp,
111                rip: registers.rip,
112                eflags: registers.rflags,
113                oldmask: signal_state.mask().into(),
114                fpstate: fpstate_addr,
115                ..Default::default()
116            },
117            uc_stack: signal_state
118                .alt_stack
119                .map(|stack| sigaltstack {
120                    ss_sp: stack.ss_sp.into(),
121                    ss_flags: stack.ss_flags as i32,
122                    ss_size: stack.ss_size as u64,
123                    ..Default::default()
124                })
125                .unwrap_or_default(),
126            uc_sigmask: signal_state.mask().into(),
127            ..Default::default()
128        };
129        Ok(SignalStackFrame {
130            context,
131            siginfo_bytes: siginfo.as_siginfo_bytes(arch_width)?,
132            restorer_address: action.sa_restorer.addr,
133            xstate: get_xstate(extended_pstate),
134        })
135    }
136
137    pub fn as_bytes(&self) -> &[u8; SIG_STACK_SIZE] {
138        #[allow(
139            clippy::undocumented_unsafe_blocks,
140            reason = "Force documented unsafe blocks in Starnix"
141        )]
142        unsafe {
143            std::mem::transmute(self)
144        }
145    }
146
147    pub fn from_bytes(bytes: [u8; SIG_STACK_SIZE]) -> SignalStackFrame {
148        #[allow(
149            clippy::undocumented_unsafe_blocks,
150            reason = "Force documented unsafe blocks in Starnix"
151        )]
152        unsafe {
153            std::mem::transmute(bytes)
154        }
155    }
156
157    pub fn get_signal_mask(&self) -> SigSet {
158        self.context.uc_sigmask.into()
159    }
160}
161
/// Aligns the stack pointer to be 16 byte aligned, and then misaligns it by 8 bytes.
///
/// This is done because x86-64 functions expect the stack to be misaligned by 8 bytes,
/// as if the stack was 16 byte aligned and then someone used a call instruction. This
/// is due to alignment-requiring SSE instructions.
///
/// The result is always congruent to 8 modulo 16 and, for any `pointer >= 16`, strictly
/// below `pointer`.
///
/// For `pointer < 16` there is no such address below `pointer`, and the subtraction wraps
/// around to the top of the address space instead of panicking. The alignment invariant
/// still holds in that case because 2^64 is a multiple of 16.
/// NOTE(review): this relies on callers validating the returned address before using it;
/// confirm against the call site.
pub fn align_stack_pointer(pointer: u64) -> u64 {
    const STACK_ALIGNMENT: u64 = 16;
    // Size of the return address that a `call` instruction would have pushed.
    const RETURN_ADDRESS_SIZE: u64 = 8;

    // Rounding down cannot underflow; only the final subtraction can, and only when the
    // rounded value is zero.
    let aligned = pointer & !(STACK_ALIGNMENT - 1);
    aligned.wrapping_sub(RETURN_ADDRESS_SIZE)
}
170
171fn get_xstate(extended_pstate: &ExtendedPstateState) -> XState {
172    const_assert_eq!(std::mem::size_of::<uapi::_xstate>(), extended_pstate::X64_XSAVE_AREA_SIZE);
173
174    #[allow(
175        clippy::undocumented_unsafe_blocks,
176        reason = "Force documented unsafe blocks in Starnix"
177    )]
178    let mut xstate = XState {
179        // `_xstate` layout matches the layout of the XSAVE area.
180        base_xstate: unsafe { std::mem::transmute(extended_pstate.get_x64_xsave_area()) },
181        xstate_magic2: uapi::FP_XSTATE_MAGIC2,
182    };
183
184    xstate.base_xstate.fpstate.__bindgen_anon_1.sw_reserved = uapi::_fpx_sw_bytes {
185        // `FP_XSTATE_MAGIC1` is used to indicate that the signal stack contains the `xstate`,
186        // which includes not just the default X87 registers (included in `fpstate`), but also
187        // other extensions, such as SSE and AVX. The end of the `xstate` buffer is marked with
188        // `FP_XSTATE_MAGIC2`.
189        magic1: uapi::FP_XSTATE_MAGIC1,
190        extended_size: std::mem::size_of::<XState>() as u32,
191        // TODO: CPU features should be detected dynamically.
192        xfeatures: extended_pstate::X64_SUPPORTED_XSAVE_FEATURES,
193        xstate_size: std::mem::size_of::<uapi::_xstate>() as u32,
194        ..Default::default()
195    };
196
197    xstate
198}
199
200pub fn restore_registers(
201    current_task: &mut CurrentTask,
202    signal_stack_frame: &SignalStackFrame,
203    _stack_pointer: UserAddress,
204) -> Result<(), Errno> {
205    let uctx = &signal_stack_frame.context.uc_mcontext;
206    // Restore the register state from before executing the signal handler.
207    current_task.thread_state.registers = zx::sys::zx_thread_state_general_regs_t {
208        r8: uctx.r8,
209        r9: uctx.r9,
210        r10: uctx.r10,
211        r11: uctx.r11,
212        r12: uctx.r12,
213        r13: uctx.r13,
214        r14: uctx.r14,
215        r15: uctx.r15,
216        rax: uctx.rax,
217        rbx: uctx.rbx,
218        rcx: uctx.rcx,
219        rdx: uctx.rdx,
220        rsi: uctx.rsi,
221        rdi: uctx.rdi,
222        rbp: uctx.rbp,
223        rsp: uctx.rsp,
224        rip: uctx.rip,
225        rflags: uctx.eflags,
226        fs_base: current_task.thread_state.registers.fs_base,
227        gs_base: current_task.thread_state.registers.gs_base,
228    }
229    .into();
230
231    let xstate = &signal_stack_frame.xstate;
232    #[allow(
233        clippy::undocumented_unsafe_blocks,
234        reason = "Force documented unsafe blocks in Starnix"
235    )]
236    let fpx_sw_bytes = unsafe { xstate.base_xstate.fpstate.__bindgen_anon_1.sw_reserved };
237    if fpx_sw_bytes.magic1 != uapi::FP_XSTATE_MAGIC1
238        || fpx_sw_bytes.extended_size != std::mem::size_of::<XState>() as u32
239        || fpx_sw_bytes.xfeatures != extended_pstate::X64_SUPPORTED_XSAVE_FEATURES
240        || fpx_sw_bytes.xstate_size != std::mem::size_of::<uapi::_xstate>() as u32
241        || xstate.xstate_magic2 != uapi::FP_XSTATE_MAGIC2
242    {
243        log_debug!("Invalid xstate found in signal stack frame.");
244        return error!(EINVAL);
245    }
246
247    #[allow(
248        clippy::undocumented_unsafe_blocks,
249        reason = "Force documented unsafe blocks in Starnix"
250    )]
251    current_task
252        .thread_state
253        .extended_pstate
254        .set_x64_xsave_area(unsafe { std::mem::transmute(xstate.base_xstate) });
255
256    Ok(())
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mm::memory::MemoryObject;
    use crate::mm::{DesiredAddress, MappingName, MappingOptions, ProtectionFlags};
    use crate::signals::{SignalDetail, dequeue_signal, restore_from_signal_handler};
    use crate::task::CurrentTask;
    use crate::testing::spawn_kernel_and_run;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::{EINTR, ERESTARTSYS};
    use starnix_uapi::file_mode::Access;
    use starnix_uapi::signals::{SIGUSR1, SIGUSR2, Signal};
    use starnix_uapi::{__NR_rt_sigreturn, SA_RESTART, SA_RESTORER, SA_SIGINFO, SI_USER};
    use std::future::Future;

    const SYSCALL_INSTRUCTION_ADDRESS: UserAddress = UserAddress::const_from(100);
    const SYSCALL_NUMBER: u64 = 42;
    const SYSCALL_ARGS: (u64, u64, u64, u64, u64, u64) = (20, 21, 22, 23, 24, 25);
    const SA_RESTORER_ADDRESS: UserAddress = UserAddress::const_from(0xDEADBEEF);
    const SA_HANDLER_ADDRESS: UserAddress = UserAddress::const_from(0x00BADDAD);

    const SYSCALL2_INSTRUCTION_ADDRESS: UserAddress = UserAddress::const_from(200);
    const SYSCALL2_NUMBER: u64 = 84;
    const SYSCALL2_ARGS: (u64, u64, u64, u64, u64, u64) = (30, 31, 32, 33, 34, 35);
    const SA_HANDLER2_ADDRESS: UserAddress = UserAddress::const_from(0xBADDAD00);

    /// Length in bytes of the x86-64 `syscall` instruction.
    const SYSCALL_INSTRUCTION_SIZE: u64 = 2;

    /// Flags for a handler that lets interrupted syscalls be restarted.
    const RESTARTING_HANDLER_FLAGS: u64 = (SA_RESTORER | SA_RESTART | SA_SIGINFO) as u64;
    /// Flags for a handler that makes interrupted syscalls fail with `EINTR`.
    const NON_RESTARTING_HANDLER_FLAGS: u64 = (SA_RESTORER | SA_SIGINFO) as u64;

    /// A syscall that the tests pretend was interrupted by a signal.
    struct InterruptedSyscall {
        /// Address of the `syscall` instruction itself.
        instruction_address: UserAddress,
        /// The syscall number, as found in `orig_rax`.
        number: u64,
        /// The arguments, in `rdi`, `rsi`, `rdx`, `r10`, `r8`, `r9` order.
        args: (u64, u64, u64, u64, u64, u64),
    }

    impl InterruptedSyscall {
        /// Address of the instruction following the `syscall` instruction, i.e. where `rip`
        /// points once the syscall has returned.
        fn return_address(&self) -> u64 {
            (self.instruction_address + SYSCALL_INSTRUCTION_SIZE).unwrap().ptr() as u64
        }
    }

    const FIRST_SYSCALL: InterruptedSyscall = InterruptedSyscall {
        instruction_address: SYSCALL_INSTRUCTION_ADDRESS,
        number: SYSCALL_NUMBER,
        args: SYSCALL_ARGS,
    };

    const SECOND_SYSCALL: InterruptedSyscall = InterruptedSyscall {
        instruction_address: SYSCALL2_INSTRUCTION_ADDRESS,
        number: SYSCALL2_NUMBER,
        args: SYSCALL2_ARGS,
    };

    /// Registers `handler` as the action for `signal`, using `SA_RESTORER_ADDRESS` as restorer.
    fn install_handler(
        current_task: &CurrentTask,
        signal: Signal,
        handler: UserAddress,
        sa_flags: u64,
    ) {
        current_task.thread_group().signal_actions.set(
            signal,
            sigaction_t {
                sa_flags,
                sa_handler: handler.into(),
                sa_restorer: SA_RESTORER_ADDRESS.into(),
                ..sigaction_t::default()
            },
        );
    }

    /// Sets up the register state to what it would be right after `syscall` was interrupted:
    /// `rax` holds the return value (-ERESTARTSYS); `rdi`, `rsi`, `rdx`, `r10`, `r8`, `r9` hold
    /// the syscall arguments; `orig_rax` holds the syscall number; and the instruction pointer
    /// is 2 bytes after the syscall instruction.
    fn simulate_interrupted_syscall(current_task: &mut CurrentTask, syscall: &InterruptedSyscall) {
        current_task.thread_state.restart_code = Some(ERESTARTSYS);
        let registers = &mut current_task.thread_state.registers;
        registers.rax = ERESTARTSYS.return_value();
        registers.rdi = syscall.args.0;
        registers.rsi = syscall.args.1;
        registers.rdx = syscall.args.2;
        registers.r10 = syscall.args.3;
        registers.r8 = syscall.args.4;
        registers.r9 = syscall.args.5;
        registers.orig_rax = syscall.number;
        registers.rip = syscall.return_address();
    }

    /// Queues `signal` as a user-sent signal and then processes it.
    fn deliver_signal(
        locked: &mut Locked<Unlocked>,
        current_task: &mut CurrentTask,
        signal: Signal,
    ) {
        current_task.write().enqueue_signal(SignalInfo::new(
            signal,
            SI_USER as i32,
            SignalDetail::None,
        ));
        dequeue_signal(locked, current_task);
    }

    /// Asserts that the task is set up to run `handler`, and that the registers that carried
    /// the first three arguments of `syscall` were overwritten with signal handling arguments.
    #[track_caller]
    fn assert_handler_entered(
        current_task: &CurrentTask,
        handler: UserAddress,
        syscall: &InterruptedSyscall,
    ) {
        let registers = &current_task.thread_state.registers;
        assert_eq!(registers.rip, handler.ptr() as u64);
        assert_ne!(registers.rdi, syscall.args.0);
        assert_ne!(registers.rsi, syscall.args.1);
        assert_ne!(registers.rdx, syscall.args.2);
    }

    /// Simulates the signal handler completing with a call to `sys_rt_sigreturn`: `rax` holds
    /// that syscall number and the return address has been popped off the stack.
    #[track_caller]
    fn return_from_handler(current_task: &mut CurrentTask) {
        current_task.thread_state.registers.rax = __NR_rt_sigreturn as u64;
        // The stack was popped returning from the signal handler.
        current_task.thread_state.registers.rsp += 8;

        restore_from_signal_handler(current_task).expect("failed to restore state");
    }

    /// Asserts that, when switching back to userspace, the instruction pointer will point at
    /// the original syscall instruction of `syscall`, with the syscall number and arguments
    /// correctly restored into the registers.
    #[track_caller]
    fn assert_syscall_restarted(current_task: &CurrentTask, syscall: &InterruptedSyscall) {
        let registers = &current_task.thread_state.registers;
        assert_eq!(registers.rax, syscall.number);
        assert_eq!(registers.rdi, syscall.args.0);
        assert_eq!(registers.rsi, syscall.args.1);
        assert_eq!(registers.rdx, syscall.args.2);
        assert_eq!(registers.r10, syscall.args.3);
        assert_eq!(registers.r8, syscall.args.4);
        assert_eq!(registers.r9, syscall.args.5);
        assert_eq!(registers.rip, syscall.instruction_address.ptr() as u64);
    }

    #[fuchsia::test]
    async fn syscall_restart_adjusts_instruction_pointer_and_rax() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            install_handler(current_task, SIGUSR1, SA_HANDLER_ADDRESS, RESTARTING_HANDLER_FLAGS);

            // A syscall is interrupted by SIGUSR1, whose handler is entered.
            simulate_interrupted_syscall(current_task, &FIRST_SYSCALL);
            deliver_signal(locked, current_task, SIGUSR1);
            assert_handler_entered(current_task, SA_HANDLER_ADDRESS, &FIRST_SYSCALL);

            // Once the handler returns, the syscall is set up to be executed again.
            return_from_handler(current_task);
            assert_syscall_restarted(current_task, &FIRST_SYSCALL);
        })
        .await;
    }

    #[fuchsia::test]
    async fn syscall_nested_restart() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            install_handler(current_task, SIGUSR1, SA_HANDLER_ADDRESS, RESTARTING_HANDLER_FLAGS);
            install_handler(current_task, SIGUSR2, SA_HANDLER2_ADDRESS, RESTARTING_HANDLER_FLAGS);

            // A first syscall is interrupted by SIGUSR1, whose handler is entered.
            simulate_interrupted_syscall(current_task, &FIRST_SYSCALL);
            deliver_signal(locked, current_task, SIGUSR1);
            assert_handler_entered(current_task, SA_HANDLER_ADDRESS, &FIRST_SYSCALL);

            // While the first handler runs, another syscall is interrupted by SIGUSR2.
            simulate_interrupted_syscall(current_task, &SECOND_SYSCALL);
            deliver_signal(locked, current_task, SIGUSR2);
            assert_handler_entered(current_task, SA_HANDLER2_ADDRESS, &SECOND_SYSCALL);

            // The second handler returns first, which restarts the second syscall.
            return_from_handler(current_task);
            assert_syscall_restarted(current_task, &SECOND_SYSCALL);

            // Then the first handler returns, which restarts the first syscall.
            return_from_handler(current_task);
            assert_syscall_restarted(current_task, &FIRST_SYSCALL);
        })
        .await;
    }

    #[fuchsia::test]
    async fn syscall_does_not_restart_if_signal_action_has_no_sa_restart_flag() {
        spawn_kernel_and_run_with_stack(|locked, current_task| {
            install_handler(
                current_task,
                SIGUSR1,
                SA_HANDLER_ADDRESS,
                NON_RESTARTING_HANDLER_FLAGS,
            );

            // A restartable syscall is interrupted by SIGUSR1, whose handler is entered.
            simulate_interrupted_syscall(current_task, &FIRST_SYSCALL);
            deliver_signal(locked, current_task, SIGUSR1);
            assert_handler_entered(current_task, SA_HANDLER_ADDRESS, &FIRST_SYSCALL);

            return_from_handler(current_task);

            // The handler was registered without `SA_RESTART`, so the syscall is not restarted:
            // it reports `EINTR` and the instruction pointer stays just past the syscall
            // instruction instead of being rewound onto it.
            assert_eq!(current_task.thread_state.registers.rax, EINTR.return_value());
            assert_eq!(current_task.thread_state.registers.rip, FIRST_SYSCALL.return_address());
        })
        .await;
    }

    /// Creates a kernel and initial task, giving the task a stack.
    fn spawn_kernel_and_run_with_stack<F>(callback: F) -> impl Future<Output = ()>
    where
        F: FnOnce(&mut Locked<Unlocked>, &mut CurrentTask) + Send + Sync + 'static,
    {
        spawn_kernel_and_run(async |locked, current_task| {
            const STACK_SIZE: usize = 0x1000;

            // Give the task a stack.
            let prot_flags = ProtectionFlags::READ | ProtectionFlags::WRITE;
            let stack_base = current_task
                .mm()
                .unwrap()
                .map_memory(
                    DesiredAddress::Any,
                    MemoryObject::from(
                        zx::Vmo::create(STACK_SIZE as u64).expect("failed to create stack VMO"),
                    )
                    .into(),
                    0,
                    STACK_SIZE,
                    prot_flags,
                    Access::rwx(),
                    MappingOptions::empty(),
                    MappingName::Stack,
                )
                .expect("failed to map stack VMO");
            // Start the stack pointer 8 bytes below the top of the mapping.
            let stack_address = (stack_base + (STACK_SIZE - 8)).expect("OOB memory access.");
            current_task.thread_state.registers.rsp = stack_address.ptr() as u64;

            callback(locked, current_task);
        })
    }
}