diff --git a/kernel/src/arch/loongarch64/interrupt/mod.rs b/kernel/src/arch/loongarch64/interrupt/mod.rs index bf9ba32c04..950312043c 100644 --- a/kernel/src/arch/loongarch64/interrupt/mod.rs +++ b/kernel/src/arch/loongarch64/interrupt/mod.rs @@ -141,3 +141,116 @@ impl crate::process::rseq::RseqTrapFrame for TrapFrame { self.csr_era = ip; } } + +/// Linux 兼容的用户寄存器结构体 (LoongArch64) +/// +/// 严格按照 Linux 6.6.21 的 `arch/loongarch/include/uapi/asm/ptrace.h` 中的 +/// +/// 该结构体用于 ptrace 系统调用向用户空间暴露寄存器信息。 +/// +/// 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/loongarch/include/uapi/asm/ptrace.h#30 +#[repr(C, align(8))] +#[derive(Debug, Clone, Copy, Default)] +pub struct UserRegsStruct { + /// 主处理器寄存器 (r0-r31) + pub regs: [u64; 32], + /// 原始系统调用参数 a0 + pub orig_a0: u64, + /// CSR ERA (Exception Return Address) + pub csr_era: u64, + /// CSR BADV (Bad Virtual Address) + pub csr_badv: u64, + /// 保留字段 + pub reserved: [u64; 10], +} + +impl UserRegsStruct { + /// 从 TrapFrame 创建 UserRegsStruct + /// + /// 这对应 Linux 中从 pt_regs 构建 user_pt_regs 的过程。 + pub fn from_trap_frame(trap_frame: &TrapFrame) -> Self { + let mut regs = [0u64; 32]; + // 按照 LoongArch 寄存器编号映射 + regs[0] = trap_frame.r0 as u64; + regs[1] = trap_frame.ra as u64; + regs[2] = trap_frame.tp as u64; + regs[3] = trap_frame.usp as u64; + regs[4] = trap_frame.a0 as u64; + regs[5] = trap_frame.a1 as u64; + regs[6] = trap_frame.a2 as u64; + regs[7] = trap_frame.a3 as u64; + regs[8] = trap_frame.a4 as u64; + regs[9] = trap_frame.a5 as u64; + regs[10] = trap_frame.a6 as u64; + regs[11] = trap_frame.a7 as u64; + regs[12] = trap_frame.t0 as u64; + regs[13] = trap_frame.t1 as u64; + regs[14] = trap_frame.t2 as u64; + regs[15] = trap_frame.t3 as u64; + regs[16] = trap_frame.t4 as u64; + regs[17] = trap_frame.t5 as u64; + regs[18] = trap_frame.t6 as u64; + regs[19] = trap_frame.t7 as u64; + regs[20] = trap_frame.t8 as u64; + regs[21] = trap_frame.r21 as u64; + regs[22] = trap_frame.fp as u64; + regs[23] = trap_frame.s0 as 
u64; + regs[24] = trap_frame.s1 as u64; + regs[25] = trap_frame.s2 as u64; + regs[26] = trap_frame.s3 as u64; + regs[27] = trap_frame.s4 as u64; + regs[28] = trap_frame.s5 as u64; + regs[29] = trap_frame.s6 as u64; + regs[30] = trap_frame.s7 as u64; + regs[31] = trap_frame.s8 as u64; + + Self { + regs, + orig_a0: trap_frame.orig_a0 as u64, + csr_era: trap_frame.csr_era as u64, + csr_badv: trap_frame.csr_badvaddr as u64, + reserved: [0; 10], + } + } + + /// 将 UserRegsStruct 的值写回 TrapFrame + /// + /// 用于 PTRACE_SETREGS 操作,允许调试器修改被跟踪进程的寄存器。 + pub fn write_to_trap_frame(&self, trap_frame: &mut TrapFrame) { + trap_frame.r0 = self.regs[0] as usize; + trap_frame.ra = self.regs[1] as usize; + trap_frame.tp = self.regs[2] as usize; + trap_frame.usp = self.regs[3] as usize; + trap_frame.a0 = self.regs[4] as usize; + trap_frame.a1 = self.regs[5] as usize; + trap_frame.a2 = self.regs[6] as usize; + trap_frame.a3 = self.regs[7] as usize; + trap_frame.a4 = self.regs[8] as usize; + trap_frame.a5 = self.regs[9] as usize; + trap_frame.a6 = self.regs[10] as usize; + trap_frame.a7 = self.regs[11] as usize; + trap_frame.t0 = self.regs[12] as usize; + trap_frame.t1 = self.regs[13] as usize; + trap_frame.t2 = self.regs[14] as usize; + trap_frame.t3 = self.regs[15] as usize; + trap_frame.t4 = self.regs[16] as usize; + trap_frame.t5 = self.regs[17] as usize; + trap_frame.t6 = self.regs[18] as usize; + trap_frame.t7 = self.regs[19] as usize; + trap_frame.t8 = self.regs[20] as usize; + trap_frame.r21 = self.regs[21] as usize; + trap_frame.fp = self.regs[22] as usize; + trap_frame.s0 = self.regs[23] as usize; + trap_frame.s1 = self.regs[24] as usize; + trap_frame.s2 = self.regs[25] as usize; + trap_frame.s3 = self.regs[26] as usize; + trap_frame.s4 = self.regs[27] as usize; + trap_frame.s5 = self.regs[28] as usize; + trap_frame.s6 = self.regs[29] as usize; + trap_frame.s7 = self.regs[30] as usize; + trap_frame.s8 = self.regs[31] as usize; + trap_frame.orig_a0 = self.orig_a0 as usize; + 
trap_frame.csr_era = self.csr_era as usize; + trap_frame.csr_badvaddr = self.csr_badv as usize; + } +} diff --git a/kernel/src/arch/loongarch64/kprobe.rs b/kernel/src/arch/loongarch64/kprobe.rs index 1d85e6d9dc..04173e4d4e 100644 --- a/kernel/src/arch/loongarch64/kprobe.rs +++ b/kernel/src/arch/loongarch64/kprobe.rs @@ -1,19 +1,144 @@ use crate::arch::interrupt::TrapFrame; pub fn setup_single_step(frame: &mut TrapFrame, step_addr: usize) { - todo!("la64: setup_single_step") + // LoongArch64 单步调试需要设置 CSR.FWPS 寄存器 + // 目前先设置 PC 到目标地址 + frame.csr_era = step_addr; } pub fn clear_single_step(frame: &mut TrapFrame, return_addr: usize) { - todo!("la64: clear_single_step") + // 清除单步调试状态并设置返回地址 + frame.csr_era = return_addr; } #[repr(C)] #[derive(Debug, Copy, Clone)] -pub struct KProbeContext {} +pub struct KProbeContext { + pub r0: usize, + pub ra: usize, + pub tp: usize, + pub sp: usize, + pub a0: usize, + pub a1: usize, + pub a2: usize, + pub a3: usize, + pub a4: usize, + pub a5: usize, + pub a6: usize, + pub a7: usize, + pub t0: usize, + pub t1: usize, + pub t2: usize, + pub t3: usize, + pub t4: usize, + pub t5: usize, + pub t6: usize, + pub t7: usize, + pub t8: usize, + pub r21: usize, + pub fp: usize, + pub s0: usize, + pub s1: usize, + pub s2: usize, + pub s3: usize, + pub s4: usize, + pub s5: usize, + pub s6: usize, + pub s7: usize, + pub s8: usize, + pub orig_a0: usize, + pub csr_era: usize, + pub csr_badvaddr: usize, + pub csr_crmd: usize, + pub csr_prmd: usize, + pub csr_euen: usize, + pub csr_ecfg: usize, + pub csr_estat: usize, +} impl From<&TrapFrame> for KProbeContext { fn from(trap_frame: &TrapFrame) -> Self { - todo!("from trap frame to kprobe context"); + Self { + r0: trap_frame.r0, + ra: trap_frame.ra, + tp: trap_frame.tp, + sp: trap_frame.usp, + a0: trap_frame.a0, + a1: trap_frame.a1, + a2: trap_frame.a2, + a3: trap_frame.a3, + a4: trap_frame.a4, + a5: trap_frame.a5, + a6: trap_frame.a6, + a7: trap_frame.a7, + t0: trap_frame.t0, + t1: trap_frame.t1, + 
t2: trap_frame.t2, + t3: trap_frame.t3, + t4: trap_frame.t4, + t5: trap_frame.t5, + t6: trap_frame.t6, + t7: trap_frame.t7, + t8: trap_frame.t8, + r21: trap_frame.r21, + fp: trap_frame.fp, + s0: trap_frame.s0, + s1: trap_frame.s1, + s2: trap_frame.s2, + s3: trap_frame.s3, + s4: trap_frame.s4, + s5: trap_frame.s5, + s6: trap_frame.s6, + s7: trap_frame.s7, + s8: trap_frame.s8, + orig_a0: trap_frame.orig_a0, + csr_era: trap_frame.csr_era, + csr_badvaddr: trap_frame.csr_badvaddr, + csr_crmd: trap_frame.csr_crmd, + csr_prmd: trap_frame.csr_prmd, + csr_euen: trap_frame.csr_euen, + csr_ecfg: trap_frame.csr_ecfg, + csr_estat: trap_frame.csr_estat, + } } } + +// LoongArch 64-bit 架构标识 (EM_LOONGARCH = 258, 64-bit) +const AUDIT_ARCH_LOONGARCH64: u32 = 0xC000_0102; + +/// 获取当前架构标识 +pub fn syscall_get_arch() -> u32 { + AUDIT_ARCH_LOONGARCH64 +} + +/// 从 KProbeContext 获取指令指针 (csr_era) +pub fn instruction_pointer(ctx: &KProbeContext) -> u64 { + ctx.csr_era as u64 +} + +/// 从 KProbeContext 获取用户栈指针 (sp) +pub fn user_stack_pointer(ctx: &KProbeContext) -> u64 { + ctx.sp as u64 +} + +/// 从 KProbeContext 获取系统调用号 (a7) +/// LoongArch64 使用 a7 寄存器传递系统调用号 +pub fn syscall_get_nr(ctx: &KProbeContext) -> u64 { + ctx.a7 as u64 +} + +/// 从 KProbeContext 获取系统调用返回值 (a0) +pub fn syscall_get_return_value(ctx: &KProbeContext) -> i64 { + ctx.a0 as i64 +} + +/// 从 KProbeContext 获取系统调用的前 6 个参数 +/// LoongArch64 使用 a0-a5 寄存器传递系统调用参数 +pub fn syscall_get_arguments(ctx: &KProbeContext, args: &mut [u64; 6]) { + args[0] = ctx.a0 as u64; + args[1] = ctx.a1 as u64; + args[2] = ctx.a2 as u64; + args[3] = ctx.a3 as u64; + args[4] = ctx.a4 as u64; + args[5] = ctx.a5 as u64; +} diff --git a/kernel/src/arch/riscv64/interrupt/mod.rs b/kernel/src/arch/riscv64/interrupt/mod.rs index ff5bb5e05e..595d309a92 100644 --- a/kernel/src/arch/riscv64/interrupt/mod.rs +++ b/kernel/src/arch/riscv64/interrupt/mod.rs @@ -198,3 +198,126 @@ impl crate::process::rseq::RseqTrapFrame for TrapFrame { self.epc = ip; } } + +/// Linux 
兼容的用户寄存器结构体 (RISC-V 64) +/// +/// 该结构体用于 ptrace 系统调用向用户空间暴露寄存器信息。 +/// +/// 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/riscv/include/uapi/asm/ptrace.h#24 +#[repr(C)] +#[derive(Debug, Clone, Copy, Default)] +pub struct UserRegsStruct { + pub pc: u64, + pub ra: u64, + pub sp: u64, + pub gp: u64, + pub tp: u64, + pub t0: u64, + pub t1: u64, + pub t2: u64, + pub s0: u64, + pub s1: u64, + pub a0: u64, + pub a1: u64, + pub a2: u64, + pub a3: u64, + pub a4: u64, + pub a5: u64, + pub a6: u64, + pub a7: u64, + pub s2: u64, + pub s3: u64, + pub s4: u64, + pub s5: u64, + pub s6: u64, + pub s7: u64, + pub s8: u64, + pub s9: u64, + pub s10: u64, + pub s11: u64, + pub t3: u64, + pub t4: u64, + pub t5: u64, + pub t6: u64, +} + +impl UserRegsStruct { + /// 从 TrapFrame 创建 UserRegsStruct + /// + /// 这对应 Linux 中从 pt_regs 构建 user_regs_struct 的过程。 + /// RISC-V 的 user_regs_struct 是 pt_regs 的前缀。 + pub fn from_trap_frame(trap_frame: &TrapFrame) -> Self { + Self { + pc: trap_frame.epc as u64, + ra: trap_frame.ra as u64, + sp: trap_frame.sp as u64, + gp: trap_frame.gp as u64, + tp: trap_frame.tp as u64, + t0: trap_frame.t0 as u64, + t1: trap_frame.t1 as u64, + t2: trap_frame.t2 as u64, + s0: trap_frame.s0 as u64, + s1: trap_frame.s1 as u64, + a0: trap_frame.a0 as u64, + a1: trap_frame.a1 as u64, + a2: trap_frame.a2 as u64, + a3: trap_frame.a3 as u64, + a4: trap_frame.a4 as u64, + a5: trap_frame.a5 as u64, + a6: trap_frame.a6 as u64, + a7: trap_frame.a7 as u64, + s2: trap_frame.s2 as u64, + s3: trap_frame.s3 as u64, + s4: trap_frame.s4 as u64, + s5: trap_frame.s5 as u64, + s6: trap_frame.s6 as u64, + s7: trap_frame.s7 as u64, + s8: trap_frame.s8 as u64, + s9: trap_frame.s9 as u64, + s10: trap_frame.s10 as u64, + s11: trap_frame.s11 as u64, + t3: trap_frame.t3 as u64, + t4: trap_frame.t4 as u64, + t5: trap_frame.t5 as u64, + t6: trap_frame.t6 as u64, + } + } + + /// 将 UserRegsStruct 的值写回 TrapFrame + /// + /// 用于 PTRACE_SETREGS 操作,允许调试器修改被跟踪进程的寄存器。 + pub fn 
write_to_trap_frame(&self, trap_frame: &mut TrapFrame) { + trap_frame.epc = self.pc as usize; + trap_frame.ra = self.ra as usize; + trap_frame.sp = self.sp as usize; + trap_frame.gp = self.gp as usize; + trap_frame.tp = self.tp as usize; + trap_frame.t0 = self.t0 as usize; + trap_frame.t1 = self.t1 as usize; + trap_frame.t2 = self.t2 as usize; + trap_frame.s0 = self.s0 as usize; + trap_frame.s1 = self.s1 as usize; + trap_frame.a0 = self.a0 as usize; + trap_frame.a1 = self.a1 as usize; + trap_frame.a2 = self.a2 as usize; + trap_frame.a3 = self.a3 as usize; + trap_frame.a4 = self.a4 as usize; + trap_frame.a5 = self.a5 as usize; + trap_frame.a6 = self.a6 as usize; + trap_frame.a7 = self.a7 as usize; + trap_frame.s2 = self.s2 as usize; + trap_frame.s3 = self.s3 as usize; + trap_frame.s4 = self.s4 as usize; + trap_frame.s5 = self.s5 as usize; + trap_frame.s6 = self.s6 as usize; + trap_frame.s7 = self.s7 as usize; + trap_frame.s8 = self.s8 as usize; + trap_frame.s9 = self.s9 as usize; + trap_frame.s10 = self.s10 as usize; + trap_frame.s11 = self.s11 as usize; + trap_frame.t3 = self.t3 as usize; + trap_frame.t4 = self.t4 as usize; + trap_frame.t5 = self.t5 as usize; + trap_frame.t6 = self.t6 as usize; + } +} diff --git a/kernel/src/arch/riscv64/kprobe.rs b/kernel/src/arch/riscv64/kprobe.rs index 960b06cd6b..2c7397afc1 100644 --- a/kernel/src/arch/riscv64/kprobe.rs +++ b/kernel/src/arch/riscv64/kprobe.rs @@ -83,3 +83,43 @@ impl From<&TrapFrame> for KProbeContext { } } } + +// RISC-V 64-bit 架构标识 (EM_RISCV = 243, 64-bit) +const AUDIT_ARCH_RISCV64: u32 = 0xC000_00F3; + +/// 获取当前架构标识 +pub fn syscall_get_arch() -> u32 { + AUDIT_ARCH_RISCV64 +} + +/// 从 KProbeContext 获取指令指针 (pc) +pub fn instruction_pointer(ctx: &KProbeContext) -> u64 { + ctx.pc as u64 +} + +/// 从 KProbeContext 获取用户栈指针 (sp) +pub fn user_stack_pointer(ctx: &KProbeContext) -> u64 { + ctx.sp as u64 +} + +/// 从 KProbeContext 获取系统调用号 (a7) +/// RISC-V 使用 a7 寄存器传递系统调用号 +pub fn syscall_get_nr(ctx: &KProbeContext) -> u64 
{ + ctx.a7 as u64 +} + +/// 从 KProbeContext 获取系统调用返回值 (a0) +pub fn syscall_get_return_value(ctx: &KProbeContext) -> i64 { + ctx.a0 as i64 +} + +/// 从 KProbeContext 获取系统调用的前 6 个参数 +/// RISC-V 使用 a0-a5 寄存器传递系统调用参数 +pub fn syscall_get_arguments(ctx: &KProbeContext, args: &mut [u64; 6]) { + args[0] = ctx.a0 as u64; + args[1] = ctx.a1 as u64; + args[2] = ctx.a2 as u64; + args[3] = ctx.a3 as u64; + args[4] = ctx.a4 as u64; + args[5] = ctx.a5 as u64; +} diff --git a/kernel/src/arch/x86_64/interrupt/mod.rs b/kernel/src/arch/x86_64/interrupt/mod.rs index 21335665fd..b69b099a0a 100644 --- a/kernel/src/arch/x86_64/interrupt/mod.rs +++ b/kernel/src/arch/x86_64/interrupt/mod.rs @@ -253,3 +253,136 @@ impl crate::process::rseq::RseqTrapFrame for TrapFrame { self.rip = ip as u64; } } + +/// Linux 兼容的用户寄存器结构体 (x86_64) +/// +/// 该结构体用于 ptrace 系统调用向用户空间暴露寄存器信息。 +/// +/// 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/include/asm/user_64.h#69 +#[repr(C)] +#[derive(Debug, Clone, Copy, Default)] +pub struct UserRegsStruct { + pub r15: u64, + pub r14: u64, + pub r13: u64, + pub r12: u64, + pub bp: u64, + pub bx: u64, + pub r11: u64, + pub r10: u64, + pub r9: u64, + pub r8: u64, + pub ax: u64, + pub cx: u64, + pub dx: u64, + pub si: u64, + pub di: u64, + /// 在系统调用入口时保存原始的 rax(系统调用号) + pub orig_ax: u64, + pub ip: u64, + pub cs: u64, + pub flags: u64, + pub sp: u64, + pub ss: u64, + /// FS 段基址,来自 task->thread.fsbase + pub fs_base: u64, + /// GS 段基址,来自 task->thread.gsbase + pub gs_base: u64, + /// DS 段选择器 + pub ds: u64, + /// ES 段选择器 + pub es: u64, + /// FS 段选择器 + pub fs: u64, + /// GS 段选择器 + pub gs: u64, +} + +impl UserRegsStruct { + /// 从 TrapFrame 创建 UserRegsStruct + /// + /// 这对应 Linux 中从 pt_regs 构建 user_regs_struct 的过程。 + /// TrapFrame 包含了 pt_regs 的核心字段,额外的段寄存器信息 + /// 需要从进程的 arch_info 中获取。 + /// + /// # 参数 + /// - `trap_frame`: 中断/异常时保存的寄存器状态 + /// - `fs_base`: FS 段基址(来自 task->thread.fsbase) + /// - `gs_base`: GS 段基址(来自 task->thread.gsbase) + /// - `fs`: FS 段选择器(来自 
task->thread.fsindex) + /// - `gs`: GS 段选择器(来自 task->thread.gsindex) + pub fn from_trap_frame( + trap_frame: &TrapFrame, + fs_base: u64, + gs_base: u64, + fs: u64, + gs: u64, + ) -> Self { + Self { + r15: trap_frame.r15, + r14: trap_frame.r14, + r13: trap_frame.r13, + r12: trap_frame.r12, + bp: trap_frame.rbp, + bx: trap_frame.rbx, + r11: trap_frame.r11, + r10: trap_frame.r10, + r9: trap_frame.r9, + r8: trap_frame.r8, + ax: trap_frame.rax, + cx: trap_frame.rcx, + dx: trap_frame.rdx, + si: trap_frame.rsi, + di: trap_frame.rdi, + // errcode 在系统调用上下文中存储系统调用号 + orig_ax: trap_frame.errcode, + ip: trap_frame.rip, + cs: trap_frame.cs, + flags: trap_frame.rflags, + sp: trap_frame.rsp, + ss: trap_frame.ss, + fs_base, + gs_base, + // TrapFrame 中的 ds/es 是完整的段选择器值 + ds: trap_frame.ds, + es: trap_frame.es, + fs, + gs, + } + } + + /// 将 UserRegsStruct 的值写回 TrapFrame + /// + /// 用于 PTRACE_SETREGS 操作,允许调试器修改被跟踪进程的寄存器。 + /// + /// # 注意 + /// - fs_base, gs_base, fs, gs 需要单独写回到进程的 arch_info + /// - 某些字段(如 cs, ss)的修改可能受到安全限制 + #[allow(dead_code)] + pub fn write_to_trap_frame(&self, trap_frame: &mut TrapFrame) { + trap_frame.r15 = self.r15; + trap_frame.r14 = self.r14; + trap_frame.r13 = self.r13; + trap_frame.r12 = self.r12; + trap_frame.rbp = self.bp; + trap_frame.rbx = self.bx; + trap_frame.r11 = self.r11; + trap_frame.r10 = self.r10; + trap_frame.r9 = self.r9; + trap_frame.r8 = self.r8; + trap_frame.rax = self.ax; + trap_frame.rcx = self.cx; + trap_frame.rdx = self.dx; + trap_frame.rsi = self.si; + trap_frame.rdi = self.di; + trap_frame.errcode = self.orig_ax; + trap_frame.rip = self.ip; + // cs 和 ss 的修改需要谨慎,这里暂时允许 + trap_frame.cs = self.cs; + trap_frame.rflags = self.flags; + trap_frame.rsp = self.sp; + trap_frame.ss = self.ss; + trap_frame.ds = self.ds; + trap_frame.es = self.es; + } +} diff --git a/kernel/src/arch/x86_64/ipc/signal.rs b/kernel/src/arch/x86_64/ipc/signal.rs index 9cf7f219b2..e0256f4802 100644 --- a/kernel/src/arch/x86_64/ipc/signal.rs +++ 
b/kernel/src/arch/x86_64/ipc/signal.rs @@ -11,7 +11,7 @@ pub use crate::ipc::generic_signal::GenericSigSet as SigSet; pub use crate::ipc::generic_signal::GenericSigStackFlags as SigStackFlags; pub use crate::ipc::generic_signal::GenericSignal as Signal; -use crate::process::rseq::Rseq; +use crate::process::{ptrace::ptrace_signal, rseq::Rseq, ProcessFlags}; use crate::{ arch::{ fpu::FpState, @@ -592,7 +592,6 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { let pcb = ProcessManager::current_pcb(); let siginfo = pcb.try_siginfo_irqsave(5); - if unlikely(siginfo.is_none()) { return; } @@ -607,7 +606,7 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { return; } - let mut sig_number: Signal; + let mut sig: Signal; let mut info: Option; let mut sigaction: Option; let sig_block: SigSet = *siginfo_read_guard.sig_blocked(); @@ -620,31 +619,60 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { } let mut siginfo_mut_guard = siginfo_mut.unwrap(); + + // 循环直到取出一个有效的、需要处理的信号,或者队列为空 loop { - (sig_number, info) = siginfo_mut_guard.dequeue_signal(&sig_block, &pcb); + (sig, info) = siginfo_mut_guard.dequeue_signal(&sig_block, &pcb); // 如果信号非法,则直接返回 - if sig_number == Signal::INVALID { + if sig == Signal::INVALID { return; } - // 对 kernel-only 信号(如 SIGKILL/SIGSTOP)直接使用默认处理,避免任何用户帧构造 - if sig_number.kernel_only() { - // log::error!( - // "do_signal: kernel-only sig={} for pid={:?} -> default handler (no user frame)", - // sig_number as i32, - // pcb.raw_pid() - // ); - // 释放锁,按常规路径在本线程上下文执行默认处理 + // 只要进程处于 PTRACED 状态,都必须先通知 Tracer + let is_ptraced = pcb.flags().contains(ProcessFlags::PTRACED); + if is_ptraced { + // 保存 oldset,因为需要释放锁, ptrace_signal 内部会调用 schedule() + let _oldset = *siginfo_mut_guard.sig_blocked(); + drop(siginfo_mut_guard); + CurrentIrqArch::interrupt_enable(); + + let result = ptrace_signal(&pcb, sig, &mut info); + + // 重新获取锁以继续处理 + let siginfo_mut = pcb.try_siginfo_mut(5); + if siginfo_mut.is_none() { + 
return; + } + siginfo_mut_guard = siginfo_mut.unwrap(); + + match result { + Some(new_sig) => { + // tracer 注入了新信号,继续处理 + sig = new_sig; + } + None => { + // tracer 忽略了信号,继续下一个信号 + continue; + } + } + } + + // 只有在非 ptrace 状态,或 ptrace 返回了信号且该信号是 kernel_only 时才进入。 + if sig.kernel_only() { let _oldset = *siginfo_mut_guard.sig_blocked(); drop(siginfo_mut_guard); drop(pcb); CurrentIrqArch::interrupt_enable(); - sig_number.handle_default(); + // kernel_only 信号使用默认处理 + sig.handle_default(); + // 注意:如果是 SIGSTOP,进程被唤醒后在 Linux 中通常会跳转回循环开头重新检查 pending 信号。 + // 这里直接 return,依靠下一次中断返回路径再次进入 do_signal 来处理后续信号。 return; } - let sa = pcb.sighand().handler(sig_number).unwrap(); + // 查找普通信号的 sigaction + let sa = pcb.sighand().handler(sig).unwrap(); match sa.action() { SigactionType::SaHandler(action_type) => match action_type { SaHandlerType::Error => { @@ -662,6 +690,7 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { SigactionType::SaSigaction(_) => todo!(), } + // Init 进程保护机制 /* * Global init gets no signals it doesn't want. * Container-init gets no signals it doesn't want from same @@ -676,7 +705,7 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { if ProcessManager::current_pcb() .sighand() .flags_contains(SignalFlags::UNKILLABLE) - && !sig_number.kernel_only() + && !sig.kernel_only() { continue; } @@ -687,30 +716,41 @@ unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { } let oldset = *siginfo_mut_guard.sig_blocked(); - //避免死锁 drop(siginfo_mut_guard); - // no sig_struct guard to drop drop(pcb); - // 做完上面的检查后,开中断 - CurrentIrqArch::interrupt_enable(); - if sigaction.is_none() { - return; - } - *got_signal = true; + // 开中断(如果有 ptrace,已经在中断开启状态下从 ptrace_stop 返回) + CurrentIrqArch::interrupt_enable(); let mut sigaction = sigaction.unwrap(); // 注意!由于handle_signal里面可能会退出进程, // 因此这里需要检查清楚:上面所有的锁、arc指针都被释放了。否则会产生资源泄露的问题! 
let res: Result = - handle_signal(sig_number, &mut sigaction, &info.unwrap(), &oldset, frame); + handle_signal(sig, &mut sigaction, &info.unwrap(), &oldset, frame); + + // 更新 got_signal 状态 + // 只有当信号帧真正被设置时(即自定义处理器),才设置 got_signal = true ,系统调用被中断且被处理,不自动重启 + if res.is_ok() { + match sigaction.action() { + SigactionType::SaHandler(SaHandlerType::Customized(_)) => { + *got_signal = true; + } + SigactionType::SaSigaction(_) => { + *got_signal = true; + } + _ => { + // Default 或 Ignore 动作不设置用户信号帧,got_signal 保持 false + } + } + } + compiler_fence(Ordering::SeqCst); if let Err(e) = res { if e != SystemError::EFAULT { error!( "Error occurred when handling signal: {}, pid={:?}, errcode={:?}", - sig_number as i32, + sig as i32, ProcessManager::current_pcb().raw_pid(), &e ); diff --git a/kernel/src/arch/x86_64/kprobe.rs b/kernel/src/arch/x86_64/kprobe.rs index e998aa993e..74389b1f58 100644 --- a/kernel/src/arch/x86_64/kprobe.rs +++ b/kernel/src/arch/x86_64/kprobe.rs @@ -63,3 +63,41 @@ impl From<&TrapFrame> for KProbeContext { } } } + +const AUDIT_ARCH_X86_64: u32 = 0xC000_003E; + +/// 获取当前架构标识 +pub fn syscall_get_arch() -> u32 { + AUDIT_ARCH_X86_64 +} + +/// 从 KProbeContext 获取指令指针 (rip) +pub fn instruction_pointer(ctx: &KProbeContext) -> u64 { + ctx.rip +} + +/// 从 KProbeContext 获取用户栈指针 (rsp) +pub fn user_stack_pointer(ctx: &KProbeContext) -> u64 { + ctx.rsp +} + +/// 从 KProbeContext 获取系统调用号 (rax) +pub fn syscall_get_nr(ctx: &KProbeContext) -> u64 { + ctx.rax +} + +/// 从 KProbeContext 获取系统调用返回值 (rax) +pub fn syscall_get_return_value(ctx: &KProbeContext) -> i64 { + ctx.rax as i64 +} + +/// 从 KProbeContext 获取系统调用的前 6 个参数 +/// (遵循 x86_64 System V ABI) +pub fn syscall_get_arguments(ctx: &KProbeContext, args: &mut [u64; 6]) { + args[0] = ctx.rdi; + args[1] = ctx.rsi; + args[2] = ctx.rdx; + args[3] = ctx.r10; + args[4] = ctx.r8; + args[5] = ctx.r9; +} diff --git a/kernel/src/arch/x86_64/mm/fault.rs b/kernel/src/arch/x86_64/mm/fault.rs index b48aacdf04..3c4c4d78e3 100644 --- 
a/kernel/src/arch/x86_64/mm/fault.rs +++ b/kernel/src/arch/x86_64/mm/fault.rs @@ -11,7 +11,7 @@ use crate::{ CurrentIrqArch, MMArch, }, exception::{extable::ExceptionTableManager, InterruptArch}, - ipc::signal_types::{SigCode, SigInfo, SigType}, + ipc::signal_types::{BusCode, SegvCode, SigCode, SigFaultInfo, SigInfo, SigType}, mm::{ fault::{FaultFlags, PageFaultHandler, PageFaultMessage}, ucontext::{AddressSpace, LockedVMA}, @@ -278,12 +278,14 @@ impl X86_64MMArch { let send_segv = || { let pid = ProcessManager::current_pid(); - let uid = ProcessManager::current_pcb().cred().uid.data() as u32; let mut info = SigInfo::new( Signal::SIGSEGV, 0, - SigCode::User, - SigType::Kill { pid, uid }, + SigCode::Segv(SegvCode::MapErr), + SigType::SigFault(SigFaultInfo { + addr: address.data(), + trapno: 14, // X86_TRAP_PF + }), ); Signal::SIGSEGV .send_signal_info(Some(&mut info), pid) @@ -463,21 +465,21 @@ impl X86_64MMArch { } // 用户态 fault:发送对应信号 - let sig = if fault.contains(VmFaultReason::VM_FAULT_SIGSEGV) { - Signal::SIGSEGV + let (sig, code) = if fault.contains(VmFaultReason::VM_FAULT_SIGSEGV) { + (Signal::SIGSEGV, SigCode::Segv(SegvCode::MapErr)) } else { // 包括 SIGBUS / OOM / HWPOISON 等:目前统一 SIGBUS(后续可按 Linux 进一步细分) - Signal::SIGBUS + (Signal::SIGBUS, SigCode::Bus(BusCode::AdrErr)) }; let mut info = SigInfo::new( sig, 0, - SigCode::User, - SigType::Kill { - pid: ProcessManager::current_pid(), - uid: ProcessManager::current_pcb().cred().uid.data() as u32, - }, + code, + SigType::SigFault(SigFaultInfo { + addr: address.data(), + trapno: 14, // X86_TRAP_PF + }), ); let _ = sig.send_signal_info(Some(&mut info), ProcessManager::current_pid()); return; diff --git a/kernel/src/arch/x86_64/process/mod.rs b/kernel/src/arch/x86_64/process/mod.rs index 612bbc5cd9..30e8c66415 100644 --- a/kernel/src/arch/x86_64/process/mod.rs +++ b/kernel/src/arch/x86_64/process/mod.rs @@ -232,6 +232,16 @@ impl ArchPCBInfo { self.gsbase } + /// 获取 FS 段选择器的值 + pub fn fs(&self) -> u16 { + 
self.fs.bits() + } + + /// 获取 GS 段选择器的值 + pub fn gs(&self) -> u16 { + self.gs.bits() + } + pub fn cr2_mut(&mut self) -> &mut usize { &mut self.cr2 } diff --git a/kernel/src/arch/x86_64/syscall/mod.rs b/kernel/src/arch/x86_64/syscall/mod.rs index 2710bea831..1ee4995515 100644 --- a/kernel/src/arch/x86_64/syscall/mod.rs +++ b/kernel/src/arch/x86_64/syscall/mod.rs @@ -1,15 +1,15 @@ use crate::{ arch::{ - ipc::signal::X86_64SignalArch, + ipc::signal::{Signal, X86_64SignalArch}, syscall::nr::{SYS_ARCH_PRCTL, SYS_RT_SIGRETURN}, CurrentIrqArch, }, exception::InterruptArch, - ipc::signal_types::SignalArch, + ipc::signal_types::{ChldCode, SignalArch}, libs::align::SafeForZero, mm::VirtAddr, - process::ProcessManager, - syscall::{Syscall, SYS_SCHED}, + process::{ProcessFlags, ProcessManager, PtraceStopReason}, + syscall::Syscall, }; use log::debug; use system_error::SystemError; @@ -56,29 +56,59 @@ macro_rules! syscall_return { debug!("syscall return:pid={:?},ret= {:?}\n", pid, ret as isize); } - unsafe { - CurrentIrqArch::interrupt_disable(); - } + // 无条件调用统一的退出处理函数,由它来检查和处理所有工作 + unsafe { crate::exception::entry::syscall_exit_to_user_mode($regs) }; + + // 返回到汇编层,汇编代码将执行 iret/sysret 返回用户态 return; }}; } #[no_mangle] pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) { - // 系统调用进入时,把系统调用号存入errcode字段,以便在syscall_handler退出后,仍能获取到系统调用号 - frame.errcode = frame.rax; - let syscall_num = frame.rax as usize; - // 防止sys_sched由于超时无法退出导致的死锁 - if syscall_num == SYS_SCHED { - unsafe { - CurrentIrqArch::interrupt_disable(); - } - } else { - unsafe { - CurrentIrqArch::interrupt_enable(); - } + // 系统调用进入时,把系统调用号存入 orig_rax 字段 + // 用于恢复被 ptrace 修改的系统调用号 + // frame.orig_rax = frame.rax; + + // 系统调用进入时,始终开中断 + unsafe { + CurrentIrqArch::interrupt_enable(); + }; + + mfence(); + let pid = ProcessManager::current_pcb().raw_pid(); + let show = false; + if show { + debug!("syscall: pid: {:?}, num={:?}\n", pid, frame.rax as usize); } + let pcb = ProcessManager::current_pcb(); + + // 
注意:必须同时检查 PTRACED 和 TRACE_SYSCALL 标志 + let needs_syscall_trace = pcb.flags().contains(ProcessFlags::TRACE_SYSCALL); + if needs_syscall_trace { + // 设置停止原因 + pcb.ptrace_state_mut().stop_reason = PtraceStopReason::SyscallEntry; + // 构造 syscall entry 的 exit_code: 0x80 | SIGTRAP + // 0x80 表示 PTRACE_SYSCALL_TRACE (PT_TRACESYSGOOD) + let exit_code = 0x80 | Signal::SIGTRAP as usize; + + // 同步调用 ptrace_stop,阻塞直到 tracer 唤醒 + // 这与 Linux 6.6.21 的 ptrace_report_syscall_entry() 行为一致 + let _signr = pcb.ptrace_stop(exit_code, ChldCode::Trapped, None); + + // ptrace_stop 返回后,检查 tracer 是否注入了信号 + // 如果有致命信号,需要立即处理 + // TODO: 处理注入信号 + } + + // 按照 Linux 6.6.21 kernel/entry/common.c:78 的模式: + // /* Either of the above might have changed the syscall number */ + // syscall = syscall_get_nr(current, regs); + // + // 关键:必须在 ptrace_stop 返回**之后**重新读取系统调用号和参数! + // 因为 tracer 可能在我们睡眠时修改了寄存器。 + let syscall_num = frame.rax as usize; let args = [ frame.rdi as usize, frame.rsi as usize, @@ -87,18 +117,9 @@ pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) { frame.r8 as usize, frame.r9 as usize, ]; - mfence(); - let pid = ProcessManager::current_pcb().raw_pid(); - let show = false; - // let show = if syscall_num != SYS_SCHED && pid.data() >= 9{ - // true - // } else { - // false - // }; - if show { - debug!("syscall: pid: {:?}, num={:?}\n", pid, syscall_num); - } + // 保存系统调用入口信息(用于 PTRACE_GETSIGINFO) + pcb.on_syscall_entry(syscall_num, &args); // Arch specific syscall match syscall_num { @@ -120,8 +141,34 @@ pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) { _ => {} } let mut syscall_handle = || -> u64 { - Syscall::catch_handle(syscall_num, &args, frame) - .unwrap_or_else(|e| e.to_posix_errno() as usize) as u64 + let pcb = ProcessManager::current_pcb(); + let result = Syscall::catch_handle(syscall_num, &args, frame) + .unwrap_or_else(|e| e.to_posix_errno() as usize) as u64; + + // 先将结果写入 frame.rax,这样 tracer 可以通过 PTRACE_POKEUSER 修改返回值 + frame.rax = result; + + // 按照 Linux 
6.6.21 的同步 ptrace 模型处理系统调用出口 + // 在 syscall_exit_work() 中调用 ptrace_report_syscall_exit() + if pcb.flags().contains(ProcessFlags::TRACE_SYSCALL) { + // 设置停止原因 + pcb.ptrace_state_mut().stop_reason = crate::process::PtraceStopReason::SyscallExit; + + // 构造 syscall exit 的 exit_code: 0x80 | SIGTRAP + let exit_code = 0x80 | Signal::SIGTRAP as usize; + + // 同步调用 ptrace_stop,阻塞直到 tracer 唤醒 + // 这与 Linux 6.6.21 的 ptrace_report_syscall_exit() 行为一致 + let _signr = + pcb.ptrace_stop(exit_code, crate::ipc::signal_types::ChldCode::Trapped, None); + + // ptrace_stop 返回后,tracer 可能修改了 frame.rax + // 必须返回 frame.rax 而不是原始 result + // TODO: 处理注入信号 + } + + // 返回 frame.rax,包含 tracer 可能的修改 + frame.rax }; syscall_return!(syscall_handle(), frame, show); } diff --git a/kernel/src/exception/entry.rs b/kernel/src/exception/entry.rs index 1625544a6c..5d78b57c8a 100644 --- a/kernel/src/exception/entry.rs +++ b/kernel/src/exception/entry.rs @@ -1,56 +1,83 @@ use crate::{ - arch::{interrupt::TrapFrame, ipc::signal::Signal, CurrentSignalArch}, + arch::{interrupt::TrapFrame, ipc::signal::Signal, CurrentIrqArch, CurrentSignalArch}, + exception::InterruptArch, ipc::signal_types::SignalArch, process::{rseq::Rseq, ProcessFlags, ProcessManager}, + sched::{schedule, SchedMode}, }; -#[no_mangle] -unsafe extern "C" fn irqentry_exit(frame: &mut TrapFrame) { - if frame.is_from_user() { - irqentry_exit_to_user_mode(frame); - } -} - /// 退出到用户态之前,在这个函数内做最后的处理 /// /// # Safety /// -/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, -/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 -unsafe fn irqentry_exit_to_user_mode(frame: &mut TrapFrame) { - exit_to_user_mode_prepare(frame); -} +/// 由于此函数内可能会直接退出进程,在进入之前必须保证所有栈上的 Arc/Box 指针已被释,否则可能导致内存泄漏。 +unsafe fn exit_to_user_mode_loop(frame: &mut TrapFrame) { + loop { + // 必须在关中断下读取标志,防止竞态 + CurrentIrqArch::interrupt_disable(); -/// # Safety -/// -/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, -/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 -unsafe fn exit_to_user_mode_prepare(frame: &mut TrapFrame) { - let 
process_flags_work = *ProcessManager::current_pcb().flags(); - if !process_flags_work.exit_to_user_mode_work().is_empty() { - exit_to_user_mode_loop(frame, process_flags_work); - } -} + let pcb = ProcessManager::current_pcb(); + let flags = *pcb.flags(); -/// # Safety -/// -/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, -/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 -unsafe fn exit_to_user_mode_loop(frame: &mut TrapFrame, mut process_flags_work: ProcessFlags) { - while !process_flags_work.exit_to_user_mode_work().is_empty() { - // 优先处理 rseq,因为信号递送会保存 trapframe 到 sigframe - // rseq 的 IP fixup 必须在信号递送之前完成 - if process_flags_work.contains(ProcessFlags::NEED_RSEQ) + // 筛选出需要处理的标志位(信号、调度、RSEQ 等) + let work = flags.exit_to_user_mode_work(); + if work.is_empty() { + // 无工作,保持关中断返回 + break; + } + + // 有工作,必须开中断处理 + // 释放 PCB 引用,避免持有自旋锁或导致引用计数问题 + drop(pcb); + CurrentIrqArch::interrupt_enable(); + + // 处理调度 (Linux: _TIF_NEED_RESCHED),无论是 syscall 还是 irq 返回,都必须检查抢占! + if flags.contains(ProcessFlags::NEED_SCHEDULE) { + schedule(SchedMode::SM_NONE); + } + + // 处理信号 (Linux: _TIF_SIGPENDING) + // Linux 通常先处理信号。如果信号导致了栈帧改变(跳去 Handler),RSEQ 的处理将推迟到 Handler 返回时。 + if flags.contains(ProcessFlags::HAS_PENDING_SIGNAL) { + CurrentSignalArch::do_signal_or_restart(frame); + } + + // 处理 RSEQ / Notify Resume (Linux: _TIF_NOTIFY_RESUME) + if flags.contains(ProcessFlags::NEED_RSEQ) && Rseq::handle_notify_resume(Some(frame)).is_err() { - // rseq 处理失败,发送 SIGSEGV let pcb = ProcessManager::current_pcb(); let _ = crate::ipc::kill::send_signal_to_pcb(pcb, Signal::SIGSEGV); } - if process_flags_work.contains(ProcessFlags::HAS_PENDING_SIGNAL) { - unsafe { CurrentSignalArch::do_signal_or_restart(frame) }; - } - process_flags_work = *ProcessManager::current_pcb().flags(); + // 循环继续,再次关中断检查是否有新工作产生 + } + + // 循环结束,所有工作已完成,保持关中断状态返回到汇编层 + // 汇编代码将执行 iret/sysret 返回用户态 +} + +/// 从系统调用返回到用户态的统一退出路径 +/// 对应 Linux 6.6.21 arch/x86/entry/common.c::syscall_return_to_user_mode +/// +/// # Safety +/// +/// 
由于此函数内可能会直接退出进程,在进入之前必须保证所有栈上的 Arc/Box 指针已被释放 +#[no_mangle] +pub unsafe extern "C" fn syscall_exit_to_user_mode(frame: &mut TrapFrame) { + // 这一步必须在 flags 检查之外进行,因为它是一个独立的安全检查 + Rseq::rseq_syscall_check(frame); + // 系统调用直接调用统一循环 + exit_to_user_mode_loop(frame); +} + +/// 从中断/异常返回到用户态的退出路径 +/// 对应 Linux 6.6.21 kernel/entry/common.c::irqentry_exit_to_user_mode +/// 用于处理非系统调用的返回路径(如中断返回) +#[no_mangle] +pub unsafe extern "C" fn irqentry_exit(frame: &mut TrapFrame) { + // 只有返回用户态时才处理 + if frame.is_from_user() { + exit_to_user_mode_loop(frame); } } diff --git a/kernel/src/filesystem/procfs/pid/stat.rs b/kernel/src/filesystem/procfs/pid/stat.rs index 7717c3a78a..effc921b7a 100644 --- a/kernel/src/filesystem/procfs/pid/stat.rs +++ b/kernel/src/filesystem/procfs/pid/stat.rs @@ -48,8 +48,9 @@ fn state_to_linux_char(state: ProcessState) -> char { 'D' } } - ProcessState::Stopped => 'T', + ProcessState::Stopped(_) => 'T', ProcessState::Exited(_) => 'Z', + _ => 'X', } } diff --git a/kernel/src/ipc/generic_signal.rs b/kernel/src/ipc/generic_signal.rs index e8eea6ce76..c2b20fe787 100644 --- a/kernel/src/ipc/generic_signal.rs +++ b/kernel/src/ipc/generic_signal.rs @@ -5,11 +5,11 @@ use num_traits::FromPrimitive; use crate::ipc::signal_types::SignalFlags; use crate::{ arch::{ - ipc::signal::{SigSet, Signal, MAX_SIG_NUM}, + ipc::signal::{SigFlags, SigSet, Signal, MAX_SIG_NUM}, CurrentIrqArch, }, exception::InterruptArch, - process::ProcessManager, + process::{ProcessFlags, ProcessManager}, sched::{schedule, SchedMode}, }; @@ -423,39 +423,61 @@ fn sig_terminate_dump(sig: Signal) { // TODO 生成 coredump 文件 } -/// 信号默认处理函数——暂停进程 +/// 信号默认处理函数——暂停进程 (SIGSTOP/SIGTSTP/SIGTTIN/SIGTTOU) +/// fn sig_stop(sig: Signal) { - // 在接收者上下文设置停止标志,并让当前任务进入 Stopped - let guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; - { - let pcb = ProcessManager::current_pcb(); - // 标记停止事件,供 waitid(WSTOPPED) 可见 - pcb.sighand().flags_insert(SignalFlags::CLD_STOPPED); - 
pcb.sighand().flags_insert(SignalFlags::STOP_STOPPED); + let pcb = ProcessManager::current_pcb(); + + // ===== Ptrace 进程的特殊处理 ===== + // 被 ptrace 的进程由 tracer 控制其状态(TASK_TRACED),不进入标准的 TASK_STOPPED + // 如果执行到这里,说明 ptrace_signal 已经在 do_signal 中处理过该信号 + // tracer 决定将信号注入给 tracee,但这不意味着 tracee 要再次停止 + // 直接返回,不做任何操作 + if pcb.flags().contains(ProcessFlags::PTRACED) { + return; } - ProcessManager::mark_stop().unwrap_or_else(|e| { + + // ===== 非 ptrace 进程的 Group Stop 逻辑 ===== + // 标记停止事件,供 waitid(WSTOPPED) 可见 + pcb.sighand().flags_insert(SignalFlags::CLD_STOPPED); + pcb.sighand().flags_insert(SignalFlags::STOP_STOPPED); + + // 切换进程状态为 Stopped 并调度 + let guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + ProcessManager::mark_stop(sig).unwrap_or_else(|e| { log::error!( "sleep error :{:?},failed to sleep process :{:?}, with signal :{:?}", e, - ProcessManager::current_pcb().pid(), + pcb.pid(), sig ); }); drop(guard); - log::debug!( - "sig_stop: pid={:?} entered Stopped; notifying parent and scheduler", - ProcessManager::current_pcb().raw_pid() - ); + // 向父进程报告 SIGCHLD 并唤醒父进程可能阻塞的 wait let pcb = ProcessManager::current_pcb(); if let Some(parent) = pcb.parent_pcb() { - let _ = crate::ipc::kill::send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); + // 检查父进程是否设置了 SA_NOCLDSTOP + let should_notify = { + let sighand = parent.sighand(); + sighand + .handler(Signal::SIGCHLD) + .map(|sa| !sa.flags().contains(SigFlags::SA_NOCLDSTOP)) + .unwrap_or(false) + }; + + if should_notify { + let _ = crate::ipc::kill::send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); + } + // 无论是否发送 SIGCHLD,都需要唤醒父进程的 wait 队列,因为 waitpid(WUNTRACED) 可能需要返回 parent.wake_all_waiters(); } // 唤醒等待在该子进程等待队列上的等待者 pcb.wake_all_waiters(); + // 让出 CPU 进入睡眠 schedule(SchedMode::SM_NONE); } + /// 信号默认处理函数——继续进程 fn sig_continue(_sig: Signal) { // 默认处理改为最小化:仅在已处于 Stopped 时唤醒停止,让进程继续运行。 diff --git a/kernel/src/ipc/kill.rs b/kernel/src/ipc/kill.rs index ea787c8ca4..91bab43b99 100644 --- a/kernel/src/ipc/kill.rs +++ 
b/kernel/src/ipc/kill.rs @@ -1,8 +1,14 @@ -use crate::ipc::signal_types::{SigInfo, SigType}; -use crate::ipc::syscall::sys_kill::check_signal_permission_pcb_with_sig; -use crate::process::pid::{Pid, PidType}; -use crate::process::{ProcessControlBlock, ProcessManager, RawPid}; -use crate::{arch::ipc::signal::Signal, ipc::signal_types::SigCode}; +use crate::{ + arch::ipc::signal::Signal, + ipc::{ + signal_types::{OriginCode, SigCode, SigInfo, SigType}, + syscall::sys_kill::check_kill_permission, + }, + process::{ + pid::{Pid, PidType}, + ProcessControlBlock, ProcessManager, RawPid, + }, +}; use alloc::sync::Arc; use alloc::vec::Vec; use core::sync::atomic::compiler_fence; @@ -15,7 +21,7 @@ pub fn send_signal_to_pid(pid: RawPid, sig: Signal) -> Result Result, sig: Signal) -> Result, sig: Signal) -> Result Result<(), SystemError> { let mut info = SigInfo::new( sig, 0, - SigCode::Kernel, + SigCode::Origin(OriginCode::Kernel), SigType::Kill { pid: RawPid::new(0), uid: 0, @@ -159,13 +160,13 @@ impl Signal { // signal的信息为空 if let Some(ref siginfo) = info { - force_send = matches!(siginfo.sig_code(), SigCode::Kernel); + force_send = matches!(siginfo.sig_code(), SigCode::Origin(OriginCode::Kernel)); } else { // todo: 判断signal是否来自于一个祖先进程的namespace,如果是,则强制发送信号 //详见 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/signal.c?r=&mo=32170&fi=1220#1226 } - let prepare_result = self.prepare_sianal(pcb.clone(), force_send); + let prepare_result = self.prepare_signal(pcb.clone(), force_send); if !prepare_result { return Ok(0); } @@ -204,7 +205,7 @@ impl Signal { SigInfo::new( *self, 0, - SigCode::User, + SigCode::Origin(OriginCode::User), SigType::Kill { pid: sender_pid, uid: sender_uid, @@ -282,9 +283,6 @@ impl Signal { // ===== 寻找需要wakeup的目标进程 ===== // 备注:由于当前没有进程组的概念,每个进程只有1个对应的线程,因此不需要通知进程组内的每个进程。 // todo: 当引入进程组的概念后,需要完善这里,使得它能寻找一个目标进程来唤醒,接着执行信号处理的操作。 - - // let _signal = pcb.sig_struct(); - let target_pcb: Option>; // 根据信号类型选择添加到线程级 pending 还是进程级 shared_pending @@ -310,7 
+308,19 @@ impl Signal { crate::ipc::signalfd::notify_signalfd_for_pcb(&pcb, *self); // 判断目标进程是否应该被唤醒以立即处理该信号 let wants_signal = self.wants_signal(pcb.clone()); - if wants_signal { + + // 对于被 ptrace 的进程,如果收到 SIGSTOP 信号,需要特殊处理 + let is_ptrace_sigstop = + pcb.flags().contains(ProcessFlags::PTRACED) && *self == Signal::SIGSTOP; + + let should_wake = if is_ptrace_sigstop { + // 对于 ptrace 进程的 SIGSTOP,总是需要唤醒 + true + } else { + wants_signal + }; + + if should_wake { target_pcb = Some(pcb.clone()); } else if pt == PidType::PID { /* @@ -444,10 +454,10 @@ impl Signal { } drop(sig_info); - // TODO: ptrace 拦截被忽略的信号 - // if pcb.flags().contains(ProcessFlags::PTRACED) && *self != Signal::SIGKILL { - // return false; - // } + // ptrace 拦截被忽略的信号 + if pcb.flags().contains(ProcessFlags::PTRACED) && *self != Signal::SIGKILL { + return false; + } Self::sig_task_ignored(self, pcb, force) } @@ -465,7 +475,7 @@ impl Signal { /// - `true` 能够发送信号 /// /// - `false` 不能发送信号 - fn prepare_sianal(&self, pcb: Arc, _force: bool) -> bool { + fn prepare_signal(&self, pcb: Arc, _force: bool) -> bool { // 统一从线程组组长的 ThreadInfo 中获取完整线程列表。 // 注意:当前 sighand 共享在 CLONE_THREAD 线程组内,因此标志位操作仍然只需要对共享 sighand 做一次。 let thread_group_leader = { @@ -495,6 +505,20 @@ impl Signal { let flush: SigSet; if !(self.into_sigset() & SIG_KERNEL_STOP_MASK).is_empty() { flush = Signal::SIGCONT.into_sigset(); + + // 对于 ptrace 进程,SIGSTOP 应该在 do_signal 中由 ptrace_signal 处理 + if pcb.flags().contains(ProcessFlags::PTRACED) { + // 只清理 SIGCONT,不执行停止操作 + thread_group_leader + .sighand() + .shared_pending_flush_by_mask(&flush); + for_each_thread_in_group(&mut |t| { + t.sig_info_mut().sig_pending_mut().flush_by_mask(&flush); + }); + return !self.sig_ignored(&pcb, _force); + } + + // 非ptrace进程的正常SIGSTOP处理:立即停止并通知父进程 // Stop 类信号:清理 SIGCONT(共享 + 各线程私有 pending) thread_group_leader .sighand() @@ -513,12 +537,15 @@ impl Signal { // 线程组 stop:对组内所有线程置为 Stopped,保证 SIGSTOP 对整个线程组生效。 for_each_thread_in_group(&mut |t| { - let _ = 
ProcessManager::stop_task(t); + let _ = ProcessManager::stop_task(t, *self); }); if let Some(parent) = pcb.parent_pcb() { - let _ = crate::ipc::kill::send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); + let _ = send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); parent.wake_all_waiters(); + } else if let Some(real_parent) = pcb.real_parent_pcb() { + let _ = send_signal_to_pcb(real_parent.clone(), Signal::SIGCHLD); + real_parent.wake_all_waiters(); } // 唤醒等待在该子进程/线程上的等待者 thread_group_leader.wake_all_waiters(); @@ -527,7 +554,7 @@ impl Signal { }); // SIGSTOP 是 kernel-only stop 信号:其效果是把线程组置为 stopped 并通知父进程, - // 不应作为“可传递到用户态”的 pending 信号继续入队。 + // 不应作为"可传递到用户态"的 pending 信号继续入队。 // 否则在 SIGCONT 后可能错误地以 EINTR/ERESTART* 形式打断正在执行的系统调用(gVisor sigstop_test 即依赖这一点)。 if *self == Signal::SIGSTOP { return false; @@ -571,8 +598,11 @@ impl Signal { .sighand() .flags_remove(SignalFlags::STOP_STOPPED); if let Some(parent) = pcb.parent_pcb() { - let _ = crate::ipc::kill::send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); + let _ = send_signal_to_pcb(parent.clone(), Signal::SIGCHLD); parent.wake_all_waiters(); + } else if let Some(real_parent) = pcb.real_parent_pcb() { + let _ = send_signal_to_pcb(real_parent.clone(), Signal::SIGCHLD); + real_parent.wake_all_waiters(); } // 唤醒等待在该子进程上的等待者 thread_group_leader.wake_all_waiters(); @@ -599,7 +629,7 @@ impl Signal { /// - `_guard` 信号结构体锁守卫,来保证信号结构体已上锁 /// - `fatal` 表明这个信号是不是致命的(会导致进程退出) #[inline] -fn signal_wake_up(pcb: Arc, fatal: bool) { +pub fn signal_wake_up(pcb: Arc, fatal: bool) { // 如果是 fatal 的话就唤醒 stop 和 block 的进程来响应,因为唤醒后就会终止 // 如果不是 fatal 的就只唤醒 stop 的进程来响应 // debug!("signal_wake_up"); diff --git a/kernel/src/ipc/signal_types.rs b/kernel/src/ipc/signal_types.rs index 0fd66db822..44602ef16e 100644 --- a/kernel/src/ipc/signal_types.rs +++ b/kernel/src/ipc/signal_types.rs @@ -19,6 +19,43 @@ use crate::{ #[derive(Copy, Debug, Clone, PartialEq, Eq)] #[repr(i32)] pub enum SigCode { + /// 描述通用来源 + Origin(OriginCode), + /// 描述 
SIGCHLD 的具体原因 + SigChld(ChldCode), + /// 描述 SIGTRAP 的具体原因 (TRAP_*) + Trap(TrapCode), + /// 描述 SIGILL 的原因 + Ill(IllCode), + /// 描述 SIGFPE 的原因 + Fpe(FpeCode), + /// 描述 SIGSEGV 的原因 + Segv(SegvCode), + /// 描述 SIGBUS 的原因 + Bus(BusCode), + /// 其他未分类的 si_code (如 ptrace event 生成的 (event<<8)|SIGTRAP) + Raw(i32), +} + +impl From<SigCode> for i32 { + fn from(code: SigCode) -> i32 { + match code { + SigCode::Origin(origin) => origin as i32, + SigCode::SigChld(chld) => chld as i32, + SigCode::Trap(trap) => trap as i32, + SigCode::Ill(ill) => ill as i32, + SigCode::Fpe(fpe) => fpe as i32, + SigCode::Segv(segv) => segv as i32, + SigCode::Bus(bus) => bus as i32, + SigCode::Raw(raw) => raw, + } + } +} + +/// 信号的通用来源码 (SI_*) +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +pub enum OriginCode { /// sent by kill, sigsend, raise User = 0, /// sent by kernel from somewhere @@ -37,17 +74,110 @@ pub enum SigCode { Tkill = -6, } +/// SIGCHLD 专用原因码 (CLD_*) +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +pub enum ChldCode { + Exited = 1, + Killed = 2, + Dumped = 3, + Trapped = 4, + Stopped = 5, + Continued = 6, +} + +/// SIGTRAP si_codes (TRAP_*) +/// 用于区分不同类型的陷阱/断点 +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +#[allow(dead_code)] +pub enum TrapCode { + /// process breakpoint - 断点触发 + Brkpt = 1, + /// process trace trap - ptrace 单步执行 + Trace = 2, + /// process taken branch trap - 分支跟踪 + Branch = 3, + /// hardware breakpoint/watchpoint - 硬件断点 + Hwbkpt = 4, + /// undiagnosed trap - 未诊断的陷阱 + Unk = 5, + /// perf event with sigtrap=1 - 性能事件 + Perf = 6, +} + +/// SIGILL 的原因码 (ILL_*) +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +pub enum IllCode { + /// 非法操作码 (Illegal Opcode) + IllOpC = 1, + /// 非法操作数 (Illegal Operand) + IllOpN = 2, + /// 非法寻址模式 (Illegal Addressing Mode) + IllAdr = 3, + /// 非法陷阱 (Illegal Trap) + IllTrp = 4, + /// 特权操作码 (Privileged Opcode) + PrvOpC = 5, + /// 特权寄存器 (Privileged Register) +
PrvReg = 6, + /// 协处理器错误 (Coprocessor Error) + CoProc = 7, + /// 内部堆栈错误 (Internal Stack Error) + BadStk = 8, +} + +/// SIGFPE si_codes +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +#[allow(dead_code)] +pub enum FpeCode { + IntDiv = 1, /* integer divide by zero */ + IntOvf = 2, /* integer overflow */ + FltDiv = 3, /* floating point divide by zero */ + FltOvf = 4, /* floating point overflow */ + FltUnd = 5, /* floating point underflow */ + FltRes = 6, /* floating point inexact result */ + FltInv = 7, /* floating point invalid operation */ + FltSub = 8, /* subscript out of range */ +} + +/// SIGSEGV si_codes +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +#[allow(clippy::enum_variant_names)] +#[allow(dead_code)] +pub enum SegvCode { + MapErr = 1, /* address not mapped to object */ + AccErr = 2, /* invalid permissions for mapped object */ + BndErr = 3, /* failed address bound checks */ + PkuErr = 4, /* access was denied by memory protection keys */ +} + +/// SIGBUS si_codes +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +#[repr(i32)] +#[allow(dead_code)] +pub enum BusCode { + AdrAln = 1, /* invalid address alignment */ + AdrErr = 2, /* non-existent physical address */ + ObjErr = 3, /* object specific hardware error */ + MceErrAr = 4, /* hardware memory error consumed on a machine check: action required */ + MceErrAo = 5, /* hardware memory error detected in process but not consumed: action optional */ +} + impl SigCode { pub fn try_from_i32(x: i32) -> Option<Self> { match x { - 0 => Some(Self::User), - 0x80 => Some(Self::Kernel), - -1 => Some(Self::Queue), - -2 => Some(Self::Timer), - -3 => Some(Self::Mesgq), - -4 => Some(Self::AsyncIO), - -5 => Some(Self::SigIO), - -6 => Some(Self::Tkill), + 0 => Some(Self::Origin(OriginCode::User)), + 0x80 => Some(Self::Origin(OriginCode::Kernel)), + -1 => Some(Self::Origin(OriginCode::Queue)), + -2 => Some(Self::Origin(OriginCode::Timer)), + -3 => Some(Self::Origin(OriginCode::Mesgq)), + -4 =>
Some(Self::Origin(OriginCode::AsyncIO)), + -5 => Some(Self::Origin(OriginCode::SigIO)), + -6 => Some(Self::Origin(OriginCode::Tkill)), _ => None, } } @@ -498,8 +628,8 @@ impl SigInfo { match self.sig_type { SigType::Kill { pid, uid } => PosixSigInfo { si_signo: self.sig_no, + si_code: i32::from(self.sig_code), si_errno: self.errno, - si_code: self.sig_code as i32, _sifields: PosixSiginfoFields { _kill: PosixSiginfoKill { si_pid: pid.data() as i32, @@ -510,7 +640,7 @@ impl SigInfo { SigType::Rt { pid, uid, sigval } => PosixSigInfo { si_signo: self.sig_no, si_errno: self.errno, - si_code: self.sig_code as i32, + si_code: i32::from(self.sig_code), _sifields: PosixSiginfoFields { _rt: PosixSiginfoRt { si_pid: pid.data() as i32, @@ -521,8 +651,8 @@ impl SigInfo { }, SigType::Alarm(pid) => PosixSigInfo { si_signo: self.sig_no, + si_code: i32::from(self.sig_code), si_errno: self.errno, - si_code: self.sig_code as i32, _sifields: PosixSiginfoFields { _timer: PosixSiginfoTimer { si_tid: pid.data() as i32, @@ -538,7 +668,7 @@ impl SigInfo { } => PosixSigInfo { si_signo: self.sig_no, si_errno: self.errno, - si_code: self.sig_code as i32, + si_code: i32::from(self.sig_code), _sifields: PosixSiginfoFields { _timer: PosixSiginfoTimer { si_tid: timerid, @@ -547,6 +677,33 @@ impl SigInfo { }, }, }, + SigType::SigFault(sig_fault_info) => PosixSigInfo { + si_signo: self.sig_no, + si_errno: self.errno, + si_code: i32::from(self.sig_code), + _sifields: PosixSiginfoFields { + _sigfault: PosixSiginfoSigfault { + si_addr: sig_fault_info.addr as u64, + si_addr_lsb: 0, + si_band: 0, + si_fd: 0, + }, + }, + }, + SigType::SigChld(sig_chld_info) => PosixSigInfo { + si_signo: self.sig_no, + si_errno: self.errno, + si_code: i32::from(self.sig_code), + _sifields: PosixSiginfoFields { + _sigchld: PosixSiginfoSigchld { + si_pid: sig_chld_info.pid.data() as i32, + si_uid: sig_chld_info.uid as u32, + si_status: sig_chld_info.status, + si_utime: sig_chld_info.utime as i64, + si_stime: 
sig_chld_info.stime as i64, + }, + }, + }, } } @@ -599,12 +756,28 @@ pub enum SigType { // 后续完善下列中的具体字段 // Timer, // Rt, - // SigChild, - // SigFault, + SigFault(SigFaultInfo), + SigChld(SigChldInfo), // SigPoll, // SigSys, } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct SigFaultInfo { + pub addr: usize, + pub trapno: i32, + // 对于某些架构,可能有额外的字段 +} + +#[derive(Copy, Clone, Debug)] +pub struct SigChldInfo { + pub pid: RawPid, + pub uid: usize, + pub status: i32, + pub utime: u64, + pub stime: u64, +} + impl SigInfo { pub fn new(sig: Signal, sig_errno: i32, sig_code: SigCode, sig_type: SigType) -> Self { Self { @@ -645,7 +818,7 @@ impl SigPending { for info in self.queue.q.iter_mut() { // bump(0) 作为“匹配探测”,不会改变值 if info.is_signal(sig) - && info.sig_code() == SigCode::Timer + && info.sig_code() == SigCode::Origin(OriginCode::Timer) && info.bump_posix_timer_overrun(timerid, 0) { return true; @@ -658,7 +831,7 @@ impl SigPending { pub fn posix_timer_bump_overrun(&mut self, sig: Signal, timerid: i32, bump: i32) -> bool { for info in self.queue.q.iter_mut() { if info.is_signal(sig) - && info.sig_code() == SigCode::Timer + && info.sig_code() == SigCode::Origin(OriginCode::Timer) && info.bump_posix_timer_overrun(timerid, bump) { return true; @@ -671,7 +844,7 @@ impl SigPending { pub fn posix_timer_reset_overrun(&mut self, sig: Signal, timerid: i32) -> bool { for info in self.queue.q.iter_mut() { if info.is_signal(sig) - && info.sig_code() == SigCode::Timer + && info.sig_code() == SigCode::Origin(OriginCode::Timer) && info.reset_posix_timer_overrun(timerid) { return true; @@ -726,9 +899,9 @@ impl SigPending { let mut ret = SigInfo::new( sig, 0, - SigCode::User, + SigCode::Origin(OriginCode::User), SigType::Kill { - pid: RawPid::from(0), + pid: RawPid::new(0), uid: 0, }, ); diff --git a/kernel/src/ipc/syscall/sys_kill.rs b/kernel/src/ipc/syscall/sys_kill.rs index afdeadbef7..ff726adf31 100644 --- a/kernel/src/ipc/syscall/sys_kill.rs +++ 
b/kernel/src/ipc/syscall/sys_kill.rs @@ -6,6 +6,7 @@ use core::ffi::c_int; use crate::arch::interrupt::TrapFrame; use crate::process::cred::CAPFlags; use crate::process::pid::Pid; +use crate::process::pid::PidType; use crate::process::ProcessControlBlock; use crate::syscall::table::FormattedSyscallParam; use crate::syscall::table::Syscall; @@ -93,7 +94,7 @@ impl PidConverter { /// - SIGCONT can be sent to any process in the same session /// /// 参考: https://man7.org/linux/man-pages/man2/kill.2.html -pub fn check_signal_permission_pcb_with_sig( +pub fn check_kill_permission( target: &Arc, sig: Option, ) -> Result<(), SystemError> { @@ -111,11 +112,12 @@ pub fn check_signal_permission_pcb_with_sig( return Ok(()); } + // 凭证检查 (kill_ok_by_cred) // Check if sender's UID matches target's UID or saved UID - if current_cred.euid == target_cred.uid - || current_cred.euid == target_cred.suid - || current_cred.uid == target_cred.uid + if current_cred.euid == target_cred.suid + || current_cred.euid == target_cred.uid || current_cred.uid == target_cred.suid + || current_cred.uid == target_cred.uid { return Ok(()); } @@ -141,7 +143,7 @@ pub fn check_signal_permission_pcb_with_sig( /// Check if the current process has permission to send a signal to the target process. /// (不带信号参数的兼容版本) pub fn check_signal_permission_pcb(target: &Arc) -> Result<(), SystemError> { - check_signal_permission_pcb_with_sig(target, None) + check_kill_permission(target, None) } /// Check if the current process has permission to send a signal to the target process. 
@@ -170,10 +172,40 @@ fn handle_null_signal(converter: &PidConverter) -> Result<usize, SystemError> { Ok(0) } PidConverter::Pgid(pgid) => { - // For process groups, verify the group exists - // A more complete implementation could check all processes in the group - pgid.as_ref().ok_or(SystemError::ESRCH)?; - Ok(0) + // For process groups, verify the group exists and has at least one living process + let pg = pgid.as_ref().ok_or(SystemError::ESRCH)?; + // 检查进程组中是否有存活的进程 + let tasks: Vec<Arc<ProcessControlBlock>> = pg.tasks_iter(PidType::PGID).collect(); + if tasks.is_empty() { + return Err(SystemError::ESRCH); + } + + let mut any_success = false; + let mut last_err = SystemError::ESRCH; // 默认为 ESRCH,处理全员退出的情况 + for pcb in tasks { + // 如果进程已退出(僵尸),在 kill(0) 语义下通常视为不存在,跳过 + if pcb.is_exited() { + continue; + } + // 检查权限! + match check_signal_permission_pcb(&pcb) { + Ok(_) => { + // 只要有一个进程通过检查,整体就算成功 + any_success = true; + } + Err(e) => { + last_err = e; + } + } + } + if any_success { + Ok(0) + } else { + // 如果没有一个成功: + // 1. 可能是因为所有进程都是僵尸 (last_err 初始值 ESRCH) + // 2.
可能是因为所有活进程都无权发送 (last_err 被更新为 EPERM) + Err(last_err) + } } PidConverter::All => { // Signal 0 to all processes: just verify the syscall is valid diff --git a/kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs b/kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs index 46db0719f1..5c4d1b542e 100644 --- a/kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs +++ b/kernel/src/ipc/syscall/sys_pidfd_sendsignal.rs @@ -1,5 +1,5 @@ use crate::arch::ipc::signal::Signal; -use crate::ipc::signal_types::SigCode; +use crate::ipc::signal_types::{OriginCode, SigCode}; use crate::ipc::signal_types::{SigInfo, SigType}; use alloc::string::ToString; use alloc::vec::Vec; @@ -69,7 +69,7 @@ impl Syscall for SysPidfdSendSignalHandle { let mut info = SigInfo::new( sig, 0, - SigCode::User, + SigCode::Origin(OriginCode::User), SigType::Kill { pid: sender_pid, uid: sender_uid, diff --git a/kernel/src/ipc/syscall/sys_restart.rs b/kernel/src/ipc/syscall/sys_restart.rs index f0cc4829d3..abe6d6eb7c 100644 --- a/kernel/src/ipc/syscall/sys_restart.rs +++ b/kernel/src/ipc/syscall/sys_restart.rs @@ -4,7 +4,7 @@ use crate::{ alloc::vec::Vec, arch::ipc::signal::Signal, arch::syscall::nr::SYS_RESTART_SYSCALL, - ipc::signal_types::SigCode, + ipc::signal_types::{OriginCode, SigCode}, process::{ProcessManager, RawPid}, syscall::table::{FormattedSyscallParam, Syscall}, }; @@ -25,7 +25,12 @@ pub(super) fn do_kernel_restart_syscall() -> Result { // 不应该走到这里,因此kill掉当前进程及同组的进程 let pid = RawPid::new(0); let sig = Signal::SIGKILL; - let mut info = SigInfo::new(sig, 0, SigCode::Kernel, SigType::Kill { pid, uid: 0 }); + let mut info = SigInfo::new( + sig, + 0, + SigCode::Origin(OriginCode::Kernel), + SigType::Kill { pid, uid: 0 }, + ); sig.send_signal_info(Some(&mut info), pid) .expect("Failed to kill "); diff --git a/kernel/src/ipc/syscall/sys_rt_sigqueueinfo.rs b/kernel/src/ipc/syscall/sys_rt_sigqueueinfo.rs index e289f1f7e6..e651a84654 100644 --- a/kernel/src/ipc/syscall/sys_rt_sigqueueinfo.rs +++ 
b/kernel/src/ipc/syscall/sys_rt_sigqueueinfo.rs @@ -5,8 +5,8 @@ use core::mem::size_of; use crate::arch::interrupt::TrapFrame; use crate::arch::syscall::nr::SYS_RT_SIGQUEUEINFO; -use crate::ipc::signal_types::{PosixSigInfo, SigCode, SigInfo, SigType}; -use crate::ipc::syscall::sys_kill::check_signal_permission_pcb_with_sig; +use crate::ipc::signal_types::{OriginCode, PosixSigInfo, SigCode, SigInfo, SigType}; +use crate::ipc::syscall::sys_kill::check_kill_permission; use crate::process::pid::PidType; use crate::syscall::table::{FormattedSyscallParam, Syscall}; use crate::syscall::user_access::UserBufferReader; @@ -63,7 +63,7 @@ impl Syscall for SysRtSigqueueinfoHandle { let target_pid = RawPid::from(pid as usize); let target = ProcessManager::find_task_by_vpid(target_pid).ok_or(SystemError::ESRCH)?; // 传入 Signal::INVALID/0 在权限检查里无特殊含义,这里用 None 即可 - check_signal_permission_pcb_with_sig(&target, None)?; + check_kill_permission(&target, None)?; return Ok(0); } @@ -83,16 +83,18 @@ impl Syscall for SysRtSigqueueinfoHandle { // Linux 6.6: do_rt_sigqueueinfo 权限校验 let si_code = user_info.si_code; - if (si_code >= 0 || si_code == (SigCode::Tkill as i32)) && current_pid != target_pid { + if (si_code >= 0 || si_code == (SigCode::Origin(OriginCode::Tkill)).into()) + && current_pid != target_pid + { return Err(SystemError::EPERM); } // 解析 si_code(未知 code:尽量保持“来自用户态(负值)”的语义,不 panic) let code_enum = SigCode::try_from_i32(si_code).unwrap_or({ if si_code < 0 { - SigCode::Queue + SigCode::Origin(OriginCode::Queue) } else { - SigCode::User + SigCode::Origin(OriginCode::User) } }); @@ -101,7 +103,7 @@ impl Syscall for SysRtSigqueueinfoHandle { // 根据信号来源/布局构造内核 SigInfo let sig_type = match code_enum { - SigCode::Queue => { + SigCode::Origin(OriginCode::Queue) => { let sigval = unsafe { user_info._sifields._rt.si_sigval }; SigType::Rt { pid: sender_pid, @@ -109,7 +111,7 @@ impl Syscall for SysRtSigqueueinfoHandle { sigval, } } - SigCode::Timer => { + SigCode::Origin(OriginCode::Timer) => 
{ let timer = unsafe { user_info._sifields._timer }; SigType::PosixTimer { timerid: timer.si_tid, @@ -127,7 +129,7 @@ impl Syscall for SysRtSigqueueinfoHandle { // 查找目标进程并检查权限 let target = ProcessManager::find_task_by_vpid(target_pid).ok_or(SystemError::ESRCH)?; - check_signal_permission_pcb_with_sig(&target, Some(signal))?; + check_kill_permission(&target, Some(signal))?; // rt_sigqueueinfo 发送进程级信号,使用 PidType::TGID signal diff --git a/kernel/src/ipc/syscall/sys_tkill.rs b/kernel/src/ipc/syscall/sys_tkill.rs index 2e8d81eb2c..2d41764b3a 100644 --- a/kernel/src/ipc/syscall/sys_tkill.rs +++ b/kernel/src/ipc/syscall/sys_tkill.rs @@ -3,20 +3,17 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::ffi::c_int; -use crate::arch::interrupt::TrapFrame; -use crate::process::pid::PidType; -use crate::syscall::table::FormattedSyscallParam; -use crate::syscall::table::Syscall; use crate::{ - arch::{ipc::signal::Signal, syscall::nr::SYS_TKILL}, - ipc::signal_types::SigCode, - process::{ProcessControlBlock, ProcessManager, RawPid}, + arch::{interrupt::TrapFrame, ipc::signal::Signal, syscall::nr::SYS_TKILL}, + ipc::{ + signal_types::{OriginCode, SigCode, SigInfo, SigType}, + syscall::sys_kill::check_kill_permission, + }, + process::{pid::PidType, ProcessControlBlock, ProcessManager, RawPid}, + syscall::table::{FormattedSyscallParam, Syscall}, }; use system_error::SystemError; -use crate::ipc::signal_types::{SigInfo, SigType}; -use crate::process::cred::CAPFlags; - /// tkill系统调用处理器 pub struct SysTkillHandle; @@ -46,7 +43,7 @@ impl Syscall for SysTkillHandle { return Err(SystemError::EINVAL); } - // 调用通用实现,tgid=0表示不验证线程组 + // 调用通用实现,tgid=0表示不验证线程组 (tkill行为) do_tkill(0, tid, sig) } @@ -79,112 +76,61 @@ pub fn do_tkill(tgid: i32, tid: i32, sig: c_int) -> Result { let target_pcb = ProcessManager::find_task_by_vpid(RawPid::from(tid as usize)).ok_or(SystemError::ESRCH)?; - // 2. 验证线程组归属 (仅当tgid > 0时) + // 2. 
验证线程组归属 (tgkill 逻辑) if tgid > 0 { let target_tgid = target_pcb.task_tgid_vnr().ok_or(SystemError::ESRCH)?; - if target_tgid != RawPid::from(tgid as usize) { return Err(SystemError::ESRCH); } } - // 3. 探测模式处理 (sig == 0) + // 3. 信号预处理 + // sig=0 用于探测,不产生实际 Signal 对象 + let signal = if sig == 0 { + Signal::INVALID + } else { + let s = Signal::from(sig); + if s == Signal::INVALID { + return Err(SystemError::EINVAL); + } + s + }; + + // 4. 权限检查 (sig=0 时也必须检查权限) + check_kill_permission(&target_pcb, Some(signal))?; + + // 5. 如果是探测模式,权限检查通过后直接返回 if sig == 0 { return Ok(0); } - // 4. 信号有效性检查 - let signal = Signal::from(sig); - if signal == Signal::INVALID { - return Err(SystemError::EINVAL); - } - - // 5. 权限检查 - check_kill_permission(signal, &target_pcb)?; - // 6. 发送信号 - send_signal_to_thread(signal, target_pcb) + send_signal_tkill(signal, target_pcb) } -/// 检查发送信号的权限 -/// -/// 根据Linux的权限检查规则: -/// 1. 发送者和接收者同用户,或者 -/// 2. 发送者具有CAP_KILL权限 -/// 3. 对于SIGKILL和SIGSTOP,需要更严格的权限检查 -/// -/// # 参数 -/// - `sig`: 要发送的信号 -/// - `target_pcb`: 目标进程控制块 -/// -/// # 返回值 -/// - `Ok(())`: 权限检查通过 -/// - `Err(SystemError::EPERM)`: 权限不足 -fn check_kill_permission( - sig: Signal, - target_pcb: &Arc, -) -> Result<(), SystemError> { - let current_pcb = ProcessManager::current_pcb(); - let current_cred = current_pcb.cred(); - let target_cred = target_pcb.cred(); - - // 检查是否具有CAP_KILL权限 - if current_cred.has_capability(CAPFlags::CAP_KILL) { - return Ok(()); - } - - // 检查是否为同一用户 - if current_cred.euid == target_cred.euid { - return Ok(()); - } - - // 对于SIGKILL和SIGSTOP,需要更严格的权限检查 - if matches!(sig, Signal::SIGKILL | Signal::SIGSTOP) { - if current_cred.has_capability(CAPFlags::CAP_KILL) { - return Ok(()); - } - return Err(SystemError::EPERM); - } - - // 其他信号,如果不同用户且没有CAP_KILL权限,则拒绝 - Err(SystemError::EPERM) -} - -/// 向指定线程发送信号 -/// -/// # 参数 -/// - `sig`: 要发送的信号 -/// - `target_pcb`: 目标进程控制块 -/// -/// # 返回值 -/// - `Ok(0)`: 成功 -/// - `Err(SystemError::ESRCH)`: 目标线程在投递过程中退出(竞态容忍) -fn send_signal_to_thread( 
+/// 发送 tkill 语义的信号 +fn send_signal_tkill( sig: Signal, target_pcb: Arc, ) -> Result { - // 构造SigInfo,使用SI_TKILL语义 let current_pcb = ProcessManager::current_pcb(); let current_tgid = current_pcb.task_tgid_vnr().unwrap_or(RawPid::from(0)); let sender_uid = current_pcb.cred().uid.data() as u32; let mut info = SigInfo::new( sig, - 0, - SigCode::Tkill, // 使用SI_TKILL语义 + 0, // errno + SigCode::Origin(OriginCode::Tkill), SigType::Kill { - pid: current_tgid, + pid: current_tgid, // 发送者的 TGID uid: sender_uid, }, ); - // 发送信号(tgkill 发送线程级信号,使用 PidType::PID) - let result = sig.send_signal_info_to_pcb(Some(&mut info), target_pcb, PidType::PID); - - // 处理竞态条件:如果目标线程在投递过程中退出,视为成功 - match result { - Err(SystemError::ESRCH) => Ok(0), // 竞态容忍 - other => other.map(|_| 0), + match sig.send_signal_info_to_pcb(Some(&mut info), target_pcb, PidType::PID) { + // 如果目标线程在投递过程中退出,Linux 视为成功(竞态容忍) + Err(SystemError::ESRCH) => Ok(0), + result => result.map(|_| 0), } } diff --git a/kernel/src/libs/printk.rs b/kernel/src/libs/printk.rs index 91af805cc3..a20da3129b 100644 --- a/kernel/src/libs/printk.rs +++ b/kernel/src/libs/printk.rs @@ -9,9 +9,13 @@ use crate::{ debug::klog::loglevel::{LogLevel, KERNEL_LOG_LEVEL}, driver::tty::{tty_driver::TtyOperation, virtual_terminal::vc_manager}, filesystem::procfs::{klog::LogMessage, kmsg::KMSG}, + libs::spinlock::SpinLock, time::PosixTimeSpec, }; +/// 全局串口输出锁,防止多进程并发输出导致字符交错 +static SERIAL_OUTPUT_LOCK: SpinLock<()> = SpinLock::new(()); + #[macro_export] macro_rules! 
print { ($($arg:tt)*) => ($crate::libs::printk::__printk(format_args!($($arg)*))); @@ -36,6 +40,8 @@ impl PrintkWriter { /// 并输出白底黑字 /// @param str: 要写入的字符 pub fn __write_string(&mut self, s: &str) { + // 获取全局串口输出锁,防止多进程并发输出导致字符交错 + let _guard = SERIAL_OUTPUT_LOCK.lock(); if let Some(current_vc) = vc_manager().current_vc() { // tty已经初始化了之后才输出到屏幕 let port = current_vc.port(); diff --git a/kernel/src/process/exit.rs b/kernel/src/process/exit.rs index a3ab20a7a4..cbf7c385c9 100644 --- a/kernel/src/process/exit.rs +++ b/kernel/src/process/exit.rs @@ -22,10 +22,15 @@ fn wstatus_to_waitid_status(raw_wstatus: i32) -> i32 { (raw_wstatus >> 8) & 0xff } +/// 获取子进程的 uid,用于填充 siginfo_t +fn get_child_uid(child_pcb: &Arc) -> u32 { + child_pcb.cred().uid.data() as u32 +} + /// 检查子进程的 exit_signal 是否与等待选项匹配 /// -/// 根据 Linux wait 语义: /// - __WALL: 等待所有子进程,忽略 exit_signal +/// - 如果子进程被 ptrace:总是可以等待,忽略 exit_signal /// - __WCLONE: 只等待"克隆"子进程(exit_signal != SIGCHLD) /// - 默认(无 __WCLONE): 只等待"正常"子进程(exit_signal == SIGCHLD) fn child_matches_wait_options(child_pcb: &Arc, options: WaitOption) -> bool { @@ -33,6 +38,10 @@ fn child_matches_wait_options(child_pcb: &Arc, options: Wai if options.contains(WaitOption::WALL) { return true; } + // 如果子进程被 ptrace,它总是可以被 wait + if child_pcb.is_traced() { + return true; + } let child_exit_signal = child_pcb.exit_signal.load(Ordering::SeqCst); let is_clone_child = child_exit_signal != Signal::SIGCHLD; @@ -54,11 +63,11 @@ pub struct KernelWaitOption<'a> { } #[derive(Debug, Clone)] -#[allow(dead_code)] pub struct WaitIdInfo { pub pid: RawPid, pub status: i32, pub cause: i32, + pub uid: u32, // 子进程的 uid,用于填充 siginfo_t } impl KernelWaitOption<'_> { @@ -85,7 +94,15 @@ pub fn kernel_wait4( // 构造参数 let mut kwo = KernelWaitOption::new(converter, options); + // 根据 Linux 6.6.21 语义: + // - wait4/waitpid 默认只等待 WEXITED(退出的子进程) + // - 只有显式传入 WUNTRACED (WSTOPPED) 时才报告停止的子进程 + // - __WALL 和 __WCLONE 是内部标志,不影响 WEXITED 的默认行为 + // + // 参考: 
https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#1744 kwo.options.insert(WaitOption::WEXITED); + // 注意:绝不在这里默认添加 WSTOPPED! + kwo.ret_rusage = rusage_buf; // 调用do_wait,执行等待 @@ -142,7 +159,7 @@ pub fn kernel_waitid( si._sifields = PosixSiginfoFields { _sigchld: PosixSiginfoSigchld { si_pid: info.pid.data() as i32, - si_uid: 0, + si_uid: info.uid, // 从 WaitIdInfo 获取 uid si_status: info.status, si_utime: 0, si_stime: 0, @@ -184,20 +201,28 @@ fn is_eligible_child(child_pcb: &Arc<ProcessControlBlock>, options: WaitOption) let current = ProcessManager::current_pcb(); let current_tgid = current.tgid; - // 获取子进程的 real_parent - let child_parent = match child_pcb.real_parent_pcb() { + // 如果子进程被ptrace,parent指向tracer,real_parent指向原始父进程 + // 这里优先使用 parent 来判断父子关系,parent 不存在时回退到 real_parent + let child_parent = match child_pcb.parent_pcb() { Some(p) => p, - None => return false, + None => match child_pcb.real_parent_pcb() { + Some(p) => p, + None => { + return false; + } + }, }; if options.contains(WaitOption::WNOTHREAD) { // 带 __WNOTHREAD:只能等待当前线程自己创建的子进程 - // 检查子进程的 real_parent 是否就是当前线程 - Arc::ptr_eq(&child_parent, &current) + // 检查子进程的 parent 是否就是当前线程 + let result = Arc::ptr_eq(&child_parent, &current); + result } else { // 默认情况:线程组中的任何线程都可以等待同一线程组中任何线程创建的子进程 - // 检查子进程的 real_parent 的 tgid 是否与当前线程的 tgid 相同 - child_parent.tgid == current_tgid + // 检查子进程的 parent 的 tgid 是否与当前线程的 tgid 相同 + let result = child_parent.tgid == current_tgid; + result } } @@ -296,28 +321,23 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result<usize, SystemError> { get_thread_group_leader(&current) }; loop { - if kwo.options.contains(WaitOption::WNOHANG) { - let rd_children = parent.children.read(); - if rd_children.is_empty() { - break Err(SystemError::ECHILD); - } else { - break Ok(0); - } - } - let mut scan_result: Option<Result<usize, SystemError>> = None; let mut echild = false; + let mut has_eligible_child = false; let wait_res = parent.wait_queue.wait_event_interruptible( || { let rd_childen = parent.children.read(); - if rd_childen.is_empty() { + let rd_ptraced = parent.ptraced_list.read(); +
+ if rd_childen.is_empty() && rd_ptraced.is_empty() { echild = true; return true; } let mut all_children_exited = true; let mut pid_to_release: Option = None; + // 首先遍历 children 列表(类似 Linux 的 do_wait_thread) for pid in rd_childen.iter() { let pcb = match ProcessManager::find_task_by_vpid(*pid) { Some(p) => p, @@ -331,22 +351,31 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { continue; } + // 找到了符合条件的子进程,标记为有可等待的子进程 + has_eligible_child = true; + let sched_guard = pcb.sched_info().inner_lock_read_irqsave(); let state = sched_guard.state(); if !pcb.is_zombie() { all_children_exited = false; } - if matches!(state, ProcessState::Stopped) + if matches!(state, ProcessState::Stopped(_)) && kwo.options.contains(WaitOption::WSTOPPED) && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) { - let stopsig = Signal::SIGSTOP as i32; + // 从 ProcessState::Stopped 中提取实际的停止信号号 + let stopsig = if let ProcessState::Stopped(sig) = state { + (sig & 0x7f) as i32 + } else { + Signal::SIGSTOP as i32 + }; kwo.no_task_error = None; kwo.ret_info = Some(WaitIdInfo { pid: pcb.task_pid_vnr(), status: stopsig, cause: SigChildCode::Stopped.into(), + uid: get_child_uid(&pcb), }); kwo.ret_status = (stopsig << 8) | 0x7f; if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -355,6 +384,25 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { scan_result = Some(Ok((*pid).into())); drop(sched_guard); break; + } else if matches!(state, ProcessState::TracedStopped(_)) { + // TracedStopped 状态类似于 Linux 的 TASK_TRACED + // 这是 ptrace 专用的停止状态,总是报告给 tracer + let stopsig = if let ProcessState::TracedStopped(sig) = state { + (sig & 0x7f) as i32 + } else { + Signal::SIGSTOP as i32 + }; + kwo.no_task_error = None; + kwo.ret_info = Some(WaitIdInfo { + pid: pcb.task_pid_vnr(), + status: stopsig, + cause: SigChildCode::Trapped.into(), + uid: get_child_uid(&pcb), + }); + kwo.ret_status = (stopsig << 8) | 0x7f; + scan_result = Some(Ok((*pid).into())); + drop(sched_guard); + break; } else if 
kwo.options.contains(WaitOption::WCONTINUED) && pcb.sighand().flags_contains(SignalFlags::CLD_CONTINUED) { @@ -363,6 +411,7 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { pid: pcb.task_pid_vnr(), status: Signal::SIGCONT as i32, cause: SigChildCode::Continued.into(), + uid: get_child_uid(&pcb), }); kwo.ret_status = 0xffff; if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -384,6 +433,7 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { pid: pcb.task_pid_vnr(), status: status8, cause: SigChildCode::Exited.into(), + uid: get_child_uid(&pcb), }); tmp_child_pcb = Some(pcb.clone()); if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -400,6 +450,76 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { drop(sched_guard); } drop(rd_childen); + + // 然后遍历 ptraced_list(类似 Linux 的 ptrace_do_wait) + // 被 ptrace 的进程不在 children 列表中,但可以被 tracer wait + if scan_result.is_none() { + for pid in rd_ptraced.iter() { + if scan_result.is_some() { + break; + } + let pcb = match ProcessManager::find_task_by_vpid(*pid) { + Some(p) => p, + None => continue, + }; + + // ptrace 的子进程总是可以被 wait,不需要检查 is_eligible_child + if !child_matches_wait_options(&pcb, kwo.options) { + continue; + } + + // 找到了符合条件的ptrace子进程,标记为有可等待的子进程 + has_eligible_child = true; + + let sched_guard = pcb.sched_info().inner_lock_read_irqsave(); + let state = sched_guard.state(); + if !state.is_exited() { + all_children_exited = false; + } + + if matches!(state, ProcessState::TracedStopped(_)) { + // TracedStopped 状态总是报告给 tracer + let stopsig = if let ProcessState::TracedStopped(sig) = state { + (sig & 0x7f) as i32 + } else { + Signal::SIGSTOP as i32 + }; + kwo.no_task_error = None; + kwo.ret_info = Some(WaitIdInfo { + pid: pcb.task_pid_vnr(), + status: stopsig, + cause: SigChildCode::Trapped.into(), + uid: get_child_uid(&pcb), + }); + kwo.ret_status = (stopsig << 8) | 0x7f; + scan_result = Some(Ok((*pid).into())); + drop(sched_guard); + break; + } else if state.is_exited() + && 
kwo.options.contains(WaitOption::WEXITED) + { + let raw = state.exit_code().unwrap() as i32; + kwo.ret_status = raw; + let status8 = wstatus_to_waitid_status(raw); + kwo.no_task_error = None; + kwo.ret_info = Some(WaitIdInfo { + pid: pcb.task_pid_vnr(), + status: status8, + cause: SigChildCode::Exited.into(), + uid: get_child_uid(&pcb), + }); + tmp_child_pcb = Some(pcb.clone()); + if !kwo.options.contains(WaitOption::WNOWAIT) { + pid_to_release = Some(pcb.raw_pid()); + } + scan_result = Some(Ok((*pid).into())); + drop(sched_guard); + break; + } + drop(sched_guard); + } + } + if let Some(pid) = pid_to_release { unsafe { ProcessManager::release(pid) }; } @@ -410,6 +530,11 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { echild = true; return true; } + // 如果没有找到任何符合条件的子进程,返回ECHILD + if !has_eligible_child { + echild = true; + return true; + } false }, None::, @@ -423,6 +548,11 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { if echild { break Err(SystemError::ECHILD); } + // 如果设置了WNOHANG,且没有符合条件的子进程准备好, + // 返回0表示"没有子进程准备好但存在可等待的子进程" + if kwo.options.contains(WaitOption::WNOHANG) { + break Ok(0); + } if ProcessManager::current_pcb().has_pending_signal_fast() { break Err(SystemError::ERESTARTSYS); } @@ -448,7 +578,9 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { let wait_res = parent.wait_queue.wait_event_interruptible( || { let rd_children = parent.children.read(); - if rd_children.is_empty() { + let rd_ptraced = parent.ptraced_list.read(); + + if rd_children.is_empty() && rd_ptraced.is_empty() { echild = true; return true; } @@ -489,16 +621,22 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { all_matching_children_exited = false; } - if matches!(state, ProcessState::Stopped) + if matches!(state, ProcessState::Stopped(_)) && kwo.options.contains(WaitOption::WSTOPPED) && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) { - let stopsig = Signal::SIGSTOP as i32; + // 从 ProcessState::Stopped 中提取实际的停止信号号 + let stopsig = if let 
ProcessState::Stopped(sig) = state { + (sig & 0x7f) as i32 + } else { + Signal::SIGSTOP as i32 + }; kwo.no_task_error = None; kwo.ret_info = Some(WaitIdInfo { pid: pcb.task_pid_vnr(), status: stopsig, cause: SigChildCode::Stopped.into(), + uid: get_child_uid(&pcb), }); kwo.ret_status = (stopsig << 8) | 0x7f; if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -515,6 +653,7 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { pid: pcb.task_pid_vnr(), status: Signal::SIGCONT as i32, cause: SigChildCode::Continued.into(), + uid: get_child_uid(&pcb), }); kwo.ret_status = 0xffff; if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -536,6 +675,7 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { pid: pcb.task_pid_vnr(), status: status8, cause: SigChildCode::Exited.into(), + uid: get_child_uid(&pcb), }); tmp_child_pcb = Some(pcb.clone()); if !kwo.options.contains(WaitOption::WNOWAIT) { @@ -552,6 +692,85 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { drop(sched_guard); } drop(rd_children); + + if scan_result.is_none() { + for ptraced_pid in rd_ptraced.iter() { + if scan_result.is_some() { + break; + } + + let pcb = match ProcessManager::find_task_by_vpid(*ptraced_pid) { + Some(p) => p, + None => continue, + }; + + // 检查 PGID 是否匹配 + let child_pgrp = pcb.task_pgrp(); + let in_target_pgrp = match &child_pgrp { + Some(cp) => Arc::ptr_eq(cp, pgid), + None => false, + }; + if !in_target_pgrp { + continue; + } + + has_matching_child = true; + + // ptrace 的子进程总是可以被 wait,不需要检查 is_eligible_child + if !child_matches_wait_options(&pcb, kwo.options) { + continue; + } + + let sched_guard = pcb.sched_info().inner_lock_read_irqsave(); + let state = sched_guard.state(); + if !state.is_exited() { + all_matching_children_exited = false; + } + + if matches!(state, ProcessState::TracedStopped(_)) { + // TracedStopped 状态总是报告给 tracer + let stopsig = if let ProcessState::TracedStopped(sig) = state { + (sig & 0x7f) as i32 + } else { + Signal::SIGSTOP as i32 + }; + kwo.no_task_error = 
None; + kwo.ret_info = Some(WaitIdInfo { + pid: pcb.task_pid_vnr(), + status: stopsig, + cause: SigChildCode::Trapped.into(), + uid: get_child_uid(&pcb), + }); + kwo.ret_status = (stopsig << 8) | 0x7f; + scan_result = Some(Ok((*ptraced_pid).into())); + drop(sched_guard); + break; + } else if state.is_exited() + && kwo.options.contains(WaitOption::WEXITED) + { + let raw = state.exit_code().unwrap() as i32; + kwo.ret_status = raw; + let status8 = wstatus_to_waitid_status(raw); + kwo.no_task_error = None; + kwo.ret_info = Some(WaitIdInfo { + pid: pcb.task_pid_vnr(), + status: status8, + cause: SigChildCode::Exited.into(), + uid: get_child_uid(&pcb), + }); + tmp_child_pcb = Some(pcb.clone()); + if !kwo.options.contains(WaitOption::WNOWAIT) { + pid_to_release = Some(pcb.raw_pid()); + } + scan_result = Some(Ok((*ptraced_pid).into())); + drop(sched_guard); + break; + } + drop(sched_guard); + } + } + drop(rd_ptraced); + if let Some(pid) = pid_to_release { unsafe { ProcessManager::release(pid) }; } @@ -631,12 +850,22 @@ fn do_waitpid( pid: child_pcb.task_pid_vnr(), status: Signal::SIGCONT as i32, cause: SigChildCode::Continued.into(), + uid: get_child_uid(&child_pcb), }); // 设置 ret_status 供 wait4 使用 // Linux wait(2) 语义:continued 进程的 wstatus = 0xffff kwo.ret_status = 0xffff; + // 获取 rusage(如果提供了 rusage 缓冲区) + // 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#1358 + if let Some(rusage) = kwo.ret_rusage.as_mut() { + if let Some(child_rusage) = child_pcb.get_rusage(super::resource::RUsageWho::RUsageSelf) + { + **rusage = child_rusage; + } + } + if !kwo.options.contains(WaitOption::WNOWAIT) { child_pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); } @@ -656,33 +885,92 @@ fn do_waitpid( // 而不是立即返回0。只有当子进程真正退出时才应该返回。 return None; } - ProcessState::Stopped => { - // 非 ptrace 停止:报告 stopsig=SIGSTOP - let stopsig = Signal::SIGSTOP as i32; - // 由于目前不支持ptrace,因此这个值为false - let ptrace = false; - - if (!ptrace) && (!kwo.options.contains(WaitOption::WSTOPPED)) { + 
ProcessState::Stopped(stopsig) => { + // todo: 在stopped里面,添加code字段,表示停止的原因 + // 根据 Linux 6.6.21 的 ptrace 语义,stopsig 的格式可能是: + // - ((signal << 8) | 0x7f) - 普通停止 + // - ((signal << 8) | 0x80) - ptrace 陷阱停止 + // 我们需要提取实际的信号编号 (低 8 位 & 0x7f) + let actual_sig = stopsig & 0x7f; + if actual_sig >= Signal::SIGRTMAX.into() { + return Some(Err(SystemError::EINVAL)); + } + let ptrace = child_pcb.is_traced(); + // 对于被跟踪的进程,总是报告停止状态,无论 WUNTRACED 是否设置 + // 对于非跟踪进程,只有在设置了 WUNTRACED 时才报告停止状态 + if (!ptrace) && (!kwo.options.contains(WaitOption::WUNTRACED)) { // 调用方未请求 WSTOPPED,按照 Linux 语义应当继续等待其它事件 // 而不是返回 0 并写回空的 siginfo。 return None; } - - // 填充 waitid 信息 - // log::debug!("do_waitpid: report CLD_STOPPED for pid={:?}", child_pcb.raw_pid()); + if likely(!(kwo.options.contains(WaitOption::WNOWAIT))) { + // 根据 Linux 6.6.21 语义: + // - 普通停止:(signal << 8) | 0x7f + // - ptrace 停止:(signal << 8) | 0x80 + kwo.ret_status = if (stopsig & 0x80) != 0 { + // ptrace 停止,保留 0x80 标志 + ((actual_sig << 8) | 0x80) as i32 + } else { + // 普通停止 + ((actual_sig << 8) | 0x7f) as i32 + }; + } + // if let Some(infop) = &mut kwo.ret_info { + // *infop = WaitIdInfo { + // pid: child_pcb.raw_pid(), + // status: stopsig, + // cause: SigChildCode::Stopped.into(), + // }; + // } kwo.ret_info = Some(WaitIdInfo { pid: child_pcb.task_pid_vnr(), - status: stopsig, + status: actual_sig as i32, cause: SigChildCode::Stopped.into(), + uid: get_child_uid(&child_pcb), }); - // 设置 ret_status 供 wait4 使用 - // Linux wait(2) 语义:stopped 进程的 wstatus = (stopsig << 8) | 0x7f - kwo.ret_status = (stopsig << 8) | 0x7f; + // 获取 rusage(如果提供了 rusage 缓冲区) + // 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#1308 + if let Some(rusage) = kwo.ret_rusage.as_mut() { + if let Some(child_rusage) = + child_pcb.get_rusage(super::resource::RUsageWho::RUsageSelf) + { + **rusage = child_rusage; + } + } - if !kwo.options.contains(WaitOption::WNOWAIT) { - // 消费一次停止事件标志(若存在) - child_pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); + 
return Some(Ok(child_pcb.raw_pid().data())); + } + ProcessState::TracedStopped(stopsig) => { + // 按照 Linux 6.6.21 的 ptrace 语义: + // TracedStopped 状态类似于 Linux 的 TASK_TRACED + // 这是 ptrace 专用的停止状态,总是报告给 tracer + // 提取实际的信号编号 (低 8 位 & 0x7f) + let actual_sig = stopsig & 0x7f; + if actual_sig >= Signal::SIGRTMAX.into() { + return Some(Err(SystemError::EINVAL)); + } + // TracedStopped 状态总是被 ptrace,所以总是报告停止状态 + // 不需要检查 WUNTRACED 标志 + if likely(!(kwo.options.contains(WaitOption::WNOWAIT))) { + // ptrace 停止:(signal << 8) | 0x7f + kwo.ret_status = ((actual_sig << 8) | 0x7f) as i32; + } + kwo.ret_info = Some(WaitIdInfo { + pid: child_pcb.task_pid_vnr(), + status: actual_sig as i32, + cause: SigChildCode::Trapped.into(), + uid: get_child_uid(&child_pcb), + }); + + // 获取 rusage(如果提供了 rusage 缓冲区) + // 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#1308 + if let Some(rusage) = kwo.ret_rusage.as_mut() { + if let Some(child_rusage) = + child_pcb.get_rusage(super::resource::RUsageWho::RUsageSelf) + { + **rusage = child_rusage; + } } return Some(Ok(child_pcb.raw_pid().data())); @@ -703,10 +991,22 @@ fn do_waitpid( pid, status: wstatus_to_waitid_status(status as i32), cause: SigChildCode::Exited.into(), + uid: get_child_uid(&child_pcb), }); kwo.ret_status = status as i32; + // 获取 rusage(如果提供了 rusage 缓冲区) + // 参考: https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#1191 + // 注意:需要在释放进程前获取 rusage + if let Some(rusage) = kwo.ret_rusage.as_mut() { + if let Some(child_rusage) = + child_pcb.get_rusage(super::resource::RUsageWho::RUsageSelf) + { + **rusage = child_rusage; + } + } + // 若指定 WNOWAIT,则只观测不回收 if !kwo.options.contains(WaitOption::WNOWAIT) { if !child_pcb.try_mark_dead_from_zombie() { @@ -728,7 +1028,7 @@ fn do_waitpid( impl ProcessControlBlock { /// 参考 https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/exit.c#143 - pub(super) fn __exit_signal(&mut self) { + pub(super) fn __exit_signal(&self) { let group_dead = self.is_thread_group_leader(); let mut 
sig_guard = self.sig_info_mut(); let mut tty: Option> = None; diff --git a/kernel/src/process/fork.rs b/kernel/src/process/fork.rs index 27320be78e..2478bba34b 100644 --- a/kernel/src/process/fork.rs +++ b/kernel/src/process/fork.rs @@ -1,25 +1,17 @@ -use alloc::vec::Vec; +use alloc::{string::ToString, sync::Arc, vec::Vec}; use core::{intrinsics::unlikely, sync::atomic::Ordering}; - -use crate::arch::MMArch; -use crate::filesystem::vfs::file::File; -use crate::filesystem::vfs::file::FileFlags; -use crate::filesystem::vfs::file::FilePrivateData; -use crate::filesystem::vfs::FileType; -use crate::filesystem::vfs::InodeMode; -use crate::mm::access_ok; -use crate::mm::MemoryManagementArch; -use crate::process::pid::PidPrivateData; -use alloc::{string::ToString, sync::Arc}; -use log::{error, warn}; use system_error::SystemError; use crate::{ - arch::{interrupt::TrapFrame, ipc::signal::Signal}, + arch::{interrupt::TrapFrame, ipc::signal::Signal, MMArch}, + filesystem::vfs::{ + file::{File, FileFlags, FilePrivateData}, + FileType, InodeMode, + }, ipc::signal_types::SignalFlags, libs::rwsem::RwSem, - mm::VirtAddr, - process::ProcessFlags, + mm::{access_ok, MemoryManagementArch, VirtAddr}, + process::{pid::PidPrivateData, ProcessFlags}, sched::{sched_cgroup_fork, sched_fork}, smp::core::smp_get_processor_id, syscall::user_access::UserBufferWriter, @@ -206,7 +198,7 @@ impl ProcessManager { args.verify()?; let pcb = ProcessControlBlock::new(name, new_kstack); Self::copy_process(¤t_pcb, &pcb, args, current_trapframe).map_err(|e| { - error!( + log::error!( "fork: Failed to copy process, current pid: [{:?}], new pid: [{:?}]. 
Error: {:?}", current_pcb.raw_pid(), pcb.raw_pid(), @@ -687,6 +679,10 @@ impl ProcessManager { } else { // 新创建的进程,设置其父进程为当前进程 *pcb.real_parent_pcb.write_irqsave() = Arc::downgrade(current_pcb); + + // 同时设置parent_pcb(按照Linux语义,默认parent=real_parent) + *pcb.parent_pcb.write_irqsave() = Arc::downgrade(current_pcb); + pcb.exit_signal .store(clone_args.exit_signal, Ordering::SeqCst); } @@ -801,14 +797,14 @@ impl ProcessManager { if vpid.data() != 0 { children.push(vpid); } else { - warn!( + log::warn!( "fork: child pid is 0 in parent pidns, parent pid={:?}, child pid={:?}", thread_group_leader.raw_pid(), pcb.raw_pid() ); } } else { - warn!( + log::warn!( "fork: failed to resolve child pid in parent pidns, parent pid={:?}, child pid={:?}", thread_group_leader.raw_pid(), pcb.raw_pid() diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index caa8afe785..b61c29b86d 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -1,19 +1,18 @@ +use alloc::{ + ffi::CString, + string::{String, ToString}, + sync::{Arc, Weak}, + vec::Vec, +}; use core::{ fmt, hash::Hash, hint::spin_loop, intrinsics::unlikely, - mem::ManuallyDrop, + mem::{ManuallyDrop, MaybeUninit}, str::FromStr, sync::atomic::{compiler_fence, fence, AtomicBool, AtomicU8, AtomicUsize, Ordering}, }; - -use alloc::{ - ffi::CString, - string::{String, ToString}, - sync::{Arc, Weak}, - vec::Vec, -}; use cred::INIT_CRED; use hashbrown::HashMap; use log::{debug, error, info, warn}; @@ -37,7 +36,7 @@ use crate::{ ipc::{ kill::send_signal_to_pcb, sighand::SigHand, - signal::RestartBlock, + signal::{signal_wake_up, RestartBlock}, signal_types::{SigInfo, SigPending}, }, libs::{ @@ -90,6 +89,7 @@ pub mod pid; pub mod posix_timer; pub mod preempt; pub mod process_group; +pub mod ptrace; pub mod resource; pub mod rseq; pub mod session; @@ -243,43 +243,56 @@ impl ProcessManager { } /// 唤醒一个进程 + /// 参考 Linux 6.6.21 的 try_to_wake_up,支持唤醒 Blocked 和 TracedStopped 状态的进程 pub fn wakeup(pcb: &Arc) -> 
Result<(), SystemError> { let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; let state = pcb.sched_info().inner_lock_read_irqsave().state(); - if state.is_blocked() { - let mut writer = pcb.sched_info().inner_lock_write_irqsave(); - let state = writer.state(); - if state.is_blocked() { - writer.set_state(ProcessState::Runnable); - writer.set_wakeup(); - // avoid deadlock - drop(writer); + // 类似 Linux 的 try_to_wake_up,检查任务状态并决定是否唤醒 + // 支持 Blocked 和 TracedStopped 状态 + let should_wake = state.is_blocked() || matches!(state, ProcessState::TracedStopped(_)); - let rq = - cpu_rq(pcb.sched_info().on_cpu().unwrap_or(current_cpu_id()).data() as usize); + if !should_wake { + if state.is_exited() { + return Err(SystemError::EINVAL); + } + return Ok(()); + } - let (rq, _guard) = rq.self_lock(); - rq.update_rq_clock(); - rq.activate_task( - pcb, - EnqueueFlag::ENQUEUE_WAKEUP | EnqueueFlag::ENQUEUE_NOCLOCK, - ); + let mut writer = pcb.sched_info().inner_lock_write_irqsave(); + let current_state = writer.state(); - rq.check_preempt_currnet(pcb, WakeupFlags::empty()); + // 双重检查,防止状态在获取锁之后改变 + let is_target_state = + current_state.is_blocked() || matches!(current_state, ProcessState::TracedStopped(_)); - // sched_enqueue(pcb.clone(), true); - return Ok(()); - } else if state.is_exited() { + if !is_target_state { + drop(writer); + if current_state.is_exited() { return Err(SystemError::EINVAL); - } else { - return Ok(()); } - } else if state.is_exited() { - return Err(SystemError::EINVAL); - } else { return Ok(()); } + + // 将状态改为 Runnable(对应 Linux 的 TASK_RUNNING) + writer.set_state(ProcessState::Runnable); + writer.set_wakeup(); + + // avoid deadlock + drop(writer); + + let rq = cpu_rq(pcb.sched_info().on_cpu().unwrap_or(current_cpu_id()).data() as usize); + + let (rq, _guard) = rq.self_lock(); + rq.update_rq_clock(); + rq.activate_task( + pcb, + EnqueueFlag::ENQUEUE_WAKEUP | EnqueueFlag::ENQUEUE_NOCLOCK, + ); + + rq.check_preempt_currnet(pcb, WakeupFlags::empty()); + + 
return Ok(()); } #[allow(dead_code)] @@ -343,10 +356,10 @@ impl ProcessManager { pub fn wakeup_stop(pcb: &Arc) -> Result<(), SystemError> { let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; let state = pcb.sched_info().inner_lock_read_irqsave().state(); - if let ProcessState::Stopped = state { + if let ProcessState::Stopped(_) = state { let mut writer = pcb.sched_info().inner_lock_write_irqsave(); let state = writer.state(); - if let ProcessState::Stopped = state { + if let ProcessState::Stopped(_) = state { writer.set_state(ProcessState::Runnable); // Stopped -> Runnable:必须清理 sleep 标志,否则调度器可能把该任务当作“睡眠出队”处理。 writer.set_wakeup(); @@ -387,7 +400,7 @@ impl ProcessManager { /// /// 注意:该函数用于对“目标进程”进行停止标记,不要求在目标进程上下文调用。 /// 与 `mark_stop`(仅当前进程)相对应。 - pub fn stop_task(pcb: &Arc) -> Result<(), SystemError> { + pub fn stop_task(pcb: &Arc, sig: Signal) -> Result<(), SystemError> { let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; let mut writer = pcb.sched_info().inner_lock_write_irqsave(); let state = writer.state(); @@ -397,7 +410,7 @@ impl ProcessManager { // Stopped 的任务不应继续留在 runqueue 中,否则仍可能被选中运行。 let on_rq = *pcb.sched_info().on_rq.lock_irqsave(); - writer.set_state(ProcessState::Stopped); + writer.set_state(ProcessState::Stopped(sig.into())); // stop 后不应再被视为“睡眠任务”,避免后续调度错误地做 DEQUEUE_SLEEP。 writer.set_wakeup(); pcb.flags().insert(ProcessFlags::NEED_SCHEDULE); @@ -451,7 +464,7 @@ impl ProcessManager { /// /// - 进入当前函数之前,不能持有sched_info的锁 /// - 进入当前函数之前,必须关闭中断 - pub fn mark_stop() -> Result<(), SystemError> { + pub fn mark_stop(sig: Signal) -> Result<(), SystemError> { assert!( !CurrentIrqArch::is_irq_enabled(), "interrupt must be disabled before enter ProcessManager::mark_stop()" @@ -460,7 +473,7 @@ impl ProcessManager { let pcb = ProcessManager::current_pcb(); let mut writer = pcb.sched_info().inner_lock_write_irqsave(); if !matches!(writer.state(), ProcessState::Exited(_)) { - writer.set_state(ProcessState::Stopped); + 
writer.set_state(ProcessState::Stopped(sig.into())); pcb.flags().insert(ProcessFlags::NEED_SCHEDULE); drop(writer); @@ -473,12 +486,15 @@ impl ProcessManager { #[inline(never)] fn exit_notify() { let current = ProcessManager::current_pcb(); + // 让INIT进程收养所有子进程 if current.raw_pid() != RawPid(1) { unsafe { - current - .adopt_childen() - .unwrap_or_else(|e| panic!("adopte_childen failed: error: {e:?}")) + if let Err(e) = current.adopt_childen() { + // Log error but don't panic - allow exit to continue + // Init will inherit orphaned children + log::error!("adopt_children failed during exit: {:?}. Children will be inherited by init.", e); + } }; // 在通知父进程之前,先标记为 Zombie,保证 wait 可见 current.set_exit_state_zombie(); @@ -488,30 +504,39 @@ impl ProcessManager { } let parent_pcb = r.unwrap(); - // 检查子进程的exit_signal,只有在有效时才发送信号 - let exit_signal = current.exit_signal.load(Ordering::SeqCst); - if exit_signal != Signal::INVALID { - let r = crate::ipc::kill::send_signal_to_pcb(parent_pcb.clone(), exit_signal); - if let Err(e) = r { - warn!( - "failed to send kill signal to {:?}'s parent pcb {:?}: {:?}", - current.raw_pid(), - parent_pcb.raw_pid(), - e - ); + // 如果进程被 ptrace,忽略 exit_signal,总是发送 SIGCHLD + let is_ptraced = current.flags().contains(ProcessFlags::PTRACED); + let signal_to_send = if is_ptraced { + Signal::SIGCHLD + } else { + let exit_signal = current.exit_signal.load(Ordering::SeqCst); + if exit_signal == Signal::INVALID { + Signal::SIGCHLD + } else { + exit_signal } + }; + + // 发送信号 + let r = crate::ipc::kill::send_signal_to_pcb(parent_pcb.clone(), signal_to_send); + if let Err(e) = r { + warn!( + "failed to send kill signal to {:?}'s parent pcb {:?}: {:?}", + current.raw_pid(), + parent_pcb.raw_pid(), + e + ); } - // 无论exit_signal是什么值,都要唤醒父进程的wait_queue - // 因为父进程可能使用__WALL选项等待任何类型的子进程(包括exit_signal=0的clone子进程) - // 根据Linux语义,exit_signal只决定发送什么信号,不决定是否唤醒父进程 + // 要唤醒父进程的wait_queue,因为父进程可能使用__WALL选项等待任何类型的子进程 parent_pcb .wait_queue 
.wakeup_all(Some(ProcessState::Blocked(true))); - // 根据 Linux wait 语义,线程组中的任何线程都可以等待同一线程组中任何线程创建的子进程。 - // 由于子进程被添加到线程组 leader 的 children 列表中, - // 因此还需要唤醒线程组 leader 的 wait_queue(如果 leader 不是 parent_pcb 本身)。 + // 唤醒父进程 + parent_pcb.wake_up_process(); + + // 唤醒线程组 leader 的 wait_queue let parent_group_leader = { let ti = parent_pcb.thread.read_irqsave(); ti.group_leader() @@ -521,6 +546,7 @@ impl ProcessManager { leader .wait_queue .wakeup_all(Some(ProcessState::Blocked(true))); + leader.wake_up_process(); } } // todo: 这里还需要根据线程组的信息,决定信号的发送 @@ -550,6 +576,19 @@ impl ProcessManager { spin_loop(); } } + + // 注意:不再在 exit() 中调用 ptrace_stop + // 按照 Linux 6.6.21 的语义: + // 1. 被 ptrace 的进程在收到信号时会调用 ptrace_stop(通过 ptrace_signal) + // 2. tracer 可以通过 waitpid 查看进程状态 + // 3. 当进程退出时,应该直接退出,不需要再次调用 ptrace_stop + // 4. 这样 tracer 可以通过 waitpid(WEXITED) 回收进程 + // + // 如果在 exit() 中调用 ptrace_stop,会导致: + // - tracer 无法通过 waitpid 回收子进程(因为子进程在 TracedStopped 状态) + // - tracer 收到 ESRCH 错误并退出 + // - 子进程永远等待,形成死锁 + drop(current_pcb); // 关中断 @@ -724,6 +763,10 @@ impl ProcessManager { pub(super) unsafe fn release(pid: RawPid) { let pcb = ProcessManager::find(pid); if let Some(ref pcb) = pcb { + // 立即执行 exit_signal,清理 PID 等资源 + // 确保 waitid 之后的调用能立即感知到进程已彻底消失(特别是 PID namespace 清理) + pcb.__exit_signal(); + // 从父进程的 children 列表中移除 if let Some(parent) = pcb.real_parent_pcb() { let parent_ns = parent.active_pid_ns(); @@ -771,7 +814,6 @@ impl ProcessManager { /// ## 参数 /// /// - `pcb` : 进程的pcb - #[allow(dead_code)] pub fn kick(pcb: &Arc) { ProcessManager::current_pcb().preempt_disable(); let cpu_id = pcb.sched_info().on_cpu(); @@ -840,7 +882,9 @@ pub enum ProcessState { /// - 如果该bool为false,那么,这个进程必须被显式的唤醒,才能重新进入Runnable状态。 Blocked(bool), /// 进程被信号终止 - Stopped, + Stopped(usize), + /// 用于ptrace跟踪停止的状态 (TASK_TRACED),携带退出码 + TracedStopped(usize), /// 进程已经退出,usize表示进程的退出码 Exited(usize), } @@ -863,7 +907,6 @@ impl ExitState { } } -#[allow(dead_code)] impl ProcessState { #[inline(always)] pub fn 
is_runnable(&self) -> bool { @@ -886,24 +929,49 @@ impl ProcessState { return matches!(self, ProcessState::Exited(_)); } - /// Returns `true` if the process state is [`Stopped`]. + /// Returns `true` if the process state is [`Stopped`] or [`TracedStopped`]. /// /// [`Stopped`]: ProcessState::Stopped + /// [`TracedStopped`]: ProcessState::TracedStopped #[inline(always)] pub fn is_stopped(&self) -> bool { - matches!(self, ProcessState::Stopped) + matches!( + self, + ProcessState::Stopped(_) | ProcessState::TracedStopped(_) + ) } - /// Returns exit code if the process state is [`Exited`]. + /// Returns exit code if the process state is [`Exited`] or [`TracedStopped`]. #[inline(always)] pub fn exit_code(&self) -> Option { match self { ProcessState::Exited(code) => Some(*code), + ProcessState::TracedStopped(code) => Some(*code), _ => None, } } } +/// ptrace 系统调用的请求类型 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(i32)] +pub enum PtraceRequest { + Traceme = 0, + Peekdata = 2, + Peekuser = 3, + Pokedata = 5, + Cont = 7, + Singlestep = 9, + Getregs = 12, + Setregs = 13, + Attach = 16, + Detach = 17, + Syscall = 24, + Setoptions = 0x4200, + Getsiginfo = 0x4202, + Seize = 0x4206, // 现代 API,不发送 SIGSTOP +} + bitflags! { /// pcb的标志位 pub struct ProcessFlags: usize { @@ -934,11 +1002,51 @@ bitflags! 
{ const FORKNOEXEC = 1 << 11; /// 进程需要在返回用户态前处理 rseq const NEED_RSEQ = 1 << 12; + /// 进程当前停止 + const STOPPED = 1 << 14; + /// 进程当前由ptrace跟踪 + const PTRACED = 1 << 15; + /// ptrace 正在停止(用于 attach/stop 的同步) + const TRAPPING = 1 << 13; + /// 跟踪器已发出PTRACE_SYSCALL请求 + const TRACE_SYSCALL = 1 << 16; + /// 跟踪器已发出PTRACE_SINGLESTEP请求 + const TRACE_SINGLESTEP = 1 << 17; + /// 跟踪器设置了TRACE_EXIT选项 + const TRACE_EXIT = 1 << 18; + /// 跟踪器设置了TRACE_FORK/CLONE选项 + const TRACE_FORK = 1 << 19; + /// 跟踪器设置了TRACE_VFORK选项 + const TRACE_VFORK = 1 << 20; + /// 跟踪器设置了TRACE_EXEC选项 + const TRACE_EXEC = 1 << 21; + /// 系统调用正在中断点(入口或出口) + const SYSCALL_INTERRUPT = 1 << 22; + /// 进程通过 PTRACE_SEIZE 被附加(而非 PTRACE_ATTACH) + /// 按照 Linux 6.6.21 语义:SEIZED 进程不会收到 Legacy SIGTRAP + const PT_SEIZED = 1 << 23; } } impl ProcessFlags { + /// 获取需要在系统调用返回到用户态前处理的工作标志 + /// + /// 按照 Linux 6.6.21 的 _TIF_WORK_MASK 语义: + /// - TIF_SIGPENDING (HAS_PENDING_SIGNAL): 有待处理的信号 + /// - TIF_NEED_RESCHED (NEED_SCHEDULE): 需要调度 + /// - TIF_NOTIFY_RESUME (NEED_RSEQ): 需要处理 rseq pub const fn exit_to_user_mode_work(&self) -> Self { + Self::from_bits_truncate( + self.bits + & (Self::HAS_PENDING_SIGNAL.bits | Self::NEED_SCHEDULE.bits | Self::NEED_RSEQ.bits), + ) + } + + /// 获取需要在中断返回到用户态前处理的工作标志 + /// + /// 中断返回路径不需要检查 NEED_SCHEDULE,因为调度在其他地方处理 + #[allow(dead_code)] + pub const fn exit_to_user_mode_work_irq(&self) -> Self { Self::from_bits_truncate(self.bits & (Self::HAS_PENDING_SIGNAL.bits | Self::NEED_RSEQ.bits)) } @@ -978,6 +1086,189 @@ pub struct ProcessItimers { pub prof: CpuItimer, // 用于 ITIMER_PROF } +#[repr(u8)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum PtraceSyscallInfoOp { + None = 0, + Entry = 1, + Exit = 2, + #[allow(dead_code)] + Seccomp = 3, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct PtraceSyscallInfoEntry { + pub nr: u64, + pub args: [u64; 6], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct PtraceSyscallInfoExit { + pub rval: i64, + pub is_error: u8, +} +#[repr(C)] 
+#[derive(Debug, Copy, Clone)] +pub struct PtraceSyscallInfoSeccomp { + pub nr: u64, + pub args: [u64; 6], + pub ret_data: u32, +} + +#[repr(C)] +pub union PtraceSyscallInfoData { + pub entry: PtraceSyscallInfoEntry, + pub exit: PtraceSyscallInfoExit, + pub seccomp: PtraceSyscallInfoSeccomp, + _uninit: MaybeUninit<[u8; 64]>, +} + +#[repr(C)] +pub struct PtraceSyscallInfo { + /// PTRACE_SYSCALL_INFO_* + pub op: PtraceSyscallInfoOp, + pub pad: [u8; 3], + pub arch: u32, + pub instruction_pointer: u64, + pub stack_pointer: u64, + /// The union containing event-specific data. + pub data: PtraceSyscallInfoData, +} + +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] +pub enum PtraceStopReason { + #[default] + None, + SyscallEntry, + SyscallExit, + Signal(Signal), + Event(PtraceEvent), +} + +/// ptrace 系统调用的事件类型 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PtraceEvent { + Fork = 1, + VFork, + Clone, + Exec, + VForkDone, + Exit, + Seccomp, + Stop = 128, // 信号或单步执行导致的停止 +} + +/// 进程被跟踪的状态信息 +#[derive(Debug)] +pub struct PtraceState { + /// 跟踪此进程的进程PID + tracer: Option, + /// 挂起的信号(等待调试器处理) + pending_signals: Vec, + /// 停止原因(公开,以便在 syscall_handler 中设置) + pub stop_reason: PtraceStopReason, + /// ptrace选项位 + options: PtraceOptions, + /// 用于存储事件消息 + event_message: usize, + /// tracer 注入的信号(在 ptrace_stop 返回后要处理的信号) + injected_signal: Signal, + /// 最后一次 ptrace 停止时的 siginfo(供 PTRACE_GETSIGINFO 读取) + last_siginfo: Option, +} + +impl Default for PtraceState { + fn default() -> Self { + Self { + tracer: None, + pending_signals: Vec::new(), + stop_reason: PtraceStopReason::None, + options: PtraceOptions::empty(), + event_message: 0, + injected_signal: Signal::INVALID, + last_siginfo: None, + } + } +} +impl PtraceState { + pub fn new() -> Self { + Self { + tracer: None, + pending_signals: Vec::new(), + stop_reason: PtraceStopReason::None, + options: PtraceOptions::empty(), + event_message: 0, + injected_signal: Signal::INVALID, + last_siginfo: None, + } + } + + /// 
获取停止状态的状态字 + pub fn status_code(&self) -> usize { + // 根据信号和状态生成状态码 + if let Some(signal) = self.pending_signals.first() { + (*signal as usize) << 8 + } else { + 0 + } + } + /// 检查是否有挂起的信号 + pub fn has_pending_signals(&self) -> bool { + !self.pending_signals.is_empty() + } + /// 添加挂起信号 + pub fn add_pending_signal(&mut self, signal: Signal) { + self.pending_signals.push(signal); + } + /// 获取下一个挂起信号 + pub fn next_pending_signal(&mut self) -> Option { + if self.pending_signals.is_empty() { + None + } else { + Some(self.pending_signals.remove(0)) + } + } + + /// 获取 last_siginfo(供 PTRACE_GETSIGINFO 使用) + pub fn last_siginfo(&self) -> Option { + self.last_siginfo + } + + /// 设置 last_siginfo + pub fn set_last_siginfo(&mut self, info: crate::ipc::signal_types::SigInfo) { + self.last_siginfo = Some(info); + } + + /// 清除 last_siginfo + pub fn clear_last_siginfo(&mut self) { + self.last_siginfo = None; + } +} + +#[derive(Debug, Default)] +#[allow(dead_code)] +pub struct SyscallInfo { + /// 系统调用入口信息(系统调用号、参数、返回值) + syscall_num: usize, + args: [usize; 6], + result: isize, +} +bitflags::bitflags! 
{ + /// Ptrace选项(PTRACE_O_*) + #[derive(Default)] + pub struct PtraceOptions: usize { + const TRACESYSGOOD = 1 << 0; + const TRACEFORK = 1 << 1; + const TRACEVFORK = 1 << 2; + const TRACECLONE = 1 << 3; + const TRACEEXEC = 1 << 4; + const TRACEVFORKDONE = 1 << 5; + const TRACEEXIT = 1 << 6; + const TRACESECCOMP = 1 << 7; + } +} + #[derive(Debug)] pub struct ProcessControlBlock { /// 当前进程的pid @@ -1015,7 +1306,6 @@ pub struct ProcessControlBlock { sig_altstack: RwLock, /// 退出状态(Running/Zombie/Dead) exit_state: AtomicU8, - /// 退出信号S exit_signal: AtomicSignal, /// 父进程退出时要发送给当前进程的信号(PR_SET_PDEATHSIG) @@ -1075,6 +1365,11 @@ pub struct ProcessControlBlock { cmdline: RwLock>, /// 资源限制(rlimit)数组 rlimits: RwLock<[RLimit64; RLimitID::Nlimits as usize]>, + + /// ptrace跟踪状态 + ptrace_state: SpinLock, + /// 被ptrace跟踪的进程列表(跟踪器跟踪了哪些进程) + ptraced_list: RwLock>, } impl ProcessControlBlock { @@ -1154,6 +1449,7 @@ impl ProcessControlBlock { let ppcb: Weak = ProcessManager::find_task_by_vpid(ppid) .map(|p| Arc::downgrade(&p)) .unwrap_or_default(); + let ptrace_state = SpinLock::new(PtraceState::default()); // 使用 Arc::new_cyclic 避免在栈上创建巨大的结构体 let pcb = Arc::new_cyclic(|weak| { @@ -1202,6 +1498,8 @@ impl ProcessControlBlock { executable_path: RwLock::new(name), cmdline: RwLock::new(Vec::new()), rlimits: RwLock::new(Self::default_rlimits()), + ptrace_state, + ptraced_list: RwLock::new(Vec::new()), }; pcb.sig_info.write().set_tty(tty); @@ -1623,17 +1921,47 @@ impl ProcessControlBlock { /// 当前进程退出时,让初始进程收养所有子进程 unsafe fn adopt_childen(&self) -> Result<(), SystemError> { - // 取出并清空 children 列表,避免后续 wait/reparent 出现重复。 - let child_pids: Vec = { - let mut children_guard = self.children.write(); - core::mem::take(&mut *children_guard) + // 取出 children 列表(但不清空,我们会选择性地保留 ptraced 子进程) + let all_child_pids: Vec = { + let children_guard = self.children.read(); + children_guard.clone() }; - if child_pids.is_empty() { + if all_child_pids.is_empty() { + return Ok(()); + } + + // 按照 Linux 6.6.21 的 ptrace 
语义: + // 如果子进程正在被当前进程 ptrace,则不要转移它 + // tracer 需要能够 wait 这个子进程,即使子进程已经退出 + let mut ptraced_children: Vec = Vec::new(); + let mut children_to_adopt: Vec = Vec::new(); + + for pid in all_child_pids.iter().copied() { + if let Some(child) = ProcessManager::find_task_by_vpid(pid) { + // 检查子进程是否被当前进程 ptrace + if let Some(child_parent) = child.parent_pcb() { + if Arc::ptr_eq(&child_parent, &self.self_ref.upgrade().unwrap()) { + // 子进程的 parent_pcb 指向当前进程,说明是被 ptrace 的 + // 不转移这个子进程 + ptraced_children.push(pid); + continue; + } + } + children_to_adopt.push(pid); + } + } + + // 只清空并转移非 ptraced 的子进程 + if !children_to_adopt.is_empty() { + let mut children_guard = self.children.write(); + children_guard.retain(|pid| !children_to_adopt.contains(pid)); + } else { + // 所有子进程都是 ptraced 的,不需要转移任何子进程 return Ok(()); } - self.notify_parent_exit_for_children(&child_pids); + self.notify_parent_exit_for_children(&children_to_adopt); let init_pcb = ProcessManager::find_task_by_vpid(RawPid(1)).ok_or(SystemError::ECHILD)?; @@ -1648,7 +1976,7 @@ impl ProcessControlBlock { let parent_init = ProcessManager::find_task_by_pid_ns(RawPid(1), &parent_pcb.active_pid_ns()); if let Some(parent_init) = parent_init { - for pid in child_pids.iter().copied() { + for pid in children_to_adopt.iter().copied() { if let Some(child) = ProcessManager::find_task_by_vpid(pid) { *child.parent_pcb.write_irqsave() = Arc::downgrade(&parent_init); *child.real_parent_pcb.write_irqsave() = Arc::downgrade(&parent_init); @@ -1670,7 +1998,7 @@ impl ProcessControlBlock { return Ok(()); } - // 常规情况:优先 reparent 到“最近祖先 subreaper”,否则 reparent 到 init。 + // 常规情况:优先 reparent 到"最近祖先 subreaper",否则 reparent 到 init。 let mut reaper: Arc = init_pcb.clone(); let mut cursor = self.parent_pcb(); while let Some(p) = cursor { @@ -1692,7 +2020,7 @@ impl ProcessControlBlock { cursor = leader.parent_pcb(); } - for pid in child_pids.iter().copied() { + for pid in children_to_adopt.iter().copied() { if let Some(child) = 
ProcessManager::find_task_by_vpid(pid) { *child.parent_pcb.write_irqsave() = Arc::downgrade(&reaper); *child.real_parent_pcb.write_irqsave() = Arc::downgrade(&reaper); @@ -1774,6 +2102,21 @@ impl ProcessControlBlock { self.rseq_state.write_irqsave() } + /// 获取 ptrace 状态的可变引用 + #[inline] + pub fn ptrace_state_mut(&self) -> SpinLockGuard<'_, PtraceState> { + self.ptrace_state.lock() + } + + /// 唤醒进程(用于子进程退出时唤醒父进程) + /// + /// 当子进程退出时,需要唤醒父进程,即使父进程不在 wait() 中睡眠 + /// 例如:父进程可能在 nanosleep 中,被 SIGCHLD 唤醒后应该处理信号 + #[inline] + pub fn wake_up_process(&self) { + signal_wake_up(self.self_ref.upgrade().unwrap(), false); + } + pub fn try_siginfo_mut(&self, times: u8) -> Option> { for _ in 0..times { if let Some(r) = self.sig_info.try_write_irqsave() { diff --git a/kernel/src/process/posix_timer.rs b/kernel/src/process/posix_timer.rs index 7b17c95bb6..48d008ba33 100644 --- a/kernel/src/process/posix_timer.rs +++ b/kernel/src/process/posix_timer.rs @@ -14,7 +14,7 @@ use system_error::SystemError; use crate::{ arch::ipc::signal::Signal, - ipc::signal_types::{PosixSigval, SigCode, SigInfo, SigType}, + ipc::signal_types::{OriginCode, PosixSigval, SigCode, SigInfo, SigType}, process::{pid::PidType, ProcessControlBlock, ProcessFlags, ProcessManager, RawPid}, time::{ jiffies::NSEC_PER_JIFFY, @@ -445,7 +445,7 @@ impl TimerFunction for PosixTimerHelper { let info = SigInfo::new( signo, 0, - SigCode::Timer, + SigCode::Origin(OriginCode::Timer), SigType::PosixTimer { timerid: self.timerid, overrun, diff --git a/kernel/src/process/ptrace.rs b/kernel/src/process/ptrace.rs new file mode 100644 index 0000000000..ace62ae8a5 --- /dev/null +++ b/kernel/src/process/ptrace.rs @@ -0,0 +1,957 @@ +use crate::arch::interrupt::TrapFrame; +use crate::arch::ipc::signal::{SigFlags, Signal}; +use crate::arch::kprobe; +use crate::ipc::signal_types::{ + ChldCode, OriginCode, SigChldInfo, SigCode, SigFaultInfo, SigInfo, SigType, TrapCode, +}; +use crate::process::cred; +use crate::process::{ + pid::PidType, 
ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, PtraceEvent, + PtraceOptions, PtraceRequest, PtraceStopReason, PtraceSyscallInfo, PtraceSyscallInfoData, + PtraceSyscallInfoEntry, PtraceSyscallInfoExit, PtraceSyscallInfoOp, RawPid, +}; +use crate::sched::{schedule, EnqueueFlag, SchedMode, WakeupFlags}; +use ::kprobe::ProbeArgs; +use alloc::{sync::Arc, vec::Vec}; +use core::{intrinsics::unlikely, mem::MaybeUninit}; +use system_error::SystemError; + +/// 在 get_signal 中调用的 ptrace 信号拦截器。 +/// 它会使进程停止,并根据追踪者的指令决定如何处理信号。 +/// 返回值: +/// - Some(Signal): 一个需要立即处理的信号。 +/// - None: 信号被 ptrace 取消或重新排队了,当前无需处理。 +pub fn ptrace_signal( + pcb: &Arc, + original_signal: Signal, + info: &mut Option, +) -> Option { + // Clone the Arc before calling ptrace_stop to prevent use-after-free. + let pcb_clone = Arc::clone(pcb); + // todo pcb.jobctl_set(JobControlFlags::STOP_DEQUEUED); + // 注意:ptrace_stop 内部会处理锁的释放和重新获取。 + let signr = pcb_clone.ptrace_stop(original_signal as usize, ChldCode::Trapped, info.as_mut()); + + if signr == 0 { + return None; // 丢弃原始信号,继续处理下一个信号(如果没有,则继续执行) + } + + // 将注入的信号转换为 Signal 类型 + let injected_signal = Signal::from(signr); + if injected_signal == Signal::INVALID { + return None; + } + + // 如果追踪者注入了不同于原始信号的新信号,更新 siginfo + if injected_signal != original_signal { + if let Some(info_ref) = info { + // 如果获取失败,保持原有的 siginfo + if let Some(tracer) = pcb_clone.tracer().and_then(ProcessManager::find) { + *info_ref = SigInfo::new( + injected_signal, + 0, + SigCode::Origin(OriginCode::User), + SigType::Kill { + pid: tracer.raw_pid(), + uid: tracer.cred().uid.data() as u32, + }, + ); + } + // 如果获取 tracer 失败,info 保持原样,这不是致命错误 + } + } + + // 特殊处理 SIGCONT:需要清除挂起的停止信号,但仍然要唤醒进程并传递给用户空间处理 + if injected_signal == Signal::SIGCONT { + // 清除任何挂起的停止信号(如 SIGSTOP, SIGTSTP 等) + let mut sig_info = pcb_clone.sig_info.write(); + let pending = sig_info.sig_pending_mut().signal_mut(); + for stop_sig in [ + Signal::SIGSTOP, + Signal::SIGTSTP, + Signal::SIGTTIN, + 
Signal::SIGTTOU,
+        ] {
+            pending.remove(stop_sig.into());
+        }
+        drop(sig_info);
+        return Some(injected_signal);
+    }
+
+    // Check whether the injected signal is blocked by the tracee's signal mask.
+    let sig_set = {
+        let guard = pcb_clone.sig_info_irqsave();
+        *guard.sig_blocked()
+    };
+
+    if sig_set.contains(injected_signal.into()) {
+        // The signal is blocked: try to re-queue it instead of delivering it now.
+        match injected_signal.send_signal_info_to_pcb(info.as_mut(), pcb_clone, PidType::PID) {
+            Ok(_) => return None, // re-queued successfully
+            Err(e) => {
+                // Serious: the blocked signal could not be preserved.
+                log::error!(
+                    "ptrace_signal lost signal {:?} due to re-queue failure: {:?}",
+                    injected_signal,
+                    e
+                );
+                return None;
+            }
+        }
+    }
+    // Not blocked: return the new signal so get_signal keeps dispatching it.
+    Some(injected_signal)
+}
+
+impl ProcessControlBlock {
+    /// Records `tracer` as this process's ptrace tracer and sets `PTRACED`.
+    ///
+    /// # Errors
+    /// Returns `EPERM` if a tracer is already recorded.
+    pub fn set_tracer(&self, tracer: RawPid) -> Result<(), SystemError> {
+        // Check and update under ONE lock acquisition. Taking the lock twice
+        // (once to test, once to store) lets two concurrent attach attempts
+        // both observe "not traced" and both install themselves.
+        let mut state = self.ptrace_state.lock();
+        if state.tracer.is_some() {
+            return Err(SystemError::EPERM);
+        }
+        state.tracer = Some(tracer);
+        // Set the flag while the state lock is still held so that the tracer
+        // field and the PTRACED flag change together.
+        self.flags().insert(ProcessFlags::PTRACED);
+        Ok(())
+    }
+
+    /// Forgets the tracer and clears the `PTRACED` and `TRACE_SYSCALL` flags.
+    pub fn clear_tracer(&self) {
+        self.ptrace_state.lock().tracer = None;
+        self.flags()
+            .remove(ProcessFlags::PTRACED | ProcessFlags::TRACE_SYSCALL);
+    }
+
+    /// Returns the pid of the current tracer, if any.
+    pub fn tracer(&self) -> Option {
+        self.ptrace_state.lock().tracer
+    }
+
+    /// Returns `true` if a tracer is currently recorded for this process.
+    pub fn is_traced(&self) -> bool {
+        self.ptrace_state.lock().tracer.is_some()
+    }
+
+    /// Returns `true` if the recorded tracer's pid equals `tracer.raw_pid()`.
+    pub fn is_traced_by(&self, tracer: &Arc) -> bool {
+        let state = self.ptrace_state.lock();
+        match state.tracer {
+            Some(pid) => pid == tracer.raw_pid(),
+            None => false,
+        }
+    }
+
+    /// Sets the scheduler state of this process under the sched_info write lock.
+    pub fn set_state(&self, state: ProcessState) {
+        let mut sched_info = self.sched_info.inner_lock_write_irqsave();
+        sched_info.set_state(state);
+    }
+
+    /// Sets the parent process (used by ptrace_link and ptrace_unlink).
+    pub fn set_parent(&self, new_parent: &Arc) -> Result<(), SystemError> {
+        if new_parent.raw_pid() == self.raw_pid() {
+            return Err(SystemError::EINVAL);
+        }
+        if new_parent.is_exited() {
+            return
Err(SystemError::ESRCH); + } + + *(self.parent_pcb.write()) = Arc::downgrade(new_parent); + Ok(()) + } + + /// 获取停止状态的状态字 + pub fn ptrace_status_code(&self) -> usize { + self.ptrace_state.lock().status_code() + } + + /// 添加信号到队列 + pub fn enqueue_signal(&self, signal: Signal) { + let mut info = self.sig_info.write(); + info.sig_pending.signal_mut().insert(signal.into()); + } + + /// 通知父进程(调试器)发送 SIGTRAP 信号并设置适当的退出代码。 + pub fn ptrace_notify(exit_code: usize) -> Result<(), SystemError> { + let current_pcb = ProcessManager::current_pcb(); + if (exit_code & (0x7f | !0xffff)) != Signal::SIGTRAP as usize { + return Err(SystemError::EINVAL); + } + // 获取信号处理锁 + let sighand_lock = current_pcb.sighand(); + let result = Self::ptrace_do_notify(Signal::SIGTRAP, exit_code, None); + drop(sighand_lock); + result + } + + fn ptrace_do_notify( + signal: Signal, + exit_code: usize, + _reason: Option, + ) -> Result<(), SystemError> { + let current_pcb = ProcessManager::current_pcb(); + + // 构造 Raw code (si_code = exit_code & 0xff) + // Linux 中 ptrace_notify 使用 (exit_code & 0xff) 作为 si_code + // 通常是 SIGTRAP | (PTRACE_EVENT_xxx << 8) + let si_code = (exit_code >> 8) as i32; + + // 如果是标准的 TRAP_* 代码,使用 TrapCode + let code = match si_code { + 1 => SigCode::Trap(TrapCode::Brkpt), + 2 => SigCode::Trap(TrapCode::Trace), + 3 => SigCode::Trap(TrapCode::Branch), + 4 => SigCode::Trap(TrapCode::Hwbkpt), + 5 => SigCode::Trap(TrapCode::Unk), + 6 => SigCode::Trap(TrapCode::Perf), + _ => SigCode::Raw(si_code), + }; + + let mut info = SigInfo::new( + signal, // si_signo = SIGTRAP + 0, // si_errno = 0 + code, + SigType::SigFault(SigFaultInfo { + addr: 0, + trapno: exit_code as i32, // trapno 暂时用来存完整 exit_code + }), + ); + current_pcb.ptrace_stop(exit_code, ChldCode::Trapped, Some(&mut info)); + Ok(()) + } + + /// ptrace 事件通知 + /// + /// - 如果事件被启用(通过 PTRACE_O_TRACEEXEC 等选项),调用 ptrace_event 阻塞进程 + /// - 进程保持 TracedStopped 状态,直到 tracer 唤醒它 + /// - 不应该手动设置 Runnable 状态,这由 ptrace_resume 处理 + /// + /// Legacy 
Exec 行为(PTRACE_SEIZE): + /// - 如果进程是通过 PTRACE_SEIZE 附加的(PT_SEIZED 标志已设置), + /// 且没有设置 PTRACE_O_TRACEEXEC,则不发送 Legacy SIGTRAP + pub fn ptrace_event(&self, event: PtraceEvent, message: usize) { + // 检查是否启用了该事件的追踪 + if unlikely(self.ptrace_event_enabled(event)) { + self.ptrace_state.lock().event_message = message; + // ptrace_notify 会调用 ptrace_stop,阻塞进程直到 tracer 唤醒 + let exit_code = (event as usize) << 8 | Signal::SIGTRAP as usize; + if let Err(e) = Self::ptrace_notify(exit_code) { + log::error!( + "ptrace_event: failed to notify tracer of event {:?}: {:?}", + event, + e + ); + } + // ptrace_stop 内部会调用 schedule() 阻塞 + // 当 tracer 调用 PTRACE_CONT 时,ptrace_resume 会设置 Runnable + } else if event == PtraceEvent::Exec { + // Legacy Exec 行为:只有在非 PTRACE_SEIZE 时才发送自动 SIGTRAP + // - PTRACE_ATTACH:发送 Legacy SIGTRAP + // - PTRACE_SEIZE:不发送 Legacy SIGTRAP(除非显式设置 PTRACE_O_TRACEEXEC) + let flags = self.flags(); + if flags.contains(ProcessFlags::PTRACED) && !flags.contains(ProcessFlags::PT_SEIZED) { + // 非 PTRACE_SEIZE:发送 Legacy SIGTRAP + let sig = Signal::SIGTRAP; + let mut info = SigInfo::new( + sig, + 0, + SigCode::Origin(OriginCode::Kernel), + SigType::SigFault(SigFaultInfo { addr: 0, trapno: 0 }), + ); + // 如果 self_ref 升级失败,说明进程正在销毁,此时发送信号没有意义,安全地跳过 + if let Some(strong_ref) = self.self_ref.upgrade() { + if let Err(e) = + sig.send_signal_info_to_pcb(Some(&mut info), strong_ref, PidType::PID) + { + log::error!( + "ptrace_event: failed to send legacy SIGTRAP for exec: {:?}", + e + ); + } + } + } + // 未PTRACED或PTRACE_SEIZE:不发送信号,静默返回 + } + } + + /// 检查是否启用了指定的 ptrace 事件选项 + /// + /// - 检查 PTRACE_O_TRACEEXEC 等选项是否被设置 + /// - 返回 true 表示 tracer 想要接收该事件的通知 + pub fn ptrace_event_enabled(&self, event: PtraceEvent) -> bool { + // 将 PtraceEvent 转换为对应的 PtraceOptions 标志 + let event_flag = match event { + PtraceEvent::Fork => PtraceOptions::TRACEFORK, + PtraceEvent::VFork => PtraceOptions::TRACEVFORK, + PtraceEvent::Clone => PtraceOptions::TRACECLONE, + PtraceEvent::Exec => 
PtraceOptions::TRACEEXEC,
+            PtraceEvent::VForkDone => PtraceOptions::TRACEVFORKDONE,
+            PtraceEvent::Exit => PtraceOptions::TRACEEXIT,
+            PtraceEvent::Seccomp => PtraceOptions::TRACESECCOMP,
+            _ => return false,
+        };
+
+        // Check whether that option is set in ptrace_state.options.
+        self.ptrace_state.lock().options.contains(event_flag)
+    }
+
+    /// Puts this process into a ptrace stop and blocks until it is resumed.
+    ///
+    /// - Sets the scheduler state to `TracedStopped(exit_code)` (similar to TASK_TRACED).
+    /// - Stores `info` as the last siginfo (read back by PTRACE_GETSIGINFO).
+    /// - Notifies the tracer (taken from `parent_pcb()`) with reason `why`.
+    /// - Calls `schedule()` to give up the CPU.
+    ///
+    /// Returns the signal number the tracer injected while the process was
+    /// stopped, or `0` if no signal was injected.
+    pub fn ptrace_stop(
+        &self,
+        exit_code: usize,
+        why: ChldCode,
+        info: Option<&mut SigInfo>,
+    ) -> usize {
+        // TRAPPING marks that the stop is being set up.
+        self.flags().insert(ProcessFlags::TRAPPING);
+        let mut sched_info = self.sched_info.inner_lock_write_irqsave();
+        sched_info.set_state(ProcessState::TracedStopped(exit_code));
+        sched_info.set_sleep();
+        drop(sched_info);
+
+        // event_message is intentionally NOT cleared here: ptrace_event()
+        // stores the message right before entering this stop, and the tracer
+        // must be able to read it while the tracee is stopped. It is reset
+        // after the stop ends (below), mirroring Linux's ptrace_stop().
+
+        // Store last_siginfo for the duration of the stop.
+        if let Some(info) = info {
+            self.ptrace_state.lock().set_last_siginfo(*info);
+        }
+
+        // Notify the tracer.
+        if let Some(tracer) = self.parent_pcb() {
+            self.notify_tracer(&tracer, why);
+        }
+
+        // Stop preparation is complete.
+        self.flags().remove(ProcessFlags::TRAPPING);
+
+        schedule(SchedMode::SM_NONE);
+
+        // Back from schedule(): the tracer resumed us. Report the signal it
+        // injected (the `data` argument of the resume request).
+        let mut ptrace_state = self.ptrace_state.lock();
+
+        // The message only describes the stop that just ended.
+        ptrace_state.event_message = 0;
+
+        let injected_signal = ptrace_state.injected_signal;
+
+        // INVALID means "nothing injected" and maps to 0; otherwise consume
+        // the injected signal so it is reported only once.
+        let result = if injected_signal == Signal::INVALID {
+            0
+        } else {
+            ptrace_state.injected_signal = Signal::INVALID;
+            injected_signal as usize
+        };
+        drop(ptrace_state);
+
+        result
+    }
+
+    fn notify_tracer(&self, tracer: &Arc, why: ChldCode) {
+        let status = match why {
+            ChldCode::Stopped => self.exit_code().unwrap_or(0) as i32 & 0x7f,
+            ChldCode::Trapped => self.exit_code().unwrap_or(0) as i32 & 0x7f,
+            _ => Signal::SIGCONT as i32,
+        };
+
+        // Send SIGCHLD to notify the parent (tracer).
+        // 这与 tracee 内部的 SIGTRAP
siginfo 是分离的 + let mut chld_info = SigInfo::new( + Signal::SIGCHLD, + 0, + SigCode::SigChld(why), + SigType::SigChld(SigChldInfo { + pid: self.raw_pid(), + uid: self.cred().uid.data(), + status, + utime: 0, + stime: 0, + }), + ); + + let should_send = { + let tracer_sighand = tracer.sighand(); + let sa = tracer_sighand.handler(Signal::SIGCHLD); + let force_send = why == ChldCode::Trapped; + if let Some(sa) = sa { + !sa.action().is_ignore() + && (force_send || !sa.flags().contains(SigFlags::SA_NOCLDSTOP)) + } else { + false + } + }; + if should_send { + let _ = Signal::SIGCHLD.send_signal_info_to_pcb( + Some(&mut chld_info), + Arc::clone(tracer), + PidType::TGID, + ); + } + // 唤醒 tracer 的 wait_queue + tracer + .wait_queue + .wakeup(Some(ProcessState::TracedStopped(status as usize))); + } + + /// 检查当前进程是否有权限跟踪目标进程 + pub fn has_permission_to_trace(&self, tracee: &Self) -> bool { + // 1. 超级用户可以跟踪任何进程 + // if self.is_superuser() { + // return true; + // } + + // 2. 同一线程组允许访问(自省) + if self.raw_tgid() == tracee.raw_tgid() { + return true; + } + + // 3. 检查UID、GID是否完全匹配 (euid/suid/uid、gid 都要相同) + let caller_cred = self.cred(); + let tracee_cred = tracee.cred(); + let uid_match = caller_cred.uid == tracee_cred.euid + && caller_cred.uid == tracee_cred.suid + && caller_cred.uid == tracee_cred.uid; + let gid_match = caller_cred.gid == tracee_cred.egid + && caller_cred.gid == tracee_cred.sgid + && caller_cred.gid == tracee_cred.gid; + if uid_match && gid_match && tracee.dumpable() != 0 { + return true; + } + + // 4. 
检查CAP_SYS_PTRACE权限
+        caller_cred.has_capability(cred::CAPFlags::CAP_SYS_PTRACE)
+    }
+
+    /// Establishes the ptrace relationship: `tracer` becomes this process's
+    /// tracer and (ptrace) parent, and this process is added to the tracer's
+    /// `ptraced_list`.
+    ///
+    /// # Errors
+    /// - `EPERM` if `tracer` may not trace this process, or if `set_tracer`
+    ///   rejects the request.
+    /// - Any error returned by `set_parent`; in that case the tracer set by
+    ///   this call is rolled back.
+    pub fn ptrace_link(&self, tracer: &Arc) -> Result<(), SystemError> {
+        if !tracer.has_permission_to_trace(self) {
+            return Err(SystemError::EPERM);
+        }
+
+        self.set_tracer(tracer.raw_pid())?;
+        if let Err(e) = self.set_parent(tracer) {
+            // set_tracer already succeeded; undo it so a failed link does not
+            // leave the process marked as traced without being on the
+            // tracer's ptraced_list.
+            self.clear_tracer();
+            return Err(e);
+        }
+
+        // Original note: if a root process attaches to an ordinary user
+        // process, that process must keep its original privileges.
+        // Record the tracee in the tracer's list.
+        tracer.ptraced_list.write_irqsave().push(self.raw_pid());
+
+        Ok(())
+    }
+
+    /// Tears down the ptrace relationship.
+    pub fn ptrace_unlink(&self) -> Result<(), SystemError> {
+        // Make sure this process is actually being traced.
+        if !self.is_traced() {
+            return Err(SystemError::EINVAL);
+        }
+
+        // 1. Remove this process from the tracer's ptraced list.
+        if let Some(tracer) = self.parent_pcb() {
+            tracer
+                .ptraced_list
+                .write_irqsave()
+                .retain(|&pid| pid != self.raw_pid());
+        }
+
+        // 2. Restore the parent to the real parent.
+        //    If the real parent has gone away, fall back to init (pid = 1).
+        let new_parent = self
+            .real_parent_pcb()
+            .or_else(|| ProcessManager::find_task_by_vpid(RawPid(1)))
+            .ok_or(SystemError::ESRCH)?;
+        self.set_parent(&new_parent)?;
+
+        // 3. Clear the ptrace flags and the tracer.
+        self.clear_tracer();
+
+        // 4. Clear TRAPPING, the "stop in progress" synchronisation flag.
+        self.flags().remove(ProcessFlags::TRAPPING);
+
+        // 5.
检查进程是否需要进入停止状态 + // Linux: 如果组停止有效且子进程未退出,则重新设置 JOBCTL_STOP_PENDING + let is_exiting = self.flags().contains(ProcessFlags::EXITING); + if !is_exiting { + // 获取当前调度状态 + let mut sched_info = self.sched_info.inner_lock_write_irqsave(); + let current_state = sched_info.state(); + + match current_state { + // 如果进程处于 TracedStopped 状态 + ProcessState::TracedStopped(_exit_code) => { + // Linux 逻辑:如果 detach 时进程处于 TRACED 状态 + // 需要唤醒它,让它从 ptrace_stop 中返回 + // 唤醒后,进程会根据 injected_signal 决定后续行为 + sched_info.set_state(ProcessState::Runnable); + sched_info.set_wakeup(); + drop(sched_info); + + // 加入运行队列,确保进程能被调度 + if let Some(strong_ref) = self.self_ref.upgrade() { + let rq = crate::sched::cpu_rq( + self.sched_info() + .on_cpu() + .unwrap_or(crate::smp::core::smp_get_processor_id()) + .data() as usize, + ); + let (rq, _guard) = rq.self_lock(); + rq.update_rq_clock(); + rq.activate_task( + &strong_ref, + EnqueueFlag::ENQUEUE_WAKEUP | EnqueueFlag::ENQUEUE_NOCLOCK, + ); + } + } + _ => { + // 其他状态,清除 TRAPPING 标志即可 + drop(sched_info); + } + } + } + Ok(()) + } + + /// 处理PTRACE_TRACEME请求 + pub fn traceme(&self) -> Result { + if self.is_traced() { + return Err(SystemError::EPERM); + } + let parent = self.real_parent_pcb().ok_or(SystemError::ESRCH)?; + self.flags().insert(ProcessFlags::PTRACED); + self.ptrace_link(&parent)?; + Ok(0) + } + + /// 处理PTRACE_ATTACH请求 + pub fn attach(&self, tracer: &Arc) -> Result { + // 验证权限 + let _is_same_process = tracer.raw_pid() == self.raw_pid(); + let is_same_thread_group = tracer.raw_tgid() == self.raw_tgid(); + + if !tracer.has_permission_to_trace(self) + || self.flags().contains(ProcessFlags::KTHREAD) + || is_same_thread_group + { + return Err(SystemError::EPERM); + } + + self.flags().insert(ProcessFlags::PTRACED); + self.ptrace_link(tracer)?; + + // ptrace_attach 发送 SIGSTOP 作为内核信号 + let sig = Signal::SIGSTOP; + let mut info = SigInfo::new( + sig, + 0, + SigCode::Origin(OriginCode::Kernel), + SigType::Kill { + pid: RawPid(0), // 内核发送 + uid: 0, + }, + 
);
+        if let Some(strong_ref) = self.self_ref.upgrade() {
+            if let Err(e) = sig.send_signal_info_to_pcb(Some(&mut info), strong_ref, PidType::PID) {
+                // Roll back the ptrace setup.
+                self.flags().remove(ProcessFlags::PTRACED);
+                self.ptrace_unlink()?;
+                return Err(e);
+            }
+        } else {
+            // self_ref could not be upgraded, so the process is being torn
+            // down: roll back the ptrace setup.
+            self.flags().remove(ProcessFlags::PTRACED);
+            self.ptrace_unlink()?;
+            return Err(SystemError::ESRCH);
+        }
+        // PTRACE_ATTACH returns right after sending the signal.
+        Ok(0)
+    }
+
+    /// Handles a PTRACE_SEIZE request.
+    ///
+    /// Following Linux 6.6.21:
+    /// - PTRACE_SEIZE is the modern replacement for PTRACE_ATTACH.
+    /// - No SIGSTOP is sent to the tracee.
+    /// - The `PT_SEIZED` flag is set, which affects later behaviour
+    ///   (e.g. the legacy exec SIGTRAP).
+    /// - `options` (PTRACE_O_TRACEEXEC etc.) take effect immediately.
+    ///
+    /// # Errors
+    /// `EPERM` if the tracer lacks permission, the target is a kernel thread,
+    /// the tracer is in the target's thread group, or the target is already
+    /// traced; otherwise any error from `ptrace_link`.
+    pub fn seize(
+        &self,
+        tracer: &Arc,
+        options: PtraceOptions,
+    ) -> Result {
+        // Permission checks.
+        let is_same_thread_group = tracer.raw_tgid() == self.raw_tgid();
+
+        if !tracer.has_permission_to_trace(self)
+            || self.flags().contains(ProcessFlags::KTHREAD)
+            || is_same_thread_group
+        {
+            return Err(SystemError::EPERM);
+        }
+
+        // Reject an already-traced target BEFORE touching any flag, so a
+        // failed seize cannot flip PT_SEIZED on a tracee that another tracer
+        // attached with PTRACE_ATTACH.
+        if self.is_traced() {
+            return Err(SystemError::EPERM);
+        }
+
+        // Set PT_SEIZED before linking so the tracee is never observable as
+        // traced-but-not-seized. PTRACED itself is set by ptrace_link (via
+        // set_tracer) only once the link actually succeeds.
+        self.flags().insert(ProcessFlags::PT_SEIZED);
+
+        // Establish the ptrace relationship; undo PT_SEIZED on failure.
+        if let Err(e) = self.ptrace_link(tracer) {
+            self.flags().remove(ProcessFlags::PT_SEIZED);
+            return Err(e);
+        }
+
+        // Install the ptrace options.
+        self.ptrace_state.lock().options = options;
+
+        // PTRACE_SEIZE does not send SIGSTOP; return directly.
+        Ok(0)
+    }
+
+    /// Handles a PTRACE_DETACH request.
+    ///
+    /// Note: Linux does not re-send the signal to the pending queue, it only
+    /// sets exit_code. If the tracee is sleeping in ptrace_stop it reads
+    /// exit_code as the return value when it wakes up. If the tracee is not in
+    /// ptrace_stop, setting exit_code has no effect (expected behaviour).
+    ///
+    /// Signal semantics:
+    /// - signal = None (data=0): inject nothing, the tracee just continues.
+    /// - signal = Some(sig): inject the given signal for the tracee to handle.
+    pub fn detach(&self, signal: Option) -> Result {
+        // Verify that the caller is the tracer.
+        let current_pcb = ProcessManager::current_pcb();
+
+        if !self.is_traced_by(&current_pcb) {
+            return Err(SystemError::EPERM);
+        }
+
+        let data_signal = match signal {
+            None =>
Signal::INVALID, // data=0 表示不注入信号 + Some(sig) => { + if sig == Signal::INVALID { + // 显式指定了无效信号(这种情况在 syscall 层已被过滤) + return Err(SystemError::EIO); + } + sig + } + }; + + let mut ptrace_state = self.ptrace_state.lock(); + ptrace_state.injected_signal = data_signal; + drop(ptrace_state); + + // 解除 ptrace 关系,恢复 real_parent + self.ptrace_unlink()?; + + // 唤醒处于停止状态的进程 + let mut sched_info = self.sched_info.inner_lock_write_irqsave(); + match sched_info.state() { + ProcessState::TracedStopped(_) | ProcessState::Stopped(_) => { + // 将状态设置为 Runnable,让进程可以被调度 + sched_info.set_state(ProcessState::Runnable); + sched_info.set_wakeup(); + } + _ => { + // 进程可能已经由于其他原因被唤醒,仍然需要确保 sleep 标志被清除 + sched_info.set_wakeup(); + } + } + drop(sched_info); + + // 加入调度队列 + let rq = crate::sched::cpu_rq( + self.sched_info() + .on_cpu() + .unwrap_or(crate::smp::core::smp_get_processor_id()) + .data() as usize, + ); + let (rq, _guard) = rq.self_lock(); + rq.update_rq_clock(); + let strong_ref = self.self_ref.upgrade().ok_or(SystemError::ESRCH)?; + rq.activate_task( + &strong_ref, + EnqueueFlag::ENQUEUE_WAKEUP | EnqueueFlag::ENQUEUE_NOCLOCK, + ); + rq.check_preempt_currnet(&strong_ref, WakeupFlags::empty()); + + Ok(0) + } + + /// 恢复进程执行 + pub fn ptrace_resume( + &self, + request: PtraceRequest, + signal: Option, + frame: &mut TrapFrame, + ) -> Result { + match request { + PtraceRequest::Syscall => self.flags().insert(ProcessFlags::TRACE_SYSCALL), + PtraceRequest::Singlestep => { + self.flags().insert(ProcessFlags::TRACE_SINGLESTEP); + // 使用架构无关的方式获取指令指针 + let step_addr = frame.break_address(); + kprobe::setup_single_step(frame, step_addr); + } + _ => {} // PTRACE_CONT 不需要特殊标志 + } + + let resume_signal = signal.unwrap_or(Signal::INVALID); + + // 清除停止/阻塞标志 + self.flags().remove(ProcessFlags::STOPPED); + + // 将注入的信号存储到 ptrace_state.injected_signal + let mut ptrace_state = self.ptrace_state.lock(); + ptrace_state.injected_signal = resume_signal; + drop(ptrace_state); + + // 将 TracedStopped 
状态的进程设置为 Runnable 并加入运行队列 + let mut sched_info = self.sched_info.inner_lock_write_irqsave(); + + match sched_info.state() { + ProcessState::TracedStopped(_) | ProcessState::Stopped(_) => { + sched_info.set_state(ProcessState::Runnable); + sched_info.set_wakeup(); + } + _ => { + sched_info.set_wakeup(); + } + } + drop(sched_info); + + // 加入调度队列(如果不在队列中的话) + if let Some(strong_ref) = self.self_ref.upgrade() { + let rq = self.sched_info.sched_entity().cfs_rq().rq(); + let (rq, _guard) = rq.self_lock(); + rq.enqueue_task( + strong_ref.clone(), + EnqueueFlag::ENQUEUE_RESTORE | EnqueueFlag::ENQUEUE_WAKEUP, + ); + } + + Ok(0) + } + + // 处理PTRACE_SYSCALL请求 + pub fn trace_syscall(&self) -> Result { + // 设置系统调用跟踪标志 + self.flags().insert(ProcessFlags::TRACE_SYSCALL); + self.flags().remove(ProcessFlags::TRACE_SINGLESTEP); + + // 恢复进程运行 + // 进程将在下次系统调用的入口和出口自动停止 + // (在 syscall_handler 中同步调用 ptrace_stop) + let mut sched_info = self.sched_info.inner_lock_write_irqsave(); + match sched_info.state() { + ProcessState::TracedStopped(_) | ProcessState::Stopped(_) => { + sched_info.set_state(ProcessState::Runnable); + sched_info.set_wakeup(); + } + _ => { + sched_info.set_wakeup(); + } + } + drop(sched_info); + + // 加入调度队列 + if let Some(strong_ref) = self.self_ref.upgrade() { + let rq = self.sched_info.sched_entity().cfs_rq().rq(); + let (rq, _guard) = rq.self_lock(); + rq.enqueue_task( + strong_ref.clone(), + EnqueueFlag::ENQUEUE_RESTORE | EnqueueFlag::ENQUEUE_WAKEUP, + ); + } + + Ok(0) + } + + /// 处理 PTRACE_GETSIGINFO 请求,获取系统调用信息 + /// + /// # 注意 + /// **此函数当前未完全实现** - 返回的数据可能不正确 + #[allow(dead_code)] + pub fn ptrace_get_syscall_info( + &self, + user_size: usize, + _datavp: usize, // Use a raw byte pointer for flexibility + ) -> Result { + // TODO: 获取实际的trapframe,而不是创建空的 + // let trap_frame = self.task_context(); + let trap_frame = TrapFrame::new(); + let ctx = kprobe::KProbeContext::from(&trap_frame); + let mut info = PtraceSyscallInfo { + op: PtraceSyscallInfoOp::None, + pad: [0; 
3],
+            arch: kprobe::syscall_get_arch(),
+            instruction_pointer: kprobe::instruction_pointer(&ctx),
+            stack_pointer: kprobe::user_stack_pointer(&ctx),
+            data: PtraceSyscallInfoData {
+                _uninit: MaybeUninit::uninit(),
+            },
+        };
+
+        let ptrace_state = self.ptrace_state.lock();
+        let actual_size = match ptrace_state.stop_reason {
+            PtraceStopReason::SyscallEntry => {
+                info.op = PtraceSyscallInfoOp::Entry;
+                let mut args = [0u64; 6];
+                kprobe::syscall_get_arguments(&ctx, &mut args);
+                info.data.entry = PtraceSyscallInfoEntry {
+                    nr: kprobe::syscall_get_nr(&ctx),
+                    args,
+                };
+                core::mem::size_of::()
+            }
+            PtraceStopReason::SyscallExit => {
+                info.op = PtraceSyscallInfoOp::Exit;
+                let rval = kprobe::syscall_get_return_value(&ctx);
+                // A return value is an error only when it lies in
+                // [-MAX_ERRNO, -1] with MAX_ERRNO = 4095. The previous
+                // `rval >= -4095` also matched every non-negative (successful)
+                // return value.
+                let is_error = (-4095..0).contains(&rval);
+                info.data.exit = PtraceSyscallInfoExit {
+                    rval,
+                    is_error: is_error as u8,
+                };
+                core::mem::size_of::()
+            }
+            _ => {
+                // Stopped for some other reason: only the common header is valid.
+                core::mem::offset_of!(PtraceSyscallInfo, data)
+            }
+        };
+        drop(ptrace_state);
+
+        // Copy the data to user space.
+        let write_size = core::cmp::min(actual_size, user_size);
+        if write_size > 0 {
+            // TODO: implement the copy to user space.
+            // A UserBufferWriter is needed to copy the info struct to _datavp.
+            // NOTE(review): `info.data` may still hold uninitialised bytes
+            // here; confirm this byte view is sound before it is actually used.
+            let _info_bytes =
+                unsafe { core::slice::from_raw_parts(&info as *const _ as *const u8, write_size) };
+            // datavp.write_bytes(info_bytes)?;
+        }
+
+        // Regardless of how much was copied, return the full size the kernel prepared.
+        // Note: the size is correct, but the content is empty for now because
+        // the trap frame comes from TrapFrame::new().
+        Ok(actual_size as isize)
+    }
+
+    /// Handles a PTRACE_SINGLESTEP request.
+    /// # Not implemented
+    /// - Setting the CPU-level single-step flag (the EFLAGS.TF bit on x86_64).
+    #[allow(dead_code)]
+    pub fn single_step(&self) -> Result {
+        // Set the single-step flag.
+        self.flags().insert(ProcessFlags::TRACE_SINGLESTEP);
+        self.flags().remove(ProcessFlags::TRACE_SYSCALL);
+
+        // TODO: enable single-stepping at the CPU level.
+        // This needs the EFLAGS.TF (Trap Flag) bit on x86_64.
+        // Reference: Linux arch/x86/kernel/ptrace.c::user_enable_single_step()
+        // if let Some(context) = self.context_mut() {
+        //     context.enable_single_step();
+        // }
+
+        // Resume the process.
+        let mut sched_info =
self.sched_info.inner_lock_write_irqsave(); + match sched_info.state() { + ProcessState::TracedStopped(_) | ProcessState::Stopped(_) => { + sched_info.set_state(ProcessState::Runnable); + sched_info.set_wakeup(); + } + _ => { + sched_info.set_wakeup(); + } + } + drop(sched_info); + + // 加入调度队列 + if let Some(strong_ref) = self.self_ref.upgrade() { + let rq = self.sched_info.sched_entity().cfs_rq().rq(); + let (rq, _guard) = rq.self_lock(); + rq.enqueue_task( + strong_ref.clone(), + EnqueueFlag::ENQUEUE_RESTORE | EnqueueFlag::ENQUEUE_WAKEUP, + ); + } + + Ok(0) + } + + /// 启用单步执行功能 + /// # TODO + /// 需要实现架构特定的CPU标志设置: + /// - **x86_64**: 设置 EFLAGS.TF (Trap Flag, bit 8) + /// - **RISC-V**: 设置 sstatus.SSTEP + /// - **ARM64**: 设置 MDSCR_EL1.SS + /// + /// 参考: + /// - https://code.dragonos.org.cn/xref/linux-6.6.21/arch/x86/kernel/step.c#217 + #[allow(dead_code)] + pub fn enable_single_step(&self) { + unimplemented!() + } + + /// 启用系统调用跟踪 + pub fn enable_syscall_tracing(&self) { + self.flags().insert(ProcessFlags::TRACE_SYSCALL); + } + + /// 在系统调用入口处调用 + #[allow(dead_code)] + pub fn on_syscall_entry(&self, _num: usize, _args: &[usize]) { + // TODO: 记录系统调用入口信息 + } + + /// 在系统调用出口处调用 + #[allow(dead_code)] + pub fn on_syscall_exit(&self, _result: isize) { + // TODO: 记录系统调用出口信息 + } + + /// 处理 PTRACE_PEEKUSER 请求 + /// 在Linux中,此函数读取 tracee 的 "USER" 区域数据,主要包含: + /// - 寄存器值(通过偏移量访问) + /// - 特殊值如调试寄存器 + #[allow(dead_code)] + pub fn peek_user(&self, _addr: usize) -> Result { + // 未实现注释掉的代码: + // // 验证地址是否在用户空间范围内 + // if !self.memory.is_user_address(addr) { + // return Err(SystemError::EFAULT); + // } + // // 使用正确的寄存器偏移量 + // let offset = syscall_number_offset(); + // let reg_addr = offset * core::mem::size_of::(); + // // 确保访问的是寄存器区域 + // if addr != reg_addr { + // return Err(SystemError::EFAULT); + // } + // // 获取当前线程的寄存器值 + // let thread = self.current_thread().ok_or(SystemError::ESRCH)?; + // let regs = thread.get_registers(); + // // 返回系统调用号 + // Ok(regs.syscall_number() as 
isize) + Ok(0) + } + + /// 设置PTRACE选项 + pub fn set_ptrace_options(&self, options: PtraceOptions) -> Result<(), SystemError> { + let mut state = self.ptrace_state.lock(); + state.options = options; + Ok(()) + } + + /// 清空待处理信号 + #[allow(dead_code)] + pub fn clear_ptrace(&self) { + let mut ptrace_state = self.ptrace_state.lock(); + + // 清除跟踪关系 + ptrace_state.tracer = None; + // ptrace_state.siginfo = None; + ptrace_state.pending_signals = Vec::new(); + // ptrace_state.signal_queue.clear(); + + // 清除标志位 + self.flags().remove( + ProcessFlags::PTRACED | ProcessFlags::TRACE_SYSCALL | ProcessFlags::TRACE_SINGLESTEP, + ); + } +} diff --git a/kernel/src/process/rseq.rs b/kernel/src/process/rseq.rs index b1b8285bbd..46ec673fd6 100644 --- a/kernel/src/process/rseq.rs +++ b/kernel/src/process/rseq.rs @@ -17,9 +17,11 @@ use core::sync::atomic::{AtomicU32, Ordering}; use system_error::SystemError; use crate::{ - arch::cpu::current_cpu_id, - mm::VirtAddr, + arch::{cpu::current_cpu_id, ipc::signal::Signal, MMArch}, + ipc::kill::send_signal_to_pcb, + mm::{MemoryManagementArch, VirtAddr}, process::{ProcessControlBlock, ProcessFlags, ProcessManager}, + syscall::user_access::{copy_from_user_protected, copy_to_user_protected}, }; // ============================================================================ @@ -205,7 +207,7 @@ impl UserRseqAccess { /// 读取 u32 值 unsafe fn read_u32(&self, offset: usize) -> Result { let mut bytes = [0u8; 4]; - crate::syscall::user_access::copy_from_user_protected(&mut bytes, self.base + offset) + copy_from_user_protected(&mut bytes, self.base + offset) .map_err(|_| RseqError::UserAccessFault)?; Ok(u32::from_ne_bytes(bytes)) } @@ -213,28 +215,22 @@ impl UserRseqAccess { /// 读取 u64 值 unsafe fn read_u64(&self, offset: usize) -> Result { let mut bytes = [0u8; 8]; - crate::syscall::user_access::copy_from_user_protected(&mut bytes, self.base + offset) + copy_from_user_protected(&mut bytes, self.base + offset) .map_err(|_| RseqError::UserAccessFault)?; 
Ok(u64::from_ne_bytes(bytes)) } /// 写入 u32 值 unsafe fn write_u32(&self, offset: usize, value: u32) -> Result<(), RseqError> { - crate::syscall::user_access::copy_to_user_protected( - self.base + offset, - &value.to_ne_bytes(), - ) - .map_err(|_| RseqError::UserAccessFault)?; + copy_to_user_protected(self.base + offset, &value.to_ne_bytes()) + .map_err(|_| RseqError::UserAccessFault)?; Ok(()) } /// 写入 u64 值 unsafe fn write_u64(&self, offset: usize, value: u64) -> Result<(), RseqError> { - crate::syscall::user_access::copy_to_user_protected( - self.base + offset, - &value.to_ne_bytes(), - ) - .map_err(|_| RseqError::UserAccessFault)?; + copy_to_user_protected(self.base + offset, &value.to_ne_bytes()) + .map_err(|_| RseqError::UserAccessFault)?; Ok(()) } @@ -271,7 +267,7 @@ impl UserRseqAccess { } let sig_addr = VirtAddr::new((rseq_cs.abort_ip - 4) as usize); let mut sig_bytes = [0u8; 4]; - crate::syscall::user_access::copy_from_user_protected(&mut sig_bytes, sig_addr) + copy_from_user_protected(&mut sig_bytes, sig_addr) .map_err(|_| RseqError::UserAccessFault)?; let read_sig = u32::from_ne_bytes(sig_bytes); @@ -385,6 +381,24 @@ impl RseqState { self.registration.as_ref() } + /// 获取当前 rseq_cs(从用户内存读取) + /// 用于 rseq_syscall_check:检查是否在 rseq 临界区内发起了系统调用 + /// + /// # Safety + /// + /// 调用者必须确保用户内存有效 + pub unsafe fn get_rseq_cs(&self) -> Option<(RseqCs, u32)> { + let reg = self.registration.as_ref()?; + let access = UserRseqAccess::new(reg.ptr); + let user_end = MMArch::USER_END_VADDR.data(); + + // 读取 rseq_cs,忽略签名验证(因为在 syscall 路径中我们已经注册过) + match access.read_rseq_cs(reg.sig, user_end) { + Ok(Some(cs)) => Some((cs, reg.sig)), + _ => None, + } + } + /// 设置事件掩码(原子操作) #[inline] pub fn set_event(&self, event: RseqEventMask) { @@ -451,8 +465,6 @@ impl Rseq { rseq_len: u32, sig: u32, ) -> Result { - use crate::{arch::MMArch, mm::MemoryManagementArch}; - let mut rseq_state = pcb.rseq_state_mut(); // 检查是否已注册 @@ -546,8 +558,6 @@ impl Rseq { /// /// 在返回用户态前调用,执行 IP 修正和 cpu_id 更新 
pub fn handle_notify_resume(frame: Option<&mut F>) -> Result<(), ()> { - use crate::{arch::MMArch, mm::MemoryManagementArch}; - let pcb = ProcessManager::current_pcb(); // 如果进程正在退出,直接返回 @@ -685,12 +695,11 @@ impl Rseq { /// 在信号递送时调用 pub fn on_signal(frame: &mut F) { - use crate::arch::ipc::signal::Signal; let pcb = ProcessManager::current_pcb(); if pcb.rseq_state().is_registered() { pcb.rseq_state().set_event(RseqEventMask::SIGNAL); if Self::handle_notify_resume(Some(frame)).is_err() { - let _ = crate::ipc::kill::send_signal_to_pcb(pcb.clone(), Signal::SIGSEGV); + let _ = send_signal_to_pcb(pcb.clone(), Signal::SIGSEGV); } } } @@ -704,6 +713,22 @@ impl Rseq { pcb.flags().insert(ProcessFlags::NEED_RSEQ); } } + + /// 系统调用退出时的 rseq 检查 + /// **注意**: Linux 的 rseq_syscall 仅在 CONFIG_DEBUG_RSEQ 启用时编译, + /// 用于调试目的,检测在 rseq 临界区内发起系统调用的违规行为。 + /// + /// 在生产环境中,此函数应为空操作。rseq 的正确性依赖于: + /// 此函数目前为空操作,与 Linux 生产内核行为一致。 + /// + /// # Safety + /// + /// 调用者必须保证 frame 指向有效的 TrapFrame + #[inline] + pub unsafe fn rseq_syscall_check(_frame: &F) { + // 生产环境:空操作,与 Linux 生产内核一致 + // 若需启用调试检查,应编译时启用 DEBUG_RSEQ 特性标志 + } } // ============================================================================ diff --git a/kernel/src/process/syscall/mod.rs b/kernel/src/process/syscall/mod.rs index 8752e64ea3..43182e86b0 100644 --- a/kernel/src/process/syscall/mod.rs +++ b/kernel/src/process/syscall/mod.rs @@ -24,6 +24,7 @@ mod sys_init_module; mod sys_pidfdopen; mod sys_prctl; pub mod sys_prlimit64; +mod sys_ptrace; mod sys_rseq; mod sys_set_tid_address; mod sys_setdomainname; diff --git a/kernel/src/process/syscall/sys_ptrace.rs b/kernel/src/process/syscall/sys_ptrace.rs new file mode 100644 index 0000000000..aa70f3a0fb --- /dev/null +++ b/kernel/src/process/syscall/sys_ptrace.rs @@ -0,0 +1,525 @@ +use crate::{ + arch::{ + interrupt::{TrapFrame, UserRegsStruct}, + ipc::signal::Signal, + syscall::nr::{SYS_EXIT, SYS_PTRACE}, + MMArch, + }, + ipc::signal_types::PosixSigInfo, + 
mm::{MemoryManagementArch, PhysAddr, VirtAddr}, + process::{ + syscall::sys_exit::SysExit, ProcessControlBlock, ProcessFlags, ProcessManager, + ProcessState, PtraceOptions, PtraceRequest, RawPid, + }, + syscall::{ + table::{FormattedSyscallParam, Syscall}, + user_access::UserBufferWriter, + }, +}; +use alloc::sync::Arc; +use alloc::vec::Vec; +use system_error::SystemError; + +impl TryFrom for PtraceRequest { + type Error = SystemError; + + fn try_from(value: usize) -> Result { + match value { + 0 => Ok(PtraceRequest::Traceme), + 2 => Ok(PtraceRequest::Peekdata), + 5 => Ok(PtraceRequest::Pokedata), + 7 => Ok(PtraceRequest::Cont), + 9 => Ok(PtraceRequest::Singlestep), + 12 => Ok(PtraceRequest::Getregs), + 13 => Ok(PtraceRequest::Setregs), + 16 => Ok(PtraceRequest::Attach), + 17 => Ok(PtraceRequest::Detach), + 24 => Ok(PtraceRequest::Syscall), + 0x4200 => Ok(PtraceRequest::Setoptions), + 0x4202 => Ok(PtraceRequest::Getsiginfo), + 0x4206 => Ok(PtraceRequest::Seize), + _ => Err(SystemError::EINVAL), + } + } +} + +/// ptrace 内存访问辅助函数 +/// +/// 按照 Linux 6.6 的 ptrace_access_vm 模式实现,但不使用页表切换: +/// - 直接将 tracee 的虚拟地址翻译为物理地址 +/// - 通过 phys_2_virt 映射到内核虚拟地址空间 +/// - 使用异常表保护的拷贝函数,安全处理缺页异常 +/// - **不关闭中断**,避免中断禁用期间缺页导致的死锁 +/// +/// # Safety +/// 调用者必须确保 tracee 在访问期间不会被销毁 +#[allow(dead_code)] +fn ptrace_access_vm(tracee: &Arc, f: F) -> Result +where + F: FnOnce() -> Result, +{ + // 获取目标进程的地址空间 + let tracee_vm = tracee.basic().user_vm().ok_or(SystemError::ESRCH)?; + + // 获取目标进程的地址空间锁,但不切换页表 + // 只需要在地址空间读锁保护下执行操作 + let _tracee_vm_guard = tracee_vm.read(); + + // 在目标进程的地址空间读锁保护中执行操作 + f() +} + +/// 从 tracee 的用户空间读取数据(安全版本) +/// +/// 使用物理地址翻译避免页表切换,不关闭中断。 +/// 参考 process_vm_readv 的实现方式。 +fn ptrace_peek_data(tracee: &Arc, addr: usize) -> Result { + let tracee_vm = tracee.basic().user_vm().ok_or(SystemError::ESRCH)?; + let tracee_vm_guard = tracee_vm.read(); + + let tracee_addr = VirtAddr::new(addr); + + // 检查地址是否在 tracee 的地址空间中 + if 
tracee_vm_guard.mappings.contains(tracee_addr).is_none() { + return Err(SystemError::EIO); + } + + // 计算页内偏移 + let page_offset = addr & (MMArch::PAGE_SIZE - 1); + + // 翻译 tracee 的虚拟地址为物理地址 + let tracee_phys = match tracee_vm_guard.user_mapper.utable.translate(tracee_addr) { + Some((phys_frame, _)) => PhysAddr::new(phys_frame.data() + page_offset), + None => return Err(SystemError::EIO), + }; + drop(tracee_vm_guard); + + // 使用异常表保护的拷贝 + let mut value: u64 = 0; + unsafe { + // 将物理地址映射为内核虚拟地址 + let kernel_virt = MMArch::phys_2_virt(tracee_phys).ok_or(SystemError::EIO)?; + + let src_ptr = kernel_virt.data() as *const u8; + let dst_ptr = &mut value as *mut u64 as *mut u8; + let result = MMArch::copy_with_exception_table(dst_ptr, src_ptr, 8); + if result != 0 { + return Err(SystemError::EIO); + } + } + + Ok(value as isize) +} + +/// 向 tracee 的用户空间写入数据(安全版本) +/// +/// 使用物理地址翻译避免页表切换,不关闭中断。 +/// 参考 process_vm_writev 的实现方式。 +fn ptrace_poke_data( + tracee: &Arc, + addr: usize, + data: usize, +) -> Result { + let tracee_vm = tracee.basic().user_vm().ok_or(SystemError::ESRCH)?; + let tracee_vm_guard = tracee_vm.read(); + + let tracee_addr = VirtAddr::new(addr); + + // 检查地址是否在 tracee 的地址空间中 + if tracee_vm_guard.mappings.contains(tracee_addr).is_none() { + return Err(SystemError::EIO); + } + + // 计算页内偏移 + let page_offset = addr & (MMArch::PAGE_SIZE - 1); + + // 翻译 tracee 的虚拟地址为物理地址 + let tracee_phys = match tracee_vm_guard.user_mapper.utable.translate(tracee_addr) { + Some((phys_frame, _)) => PhysAddr::new(phys_frame.data() + page_offset), + None => return Err(SystemError::EIO), + }; + drop(tracee_vm_guard); + + // 使用异常表保护的拷贝 + let value: u64 = data as u64; + unsafe { + // 将物理地址映射为内核虚拟地址 + let kernel_virt = MMArch::phys_2_virt(tracee_phys).ok_or(SystemError::EIO)?; + + let src_ptr = &value as *const u64 as *const u8; + let dst_ptr = kernel_virt.data() as *mut u8; + let result = MMArch::copy_with_exception_table(dst_ptr, src_ptr, 8); + if result != 0 { + return 
Err(SystemError::EIO); + } + } + + Ok(0) +} + +/// ptrace 系统调用实现 +pub struct SysPtrace; + +impl SysPtrace { + fn request(args: &[usize]) -> Result { + PtraceRequest::try_from(args[0]).map_err(|_| SystemError::EINVAL) + } + + fn pid(args: &[usize]) -> RawPid { + RawPid(args[1]) + } + + fn addr(args: &[usize]) -> usize { + args[2] + } + + fn data(args: &[usize]) -> usize { + args[3] + } + + /// 处理 PTRACE_TRACEME 请求(当前进程请求被跟踪) + fn handle_traceme(tracer: &Arc) -> Result { + tracer.traceme() + } + + /// 处理 PTRACE_ATTACH 请求(附加到目标进程) + fn handle_attach( + tracer: &Arc, + tracee_pid: RawPid, + ) -> Result { + let tracee = ProcessManager::find(tracee_pid).ok_or(SystemError::ESRCH)?; + tracee.attach(tracer) + } + + /// 处理 PTRACE_SEIZE 请求(现代附加 API) + /// + /// 按照 Linux 6.6.21 实现: + /// - 不发送 SIGSTOP 给 tracee + /// - addr 参数包含 ptrace 选项 + /// - data 参数通常为 0 + fn handle_seize( + tracer: &Arc, + tracee_pid: RawPid, + addr: usize, + ) -> Result { + let tracee = ProcessManager::find(tracee_pid).ok_or(SystemError::ESRCH)?; + // addr 参数包含 ptrace 选项 + let options = PtraceOptions::from_bits_truncate(addr); + tracee.seize(tracer, options) + } + + /// 处理 PTRACE_DETACH 请求(分离目标进程) + fn handle_detach( + tracee: &Arc, + signal: Option, + ) -> Result { + // 验证调用者是跟踪器 + let tracer_pid = ProcessManager::current_pcb().raw_pid(); + let tracee_tracer = tracee.tracer().ok_or(SystemError::ESRCH)?; + if tracer_pid != tracee_tracer { + return Err(SystemError::EPERM); + } + tracee.detach(signal) + } + + /// 处理 PTRACE_SYSCALL 请求(在系统调用入口和出口暂停) + #[allow(dead_code)] + fn handle_syscall(tracee: &Arc) -> Result { + // 检查调用者是否是该进程的跟踪器 + let tracer_pid = ProcessManager::current_pcb().raw_pid(); + let tracee_tracer = tracee.tracer().ok_or(SystemError::ESRCH)?; + if tracer_pid != tracee_tracer { + return Err(SystemError::ESRCH); + } + // 设置系统调用跟踪标志 + tracee.enable_syscall_tracing(); + tracee.trace_syscall() + } + + /// 处理 PTRACE_SETOPTIONS 请求(设置跟踪选项) + #[allow(dead_code)] + fn handle_set_options( + tracee: 
&Arc, + data: usize, + ) -> Result { + let options = PtraceOptions::from_bits_truncate(data); + // 设置跟踪选项 + tracee.set_ptrace_options(options)?; + + Ok(0) + } + + /// 处理 PTRACE_GETSIGINFO 请求(获取信号信息) + #[allow(dead_code)] + fn handle_get_siginfo( + tracee: &Arc, + data: usize, + ) -> Result { + // 读取 last_siginfo 并拷贝到用户空间 + let siginfo = tracee + .ptrace_state + .lock() + .last_siginfo() + .ok_or(SystemError::EINVAL)?; + + // 将 siginfo 转换为 PosixSigInfo 格式并拷贝到用户空间 + let uinfo = data as *mut PosixSigInfo; + siginfo.copy_posix_siginfo_to_user(uinfo)?; + log::debug!("PTRACE_GETSIGINFO: siginfo={:?}", siginfo); + Ok(0) + } + + /// 处理 PTRACE_PEEKUSER 请求 + fn handle_peek_user( + tracee: &Arc, + addr: usize, + ) -> Result { + let value = tracee.peek_user(addr)?; + Ok(value) + } + + /// 处理 PTRACE_PEEKDATA 请求(读取进程内存) + /// + /// 使用安全的物理地址翻译方式访问目标进程地址空间: + /// - 不进行页表切换 + /// - 不关闭中断 + /// - 使用异常表保护安全处理缺页 + fn handle_peek_data( + tracee: &Arc, + addr: usize, + ) -> Result { + ptrace_peek_data(tracee, addr) + } + + /// 处理 PTRACE_POKEDATA 请求(写入进程内存) + /// + /// 使用安全的物理地址翻译方式访问目标进程地址空间: + /// - 不进行页表切换 + /// - 不关闭中断 + /// - 使用异常表保护安全处理缺页 + fn handle_poke_data( + tracee: &Arc, + addr: usize, + data: usize, + ) -> Result { + ptrace_poke_data(tracee, addr, data) + } + + /// 处理 PTRACE_SINGLESTEP 请求 (单步执行) + #[allow(dead_code)] + fn handle_single_step(tracee: &Arc) -> Result { + // 检查调用者是否是该进程的跟踪器 + let tracer_pid = ProcessManager::current_pcb().raw_pid(); + let tracee_tracer = tracee.tracer().ok_or(SystemError::ESRCH)?; + if tracer_pid != tracee_tracer { + return Err(SystemError::ESRCH); + } + // 设置 EFLAGS 的 TF 标志 + tracee.enable_single_step(); + // 恢复进程运行 + let mut sched_info = tracee.sched_info.inner_lock_write_irqsave(); + if let ProcessState::Stopped(_signal) = sched_info.state() { + sched_info.set_state(ProcessState::Runnable); + } + Ok(0) + } + + /// 处理 PTRACE_GETREGS 请求 (获取寄存器值) + fn handle_get_regs( + tracee: &Arc, + data: usize, + ) -> Result { + // 获取 tracee 的 TrapFrame + 
// TrapFrame 位于内核栈顶部:kernel_stack.max_address - size_of::() + let kstack = tracee.kernel_stack(); + let trap_frame_vaddr = + VirtAddr::new(kstack.stack_max_address().data() - core::mem::size_of::()); + + // 从 tracee 的内核栈读取 TrapFrame + let trap_frame = unsafe { &*(trap_frame_vaddr.data() as *const TrapFrame) }; + + #[cfg(target_arch = "x86_64")] + let user_regs = { + // 获取 fs_base、gs_base 和段选择器 + let arch_info = tracee.arch_info_irqsave(); + let fs_base = arch_info.fsbase() as u64; + let gs_base = arch_info.gsbase() as u64; + let fs = arch_info.fs() as u64; + let gs = arch_info.gs() as u64; + drop(arch_info); + // 使用 UserRegsStruct::from_trap_frame 构造用户态寄存器结构体 + UserRegsStruct::from_trap_frame(trap_frame, fs_base, gs_base, fs, gs) + }; + #[cfg(not(target_arch = "x86_64"))] + let user_regs = { UserRegsStruct::from_trap_frame(trap_frame) }; + + // 拷贝到用户空间 + let mut writer = UserBufferWriter::new( + data as *mut u8, + core::mem::size_of::(), + true, + )?; + writer.copy_one_to_user(&user_regs, 0)?; + + Ok(0) + } + + /// 处理 PTRACE_SETREGS 请求 (设置寄存器值) + fn handle_set_regs( + _tracee: &Arc, + _data: usize, + ) -> Result { + // 从用户空间复制寄存器结构体 + Ok(0) + } + + // 在系统调用处理之前 + #[allow(dead_code)] + fn before_handle_syscall(num: usize, args: &[usize]) { + let current = ProcessManager::current_pcb(); + // 检查进程是否被跟踪并且启用了系统调用跟踪 + if current + .flags() + .contains(ProcessFlags::PTRACED | ProcessFlags::TRACE_SYSCALL) + { + // 保存系统调用信息 + current.on_syscall_entry(num, args); + // 暂停进程等待跟踪器 + current.set_state(ProcessState::Stopped(1)); + // Scheduler::schedule(SchedMode::SM_NONE); // 切换到其他进程 + } + } + + // 在系统调用处理之后 + #[allow(dead_code)] + fn after_handle_syscall(_num: usize, result: isize) { + let current = ProcessManager::current_pcb(); + // 检查进程是否被跟踪并且启用了系统调用跟踪 + if current + .flags() + .contains(ProcessFlags::PTRACED | ProcessFlags::TRACE_SYSCALL) + { + // 保存系统调用结果 + current.on_syscall_exit(result); + // 暂停进程等待跟踪器 + current.set_state(ProcessState::Stopped(1)); + // 
Scheduler::schedule(SchedMode::SM_NONE); // 切换到其他进程 + } + } + + // 在系统调用分发函数中 + #[allow(dead_code)] + fn dispatch_syscall( + num: usize, + args: &[usize], + frame: &mut TrapFrame, + ) -> Result { + Self::before_handle_syscall(num, args); + + // 执行实际的系统调用处理 + let result = match num { + SYS_EXIT => SysExit.handle(args, frame)?, + // ... 其他系统调用 ... + _ => Err(SystemError::ENOSYS)?, + }; + + Self::after_handle_syscall(num, result as isize); + Ok(result) + } + + #[allow(dead_code)] + fn ptrace_check_attach( + tracee: &Arc, + _request: PtraceRequest, + ) -> Result<(), SystemError> { + let current = ProcessManager::current_pcb(); + + if !tracee.is_traced_by(¤t) { + return Err(SystemError::EPERM); + } + match tracee.sched_info().inner_lock_read_irqsave().state() { + ProcessState::Stopped(_) | ProcessState::TracedStopped(_) => Ok(()), + _ => Err(SystemError::ESRCH), + } + } +} + +impl Syscall for SysPtrace { + fn num_args(&self) -> usize { + 4 + } + + fn handle(&self, args: &[usize], frame: &mut TrapFrame) -> Result { + if args.len() < 4 { + return Err(SystemError::EINVAL); + } + + let request = Self::request(args)?; + let pid = Self::pid(args); + let addr = Self::addr(args); + let data = Self::data(args); + + let tracer = ProcessManager::current_pcb(); + if request == PtraceRequest::Traceme { + return Self::handle_traceme(&tracer).map(|r| r as usize); + } + let tracee: Arc = + ProcessManager::find(pid).ok_or(SystemError::ESRCH)?; + let signal: Option = if data == 0 { + None // 表示无信号 + } else { + Some(Signal::from(data as i32)) + }; + + let result: isize = match request { + // 读取进程内存 + PtraceRequest::Peekdata => Self::handle_peek_data(&tracee, addr)?, + // 读取用户寄存器 + PtraceRequest::Peekuser => Self::handle_peek_user(&tracee, addr)?, + // 写入进程内存 + PtraceRequest::Pokedata => Self::handle_poke_data(&tracee, addr, data)?, + // 继续执行目标进程 + PtraceRequest::Cont | PtraceRequest::Singlestep | PtraceRequest::Syscall => { + tracee.ptrace_resume(request, signal, frame)? 
+ } + // 获取寄存器值 + PtraceRequest::Getregs => Self::handle_get_regs(&tracee, data)?, + // 设置寄存器值 + PtraceRequest::Setregs => Self::handle_set_regs(&tracee, data)?, + // 附加到目标进程 + PtraceRequest::Attach => Self::handle_attach(&tracer, pid)?, + // 分离目标进程 + PtraceRequest::Detach => Self::handle_detach(&tracee, signal)?, + // 设置跟踪选项 + PtraceRequest::Setoptions => Self::handle_set_options(&tracee, data)?, + // 获取信号信息 + PtraceRequest::Getsiginfo => Self::handle_get_siginfo(&tracee, data)?, + // PTRACE_SEIZE:现代 API,不发送 SIGSTOP + PtraceRequest::Seize => Self::handle_seize(&tracer, pid, addr)?, + // 其他请求类型 + _ => { + log::warn!("Unimplemented ptrace request: {:?}", request); + 0 + } + }; + + Ok(result as usize) + } + + fn entry_format(&self, args: &[usize]) -> Vec { + let request_name = match PtraceRequest::try_from(args[0]) { + Ok(req) => format!("{:?}", req), + Err(_) => format!("{:#x}", args[0]), + }; + + vec![ + FormattedSyscallParam::new("request", request_name), + FormattedSyscallParam::new("pid", format!("{}", args[1])), + FormattedSyscallParam::new("addr", format!("{:#x}", args[2])), + FormattedSyscallParam::new("data", format!("{:#x}", args[3])), + ] + } +} + +// 注册系统调用 +syscall_table_macros::declare_syscall!(SYS_PTRACE, SysPtrace); diff --git a/kernel/src/process/timer.rs b/kernel/src/process/timer.rs index f6e186e58e..b502bd18f1 100644 --- a/kernel/src/process/timer.rs +++ b/kernel/src/process/timer.rs @@ -4,7 +4,10 @@ use crate::process::CurrentIrqArch; use crate::process::RawPid; use crate::process::SigInfo; use crate::time::timer::{clock, Jiffies, Timer, TimerFunction}; -use crate::{arch::ipc::signal::Signal, ipc::signal_types::SigCode}; +use crate::{ + arch::ipc::signal::Signal, + ipc::signal_types::{OriginCode, SigCode}, +}; use alloc::{boxed::Box, sync::Arc}; use core::sync::atomic::compiler_fence; use core::time::Duration; @@ -130,7 +133,12 @@ impl TimerFunction for AlarmTimerFunc { fn run(&mut self) -> Result<(), SystemError> { let sig = Signal::SIGALRM; // 
初始化signal info - let mut info = SigInfo::new(sig, 0, SigCode::Timer, SigType::Alarm(self.pid)); + let mut info = SigInfo::new( + sig, + 0, + SigCode::Origin(OriginCode::Timer), + SigType::Alarm(self.pid), + ); compiler_fence(core::sync::atomic::Ordering::SeqCst); let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; diff --git a/user/apps/c_unitest/test_ptrace.c b/user/apps/c_unitest/test_ptrace.c new file mode 100644 index 0000000000..7c77f11644 --- /dev/null +++ b/user/apps/c_unitest/test_ptrace.c @@ -0,0 +1,291 @@ +/* +# test_ptrace.c测试在Linux下的行为 +=== Testing PTRACE_TRACEME === +Child ready for tracing +Child stopped by signal 19 (Stopped (signal)) +Child exited with status 0 +=== Testing PTRACE_ATTACH/DETACH === +target process 100 waiting... +Tracer attaching to target 100 +target stopped by signal 19 (Stopped (signal)) +Tracer detaching from target +target received 18 (Continued) +target exited with status 0 +=== Testing PTRACE_SYSCALL === +Child initial stop by signal 19 (Stopped (signal)) +Syscall entry detected: nr=39 +Syscall exit detected: nr=39 +Child called getpid() +Child exited normally +=== Testing PTRACE_PEEKDATA === +Child: msg_addr=0x49b643, heap_addr=0x23339c80, heap_val=0x66ccff +Parent: msg_addr=0x49b643, heap_addr=0x23339c80 +Read message: PTRACE_PEEKDATA_testing +Original heap value: 0x66ccff +Modified heap value: 0xee0000 +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// 根据CPU架构定义系统调用号位置 +#if defined(__x86_64__) || defined(_M_X64) +#define ORIG_RAX 15 // ORIG_RAX在user_regs_struct中的偏移 + +#elif defined(__aarch64__) || defined(_M_ARM64) +#define ORIG_RAX 8 // ARM64上系统调用号在regs[8] + +#elif defined(__riscv) || defined(__riscv__) +#define ORIG_RAX 0 // RISC-V上系统调用号在a7寄存器 + +#else +#error "Unsupported architecture for PTRACE_SYSCALL test" +#endif + +#define CHK_SYSCALL(call) \ + do { \ + if ((call) == -1) { \ + fprintf(stderr, "Error at %s:%d: %s failed: %s\n", 
__FILE__, __LINE__, #call, strerror(errno)); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +void sigcont_handler(int sig) { + printf("target received %d (%s)\n", sig, strsignal(sig)); + exit(EXIT_SUCCESS); +} + +// 测试 PTRACE_TRACEME 功能 +void test_trace_me() { + printf("=== Testing PTRACE_TRACEME ===\n"); + pid_t child = fork(); + if (child == 0) { + // 子进程请求被跟踪 + CHK_SYSCALL(ptrace(PTRACE_TRACEME, 0, NULL, NULL)); + // 强制产生一个信号/系统调用事件 + printf("Child ready for tracing\n"); + getpid(); + raise(SIGSTOP); + // 正常退出 + exit(EXIT_SUCCESS); + } else { + // 等待子进程停止 + int status; + CHK_SYSCALL(waitpid(child, &status, 0)); + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + printf("Child stopped by signal %d (%s)\n", sig, strsignal(sig)); + // // 获取停止原因 + // long request = ptrace(PTRACE_PEEKUSER, child, (void*)ORIG_RAX, NULL); + // printf("System call: %ld\n", request); + // 恢复子进程执行 + CHK_SYSCALL(ptrace(PTRACE_CONT, child, NULL, NULL)); + // 等待子进程退出 + CHK_SYSCALL(waitpid(child, &status, 0)); + + if (WIFEXITED(status)) { + printf("Child exited with status %d\n", WEXITSTATUS(status)); + } else { + printf("Child did not exit normally (status=%d)\n", status); + } + } else if (WIFEXITED(status)) { + printf("Child exited without stopping (status=%d)\n", WEXITSTATUS(status)); + } else { + printf("Child did not stop as expected (status=%d)\n", status); + } + } +} + +// 测试 PTRACE_ATTACH/DETACH 功能 +void test_attach_detach() { + printf("=== Testing PTRACE_ATTACH/DETACH ===\n"); + pid_t target = fork(); + if (target == 0) { + // 目标进程暂停自己 + printf("target process %d waiting...\n", getpid()); + // 确保分离后有信号处理 + if (signal(SIGCONT, sigcont_handler) == SIG_ERR) { + perror("Error setting SIGCONT handler"); + exit(EXIT_FAILURE); + } + sleep(10); + // pause(); // 等待信号 + // 永远不会到达这里 + printf("target process resumed\n"); + exit(EXIT_FAILURE); + } else { + // 给目标进程时间进入pause状态 + sleep(1); + printf("Tracer attaching to target %d\n", target); + // 父进程附加到目标进程 + 
CHK_SYSCALL(ptrace(PTRACE_ATTACH, target, NULL, NULL)); + // 等待目标进程停止 + int status; + CHK_SYSCALL(waitpid(target, &status, 0)); + + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + printf("target stopped by signal %d (%s)\n", sig, strsignal(sig)); + // 分离目标进程并发送SIGCONT唤醒它 + printf("Tracer detaching from target\n"); + CHK_SYSCALL(ptrace(PTRACE_DETACH, target, NULL, (void*)(long)SIGCONT)); + // 等待目标进程退出 + CHK_SYSCALL(waitpid(target, &status, 0)); + if (WIFEXITED(status)) { + printf("target exited with status %d\n", WEXITSTATUS(status)); + } else { + printf("target did not exit normally (status=%d)\n", status); + } + } else { + printf("target did not stop as expected (status=%d)\n", status); + } + } +} + +// 测试 PTRACE_SYSCALL 功能 +void test_syscall_tracing() { + printf("=== Testing PTRACE_SYSCALL ===\n"); + pid_t child = fork(); + if (child == 0) { + // 子进程请求被跟踪 + CHK_SYSCALL(ptrace(PTRACE_TRACEME, 0, NULL, NULL)); + // 触发系统调用 + raise(SIGSTOP); + printf("Child calling getpid()\n"); + getpid(); + exit(EXIT_SUCCESS); + } else { + // 等待子进程第一次停止 + int status; + CHK_SYSCALL(waitpid(child, &status, 0)); + + if (!WIFSTOPPED(status)) { + printf("Child did not stop as expected (status=%d)\n", status); + return; + } + printf("Child initial stop by signal %d\n", WSTOPSIG(status)); + // 启用系统调用跟踪 + CHK_SYSCALL(ptrace(PTRACE_SYSCALL, child, NULL, NULL)); + // 等待系统调用入口事件 + CHK_SYSCALL(waitpid(child, &status, 0)); + + if (WIFSTOPPED(status)) { + printf("Syscall entry detected\n"); + // 继续执行 + CHK_SYSCALL(ptrace(PTRACE_SYSCALL, child, NULL, NULL)); + // 等待系统调用出口事件 + CHK_SYSCALL(waitpid(child, &status, 0)); + if (WIFSTOPPED(status)) { + printf("Syscall exit detected\n"); + } + } + + // 恢复子进程执行 + CHK_SYSCALL(ptrace(PTRACE_CONT, child, NULL, NULL)); + // 等待子进程退出 + CHK_SYSCALL(waitpid(child, &status, 0)); + if (WIFEXITED(status)) { + printf("Child exited normally\n"); + } + } +} + +// 测试内存读取功能 +void test_peek_data() { + printf("=== Testing PTRACE_PEEKDATA ===\n"); + pid_t child = 
fork(); + if (child == 0) { + const char* message = "PTRACE_PEEKDATA_testing"; + long* heap_data = (long*)malloc(sizeof(long)); + *heap_data = 0x66CCFF; + // 直接写入共享内存结构 + struct { + const char* msg; + long* heap; + } addr_info = {message, heap_data}; + asm volatile("mov %0, %%r14" : : "r"(&addr_info)); + printf("Child: msg_addr=%p, heap_addr=%p, heap_val=%#lx\n", addr_info.msg, addr_info.heap, *addr_info.heap); + CHK_SYSCALL(ptrace(PTRACE_TRACEME, 0, NULL, NULL)); + raise(SIGSTOP); // 父进程检查点 + pause(); + exit(EXIT_SUCCESS); // 不会执行 + + } else { + int status; + struct user_regs_struct regs; + CHK_SYSCALL(waitpid(child, &status, 0)); + if (WIFSTOPPED(status)) { + CHK_SYSCALL(ptrace(PTRACE_GETREGS, child, NULL, ®s)); + uintptr_t addr_info_addr = regs.r14; + struct { + const char* msg; + long* heap; + } addr_info; + for (size_t i = 0; i < sizeof(addr_info) / sizeof(long); ++i) { + long* dest = ((long*)&addr_info) + i; + *dest = ptrace(PTRACE_PEEKDATA, child, (void*)(addr_info_addr + i * sizeof(long)), 0); + } + uintptr_t msg_addr = (uintptr_t)addr_info.msg; + uintptr_t heap_addr = (uintptr_t)addr_info.heap; + printf("Parent: msg_addr=%#lx, heap_addr=%#lx\n", msg_addr, heap_addr); + printf("Read message: "); + int bytes_printed = 0; + for (const char* p = (const char*)msg_addr;; p++) { + long data = ptrace(PTRACE_PEEKDATA, child, p, 0); + if (data == -1) { + perror("Error reading char"); + break; + } + char c = (char)(data & 0xFF); + if (c == '\0') { + break; + } + if (++bytes_printed > 128) { + printf("... 
(truncated)"); + break; + } + if (c >= ' ' && c <= '~') { // 可打印字符 + putchar(c); + } else if (c == '\n') { // 特殊字符转义 + fputs("\\n", stdout); + } else if (c == '\t') { + fputs("\\t", stdout); + } else { // 不可打印字符 + printf("\\x%02x", (unsigned char)c); + } + } + printf("\n"); + + // 读取并修改堆内存 + long heap_value = ptrace(PTRACE_PEEKDATA, child, (void*)heap_addr, 0); + printf("Original heap value: %#lx\n", heap_value); + ptrace(PTRACE_POKEDATA, child, (void*)heap_addr, (void*)0xEE0000); + long new_value = ptrace(PTRACE_PEEKDATA, child, (void*)heap_addr, 0); + printf("Modified heap value: %#lx\n", new_value); + } + // 结束子进程 + kill(child, SIGKILL); + waitpid(child, &status, 0); + } +} + +int main() { + printf("===== Starting ptrace tests =====\n\n"); + + test_trace_me(); + test_attach_detach(); + test_syscall_tracing(); + test_peek_data(); + + printf("\n===== All ptrace tests completed =====\n"); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/user/apps/tests/syscall/gvisor/blocklists/fork_test b/user/apps/tests/syscall/gvisor/blocklists/fork_test index 6c86ead402..adec909029 100644 --- a/user/apps/tests/syscall/gvisor/blocklists/fork_test +++ b/user/apps/tests/syscall/gvisor/blocklists/fork_test @@ -1 +1,3 @@ +# 未实现 SYS_SCHED_SETAFFINITY ForkTest.Affinity + diff --git a/user/apps/tests/syscall/gvisor/blocklists/ptrace_test b/user/apps/tests/syscall/gvisor/blocklists/ptrace_test new file mode 100644 index 0000000000..8c8f5a57a4 --- /dev/null +++ b/user/apps/tests/syscall/gvisor/blocklists/ptrace_test @@ -0,0 +1,17 @@ +PtraceTest.AttachParent_PeekData_PokeData_SignalSuppression +PtraceTest.GetSigMask +PtraceTest.GetSiginfo_SetSiginfo_SignalInjection +PtraceTest.SIGKILLDoesNotCauseSignalDeliveryStop +PtraceTest.PtraceKill +PtraceTest.GetRegSet +PtraceTest.ChangeRegSetInOptSyscall +PtraceTest.AttachingConvertsGroupStopToPtraceStop +PtraceTest.ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone +PtraceTest.Int3 +PtraceTest.Sysemu_PokeUser 
+PtraceTest.85 +PtraceTest.Seize_Interrupt_Listen +PtraceTest.Interrupt_Listen_RequireSeize +PtraceTest.SeizeSetOptions +PtraceTest.SingleStep +*PtraceExecveTest.Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit* diff --git a/user/apps/tests/syscall/gvisor/blocklists/wait_test b/user/apps/tests/syscall/gvisor/blocklists/wait_test index b953b1bcb3..ed89348167 100644 --- a/user/apps/tests/syscall/gvisor/blocklists/wait_test +++ b/user/apps/tests/syscall/gvisor/blocklists/wait_test @@ -1,7 +1,5 @@ WaitTest.Wait4Rusage WaitTest.WaitidRusage -# 缺少 SYS_PTRACE -WaitTest.TraceeWALL # 卡死 Waiters/WaitAnyChildTest.WaitedChildRusage/* Waiters/WaitAnyChildTest.IgnoredChildRusage/* diff --git a/user/apps/tests/syscall/gvisor/whitelist.txt b/user/apps/tests/syscall/gvisor/whitelist.txt index 96ac81a716..cf5d2267fc 100644 --- a/user/apps/tests/syscall/gvisor/whitelist.txt +++ b/user/apps/tests/syscall/gvisor/whitelist.txt @@ -64,6 +64,7 @@ setns_test eventfd_test poll_test rseq_test +ptrace_test # 内存管理测试