From: cheloha Date: Mon, 27 Jun 2022 14:26:05 +0000 (+0000) Subject: kbind(2): unlock syscall, push kernel lock down to binding loop X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=52dd74b4de497591ffc2ca410cce842d712115e4;p=openbsd kbind(2): unlock syscall, push kernel lock down to binding loop - Rearrange the security check code in sys_kbind() so that we only need to take the kernel lock once if we need to raise SIGILL. - Protect process.ps_kbind_addr and process.ps_kbind_cookie with process.ps_mtx. This is easier to do after the aforementioned rearrangement. Under normal circumstances this isn't necessary: the process is single-threaded when we initialize kbind(2). But in stranger situations this brief mutex ensures that the first thread to reach sys_kbind() initializes both variables. - Wrap the binding loop with the kernel lock. We need to carefully confirm that uvm_unmap_remove(), uvm_map_extract(), and uvm_unmap_detach() are MP-safe in a subsequent patch before completely removing the kernel lock from sys_kbind(). - Remove the kernel lock from kbind(2) in syscalls.master. Prompted by mpi@, dlg@, and deraadt@. Current patch workshopped with deraadt@. Based on a patch from dlg@. With input from dlg@, bluhm@, mpi@, kettenis@, deraadt@, and guenther@. Thread: https://marc.info/?l=openbsd-tech&m=165274831829349&w=2 ok deraadt@ kettenis@ mpi@ --- diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 222c07a7ad7..0b8b0fd31f2 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_sysent.c,v 1.237 2022/05/16 07:38:10 mvs Exp $ */ +/* $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $ */ /* * System call switch table. @@ -204,7 +204,7 @@ const struct sysent sysent[] = { sys_utimensat }, /* 84 = utimensat */ { 2, s(struct sys_futimens_args), 0, sys_futimens }, /* 85 = futimens */ - { 3, s(struct sys_kbind_args), 0, + { 3, s(struct sys_kbind_args), SY_NOLOCK | 0, sys_kbind }, /* 86 = kbind */ { 2, s(struct sys_clock_gettime_args), SY_NOLOCK | 0, sys_clock_gettime }, /* 87 = clock_gettime */ diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 52bf617c0ae..b02d109ff27 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ -; $OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 mvs Exp $ +; $OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 cheloha Exp $ ; $NetBSD: syscalls.master,v 1.32 1996/04/23 10:24:21 mycroft Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -194,7 +194,7 @@ const struct timespec *times, int flag); } 85 STD { int sys_futimens(int fd, \ const struct timespec *times); } -86 STD { int sys_kbind(const struct __kbind *param, \ +86 STD NOLOCK { int sys_kbind(const struct __kbind *param, \ size_t psize, int64_t proc_cookie); } 87 STD NOLOCK { int sys_clock_gettime(clockid_t clock_id, \ struct timespec *tp); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index df6c59d397d..a36fbfc7722 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proc.h,v 1.330 2022/05/13 15:32:00 claudio Exp $ */ +/* $OpenBSD: proc.h,v 1.331 2022/06/27 14:26:05 cheloha Exp $ */ /* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */ /*- @@ -234,8 +234,8 @@ struct process { uint64_t ps_pledge; uint64_t ps_execpledge; - int64_t ps_kbind_cookie; - u_long ps_kbind_addr; + int64_t ps_kbind_cookie; /* [m] */ + u_long ps_kbind_addr; /* [m] */ /* End area that is copied on creation. */ #define ps_endcopy ps_refcnt diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 7cd8e404a37..e8dc9d8fda3 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -1,4 +1,4 @@ -/* $OpenBSD: syscall.h,v 1.234 2022/05/16 07:38:10 mvs Exp $ */ +/* $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $ */ /* * System call numbers. diff --git a/sys/sys/syscallargs.h b/sys/sys/syscallargs.h index c07f02204ed..daa303d8d7a 100644 --- a/sys/sys/syscallargs.h +++ b/sys/sys/syscallargs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: syscallargs.h,v 1.237 2022/05/16 07:38:10 mvs Exp $ */ +/* $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $ */ /* * System call argument lists. diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 7956206d866..d44941a5e95 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_mmap.c,v 1.169 2022/01/19 10:43:48 kn Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.170 2022/06/27 14:26:06 cheloha Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -1127,7 +1127,7 @@ sys_kbind(struct proc *p, void *v, register_t *retval) size_t psize, s; u_long pc; int count, i, extra; - int error; + int error, sigill = 0; /* * extract syscall args from uap @@ -1135,23 +1135,42 @@ sys_kbind(struct proc *p, void *v, register_t *retval) paramp = SCARG(uap, param); psize = SCARG(uap, psize); - /* a NULL paramp disables the syscall for the process */ - if (paramp == NULL) { - if (pr->ps_kbind_addr != 0) - sigexit(p, SIGILL); - pr->ps_kbind_addr = BOGO_PC; - return 0; - } - - /* security checks */ + /* + * If paramp is NULL and we're uninitialized, disable the syscall + * for the process. Raise SIGILL if paramp is NULL and we're + * already initialized. + * + * If paramp is non-NULL and we're uninitialized, do initialization. + * Otherwise, do security checks and raise SIGILL on failure. + */ pc = PROC_PC(p); - if (pr->ps_kbind_addr == 0) { + mtx_enter(&pr->ps_mtx); + if (paramp == NULL) { + if (pr->ps_kbind_addr == 0) + pr->ps_kbind_addr = BOGO_PC; + else + sigill = 1; + } else if (pr->ps_kbind_addr == 0) { pr->ps_kbind_addr = pc; pr->ps_kbind_cookie = SCARG(uap, proc_cookie); - } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC) - sigexit(p, SIGILL); - else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) + } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC || + pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) { + sigill = 1; + } + mtx_leave(&pr->ps_mtx); + + /* Raise SIGILL if something is off. */ + if (sigill) { + KERNEL_LOCK(); sigexit(p, SIGILL); + /* NOTREACHED */ + KERNEL_UNLOCK(); + } + + /* We're done if we were disabling the syscall. */ + if (paramp == NULL) + return 0; + if (psize < sizeof(struct __kbind) || psize > sizeof(param)) return EINVAL; if ((error = copyin(paramp, ¶m, psize))) @@ -1190,6 +1209,7 @@ sys_kbind(struct proc *p, void *v, register_t *retval) last_baseva = VM_MAXUSER_ADDRESS; kva = 0; TAILQ_INIT(&dead_entries); + KERNEL_LOCK(); for (i = 0; i < count; i++) { baseva = (vaddr_t)paramp[i].kb_addr; s = paramp[i].kb_size; @@ -1239,6 +1259,7 @@ redo: vm_map_unlock(kernel_map); } uvm_unmap_detach(&dead_entries, AMAP_REFALL); + KERNEL_UNLOCK(); return error; }