-/* $OpenBSD: exec_elf.c,v 1.183 2023/07/12 19:34:14 jasper Exp $ */
+/* $OpenBSD: exec_elf.c,v 1.184 2024/01/16 19:05:01 deraadt Exp $ */
/*
* Copyright (c) 1996 Per Fogelstrom
#include <sys/ptrace.h>
#include <sys/signalvar.h>
#include <sys/pledge.h>
+#include <sys/syscall.h>
#include <sys/mman.h>
Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
int elf_os_pt_note_name(Elf_Note *);
int elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);
+int elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
+ u_int **pinp, int is_ldso, size_t len);
/* round up and down to page boundaries. */
#define ELF_ROUND(a, b) (((a) + (b) - 1) & ~((b) - 1))
return (0);
}
+/*
+ * Slide a base,len window forward by "offset" bytes (used for the text
+ * segment only) and make the pin table offsets relative to the new base.
+ * Table slots holding 0 or -1 are markers, not offsets, and stay as-is.
+ */
+void
+elf_adjustpins(vaddr_t *basep, size_t *lenp, u_int *pins, int npins, u_int offset)
+{
+	int slot;
+
+	/* Move the window itself */
+	*basep += offset;
+	*lenp -= offset;
+
+	/* Then shift every real offset stored in the table */
+	for (slot = 0; slot < npins; slot++) {
+		u_int pin = pins[slot];
+
+		if (pin != 0 && pin != (u_int)-1)
+			pins[slot] = pin - offset;
+	}
+}
+
+/*
+ * Read the PT_OPENBSD_SYSCALLS segment described by pp from vp and build
+ * a pin table indexed by system call number.
+ *
+ * Each slot of the table is 0 (system call not listed), -1 (listed more
+ * than once, so no single offset applies) or the offset recorded in the
+ * segment.  When is_ldso is set the SYS_kbind slot is forced to -1.
+ * Entries with an out-of-range system call number, or an offset beyond
+ * len, reject the whole segment.
+ *
+ * On success the table is stored in *pinp (M_PINSYSCALL memory, now owned
+ * by the caller) and the number of slots is returned.  On any failure 0
+ * is returned and *pinp is not written.
+ */
+int
+elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
+    u_int **pinp, int is_ldso, size_t len)
+{
+	struct pinsyscalls {
+		u_int offset;
+		u_int sysno;
+	} *syscalls = NULL;
+	int i, nsyscalls = 0, npins = 0;
+	u_int *pins = NULL;
+
+	if (pp->p_filesz > SYS_MAXSYSCALL * 2 * sizeof(*syscalls) ||
+	    pp->p_filesz % sizeof(*syscalls) != 0)
+		goto bad;
+	nsyscalls = pp->p_filesz / sizeof(*syscalls);
+	syscalls = malloc(pp->p_filesz, M_PINSYSCALL, M_WAITOK);
+	if (elf_read_from(p, vp, pp->p_offset, syscalls,
+	    pp->p_filesz) != 0)
+		goto bad;
+
+	/* Validate, and calculate pintable size */
+	for (i = 0; i < nsyscalls; i++) {
+		if (syscalls[i].sysno <= 0 ||
+		    syscalls[i].sysno >= SYS_MAXSYSCALL ||
+		    syscalls[i].offset > len) {
+			/*
+			 * npins may already be non-zero from earlier
+			 * entries.  Callers treat a non-zero return as
+			 * "*pinp is valid", so report failure as 0.
+			 */
+			npins = 0;
+			goto bad;
+		}
+		npins = MAX(npins, syscalls[i].sysno);
+	}
+	if (is_ldso)
+		npins = MAX(npins, SYS_kbind);	/* XXX see ld.so/loader.c */
+	npins++;
+
+	/* Fill pintable: 0 = invalid, -1 = allowed, else offset from base */
+	pins = mallocarray(npins, sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
+	for (i = 0; i < nsyscalls; i++) {
+		if (pins[syscalls[i].sysno])
+			pins[syscalls[i].sysno] = -1;	/* duplicated */
+		else
+			pins[syscalls[i].sysno] = syscalls[i].offset;
+	}
+	if (is_ldso)
+		pins[SYS_kbind] = -1;	/* XXX see ld.so/loader.c */
+	*pinp = pins;
+	pins = NULL;	/* ownership handed to the caller */
+bad:
+	free(syscalls, M_PINSYSCALL, nsyscalls * sizeof(*syscalls));
+	free(pins, M_PINSYSCALL, npins * sizeof(u_int));
+	return npins;
+}
+
/*
* Load a file (interpreter/library) pointed to by path [stolen from
* coff_load_shlib()]. Made slightly generic so it might be used externally.
int error, i;
struct nameidata nd;
Elf_Ehdr eh;
- Elf_Phdr *ph = NULL;
+ Elf_Phdr *ph = NULL, *syscall_ph = NULL;
u_long phsize = 0;
Elf_Addr addr;
struct vnode *vp;
int file_align;
int loop;
size_t randomizequota = ELF_RANDOMIZE_LIMIT;
+ vaddr_t text_start = -1, text_end = 0;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
nd.ni_pledge = PLEDGE_RPATH;
epp->ep_entry += pos;
ap->arg_interp = pos;
}
+ if (prot & PROT_EXEC) {
+ if (addr < text_start)
+ text_start = addr;
+ if (addr+size >= text_end)
+ text_end = addr + size;
+ }
addr += size;
break;
NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
break;
-
+ case PT_OPENBSD_SYSCALLS:
+ syscall_ph = &ph[i];
+ break;
default:
break;
}
}
+ if (syscall_ph) {
+ struct process *pr = p->p_p;
+ vaddr_t base = pos;
+ size_t len = text_end;
+ u_int *pins;
+ int npins;
+
+ npins = elf_read_pintable(p, nd.ni_vp, syscall_ph,
+ &pins, 1, len);
+ if (npins) {
+ elf_adjustpins(&base, &len, pins, npins,
+ text_start);
+ pr->ps_pin.pn_start = base;
+ pr->ps_pin.pn_end = base + len;
+ pr->ps_pin.pn_pins = pins;
+ pr->ps_pin.pn_npins = npins;
+ pr->ps_flags |= PS_PIN;
+ }
+ }
+
vn_marktext(nd.ni_vp);
bad1:
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
Elf_Ehdr *eh = epp->ep_hdr;
- Elf_Phdr *ph, *pp, *base_ph = NULL;
- Elf_Addr phdr = 0, exe_base = 0;
+ Elf_Phdr *ph, *pp, *base_ph = NULL, *syscall_ph = NULL;
+ Elf_Addr phdr = 0, exe_base = 0, exe_end = 0;
int error, i, has_phdr = 0, names = 0, textrel = 0;
char *interp = NULL;
u_long phsize;
/*
* Permit system calls in main-text static binaries.
- * Also block the ld.so syscall-grant
+ * static binaries may not call msyscall() or
+ * pinsyscalls()
*/
if (interp == NULL) {
syscall = VMCMD_SYSCALL;
p->p_vmspace->vm_map.flags |= VM_MAP_SYSCALL_ONCE;
+ p->p_vmspace->vm_map.flags |= VM_MAP_PINSYSCALL_ONCE;
}
/*
epp->ep_tsize = addr+size -
epp->ep_taddr;
}
+ if (interp == NULL)
+ exe_end = epp->ep_taddr +
+ epp->ep_tsize; /* end of TEXT */
}
break;
NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
break;
-
+ case PT_OPENBSD_SYSCALLS:
+ if (interp == NULL)
+ syscall_ph = &ph[i];
+ break;
default:
/*
* Not fatal, we don't need to understand everything
}
}
+ if (syscall_ph) {
+ vaddr_t base = exe_base;
+ size_t len = exe_end - exe_base;
+ u_int *pins;
+ int npins;
+
+ npins = elf_read_pintable(p, epp->ep_vp, syscall_ph,
+ &pins, 0, len);
+ if (npins) {
+ elf_adjustpins(&base, &len, pins, npins,
+ epp->ep_taddr - exe_base);
+ epp->ep_pinstart = base;
+ epp->ep_pinend = base + len;
+ epp->ep_pins = pins;
+ epp->ep_npins = npins;
+ p->p_p->ps_flags |= PS_PIN;
+ }
+ }
+
phdr += exe_base;
/*
-/* $OpenBSD: kern_exec.c,v 1.252 2023/10/30 07:13:10 claudio Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.253 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
VMCMDSET_INIT(&pack.ep_vmcmds);
pack.ep_vap = &attr;
pack.ep_flags = 0;
+ pack.ep_pins = NULL;
+ pack.ep_npins = 0;
/* see if we can run it. */
if ((error = check_exec(p, &pack)) != 0) {
if (copyout(&arginfo, (char *)pr->ps_strings, sizeof(arginfo)))
goto exec_abort;
+ free(pr->ps_pin.pn_pins, M_PINSYSCALL,
+ pr->ps_pin.pn_npins * sizeof(u_int));
+ if (pack.ep_npins) {
+ pr->ps_pin.pn_start = pack.ep_pinstart;
+ pr->ps_pin.pn_end = pack.ep_pinend;
+ pr->ps_pin.pn_pins = pack.ep_pins;
+ pack.ep_pins = NULL;
+ pr->ps_pin.pn_npins = pack.ep_npins;
+ pr->ps_flags |= PS_PIN;
+ } else {
+ pr->ps_pin.pn_start = pr->ps_pin.pn_end = 0;
+ pr->ps_pin.pn_pins = NULL;
+ pr->ps_pin.pn_npins = 0;
+ pr->ps_flags &= ~PS_PIN;
+ }
+ if (pr->ps_libcpin.pn_pins) {
+ free(pr->ps_libcpin.pn_pins, M_PINSYSCALL,
+ pr->ps_libcpin.pn_npins * sizeof(u_int));
+ pr->ps_libcpin.pn_start = pr->ps_libcpin.pn_end = 0;
+ pr->ps_libcpin.pn_pins = NULL;
+ pr->ps_libcpin.pn_npins = 0;
+ pr->ps_flags &= ~PS_LIBCPIN;
+ }
+
stopprofclock(pr); /* stop profiling */
fdcloseexec(p); /* handle close on exec */
execsigs(p); /* reset caught signals */
if (pack.ep_interp != NULL)
pool_put(&namei_pool, pack.ep_interp);
free(pack.ep_args, M_TEMP, sizeof *pack.ep_args);
+ free(pack.ep_pins, M_PINSYSCALL, pack.ep_npins * sizeof(u_int));
/* close and put the exec'd file */
vn_close(pack.ep_vp, FREAD, cred, p);
pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
-/* $OpenBSD: kern_exit.c,v 1.218 2024/01/15 15:47:37 mvs Exp $ */
+/* $OpenBSD: kern_exit.c,v 1.219 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */
/*
unveil_destroy(pr);
+ free(pr->ps_pin.pn_pins, M_PINSYSCALL,
+ pr->ps_pin.pn_npins * sizeof(u_int));
+ free(pr->ps_libcpin.pn_pins, M_PINSYSCALL,
+ pr->ps_libcpin.pn_npins * sizeof(u_int));
+
/*
* If parent has the SAS_NOCLDWAIT flag set, we're not
* going to become a zombie.
-/* $OpenBSD: kern_fork.c,v 1.254 2024/01/15 15:47:37 mvs Exp $ */
+/* $OpenBSD: kern_fork.c,v 1.255 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
/*
if (parent->ps_session->s_ttyvp != NULL)
pr->ps_flags |= parent->ps_flags & PS_CONTROLT;
+ if (parent->ps_pin.pn_pins) {
+ pr->ps_pin.pn_pins = mallocarray(parent->ps_pin.pn_npins,
+ sizeof(u_int), M_PINSYSCALL, M_WAITOK);
+ memcpy(pr->ps_pin.pn_pins, parent->ps_pin.pn_pins,
+ parent->ps_pin.pn_npins * sizeof(u_int));
+ pr->ps_flags |= PS_PIN;
+ }
+ if (parent->ps_libcpin.pn_pins) {
+ pr->ps_libcpin.pn_pins = mallocarray(parent->ps_libcpin.pn_npins,
+ sizeof(u_int), M_PINSYSCALL, M_WAITOK);
+ memcpy(pr->ps_libcpin.pn_pins, parent->ps_libcpin.pn_pins,
+ parent->ps_libcpin.pn_npins * sizeof(u_int));
+ pr->ps_flags |= PS_LIBCPIN;
+ }
+
/*
* Duplicate sub-structures as needed.
* Increase reference counts on shared objects.
-/* $OpenBSD: exec.h,v 1.52 2023/04/19 15:37:36 kettenis Exp $ */
+/* $OpenBSD: exec.h,v 1.53 2024/01/16 19:05:00 deraadt Exp $ */
/* $NetBSD: exec.h,v 1.59 1996/02/09 18:25:09 christos Exp $ */
/*-
struct elf_args *ep_args; /* ELF info */
void *ep_auxinfo; /* userspace auxinfo address */
char *ep_interp; /* name of interpreter if any */
+ vaddr_t ep_pinstart, ep_pinend; /* executable region */
+ u_int *ep_pins; /* array of system call offsets */
+ int ep_npins; /* entries in array */
};
#define EXEC_INDIR 0x0001 /* script handling already done */
#define EXEC_HASFD 0x0002 /* holding a shell script */
-/* $OpenBSD: proc.h,v 1.353 2024/01/15 15:47:37 mvs Exp $ */
+/* $OpenBSD: proc.h,v 1.354 2024/01/16 19:05:00 deraadt Exp $ */
/* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */
/*-
TAILQ_HEAD(tslpqueue, tslpentry);
struct unveil;
+struct pinsyscall {
+ vaddr_t pn_start; /* start of pinned region; pin offsets are relative to it */
+ vaddr_t pn_end; /* end of pinned region (exclusive in pin_check) */
+ u_int *pn_pins; /* array of offsets indexed by syscall# (0 = invalid, -1 = any location) */
+ int pn_npins; /* number of entries in table */
+};
+
/*
* Locks used to protect struct members in this file:
* I immutable after creation
/* an address that can't be in userspace or kernelspace */
#define BOGO_PC (u_long)-1
+ struct pinsyscall ps_pin; /* static or ld.so */
+ struct pinsyscall ps_libcpin; /* libc.so, from pinsyscalls(2) */
+
/* End area that is copied on creation. */
#define ps_endcopy ps_threadcnt
u_int ps_threadcnt; /* Number of threads. */
#define PS_CHROOT 0x01000000 /* Process is chrooted */
#define PS_NOBTCFI 0x02000000 /* No Branch Target CFI */
#define PS_ITIMER 0x04000000 /* Virtual interval timers running */
+#define PS_PIN 0x08000000 /* ld.so or static syscall pin */
+#define PS_LIBCPIN 0x10000000 /* libc.so syscall pin */
#define PS_BITS \
("\20" "\01CONTROLT" "\02EXEC" "\03INEXEC" "\04EXITING" "\05SUGID" \
-/* $OpenBSD: syscall_mi.h,v 1.29 2023/12/12 15:30:55 deraadt Exp $ */
+/* $OpenBSD: syscall_mi.h,v 1.30 2024/01/16 19:05:00 deraadt Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
#include <sys/param.h>
#include <sys/pledge.h>
+#include <sys/acct.h>
+#include <sys/syslog.h>
#include <sys/tracepoint.h>
#include <sys/syscall.h>
+#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#ifdef KTRACE
#include <dev/dt/dtvar.h>
#endif
+/*
+ * Check if a system call is entered from precisely correct location
+ *
+ * p is the calling thread and code the system call number.
+ *
+ * Returns 0 when the call is permitted.  Otherwise the violation is
+ * reported via ktrace (if enabled) and log(), APINSYS is set in
+ * ps_acflag, sibling threads are asked to unwind, sigabort() is called,
+ * and the error (ENOSYS or EPERM) is returned.
+ *
+ * If the entry address lies in neither pinned region and the PC is not
+ * ps_sigcoderet, error is left at 0 and the call is permitted.
+ */
+static inline int
+pin_check(struct proc *p, register_t code)
+{
+ extern char sigcodecall[], sigcoderet[], sigcodecall[]; /* NOTE(review): sigcodecall is listed twice; redundant */
+ struct pinsyscall *pin = NULL, *ppin, *plibcpin;
+ struct process *pr = p->p_p;
+ vaddr_t addr;
+ int error = 0;
+
+ /* point at start of syscall instruction */
+ addr = (vaddr_t)PROC_PC(p) - (vaddr_t)(sigcoderet - sigcodecall); /* label distance presumably equals the syscall instruction length -- confirm per arch */
+ ppin = &pr->ps_pin;
+ plibcpin = &pr->ps_libcpin;
+
+ /*
+ * System calls come from the following places, checks are ordered
+ * by most common case:
+ * 1) dynamic binary: syscalls in libc.so (in the ps_libcpin region)
+ * 2a) static binary: syscalls in main program (in the ps_pin region)
+ * 2b) dynamic binary: syscalls in ld.so (in the ps_pin region)
+ * 3) sigtramp, containing only sigreturn(2)
+ */
+ if (plibcpin->pn_pins &&
+ addr >= plibcpin->pn_start && addr < plibcpin->pn_end)
+ pin = plibcpin;
+ else if (ppin->pn_pins &&
+ addr >= ppin->pn_start && addr < ppin->pn_end)
+ pin = ppin;
+ else if (PROC_PC(p) == pr->ps_sigcoderet) { /* compares the unadjusted PC, not addr */
+ if (code == SYS_sigreturn)
+ return (0);
+ error = EPERM;
+ }
+ if (pin) { /* pin is still NULL when addr matched neither region */
+ if (code >= pin->pn_npins || pin->pn_pins[code] == 0) /* beyond the table, or slot marked invalid */
+ error = ENOSYS;
+ else if (pin->pn_pins[code] + pin->pn_start == addr)
+ ; /* correct location */
+ else if (pin->pn_pins[code] == (u_int)-1)
+ ; /* multiple locations, hopefully a boring operation */
+ else
+ error = ENOSYS;
+ }
+ if (error == 0)
+ return (0);
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_PINSYSCALL))
+ ktrpinsyscall(p, error, code, addr);
+#endif
+ KERNEL_LOCK(); /* held across log(), single_thread_set() and sigabort() below */
+ log(LOG_ERR,
+ "%s[%d]: pinsyscalls addr %lx code %ld, pinoff 0x%x "
+ "(pin%s %d %lx-%lx %lx) (libcpin%s %d %lx-%lx %lx) error %d\n",
+ p->p_p->ps_comm, p->p_p->ps_pid, addr, code,
+ (pin && code < pin->pn_npins) ? pin->pn_pins[code] : -1,
+ pin == ppin ? "(Y)" : "", ppin->pn_npins, /* "(Y)" marks the table that matched addr */
+ ppin->pn_start, ppin->pn_end, ppin->pn_end - ppin->pn_start,
+ pin == plibcpin ? "(Y)" : "", plibcpin->pn_npins,
+ plibcpin->pn_start, plibcpin->pn_end, plibcpin->pn_end - plibcpin->pn_start,
+ error);
+ p->p_p->ps_acflag |= APINSYS;
+
+ /* Try to stop threads immediately, because this process is suspect */
+ if (P_HASSIBLING(p))
+ single_thread_set(p, SINGLE_UNWIND | SINGLE_DEEP);
+ /* Send uncatchable SIGABRT for coredump */
+ sigabort(p);
+ KERNEL_UNLOCK();
+ return (error);
+}
/*
* The MD setup for a system call has been done; here's the MI part.
uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial))
return (EPERM);
+ if ((error = pin_check(p, code)))
+ return (error);
+
pledged = (p->p_p->ps_flags & PS_PLEDGE);
if (pledged && (error = pledge_syscall(p, code, &tval))) {
KERNEL_LOCK();
-/* $OpenBSD: uvm_map.c,v 1.319 2023/08/02 09:19:47 mpi Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.320 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
* when a process execs another program image.
*/
vm_map_lock(map);
- vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
+ vm_map_modflags(map, 0, VM_MAP_WIREFUTURE |
+ VM_MAP_SYSCALL_ONCE | VM_MAP_PINSYSCALL_ONCE);
/*
* now unmap the old program
new_map, new_entry->start, new_entry->end);
}
}
- new_map->flags |= old_map->flags & VM_MAP_SYSCALL_ONCE;
+ new_map->flags |= old_map->flags &
+ (VM_MAP_SYSCALL_ONCE | VM_MAP_PINSYSCALL_ONCE);
#ifdef PMAP_CHECK_COPYIN
if (PMAP_CHECK_COPYIN) {
memcpy(&new_map->check_copyin, &old_map->check_copyin,
-/* $OpenBSD: uvm_map.h,v 1.87 2023/08/02 09:19:47 mpi Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.88 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
/*
#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */
#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */
#define VM_MAP_SYSCALL_ONCE 0x80 /* rw: libc syscall registered */
+#define VM_MAP_PINSYSCALL_ONCE 0x100 /* rw: pinsyscall done */
/* Number of kernel maps and entries to statically allocate */
#define MAX_KMAPENT 1024 /* Sufficient to make it to the scheduler. */
-/* $OpenBSD: uvm_mmap.c,v 1.183 2023/12/07 13:59:05 deraadt Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.184 2024/01/16 19:05:01 deraadt Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
return (0);
}
- /*
- * sys_pinsyscalls
+/*
+ * sys_pinsyscalls. The caller is required to normalize base,len
+ * to the minimum .text region, and adjust pintable offsets relative
+ * to that base.
*/
int
sys_pinsyscalls(struct proc *p, void *v, register_t *retval)
{
- /* STUB until other parts are ready */
+	struct sys_pinsyscalls_args /* {
+		syscallarg(void *) base;
+		syscallarg(size_t) len;
+		syscallarg(u_int *) pins;
+		syscallarg(int) npins;
+	} */ *uap = v;
+	struct process *pr = p->p_p;
+	int npins, error = 0, i;
+	vaddr_t base;
+	size_t len;
+	u_int *pins;
+
+	/*
+	 * Returns 0 after installing the table in ps_libcpin, else:
+	 * EPERM (already registered, or the map forbids it), EINVAL
+	 * (base + len wraps), E2BIG (npins out of range), ERANGE (an
+	 * offset lies beyond len), ENOMEM, or the error from copyin().
+	 */
+
+	/*
+	 * The libc pin table may be registered only once.  Test pn_pins
+	 * as well as pn_start: a table registered with base 0 leaves
+	 * pn_start zero, and a repeat call would otherwise replace (and
+	 * leak) the table that is already installed.
+	 */
+	if (pr->ps_libcpin.pn_start || pr->ps_libcpin.pn_pins ||
+	    (pr->ps_vmspace->vm_map.flags & VM_MAP_PINSYSCALL_ONCE))
+		return (EPERM);
+	base = (vaddr_t)SCARG(uap, base);
+	len = (vsize_t)SCARG(uap, len);
+	if (base > SIZE_MAX - len)
+		return (EINVAL);	/* disallow wrap-around. */
+
+	/* XXX MP unlock */
+
+	npins = SCARG(uap, npins);
+	if (npins < 1 || npins > SYS_MAXSYSCALL)
+		return (E2BIG);
+	pins = malloc(npins * sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
+	if (pins == NULL)
+		return (ENOMEM);
+	error = copyin(SCARG(uap, pins), pins, npins * sizeof(u_int));
+	if (error)
+		goto err;
+
+	/* Range-check pintable offsets; 0 and -1 are markers, not offsets */
+	for (i = 0; i < npins; i++) {
+		if (pins[i] == (u_int)-1 || pins[i] == 0)
+			continue;
+		if (pins[i] > SCARG(uap, len)) {
+			error = ERANGE;
+			break;
+		}
+	}
+	if (error) {
+err:
+		free(pins, M_PINSYSCALL, npins * sizeof(u_int));
+		return (error);
+	}
+	/* The table is now owned by the process */
+	pr->ps_libcpin.pn_start = base;
+	pr->ps_libcpin.pn_end = base + len;
+	pr->ps_libcpin.pn_pins = pins;
+	pr->ps_libcpin.pn_npins = npins;
+	pr->ps_flags |= PS_LIBCPIN;
	return (0);
}