From: dv Date: Mon, 5 Feb 2024 21:58:09 +0000 (+0000) Subject: Cleanup fcntl(3) usage and fd lifetimes in vmd(8). X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=b3bc6112e4995b349a3e1f5ce822ae93ed9b5245;p=openbsd Cleanup fcntl(3) usage and fd lifetimes in vmd(8). Remove extraneous fcntl(3) usage for setting fd features that can be set at time of open(2), pipe2(2), or socketpair(2). Also cleans up pty creation switching to using functions from libutil instead of direct ioctl(2) calls. ok mlarkin@, original diff ok claudio@ as well. --- diff --git a/usr.sbin/vmd/config.c b/usr.sbin/vmd/config.c index 0022ce2a28c..3fb2c9b4003 100644 --- a/usr.sbin/vmd/config.c +++ b/usr.sbin/vmd/config.c @@ -1,4 +1,4 @@ -/* $OpenBSD: config.c,v 1.74 2024/01/18 14:49:59 claudio Exp $ */ +/* $OpenBSD: config.c,v 1.75 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Reyk Floeter @@ -284,7 +284,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) { if (vm->vm_kernel_path != NULL) { /* Open external kernel for child */ - kernfd = open(vm->vm_kernel_path, O_RDONLY); + kernfd = open(vm->vm_kernel_path, O_RDONLY | O_CLOEXEC); if (kernfd == -1) { ret = errno; log_warn("%s: can't open kernel or BIOS " @@ -301,7 +301,8 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) * license. */ if (kernfd == -1) { - if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { + if ((kernfd = open(VM_DEFAULT_BIOS, + O_RDONLY | O_CLOEXEC)) == -1) { log_warn("can't open %s", VM_DEFAULT_BIOS); ret = VMD_BIOS_MISSING; goto fail; @@ -341,14 +342,17 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) } } - /* Open disk images for child */ + /* + * Open disk images for child. Don't set O_CLOEXEC as these must be + * explicitly closed by the vm process during virtio subprocess launch. + */ for (i = 0 ; i < vmc->vmc_ndisks; i++) { if (strlcpy(path, vmc->vmc_disks[i], sizeof(path)) >= sizeof(path)) log_warnx("disk path %s too long", vmc->vmc_disks[i]); memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); - oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; - aflags = R_OK|W_OK; + oflags = O_RDWR | O_EXLOCK | O_NONBLOCK; + aflags = R_OK | W_OK; for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { /* Stat disk[i] to ensure it is a regular file */ if ((diskfds[i][j] = open(path, oflags)) == -1) { @@ -372,7 +376,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) * All writes should go to the top image, allowing them * to be shared. */ - oflags = O_RDONLY|O_NONBLOCK; + oflags = O_RDONLY | O_NONBLOCK; aflags = R_OK; n = virtio_get_base(diskfds[i][j], base, sizeof(base), vmc->vmc_disktypes[i], path); @@ -407,7 +411,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) /* * Either open the requested tap(4) device or get - * the next available one. + * the next available one. Don't set O_CLOEXEC as these + * should be closed by the vm process during virtio device + * launch. */ if (s != NULL) { snprintf(path, PATH_MAX, "/dev/%s", s); @@ -454,7 +460,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); } - /* Open TTY */ + /* + * Open TTY. Duplicate the fd before sending so the privileged parent + * process can perform permissions cleanup of the pty on vm termination. + */ if (vm->vm_ttyname[0] == '\0') { if (vm_opentty(vm) == -1) { log_warn("%s: can't open tty %s", __func__, diff --git a/usr.sbin/vmd/vioblk.c b/usr.sbin/vmd/vioblk.c index b1d393f8a8a..d97e5cc113a 100644 --- a/usr.sbin/vmd/vioblk.c +++ b/usr.sbin/vmd/vioblk.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vioblk.c,v 1.11 2024/02/04 14:54:51 dv Exp $ */ +/* $OpenBSD: vioblk.c,v 1.12 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2023 Dave Voutila @@ -176,11 +176,6 @@ vioblk_main(int fd, int fd_vmm) /* Configure our sync channel event handler. */ log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, dev.sync_fd); - if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto fail; - } imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); dev.sync_iev.handler = handle_sync_io; dev.sync_iev.data = &dev; diff --git a/usr.sbin/vmd/vionet.c b/usr.sbin/vmd/vionet.c index 5f7e146b32b..5ed06fb8a4c 100644 --- a/usr.sbin/vmd/vionet.c +++ b/usr.sbin/vmd/vionet.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vionet.c,v 1.9 2024/02/03 21:41:35 dv Exp $ */ +/* $OpenBSD: vionet.c,v 1.10 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2023 Dave Voutila @@ -203,11 +203,6 @@ vionet_main(int fd, int fd_vmm) /* Configure our sync channel event handler. */ log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, dev.sync_fd); - if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto fail; - } imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); dev.sync_iev.handler = handle_sync_io; dev.sync_iev.data = &dev; diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c index cca6e041046..afe3dd8f7a4 100644 --- a/usr.sbin/vmd/virtio.c +++ b/usr.sbin/vmd/virtio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.c,v 1.110 2023/11/03 11:16:43 dv Exp $ */ +/* $OpenBSD: virtio.c,v 1.111 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -73,6 +73,7 @@ SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs; static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *); static void virtio_dispatch_dev(int, short, void *); static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *); +static int virtio_dev_closefds(struct virtio_dev *); const char * virtio_reg_name(uint8_t reg) @@ -1303,6 +1304,7 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) int data_fds[VM_MAX_BASE_PER_DISK], sync_fds[2], async_fds[2], ret = 0; size_t i, data_fds_sz, sz = 0; struct viodev_msg msg; + struct virtio_dev *dev_entry; struct imsg imsg; struct imsgev *iev = &dev->sync_iev; @@ -1326,27 +1328,17 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) } /* We need two channels: one synchronous (IO reads) and one async. */ - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, sync_fds) == -1) { + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, + sync_fds) == -1) { log_warn("failed to create socketpair"); return (errno); } - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, async_fds) == -1) { + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, + async_fds) == -1) { log_warn("failed to create async socketpair"); return (errno); } - /* Keep communication channels open after exec. */ - if (fcntl(sync_fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - if (fcntl(async_fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcnt", __func__); - goto err; - } - /* Fork... */ dev_pid = fork(); if (dev_pid == -1) { @@ -1371,13 +1363,6 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) for (i = 0; i < data_fds_sz; i++) close_fd(data_fds[i]); - /* Set our synchronous channel to non-blocking. */ - if (fcntl(sync_fds[0], F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - /* 1. Send over our configured device. */ log_debug("%s: sending '%c' type device struct", __func__, dev->dev_type); @@ -1446,15 +1431,21 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) close_fd(async_fds[0]); close_fd(sync_fds[0]); + /* Close pty. Virtio devices do not need it. */ + close_fd(vm->vm_tty); + vm->vm_tty = -1; + + if (vm->vm_cdrom != -1) { + close_fd(vm->vm_cdrom); + vm->vm_cdrom = -1; + } + /* Keep data file descriptors open after exec. */ - for (i = 0; i < data_fds_sz; i++) { - log_debug("%s: marking fd %d !close-on-exec", __func__, - data_fds[i]); - if (fcntl(data_fds[i], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } + SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) { + if (dev_entry == dev) + continue; + if (virtio_dev_closefds(dev_entry) == -1) + fatalx("unable to close other virtio devs"); } memset(&nargv, 0, sizeof(nargv)); @@ -1520,11 +1511,6 @@ vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *)) log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__, dev->dev_type, fd); - if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { - log_warn("failed to set nonblocking mode on vm device pipe"); - return (-1); - } - imsg_init(&iev->ibuf, fd); iev->handler = cb; iev->data = dev; @@ -1764,3 +1750,30 @@ virtio_deassert_pic_irq(struct virtio_dev *dev, int vcpu) if (ret == -1) log_warnx("%s: failed to deassert irq %d", __func__, dev->irq); } + +/* + * Close all underlying file descriptors for a given virtio device. + */ +static int +virtio_dev_closefds(struct virtio_dev *dev) +{ + size_t i; + + switch (dev->dev_type) { + case VMD_DEVTYPE_DISK: + for (i = 0; i < dev->vioblk.ndisk_fd; i++) { + close_fd(dev->vioblk.disk_fd[i]); + dev->vioblk.disk_fd[i] = -1; + } + break; + case VMD_DEVTYPE_NET: + close_fd(dev->vionet.data_fd); + dev->vionet.data_fd = -1; + break; + default: + log_warnx("%s: invalid device type", __func__); + return (-1); + } + + return (0); +} diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c index 8d20b4c3a93..772d044604d 100644 --- a/usr.sbin/vmd/vm.c +++ b/usr.sbin/vmd/vm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm.c,v 1.96 2024/01/18 14:49:59 claudio Exp $ */ +/* $OpenBSD: vm.c,v 1.97 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -221,13 +221,18 @@ static const struct vcpu_reg_state vcpu_init_flat16 = { * fd_vmm: file descriptor for communicating with vmm(4) device */ void -vm_main(int fd, int vmm_fd) +vm_main(int fd, int fd_vmm) { struct vm_create_params *vcp = NULL; struct vmd_vm vm; size_t sz = 0; int ret = 0; + /* + * The vm process relies on global state. Set the fd for /dev/vmm. + */ + env->vmd_fd = fd_vmm; + /* * We aren't root, so we can't chroot(2). Use unveil(2) instead. */ @@ -277,11 +282,6 @@ vm_main(int fd, int vmm_fd) vcp->vcp_name); _exit(EINVAL); } - if (fcntl(vm.vm_kernel, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("failed to set nonblocking mode on boot fd"); - _exit(ret); - } } ret = start_vm(&vm, fd); @@ -2454,7 +2454,7 @@ vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *)) memset(p, 0, sizeof(struct vm_dev_pipe)); - ret = pipe(fds); + ret = pipe2(fds, O_CLOEXEC); if (ret) fatal("failed to create vm_dev_pipe pipe"); @@ -2507,7 +2507,7 @@ vm_pipe_recv(struct vm_dev_pipe *p) } /* - * Re-map the guest address space using the shared memory file descriptor. + * Re-map the guest address space using vmm(4)'s VMM_IOC_SHARE * * Returns 0 on success, non-zero in event of failure. */ diff --git a/usr.sbin/vmd/vmd.c b/usr.sbin/vmd/vmd.c index f821b7e3c8f..887e1cc9bf8 100644 --- a/usr.sbin/vmd/vmd.c +++ b/usr.sbin/vmd/vmd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmd.c,v 1.154 2024/02/04 14:56:45 dv Exp $ */ +/* $OpenBSD: vmd.c,v 1.155 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Reyk Floeter @@ -800,6 +800,8 @@ main(int argc, char **argv) if ((env = calloc(1, sizeof(*env))) == NULL) fatal("calloc: env"); + env->vmd_fd = -1; + env->vmd_fd6 = -1; while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { switch (ch) { @@ -891,7 +893,6 @@ main(int argc, char **argv) ps = &env->vmd_ps; ps->ps_env = env; - env->vmd_fd = vmm_fd; if (config_init(env) == -1) fatal("failed to initialize configuration"); @@ -924,7 +925,7 @@ main(int argc, char **argv) /* Open /dev/vmm early. */ if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { - env->vmd_fd = open(VMM_NODE, O_RDWR); + env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC); if (env->vmd_fd == -1) fatal("%s", VMM_NODE); } @@ -1014,9 +1015,6 @@ vmd_configure(void) int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; size_t ncpus_sz = sizeof(ncpus); - if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) - fatal("open %s", PATH_PTMDEV); - /* * pledge in the parent process: * stdio - for malloc and basic I/O including events. @@ -1034,6 +1032,9 @@ vmd_configure(void) " chown fattr flock", NULL) == -1) fatal("pledge"); + if ((env->vmd_ptmfd = getptmfd()) == -1) + fatal("getptmfd %s", PATH_PTMDEV); + if (parse_config(env->vmd_conffile) == -1) { proc_kill(&env->vmd_ps); exit(1); @@ -1846,32 +1847,29 @@ vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) int vm_opentty(struct vmd_vm *vm) { - struct ptmget ptm; struct stat st; struct group *gr; uid_t uid; gid_t gid; mode_t mode; - int on; + int on = 1, tty_slave; /* * Open tty with pre-opened PTM fd */ - if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) + if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname, + NULL, NULL) == -1) { + log_warn("fdopenpty"); return (-1); + } + close(tty_slave); /* * We use user ioctl(2) mode to pass break commands. */ - on = 1; - if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) - fatal("could not enable user ioctl mode"); - - vm->vm_tty = ptm.cfd; - close(ptm.sfd); - if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname)) - >= sizeof(vm->vm_ttyname)) { - log_warnx("%s: truncated ttyname", __func__); + if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) { + log_warn("could not enable user ioctl mode on %s", + vm->vm_ttyname); goto fail; } @@ -1896,8 +1894,10 @@ vm_opentty(struct vmd_vm *vm) * Change ownership and mode of the tty as required. * Loosely based on the implementation of sshpty.c */ - if (stat(vm->vm_ttyname, &st) == -1) + if (fstat(vm->vm_tty, &st) == -1) { + log_warn("fstat failed for %s", vm->vm_ttyname); goto fail; + } if (st.st_uid != uid || st.st_gid != gid) { if (chown(vm->vm_ttyname, uid, gid) == -1) { diff --git a/usr.sbin/vmd/vmm.c b/usr.sbin/vmd/vmm.c index 1f7678fbaff..dcd9a91fe4f 100644 --- a/usr.sbin/vmd/vmm.c +++ b/usr.sbin/vmd/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.118 2024/02/04 14:57:00 dv Exp $ */ +/* $OpenBSD: vmm.c,v 1.119 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -52,6 +52,7 @@ #include "vmd.h" #include "vmm.h" #include "atomicio.h" +#include "proc.h" void vmm_sighdlr(int, short, void *); int vmm_start_vm(struct imsg *, uint32_t *, pid_t *); @@ -467,8 +468,14 @@ vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *)) { struct imsgev *iev = &vm->vm_iev; - if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { - log_warn("failed to set nonblocking mode on vm pipe"); + /* + * Set to close-on-exec as vmm_pipe is used after fork+exec to + * establish async ipc between vm and vmd's vmm process. This + * prevents future vm processes or virtio subprocesses from + * inheriting this control channel. + */ + if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + log_warn("failed to set close-on-exec for vmm ipc channel"); return (-1); } @@ -661,16 +668,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) } } - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1) + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds) + == -1) fatal("socketpair"); - /* Keep our channel open after exec. */ - if (fcntl(fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - /* Start child vmd for this VM (fork, chroot, drop privs) */ vm_pid = fork(); if (vm_pid == -1) { @@ -745,7 +746,6 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) /* Wire up our pipe into the event handling. */ if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1) fatal("setup vm pipe"); - } else { /* Child. Create a new session. */ if (setsid() == -1) @@ -764,21 +764,6 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) close(fd); } - /* Toggle all fds to not close on exec. */ - for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) - for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) - if (vm->vm_disks[i][j] != -1) - fcntl(vm->vm_disks[i][j], F_SETFD, 0); - for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) - fcntl(vm->vm_ifs[i].vif_fd, F_SETFD, 0); - if (vm->vm_kernel != -1) - fcntl(vm->vm_kernel, F_SETFD, 0); - if (vm->vm_cdrom != -1) - fcntl(vm->vm_cdrom, F_SETFD, 0); - if (vm->vm_tty != -1) - fcntl(vm->vm_tty, F_SETFD, 0); - fcntl(env->vmd_fd, F_SETFD, 0); /* vmm device fd */ - /* * Prepare our new argv for execvp(2) with the fd of our open * pipe to the parent/vmm process as an argument.