-/* $OpenBSD: config.c,v 1.74 2024/01/18 14:49:59 claudio Exp $ */
+/* $OpenBSD: config.c,v 1.75 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) {
if (vm->vm_kernel_path != NULL) {
/* Open external kernel for child */
- kernfd = open(vm->vm_kernel_path, O_RDONLY);
+ kernfd = open(vm->vm_kernel_path, O_RDONLY | O_CLOEXEC);
if (kernfd == -1) {
ret = errno;
log_warn("%s: can't open kernel or BIOS "
* license.
*/
if (kernfd == -1) {
- if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) {
+ if ((kernfd = open(VM_DEFAULT_BIOS,
+ O_RDONLY | O_CLOEXEC)) == -1) {
log_warn("can't open %s", VM_DEFAULT_BIOS);
ret = VMD_BIOS_MISSING;
goto fail;
}
}
- /* Open disk images for child */
+ /*
+ * Open disk images for child. Don't set O_CLOEXEC as these must be
+ * explicitly closed by the vm process during virtio subprocess launch.
+ */
for (i = 0 ; i < vmc->vmc_ndisks; i++) {
if (strlcpy(path, vmc->vmc_disks[i], sizeof(path))
>= sizeof(path))
log_warnx("disk path %s too long", vmc->vmc_disks[i]);
memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases));
- oflags = O_RDWR|O_EXLOCK|O_NONBLOCK;
- aflags = R_OK|W_OK;
+ oflags = O_RDWR | O_EXLOCK | O_NONBLOCK;
+ aflags = R_OK | W_OK;
for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
/* Stat disk[i] to ensure it is a regular file */
if ((diskfds[i][j] = open(path, oflags)) == -1) {
* All writes should go to the top image, allowing them
* to be shared.
*/
- oflags = O_RDONLY|O_NONBLOCK;
+ oflags = O_RDONLY | O_NONBLOCK;
aflags = R_OK;
n = virtio_get_base(diskfds[i][j], base, sizeof(base),
vmc->vmc_disktypes[i], path);
/*
* Either open the requested tap(4) device or get
- * the next available one.
+ * the next available one. Don't set O_CLOEXEC as these
+ * should be closed by the vm process during virtio device
+ * launch.
*/
if (s != NULL) {
snprintf(path, PATH_MAX, "/dev/%s", s);
vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK);
}
- /* Open TTY */
+ /*
+ * Open TTY. Duplicate the fd before sending so the privileged parent
+ * process can perform permissions cleanup of the pty on vm termination.
+ */
if (vm->vm_ttyname[0] == '\0') {
if (vm_opentty(vm) == -1) {
log_warn("%s: can't open tty %s", __func__,
-/* $OpenBSD: vioblk.c,v 1.11 2024/02/04 14:54:51 dv Exp $ */
+/* $OpenBSD: vioblk.c,v 1.12 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
/* Configure our sync channel event handler. */
log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
dev.sync_fd);
- if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto fail;
- }
imsg_init(&dev.sync_iev.ibuf, dev.sync_fd);
dev.sync_iev.handler = handle_sync_io;
dev.sync_iev.data = &dev;
-/* $OpenBSD: vionet.c,v 1.9 2024/02/03 21:41:35 dv Exp $ */
+/* $OpenBSD: vionet.c,v 1.10 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
/* Configure our sync channel event handler. */
log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
dev.sync_fd);
- if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto fail;
- }
imsg_init(&dev.sync_iev.ibuf, dev.sync_fd);
dev.sync_iev.handler = handle_sync_io;
dev.sync_iev.data = &dev;
-/* $OpenBSD: virtio.c,v 1.110 2023/11/03 11:16:43 dv Exp $ */
+/* $OpenBSD: virtio.c,v 1.111 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
static void virtio_dispatch_dev(int, short, void *);
static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);
+static int virtio_dev_closefds(struct virtio_dev *);
const char *
virtio_reg_name(uint8_t reg)
int data_fds[VM_MAX_BASE_PER_DISK], sync_fds[2], async_fds[2], ret = 0;
size_t i, data_fds_sz, sz = 0;
struct viodev_msg msg;
+ struct virtio_dev *dev_entry;
struct imsg imsg;
struct imsgev *iev = &dev->sync_iev;
}
/* We need two channels: one synchronous (IO reads) and one async. */
- if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, sync_fds) == -1) {
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
+ sync_fds) == -1) {
log_warn("failed to create socketpair");
return (errno);
}
- if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, async_fds) == -1) {
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
+ async_fds) == -1) {
log_warn("failed to create async socketpair");
return (errno);
}
- /* Keep communication channels open after exec. */
- if (fcntl(sync_fds[1], F_SETFD, 0)) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto err;
- }
- if (fcntl(async_fds[1], F_SETFD, 0)) {
- ret = errno;
- log_warn("%s: fcnt", __func__);
- goto err;
- }
-
/* Fork... */
dev_pid = fork();
if (dev_pid == -1) {
for (i = 0; i < data_fds_sz; i++)
close_fd(data_fds[i]);
- /* Set our synchronous channel to non-blocking. */
- if (fcntl(sync_fds[0], F_SETFL, O_NONBLOCK) == -1) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto err;
- }
-
/* 1. Send over our configured device. */
log_debug("%s: sending '%c' type device struct", __func__,
dev->dev_type);
close_fd(async_fds[0]);
close_fd(sync_fds[0]);
+ /* Close pty. Virtio devices do not need it. */
+ close_fd(vm->vm_tty);
+ vm->vm_tty = -1;
+
+ if (vm->vm_cdrom != -1) {
+ close_fd(vm->vm_cdrom);
+ vm->vm_cdrom = -1;
+ }
+
/* Keep data file descriptors open after exec. */
- for (i = 0; i < data_fds_sz; i++) {
- log_debug("%s: marking fd %d !close-on-exec", __func__,
- data_fds[i]);
- if (fcntl(data_fds[i], F_SETFD, 0)) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto err;
- }
+ SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) {
+ if (dev_entry == dev)
+ continue;
+ if (virtio_dev_closefds(dev_entry) == -1)
+ fatalx("unable to close other virtio devs");
}
memset(&nargv, 0, sizeof(nargv));
log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
dev->dev_type, fd);
- if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
- log_warn("failed to set nonblocking mode on vm device pipe");
- return (-1);
- }
-
imsg_init(&iev->ibuf, fd);
iev->handler = cb;
iev->data = dev;
if (ret == -1)
log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}
+
+/*
+ * Close all underlying file descriptors for a given virtio device.
+ *
+ * For a disk device (VMD_DEVTYPE_DISK), every entry of vioblk.disk_fd up to
+ * vioblk.ndisk_fd is closed; for a network device (VMD_DEVTYPE_NET), the
+ * single vionet.data_fd is closed. Each closed descriptor is then reset
+ * to -1 in the device structure.
+ *
+ * Returns 0 on success, or -1 (after logging a warning) if the device type
+ * is not one of the above.
+ */
+static int
+virtio_dev_closefds(struct virtio_dev *dev)
+{
+ size_t i;
+
+ switch (dev->dev_type) {
+ case VMD_DEVTYPE_DISK:
+ for (i = 0; i < dev->vioblk.ndisk_fd; i++) {
+ close_fd(dev->vioblk.disk_fd[i]);
+ dev->vioblk.disk_fd[i] = -1;
+ }
+ break;
+ case VMD_DEVTYPE_NET:
+ close_fd(dev->vionet.data_fd);
+ dev->vionet.data_fd = -1;
+ break;
+ default:
+ log_warnx("%s: invalid device type", __func__);
+ return (-1);
+ }
+
+ return (0);
+}
-/* $OpenBSD: vm.c,v 1.96 2024/01/18 14:49:59 claudio Exp $ */
+/* $OpenBSD: vm.c,v 1.97 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
* fd_vmm: file descriptor for communicating with vmm(4) device
*/
void
-vm_main(int fd, int vmm_fd)
+vm_main(int fd, int fd_vmm)
{
struct vm_create_params *vcp = NULL;
struct vmd_vm vm;
size_t sz = 0;
int ret = 0;
+ /*
+ * The vm process relies on global state. Set the fd for /dev/vmm.
+ */
+ env->vmd_fd = fd_vmm;
+
/*
* We aren't root, so we can't chroot(2). Use unveil(2) instead.
*/
vcp->vcp_name);
_exit(EINVAL);
}
- if (fcntl(vm.vm_kernel, F_SETFL, O_NONBLOCK) == -1) {
- ret = errno;
- log_warn("failed to set nonblocking mode on boot fd");
- _exit(ret);
- }
}
ret = start_vm(&vm, fd);
memset(p, 0, sizeof(struct vm_dev_pipe));
- ret = pipe(fds);
+ ret = pipe2(fds, O_CLOEXEC);
if (ret)
fatal("failed to create vm_dev_pipe pipe");
}
/*
- * Re-map the guest address space using the shared memory file descriptor.
+ * Re-map the guest address space using vmm(4)'s VMM_IOC_SHARE ioctl.
*
* Returns 0 on success, non-zero in event of failure.
*/
-/* $OpenBSD: vmd.c,v 1.154 2024/02/04 14:56:45 dv Exp $ */
+/* $OpenBSD: vmd.c,v 1.155 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
if ((env = calloc(1, sizeof(*env))) == NULL)
fatal("calloc: env");
+ env->vmd_fd = -1;
+ env->vmd_fd6 = -1;
while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) {
switch (ch) {
ps = &env->vmd_ps;
ps->ps_env = env;
- env->vmd_fd = vmm_fd;
if (config_init(env) == -1)
fatal("failed to initialize configuration");
/* Open /dev/vmm early. */
if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
- env->vmd_fd = open(VMM_NODE, O_RDWR);
+ env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC);
if (env->vmd_fd == -1)
fatal("%s", VMM_NODE);
}
int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
size_t ncpus_sz = sizeof(ncpus);
- if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
- fatal("open %s", PATH_PTMDEV);
-
/*
* pledge in the parent process:
* stdio - for malloc and basic I/O including events.
" chown fattr flock", NULL) == -1)
fatal("pledge");
+ if ((env->vmd_ptmfd = getptmfd()) == -1)
+ fatal("getptmfd %s", PATH_PTMDEV);
+
if (parse_config(env->vmd_conffile) == -1) {
proc_kill(&env->vmd_ps);
exit(1);
int
vm_opentty(struct vmd_vm *vm)
{
- struct ptmget ptm;
struct stat st;
struct group *gr;
uid_t uid;
gid_t gid;
mode_t mode;
- int on;
+ int on = 1, tty_slave;
/*
* Open tty with pre-opened PTM fd
*/
- if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1))
+ if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname,
+ NULL, NULL) == -1) {
+ log_warn("fdopenpty");
return (-1);
+ }
+ close(tty_slave);
/*
* We use user ioctl(2) mode to pass break commands.
*/
- on = 1;
- if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1)
- fatal("could not enable user ioctl mode");
-
- vm->vm_tty = ptm.cfd;
- close(ptm.sfd);
- if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname))
- >= sizeof(vm->vm_ttyname)) {
- log_warnx("%s: truncated ttyname", __func__);
+ if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) {
+ log_warn("could not enable user ioctl mode on %s",
+ vm->vm_ttyname);
goto fail;
}
* Change ownership and mode of the tty as required.
* Loosely based on the implementation of sshpty.c
*/
- if (stat(vm->vm_ttyname, &st) == -1)
+ if (fstat(vm->vm_tty, &st) == -1) {
+ log_warn("fstat failed for %s", vm->vm_ttyname);
goto fail;
+ }
if (st.st_uid != uid || st.st_gid != gid) {
if (chown(vm->vm_ttyname, uid, gid) == -1) {
-/* $OpenBSD: vmm.c,v 1.118 2024/02/04 14:57:00 dv Exp $ */
+/* $OpenBSD: vmm.c,v 1.119 2024/02/05 21:58:09 dv Exp $ */
/*
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
#include "vmd.h"
#include "vmm.h"
#include "atomicio.h"
+#include "proc.h"
void vmm_sighdlr(int, short, void *);
int vmm_start_vm(struct imsg *, uint32_t *, pid_t *);
{
struct imsgev *iev = &vm->vm_iev;
- if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
- log_warn("failed to set nonblocking mode on vm pipe");
+ /*
+ * Set to close-on-exec as vmm_pipe is used after fork+exec to
+ * establish async ipc between vm and vmd's vmm process. This
+ * prevents future vm processes or virtio subprocesses from
+ * inheriting this control channel.
+ */
+ if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
+ log_warn("failed to set close-on-exec for vmm ipc channel");
return (-1);
}
}
}
- if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds)
+ == -1)
fatal("socketpair");
- /* Keep our channel open after exec. */
- if (fcntl(fds[1], F_SETFD, 0)) {
- ret = errno;
- log_warn("%s: fcntl", __func__);
- goto err;
- }
-
/* Start child vmd for this VM (fork, chroot, drop privs) */
vm_pid = fork();
if (vm_pid == -1) {
/* Wire up our pipe into the event handling. */
if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
fatal("setup vm pipe");
-
} else {
/* Child. Create a new session. */
if (setsid() == -1)
close(fd);
}
- /* Toggle all fds to not close on exec. */
- for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++)
- for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
- if (vm->vm_disks[i][j] != -1)
- fcntl(vm->vm_disks[i][j], F_SETFD, 0);
- for (i = 0 ; i < vm->vm_params.vmc_nnics; i++)
- fcntl(vm->vm_ifs[i].vif_fd, F_SETFD, 0);
- if (vm->vm_kernel != -1)
- fcntl(vm->vm_kernel, F_SETFD, 0);
- if (vm->vm_cdrom != -1)
- fcntl(vm->vm_cdrom, F_SETFD, 0);
- if (vm->vm_tty != -1)
- fcntl(vm->vm_tty, F_SETFD, 0);
- fcntl(env->vmd_fd, F_SETFD, 0); /* vmm device fd */
-
/*
* Prepare our new argv for execvp(2) with the fd of our open
* pipe to the parent/vmm process as an argument.