vmd(8)/vmctl(8): allow vm owners to override boot kernel.
authordv <dv@openbsd.org>
Fri, 28 Apr 2023 19:46:41 +0000 (19:46 +0000)
committerdv <dv@openbsd.org>
Fri, 28 Apr 2023 19:46:41 +0000 (19:46 +0000)
vmd allows non-root users to "own" a vm defined in vm.conf(5). While
the user can start/stop the vm, if they break their filesystem they
have no means of booting recovery media like a ramdisk kernel.

This change opens the provided boot kernel via vmctl and passes the
file descriptor through the control channel to vmd. The next boot
of the vm will use the provided file descriptor as boot kernel/bios.
Subsequent boots (e.g. a reboot) will return to using behavior
defined in vm.conf or the default bios image.

ok mlarkin@

usr.sbin/vmctl/main.c
usr.sbin/vmctl/vmctl.c
usr.sbin/vmd/config.c
usr.sbin/vmd/control.c
usr.sbin/vmd/parse.y
usr.sbin/vmd/vm.c
usr.sbin/vmd/vmd.c
usr.sbin/vmd/vmd.h

index c1b04c5..b4c1056 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: main.c,v 1.74 2023/04/25 12:51:07 dv Exp $    */
+/*     $OpenBSD: main.c,v 1.75 2023/04/28 19:46:41 dv Exp $    */
 
 /*
  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -81,7 +81,7 @@ struct ctl_command ctl_commands[] = {
        { "show",       CMD_STATUS,     ctl_status,     "[id]" },
        { "start",      CMD_START,      ctl_start,
            "[-cL] [-B device] [-b path] [-d disk] [-i count]\n"
-           "\t\t[-m size] [-n switch] [-r path] [-t name] id | name" },
+           "\t\t[-m size] [-n switch] [-r path] [-t name] id | name",  1},
        { "status",     CMD_STATUS,     ctl_status,     "[id]" },
        { "stop",       CMD_STOP,       ctl_stop,       "[-fw] [id | -a]" },
        { "unpause",    CMD_UNPAUSE,    ctl_unpause,    "id" },
@@ -820,6 +820,10 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
        char             path[PATH_MAX];
        const char      *s;
 
+       /* We may require sendfd */
+       if (pledge("stdio rpath exec unix getpw unveil sendfd", NULL) == -1)
+               err(1, "pledge");
+
        while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:")) != -1) {
                switch (ch) {
                case 'b':
index c499298..66b7d27 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmctl.c,v 1.86 2023/04/25 12:51:07 dv Exp $   */
+/*     $OpenBSD: vmctl.c,v 1.87 2023/04/28 19:46:41 dv Exp $   */
 
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
@@ -81,6 +81,15 @@ vm_start(uint32_t start_id, const char *name, size_t memsize, int nnics,
        int i;
        const char *s;
 
+       if (kernel) {
+               if (unveil(kernel, "r") == -1)
+                       err(1, "unveil boot kernel");
+       } else {
+               /* We can drop sendfd promise. */
+               if (pledge("stdio rpath exec unix getpw unveil", NULL) == -1)
+                       err(1, "pledge");
+       }
+
        if (memsize)
                flags |= VMOP_CREATE_MEMORY;
        if (nnics)
@@ -98,7 +107,7 @@ vm_start(uint32_t start_id, const char *name, size_t memsize, int nnics,
                        memsize = VM_DEFAULT_MEMORY;
                if (ndisks > VM_MAX_DISKS_PER_VM)
                        errx(1, "too many disks");
-               else if (ndisks == 0)
+               else if (kernel == NULL && ndisks == 0)
                        warnx("starting without disks");
                if (kernel == NULL && ndisks == 0 && !iso)
                        errx(1, "no kernel or disk/cdrom specified");
@@ -106,13 +115,13 @@ vm_start(uint32_t start_id, const char *name, size_t memsize, int nnics,
                        nnics = 0;
                if (nnics > VM_MAX_NICS_PER_VM)
                        errx(1, "too many network interfaces");
-               if (nnics == 0)
+               if (kernel == NULL && nnics == 0)
                        warnx("starting without network interfaces");
        }
 
        if ((vmc = calloc(1, sizeof(struct vmop_create_params))) == NULL)
                return (ENOMEM);
-
+       vmc->vmc_kernel = -1;
        vmc->vmc_flags = flags;
 
        /* vcp includes configuration that is shared with the kernel */
@@ -173,10 +182,13 @@ vm_start(uint32_t start_id, const char *name, size_t memsize, int nnics,
                    sizeof(vcp->vcp_name)) >= sizeof(vcp->vcp_name))
                        errx(1, "vm name too long");
        }
-       if (kernel != NULL)
-               if (strlcpy(vmc->vmc_kernel, kernel,
-                   sizeof(vmc->vmc_kernel)) >= sizeof(vmc->vmc_kernel))
+       if (kernel != NULL) {
+               if (strnlen(kernel, PATH_MAX) == PATH_MAX)
                        errx(1, "kernel name too long");
+               vmc->vmc_kernel = open(kernel, O_RDONLY);
+               if (vmc->vmc_kernel == -1)
+                       err(1, "cannot open kernel '%s'", kernel);
+       }
        if (iso != NULL)
                if (strlcpy(vmc->vmc_cdrom, iso,
                    sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom))
@@ -187,7 +199,7 @@ vm_start(uint32_t start_id, const char *name, size_t memsize, int nnics,
                        errx(1, "instance vm name too long");
        vmc->vmc_bootdevice = bootdevice;
 
-       imsg_compose(ibuf, IMSG_VMDOP_START_VM_REQUEST, 0, 0, -1,
+       imsg_compose(ibuf, IMSG_VMDOP_START_VM_REQUEST, 0, 0, vmc->vmc_kernel,
            vmc, sizeof(struct vmop_create_params));
 
        free(vcp);
index e40cc65..b538d40 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: config.c,v 1.70 2023/04/25 12:46:13 dv Exp $  */
+/*     $OpenBSD: config.c,v 1.71 2023/04/28 19:46:42 dv Exp $  */
 
 /*
  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -283,14 +283,15 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
        /*
         * From here onward, all failures need cleanup and use goto fail
         */
-
-       if (!(vm->vm_state & VM_STATE_RECEIVED)) {
-               if (strlen(vmc->vmc_kernel)) {
+       if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) {
+               if (vm->vm_kernel_path != NULL) {
                        /* Open external kernel for child */
-                       if ((kernfd = open(vmc->vmc_kernel, O_RDONLY)) == -1) {
+                       kernfd = open(vm->vm_kernel_path, O_RDONLY);
+                       if (kernfd == -1) {
                                ret = errno;
                                log_warn("%s: can't open kernel or BIOS "
-                                   "boot image %s", __func__, vmc->vmc_kernel);
+                                   "boot image %s", __func__,
+                                   vm->vm_kernel_path);
                                goto fail;
                        }
                }
@@ -301,21 +302,25 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
                 * typically distributed separately due to an incompatible
                 * license.
                 */
-               if (kernfd == -1 &&
-                   (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) {
-                       log_warn("can't open %s", VM_DEFAULT_BIOS);
-                       ret = VMD_BIOS_MISSING;
-                       goto fail;
+               if (kernfd == -1) {
+                       if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) {
+                               log_warn("can't open %s", VM_DEFAULT_BIOS);
+                               ret = VMD_BIOS_MISSING;
+                               goto fail;
+                       }
                }
 
                if (vm_checkaccess(kernfd,
                    vmc->vmc_checkaccess & VMOP_CREATE_KERNEL,
                    uid, R_OK) == -1) {
-                       log_warnx("vm \"%s\" no read access to kernel %s",
-                           vcp->vcp_name, vmc->vmc_kernel);
+                       log_warnx("vm \"%s\" no read access to kernel "
+                           "%s", vcp->vcp_name, vm->vm_kernel_path);
                        ret = EPERM;
                        goto fail;
                }
+
+               vm->vm_kernel = kernfd;
+               vmc->vmc_kernel = kernfd;
        }
 
        /* Open CDROM image for child */
@@ -467,11 +472,11 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
        /* XXX check proc_compose_imsg return values */
        if (vm->vm_state & VM_STATE_RECEIVED)
                proc_compose_imsg(ps, PROC_VMM, -1,
-                   IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd,  vmc,
+                   IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc,
                    sizeof(struct vmop_create_params));
        else
                proc_compose_imsg(ps, PROC_VMM, -1,
-                   IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd,
+                   IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, vm->vm_kernel,
                    vmc, sizeof(*vmc));
 
        if (strlen(vmc->vmc_cdrom))
@@ -502,7 +507,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
 
        if (!(vm->vm_state & VM_STATE_RECEIVED))
                proc_compose_imsg(ps, PROC_VMM, -1,
-                   IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd,  NULL, 0);
+                   IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0);
 
        free(tapfds);
 
@@ -520,7 +525,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
  fail:
        log_warnx("failed to start vm %s", vcp->vcp_name);
 
-       if (kernfd != -1)
+       if (vm->vm_kernel != -1)
                close(kernfd);
        if (cdromfd != -1)
                close(cdromfd);
@@ -551,16 +556,15 @@ config_getvm(struct privsep *ps, struct imsg *imsg)
 
        IMSG_SIZE_CHECK(imsg, &vmc);
        memcpy(&vmc, imsg->data, sizeof(vmc));
+       vmc.vmc_kernel = imsg->fd;
 
        errno = 0;
        if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1)
                goto fail;
 
-       /* If the fd is -1, the kernel will be searched on the disk */
-       vm->vm_kernel = imsg->fd;
        vm->vm_state |= VM_STATE_RUNNING;
        vm->vm_peerid = (uint32_t)-1;
-
+       vm->vm_kernel = imsg->fd;
        return (0);
 
  fail:
index 1fe8c1e..ef4d0f0 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: control.c,v 1.40 2023/03/08 04:43:15 guenther Exp $   */
+/*     $OpenBSD: control.c,v 1.41 2023/04/28 19:46:42 dv Exp $ */
 
 /*
  * Copyright (c) 2010-2015 Reyk Floeter <reyk@openbsd.org>
@@ -451,8 +451,10 @@ control_dispatch_imsg(int fd, short event, void *arg)
                        vmc.vmc_owner.uid = c->peercred.uid;
                        vmc.vmc_owner.gid = -1;
 
+                       /* imsg.fd may contain kernel image fd. */
                        if (proc_compose_imsg(ps, PROC_PARENT, -1,
-                           imsg.hdr.type, fd, -1, &vmc, sizeof(vmc)) == -1) {
+                           imsg.hdr.type, fd, imsg.fd, &vmc,
+                           sizeof(vmc)) == -1) {
                                control_close(fd, cs);
                                return;
                        }
index b3e53eb..ea91e74 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: parse.y,v 1.65 2023/04/25 12:46:13 dv Exp $   */
+/*     $OpenBSD: parse.y,v 1.66 2023/04/28 19:46:42 dv Exp $   */
 
 /*
  * Copyright (c) 2007-2016 Reyk Floeter <reyk@openbsd.org>
@@ -96,6 +96,7 @@ unsigned int   parse_format(const char *);
 static struct vmop_create_params vmc;
 static struct vm_create_params *vcp;
 static struct vmd_switch       *vsw;
+static char                    *kernel = NULL;
 static char                     vsw_type[IF_NAMESIZE];
 static int                      vmc_disable;
 static size_t                   vmc_nnics;
@@ -325,6 +326,8 @@ vm          : VM string vm_instance         {
                        char            *name;
 
                        memset(&vmc, 0, sizeof(vmc));
+                       vmc.vmc_kernel = -1;
+
                        vcp = &vmc.vmc_params;
                        vmc_disable = 0;
                        vmc_nnics = 0;
@@ -396,8 +399,11 @@ vm         : VM string vm_instance         {
                                            vmc_disable ?
                                            "disabled" : "enabled");
                                }
+                               vm->vm_kernel_path = kernel;
+                               vm->vm_kernel = -1;
                                vm->vm_from_config = 1;
                        }
+                       kernel = NULL;
                }
                ;
 
@@ -458,7 +464,7 @@ vm_opts             : disable                       {
                | BOOT string                   {
                        char     path[PATH_MAX];
 
-                       if (vmc.vmc_kernel[0] != '\0') {
+                       if (kernel != NULL) {
                                yyerror("kernel specified more than once");
                                free($2);
                                YYERROR;
@@ -471,12 +477,7 @@ vm_opts            : disable                       {
                                YYERROR;
                        }
                        free($2);
-                       if (strlcpy(vmc.vmc_kernel, path,
-                           sizeof(vmc.vmc_kernel)) >=
-                           sizeof(vmc.vmc_kernel)) {
-                               yyerror("kernel name too long");
-                               YYERROR;
-                       }
+                       kernel = path;
                        vmc.vmc_flags |= VMOP_CREATE_KERNEL;
                }
                | BOOT DEVICE bootdevice        {
index 39f5544..d42abb5 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vm.c,v 1.87 2023/04/27 22:47:27 dv Exp $      */
+/*     $OpenBSD: vm.c,v 1.88 2023/04/28 19:46:42 dv Exp $      */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -270,9 +270,17 @@ vm_main(int fd)
         * We need, at minimum, a vm_kernel fd to boot a vm. This is either a
         * kernel or a BIOS image.
         */
-       if (vm.vm_kernel < 0 && !(vm.vm_state & VM_STATE_RECEIVED)) {
-               log_warnx("%s: failed to receive boot fd", vcp->vcp_name);
-               _exit(EINVAL);
+       if (!(vm.vm_state & VM_STATE_RECEIVED)) {
+               if (vm.vm_kernel == -1) {
+                       log_warnx("%s: failed to receive boot fd",
+                           vcp->vcp_name);
+                       _exit(EINVAL);
+               }
+               if (fcntl(vm.vm_kernel, F_SETFL, O_NONBLOCK) == -1) {
+                       ret = errno;
+                       log_warn("failed to set nonblocking mode on boot fd");
+                       _exit(ret);
+               }
        }
 
        ret = start_vm(&vm, fd);
index a9124f9..c06fe97 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmd.c,v 1.145 2023/04/27 22:47:27 dv Exp $    */
+/*     $OpenBSD: vmd.c,v 1.146 2023/04/28 19:46:42 dv Exp $    */
 
 /*
  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -111,8 +111,10 @@ vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
        case IMSG_VMDOP_START_VM_REQUEST:
                IMSG_SIZE_CHECK(imsg, &vmc);
                memcpy(&vmc, imsg->data, sizeof(vmc));
+               vmc.vmc_kernel = imsg->fd;
+
                ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid);
-               if (vmc.vmc_flags == 0) {
+               if (vmc.vmc_flags == 0 || vmc.vmc_flags == VMOP_CREATE_KERNEL) {
                        /* start an existing VM with pre-configured options */
                        if (!(ret == -1 && errno == EALREADY &&
                            !(vm->vm_state & VM_STATE_RUNNING))) {
@@ -123,6 +125,7 @@ vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
                        res = errno;
                        cmd = IMSG_VMDOP_START_VM_RESPONSE;
                }
+
                if (res == 0) {
                        res = config_setvm(ps, vm, imsg->hdr.peerid,
                            vm->vm_params.vmc_owner.uid);
@@ -1290,6 +1293,8 @@ vm_remove(struct vmd_vm *vm, const char *caller)
        TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
 
        vm_stop(vm, 0, caller);
+       if (vm->vm_kernel_path != NULL && !vm->vm_from_config)
+               free(vm->vm_kernel_path);
        free(vm);
 }
 
@@ -1353,6 +1358,7 @@ vm_register(struct privsep *ps, struct vmop_create_params *vmc,
                        errno = EPERM;
                        goto fail;
                }
+               vm->vm_kernel = vmc->vmc_kernel;
                *ret_vm = vm;
                errno = EALREADY;
                goto fail;
@@ -1385,8 +1391,8 @@ vm_register(struct privsep *ps, struct vmop_create_params *vmc,
        } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) {
                log_warnx("invalid number of interfaces");
                goto fail;
-       } else if (strlen(vmc->vmc_kernel) == 0 &&
-           vmc->vmc_ndisks == 0 && strlen(vmc->vmc_cdrom) == 0) {
+       } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0
+           && strlen(vmc->vmc_cdrom) == 0) {
                log_warnx("no kernel or disk/cdrom specified");
                goto fail;
        } else if (strlen(vcp->vcp_name) == 0) {
@@ -1415,8 +1421,12 @@ vm_register(struct privsep *ps, struct vmop_create_params *vmc,
        vm->vm_pid = -1;
        vm->vm_tty = -1;
        vm->vm_receive_fd = -1;
+       vm->vm_kernel = -1;
        vm->vm_state &= ~VM_STATE_PAUSED;
 
+       if (vmc->vmc_kernel > -1)
+               vm->vm_kernel = vmc->vmc_kernel;
+
        for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
                for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
                        vm->vm_disks[i][j] = -1;
@@ -1445,7 +1455,6 @@ vm_register(struct privsep *ps, struct vmop_create_params *vmc,
                        vmc->vmc_macs[i][5] = rng >> 8;
                }
        }
-       vm->vm_kernel = -1;
        vm->vm_cdrom = -1;
        vm->vm_iev.ibuf.fd = -1;
 
@@ -1593,17 +1602,14 @@ vm_instance(struct privsep *ps, struct vmd_vm **vm_parent,
        }
 
        /* kernel */
-       if (strlen(vmc->vmc_kernel) > 0) {
+       if (vmc->vmc_kernel > -1 || (vm->vm_kernel_path != NULL &&
+               strnlen(vm->vm_kernel_path, PATH_MAX) < PATH_MAX)) {
                if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) {
                        log_warnx("vm \"%s\" no permission to set boot image",
                            name);
                        return (EPERM);
                }
                vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL;
-       } else if (strlcpy(vmc->vmc_kernel, vmcp->vmc_kernel,
-           sizeof(vmc->vmc_kernel)) >= sizeof(vmc->vmc_kernel)) {
-               log_warnx("vm \"%s\" kernel name too long", name);
-               return (EINVAL);
        }
 
        /* cdrom */
index cb77fda..68de054 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmd.h,v 1.120 2023/04/27 22:47:27 dv Exp $    */
+/*     $OpenBSD: vmd.h,v 1.121 2023/04/28 19:46:42 dv Exp $    */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -230,7 +230,7 @@ struct vmop_create_params {
 #define VMDF_QCOW2             0x02
 
        char                     vmc_cdrom[PATH_MAX];
-       char                     vmc_kernel[PATH_MAX];
+       int                      vmc_kernel;
 
        size_t                   vmc_nnics;
        char                     vmc_ifnames[VM_MAX_NICS_PER_VM][IF_NAMESIZE];
@@ -299,7 +299,10 @@ struct vmd_vm {
        struct vmop_create_params vm_params;
        pid_t                    vm_pid;
        uint32_t                 vm_vmid;
+
        int                      vm_kernel;
+       char                    *vm_kernel_path; /* Used by vm.conf. */
+
        int                      vm_cdrom;
        int                      vm_disks[VM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK];
        struct vmd_if            vm_ifs[VM_MAX_NICS_PER_VM];