vmd(8): fix vmctl client "wait" state corruption
authordv <dv@openbsd.org>
Mon, 26 Apr 2021 22:58:27 +0000 (22:58 +0000)
committerdv <dv@openbsd.org>
Mon, 26 Apr 2021 22:58:27 +0000 (22:58 +0000)
Adds queue-based tracking of waiting client state to fix the cause of
state corruption when a vmctl(8) user cancels a wait and restarts it.
The socket fd value for the control process client was being used to
track the waiting party, but this also prevented multiple waiting
clients.

This moves all the state tracking of who to notify of a vm's stopping
to the control process and no longer requires the parent process to
track it in the global environment state.

Future work will be needed to smooth out the difference between the
IMSG_VMDOP_TERMINATE_VM_{EVENT,RESPONSE} events instead of needing to
translate before relaying to the vmctl(8) client.

Tested by Mischa Peters (thanks!)

ok mlarkin@

usr.sbin/vmd/control.c
usr.sbin/vmd/vmd.c
usr.sbin/vmd/vmm.c

index d6b266e..18d47d2 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: control.c,v 1.34 2021/04/20 21:11:56 dv Exp $ */
+/*     $OpenBSD: control.c,v 1.35 2021/04/26 22:58:27 dv Exp $ */
 
 /*
  * Copyright (c) 2010-2015 Reyk Floeter <reyk@openbsd.org>
 
 struct ctl_connlist ctl_conns = TAILQ_HEAD_INITIALIZER(ctl_conns);
 
+struct ctl_notify {
+       int                     ctl_fd;
+       uint32_t                ctl_vmid;
+       TAILQ_ENTRY(ctl_notify) entry;
+};
+TAILQ_HEAD(ctl_notify_q, ctl_notify) ctl_notify_q =
+       TAILQ_HEAD_INITIALIZER(ctl_notify_q);
 void
         control_accept(int, short, void *);
 struct ctl_conn
@@ -78,7 +85,10 @@ int
 control_dispatch_vmd(int fd, struct privsep_proc *p, struct imsg *imsg)
 {
        struct ctl_conn         *c;
+       struct ctl_notify       *notify = NULL, *notify_next;
        struct privsep          *ps = p->p_ps;
+       struct vmop_result       vmr;
+       int                      waiting = 0;
 
        switch (imsg->hdr.type) {
        case IMSG_VMDOP_START_VM_RESPONSE:
@@ -86,18 +96,72 @@ control_dispatch_vmd(int fd, struct privsep_proc *p, struct imsg *imsg)
        case IMSG_VMDOP_SEND_VM_RESPONSE:
        case IMSG_VMDOP_RECEIVE_VM_RESPONSE:
        case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
-       case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
        case IMSG_VMDOP_GET_INFO_VM_DATA:
        case IMSG_VMDOP_GET_INFO_VM_END_DATA:
        case IMSG_CTL_FAIL:
        case IMSG_CTL_OK:
+               /* Provide basic response back to a specific control client */
                if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
                        log_warnx("%s: lost control connection: fd %d",
                            __func__, imsg->hdr.peerid);
                        return (0);
                }
                imsg_compose_event(&c->iev, imsg->hdr.type,
-                   0, 0, imsg->fd, imsg->data, IMSG_DATA_SIZE(imsg));
+                   0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
+               break;
+       case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
+               IMSG_SIZE_CHECK(imsg, &vmr);
+               memcpy(&vmr, imsg->data, sizeof(vmr));
+
+               if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
+                       log_warnx("%s: lost control connection: fd %d",
+                           __func__, imsg->hdr.peerid);
+                       return (0);
+               }
+
+               TAILQ_FOREACH(notify, &ctl_notify_q, entry) {
+                       if (notify->ctl_fd == (int) imsg->hdr.peerid) {
+                               /*
+                                * Update if waiting by vm name. This is only
+                                * supported when stopping a single vm. If
+                                * stopping all vms, vmctl(8) sends the request
+                                * using the vmid.
+                                */
+                               if (notify->ctl_vmid < 1)
+                                       notify->ctl_vmid = vmr.vmr_id;
+                               waiting = 1;
+                               break;
+                       }
+               }
+
+               /* An error needs to be relayed to the client immediately */
+               if (!waiting || vmr.vmr_result) {
+                       imsg_compose_event(&c->iev, imsg->hdr.type,
+                           0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
+
+                       if (notify) {
+                               TAILQ_REMOVE(&ctl_notify_q, notify, entry);
+                               free(notify);
+                       }
+               }
+               break;
+       case IMSG_VMDOP_TERMINATE_VM_EVENT:
+               /* Notify any waiting clients that a VM terminated */
+               IMSG_SIZE_CHECK(imsg, &vmr);
+               memcpy(&vmr, imsg->data, sizeof(vmr));
+
+               TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
+                       if (notify->ctl_vmid != vmr.vmr_id)
+                               continue;
+                       if ((c = control_connbyfd(notify->ctl_fd)) != NULL) {
+                               /* XXX vmctl expects *_RESPONSE, not *_EVENT */
+                               imsg_compose_event(&c->iev,
+                                   IMSG_VMDOP_TERMINATE_VM_RESPONSE,
+                                   0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
+                               TAILQ_REMOVE(&ctl_notify_q, notify, entry);
+                               free(notify);
+                       }
+               }
                break;
        case IMSG_VMDOP_CONFIG:
                config_getconfig(ps->ps_env, imsg);
@@ -276,7 +340,8 @@ control_connbyfd(int fd)
 void
 control_close(int fd, struct control_sock *cs)
 {
-       struct ctl_conn *c;
+       struct ctl_conn         *c;
+       struct ctl_notify       *notify, *notify_next;
 
        if ((c = control_connbyfd(fd)) == NULL) {
                log_warn("%s: fd %d: not found", __func__, fd);
@@ -286,6 +351,14 @@ control_close(int fd, struct control_sock *cs)
        msgbuf_clear(&c->iev.ibuf.w);
        TAILQ_REMOVE(&ctl_conns, c, entry);
 
+       TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
+               if (notify->ctl_fd == fd) {
+                       TAILQ_REMOVE(&ctl_notify_q, notify, entry);
+                       free(notify);
+                       break;
+               }
+       }
+
        event_del(&c->iev.ev);
        close(c->iev.ibuf.fd);
 
@@ -308,7 +381,8 @@ control_dispatch_imsg(int fd, short event, void *arg)
        struct imsg                      imsg;
        struct vmop_create_params        vmc;
        struct vmop_id                   vid;
-       int                              n, v, ret = 0;
+       struct ctl_notify               *notify;
+       int                              n, v, wait = 0, ret = 0;
 
        if ((c = control_connbyfd(fd)) == NULL) {
                log_warn("%s: fd %d: not found", __func__, fd);
@@ -388,12 +462,26 @@ control_dispatch_imsg(int fd, short event, void *arg)
                        }
                        break;
                case IMSG_VMDOP_WAIT_VM_REQUEST:
+                       wait = 1;
+                       /* FALLTHROUGH */
                case IMSG_VMDOP_TERMINATE_VM_REQUEST:
                        if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
                                goto fail;
                        memcpy(&vid, imsg.data, sizeof(vid));
                        vid.vid_uid = c->peercred.uid;
 
+                       if (wait || vid.vid_flags & VMOP_WAIT) {
+                               vid.vid_flags |= VMOP_WAIT;
+                               notify = calloc(1, sizeof(struct ctl_notify));
+                               if (notify == NULL)
+                                       fatal("%s: calloc", __func__);
+                               notify->ctl_vmid = vid.vid_id;
+                               notify->ctl_fd = fd;
+                               TAILQ_INSERT_TAIL(&ctl_notify_q, notify, entry);
+                               log_debug("%s: registered wait for peer %d",
+                                   __func__, fd);
+                       }
+
                        if (proc_compose_imsg(ps, PROC_PARENT, -1,
                            imsg.hdr.type, fd, -1, &vid, sizeof(vid)) == -1) {
                                log_debug("%s: proc_compose_imsg failed",
index c9bcbb1..3494db4 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmd.c,v 1.122 2021/04/05 11:35:26 dv Exp $    */
+/*     $OpenBSD: vmd.c,v 1.123 2021/04/26 22:58:27 dv Exp $    */
 
 /*
  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -128,42 +128,41 @@ vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
                IMSG_SIZE_CHECK(imsg, &vid);
                memcpy(&vid, imsg->data, sizeof(vid));
                flags = vid.vid_flags;
+               cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
 
                if ((id = vid.vid_id) == 0) {
                        /* Lookup vm (id) by name */
                        if ((vm = vm_getbyname(vid.vid_name)) == NULL) {
                                res = ENOENT;
-                               cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
                                break;
                        } else if ((vm->vm_state & VM_STATE_SHUTDOWN) &&
                            (flags & VMOP_FORCE) == 0) {
                                res = EALREADY;
-                               cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
                                break;
                        } else if (!(vm->vm_state & VM_STATE_RUNNING)) {
                                res = EINVAL;
-                               cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
                                break;
                        }
                        id = vm->vm_vmid;
                } else if ((vm = vm_getbyvmid(id)) == NULL) {
                        res = ENOENT;
-                       cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
                        break;
                }
-               if (vm_checkperm(vm, &vm->vm_params.vmc_owner,
-                   vid.vid_uid) != 0) {
+               if (vm_checkperm(vm, &vm->vm_params.vmc_owner, vid.vid_uid)) {
                        res = EPERM;
-                       cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
                        break;
                }
 
-               memset(&vid, 0, sizeof(vid));
-               vid.vid_id = id;
-               vid.vid_flags = flags;
-               if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
-                   imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
-                       return (-1);
+               /* Only relay TERMINATION requests, not WAIT requests */
+               if (imsg->hdr.type == IMSG_VMDOP_TERMINATE_VM_REQUEST) {
+                       memset(&vid, 0, sizeof(vid));
+                       vid.vid_id = id;
+                       vid.vid_flags = flags;
+
+                       if (proc_compose_imsg(ps, PROC_VMM, -1, imsg->hdr.type,
+                               imsg->hdr.peerid, -1, &vid, sizeof(vid)) == -1)
+                               return (-1);
+               }
                break;
        case IMSG_VMDOP_GET_INFO_VM_REQUEST:
                proc_forward_imsg(ps, imsg, PROC_VMM, -1);
@@ -420,12 +419,14 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
        case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
                IMSG_SIZE_CHECK(imsg, &vmr);
                memcpy(&vmr, imsg->data, sizeof(vmr));
-               DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
-                   __func__, vmr.vmr_id);
-               proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
-               if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
-                       break;
-               if (vmr.vmr_result == 0) {
+
+               if (vmr.vmr_result) {
+                       DPRINTF("%s: forwarding TERMINATE VM for vm id %d",
+                           __func__, vmr.vmr_id);
+                       proc_forward_imsg(ps, imsg, PROC_CONTROL, -1);
+               } else {
+                       if ((vm = vm_getbyvmid(vmr.vmr_id)) == NULL)
+                               break;
                        /* Mark VM as shutting down */
                        vm->vm_state |= VM_STATE_SHUTDOWN;
                }
@@ -478,16 +479,13 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struct imsg *imsg)
                        config_setvm(ps, vm, (uint32_t)-1, vm->vm_uid);
                }
 
-               /* Send a response if a control client is waiting for it */
-               if (imsg->hdr.peerid != (uint32_t)-1) {
-                       /* the error is meaningless for deferred responses */
-                       vmr.vmr_result = 0;
+               /* The error is meaningless for deferred responses */
+               vmr.vmr_result = 0;
 
-                       if (proc_compose_imsg(ps, PROC_CONTROL, -1,
-                           IMSG_VMDOP_TERMINATE_VM_RESPONSE,
-                           imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
-                               return (-1);
-               }
+               if (proc_compose_imsg(ps, PROC_CONTROL, -1,
+                       IMSG_VMDOP_TERMINATE_VM_EVENT,
+                       imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
+                       return (-1);
                break;
        case IMSG_VMDOP_GET_INFO_VM_DATA:
                IMSG_SIZE_CHECK(imsg, &vir);
index 3a7034e..65cac6e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmm.c,v 1.100 2021/04/11 21:02:40 dv Exp $    */
+/*     $OpenBSD: vmm.c,v 1.101 2021/04/26 22:58:27 dv Exp $    */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -150,30 +150,6 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
                        res = ENOENT;
                cmd = IMSG_VMDOP_START_VM_RESPONSE;
                break;
-       case IMSG_VMDOP_WAIT_VM_REQUEST:
-               IMSG_SIZE_CHECK(imsg, &vid);
-               memcpy(&vid, imsg->data, sizeof(vid));
-               id = vid.vid_id;
-
-               DPRINTF("%s: recv'ed WAIT_VM for %d", __func__, id);
-
-               cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
-               if (id == 0) {
-                       res = ENOENT;
-               } else if ((vm = vm_getbyvmid(id)) != NULL) {
-                       if (vm->vm_peerid != (uint32_t)-1) {
-                               peerid = vm->vm_peerid;
-                               res = EINTR;
-                       } else
-                               cmd = 0;
-                       vm->vm_peerid = imsg->hdr.peerid;
-               } else {
-                       /* vm doesn't exist, cannot stop vm */
-                       log_debug("%s: cannot stop vm that is not running",
-                           __func__);
-                       res = VMD_VM_STOP_INVALID;
-               }
-               break;
        case IMSG_VMDOP_TERMINATE_VM_REQUEST:
                IMSG_SIZE_CHECK(imsg, &vid);
                memcpy(&vid, imsg->data, sizeof(vid));
@@ -222,15 +198,6 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
                                        res = VMD_VM_STOP_INVALID;
                                }
                        }
-                       if ((flags & VMOP_WAIT) &&
-                           res == 0 && (vm->vm_state & VM_STATE_SHUTDOWN)) {
-                               if (vm->vm_peerid != (uint32_t)-1) {
-                                       peerid = vm->vm_peerid;
-                                       res = EINTR;
-                               } else
-                                       cmd = 0;
-                               vm->vm_peerid = imsg->hdr.peerid;
-                       }
                } else {
                        /* VM doesn't exist, cannot stop vm */
                        log_debug("%s: cannot stop vm that is not running",