From e44e04278855350d6c4e057a8696a8b8fc1ba393 Mon Sep 17 00:00:00 2001 From: stefan Date: Sat, 9 Jul 2016 09:06:22 +0000 Subject: [PATCH] Prepare vionet to be handled asynchronously to the VCPU thread This splits the handling of received data into a separate function that can later be called in parallel to the VCPU thread instead of handling received packets on VCPU exits only. It also makes virtq accesses in the rx path safe to run in parallel to the VCPU thread: the last index into the 'avail' ring the driver has notified to the host is kept track of. It also makes sure that the host only writes back to the 'avail' ring instead of modifying the whole receive virtq. While there, describe what virtio_vq_info and virtio_io_cfg are used for, as suggested by mlarkin@ ok mlarkin@ --- usr.sbin/vmd/virtio.c | 104 ++++++++++++++++++++++++------------------ usr.sbin/vmd/virtio.h | 37 ++++++++++++++- usr.sbin/vmd/vmd.h | 3 +- usr.sbin/vmd/vmm.c | 24 +++++++++- 4 files changed, 120 insertions(+), 48 deletions(-) diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c index 516af7e3e75..ddf65052f3d 100644 --- a/usr.sbin/vmd/virtio.c +++ b/usr.sbin/vmd/virtio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.c,v 1.14 2016/07/07 00:58:31 mlarkin Exp $ */ +/* $OpenBSD: virtio.c,v 1.15 2016/07/09 09:06:22 stefan Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -789,11 +790,13 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc) uint64_t q_gpa; uint32_t vr_sz; uint16_t idx, pkt_desc_idx, hdr_desc_idx; + ptrdiff_t off; int ret; char *vr; struct vring_desc *desc, *pkt_desc, *hdr_desc; struct vring_avail *avail; struct vring_used *used; + struct vring_used_elem *ue; ret = 0; @@ -824,7 +827,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc) idx = dev->vq[0].last_avail & VIONET_QUEUE_MASK; - if ((avail->idx & VIONET_QUEUE_MASK) == idx) { + if ((dev->vq[0].notified_avail & VIONET_QUEUE_MASK) == idx) { log_warnx("vionet queue notify - no space, dropping packet"); free(vr); return (0); @@ -854,13 +857,20 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc) ret = 1; dev->cfg.isr_status = 1; - used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; - used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len + sz; + ue = &used->ring[used->idx & VIONET_QUEUE_MASK]; + ue->id = hdr_desc_idx; + ue->len = hdr_desc->len + sz; used->idx++; dev->vq[0].last_avail = (dev->vq[0].last_avail + 1); - *spc = avail->idx - dev->vq[0].last_avail; - if (write_mem(q_gpa, vr, vr_sz)) { + *spc = dev->vq[0].notified_avail - dev->vq[0].last_avail; + + off = (char *)ue - vr; + if (write_mem(q_gpa + off, ue, sizeof *ue)) log_warnx("vionet: error writing vio ring"); + else { + off = (char *)&used->idx - vr; + if (write_mem(q_gpa + off, &used->idx, sizeof used->idx)) + log_warnx("vionet: error writing vio ring"); } free(vr); @@ -868,43 +878,58 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, ssize_t sz, int *spc) return (ret); } +/* + * vionet_rx + * + * Enqueue data that was received on a tap file descriptor + * to the vionet device queue. + */ +static int +vionet_rx(struct vionet_dev *dev) +{ + char buf[PAGE_SIZE]; + int hasdata, num_enq = 0, spc = 0; + ssize_t sz; + + do { + sz = read(dev->fd, buf, sizeof buf); + if (sz == -1) { + /* + * If we get EAGAIN, No data is currently available. + * Do not treat this as an error. + */ + if (errno != EAGAIN) + log_warn("unexpected read error on vionet " + "device"); + } else if (sz != 0) + num_enq += vionet_enq_rx(dev, buf, sz, &spc); + else if (sz == 0) { + log_debug("process_rx: no data"); + hasdata = 0; + break; + } + + hasdata = fd_hasdata(dev->fd); + } while (spc && hasdata); + + dev->rx_pending = hasdata; + return (num_enq); +} + int vionet_process_rx(void) { - int i, num_enq, spc, hasdata; - ssize_t sz; - char *buf; - struct pollfd pfd; + int i, num_enq; num_enq = 0; - buf = malloc(PAGE_SIZE); for (i = 0 ; i < nr_vionet; i++) { if (!vionet[i].rx_added) continue; - spc = 1; - hasdata = 1; - memset(buf, 0, PAGE_SIZE); - memset(&pfd, 0, sizeof(struct pollfd)); - pfd.fd = vionet[i].fd; - pfd.events = POLLIN; - while (spc && hasdata) { - hasdata = poll(&pfd, 1, 0); - if (hasdata == 1) { - sz = read(vionet[i].fd, buf, PAGE_SIZE); - if (sz != 0) { - num_enq += vionet_enq_rx(&vionet[i], - buf, sz, &spc); - } else if (sz == 0) { - log_debug("process_rx: no data"); - hasdata = 0; - } - } - } + if (vionet[i].rx_pending || fd_hasdata(vionet[i].fd)) + num_enq += vionet_rx(&vionet[i]); } - free(buf); - /* * XXX returns the number of packets enqueued across all vionet, which * may not be right for VMs with more than one vionet. @@ -917,11 +942,8 @@ vionet_notify_rx(struct vionet_dev *dev) { uint64_t q_gpa; uint32_t vr_sz; - uint16_t idx, pkt_desc_idx; char *vr; - struct vring_desc *desc, *pkt_desc; struct vring_avail *avail; - struct vring_used *used; vr_sz = vring_size(VIONET_QUEUE_SIZE); q_gpa = dev->vq[dev->cfg.queue_notify].qa; @@ -933,26 +955,18 @@ vionet_notify_rx(struct vionet_dev *dev) return; } - memset(vr, 0, vr_sz); - if (read_mem(q_gpa, vr, vr_sz)) { log_warnx("error reading gpa 0x%llx", q_gpa); free(vr); return; } - /* Compute offsets in ring of descriptors, avail ring, and used ring */ - desc = (struct vring_desc *)(vr); + /* Compute offset into avail ring */ avail = (struct vring_avail *)(vr + dev->vq[dev->cfg.queue_notify].vq_availoffset); - used = (struct vring_used *)(vr + - dev->vq[dev->cfg.queue_notify].vq_usedoffset); - - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIONET_QUEUE_MASK; - pkt_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; - pkt_desc = &desc[pkt_desc_idx]; dev->rx_added = 1; + dev->vq[0].notified_avail = avail->idx; free(vr); } diff --git a/usr.sbin/vmd/virtio.h b/usr.sbin/vmd/virtio.h index 5d886dadbfa..a676797f5fb 100644 --- a/usr.sbin/vmd/virtio.h +++ b/usr.sbin/vmd/virtio.h @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.h,v 1.3 2015/12/03 08:42:11 reyk Exp $ */ +/* $OpenBSD: virtio.h,v 1.4 2016/07/09 09:06:22 stefan Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -34,6 +34,10 @@ /* All the devices we support have either 1 or 2 queues */ #define VIRTIO_MAX_QUEUES 2 +/* + * This struct stores notifications from a virtio driver. There is + * one such struct per virtio device. + */ struct virtio_io_cfg { uint32_t device_feature; uint32_t guest_feature; @@ -45,12 +49,42 @@ struct virtio_io_cfg { uint8_t isr_status; }; +/* + * A virtio device can have several virtqs. For example, vionet has one virtq + * each for transmitting and receiving packets. This struct describes the state + * of one virtq, such as their address in memory, size, offsets of rings, etc. + * There is one virtio_vq_info per virtq. + */ struct virtio_vq_info { + /* Guest physical address of virtq */ uint32_t qa; + + /* Queue size: number of queue entries in virtq */ uint32_t qs; + + /* + * The offset of the 'available' ring within the virtq located at + * guest physical address qa above + */ uint32_t vq_availoffset; + + /* + * The offset of the 'used' ring within the virtq located at guest + * physical address qa above + */ uint32_t vq_usedoffset; + + /* + * The index into a slot of the 'available' ring that a virtio device + * can consume next + */ uint16_t last_avail; + + /* + * The most recent index into the 'available' ring that a virtio + * driver notified to the host. + */ + uint16_t notified_avail; }; struct viornd_dev { @@ -74,6 +108,7 @@ struct vionet_dev { struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; int fd, rx_added; + int rx_pending; uint8_t mac[6]; }; diff --git a/usr.sbin/vmd/vmd.h b/usr.sbin/vmd/vmd.h index 9bb309b8ec7..386ba12f79a 100644 --- a/usr.sbin/vmd/vmd.h +++ b/usr.sbin/vmd/vmd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmd.h,v 1.20 2016/04/04 17:13:54 stefan Exp $ */ +/* $OpenBSD: vmd.h,v 1.21 2016/07/09 09:06:22 stefan Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -117,6 +117,7 @@ pid_t vmm(struct privsep *, struct privsep_proc *); int write_mem(paddr_t, void *buf, size_t); int read_mem(paddr_t, void *buf, size_t); int opentap(void); +int fd_hasdata(int); /* control.c */ int config_init(struct vmd *); diff --git a/usr.sbin/vmd/vmm.c b/usr.sbin/vmd/vmm.c index b863a876ddb..6ca56930e67 100644 --- a/usr.sbin/vmd/vmm.c +++ b/usr.sbin/vmd/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.31 2016/07/08 06:35:12 mlarkin Exp $ */ +/* $OpenBSD: vmm.c,v 1.32 2016/07/09 09:06:22 stefan Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1761,3 +1762,24 @@ read_mem(paddr_t src, void *buf, size_t len) return (0); } + +/* + * fd_hasdata + * + * Returns 1 if data can be read from an fd, or 0 otherwise. + */ +int +fd_hasdata(int fd) +{ + struct pollfd pfd[1]; + int nready, hasdata = 0; + + pfd[0].fd = fd; + pfd[0].events = POLLIN; + nready = poll(pfd, 1, 0); + if (nready == -1) + log_warn("checking file descriptor for data failed"); + else if (nready == 1 && pfd[0].revents & POLLIN) + hasdata = 1; + return (hasdata); +} -- 2.20.1