From 6c31e103a2a6d1ce5c80059b30a4becd40d1b3bb Mon Sep 17 00:00:00 2001 From: dv Date: Mon, 21 Jun 2021 02:38:18 +0000 Subject: [PATCH] vmd(8): support variable length vionet rx descriptor chains The original implementation of the virtio network device assumed a driver would only provide a 2-descriptor chain for receiving packets. The virtio spec allows for variable length chains and drivers, in practice, construct them when they use a sufficiently large MTU. This change lets the device use variable length chains provided by the driver, thus allowing for drivers to set an MTU up to the underlying host-side tap(4)'s limit of TUNMRU (16384). Size limitations are now enforced on both tx and rx-side dropping anything violating the underlying tap(4) min and max limits. More work is needed to increase the read(2) buffer in use by vmd to prevent packet truncation. OK mlarkin@ --- usr.sbin/vmd/virtio.c | 166 ++++++++++++++++++++++-------------------- usr.sbin/vmd/virtio.h | 8 +- 2 files changed, 96 insertions(+), 78 deletions(-) diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c index 42080434b45..27648324916 100644 --- a/usr.sbin/vmd/virtio.c +++ b/usr.sbin/vmd/virtio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.c,v 1.90 2021/06/17 22:03:33 dv Exp $ */ +/* $OpenBSD: virtio.c,v 1.91 2021/06/21 02:38:18 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -1095,6 +1095,13 @@ vionet_update_qs(struct vionet_dev *dev) } /* + * vionet_enq_rx + * + * Take a given packet from the host-side tap and copy it into the guest's + * buffers utilizing the rx virtio ring. If the packet length is invalid + * (too small or too large) or if there are not enough buffers available, + * the packet is dropped. + * * Must be called with dev->mutex acquired. */ int @@ -1102,24 +1109,25 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc) { uint64_t q_gpa; uint32_t vr_sz; - uint16_t idx, pkt_desc_idx, hdr_desc_idx; - ptrdiff_t off; - int ret; - char *vr; - size_t rem; + uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx; + int ret = 0; + char *vr = NULL; + size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0; + size_t chain_len = 0; struct vring_desc *desc, *pkt_desc, *hdr_desc; struct vring_avail *avail; struct vring_used *used; struct vring_used_elem *ue; struct virtio_net_hdr hdr; + size_t hdr_sz; - ret = 0; - - if (sz < 1 || sz > IP_MAXPACKET + ETHER_HDR_LEN) { + if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) { log_warn("%s: invalid packet size", __func__); return (0); } + hdr_sz = sizeof(hdr); + if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK)) return ret; @@ -1144,90 +1152,91 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc) used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset); idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK; - if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) { - log_debug("vionet queue notify - no space, dropping packet"); + log_debug("%s: insufficient available buffer capacity, " + "dropping packet.", __func__); goto out; } hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; hdr_desc = &desc[hdr_desc_idx]; - pkt_desc_idx = hdr_desc->next & VIONET_QUEUE_MASK; - pkt_desc = &desc[pkt_desc_idx]; - - /* Set up the virtio header (written first, before the packet data) */ - memset(&hdr, 0, sizeof(struct virtio_net_hdr)); - hdr.hdr_len = sizeof(struct virtio_net_hdr); - - /* Check size of header descriptor */ - if (hdr_desc->len < sizeof(struct virtio_net_hdr)) { - log_warnx("%s: invalid header descriptor (too small)", - __func__); - goto out; - } + dxx = hdr_desc_idx; + chain_hdr_idx = dxx; + chain_len = 0; - /* Write out virtio header */ - if (write_mem(hdr_desc->addr, &hdr, sizeof(struct virtio_net_hdr))) { - log_warnx("vionet: rx enq header write_mem error @ " - "0x%llx", hdr_desc->addr); - goto out; - } + /* Process the descriptor and walk any potential chain. */ + do { + off = 0; + pkt_desc = &desc[dxx]; + if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) { + log_warnx("%s: invalid descriptor, not writable", + __func__); + goto out; + } - /* - * Compute remaining space in the first (header) descriptor, and - * copy the packet data after if space is available. Otherwise, - * copy to the pkt_desc descriptor. - */ - rem = hdr_desc->len - sizeof(struct virtio_net_hdr); + /* How much data do we get to write? */ + if (sz - bufsz > pkt_desc->len) + chunk_size = pkt_desc->len; + else + chunk_size = sz - bufsz; - if (rem >= sz) { - if (write_mem(hdr_desc->addr + sizeof(struct virtio_net_hdr), - pkt, sz)) { - log_warnx("vionet: rx enq packet write_mem error @ " - "0x%llx", pkt_desc->addr); - goto out; + if (chain_len == 0) { + off = hdr_sz; + if (chunk_size == pkt_desc->len) + chunk_size -= off; } - } else { - /* Fallback to pkt_desc descriptor */ - if (pkt_desc->len >= sz) { - /* Must be not readable */ - if ((pkt_desc->flags & VRING_DESC_F_WRITE) == 0) { - log_warnx("unexpected readable rx desc %d", - pkt_desc_idx); - goto out; - } - /* Write packet to descriptor ring */ - if (write_mem(pkt_desc->addr, pkt, sz)) { - log_warnx("vionet: rx enq packet write_mem " - "error @ 0x%llx", pkt_desc->addr); - goto out; - } - } else { - log_warnx("%s: descriptor too small for packet data", - __func__); + /* Write a chunk of data if we need to */ + if (chunk_size && write_mem(pkt_desc->addr + off, + pkt + pkt_offset, chunk_size)) { + log_warnx("%s: failed to write to buffer 0x%llx", + __func__, pkt_desc->addr); goto out; } + + chain_len += chunk_size + off; + bufsz += chunk_size; + pkt_offset += chunk_size; + + dxx = pkt_desc->next & VIONET_QUEUE_MASK; + } while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT); + + /* Update the list of used buffers. */ + ue = &used->ring[(used->idx) & VIONET_QUEUE_MASK]; + ue->id = chain_hdr_idx; + ue->len = chain_len; + off = ((char *)ue - vr); + if (write_mem(q_gpa + off, ue, sizeof(*ue))) { + log_warnx("%s: error updating rx used ring", __func__); + goto out; } - ret = 1; - dev->cfg.isr_status = 1; - ue = &used->ring[used->idx & VIONET_QUEUE_MASK]; - ue->id = hdr_desc_idx; - ue->len = sz + sizeof(struct virtio_net_hdr); + /* Move our marker in the ring...*/ used->idx++; - dev->vq[RXQ].last_avail++; - *spc = dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail; + dev->vq[RXQ].last_avail = (dev->vq[RXQ].last_avail + 1) & + VIONET_QUEUE_MASK; + + /* Prepend the virtio net header in the first buffer. */ + memset(&hdr, 0, sizeof(hdr)); + hdr.hdr_len = hdr_sz; + if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) { + log_warnx("vionet: rx enq header write_mem error @ 0x%llx", + hdr_desc->addr); + goto out; + } - off = (char *)ue - vr; - if (write_mem(q_gpa + off, ue, sizeof *ue)) + /* Update the index field in the used ring. This must be done last. */ + dev->cfg.isr_status = 1; + off = (char *)&used->idx - vr; + *spc = (dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail) & + VIONET_QUEUE_MASK; + + if (write_mem(q_gpa + off, &used->idx, sizeof(used->idx))) log_warnx("vionet: error writing vio ring"); - else { - off = (char *)&used->idx - vr; - if (write_mem(q_gpa + off, &used->idx, sizeof used->idx)) - log_warnx("vionet: error writing vio ring"); - } + + ret = 1; + out: free(vr); return (ret); @@ -1490,11 +1499,11 @@ vionet_notify_tx(struct vionet_dev *dev) /* Remove virtio header descriptor len */ pktsz -= hdr_desc->len; - /* Only allow buffer len < max IP packet + Ethernet header */ - if (pktsz > IP_MAXPACKET + ETHER_HDR_LEN) { + /* Drop packets violating device MTU-based limits */ + if (pktsz < VIONET_MIN_TXLEN || pktsz > VIONET_MAX_TXLEN) { log_warnx("%s: invalid packet size %lu", __func__, pktsz); - goto out; + goto drop_packet; } pkt = malloc(pktsz); if (pkt == NULL) { @@ -1575,6 +1584,7 @@ vionet_notify_tx(struct vionet_dev *dev) goto out; } + drop_packet: ret = 1; dev->cfg.isr_status = 1; used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; @@ -1916,6 +1926,8 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, sizeof(struct vring_desc) * VIONET_QUEUE_SIZE + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE)); vionet[i].vq[RXQ].last_avail = 0; + vionet[i].vq[RXQ].notified_avail = 0; + vionet[i].vq[TXQ].qs = VIONET_QUEUE_SIZE; vionet[i].vq[TXQ].vq_availoffset = sizeof(struct vring_desc) * VIONET_QUEUE_SIZE; diff --git a/usr.sbin/vmd/virtio.h b/usr.sbin/vmd/virtio.h index e618d4ebc21..b65855fcf2a 100644 --- a/usr.sbin/vmd/virtio.h +++ b/usr.sbin/vmd/virtio.h @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.h,v 1.39 2021/06/16 16:55:02 dv Exp $ */ +/* $OpenBSD: virtio.h,v 1.40 2021/06/21 02:38:18 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin @@ -19,6 +19,7 @@ #include #include +#include #include @@ -45,6 +46,11 @@ #define VIONET_QUEUE_SIZE 256 #define VIONET_QUEUE_MASK (VIONET_QUEUE_SIZE - 1) +/* Virtio network device is backed by tap(4), so inherit limits */ +#define VIONET_HARD_MTU TUNMRU +#define VIONET_MIN_TXLEN ETHER_HDR_LEN +#define VIONET_MAX_TXLEN VIONET_HARD_MTU + ETHER_HDR_LEN + /* VMM Control Interface shutdown timeout (in seconds) */ #define VMMCI_TIMEOUT 3 #define VMMCI_SHUTDOWN_TIMEOUT 120 -- 2.20.1