From 0623538713195a655a6f9d2c9b2e142d4e999c2f Mon Sep 17 00:00:00 2001 From: mpi Date: Mon, 26 Jun 2017 09:32:31 +0000 Subject: [PATCH] Assert that the corresponding socket is locked when manipulating socket buffers. This is one step towards unlocking TCP input path. Note that all the functions asserting for the socket lock are not necessarily MP-safe. All the fields of 'struct socket' aren't protected. Introduce a new kernel-only kqueue hint, NOTE_SUBMIT, to be able to tell when a filter needs to lock the underlying data structures. Logic and name taken from NetBSD. Tested by Hrvoje Popovski. ok claudio@, bluhm@, mikeb@ --- sys/kern/sys_generic.c | 4 +-- sys/kern/uipc_socket.c | 32 ++++++++++++--------- sys/kern/uipc_socket2.c | 52 ++++++++++++++++++++-------------- sys/kern/uipc_usrreq.c | 10 +++---- sys/miscfs/fifofs/fifo_vnops.c | 4 +-- sys/net/pfkeyv2.c | 8 +++--- sys/net/rtsock.c | 15 +++++----- sys/netinet/ip_divert.c | 4 +-- sys/netinet/ip_mroute.c | 4 +-- sys/netinet/raw_ip.c | 9 +++--- sys/netinet/tcp_input.c | 28 +++++++++--------- sys/netinet/tcp_output.c | 4 +-- sys/netinet/tcp_subr.c | 5 ++-- sys/netinet/tcp_usrreq.c | 16 +++++------ sys/netinet/udp_usrreq.c | 9 +++--- sys/netinet6/ip6_divert.c | 4 +-- sys/netinet6/ip6_mroute.c | 2 +- sys/netinet6/raw_ip6.c | 7 +++-- sys/nfs/nfs_socket.c | 4 +-- sys/sys/event.h | 9 +++++- sys/sys/socketvar.h | 52 ++++++++++++++++++++++------------ 21 files changed, 163 insertions(+), 119 deletions(-) diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b1d3f7b6951..8afc08f8d17 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.114 2017/01/24 00:58:55 mpi Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.115 2017/06/26 09:32:31 mpi Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -799,7 +799,7 @@ selwakeup(struct selinfo *sip) struct proc *p; int s; - KNOTE(&sip->si_note, 0); + KNOTE(&sip->si_note, NOTE_SUBMIT); if 
(sip->si_seltid == 0) return; if (sip->si_flags & SI_COLL) { diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 82b14796a21..dfdc62b78b4 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.188 2017/06/20 17:13:21 bluhm Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.189 2017/06/26 09:32:31 mpi Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -216,7 +216,7 @@ sofree(struct socket *so) so->so_sp = NULL; } #endif /* SOCKET_SPLICE */ - sbrelease(&so->so_snd); + sbrelease(so, &so->so_snd); sorflush(so); pool_put(&socket_pool, so); } @@ -440,7 +440,7 @@ restart: } else if (addr == 0) snderr(EDESTADDRREQ); } - space = sbspace(&so->so_snd); + space = sbspace(so, &so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || @@ -1041,7 +1041,7 @@ sorflush(struct socket *so) struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; sa_family_t af = pr->pr_domain->dom_family; - struct sockbuf asb; + struct socket aso; sb->sb_flags |= SB_NOINTR; sblock(sb, M_WAITOK, @@ -1049,16 +1049,17 @@ sorflush(struct socket *so) &netlock : NULL); socantrcvmore(so); sbunlock(sb); - asb = *sb; + aso.so_proto = pr; + aso.so_rcv = *sb; memset(sb, 0, sizeof (*sb)); /* XXX - the memset stomps all over so_rcv */ - if (asb.sb_flags & SB_KNOTE) { - sb->sb_sel.si_note = asb.sb_sel.si_note; + if (aso.so_rcv.sb_flags & SB_KNOTE) { + sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note; sb->sb_flags = SB_KNOTE; } if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) - (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease(&asb); + (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb); + sbrelease(&aso, &aso.so_rcv); } #ifdef SOCKET_SPLICE @@ -1270,7 +1271,7 @@ somove(struct socket *so, int wait) maxreached = 1; } } - space = sbspace(&sosp->so_snd); + space = sbspace(sosp, &sosp->so_snd); if (so->so_oobmark && so->so_oobmark < len && so->so_oobmark < space + 
1024) space += 1024; @@ -1635,7 +1636,7 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) goto bad; } if (sbcheckreserve(cnt, so->so_snd.sb_wat) || - sbreserve(&so->so_snd, cnt)) { + sbreserve(so, &so->so_snd, cnt)) { error = ENOBUFS; goto bad; } @@ -1648,7 +1649,7 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) goto bad; } if (sbcheckreserve(cnt, so->so_rcv.sb_wat) || - sbreserve(&so->so_rcv, cnt)) { + sbreserve(so, &so->so_rcv, cnt)) { error = ENOBUFS; goto bad; } @@ -1990,8 +1991,13 @@ int filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; + int s; - kn->kn_data = sbspace(&so->so_snd); + if (!(hint & NOTE_SUBMIT)) + s = solock(so); + kn->kn_data = sbspace(so, &so->so_snd); + if (!(hint & NOTE_SUBMIT)) + sounlock(s); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 0ba476bcf51..65c5dd74f65 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.78 2017/06/07 13:41:02 mpi Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.79 2017/06/26 09:32:31 mpi Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -436,9 +436,9 @@ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { - if (sbreserve(&so->so_snd, sndcc)) + if (sbreserve(so, &so->so_snd, sndcc)) goto bad; - if (sbreserve(&so->so_rcv, rcvcc)) + if (sbreserve(so, &so->so_rcv, rcvcc)) goto bad2; so->so_snd.sb_wat = sndcc; so->so_rcv.sb_wat = rcvcc; @@ -450,7 +450,7 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: - sbrelease(&so->so_snd); + sbrelease(so, &so->so_snd); bad: return (ENOBUFS); } @@ -461,8 +461,10 @@ bad: * if buffering efficiency is near the normal case. 
*/ int -sbreserve(struct sockbuf *sb, u_long cc) +sbreserve(struct socket *so, struct sockbuf *sb, u_long cc) { + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); if (cc == 0 || cc > sb_max) return (1); @@ -503,10 +505,10 @@ sbchecklowmem(void) * Free mbufs held by a socket, and reserved mbuf space. */ void -sbrelease(struct sockbuf *sb) +sbrelease(struct socket *so, struct sockbuf *sb) { - sbflush(sb); + sbflush(so, sb); sb->sb_hiwat = sb->sb_mbmax = 0; } @@ -597,7 +599,7 @@ do { \ * discarded and mbufs are compacted where possible. */ void -sbappend(struct sockbuf *sb, struct mbuf *m) +sbappend(struct socket *so, struct sockbuf *sb, struct mbuf *m) { struct mbuf *n; @@ -614,7 +616,7 @@ sbappend(struct sockbuf *sb, struct mbuf *m) */ do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + sbappendrecord(so, sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); @@ -635,9 +637,10 @@ sbappend(struct sockbuf *sb, struct mbuf *m) * in the socket buffer, that is, a stream protocol (such as TCP). */ void -sbappendstream(struct sockbuf *sb, struct mbuf *m) +sbappendstream(struct socket *so, struct sockbuf *sb, struct mbuf *m) { - + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); KDASSERT(m->m_nextpkt == NULL); KASSERT(sb->sb_mb == sb->sb_lastrecord); @@ -679,10 +682,13 @@ sbcheck(struct sockbuf *sb) * begins a new record. */ void -sbappendrecord(struct sockbuf *sb, struct mbuf *m0) +sbappendrecord(struct socket *so, struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); + if (m0 == NULL) return; @@ -759,8 +765,8 @@ sbinsertoob(struct sockbuf *sb, struct mbuf *m0) * Returns 0 if no space in sockbuf or insufficient mbufs. 
*/ int -sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, - struct mbuf *control) +sbappendaddr(struct socket *so, struct sockbuf *sb, struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control) { struct mbuf *m, *n, *nlast; int space = asa->sa_len; @@ -774,7 +780,7 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, if (n->m_next == NULL) /* keep pointer to last control buf */ break; } - if (space > sbspace(sb)) + if (space > sbspace(so, sb)) return (0); if (asa->sa_len > MLEN) return (0); @@ -806,7 +812,8 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, } int -sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) +sbappendcontrol(struct socket *so, struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control) { struct mbuf *m, *mlast, *n; int space = 0; @@ -821,7 +828,7 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) n = m; /* save pointer to last control buffer */ for (m = m0; m; m = m->m_next) space += m->m_len; - if (space > sbspace(sb)) + if (space > sbspace(so, sb)) return (0); n->m_next = m0; /* concatenate data to control */ @@ -902,13 +909,13 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) * Check that all resources are reclaimed. */ void -sbflush(struct sockbuf *sb) +sbflush(struct socket *so, struct sockbuf *sb) { - + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); KASSERT((sb->sb_flags & SB_LOCK) == 0); while (sb->sb_mbcnt) - sbdrop(sb, (int)sb->sb_cc); + sbdrop(so, sb, (int)sb->sb_cc); KASSERT(sb->sb_cc == 0); KASSERT(sb->sb_datacc == 0); @@ -921,11 +928,14 @@ sbflush(struct sockbuf *sb) * Drop data from (the front of) a sockbuf. */ void -sbdrop(struct sockbuf *sb, int len) +sbdrop(struct socket *so, struct sockbuf *sb, int len) { struct mbuf *m, *mn; struct mbuf *next; + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); + next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; while (len > 0) { if (m == NULL) { diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 9a6d192ac62..d2df74e3236 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.117 2017/03/13 20:18:21 claudio Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.118 2017/06/26 09:32:31 mpi Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -222,7 +222,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, from = mtod(unp->unp_addr, struct sockaddr *); else from = &sun_noname; - if (sbappendaddr(&so2->so_rcv, from, m, control)) { + if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) { sorwakeup(so2); m = NULL; control = NULL; @@ -252,16 +252,16 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * Wake up readers. */ if (control) { - if (sbappendcontrol(rcv, m, control)) + if (sbappendcontrol(so2, rcv, m, control)) control = NULL; else { error = ENOBUFS; break; } } else if (so->so_type == SOCK_SEQPACKET) - sbappendrecord(rcv, m); + sbappendrecord(so2, rcv, m); else - sbappend(rcv, m); + sbappend(so2, rcv, m); snd->sb_mbcnt = rcv->sb_mbcnt; snd->sb_cc = rcv->sb_cc; sorwakeup(so2); diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 6ffecdcd5fe..575f58ff11d 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.53 2016/12/19 08:36:49 mpi Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.54 2017/06/26 09:32:31 mpi Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -552,7 +552,7 @@ filt_fifowrite(struct knote *kn, long hint) { struct socket *so = (struct socket *)kn->kn_hook; - kn->kn_data = sbspace(&so->so_snd); + kn->kn_data = sbspace(so, &so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; return (1); diff --git a/sys/net/pfkeyv2.c b/sys/net/pfkeyv2.c index 
469a852d21e..6e870613c92 100644 --- a/sys/net/pfkeyv2.c +++ b/sys/net/pfkeyv2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfkeyv2.c,v 1.161 2017/06/26 09:17:55 patrick Exp $ */ +/* $OpenBSD: pfkeyv2.c,v 1.162 2017/06/26 09:32:32 mpi Exp $ */ /* * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 @@ -327,7 +327,7 @@ ret: } int -pfkey_sendup(struct socket *socket, struct mbuf *packet, int more) +pfkey_sendup(struct socket *so, struct mbuf *packet, int more) { struct mbuf *packet2; @@ -339,12 +339,12 @@ pfkey_sendup(struct socket *socket, struct mbuf *packet, int more) } else packet2 = packet; - if (!sbappendaddr(&socket->so_rcv, &pfkey_addr, packet2, NULL)) { + if (!sbappendaddr(so, &so->so_rcv, &pfkey_addr, packet2, NULL)) { m_freem(packet2); return (ENOBUFS); } - sorwakeup(socket); + sorwakeup(so); return (0); } diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index f46a398e94a..fff48f069ab 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtsock.c,v 1.238 2017/06/09 12:56:43 mpi Exp $ */ +/* $OpenBSD: rtsock.c,v 1.239 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* @@ -174,7 +174,7 @@ route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * empty so that we can clear the flag. 
*/ if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && - ((sbspace(&rp->rcb_socket->so_rcv) == + ((sbspace(rp->rcb_socket, &rp->rcb_socket->so_rcv) == rp->rcb_socket->so_rcv.sb_hiwat))) rop->flags &= ~ROUTECB_FLAG_FLUSH; break; @@ -325,7 +325,8 @@ route_senddesync(void *data) */ desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); if (desync_mbuf != NULL) { - if (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, + struct socket *so = rp->rcb_socket; + if (sbappendaddr(so, &so->so_rcv, &route_src, desync_mbuf, NULL) != 0) { rop->flags &= ~ROUTECB_FLAG_DESYNC; sorwakeup(rp->rcb_socket); @@ -431,8 +432,8 @@ route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family) if (last) { struct mbuf *n; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { - if (sbspace(&last->so_rcv) < (2 * MSIZE) || - sbappendaddr(&last->so_rcv, sosrc, + if (sbspace(last, &last->so_rcv) < (2*MSIZE) || + sbappendaddr(last, &last->so_rcv, sosrc, n, (struct mbuf *)NULL) == 0) { /* * Flag socket as desync'ed and @@ -452,8 +453,8 @@ route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family) last = rp->rcb_socket; } if (last) { - if (sbspace(&last->so_rcv) < (2 * MSIZE) || - sbappendaddr(&last->so_rcv, sosrc, + if (sbspace(last, &last->so_rcv) < (2 * MSIZE) || + sbappendaddr(last, &last->so_rcv, sosrc, m, (struct mbuf *)NULL) == 0) { /* Flag socket as desync'ed and flush required */ sotoroutecb(last)->flags |= diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 28ba9d7fc6a..cc93134864a 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_divert.c,v 1.47 2017/05/30 07:50:37 mpi Exp $ */ +/* $OpenBSD: ip_divert.c,v 1.48 2017/06/26 09:32:32 mpi Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -222,7 +222,7 @@ divert_packet(struct mbuf *m, int dir, u_int16_t divert_port) if (inp) { sa = inp->inp_socket; - if (sbappendaddr(&sa->so_rcv, sintosa(&addr), m, NULL) == 0) { + if (sbappendaddr(sa, &sa->so_rcv, sintosa(&addr), m, 
NULL) == 0) { divstat_inc(divs_fullsock); m_freem(m); return (0); diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 1f4e6da4478..6eb3af76a71 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_mroute.c,v 1.119 2017/06/19 17:00:16 bluhm Exp $ */ +/* $OpenBSD: ip_mroute.c,v 1.120 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $ */ /* @@ -1037,7 +1037,7 @@ int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) { if (s != NULL) { - if (sbappendaddr(&s->so_rcv, sintosa(src), mm, NULL) != 0) { + if (sbappendaddr(s, &s->so_rcv, sintosa(src), mm, NULL) != 0) { sorwakeup(s); return (0); } diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 8f7fba3e233..8a14690911d 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip.c,v 1.99 2017/04/17 21:10:03 bluhm Exp $ */ +/* $OpenBSD: raw_ip.c,v 1.100 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ /* @@ -168,7 +168,8 @@ rip_input(struct mbuf **mp, int *offp, int proto, int af) if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, n); - if (sbappendaddr(&last->inp_socket->so_rcv, + if (sbappendaddr(last->inp_socket, + &last->inp_socket->so_rcv, sintosa(&ripsrc), n, opts) == 0) { /* should notify about lost packet */ m_freem(n); @@ -184,8 +185,8 @@ rip_input(struct mbuf **mp, int *offp, int proto, int af) if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, m); - if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m, - opts) == 0) { + if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, + sintosa(&ripsrc), m, opts) == 0) { m_freem(m); m_freem(opts); } else diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 724715de6d7..c847ceec16d 100644 
--- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.345 2017/05/18 11:38:07 mpi Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.346 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -339,7 +339,7 @@ tcp_flush_queue(struct tcpcb *tp) if (so->so_state & SS_CANTRCVMORE) m_freem(q->tcpqe_m); else - sbappendstream(&so->so_rcv, q->tcpqe_m); + sbappendstream(so, &so->so_rcv, q->tcpqe_m); pool_put(&tcpqe_pool, q); q = nq; } while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt); @@ -944,7 +944,7 @@ findpcb: tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); ND6_HINT(tp); - sbdrop(&so->so_snd, acked); + sbdrop(so, &so->so_snd, acked); /* * If we had a pending ICMP message that @@ -996,7 +996,7 @@ findpcb: TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); tcp_update_sndspace(tp); - if (sb_notify(&so->so_snd)) { + if (sb_notify(so, &so->so_snd)) { tp->t_flags |= TF_BLOCKOUTPUT; sowwakeup(so); tp->t_flags &= ~TF_BLOCKOUTPUT; @@ -1008,7 +1008,7 @@ findpcb: } } else if (th->th_ack == tp->snd_una && TAILQ_EMPTY(&tp->t_segq) && - tlen <= sbspace(&so->so_rcv)) { + tlen <= sbspace(so, &so->so_rcv)) { /* * This is a pure, in-sequence data packet * with nothing on the reassembly queue and @@ -1043,7 +1043,7 @@ findpcb: tp->rfbuf_cnt += tlen; } m_adj(m, iphlen + off); - sbappendstream(&so->so_rcv, m); + sbappendstream(so, &so->so_rcv, m); } tp->t_flags |= TF_BLOCKOUTPUT; sorwakeup(so); @@ -1067,7 +1067,7 @@ findpcb: */ { int win; - win = sbspace(&so->so_rcv); + win = sbspace(so, &so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); @@ -1780,16 +1780,16 @@ trimthenstep6: ND6_HINT(tp); if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; - sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); + sbdrop(so, &so->so_snd, (int)so->so_snd.sb_cc); ourfinisacked = 1; } else { - sbdrop(&so->so_snd, acked); + sbdrop(so, &so->so_snd, acked); tp->snd_wnd 
-= acked; ourfinisacked = 0; } tcp_update_sndspace(tp); - if (sb_notify(&so->so_snd)) { + if (sb_notify(so, &so->so_snd)) { tp->t_flags |= TF_BLOCKOUTPUT; sowwakeup(so); tp->t_flags &= ~TF_BLOCKOUTPUT; @@ -1997,7 +1997,7 @@ dodata: /* XXX */ m_freem(m); else { m_adj(m, hdroptlen); - sbappendstream(&so->so_rcv, m); + sbappendstream(so, &so->so_rcv, m); } tp->t_flags |= TF_BLOCKOUTPUT; sorwakeup(so); @@ -3107,7 +3107,7 @@ tcp_mss_update(struct tcpcb *tp) bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; - (void)sbreserve(&so->so_snd, bufsize); + (void)sbreserve(so, &so->so_snd, bufsize); } bufsize = so->so_rcv.sb_hiwat; @@ -3115,7 +3115,7 @@ tcp_mss_update(struct tcpcb *tp) bufsize = roundup(bufsize, mss); if (bufsize > sb_max) bufsize = sb_max; - (void)sbreserve(&so->so_rcv, bufsize); + (void)sbreserve(so, &so->so_rcv, bufsize); } } @@ -3909,7 +3909,7 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, /* * Initialize some local state. */ - win = sbspace(&so->so_rcv); + win = sbspace(so, &so->so_rcv); if (win > TCP_MAXWIN) win = TCP_MAXWIN; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index ebd9cccd5bf..d2510bf83e1 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.120 2017/05/18 11:38:07 mpi Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.121 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -392,7 +392,7 @@ again: if (off + len < so->so_snd.sb_cc) flags &= ~TH_FIN; - win = sbspace(&so->so_rcv); + win = sbspace(so, &so->so_rcv); /* * Sender silly window avoidance. 
If connection is idle diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9339a239d43..27cab0e05bb 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.164 2017/05/18 11:38:07 mpi Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.165 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -305,7 +305,8 @@ tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0, int af; /* af on wire */ if (tp) { - win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); + struct socket *so = tp->t_inpcb->inp_socket; + win = sbspace(so, &so->so_rcv); /* * If this is called with an unconnected * socket/tp/pcb (tp->pf is 0), we lose. diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index de6ec5b99e9..b2d626480df 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.151 2017/05/18 11:38:07 mpi Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.152 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -355,7 +355,7 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * marker if URG set. Possibly send more data. */ case PRU_SEND: - sbappendstream(&so->so_snd, m); + sbappendstream(so, &so->so_snd, m); error = tcp_output(tp); break; @@ -389,7 +389,7 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, break; case PRU_SENDOOB: - if (sbspace(&so->so_snd) < -512) { + if (sbspace(so, &so->so_snd) < -512) { m_freem(m); error = ENOBUFS; break; @@ -402,7 +402,7 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * of data past the urgent section. * Otherwise, snd_up should be one lower. 
*/ - sbappendstream(&so->so_snd, m); + sbappendstream(so, &so->so_snd, m); tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; error = tcp_output(tp); @@ -662,7 +662,7 @@ tcp_disconnect(struct tcpcb *tp) tp = tcp_drop(tp, 0); else { soisdisconnecting(so); - sbflush(&so->so_rcv); + sbflush(so, &so->so_rcv); tp = tcp_usrclosed(tp); if (tp) (void) tcp_output(tp); @@ -1111,7 +1111,7 @@ tcp_update_sndspace(struct tcpcb *tp) tp->snd_una); /* a writable socket must be preserved because of poll(2) semantics */ - if (sbspace(&so->so_snd) >= so->so_snd.sb_lowat) { + if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) { if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat) nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat; if (nmax * 2 < so->so_snd.sb_mbcnt + so->so_snd.sb_lowat) @@ -1122,7 +1122,7 @@ tcp_update_sndspace(struct tcpcb *tp) nmax = roundup(nmax, tp->t_maxseg); if (nmax != so->so_snd.sb_hiwat) - sbreserve(&so->so_snd, nmax); + sbreserve(so, &so->so_snd, nmax); } /* @@ -1161,5 +1161,5 @@ tcp_update_rcvspace(struct tcpcb *tp) /* round to MSS boundary */ nmax = roundup(nmax, tp->t_maxseg); - sbreserve(&so->so_rcv, nmax); + sbreserve(so, &so->so_rcv, nmax); } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 6369d41cf0e..9700554b3ab 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.237 2017/05/06 16:35:59 bluhm Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.238 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* @@ -456,7 +456,7 @@ udp_input(struct mbuf **mp, int *offp, int proto, int af) ip, n); m_adj(n, iphlen); - if (sbappendaddr( + if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, &srcsa.sa, n, opts) == 0) { m_freem(n); @@ -501,7 +501,7 @@ udp_input(struct mbuf **mp, int *offp, int proto, int af) ip_savecontrol(last, &opts, ip, m); m_adj(m, iphlen); - if (sbappendaddr(&last->inp_socket->so_rcv, + if 
(sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, &srcsa.sa, m, opts) == 0) { udpstat_inc(udps_fullsock); goto bad; @@ -654,7 +654,8 @@ udp_input(struct mbuf **mp, int *offp, int proto, int af) iphlen += sizeof(struct udphdr); m_adj(m, iphlen); - if (sbappendaddr(&inp->inp_socket->so_rcv, &srcsa.sa, m, opts) == 0) { + if (sbappendaddr(inp->inp_socket, &inp->inp_socket->so_rcv, &srcsa.sa, + m, opts) == 0) { udpstat_inc(udps_fullsock); goto bad; } diff --git a/sys/netinet6/ip6_divert.c b/sys/netinet6/ip6_divert.c index ea0b0000652..574201fac18 100644 --- a/sys/netinet6/ip6_divert.c +++ b/sys/netinet6/ip6_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_divert.c,v 1.47 2017/05/30 07:50:37 mpi Exp $ */ +/* $OpenBSD: ip6_divert.c,v 1.48 2017/06/26 09:32:32 mpi Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -223,7 +223,7 @@ divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port) if (inp) { sa = inp->inp_socket; - if (sbappendaddr(&sa->so_rcv, sin6tosa(&addr), m, NULL) == 0) { + if (sbappendaddr(sa, &sa->so_rcv, sin6tosa(&addr), m, NULL) == 0) { div6stat_inc(div6s_fullsock); m_freem(m); return (0); diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index 73833850790..6bcd205fc65 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -832,7 +832,7 @@ int socket6_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src) { if (s) { - if (sbappendaddr(&s->so_rcv, sin6tosa(src), mm, NULL) != 0) { + if (sbappendaddr(s, &s->so_rcv, sin6tosa(src), mm, NULL) != 0) { sorwakeup(s); return 0; } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index ccae609d75c..9f989ef006b 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip6.c,v 1.114 2017/05/13 17:44:00 bluhm Exp $ */ +/* $OpenBSD: raw_ip6.c,v 1.115 2017/06/26 09:32:32 mpi Exp $ */ /* $KAME: raw_ip6.c,v 1.69 2001/03/04 15:55:44 itojun Exp $ */ /* @@ -191,7 +191,8 @@ rip6_input(struct mbuf **mp, int *offp, int 
proto, int af) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); - if (sbappendaddr(&last->inp_socket->so_rcv, + if (sbappendaddr(last->inp_socket, + &last->inp_socket->so_rcv, sin6tosa(&rip6src), n, opts) == 0) { /* should notify about lost packet */ m_freem(n); @@ -209,7 +210,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, *offp); - if (sbappendaddr(&last->inp_socket->so_rcv, + if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, sin6tosa(&rip6src), m, opts) == 0) { m_freem(m); m_freem(opts); diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c index b94cbeb7b90..f12029927df 100644 --- a/sys/nfs/nfs_socket.c +++ b/sys/nfs/nfs_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_socket.c,v 1.117 2017/06/19 17:58:49 bluhm Exp $ */ +/* $OpenBSD: nfs_socket.c,v 1.118 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $ */ /* @@ -1176,7 +1176,7 @@ nfs_timer(void *arg) * Set r_rtt to -1 in case we fail to send it now. 
*/ rep->r_rtt = -1; - if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && + if (sbspace(so, &so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && diff --git a/sys/sys/event.h b/sys/sys/event.h index c359092be51..6c3de0b5a6b 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,4 +1,4 @@ -/* $OpenBSD: event.h,v 1.25 2017/05/31 14:52:05 mikeb Exp $ */ +/* $OpenBSD: event.h,v 1.26 2017/06/26 09:32:32 mpi Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -79,6 +79,13 @@ struct kevent { #define EV_EOF 0x8000 /* EOF detected */ #define EV_ERROR 0x4000 /* error, data contains errno */ +/* + * hint flag for in-kernel use - must not equal any existing note + */ +#ifdef _KERNEL +#define NOTE_SUBMIT 0x01000000 /* initial knote submission */ +#endif + /* * data/hint flags for EVFILT_{READ|WRITE}, shared with userspace */ diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 32967536f53..857e23621f1 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.69 2017/03/13 20:18:21 claudio Exp $ */ +/* $OpenBSD: socketvar.h,v 1.70 2017/06/26 09:32:32 mpi Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -151,6 +151,11 @@ struct socket { #define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */ #ifdef _KERNEL + +#include <lib/libkern/libkern.h> + +void soassertlocked(struct socket *); + /* * Macros for sockets and socket buffering. */ @@ -161,8 +166,15 @@ struct socket { /* * Do we need to notify the other side when I/O is possible? 
*/ -#define sb_notify(sb) ((((sb)->sb_flags | (sb)->sb_flagsintr) & \ - (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0) +static inline int +sb_notify(struct socket *so, struct sockbuf *sb) +{ + int flags = (sb->sb_flags | sb->sb_flagsintr); + + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); + return ((flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0); +} /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? @@ -170,8 +182,13 @@ struct socket { * still be negative (cc > hiwat or mbcnt > mbmax). Should detect * overflow and return 0. */ -#define sbspace(sb) \ - lmin((sb)->sb_hiwat - (sb)->sb_cc, (sb)->sb_mbmax - (sb)->sb_mbcnt) +static inline long +sbspace(struct socket *so, struct sockbuf *sb) +{ + KASSERT(sb == &so->so_rcv || sb == &so->so_snd); + soassertlocked(so); + return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); +} /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ @@ -190,7 +207,7 @@ struct socket { /* can we write something to so? 
*/ #define sowriteable(so) \ - ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ + ((sbspace((so), &(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state & SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags & PR_CONNREQUIRED)==0)) || \ ((so)->so_state & SS_CANTSENDMORE) || (so)->so_error) @@ -258,24 +275,24 @@ int soo_poll(struct file *fp, int events, struct proc *p); int soo_kqfilter(struct file *fp, struct knote *kn); int soo_close(struct file *fp, struct proc *p); int soo_stat(struct file *, struct stat *, struct proc *); -void sbappend(struct sockbuf *sb, struct mbuf *m); -void sbappendstream(struct sockbuf *sb, struct mbuf *m); -int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, - struct mbuf *m0, struct mbuf *control); -int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, - struct mbuf *control); -void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +void sbappend(struct socket *, struct sockbuf *, struct mbuf *); +void sbappendstream(struct socket *, struct sockbuf *, struct mbuf *); +int sbappendaddr(struct socket *, struct sockbuf *, struct sockaddr *, + struct mbuf *, struct mbuf *); +int sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *, + struct mbuf *); +void sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level); -void sbdrop(struct sockbuf *sb, int len); +void sbdrop(struct socket *, struct sockbuf *, int); void sbdroprecord(struct sockbuf *sb); -void sbflush(struct sockbuf *sb); +void sbflush(struct socket *, struct sockbuf *); void sbinsertoob(struct sockbuf *sb, struct mbuf *m0); -void sbrelease(struct sockbuf *sb); +void sbrelease(struct socket *, struct sockbuf *); int sbcheckreserve(u_long cnt, u_long defcnt); int sbchecklowmem(void); -int sbreserve(struct sockbuf *sb, u_long cc); +int sbreserve(struct socket *, struct sockbuf *, u_long); int sbwait(struct 
socket *, struct sockbuf *sb); int sb_lock(struct sockbuf *sb); void soinit(void); @@ -319,7 +336,6 @@ int sockargs(struct mbuf **, const void *, size_t, int); int sosleep(struct socket *, void *, int, const char *, int); int solock(struct socket *); void sounlock(int); -void soassertlocked(struct socket *); int sendit(struct proc *, int, struct msghdr *, int, register_t *); int recvit(struct proc *, int, struct msghdr *, caddr_t, -- 2.20.1