From 09228e5ff0c3b344278a537147513ebbf85656dd Mon Sep 17 00:00:00 2001
From: bluhm
Date: Sat, 24 Jun 2017 20:32:39 +0000
Subject: [PATCH] To avoid packet loss due to reuse of the 16 bit IPv4
 fragment id, we need suitable data structures.

Organize the pf fragments with two red-black trees.  One holds the
address and protocol information and the other only the fragment id.
This will allow dropping fragments for specific connections more
aggressively.

from markus@; OK sashan@
---
 sys/net/pf_norm.c | 176 +++++++++++++++++++++++++++++-----------------
 1 file changed, 113 insertions(+), 63 deletions(-)

diff --git a/sys/net/pf_norm.c b/sys/net/pf_norm.c
index c1f8d5e3e1d..acb26da6edd 100644
--- a/sys/net/pf_norm.c
+++ b/sys/net/pf_norm.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pf_norm.c,v 1.206 2017/06/19 17:58:49 bluhm Exp $ */
+/*	$OpenBSD: pf_norm.c,v 1.207 2017/06/24 20:32:39 bluhm Exp $ */
 
 /*
  * Copyright 2001 Niels Provos
@@ -76,29 +76,28 @@ struct pf_frent {
 	u_int16_t	 fe_mff;	/* more fragment flag */
 };
 
-/* keep synced with struct pf_fragment, used in RB_FIND */
-struct pf_fragment_cmp {
-	struct pf_addr	fr_src;
-	struct pf_addr	fr_dst;
-	u_int32_t	fr_id;
-	sa_family_t	fr_af;
-	u_int8_t	fr_proto;
-	u_int8_t	fr_direction;
+RB_HEAD(pf_frag_tree, pf_fragment);
+struct pf_frnode {
+	struct pf_addr	fn_src;		/* ip source address */
+	struct pf_addr	fn_dst;		/* ip destination address */
+	sa_family_t	fn_af;		/* address family */
+	u_int8_t	fn_proto;	/* protocol for fragments in fn_tree */
+	u_int8_t	fn_direction;	/* pf packet direction */
+	u_int32_t	fn_fragments;	/* number of entries in fn_tree */
+
+	RB_ENTRY(pf_frnode) fn_entry;
+	struct pf_frag_tree fn_tree;	/* matching fragments, lookup by id */
 };
 
 struct pf_fragment {
-	struct pf_addr	fr_src;		/* ip source address */
-	struct pf_addr	fr_dst;		/* ip destination address */
 	u_int32_t	fr_id;		/* fragment id for reassemble */
-	sa_family_t	fr_af;		/* address family */
-	u_int8_t	fr_proto;	/* protocol of this fragment */
-	u_int8_t	fr_direction;	/* pf packet direction */
 
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
 	int32_t		fr_timeout;
 	u_int16_t	fr_maxlen;	/* maximum length of single fragment */
+	struct pf_frnode *fr_node;	/* ip src/dst/proto/af for fragments */
 };
 
 struct pf_fragment_tag {
@@ -109,19 +108,23 @@ struct pf_fragment_tag {
 
 TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
 
+static __inline int	 pf_frnode_compare(struct pf_frnode *,
+			    struct pf_frnode *);
+RB_HEAD(pf_frnode_tree, pf_frnode)	pf_frnode_tree;
+RB_PROTOTYPE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
+RB_GENERATE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
+
 static __inline int	 pf_frag_compare(struct pf_fragment *,
 			    struct pf_fragment *);
-RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
 
 /* Private prototypes */
 void			 pf_flush_fragments(void);
 void			 pf_free_fragment(struct pf_fragment *);
-struct pf_fragment	*pf_find_fragment(struct pf_fragment_cmp *,
-			    struct pf_frag_tree *);
+struct pf_fragment	*pf_find_fragment(struct pf_frnode *, u_int32_t);
 struct pf_frent		*pf_create_fragment(u_short *);
-struct pf_fragment	*pf_fillup_fragment(struct pf_fragment_cmp *,
+struct pf_fragment	*pf_fillup_fragment(struct pf_frnode *, u_int32_t,
 			    struct pf_frent *, u_short *);
 int			 pf_isfull_fragment(struct pf_fragment *);
 struct mbuf		*pf_join_fragment(struct pf_fragment *);
@@ -132,7 +135,7 @@ int			 pf_reassemble6(struct mbuf **, struct ip6_frag *,
 #endif /* INET6 */
 
 /* Globals */
-struct pool		 pf_frent_pl, pf_frag_pl;
+struct pool		 pf_frent_pl, pf_frag_pl, pf_frnode_pl;
 struct pool		 pf_state_scrub_pl;
 int			 pf_nfrents;
 
@@ -153,6 +156,8 @@ pf_normalize_init(void)
 {
 	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0,
 	    IPL_SOFTNET, 0, "pffrent", NULL);
+	pool_init(&pf_frnode_pl, sizeof(struct pf_frnode), 0,
+	    IPL_SOFTNET, 0, "pffrnode", NULL);
 	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0,
 	    IPL_SOFTNET, 0, "pffrag", NULL);
 	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0,
@@ -167,19 +172,28 @@ pf_normalize_init(void)
 }
 
 static __inline int
-pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
+pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b)
 {
 	int	diff;
 
-	if ((diff = a->fr_id - b->fr_id) != 0)
+	if ((diff = a->fn_proto - b->fn_proto) != 0)
 		return (diff);
-	if ((diff = a->fr_proto - b->fr_proto) != 0)
+	if ((diff = a->fn_af - b->fn_af) != 0)
 		return (diff);
-	if ((diff = a->fr_af - b->fr_af) != 0)
+	if ((diff = pf_addr_compare(&a->fn_src, &b->fn_src, a->fn_af)) != 0)
 		return (diff);
-	if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
+	if ((diff = pf_addr_compare(&a->fn_dst, &b->fn_dst, a->fn_af)) != 0)
 		return (diff);
-	if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
+
+	return (0);
+}
+
+static __inline int
+pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
+{
+	int	diff;
+
+	if ((diff = a->fr_id - b->fr_id) != 0)
 		return (diff);
 
 	return (0);
@@ -231,8 +245,17 @@ void
 pf_free_fragment(struct pf_fragment *frag)
 {
 	struct pf_frent		*frent;
-
-	RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
+	struct pf_frnode	*frnode;
+
+	frnode = frag->fr_node;
+	RB_REMOVE(pf_frag_tree, &frnode->fn_tree, frag);
+	KASSERT(frnode->fn_fragments >= 1);
+	frnode->fn_fragments--;
+	if (frnode->fn_fragments == 0) {
+		KASSERT(RB_EMPTY(&frnode->fn_tree));
+		RB_REMOVE(pf_frnode_tree, &pf_frnode_tree, frnode);
+		pool_put(&pf_frnode_pl, frnode);
+	}
 	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 
 	/* Free all fragment entries */
@@ -246,15 +269,21 @@ pf_free_fragment(struct pf_fragment *frag)
 }
 
 struct pf_fragment *
-pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
+pf_find_fragment(struct pf_frnode *key, u_int32_t id)
 {
-	struct pf_fragment	*frag;
-
-	frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
-	if (frag != NULL) {
-		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
-		TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
-	}
+	struct pf_fragment	*frag, idkey;
+	struct pf_frnode	*frnode;
+
+	frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
+	if (frnode == NULL)
+		return (NULL);
+	KASSERT(frnode->fn_fragments >= 1);
+	idkey.fr_id = id;
+	frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey);
+	if (frag == NULL)
+		return (NULL);
+	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
+	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 
 	return (frag);
 }
@@ -279,11 +308,12 @@ pf_create_fragment(u_short *reason)
 }
 
 struct pf_fragment *
-pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
-    u_short *reason)
+pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
+    struct pf_frent *frent, u_short *reason)
 {
 	struct pf_frent		*after, *next, *prev;
 	struct pf_fragment	*frag;
+	struct pf_frnode	*frnode;
 	u_int16_t		 total;
 
 	/* No empty fragments */
@@ -306,12 +336,12 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		goto bad_fragment;
 	}
 
-	DPFPRINTF(LOG_INFO, key->fr_af == AF_INET ?
+	DPFPRINTF(LOG_INFO, key->fn_af == AF_INET ?
 	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
-	    key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len);
+	    id, frent->fe_off, frent->fe_off + frent->fe_len);
 
 	/* Fully buffer all of the fragments in this fragment queue */
-	frag = pf_find_fragment(key, &pf_frag_tree);
+	frag = pf_find_fragment(key, id);
 
 	/* Create a new reassembly queue for this packet */
 	if (frag == NULL) {
@@ -324,13 +354,32 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 			goto drop_fragment;
 		}
 	}
-
-	*(struct pf_fragment_cmp *)frag = *key;
+	frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
+	if (frnode == NULL) {
+		frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
+		if (frnode == NULL) {
+			pf_flush_fragments();
+			frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
+			if (frnode == NULL) {
+				REASON_SET(reason, PFRES_MEMORY);
+				pool_put(&pf_frag_pl, frag);
+				goto drop_fragment;
+			}
+		}
+		*frnode = *key;
+		RB_INIT(&frnode->fn_tree);
+		frnode->fn_fragments = 0;
+	}
 	TAILQ_INIT(&frag->fr_queue);
 	frag->fr_timeout = time_uptime;
 	frag->fr_maxlen = frent->fe_len;
-
-	RB_INSERT(pf_frag_tree, &pf_frag_tree, frag);
+	frag->fr_id = id;
+	frag->fr_node = frnode;
+	/* RB_INSERT cannot fail as pf_find_fragment() found nothing */
+	RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag);
+	frnode->fn_fragments++;
+	if (frnode->fn_fragments == 1)
+		RB_INSERT(pf_frnode_tree, &pf_frnode_tree, frnode);
 	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 
 	/* We do not have a previous fragment */
@@ -340,6 +389,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 	}
 
 	KASSERT(!TAILQ_EMPTY(&frag->fr_queue));
+	KASSERT(frag->fr_node);
 
 	/* Remember maximum fragment len for refragmentation */
 	if (frent->fe_len > frag->fr_maxlen)
@@ -377,7 +427,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		u_int16_t	precut;
 
 #ifdef INET6
-		if (frag->fr_af == AF_INET6)
+		if (frag->fr_node->fn_af == AF_INET6)
 			goto free_fragment;
 #endif /* INET6 */
 
@@ -397,7 +447,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		u_int16_t	aftercut;
 
 #ifdef INET6
-		if (frag->fr_af == AF_INET6)
+		if (frag->fr_node->fn_af == AF_INET6)
 			goto free_fragment;
 #endif /* INET6 */
 
@@ -428,7 +478,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 
 free_ipv6_fragment:
 #ifdef INET6
-	if (frag->fr_af == AF_INET)
+	if (frag->fr_node->fn_af == AF_INET)
 		goto bad_fragment;
 free_fragment:
 	/*
@@ -530,7 +580,7 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
 	struct ip		*ip = mtod(m, struct ip *);
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
-	struct pf_fragment_cmp	 key;
+	struct pf_frnode	 key;
 	u_int16_t		 total, hdrlen;
 
 	/* Get an entry for the fragment queue */
@@ -544,15 +594,15 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
 	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
 	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
 
-	key.fr_src.v4 = ip->ip_src;
-	key.fr_dst.v4 = ip->ip_dst;
-	key.fr_af = AF_INET;
-	key.fr_proto = ip->ip_p;
-	key.fr_id = ip->ip_id;
-	key.fr_direction = dir;
+	key.fn_src.v4 = ip->ip_src;
+	key.fn_dst.v4 = ip->ip_dst;
+	key.fn_af = AF_INET;
+	key.fn_proto = ip->ip_p;
+	key.fn_direction = dir;
 
 	PF_FRAG_LOCK();
-	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+	if ((frag = pf_fillup_fragment(&key, ip->ip_id, frent, reason))
+	    == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
@@ -611,7 +661,7 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
 	struct pf_fragment_tag	*ftag;
 	struct pf_frent		*frent;
 	struct pf_fragment	*frag;
-	struct pf_fragment_cmp	 key;
+	struct pf_frnode	 key;
 	int			 off;
 	u_int16_t		 total, maxlen;
 	u_int8_t		 proto;
@@ -627,16 +677,16 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
 	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
 	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
 
-	key.fr_src.v6 = ip6->ip6_src;
-	key.fr_dst.v6 = ip6->ip6_dst;
-	key.fr_af = AF_INET6;
+	key.fn_src.v6 = ip6->ip6_src;
+	key.fn_dst.v6 = ip6->ip6_dst;
+	key.fn_af = AF_INET6;
 	/* Only the first fragment's protocol is relevant */
-	key.fr_proto = 0;
-	key.fr_id = fraghdr->ip6f_ident;
-	key.fr_direction = dir;
+	key.fn_proto = 0;
+	key.fn_direction = dir;
 
 	PF_FRAG_LOCK();
-	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+	if ((frag = pf_fillup_fragment(&key, fraghdr->ip6f_ident, frent,
+	    reason)) == NULL) {
 		PF_FRAG_UNLOCK();
 		return (PF_DROP);
 	}
-- 
2.20.1
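
Not part of the patch itself, but as an illustration of the lookup scheme the
commit message describes: the userland sketch below builds the same two-level
arrangement with the <sys/tree.h> red-black tree macros.  An outer tree plays
the pf_frnode role (keyed by source, destination and protocol) and every node
carries an inner tree that plays the pf_fragment role (keyed only by the
fragment id).  All names and types here are simplified stand-ins, not the
kernel ones: addresses are plain uint32_t instead of struct pf_addr, and there
is no pool allocator, locking, direction or timeout handling.  It should
compile on any system that ships <sys/tree.h>.

#include <sys/tree.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-in for struct pf_fragment: keyed by id only. */
struct frag {
	uint32_t	 fr_id;
	RB_ENTRY(frag)	 fr_entry;
};
RB_HEAD(frag_tree, frag);

/* Simplified stand-in for struct pf_frnode: keyed by src/dst/proto. */
struct frnode {
	uint32_t	 fn_src;	/* uint32_t instead of struct pf_addr */
	uint32_t	 fn_dst;
	uint8_t		 fn_proto;
	RB_ENTRY(frnode) fn_entry;
	struct frag_tree fn_tree;	/* this connection's frags, by id */
};
RB_HEAD(frnode_tree, frnode);

static int
frag_cmp(struct frag *a, struct frag *b)
{
	return (a->fr_id < b->fr_id ? -1 : a->fr_id > b->fr_id);
}

static int
frnode_cmp(struct frnode *a, struct frnode *b)
{
	if (a->fn_proto != b->fn_proto)
		return (a->fn_proto < b->fn_proto ? -1 : 1);
	if (a->fn_src != b->fn_src)
		return (a->fn_src < b->fn_src ? -1 : 1);
	if (a->fn_dst != b->fn_dst)
		return (a->fn_dst < b->fn_dst ? -1 : 1);
	return (0);
}

RB_GENERATE(frag_tree, frag, fr_entry, frag_cmp);
RB_GENERATE(frnode_tree, frnode, fn_entry, frnode_cmp);

/*
 * Two-stage lookup: first the connection node by address/protocol,
 * then the fragment queue by id inside that node's private tree.
 */
static struct frag *
find_fragment(struct frnode_tree *nodes, struct frnode *key, uint32_t id)
{
	struct frnode	*fn;
	struct frag	 idkey;

	fn = RB_FIND(frnode_tree, nodes, key);
	if (fn == NULL)
		return (NULL);
	idkey.fr_id = id;
	return (RB_FIND(frag_tree, &fn->fn_tree, &idkey));
}

int
main(void)
{
	struct frnode_tree	 nodes = RB_INITIALIZER(&nodes);
	struct frnode		*fn, key;
	struct frag		*fr;
	uint32_t		 id;

	/* One connection node: 10.0.0.1 -> 10.0.0.2, UDP. */
	if ((fn = calloc(1, sizeof(*fn))) == NULL)
		return (1);
	fn->fn_src = 0x0a000001;
	fn->fn_dst = 0x0a000002;
	fn->fn_proto = 17;
	RB_INIT(&fn->fn_tree);
	RB_INSERT(frnode_tree, &nodes, fn);

	/* Three reassembly queues hanging off the same node. */
	for (id = 100; id < 103; id++) {
		if ((fr = calloc(1, sizeof(*fr))) == NULL)
			return (1);
		fr->fr_id = id;
		RB_INSERT(frag_tree, &fn->fn_tree, fr);
	}

	memset(&key, 0, sizeof(key));
	key.fn_src = 0x0a000001;
	key.fn_dst = 0x0a000002;
	key.fn_proto = 17;
	fr = find_fragment(&nodes, &key, 101);
	printf("id 101: %s\n", fr != NULL ? "found" : "not found");
	return (0);
}

The point of the split shows up in find_fragment(): a colliding 16 bit id on a
different connection lands in a different inner tree, and because each node
tracks its own fragments (fn_fragments in the patch), per-connection state can
later be dropped without walking one global id-keyed tree, which is what the
commit message means by dropping fragments for specific connections more
aggressively.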