From: bluhm Date: Sat, 8 Sep 2018 13:16:58 +0000 (+0000) Subject: Split the pf(4) fragment reassembly queue into smaller parts. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=1a0c761e8e5ff53225410adf4ad1769d9847c18d;p=openbsd Split the pf(4) fragment reassembly queue into smaller parts. Remember 16 entry points based on the fragment offset. Instead of a worst case of 8196 list traversals we now check a maximum of 512 list entries or 16 array elements. discussed with claudio@ and sashan@; OK sashan@ --- diff --git a/sys/net/pf_norm.c b/sys/net/pf_norm.c index 423334fca00..87bb380bac0 100644 --- a/sys/net/pf_norm.c +++ b/sys/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.212 2018/09/04 20:34:10 bluhm Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.213 2018/09/08 13:16:58 bluhm Exp $ */ /* * Copyright 2001 Niels Provos @@ -91,11 +91,11 @@ struct pf_frnode { }; struct pf_fragment { - u_int32_t fr_id; /* fragment id for reassemble */ - + struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS]; RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; + u_int32_t fr_id; /* fragment id for reassemble */ int32_t fr_timeout; u_int32_t fr_gen; /* generation number (per pf_frnode) */ u_int16_t fr_maxlen; /* maximum length of single fragment */ @@ -128,6 +128,13 @@ void pf_free_fragment(struct pf_fragment *); struct pf_fragment *pf_find_fragment(struct pf_frnode *, u_int32_t); struct pf_frent *pf_create_fragment(u_short *); int pf_frent_holes(struct pf_frent *); +static inline int pf_frent_index(struct pf_frent *); +void pf_frent_insert(struct pf_fragment *, + struct pf_frent *, struct pf_frent *); +void pf_frent_remove(struct pf_fragment *, + struct pf_frent *); +struct pf_frent *pf_frent_previous(struct pf_fragment *, + struct pf_frent *); struct pf_fragment *pf_fillup_fragment(struct pf_frnode *, u_int32_t, struct pf_frent *, u_short *); struct mbuf *pf_join_fragment(struct pf_fragment *); @@ -362,6 +369,138 @@ pf_frent_holes(struct pf_frent *frent) return holes; } +static inline int +pf_frent_index(struct pf_frent *frent) +{ + /* + * We have an array of 16 entry points to the queue. A full size + * 65535 octet IP packet can have 8192 fragments. So the queue + * traversal length is at most 512 and at most 16 entry points are + * checked. We need 128 additional bytes on a 64 bit architecture. + */ + CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) == + 16 - 1); + CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1); + + return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS); +} + +void +pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, + struct pf_frent *prev) +{ + int index; + + if (prev == NULL) { + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + } else { + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off); + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + } + + index = pf_frent_index(frent); + if (frag->fr_firstoff[index] == NULL) { + KASSERT(prev == NULL || pf_frent_index(prev) < index); + frag->fr_firstoff[index] = frent; + } else { + if (frent->fe_off < frag->fr_firstoff[index]->fe_off) { + KASSERT(prev == NULL || pf_frent_index(prev) < index); + frag->fr_firstoff[index] = frent; + } else { + KASSERT(prev != NULL); + KASSERT(pf_frent_index(prev) == index); + } + } + + frag->fr_holes += pf_frent_holes(frent); +} + +void +pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) +{ + struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); + struct pf_frent *next = TAILQ_NEXT(frent, fr_next); + int index; + + frag->fr_holes -= pf_frent_holes(frent); + + index = pf_frent_index(frent); + KASSERT(frag->fr_firstoff[index] != NULL); + if (frag->fr_firstoff[index]->fe_off == frent->fe_off) { + if (next == NULL) { + frag->fr_firstoff[index] = NULL; + } else { + KASSERT(frent->fe_off + frent->fe_len <= next->fe_off); + if (pf_frent_index(next) == index) { + frag->fr_firstoff[index] = next; + } else { + frag->fr_firstoff[index] = NULL; + } + } + } else { + KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off); + KASSERT(prev != NULL); + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off); + KASSERT(pf_frent_index(prev) == index); + } + + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); +} + +struct pf_frent * +pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent) +{ + struct pf_frent *prev, *next; + int index; + + /* + * If there are no fragments after frag, take the final one. Assume + * that the global queue is not empty. + */ + prev = TAILQ_LAST(&frag->fr_queue, pf_fragq); + KASSERT(prev != NULL); + if (prev->fe_off <= frent->fe_off) + return prev; + /* + * We want to find a fragment entry that is before frag, but still + * close to it. Find the first fragment entry that is in the same + * entry point or in the first entry point after that. As we have + * already checked that there are entries behind frag, this will + * succeed. + */ + for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS; + index++) { + prev = frag->fr_firstoff[index]; + if (prev != NULL) + break; + } + KASSERT(prev != NULL); + /* + * In prev we may have a fragment from the same entry point that is + * before frent, or one that is just one position behind frent. + * In the latter case, we go back one step and have the predecessor. + * There may be none if the new fragment will be the first one. + */ + if (prev->fe_off > frent->fe_off) { + prev = TAILQ_PREV(prev, pf_fragq, fr_next); + if (prev == NULL) + return NULL; + KASSERT(prev->fe_off <= frent->fe_off); + return prev; + } + /* + * In prev is the first fragment of the entry point. The offset + * of frag is behind it. Find the closest previous fragment. + */ + for (next = TAILQ_NEXT(prev, fr_next); next != NULL; + next = TAILQ_NEXT(next, fr_next)) { + if (next->fe_off > frent->fe_off) + break; + prev = next; + } + return prev; +} + struct pf_fragment * pf_fillup_fragment(struct pf_frnode *key, u_int32_t id, struct pf_frent *frent, u_short *reason) @@ -426,12 +565,13 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id, frnode->fn_fragments = 0; frnode->fn_gen = 0; } + memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff)); TAILQ_INIT(&frag->fr_queue); + frag->fr_id = id; frag->fr_timeout = time_uptime; frag->fr_gen = frnode->fn_gen++; frag->fr_maxlen = frent->fe_len; frag->fr_holes = 1; - frag->fr_id = id; frag->fr_node = frnode; /* RB_INSERT cannot fail as pf_find_fragment() found nothing */ RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag); @@ -441,8 +581,7 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id, TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); /* We do not have a previous fragment */ - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - frag->fr_holes += pf_frent_holes(frent); + pf_frent_insert(frag, frent, NULL); return (frag); } @@ -472,16 +611,15 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id, goto free_ipv6_fragment; } - /* Find a fragment after the current one */ - prev = NULL; - TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { - if (after->fe_off > frent->fe_off) - break; - prev = after; + /* Find neighbors for newly inserted fragment */ + prev = pf_frent_previous(frag, frent); + if (prev == NULL) { + after = TAILQ_FIRST(&frag->fr_queue); + KASSERT(after != NULL); + } else { + after = TAILQ_NEXT(prev, fr_next); } - KASSERT(prev != NULL || after != NULL); - if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { u_int16_t precut; @@ -522,18 +660,13 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id, /* This fragment is completely overlapped, lose it */ DPFPRINTF(LOG_NOTICE, "old frag overlapped"); next = TAILQ_NEXT(after, fr_next); - frag->fr_holes -= pf_frent_holes(after); - TAILQ_REMOVE(&frag->fr_queue, after, fr_next); + pf_frent_remove(frag, after); m_freem(after->fe_m); pool_put(&pf_frent_pl, after); pf_nfrents--; } - if (prev == NULL) - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - else - TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); - frag->fr_holes += pf_frent_holes(frent); + pf_frent_insert(frag, frent, prev); return (frag); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index faf29f0f487..3d4bcfa9685 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.482 2018/07/22 09:09:18 sf Exp $ */ +/* $OpenBSD: pfvar.h,v 1.483 2018/09/08 13:16:58 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -119,6 +119,12 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, */ #define PF_FRAG_STALE 200 +/* + * Limit the length of the fragment queue traversal. Remember + * search entry points based on the fragment offset. + */ +#define PF_FRAG_ENTRY_POINTS 16 + enum { PF_NOPFROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_PKTDELAY_PKTS,