Split the pf(4) fragment reassembly queue into smaller parts.
authorbluhm <bluhm@openbsd.org>
Sat, 8 Sep 2018 13:16:58 +0000 (13:16 +0000)
committerbluhm <bluhm@openbsd.org>
Sat, 8 Sep 2018 13:16:58 +0000 (13:16 +0000)
Remember 16 entry points based on the fragment offset.  Instead of
a worst case of 8196 list traversals we now check a maximum of 512
list entries or 16 array elements.
discussed with claudio@ and sashan@; OK sashan@

sys/net/pf_norm.c
sys/net/pfvar.h

index 423334f..87bb380 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pf_norm.c,v 1.212 2018/09/04 20:34:10 bluhm Exp $ */
+/*     $OpenBSD: pf_norm.c,v 1.213 2018/09/08 13:16:58 bluhm Exp $ */
 
 /*
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
@@ -91,11 +91,11 @@ struct pf_frnode {
 };
 
 struct pf_fragment {
-       u_int32_t       fr_id;          /* fragment id for reassemble */
-
+       struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS];
        RB_ENTRY(pf_fragment) fr_entry;
        TAILQ_ENTRY(pf_fragment) frag_next;
        TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
+       u_int32_t       fr_id;          /* fragment id for reassemble */
        int32_t         fr_timeout;
        u_int32_t       fr_gen;         /* generation number (per pf_frnode) */
        u_int16_t       fr_maxlen;      /* maximum length of single fragment */
@@ -128,6 +128,13 @@ void                        pf_free_fragment(struct pf_fragment *);
 struct pf_fragment     *pf_find_fragment(struct pf_frnode *, u_int32_t);
 struct pf_frent                *pf_create_fragment(u_short *);
 int                     pf_frent_holes(struct pf_frent *);
+static inline int       pf_frent_index(struct pf_frent *);
+void                    pf_frent_insert(struct pf_fragment *,
+                           struct pf_frent *, struct pf_frent *);
+void                    pf_frent_remove(struct pf_fragment *,
+                           struct pf_frent *);
+struct pf_frent                *pf_frent_previous(struct pf_fragment *,
+                           struct pf_frent *);
 struct pf_fragment     *pf_fillup_fragment(struct pf_frnode *, u_int32_t,
                            struct pf_frent *, u_short *);
 struct mbuf            *pf_join_fragment(struct pf_fragment *);
@@ -362,6 +369,138 @@ pf_frent_holes(struct pf_frent *frent)
        return holes;
 }
 
+static inline int
+pf_frent_index(struct pf_frent *frent)
+{
+       /*
+        * We have an array of 16 entry points to the queue.  A full size
+        * 65535 octet IP packet can have 8192 fragments.  So the queue
+        * traversal length is at most 512 and at most 16 entry points are
+        * checked.  We need 128 additional bytes on a 64 bit architecture.
+        */
+       CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
+           16 - 1);
+       CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);
+
+       return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
+}
+
+void
+pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
+    struct pf_frent *prev)
+{
+       int index;
+
+       if (prev == NULL) {
+               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+       } else {
+               KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
+               TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+       }
+
+       index = pf_frent_index(frent);
+       if (frag->fr_firstoff[index] == NULL) {
+               KASSERT(prev == NULL || pf_frent_index(prev) < index);
+               frag->fr_firstoff[index] = frent;
+       } else {
+               if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
+                       KASSERT(prev == NULL || pf_frent_index(prev) < index);
+                       frag->fr_firstoff[index] = frent;
+               } else {
+                       KASSERT(prev != NULL);
+                       KASSERT(pf_frent_index(prev) == index);
+               }
+       }
+
+       frag->fr_holes += pf_frent_holes(frent);
+}
+
+void
+pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
+{
+       struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
+       struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
+       int index;
+
+       frag->fr_holes -= pf_frent_holes(frent);
+
+       index = pf_frent_index(frent);
+       KASSERT(frag->fr_firstoff[index] != NULL);
+       if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
+               if (next == NULL) {
+                       frag->fr_firstoff[index] = NULL;
+               } else {
+                       KASSERT(frent->fe_off + frent->fe_len <= next->fe_off);
+                       if (pf_frent_index(next) == index) {
+                               frag->fr_firstoff[index] = next;
+                       } else {
+                               frag->fr_firstoff[index] = NULL;
+                       }
+               }
+       } else {
+               KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off);
+               KASSERT(prev != NULL);
+               KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
+               KASSERT(pf_frent_index(prev) == index);
+       }
+
+       TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+}
+
+struct pf_frent *
+pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
+{
+       struct pf_frent *prev, *next;
+       int index;
+
+       /*
+        * If there are no fragments after frag, take the final one.  Assume
+        * that the global queue is not empty.
+        */
+       prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
+       KASSERT(prev != NULL);
+       if (prev->fe_off <= frent->fe_off)
+               return prev;
+       /*
+        * We want to find a fragment entry that is before frag, but still
+        * close to it.  Find the first fragment entry that is in the same
+        * entry point or in the first entry point after that.  As we have
+        * already checked that there are entries behind frag, this will
+        * succeed.
+        */
+       for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
+           index++) {
+               prev = frag->fr_firstoff[index];
+               if (prev != NULL)
+                       break;
+       }
+       KASSERT(prev != NULL);
+       /*
+        * In prev we may have a fragment from the same entry point that is
+        * before frent, or one that is just one position behind frent.
+        * In the latter case, we go back one step and have the predecessor.
+        * There may be none if the new fragment will be the first one.
+        */
+       if (prev->fe_off > frent->fe_off) {
+               prev = TAILQ_PREV(prev, pf_fragq, fr_next);
+               if (prev == NULL)
+                       return NULL;
+               KASSERT(prev->fe_off <= frent->fe_off);
+               return prev;
+       }
+       /*
+        * In prev is the first fragment of the entry point.  The offset
+        * of frag is behind it.  Find the closest previous fragment.
+        */
+       for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
+           next = TAILQ_NEXT(next, fr_next)) {
+               if (next->fe_off > frent->fe_off)
+                       break;
+               prev = next;
+       }
+       return prev;
+}
+
 struct pf_fragment *
 pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
     struct pf_frent *frent, u_short *reason)
@@ -426,12 +565,13 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
                        frnode->fn_fragments = 0;
                        frnode->fn_gen = 0;
                }
+               memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
                TAILQ_INIT(&frag->fr_queue);
+               frag->fr_id = id;
                frag->fr_timeout = time_uptime;
                frag->fr_gen = frnode->fn_gen++;
                frag->fr_maxlen = frent->fe_len;
                frag->fr_holes = 1;
-               frag->fr_id = id;
                frag->fr_node = frnode;
                /* RB_INSERT cannot fail as pf_find_fragment() found nothing */
                RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag);
@@ -441,8 +581,7 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
                TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 
                /* We do not have a previous fragment */
-               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-               frag->fr_holes += pf_frent_holes(frent);
+               pf_frent_insert(frag, frent, NULL);
 
                return (frag);
        }
@@ -472,16 +611,15 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
                        goto free_ipv6_fragment;
        }
 
-       /* Find a fragment after the current one */
-       prev = NULL;
-       TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
-               if (after->fe_off > frent->fe_off)
-                       break;
-               prev = after;
+       /* Find neighbors for newly inserted fragment */
+       prev = pf_frent_previous(frag, frent);
+       if (prev == NULL) {
+               after = TAILQ_FIRST(&frag->fr_queue);
+               KASSERT(after != NULL);
+       } else {
+               after = TAILQ_NEXT(prev, fr_next);
        }
 
-       KASSERT(prev != NULL || after != NULL);
-
        if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
                u_int16_t       precut;
 
@@ -522,18 +660,13 @@ pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
                /* This fragment is completely overlapped, lose it */
                DPFPRINTF(LOG_NOTICE, "old frag overlapped");
                next = TAILQ_NEXT(after, fr_next);
-               frag->fr_holes -= pf_frent_holes(after);
-               TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
+               pf_frent_remove(frag, after);
                m_freem(after->fe_m);
                pool_put(&pf_frent_pl, after);
                pf_nfrents--;
        }
 
-       if (prev == NULL)
-               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-       else
-               TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
-       frag->fr_holes += pf_frent_holes(frent);
+       pf_frent_insert(frag, frent, prev);
 
        return (frag);
 
index faf29f0..3d4bcfa 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pfvar.h,v 1.482 2018/07/22 09:09:18 sf Exp $ */
+/*     $OpenBSD: pfvar.h,v 1.483 2018/09/08 13:16:58 bluhm Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -119,6 +119,12 @@ enum       { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
  */
 #define PF_FRAG_STALE                  200
 
+/*
+ * Limit the length of the fragment queue traversal.  Remember
+ * search entry points based on the fragment offset.
+ */
+#define PF_FRAG_ENTRY_POINTS           16
+
 enum   { PF_NOPFROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
 enum   { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
          PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_PKTDELAY_PKTS,