--- /dev/null
+/* $OpenBSD: subr_blist.c,v 1.1 2022/07/29 17:47:12 semarie Exp $ */
+/*
+ * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
+ *
+ * Copyright (c) 1998,2004 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * This module implements a general bitmap allocator/deallocator. The
+ * allocator eats around 2 bits per 'block'. The module does not
+ * try to interpret the meaning of a 'block' other than to return
+ * SWAPBLK_NONE on an allocation failure.
+ *
+ * A radix tree is used to maintain the bitmap. Two radix constants are
+ * involved: One for the bitmaps contained in the leaf nodes (typically
+ * 32), and one for the meta nodes (typically 16). Both meta and leaf
+ * nodes have a hint field. This field gives us a hint as to the largest
+ * free contiguous range of blocks under the node. It may contain a
+ * value that is too high, but will never contain a value that is too
+ * low. When the radix tree is searched, allocation failures in subtrees
+ * update the hint.
+ *
+ * The radix tree also implements two collapsed states for meta nodes:
+ * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
+ * in either of these two states, all information contained underneath
+ * the node is considered stale. These states are used to optimize
+ * allocation and freeing operations.
+ *
+ * The hinting greatly increases code efficiency for allocations while
+ * the general radix structure optimizes both allocations and frees. The
+ * radix tree should be able to operate well no matter how much
+ * fragmentation there is and no matter how large a bitmap is used.
+ *
+ * Unlike the rlist code, the blist code wires all necessary memory at
+ * creation time. Neither allocations nor frees require interaction with
+ * the memory subsystem. In contrast, the rlist code may allocate memory
+ * on a blist_free() call. The non-blocking features of the blist code
+ * are used to great advantage in the swap code (uvm/uvm_swap.c). The
+ * rlist code uses a little less overall memory than the blist code (but
+ * due to swap interleaving not all that much less), but the blist code
+ * scales much, much better.
+ *
+ * LAYOUT: The radix tree is laid out recursively using a
+ * linear array. Each meta node is immediately followed (laid out
+ * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
+ * is a recursive structure but one that can be easily scanned through
+ * a very simple 'skip' calculation. In order to support large radixes,
+ * portions of the tree may reside outside our memory allocation. We
+ * handle this with an early-termination optimization (when bighint is
+ * set to -1) on the scan. The memory allocation is only large enough
+ * to cover the number of blocks requested at creation time even if it
+ * must be encompassed by a larger root-node radix.
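+ *
+ *	For example (assuming the typical BLIST_META_RADIX of 16): in a
+ *	two-level tree the root meta node sits at index 0 and its 16
+ *	leaves at indices 1..16; with three levels the root's children
+ *	sit at indices 1, 18, 35, ..., since each child subtree occupies
+ *	next_skip == 17 slots of the array.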
+ *
+ * NOTE: The allocator cannot currently allocate more than
+ * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
+ * large' if you try. This is an area that could use improvement. The
+ * radix is large enough that this restriction does not affect the swap
+ * system, though. Currently only the allocation code is affected by
+ * this algorithmic unfeature. The freeing code can handle arbitrary
+ * ranges.
+ *
+ * NOTE: The radix may exceed BLIST_BMAP_RADIX bits in order to support
+ * up to 2^(BLIST_BMAP_RADIX-1) blocks. The first division will
+ * drop the radix down and fit it within a signed BLIST_BMAP_RADIX
+ * bit integer.
+ *
+ * This code can be compiled stand-alone for debugging.
+ */
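+
+/*
+ * Typical lifecycle (a hedged sketch; blist_create() returns the map
+ * fully allocated, so callers free the usable range first, exactly as
+ * the standalone main() at the bottom of this file does):
+ *
+ *	blist_t bl = blist_create(1024);
+ *	blist_free(bl, 0, 1024);		mark the whole range free
+ *	blk = blist_alloc(bl, 16);		SWAPBLK_NONE on failure
+ *	if (blk != SWAPBLK_NONE)
+ *		blist_free(bl, blk, 16);
+ *	blist_destroy(bl);
+ */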
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/blist.h>
+#include <sys/malloc.h>
+
+#else
+
+#ifndef BLIST_NO_DEBUG
+#define BLIST_DEBUG
+#endif
+
+#include <sys/types.h>
+#include <assert.h>
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#define malloc(s,t,f) calloc(1, s)
+#define mallocarray(n,s,t,f) reallocarray(NULL, n, s)
+#define free(p,t,s) free(p)
+#define KASSERT(exp) assert(exp)
+
+#include "../sys/blist.h"
+
+#define panic(...) do { errx(1, __VA_ARGS__); } while (0)
+
+#endif
+
+/*
+ * static support functions
+ */
+
+static bsblk_t blst_leaf_alloc(blmeta_t *scan, bsblk_t blkat,
+ bsblk_t blk, bsblk_t count);
+static bsblk_t blst_meta_alloc(blmeta_t *scan, bsblk_t blkat,
+ bsblk_t blk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip);
+static void blst_leaf_free(blmeta_t *scan, bsblk_t relblk, bsblk_t count);
+static void blst_meta_free(blmeta_t *scan, bsblk_t freeBlk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip,
+ bsblk_t blk);
+static bsblk_t blst_leaf_fill(blmeta_t *scan, bsblk_t blk, bsblk_t count);
+static bsblk_t blst_meta_fill(blmeta_t *scan, bsblk_t fillBlk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip,
+ bsblk_t blk);
+static void blst_copy(blmeta_t *scan, bsblk_t blk, bsblk_t radix,
+ bsblk_t skip, blist_t dest, bsblk_t count);
+static bsblk_t blst_radix_init(blmeta_t *scan, bsblk_t radix,
+ bsblk_t skip, bsblk_t count);
+static int blst_radix_gapfind(blmeta_t *scan, bsblk_t blk, bsblk_t radix, bsblk_t skip,
+ int state, bsblk_t *maxbp, bsblk_t *maxep, bsblk_t *bp, bsblk_t *ep);
+
+#if defined(BLIST_DEBUG) || defined(DDB)
+static void blst_radix_print(blmeta_t *scan, bsblk_t blk,
+ bsblk_t radix, bsblk_t skip, int tab);
+#endif
+
+/*
+ * blist_create() - create a blist capable of handling up to the specified
+ * number of blocks
+ *
+ * blocks must be greater than 0
+ *
+ * The smallest blist consists of a single leaf node capable of
+ * managing BLIST_BMAP_RADIX blocks.
+ *
+ *	The blocks are addressable in range [0, blocks[
+ */
+
+blist_t
+blist_create(bsblk_t blocks)
+{
+ blist_t bl;
+ bsblk_t radix;
+ bsblk_t skip = 0;
+
+ KASSERT(blocks > 0);
+
+ /*
+ * Calculate radix and skip field used for scanning.
+ *
+ * Radix can exceed BLIST_BMAP_RADIX bits even if bsblk_t is limited
+ * to BLIST_BMAP_RADIX bits.
+ *
+ * XXX check overflow
+ */
+ radix = BLIST_BMAP_RADIX;
+
+ while (radix < blocks) {
+ radix *= BLIST_META_RADIX;
+ skip = (skip + 1) * BLIST_META_RADIX;
+ KASSERT(skip > 0);
+ }
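+
+	/*
+	 * For example, with BLIST_BMAP_RADIX == 32 and BLIST_META_RADIX
+	 * == 16 (the typical values noted above), successive iterations
+	 * yield radix/skip pairs of 512/16, 8192/272, 131072/4368, ...
+	 * until radix covers 'blocks'.
+	 */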
+
+ bl = malloc(sizeof(struct blist), M_VMSWAP, M_WAITOK | M_ZERO);
+
+ bl->bl_blocks = blocks;
+ bl->bl_radix = radix;
+ bl->bl_skip = skip;
+ bl->bl_rootblks = 1 +
+ blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
+ bl->bl_root = mallocarray(bl->bl_rootblks, sizeof(blmeta_t),
+ M_VMSWAP, M_WAITOK);
+
+#if defined(BLIST_DEBUG)
+ printf(
+ "BLIST representing %lu blocks (%lu MB of swap)"
+ ", requiring %6.2fM of ram\n",
+ bl->bl_blocks,
+ bl->bl_blocks * 4 / 1024,
+ (bl->bl_rootblks * sizeof(blmeta_t) + 1023) / (1024.0 * 1024.0)
+ );
+ printf("BLIST raw radix tree: %lu records, top-radix %lu\n",
+ bl->bl_rootblks, bl->bl_radix);
+#endif
+ blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
+
+ return(bl);
+}
+
+void
+blist_destroy(blist_t bl)
+{
+ KASSERT(bl != NULL);
+
+ free(bl->bl_root, M_VMSWAP, sizeof(blmeta_t) * bl->bl_rootblks);
+ free(bl, M_VMSWAP, sizeof(struct blist));
+}
+
+/*
+ * blist_alloc() - reserve space in the block bitmap. Return the base
+ * of a contiguous region or SWAPBLK_NONE if space could
+ * not be allocated.
+ */
+
+bsblk_t
+blist_alloc(blist_t bl, bsblk_t count)
+{
+ bsblk_t blk = SWAPBLK_NONE;
+
+ if (bl) {
+ if (bl->bl_radix == BLIST_BMAP_RADIX)
+ blk = blst_leaf_alloc(bl->bl_root, 0, 0, count);
+ else
+ blk = blst_meta_alloc(bl->bl_root, 0, 0, count,
+ bl->bl_radix, bl->bl_skip);
+ if (blk != SWAPBLK_NONE)
+ bl->bl_free -= count;
+ }
+ return(blk);
+}
+
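+/*
+ * blist_allocat() -	like blist_alloc(), but bias the search toward
+ *			'blkat': subtrees lying wholly below blkat are
+ *			skipped (a hedged summary inferred from the blkat
+ *			handling in blst_meta_alloc()).
+ */
+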
+bsblk_t
+blist_allocat(blist_t bl, bsblk_t count, bsblk_t blkat)
+{
+ bsblk_t blk = SWAPBLK_NONE;
+
+ if (bl) {
+ KASSERT(blkat < bl->bl_blocks);
+ KASSERT(blkat + count <= bl->bl_blocks);
+
+ if (bl->bl_radix == BLIST_BMAP_RADIX)
+ blk = blst_leaf_alloc(bl->bl_root, blkat, 0, count);
+ else
+ blk = blst_meta_alloc(bl->bl_root, blkat, 0, count,
+ bl->bl_radix, bl->bl_skip);
+ if (blk != SWAPBLK_NONE)
+ bl->bl_free -= count;
+ }
+ return(blk);
+}
+
+/*
+ * blist_free() -	free up space in the block bitmap. Panic if an
+ *			inconsistency is found.
+ */
+
+void
+blist_free(blist_t bl, bsblk_t blkno, bsblk_t count)
+{
+ if (bl) {
+ KASSERT(blkno < bl->bl_blocks);
+ KASSERT(blkno + count <= bl->bl_blocks);
+
+ if (bl->bl_radix == BLIST_BMAP_RADIX)
+ blst_leaf_free(bl->bl_root, blkno, count);
+ else
+ blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
+ bl->bl_free += count;
+ }
+}
+
+/*
+ * blist_fill() - mark a region in the block bitmap as off-limits
+ * to the allocator (i.e. allocate it), ignoring any
+ * existing allocations. Return the number of blocks
+ * actually filled that were free before the call.
+ */
+
+bsblk_t
+blist_fill(blist_t bl, bsblk_t blkno, bsblk_t count)
+{
+ bsblk_t filled;
+
+ if (bl) {
+ KASSERT(blkno < bl->bl_blocks);
+ KASSERT(blkno + count <= bl->bl_blocks);
+
+ if (bl->bl_radix == BLIST_BMAP_RADIX) {
+ filled = blst_leaf_fill(bl->bl_root, blkno, count);
+ } else {
+ filled = blst_meta_fill(bl->bl_root, blkno, count,
+ bl->bl_radix, bl->bl_skip, 0);
+ }
+ bl->bl_free -= filled;
+ return (filled);
+ } else {
+ return 0;
+ }
+}
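+
+/*
+ * For example (a hedged illustration): filling [8,16[ when blocks 8-11
+ * are already allocated returns 4, since only blocks 12-15 were free
+ * before the call.
+ */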
+
+/*
+ * blist_resize() - resize an existing radix tree to handle the
+ * specified number of blocks. This will reallocate
+ * the tree and transfer the previous bitmap to the new
+ * one. When extending the tree you can specify whether
+ *			the new blocks are to be left allocated or freed.
+ */
+
+void
+blist_resize(blist_t *pbl, bsblk_t count, int freenew)
+{
+ blist_t newbl = blist_create(count);
+ blist_t save = *pbl;
+
+ *pbl = newbl;
+ if (count > save->bl_blocks)
+ count = save->bl_blocks;
+ blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
+
+ /*
+ * If resizing upwards, should we free the new space or not?
+ */
+ if (freenew && count < newbl->bl_blocks) {
+ blist_free(newbl, count, newbl->bl_blocks - count);
+ }
+ blist_destroy(save);
+}
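+
+/*
+ * For example (hedged): growing a 1024-block blist with
+ * blist_resize(&bl, 2048, 1) copies the old bitmap into the new tree
+ * and then frees the added range [1024,2048[; with freenew == 0 the
+ * new blocks remain allocated.
+ */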
+
+#define GAPFIND_FIRSTFREE 0
+#define GAPFIND_FIRSTUSED 1
+
+/*
+ * blist_gapfind() -	return the largest gap (free pages) in the blist.
+ *			The blist isn't modified. The returned range
+ *			is [maxbp, maxep[. The size of the gap is
+ *			maxep - maxbp. If no gap is found, the size is 0.
+ */
+
+void
+blist_gapfind(blist_t bl, bsblk_t *maxbp, bsblk_t *maxep)
+{
+ int state;
+ bsblk_t b, e;
+
+ /* initialize gaps (max and current) */
+ *maxbp = *maxep = 0;
+ b = e = 0;
+
+	/* search the largest gap starting from block 0 */
+ state = blst_radix_gapfind(bl->bl_root, 0, bl->bl_radix, bl->bl_skip,
+ GAPFIND_FIRSTFREE, maxbp, maxep, &b, &e);
+
+ if (state == GAPFIND_FIRSTUSED) {
+ e = bl->bl_blocks;
+ if (*maxep - *maxbp < e - b) {
+ *maxbp = b;
+ *maxep = e;
+ }
+ }
+
+ KASSERT(*maxbp <= *maxep);
+ KASSERT(*maxbp < bl->bl_blocks);
+ KASSERT(*maxep <= bl->bl_blocks);
+}
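+
+/*
+ * For example (hedged, at the granularity of whole leaves): for the
+ * free/used pattern F F U F F F U the one-pass scan below records the
+ * gaps [0,2[ and [3,6[ and reports the larger one, [3,6[.
+ */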
+
+/*
+ * blst_radix_gapfind - search the largest gap in one pass
+ *
+ * - search first free block, from X -> set B
+ * - search first used block, from B -> set E
+ * - if the size (E - B) is larger than max, update it
+ * - loop (with X=E) until end of blist
+ * - max is the largest free gap
+ */
+static int
+blst_radix_gapfind(blmeta_t *scan, bsblk_t blk, bsblk_t radix, bsblk_t skip,
+ int state, bsblk_t *maxbp, bsblk_t *maxep, bsblk_t *bp, bsblk_t *ep)
+{
+ bsblk_t i;
+ bsblk_t next_skip;
+
+ if (radix == BLIST_BMAP_RADIX) {
+		/* leaf node: we consider only a completely free bitmap as free */
+ if (state == GAPFIND_FIRSTFREE) {
+ if (scan->u.bmu_bitmap == (bsbmp_t)-1) {
+ /* node is fully free */
+ *bp = blk;
+ return GAPFIND_FIRSTUSED;
+ }
+
+ /* it isn't fully free, not found, keep state */
+ return state;
+
+ } else if (state == GAPFIND_FIRSTUSED) {
+ if (scan->u.bmu_bitmap == (bsbmp_t)-1) {
+ /* it is free, not found, keep state */
+ return state;
+ }
+
+ /* it is (at least partially) used */
+ *ep = blk;
+ if (*maxep - *maxbp < *ep - *bp) {
+ *maxbp = *bp;
+ *maxep = *ep;
+ }
+ return GAPFIND_FIRSTFREE;
+ }
+ }
+
+ if (scan->u.bmu_avail == 0) {
+ /* ALL-ALLOCATED */
+ if (state == GAPFIND_FIRSTFREE) {
+ /* searching free block, not found, keep state */
+ return state;
+
+ } else if (state == GAPFIND_FIRSTUSED) {
+ /* searching used block, found */
+ *ep = blk;
+ if (*maxep - *maxbp < *ep - *bp) {
+ *maxbp = *bp;
+ *maxep = *ep;
+ }
+ return GAPFIND_FIRSTFREE;
+ }
+ }
+
+ if (scan->u.bmu_avail == radix) {
+ /* ALL-FREE */
+ if (state == GAPFIND_FIRSTFREE) {
+ /* searching free block, found */
+ *bp = blk;
+ return GAPFIND_FIRSTUSED;
+
+ } else if (state == GAPFIND_FIRSTUSED) {
+ /* searching used block, not found, keep state */
+ return state;
+ }
+ }
+
+ radix /= BLIST_META_RADIX;
+ next_skip = (skip / BLIST_META_RADIX);
+
+ for (i = 1; i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1)
+ /* Terminator */
+ break;
+
+ state = blst_radix_gapfind(&scan[i], blk, radix, next_skip - 1,
+ state, maxbp, maxep, bp, ep);
+
+ blk += radix;
+ }
+
+ return state;
+}
+
+#if defined(BLIST_DEBUG) || defined(DDB)
+
+/*
+ * blist_print() - dump radix tree
+ */
+
+void
+blist_print(blist_t bl)
+{
+ printf("BLIST {\n");
+ blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
+ printf("}\n");
+}
+
+#endif
+
+/************************************************************************
+ * ALLOCATION SUPPORT FUNCTIONS *
+ ************************************************************************
+ *
+ * These support functions do all the actual work. They may seem
+ * rather longish, but that's because I've commented them up. The
+ * actual code is straightforward.
+ *
+ */
+
+/*
+ * blst_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
+ *
+ * This is the core of the allocator and is optimized for the 1 block
+ * and the BLIST_BMAP_RADIX block allocation cases. Other cases are
+ * somewhat slower. The 1 block allocation case is log2 and extremely
+ * quick.
+ */
+
+static bsblk_t
+blst_leaf_alloc(blmeta_t *scan, bsblk_t blkat __unused, bsblk_t blk,
+ bsblk_t count)
+{
+ bsbmp_t orig = scan->u.bmu_bitmap;
+
+ if (orig == 0) {
+ /*
+ * Optimize bitmap all-allocated case. Also, count = 1
+ * case assumes at least 1 bit is free in the bitmap, so
+ * we have to take care of this case here.
+ */
+ scan->bm_bighint = 0;
+ return(SWAPBLK_NONE);
+ }
+ if (count == 1) {
+ /*
+ * Optimized code to allocate one bit out of the bitmap
+ */
+ bsbmp_t mask;
+ int j = BLIST_BMAP_RADIX/2;
+ int r = 0;
+
+ mask = (bsbmp_t)-1 >> (BLIST_BMAP_RADIX/2);
+
+		while (j) {
+			if ((orig & mask) == 0) {
+				/* no free bit in the low j bits, skip them */
+				r += j;
+				orig >>= j;
+			}
+			j >>= 1;
+			mask >>= j;	/* halve the search window */
+		}
+ scan->u.bmu_bitmap &= ~((bsbmp_t)1 << r);
+ return(blk + r);
+ }
+ if (count <= BLIST_BMAP_RADIX) {
+ /*
+ * non-optimized code to allocate N bits out of the bitmap.
+ * The more bits, the faster the code runs. It will run
+ * the slowest allocating 2 bits, but since there aren't any
+ * memory ops in the core loop (or shouldn't be, anyway),
+ * you probably won't notice the difference.
+ */
+ int j;
+ int n = (int)(BLIST_BMAP_RADIX - count);
+ bsbmp_t mask;
+
+ mask = (bsbmp_t)-1 >> n;
+
+ for (j = 0; j <= n; ++j) {
+ if ((orig & mask) == mask) {
+ scan->u.bmu_bitmap &= ~mask;
+ return(blk + j);
+ }
+ mask = (mask << 1);
+ }
+ }
+
+ /*
+ * We couldn't allocate count in this subtree, update bighint.
+ */
+ scan->bm_bighint = count - 1;
+
+ return(SWAPBLK_NONE);
+}
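+
+/*
+ * Worked example for the N-bit scan above (a hedged illustration using
+ * an 8-bit bitmap for brevity): with orig == 0x3a (bits 1,3,4,5 free)
+ * and count == 3, the 3-bit mask slides up until j == 3 (bits 3-5),
+ * those bits are cleared, and blk + 3 is returned, leaving the bitmap
+ * at 0x02.
+ */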
+
+/*
+ * blst_meta_alloc() -	allocate at a meta node in the radix tree.
+ *
+ *	Attempt to allocate at a meta node. If we can't, we update
+ *	bighint and return a failure. Updating bighint optimizes future
+ *	calls that hit this node. We have to check for our collapse cases
+ * and we have a few optimizations strewn in as well.
+ */
+static bsblk_t
+blst_meta_alloc(blmeta_t *scan, bsblk_t blkat,
+ bsblk_t blk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip)
+{
+ int hintok = (blk >= blkat);
+ bsblk_t next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+ bsblk_t i;
+
+#ifndef _KERNEL
+	printf("blst_meta_alloc blkat %lu blk %lu count %lu radix %lu\n",
+ blkat, blk, count, radix);
+#endif
+
+ /*
+ * ALL-ALLOCATED special case
+ */
+ if (scan->u.bmu_avail == 0) {
+ scan->bm_bighint = 0;
+ return(SWAPBLK_NONE);
+ }
+
+ /*
+ * ALL-FREE special case, initialize uninitialized
+ * sublevel.
+ *
+	 * NOTE: radix may exceed BLIST_BMAP_RADIX bits until first division.
+ */
+ if (scan->u.bmu_avail == radix) {
+ scan->bm_bighint = radix;
+
+ radix /= BLIST_META_RADIX;
+ for (i = 1; i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1)
+ break;
+ if (next_skip == 1) {
+ scan[i].u.bmu_bitmap = (bsbmp_t)-1;
+ scan[i].bm_bighint = BLIST_BMAP_RADIX;
+ } else {
+ scan[i].bm_bighint = (bsblk_t)radix;
+ scan[i].u.bmu_avail = (bsblk_t)radix;
+ }
+ }
+ } else {
+ radix /= BLIST_META_RADIX;
+ }
+
+ for (i = 1; i <= skip; i += next_skip) {
+ if (count <= scan[i].bm_bighint &&
+ blk + (bsblk_t)radix > blkat) {
+ /*
+ * count fits in object
+ */
+ bsblk_t r;
+ if (next_skip == 1) {
+ r = blst_leaf_alloc(&scan[i], blkat,
+ blk, count);
+ } else {
+ r = blst_meta_alloc(&scan[i], blkat,
+ blk, count,
+ radix, next_skip - 1);
+ }
+ if (r != SWAPBLK_NONE) {
+ scan->u.bmu_avail -= count;
+ if (scan->bm_bighint > scan->u.bmu_avail)
+ scan->bm_bighint = scan->u.bmu_avail;
+ return(r);
+ }
+ /* bighint was updated by recursion */
+ } else if (scan[i].bm_bighint == (bsblk_t)-1) {
+ /*
+ * Terminator
+ */
+ break;
+ } else if (count > (bsblk_t)radix) {
+ /*
+			 * count does not fit in object even if it were
+			 * completely free.
+ */
+ panic("%s: allocation too large %lu/%lu",
+ __func__, count, radix);
+ }
+ blk += (bsblk_t)radix;
+ }
+
+ /*
+ * We couldn't allocate count in this subtree, update bighint.
+ */
+ if (hintok && scan->bm_bighint >= count)
+ scan->bm_bighint = count - 1;
+ return(SWAPBLK_NONE);
+}
+
+/*
+ * BLST_LEAF_FREE() - free allocated block from leaf bitmap
+ */
+static void
+blst_leaf_free(blmeta_t *scan, bsblk_t blk, bsblk_t count)
+{
+ /*
+ * free some data in this bitmap
+ *
+ * e.g.
+ * 0000111111111110000
+ * \_________/\__/
+ * v n
+ */
+ int n = blk & (BLIST_BMAP_RADIX - 1);
+ bsbmp_t mask;
+
+ mask = ((bsbmp_t)-1 << n) &
+ ((bsbmp_t)-1 >> (BLIST_BMAP_RADIX - count - n));
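+	/*
+	 * e.g. (hedged, assuming a 32-bit bitmap): n == 4 and count == 3
+	 * give mask == 0x00000070, i.e. bits 4-6 set.
+	 */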
+
+ if (scan->u.bmu_bitmap & mask)
+ panic("%s: freeing free block", __func__);
+ scan->u.bmu_bitmap |= mask;
+
+ /*
+ * We could probably do a better job here. We are required to make
+ * bighint at least as large as the biggest contiguous block of
+ * data. If we just shoehorn it, a little extra overhead will
+	 * be incurred on the next allocation (but only that one typically).
+ */
+ scan->bm_bighint = BLIST_BMAP_RADIX;
+}
+
+/*
+ * BLST_META_FREE() - free allocated blocks from radix tree meta info
+ *
+ * This support routine frees a range of blocks from the bitmap.
+ * The range must be entirely enclosed by this radix node. If a
+ * meta node, we break the range down recursively to free blocks
+ * in subnodes (which means that this code can free an arbitrary
+ * range whereas the allocation code cannot allocate an arbitrary
+ * range).
+ */
+
+static void
+blst_meta_free(blmeta_t *scan, bsblk_t freeBlk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip, bsblk_t blk)
+{
+ bsblk_t i;
+ bsblk_t next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+
+#if 0
+ printf("FREE (%04lx,%lu) FROM (%04lx,%lu)\n",
+ freeBlk, count,
+ blk, radix
+ );
+#endif
+
+ /*
+ * ALL-ALLOCATED special case, initialize for recursion.
+ *
+ * We will short-cut the ALL-ALLOCATED -> ALL-FREE case.
+ */
+ if (scan->u.bmu_avail == 0) {
+ scan->u.bmu_avail = count;
+ scan->bm_bighint = count;
+
+ if (count != radix) {
+ for (i = 1; i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1)
+ break;
+ scan[i].bm_bighint = 0;
+ if (next_skip == 1) {
+ scan[i].u.bmu_bitmap = 0;
+ } else {
+ scan[i].u.bmu_avail = 0;
+ }
+ }
+ /* fall through */
+ }
+ } else {
+ scan->u.bmu_avail += count;
+ /* scan->bm_bighint = radix; */
+ }
+
+ /*
+ * ALL-FREE special case.
+ *
+ * Set bighint for higher levels to snoop.
+ */
+ if (scan->u.bmu_avail == radix) {
+ scan->bm_bighint = radix;
+ return;
+ }
+
+ /*
+ * Break the free down into its components
+ */
+ if (scan->u.bmu_avail > radix) {
+ panic("%s: freeing already "
+ "free blocks (%lu) %lu/%lu",
+ __func__, count, (long)scan->u.bmu_avail, radix);
+ }
+
+ radix /= BLIST_META_RADIX;
+
+	i = (freeBlk - blk) / (bsblk_t)radix;	/* child covering freeBlk */
+	blk += i * (bsblk_t)radix;		/* base block of that child */
+	i = i * next_skip + 1;			/* its index in the node array */
+
+ while (i <= skip && blk < freeBlk + count) {
+ bsblk_t v;
+
+ v = blk + (bsblk_t)radix - freeBlk;
+ if (v > count)
+ v = count;
+
+ if (scan->bm_bighint == (bsblk_t)-1)
+ panic("%s: freeing unexpected range", __func__);
+
+ if (next_skip == 1) {
+ blst_leaf_free(&scan[i], freeBlk, v);
+ } else {
+ blst_meta_free(&scan[i], freeBlk, v,
+ radix, next_skip - 1, blk);
+ }
+
+ /*
+ * After having dealt with the becomes-all-free case any
+ * partial free will not be able to bring us to the
+ * becomes-all-free state.
+ *
+ * We can raise bighint to at least the sub-segment's
+ * bighint.
+ */
+ if (scan->bm_bighint < scan[i].bm_bighint) {
+ scan->bm_bighint = scan[i].bm_bighint;
+ }
+ count -= v;
+ freeBlk += v;
+ blk += (bsblk_t)radix;
+ i += next_skip;
+ }
+}
+
+/*
+ * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
+ *
+ * Allocates all blocks in the specified range regardless of
+ * any existing allocations in that range. Returns the number
+ * of blocks allocated by the call.
+ */
+static bsblk_t
+blst_leaf_fill(blmeta_t *scan, bsblk_t blk, bsblk_t count)
+{
+ int n = blk & (BLIST_BMAP_RADIX - 1);
+ bsblk_t nblks;
+ bsbmp_t mask, bitmap;
+
+ mask = ((bsbmp_t)-1 << n) &
+ ((bsbmp_t)-1 >> (BLIST_BMAP_RADIX - count - n));
+
+	/*
+	 * Count the number of blocks we're about to allocate:
+	 * each iteration clears the lowest set bit of 'bitmap'.
+	 */
+	bitmap = scan->u.bmu_bitmap & mask;
+	for (nblks = 0; bitmap != 0; nblks++)
+		bitmap &= bitmap - 1;
+
+ scan->u.bmu_bitmap &= ~mask;
+ return (nblks);
+}
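+
+/*
+ * For example (a hedged illustration): with n == 2 and count == 4 the
+ * mask covers bits 2-5; if only bits 2 and 4 were free (set), the
+ * popcount gives nblks == 2, the number of blocks newly allocated.
+ */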
+
+/*
+ * BLST_META_FILL() - allocate specific blocks at a meta node
+ *
+ * Allocates the specified range of blocks, regardless of
+ * any existing allocations in the range. The range must
+ * be within the extent of this node. Returns the number
+ * of blocks allocated by the call.
+ */
+static bsblk_t
+blst_meta_fill(blmeta_t *scan, bsblk_t fillBlk, bsblk_t count,
+ bsblk_t radix, bsblk_t skip, bsblk_t blk)
+{
+ bsblk_t i;
+ bsblk_t next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+ bsblk_t nblks = 0;
+
+ if (count == radix || scan->u.bmu_avail == 0) {
+ /*
+ * ALL-ALLOCATED special case
+ */
+ nblks = scan->u.bmu_avail;
+ scan->u.bmu_avail = 0;
+ scan->bm_bighint = count;
+ return (nblks);
+ }
+
+ if (scan->u.bmu_avail == radix) {
+ radix /= BLIST_META_RADIX;
+
+ /*
+ * ALL-FREE special case, initialize sublevel
+ */
+ for (i = 1; i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1)
+ break;
+ if (next_skip == 1) {
+ scan[i].u.bmu_bitmap = (bsbmp_t)-1;
+ scan[i].bm_bighint = BLIST_BMAP_RADIX;
+ } else {
+ scan[i].bm_bighint = (bsblk_t)radix;
+ scan[i].u.bmu_avail = (bsblk_t)radix;
+ }
+ }
+ } else {
+ radix /= BLIST_META_RADIX;
+ }
+
+ if (count > (bsblk_t)radix)
+ panic("%s: allocation too large", __func__);
+
+ i = (fillBlk - blk) / (bsblk_t)radix;
+ blk += i * (bsblk_t)radix;
+ i = i * next_skip + 1;
+
+ while (i <= skip && blk < fillBlk + count) {
+ bsblk_t v;
+
+ v = blk + (bsblk_t)radix - fillBlk;
+ if (v > count)
+ v = count;
+
+ if (scan->bm_bighint == (bsblk_t)-1)
+ panic("%s: filling unexpected range", __func__);
+
+ if (next_skip == 1) {
+ nblks += blst_leaf_fill(&scan[i], fillBlk, v);
+ } else {
+ nblks += blst_meta_fill(&scan[i], fillBlk, v,
+ radix, next_skip - 1, blk);
+ }
+ count -= v;
+ fillBlk += v;
+ blk += (bsblk_t)radix;
+ i += next_skip;
+ }
+ scan->u.bmu_avail -= nblks;
+ return (nblks);
+}
+
+/*
+ * BLST_COPY() - copy one radix tree to another
+ *
+ * Locates free space in the source tree and frees it in the destination
+ * tree. The space may not already be free in the destination.
+ */
+
+static void
+blst_copy(blmeta_t *scan, bsblk_t blk, bsblk_t radix,
+ bsblk_t skip, blist_t dest, bsblk_t count)
+{
+ bsblk_t next_skip;
+ bsblk_t i;
+
+ /*
+ * Leaf node
+ */
+
+ if (radix == BLIST_BMAP_RADIX) {
+ bsbmp_t v = scan->u.bmu_bitmap;
+
+ if (v == (bsbmp_t)-1) {
+ blist_free(dest, blk, count);
+ } else if (v != 0) {
+ for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
+				if (v & ((bsbmp_t)1 << i))
+ blist_free(dest, blk + i, 1);
+ }
+ }
+ return;
+ }
+
+ /*
+ * Meta node
+ */
+
+ if (scan->u.bmu_avail == 0) {
+ /*
+ * Source all allocated, leave dest allocated
+ */
+ return;
+ }
+ if (scan->u.bmu_avail == radix) {
+ /*
+ * Source all free, free entire dest
+ */
+ if (count < radix)
+ blist_free(dest, blk, count);
+ else
+ blist_free(dest, blk, (bsblk_t)radix);
+ return;
+ }
+
+
+ radix /= BLIST_META_RADIX;
+	next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+
+ for (i = 1; count && i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1)
+ break;
+
+ if (count >= (bsblk_t)radix) {
+ blst_copy(
+ &scan[i],
+ blk,
+ radix,
+ next_skip - 1,
+ dest,
+ (bsblk_t)radix
+ );
+ count -= (bsblk_t)radix;
+ } else {
+ if (count) {
+ blst_copy(
+ &scan[i],
+ blk,
+ radix,
+ next_skip - 1,
+ dest,
+ count
+ );
+ }
+ count = 0;
+ }
+ blk += (bsblk_t)radix;
+ }
+}
+
+/*
+ * BLST_RADIX_INIT() - initialize radix tree
+ *
+ * Initialize our meta structures and bitmaps and calculate the exact
+ * amount of space required to manage 'count' blocks - this space may
+ * be considerably less than the calculated radix due to the large
+ * RADIX values we use.
+ */
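+/*
+ * Worked example (hedged; BLIST_BMAP_RADIX == 32, BLIST_META_RADIX == 16):
+ * for count == 40 the root gets radix 512 and skip 16. Child 1 becomes
+ * a fully-used 32-block leaf, child 2 a partial leaf for the remaining
+ * 8 blocks, and child 3 the terminator, so 1 + 3 == 4 blmeta_t records
+ * suffice instead of the 17 a fully-populated radix-512 tree would need.
+ */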
+
+static bsblk_t
+blst_radix_init(blmeta_t *scan, bsblk_t radix, bsblk_t skip, bsblk_t count)
+{
+ bsblk_t i;
+ bsblk_t next_skip;
+ bsblk_t memindex = 0;
+
+ /*
+ * Leaf node
+ */
+
+ if (radix == BLIST_BMAP_RADIX) {
+ if (scan) {
+ scan->bm_bighint = 0;
+ scan->u.bmu_bitmap = 0;
+ }
+ return(memindex);
+ }
+
+ /*
+ * Meta node. If allocating the entire object we can special
+ * case it. However, we need to figure out how much memory
+ * is required to manage 'count' blocks, so we continue on anyway.
+ */
+
+ if (scan) {
+ scan->bm_bighint = 0;
+ scan->u.bmu_avail = 0;
+ }
+
+ radix /= BLIST_META_RADIX;
+	next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+
+ for (i = 1; i <= skip; i += next_skip) {
+ if (count >= (bsblk_t)radix) {
+ /*
+ * Allocate the entire object
+ */
+ memindex = i + blst_radix_init(
+ ((scan) ? &scan[i] : NULL),
+ radix,
+ next_skip - 1,
+ (bsblk_t)radix
+ );
+ count -= (bsblk_t)radix;
+ } else if (count > 0) {
+ /*
+ * Allocate a partial object
+ */
+ memindex = i + blst_radix_init(
+ ((scan) ? &scan[i] : NULL),
+ radix,
+ next_skip - 1,
+ count
+ );
+ count = 0;
+ } else {
+ /*
+ * Add terminator and break out
+ */
+ if (scan)
+ scan[i].bm_bighint = (bsblk_t)-1;
+ break;
+ }
+ }
+ if (memindex < i)
+ memindex = i;
+ return(memindex);
+}
+
+#if defined(BLIST_DEBUG) || defined(DDB)
+
+static void
+blst_radix_print(blmeta_t *scan, bsblk_t blk, bsblk_t radix, bsblk_t skip, int tab)
+{
+ bsblk_t i;
+ bsblk_t next_skip;
+
+ if (radix == BLIST_BMAP_RADIX) {
+ printf(
+ "%*.*s(%04lx,%lu): bitmap %0*lx big=%lu\n",
+ tab, tab, "",
+ blk, radix,
+ (int)(1 + (BLIST_BMAP_RADIX - 1) / 4),
+ scan->u.bmu_bitmap,
+ scan->bm_bighint
+ );
+ return;
+ }
+
+ if (scan->u.bmu_avail == 0) {
+ printf(
+ "%*.*s(%04lx,%lu) ALL ALLOCATED\n",
+ tab, tab, "",
+ blk,
+ radix
+ );
+ return;
+ }
+ if (scan->u.bmu_avail == radix) {
+ printf(
+ "%*.*s(%04lx,%lu) ALL FREE\n",
+ tab, tab, "",
+ blk,
+ radix
+ );
+ return;
+ }
+
+ printf(
+ "%*.*s(%04lx,%lu): subtree (%lu/%lu) big=%lu {\n",
+ tab, tab, "",
+ blk, radix,
+ scan->u.bmu_avail,
+ radix,
+ scan->bm_bighint
+ );
+
+ radix /= BLIST_META_RADIX;
+	next_skip = ((bsblk_t)skip / BLIST_META_RADIX);
+ tab += 4;
+
+ for (i = 1; i <= skip; i += next_skip) {
+ if (scan[i].bm_bighint == (bsblk_t)-1) {
+ printf(
+ "%*.*s(%04lx,%lu): Terminator\n",
+ tab, tab, "",
+ blk, radix
+ );
+ break;
+ }
+ blst_radix_print(
+ &scan[i],
+ blk,
+ radix,
+ next_skip - 1,
+ tab
+ );
+ blk += (bsblk_t)radix;
+ }
+ tab -= 4;
+
+ printf(
+ "%*.*s}\n",
+ tab, tab, ""
+ );
+}
+
+#endif
+
+#if !defined(_KERNEL) && defined(BLIST_DEBUG)
+
+int
+main(int ac, char **av)
+{
+ bsblk_t size = 1024;
+ bsblk_t i;
+ blist_t bl;
+
+ for (i = 1; i < (bsblk_t)ac; ++i) {
+ const char *ptr = av[i];
+ if (*ptr != '-') {
+ size = strtol(ptr, NULL, 0);
+ continue;
+ }
+ ptr += 2;
+ fprintf(stderr, "Bad option: %s\n", ptr - 2);
+ exit(1);
+ }
+ bl = blist_create(size);
+ blist_free(bl, 0, size);
+
+ for (;;) {
+ char buf[1024];
+ bsblk_t da = 0;
+ bsblk_t count = 0;
+ bsblk_t blkat;
+
+ printf("%lu/%lu/%lu> ",
+ bl->bl_free, size, bl->bl_radix);
+ fflush(stdout);
+ if (fgets(buf, sizeof(buf), stdin) == NULL)
+ break;
+ switch(buf[0]) {
+ case '#':
+ continue;
+ case 'r':
+ if (sscanf(buf + 1, "%li", &count) == 1) {
+ blist_resize(&bl, count, 1);
+ size = count;
+ } else {
+ printf("?\n");
+			}
+			break;
+ case 'p':
+ blist_print(bl);
+ break;
+ case 'a':
+ if (sscanf(buf + 1, "%li %li", &count, &blkat) == 1) {
+ printf("count %lu\n", count);
+ bsblk_t blk = blist_alloc(bl, count);
+ printf(" R=%04lx\n", blk);
+ } else if (sscanf(buf + 1, "%li %li", &count, &blkat) == 2) {
+ bsblk_t blk = blist_allocat(bl, count, blkat);
+ printf(" R=%04lx\n", blk);
+ } else {
+ printf("?\n");
+ }
+ break;
+ case 'f':
+ if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
+ blist_free(bl, da, count);
+ } else {
+ printf("?\n");
+ }
+ break;
+ case 'g': {
+ bsblk_t b, e;
+ blist_gapfind(bl, &b, &e);
+ printf("gapfind: begin=%04lx end=%04lx size=%lu\n",
+ b, e, e-b);
+ break;
+ }
+ case 'l':
+ if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
+ printf(" n=%lu\n",
+ blist_fill(bl, da, count));
+ } else {
+ printf("?\n");
+ }
+ break;
+ case '?':
+ case 'h':
+ puts(
+ "p -print\n"
+ "a %li -allocate\n"
+ "f %li %li -free\n"
+ "l %li %li -fill\n"
+ "g -gapfind\n"
+ "r %li -resize\n"
+ "h/? -help\n"
+ " hex may be specified with 0x prefix\n"
+ );
+ break;
+ default:
+ printf("?\n");
+ break;
+ }
+ }
+ return(0);
+}
+
+#endif