From: espie Date: Mon, 12 May 2014 19:09:00 +0000 (+0000) Subject: move the ohash functions into libutil by popular demand. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=42dcb487124b72431b966827e39770df4dd1c0c3;p=openbsd move the ohash functions into libutil by popular demand. It's not a standard interface, so it doesn't belong in libc. I hate duplicating the code in client programs, so do beck@, kettenis@, schwarze@, millert@, miod@... and they agree with libutil. --- diff --git a/include/Makefile b/include/Makefile index 29cc48ac484..a1662404a92 100644 --- a/include/Makefile +++ b/include/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.189 2014/04/22 10:21:56 reyk Exp $ +# $OpenBSD: Makefile,v 1.190 2014/05/12 19:09:00 espie Exp $ # $NetBSD: Makefile,v 1.59 1996/05/15 21:36:43 jtc Exp $ # @(#)Makefile 5.45.1.1 (Berkeley) 5/6/91 @@ -15,7 +15,7 @@ FILES= a.out.h ar.h asr.h assert.h bitstring.h blf.h bsd_auth.h \ ftw.h getopt.h glob.h grp.h ifaddrs.h inttypes.h iso646.h kvm.h \ langinfo.h libgen.h limits.h locale.h login_cap.h malloc.h math.h \ md5.h memory.h mpool.h ndbm.h netdb.h netgroup.h nlist.h nl_types.h \ - ohash.h paths.h poll.h pwd.h ranlib.h readpassphrase.h regex.h \ + paths.h poll.h pwd.h ranlib.h readpassphrase.h regex.h \ resolv.h rmd160.h search.h setjmp.h sha1.h sha2.h signal.h sndio.h \ spawn.h stdbool.h stddef.h stdio.h stdlib.h string.h strings.h struct.h \ sysexits.h tar.h tgmath.h time.h ttyent.h tzfile.h unistd.h utime.h \ diff --git a/lib/libc/shlib_version b/lib/libc/shlib_version index 0a4400a744f..6f2ed916983 100644 --- a/lib/libc/shlib_version +++ b/lib/libc/shlib_version @@ -1,4 +1,4 @@ -major=74 -minor=2 +major=75 +minor=0 # note: If changes were made to include/thread_private.h or if system # calls were added/changed then librthread/shlib_version also be updated. 
diff --git a/lib/libutil/Makefile b/lib/libutil/Makefile index d4e748231ba..0540d61d30a 100644 --- a/lib/libutil/Makefile +++ b/lib/libutil/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.36 2013/06/03 21:07:02 tedu Exp $ +# $OpenBSD: Makefile,v 1.37 2014/05/12 19:09:00 espie Exp $ # $NetBSD: Makefile,v 1.8 1996/05/16 07:03:28 thorpej Exp $ LIB= util @@ -60,6 +60,26 @@ MLINKS+=uucplock.3 uu_lockerr.3 MLINKS+=uucplock.3 uu_lock_txfr.3 MLINKS+=fmt_scaled.3 scan_scaled.3 +SRCS+= ohash_create_entry.c ohash_delete.c ohash_do.c ohash_entries.c \ + ohash_enum.c ohash_init.c ohash_interval.c \ + ohash_lookup_interval.c ohash_lookup_memory.c \ + ohash_qlookup.c ohash_qlookupi.c + +MAN += ohash_init.3 ohash_interval.3 +MLINKS += ohash_init.3 ohash_delete.3 \ + ohash_init.3 ohash_lookup_interval.3 \ + ohash_init.3 ohash_lookup_memory.3 \ + ohash_init.3 ohash_find.3 \ + ohash_init.3 ohash_remove.3 \ + ohash_init.3 ohash_insert.3 \ + ohash_init.3 ohash_first.3 \ + ohash_init.3 ohash_next.3 \ + ohash_init.3 ohash_entries.3 \ + ohash_interval.3 ohash_create_entry.3 \ + ohash_interval.3 ohash_qlookupi.3 \ + ohash_interval.3 ohash_qlookup.3 +HDRS += ohash.h + includes: @cd ${.CURDIR}; for i in $(HDRS); do \ j="cmp -s $$i ${DESTDIR}/usr/include/$$i || \ diff --git a/lib/libutil/ohash.h b/lib/libutil/ohash.h new file mode 100644 index 00000000000..14a272552ee --- /dev/null +++ b/lib/libutil/ohash.h @@ -0,0 +1,75 @@ +#ifndef OHASH_H +#define OHASH_H +/* $OpenBSD: ohash.h,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Open hashing support. + * Open hashing was chosen because it is much lighter than other hash + * techniques, and more efficient in most cases. + */ + +/* user-visible data structure */ +struct ohash_info { + ptrdiff_t key_offset; + void *data; /* user data */ + void *(*calloc)(size_t, size_t, void *); + void (*free)(void *, void *); + void *(*alloc)(size_t, void *); +}; + +struct _ohash_record; + +/* private structure. It's there just so you can do a sizeof */ +struct ohash { + struct _ohash_record *t; + struct ohash_info info; + unsigned int size; + unsigned int total; + unsigned int deleted; +}; + +/* For this to be tweakable, we use small primitives, and leave part of the + * logic to the client application. e.g., hashing is left to the client + * application. We also provide a simple table entry lookup that yields + * a hashing table index (opaque) to be used in find/insert/remove. + * The keys are stored at a known position in the client data. 
+ */ +__BEGIN_DECLS +void ohash_init(struct ohash *, unsigned, struct ohash_info *); +void ohash_delete(struct ohash *); + +unsigned int ohash_lookup_interval(struct ohash *, const char *, + const char *, uint32_t); +unsigned int ohash_lookup_memory(struct ohash *, const char *, + size_t, uint32_t) + __attribute__ ((__bounded__(__string__,2,3))); +void *ohash_find(struct ohash *, unsigned int); +void *ohash_remove(struct ohash *, unsigned int); +void *ohash_insert(struct ohash *, unsigned int, void *); +void *ohash_first(struct ohash *, unsigned int *); +void *ohash_next(struct ohash *, unsigned int *); +unsigned int ohash_entries(struct ohash *); + +void *ohash_create_entry(struct ohash_info *, const char *, const char **); +uint32_t ohash_interval(const char *, const char **); + +unsigned int ohash_qlookupi(struct ohash *, const char *, const char **); +unsigned int ohash_qlookup(struct ohash *, const char *); +__END_DECLS +#endif diff --git a/lib/libutil/ohash_create_entry.c b/lib/libutil/ohash_create_entry.c new file mode 100644 index 00000000000..2a5248a3fe2 --- /dev/null +++ b/lib/libutil/ohash_create_entry.c @@ -0,0 +1,38 @@ +/* $OpenBSD: ohash_create_entry.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +/* This handles the common case of variable length keys, where the + * key is stored at the end of the record. + */ +void * +ohash_create_entry(struct ohash_info *i, const char *start, const char **end) +{ + char *p; + + if (!*end) + *end = start + strlen(start); + p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data); + if (p) { + memcpy(p+i->key_offset, start, *end-start); + p[i->key_offset + (*end - start)] = '\0'; + } + return (void *)p; +} diff --git a/lib/libutil/ohash_delete.c b/lib/libutil/ohash_delete.c new file mode 100644 index 00000000000..b6bb2a9b64f --- /dev/null +++ b/lib/libutil/ohash_delete.c @@ -0,0 +1,30 @@ +/* $OpenBSD: ohash_delete.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" +/* ohash_delete only frees the hash structure.
Use ohash_first/ohash_next + * to free entries as well. */ +void +ohash_delete(struct ohash *h) +{ + (h->info.free)(h->t, h->info.data); +#ifndef NDEBUG + h->t = NULL; +#endif +} diff --git a/lib/libutil/ohash_do.c b/lib/libutil/ohash_do.c new file mode 100644 index 00000000000..0dc33672f02 --- /dev/null +++ b/lib/libutil/ohash_do.c @@ -0,0 +1,116 @@ +/* $OpenBSD: ohash_do.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <limits.h> +#include "ohash_int.h" + +static void ohash_resize(struct ohash *); + +static void +ohash_resize(struct ohash *h) +{ + struct _ohash_record *n; + size_t ns; + unsigned int j; + unsigned int i, incr; + + if (4 * h->deleted < h->total) { + if (h->size >= (UINT_MAX >> 1U)) + ns = UINT_MAX; + else + ns = h->size << 1U; + } else if (3 * h->deleted > 2 * h->total) + ns = h->size >> 1U; + else + ns = h->size; + if (ns < MINSIZE) + ns = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_EXPAND++; + STAT_HASH_SIZE += ns - h->size; +#endif + + n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data); + if (!n) + return; + + for (j = 0; j < h->size; j++) { + if (h->t[j].p != NULL && h->t[j].p != DELETED) { + i = h->t[j].hv % ns; + incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1; + while (n[i].p != NULL) { + i += incr; + if (i >= ns) + i -= ns; + } + n[i].hv = h->t[j].hv; + n[i].p = h->t[j].p; + } + } + (h->info.free)(h->t, h->info.data); + h->t = n; + h->size = ns; + h->total -= h->deleted; + h->deleted = 0; +} + +void * +ohash_remove(struct ohash *h, unsigned int i) +{ + void *result = (void *)h->t[i].p; + + if (result == NULL || result == DELETED) + return NULL; + +#ifdef STATS_HASH + STAT_HASH_ENTRIES--; +#endif + h->t[i].p = DELETED; + h->deleted++; + if (h->deleted >= MINDELETED && 4 * h->deleted > h->total) + ohash_resize(h); + return result; +} + +void * +ohash_find(struct ohash *h, unsigned int i) +{ + if (h->t[i].p == DELETED) + return NULL; + else + return (void *)h->t[i].p; +} + +void * +ohash_insert(struct ohash *h, unsigned int i, void *p) +{ +#ifdef STATS_HASH + STAT_HASH_ENTRIES++; +#endif + if (h->t[i].p == DELETED) { + h->deleted--; + h->t[i].p = p; + } else { + h->t[i].p = p; + /* Arbitrary resize boundary. Tweak if not efficient enough.
*/ + if (++h->total * 4 > h->size * 3) + ohash_resize(h); + } + return p; +} diff --git a/lib/libutil/ohash_entries.c b/lib/libutil/ohash_entries.c new file mode 100644 index 00000000000..f6979a1c1de --- /dev/null +++ b/lib/libutil/ohash_entries.c @@ -0,0 +1,26 @@ +/* $OpenBSD: ohash_entries.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +unsigned int +ohash_entries(struct ohash *h) +{ + return h->total - h->deleted; +} diff --git a/lib/libutil/ohash_enum.c b/lib/libutil/ohash_enum.c new file mode 100644 index 00000000000..616e04a2fba --- /dev/null +++ b/lib/libutil/ohash_enum.c @@ -0,0 +1,36 @@ +/* $OpenBSD: ohash_enum.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +void * +ohash_first(struct ohash *h, unsigned int *pos) +{ + *pos = 0; + return ohash_next(h, pos); +} + +void * +ohash_next(struct ohash *h, unsigned int *pos) +{ + for (; *pos < h->size; (*pos)++) + if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL) + return (void *)h->t[(*pos)++].p; + return NULL; +} diff --git a/lib/libutil/ohash_init.3 b/lib/libutil/ohash_init.3 new file mode 100644 index 00000000000..53ca173b638 --- /dev/null +++ b/lib/libutil/ohash_init.3 @@ -0,0 +1,271 @@ +.\" $OpenBSD: ohash_init.3,v 1.1 2014/05/12 19:09:00 espie Exp $ +.\" Copyright (c) 1999 Marc Espie +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: May 12 2014 $ +.Dt OHASH_INIT 3 +.Os +.Sh NAME +.Nm ohash_init , +.Nm ohash_delete , +.Nm ohash_lookup_interval , +.Nm ohash_lookup_memory , +.Nm ohash_find , +.Nm ohash_remove , +.Nm ohash_insert , +.Nm ohash_first , +.Nm ohash_next , +.Nm ohash_entries +.Nd light-weight open hashing +.Sh SYNOPSIS +.In stdint.h +.In stddef.h +.In ohash.h +.Ft void +.Fn ohash_init "struct ohash *h" "unsigned int size" "struct ohash_info *info" +.Ft void +.Fn ohash_delete "struct ohash *h" +.Ft "unsigned int" +.Fn ohash_lookup_interval "struct ohash *h" "const char *start" "const char *end" "uint32_t hv" +.Ft "unsigned int" +.Fn ohash_lookup_memory "struct ohash *h" "const char *k" "size_t s" "uint32_t hv" +.Ft void * +.Fn ohash_find "struct ohash *h" "unsigned int i" +.Ft void * +.Fn ohash_remove "struct ohash *h" "unsigned int i" +.Ft void * +.Fn ohash_insert "struct ohash *h" "unsigned int i" "void *p" +.Ft void * +.Fn ohash_first "struct ohash *h" "unsigned int *i" +.Ft void * +.Fn ohash_next "struct ohash *h" "unsigned int *i" +.Ft "unsigned int" +.Fn ohash_entries "struct ohash *h" +.Sh DESCRIPTION +These functions have been designed as a fast, extensible alternative to +the usual hash table functions. +They provide storage and retrieval of records indexed by keys, +where a key is a contiguous sequence of bytes at a fixed position in +each record. +Keys can either be NUL-terminated strings or fixed-size memory areas. +All functions take a pointer to an ohash structure as the +.Fa h +function argument. +Storage for this structure should be provided by user code. +.Pp +.Fn ohash_init +initializes the table to store roughly 2 to the power +.Fa size +elements. +.Fa info +is a pointer to a +.Fa struct ohash_info . 
+.Bd -literal -offset indent +struct ohash_info { + ptrdiff_t key_offset; + void *data; /* user data */ + void *(*calloc)(size_t, size_t, void *); + void (*free)(void *, void *); + void *(*alloc)(size_t, void *); +}; +.Ed +.Pp +The +.Va key_offset +field holds the position of the key in each record; +the +.Va calloc +and +.Va free +fields are pointers to +.Xr calloc 3 +and +.Xr free 3 Ns -like +functions, used for managing the table internal storage; +the +.Va alloc +field is only used by the utility function +.Xr ohash_create_entry 3 . +.Pp +Each of these functions is called similarly to its standard counterpart, +but with an extra +.Ft void * +parameter corresponding to the content of the field +.Fa data , +which can be used to communicate specific information to the functions. +.Pp +.Fn ohash_init +stores a copy of those fields internally, so +.Fa info +can be reclaimed after initialization. +.Pp +.Fn ohash_delete +frees storage internal to +.Fa h . +Elements themselves should be freed by the user first, using for instance +.Fn ohash_first +and +.Fn ohash_next . +.Pp +.Fn ohash_lookup_interval +and +.Fn ohash_lookup_memory +are the basic look-up element functions. +The hashing function result is provided by the user as +.Fa hv . +These return a +.Qq slot +in the ohash table +.Fa h , +to be used with +.Fn ohash_find , +.Fn ohash_insert , +or +.Fn ohash_remove . +This slot is only valid up to the next call to +.Fn ohash_insert +or +.Fn ohash_remove . +.Pp +.Fn ohash_lookup_interval +handles string-like keys. +.Fn ohash_lookup_interval +assumes the key is the interval between +.Fa start +and +.Fa end , +exclusive, +though the actual elements stored in the table should only contain +NUL-terminated keys. +.Pp +.Fn ohash_lookup_memory +assumes the key is the memory area starting at +.Fa k +of size +.Fa s . +All bytes are significant in key comparison. +.Pp +.Fn ohash_find +retrieves an element from a slot +.Fa i +returned by the +.Fn ohash_lookup* +functions.
+It returns +.Dv NULL +if the slot is empty. +.Pp +.Fn ohash_insert +inserts a new element +.Fa p +at slot +.Fa i . +Slot +.Fa i +must be empty and element +.Fa p +must have a key corresponding to the +.Fn ohash_lookup* +call. +.Pp +.Fn ohash_remove +removes the element at slot +.Fa i . +It returns the removed element, for user code to dispose of, or +.Dv NULL +if the slot was empty. +.Pp +.Fn ohash_first +and +.Fn ohash_next +can be used to access all elements in an ohash table, like this: +.Bd -literal -offset indent +for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i)) + do_something_with(n); +.Ed +.Pp +.Fa i +points to an auxiliary unsigned integer used to record the current position +in the ohash table. +Those functions are safe to use even while entries are added to/removed +from the table, but in such a case they don't guarantee that new entries +will be returned. +As a special case, they can safely be used to free elements in the table. +.Pp +.Fn ohash_entries +returns the number of elements in the hash table. +.Sh STORAGE HANDLING +Only +.Fn ohash_init , +.Fn ohash_insert , +.Fn ohash_remove +and +.Fn ohash_delete +may call the user-supplied memory functions: +.Bd -literal -offset indent +p = (*info->calloc)(n, sizeof_record, info->data); +/* copy data from old to p */ +(*info->free)(old, info->data); +.Ed +.Pp +It is the responsibility of the user memory allocation code to verify +that those calls did not fail. +.Pp +If memory allocation fails, +.Fn ohash_init +returns a useless hash table. +.Fn ohash_insert +and +.Fn ohash_remove +still perform the requested operation, but the returned table should be +considered read-only. +It can still be accessed by +.Fn ohash_lookup* , +.Fn ohash_find , +.Fn ohash_first +and +.Fn ohash_next +to dump relevant information to disk before aborting. +.Sh THREAD SAFETY +The open hashing functions are not thread-safe by design. 
+In particular, in a threaded environment, there is no guarantee that a +.Qq slot +will not move between a +.Fn ohash_lookup* +and a +.Fn ohash_find , +.Fn ohash_insert +or +.Fn ohash_remove +call. +.Pp +Multi-threaded applications should explicitly protect ohash table access. +.Sh SEE ALSO +.Xr hcreate 3 , +.Xr ohash_interval 3 +.Rs +.%A Donald E. Knuth +.%B The Art of Computer Programming +.%V Vol. 3 +.%P pp 506-550 +.%D 1973 +.Re +.Sh STANDARDS +Those functions are completely non-standard and should be avoided in +portable programs. +.Sh HISTORY +Those functions were designed and written for +.Ox +.Xr make 1 +by Marc Espie in 1999. diff --git a/lib/libutil/ohash_init.c b/lib/libutil/ohash_init.c new file mode 100644 index 00000000000..ff3c8419806 --- /dev/null +++ b/lib/libutil/ohash_init.c @@ -0,0 +1,41 @@ +/* $OpenBSD: ohash_init.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "ohash_int.h" + +void +ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info) +{ + h->size = 1UL << size; + if (h->size < MINSIZE) + h->size = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_CREATION++; + STAT_HASH_SIZE += h->size; +#endif + /* Copy info so that caller may free it. */ + h->info.key_offset = info->key_offset; + h->info.calloc = info->calloc; + h->info.free = info->free; + h->info.alloc = info->alloc; + h->info.data = info->data; + h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record), + h->info.data); + h->total = h->deleted = 0; +} diff --git a/lib/libutil/ohash_int.h b/lib/libutil/ohash_int.h new file mode 100644 index 00000000000..1a463dde1a0 --- /dev/null +++ b/lib/libutil/ohash_int.h @@ -0,0 +1,19 @@ +/* $OpenBSD: ohash_int.h,v 1.1 2014/05/12 19:09:00 espie Exp $ */ + +#include +#include +#include +#include +#include "ohash.h" + +struct _ohash_record { + uint32_t hv; + const char *p; +}; + +#define DELETED ((const char *)h) +#define NONE (h->size) + +/* Don't bother changing the hash table if the change is small enough. */ +#define MINSIZE (1UL << 4) +#define MINDELETED 4 diff --git a/lib/libutil/ohash_interval.3 b/lib/libutil/ohash_interval.3 new file mode 100644 index 00000000000..f174a6fd46b --- /dev/null +++ b/lib/libutil/ohash_interval.3 @@ -0,0 +1,93 @@ +.\" $OpenBSD: ohash_interval.3,v 1.1 2014/05/12 19:09:00 espie Exp $ +.\" Copyright (c) 2001 Marc Espie +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 12 2014 $ +.Dt OHASH_INTERVAL 3 +.Os +.Sh NAME +.Nm ohash_interval , +.Nm ohash_create_entry , +.Nm ohash_qlookup , +.Nm ohash_qlookupi +.Nd helper functions for open hashing +.Sh SYNOPSIS +.In stdint.h +.In stddef.h +.In ohash.h +.Ft uint32_t +.Fn ohash_interval "const char *start" "const char **pend" +.Ft "void *" +.Fn ohash_create_entry "struct ohash_info *info" "const char *start" "const char **pend" +.Ft "unsigned int" +.Fn ohash_qlookupi "struct ohash *h" "const char *start" "const char **pend" +.Ft "unsigned int" +.Fn ohash_qlookup "struct ohash *h" "const char *start" +.Sh DESCRIPTION +These functions are commonly used to simplify open hashing usage, and use +similar conventions. +They operate indifferently on NUL-terminated strings +.Po +by setting +.Fa *pend += +.Dv NULL +.Pc +or memory ranges +.Po +delimited by +.Fa start +and +.Fa *pend +.Pc . +For NUL-terminated strings, as a side effect, those functions +set +.Fa *pend +to the terminating NUL byte. +.Pp +.Fn ohash_interval +is a simple hashing function that yields good results on common data sets. +.Pp +.Fn ohash_create_entry +can be used to create a new record with a given key. +In that case, +the alloc field of +.Fa info +should point to a +.Xr malloc 3 Ns -like +function to allocate the storage: +.Bd -literal -offset indent +p = (*info->alloc)(sz, info->data); +.Ed +.Pp +.Fn ohash_qlookupi +is a wrapper function that simply calls +.Fn ohash_interval +and +.Fn ohash_lookup_interval . +.Pp +.Fn ohash_qlookup +is a variation on +.Fn ohash_qlookupi +designed for NUL-terminated strings. 
+.Sh SEE ALSO +.Xr ohash_init 3 +.Sh STANDARDS +Those functions are completely non-standard and should be avoided in +portable programs. +.Sh HISTORY +Those functions were designed and written for +.Ox +.Xr make 1 +by Marc Espie in 1999. diff --git a/lib/libutil/ohash_interval.c b/lib/libutil/ohash_interval.c new file mode 100644 index 00000000000..c3f22275189 --- /dev/null +++ b/lib/libutil/ohash_interval.c @@ -0,0 +1,36 @@ +/* $OpenBSD: ohash_interval.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "ohash_int.h" + +uint32_t +ohash_interval(const char *s, const char **e) +{ + uint32_t k; + + if (!*e) + *e = s + strlen(s); + if (s == *e) + k = 0; + else + k = *s++; + while (s != *e) + k = ((k << 2) | (k >> 30)) ^ *s++; + return k; +} diff --git a/lib/libutil/ohash_lookup_interval.c b/lib/libutil/ohash_lookup_interval.c new file mode 100644 index 00000000000..18cae934d18 --- /dev/null +++ b/lib/libutil/ohash_lookup_interval.c @@ -0,0 +1,68 @@ +/* $OpenBSD: ohash_lookup_interval.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "ohash_int.h" + +unsigned int +ohash_lookup_interval(struct ohash *h, const char *start, const char *end, + uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + strncmp(h->t[i].p+h->info.key_offset, start, + end - start) == 0 && + (h->t[i].p+h->info.key_offset)[end-start] == '\0') { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH + STAT_HASH_POSITIVE++; +#endif + return i; + } + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. */ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} diff --git a/lib/libutil/ohash_lookup_memory.c b/lib/libutil/ohash_lookup_memory.c new file mode 100644 index 00000000000..650782eaa62 --- /dev/null +++ b/lib/libutil/ohash_lookup_memory.c @@ -0,0 +1,64 @@ +/* $OpenBSD: ohash_lookup_memory.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +unsigned int +ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH + STAT_HASH_POSITIVE++; +#endif + } return i; + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. */ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} diff --git a/lib/libutil/ohash_qlookup.c b/lib/libutil/ohash_qlookup.c new file mode 100644 index 00000000000..aacbbe0948a --- /dev/null +++ b/lib/libutil/ohash_qlookup.c @@ -0,0 +1,27 @@ +/* $OpenBSD: ohash_qlookup.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +unsigned int +ohash_qlookup(struct ohash *h, const char *s) +{ + const char *e = NULL; + return ohash_qlookupi(h, s, &e); +} diff --git a/lib/libutil/ohash_qlookupi.c b/lib/libutil/ohash_qlookupi.c new file mode 100644 index 00000000000..287ed0ce4b9 --- /dev/null +++ b/lib/libutil/ohash_qlookupi.c @@ -0,0 +1,29 @@ +/* $OpenBSD: ohash_qlookupi.c,v 1.1 2014/05/12 19:09:00 espie Exp $ */ +/* ex:ts=8 sw=4: + */ + +/* Copyright (c) 1999, 2004 Marc Espie + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ohash_int.h" + +unsigned int +ohash_qlookupi(struct ohash *h, const char *s, const char **e) +{ + uint32_t hv; + + hv = ohash_interval(s, e); + return ohash_lookup_interval(h, s, *e, hv); +} diff --git a/lib/libutil/shlib_version b/lib/libutil/shlib_version index 56246d02b24..eb2c603aec0 100644 --- a/lib/libutil/shlib_version +++ b/lib/libutil/shlib_version @@ -1,2 +1,2 @@ major=12 -minor=0 +minor=1