From 5dc1102e32a93720a1663c962592bfdabe76b3ea Mon Sep 17 00:00:00 2001 From: michaels Date: Sun, 15 Sep 1996 16:50:33 +0000 Subject: [PATCH] Blind update of wosch@FreeBSD's code: optimized search algorithm faster IO due mmap(2) [-m | -s] better error check for damaged databases support for databases in network byte order (SunOS/sparc) optional case insensitve search [-i] optional multiple databases optional multiple pattern new enviroment variable LOCATE_PATH for database(s) [-S] print some statistic about the database [-l number] limit output to number file names [-c] suppress normal output; instead print a count of matching file names fix prototypes/forward declarations/return type --- usr.bin/locate/Makefile | 4 +- usr.bin/locate/Makefile.inc | 7 +- usr.bin/locate/bigram/Makefile | 3 +- usr.bin/locate/bigram/locate.bigram.c | 54 ++-- usr.bin/locate/code/Makefile | 5 +- usr.bin/locate/code/locate.code.c | 36 ++- usr.bin/locate/locate/Makefile | 22 +- usr.bin/locate/locate/concatdb.sh | 28 +- usr.bin/locate/locate/fastfind.c | 281 +++++++++++++++++ usr.bin/locate/locate/locate.1 | 137 +++++++- usr.bin/locate/locate/locate.c | 394 +++++++++++++++++------- usr.bin/locate/locate/locate.h | 5 +- usr.bin/locate/locate/locate.updatedb.8 | 10 +- usr.bin/locate/locate/mklocatedb.sh | 28 +- usr.bin/locate/locate/pathnames.h | 2 +- usr.bin/locate/locate/updatedb.sh | 28 +- usr.bin/locate/locate/util.c | 268 ++++++++++++++++ 17 files changed, 1116 insertions(+), 196 deletions(-) create mode 100644 usr.bin/locate/locate/fastfind.c create mode 100644 usr.bin/locate/locate/util.c diff --git a/usr.bin/locate/Makefile b/usr.bin/locate/Makefile index 0de6327c99f..e5b8ff9d0c1 100644 --- a/usr.bin/locate/Makefile +++ b/usr.bin/locate/Makefile @@ -1,7 +1,7 @@ -# $OpenBSD: Makefile,v 1.3 1996/08/16 22:00:09 michaels Exp $ +# $OpenBSD: Makefile,v 1.4 1996/09/15 16:50:33 michaels Exp $ # # @(#)Makefile 8.1 (Berkeley) 6/6/93 -# $Id: Makefile,v 1.3 1996/08/16 22:00:09 michaels Exp $ +# 
$Id: Makefile,v 1.4 1996/09/15 16:50:33 michaels Exp $ SUBDIR= bigram code locate diff --git a/usr.bin/locate/Makefile.inc b/usr.bin/locate/Makefile.inc index 10413c5cb6f..aaec3865a9b 100644 --- a/usr.bin/locate/Makefile.inc +++ b/usr.bin/locate/Makefile.inc @@ -1,6 +1,5 @@ -# $OpenBSD: Makefile.inc,v 1.1 1996/08/17 11:03:54 michaels Exp $ +# $OpenBSD: Makefile.inc,v 1.2 1996/09/15 16:50:34 michaels Exp $ # -# $Id: Makefile.inc,v 1.1 1996/08/17 11:03:54 michaels Exp $ - -LIBEXECDIR?= /usr/libexec +# $Id: Makefile.inc,v 1.2 1996/09/15 16:50:34 michaels Exp $ +LIBEXECDIR?= /usr/libexec diff --git a/usr.bin/locate/bigram/Makefile b/usr.bin/locate/bigram/Makefile index 9b7ee84c69a..5dc2def6efa 100644 --- a/usr.bin/locate/bigram/Makefile +++ b/usr.bin/locate/bigram/Makefile @@ -1,5 +1,4 @@ -# $OpenBSD: Makefile,v 1.5 1996/08/17 10:51:21 michaels Exp $ -# +# $OpenBSD: Makefile,v 1.6 1996/09/15 16:50:34 michaels Exp $ # @(#)Makefile 8.1 (Berkeley) 6/6/93 PROG= locate.bigram diff --git a/usr.bin/locate/bigram/locate.bigram.c b/usr.bin/locate/bigram/locate.bigram.c index 1f8e5d604c6..ad2f7ae0216 100644 --- a/usr.bin/locate/bigram/locate.bigram.c +++ b/usr.bin/locate/bigram/locate.bigram.c @@ -1,4 +1,5 @@ -/* $OpenBSD: locate.bigram.c,v 1.4 1996/08/30 12:54:16 michaels Exp $ */ +/* $OpenBSD: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $ */ + /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -33,6 +34,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
+ * + * $Id: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $ */ #ifndef lint @@ -45,12 +48,13 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.4 1996/08/30 12:54:16 michaels Exp $"; +static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $"; #endif #endif /* not lint */ /* - * bigram < text > bigrams + * bigram < sorted_file_names | sort -nr | + * awk 'NR <= 128 { printf $2 }' > bigrams * * List bigrams for 'updatedb' script. * Use 'code' to encode a file using this output. @@ -58,62 +62,58 @@ static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.4 1996/08/30 12:54:16 micha #include #include /* for MAXPATHLEN */ -#include /* memchr */ #include "locate.h" u_char buf1[MAXPATHLEN] = " "; u_char buf2[MAXPATHLEN]; -unsigned int bigram[UCHAR_MAX][UCHAR_MAX]; +u_int bigram[UCHAR_MAX][UCHAR_MAX]; -int main(void) +int +main(void) { register u_char *cp; register u_char *oldpath = buf1, *path = buf2; - register int i, j; + register u_int i, j; + + while (fgets(path, sizeof(buf2), stdin) != NULL) { - while (fgets(path, sizeof(buf2), stdin) != NULL) { - /* skip empty lines */ + /* skip empty lines */ if (*path == '\n') continue; /* Squelch characters that would botch the decoding. 
*/ - for (cp = path; *cp != NUL; cp++) { + for (cp = path; *cp != '\0'; cp++) { /* chop newline */ if (*cp == '\n') - *cp = NUL; + *cp = '\0'; /* range */ else if (*cp < ASCII_MIN || *cp > ASCII_MAX) *cp = '?'; } + /* skip longest common prefix */ - for (cp = path; *cp == *oldpath && *cp != NUL; cp++, oldpath++) - ; - /* - * output post-residue bigrams only - */ + for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++); - /* check later for boundary */ - while ( *cp != NUL && *(cp+1) != NUL ) { + while (*cp != '\0' && *(cp+1) != '\0') { bigram[*cp][*(cp+1)]++; cp += 2; } - if ( path == buf1 ) { /* swap pointers */ + /* swap pointers */ + if (path == buf1) { path = buf2; oldpath = buf1; - } - else { + } else { path = buf1; oldpath = buf2; } - } - - /* output, boundary check */ + } + + /* output, (paranoid) boundary check */ for (i = ASCII_MIN; i <= ASCII_MAX; i++) for (j = ASCII_MIN; j <= ASCII_MAX; j++) if (bigram[i][j] != 0) - fprintf(stdout, "%4d %c%c\n", - bigram[i][j], i, j); + printf("%4u %c%c\n", bigram[i][j], i, j); - return 0; + exit(0); } diff --git a/usr.bin/locate/code/Makefile b/usr.bin/locate/code/Makefile index 9c7d3481578..d35f5800b53 100644 --- a/usr.bin/locate/code/Makefile +++ b/usr.bin/locate/code/Makefile @@ -1,9 +1,8 @@ -# $OpenBSD: Makefile,v 1.5 1996/08/17 10:51:25 michaels Exp $ -# +# $OpenBSD: Makefile,v 1.6 1996/09/15 16:50:36 michaels Exp $ # @(#)Makefile 8.1 (Berkeley) 6/6/93 PROG= locate.code -CFLAGS+= -I${.CURDIR}/../locate +CFLAGS+=-I${.CURDIR}/../locate NOMAN= noman BINDIR= ${LIBEXECDIR} diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c index dae4d532ef3..9daa9f5a0d0 100644 --- a/usr.bin/locate/code/locate.code.c +++ b/usr.bin/locate/code/locate.code.c @@ -1,4 +1,4 @@ -/* $OpenBSD: locate.code.c,v 1.4 1996/08/30 12:54:17 michaels Exp $ */ +/* $OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */ /* * Copyright (c) 1989, 1993 @@ -34,6 +34,8 @@ * LIABILITY, OR TORT (INCLUDING 
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $Id: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */ #ifndef lint @@ -46,7 +48,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: locate.code.c,v 1.4 1996/08/30 12:54:17 michaels Exp $"; +static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $"; #endif #endif /* not lint */ @@ -99,18 +101,19 @@ u_char buf1[MAXPATHLEN] = " "; u_char buf2[MAXPATHLEN]; char bigrams[BGBUFSIZE + 1] = { 0 }; -#define LOOKUP 1 +#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */ + #ifdef LOOKUP #define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)]) typedef u_char bg_t; bg_t big[UCHAR_MAX][UCHAR_MAX]; - #else #define BGINDEX(x) bgindex(x) typedef int bg_t; -#endif - int bgindex __P((char *)); +#endif /* LOOKUP */ + + void usage __P((void)); extern int optind; extern int optopt; @@ -141,6 +144,7 @@ main(argc, argv) /* First copy bigram array to stdout. */ (void)fgets(bigrams, BGBUFSIZE + 1, fp); + if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE) err(1, "stdout"); (void)fclose(fp); @@ -151,13 +155,14 @@ main(argc, argv) for (j = 0; j < UCHAR_MAX; j++) big[i][j] = (bg_t)-1; - for (cp = bigrams, i = 0; *cp != NUL; i += 2, cp += 2) + for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) big[(int)*cp][(int)*(cp + 1)] = (bg_t)i; -#endif +#endif /* LOOKUP */ oldpath = buf1; path = buf2; oldcount = 0; + while (fgets(path, sizeof(buf2), stdin) != NULL) { /* skip empty lines */ @@ -165,17 +170,17 @@ main(argc, argv) continue; /* Squelch characters that would botch the decoding. 
*/ - for (cp = path; *cp != NUL; cp++) { + for (cp = path; *cp != '\0'; cp++) { /* chop newline */ if (*cp == '\n') - *cp = NUL; + *cp = '\0'; /* range */ else if (*cp < ASCII_MIN || *cp > ASCII_MAX) *cp = '?'; } /* Skip longest common prefix. */ - for (cp = path; *cp == *oldpath && *cp; cp++, oldpath++); + for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++); count = cp - path; diffcount = count - oldcount + OFFSET; @@ -188,8 +193,8 @@ main(argc, argv) if (putchar(diffcount) == EOF) err(1, "stdout"); - while (*cp != NUL) { - if (*(cp + 1) == NUL) { + while (*cp != '\0') { + if (*(cp + 1) == '\0') { if (putchar(*cp) == EOF) err(1, "stdout"); break; @@ -216,8 +221,7 @@ main(argc, argv) /* Non-zero status if there were errors */ if (fflush(stdout) != 0 || ferror(stdout)) exit(1); - - return 0; + exit(0); } #ifndef LOOKUP @@ -229,7 +233,7 @@ bgindex(bg) /* Return location of bg in bigrams or -1. */ bg0 = bg[0]; bg1 = bg[1]; - for (p = bigrams; *p != NUL; p++) + for (p = bigrams; *p != NULL; p++) if (*p++ == bg0 && *p == bg1) break; return (*p == NUL ? 
-1 : (--p - bigrams)); diff --git a/usr.bin/locate/locate/Makefile b/usr.bin/locate/locate/Makefile index 9db3002e789..245ce694fd4 100644 --- a/usr.bin/locate/locate/Makefile +++ b/usr.bin/locate/locate/Makefile @@ -1,20 +1,26 @@ -# $OpenBSD: Makefile,v 1.6 1996/08/17 10:51:26 michaels Exp $ +# $OpenBSD: Makefile,v 1.7 1996/09/15 16:50:37 michaels Exp $ # # @(#)Makefile 8.1 (Berkeley) 6/6/93 -# $Id: Makefile,v 1.6 1996/08/17 10:51:26 michaels Exp $ - +# $Id: Makefile,v 1.7 1996/09/15 16:50:37 michaels Exp $ PROG= locate -MAN= locate.1 locate.updatedb.8 +SRCS= util.c locate.c +CFLAGS+= -I${.CURDIR} -DMMAP # -DDEBUG (print time) -O2 (10% faster) +MAN= locate.1 locate.updatedb.8 SCRIPTS= updatedb mklocatedb concatdb MLINKS+= locate.updatedb.8 updatedb.8 + beforeinstall: - -@for i in $(SCRIPTS); do \ - install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ - ${.CURDIR}/$$i.sh ${DESTDIR}${LIBEXECDIR}/locate.$$i; \ + -@for i in ${SCRIPTS}; do \ + install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ + ${.CURDIR}/$$i.sh ${DESTDIR}${LIBEXECDIR}/locate.$$i;\ done -.include "../Makefile.inc" +# only /usr/src/etc/Makefile install files in /etc +# ${INSTALL} -c -o root -g wheel -m 644 \ +# ${.CURDIR}/locate.rc ${DESTDIR}/etc + .include "../../Makefile.inc" +.include "../Makefile.inc" .include diff --git a/usr.bin/locate/locate/concatdb.sh b/usr.bin/locate/locate/concatdb.sh index f25fc6e9a1b..40d4950d5d5 100644 --- a/usr.bin/locate/locate/concatdb.sh +++ b/usr.bin/locate/locate/concatdb.sh @@ -1,8 +1,30 @@ #!/bin/sh # -# $OpenBSD: concatdb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $ +# $OpenBSD: concatdb.sh,v 1.2 1996/09/15 16:50:37 michaels Exp $ # -# (c) Wolfram Schneider, Berlin. September 1995. Public domain. +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. # # concatdb - concatenate locate databases # @@ -10,7 +32,7 @@ # # Sequence of databases is important. # -# $Id: concatdb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $ +# $Id: concatdb.sh,v 1.2 1996/09/15 16:50:37 michaels Exp $ # The directory containing locate subprograms : ${LIBEXECDIR=/usr/libexec}; export LIBEXECDIR diff --git a/usr.bin/locate/locate/fastfind.c b/usr.bin/locate/locate/fastfind.c new file mode 100644 index 00000000000..0a8bf40315f --- /dev/null +++ b/usr.bin/locate/locate/fastfind.c @@ -0,0 +1,281 @@ +/* $OpenBSD: fastfind.c,v 1.1 1996/09/15 16:50:38 michaels Exp $ */ + +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id: fastfind.c,v 1.1 1996/09/15 16:50:38 michaels Exp $ + */ + +#ifndef _LOCATE_STATISTIC_ +#define _LOCATE_STATISTIC_ + +void +statistic (fp, path_fcodes) + FILE *fp; /* open database */ + char *path_fcodes; /* for error message */ +{ + register int lines, chars, size, big; + register u_char *p, *s; + register int c; + int count; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } + + lines = chars = big = 0; + size = NBG + NBG; + + for (c = getc(fp), count = 0; c != EOF; size++) { + if (c == SWITCH) { + count += getwf(fp) - OFFSET; + size += sizeof(int); + } else + count += c - OFFSET; + + for (p = path + count; (c = getc(fp)) > SWITCH; size++) + if (c < PARITY) + p++; + else { + big++; + p += 2; + } + + p++; + lines++; + chars += (p - path); + } + + (void)printf("\nDatabase: %s\n", path_fcodes); + (void)printf("Compression: Front: %2.2f%%, ", + (float)(100 * (size + big)) / chars); + (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); + (void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars); + (void)printf("Filenames: %d, ", lines); + (void)printf("Chars: %d\n", chars); + (void)printf("Database size: %d, ", size); + (void)printf("Bigram chars: %d\n", big); + +} +#endif /* _LOCATE_STATISTIC_ */ + + +void +#ifdef FF_MMAP + + +#ifdef FF_ICASE +fastfind_mmap_icase +#else +fastfind_mmap +#endif +(pathpart, paddr, len, database) + char *pathpart; /* search string */ + caddr_t paddr; /* mmap pointer */ + int len; /* length of database */ + char *database; /* for error message */ + + +#else /* MMAP */ + + +#ifdef FF_ICASE +fastfind_icase +#else /* !FF_ICASE */ +fastfind +#endif /* FF_ICASE */ + +(fp, pathpart, database) + FILE *fp; /* open database */ + char *pathpart; /* search string */ + char *database; /* for error message */ + + +#endif /* MMAP */ + +{ + register u_char *p, *s, *patend, *q, 
*foundchar; + register int c, cc; + int count, found, globflag; + u_char *cutoff; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + +#ifdef FF_ICASE + /* use a lookup table for case insensitive search */ + u_char table[UCHAR_MAX]; + + tolower_word(pathpart); +#endif + + /* init bigram table */ +#ifdef FF_MMAP + if (len < (2*NBG)) { + (void)fprintf(stderr, "database to small: %s\n", database); + exit(1); + } + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { + p[c] = check_bigram_char(*paddr++); + s[c] = check_bigram_char(*paddr++); + } +#else + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } +#endif + + /* find optimal (last) char for searching */ + p = pathpart; + globflag = index(p, '*') || index(p, '?') || index(p, '['); + patend = patprep(p); + cc = *patend; + +#ifdef FF_ICASE + /* set patend char to true */ + table[TOLOWER(*patend)] = 1; + table[toupper(*patend)] = 1; +#endif + + + /* main loop */ + found = count = 0; + foundchar = 0; + +#ifdef FF_MMAP + for (c = (u_char)*paddr++; len-- > 0; ) { +#else + for (c = getc(fp); c != EOF; ) { +#endif + + /* go forward or backward */ + if (c == SWITCH) { /* big step, an integer */ +#ifdef FF_MMAP + count += getwm(paddr) - OFFSET; + len -= INTSIZE; paddr += INTSIZE; +#else + count += getwf(fp) - OFFSET; +#endif + } else { /* slow step, =< 14 chars */ + count += c - OFFSET; + } + + /* overlay old path */ + p = path + count; + foundchar = p - 1; +#ifdef FF_MMAP + for (; (c = (u_char)*paddr++) > SWITCH; len--) +#else + for (; (c = getc(fp)) > SWITCH; ) +#endif + + if (c < PARITY) { +#ifdef FF_ICASE + if (table[c]) +#else + if (c == cc) +#endif + foundchar = p; + *p++ = c; + } + else { + /* bigrams are parity-marked */ + TO7BIT(c); + +#ifndef FF_ICASE + if (bigram1[c] == cc || + bigram2[c] == cc) +#else + + if (table[bigram1[c]] || + table[bigram2[c]]) +#endif + foundchar = p + 1; + + *p++ = bigram1[c]; + *p++ = 
bigram2[c]; + } + + + if (found) { /* previous line matched */ + cutoff = path; + *p-- = '\0'; + foundchar = p; + } else if (foundchar >= path + count) { /* a char matched */ + *p-- = '\0'; + cutoff = path + count; + } else /* nothing to do */ + continue; + + found = 0; + for (s = foundchar; s >= cutoff; s--) { + if (*s == cc +#ifdef FF_ICASE + || TOLOWER(*s) == cc +#endif + ) { /* fast first char check */ + for (p = patend - 1, q = s - 1; *p != '\0'; + p--, q--) + if (*q != *p +#ifdef FF_ICASE + && TOLOWER(*q) != *p +#endif + ) + break; + if (*p == '\0') { /* fast match success */ + found = 1; + if (!globflag || !fnmatch(pathpart, path, 0)) { + if (f_silent) + counter++; + else if (f_limit) { + counter++; + if (f_limit >= counter) + (void)puts(path); + else { + (void)fprintf(stderr, "[show only %d lines]\n", counter - 1); + exit(0); + } + } else + (void)puts(path); + } + break; + } + } + } + } +} diff --git a/usr.bin/locate/locate/locate.1 b/usr.bin/locate/locate/locate.1 index f2ddad83722..c0614374eb1 100644 --- a/usr.bin/locate/locate/locate.1 +++ b/usr.bin/locate/locate/locate.1 @@ -1,3 +1,6 @@ +.\" $OpenBSD: locate.1,v 1.4 1996/09/15 16:50:38 michaels Exp $ +.\" +.\" Copyright (c) 1995 Wolfram Schneider . Berlin. .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -30,21 +33,26 @@ .\" SUCH DAMAGE. .\" .\" @(#)locate.1 8.1 (Berkeley) 6/6/93 +.\" $Id: locate.1,v 1.4 1996/09/15 16:50:38 michaels Exp $ .\" .Dd June 6, 1993 .Dt LOCATE 1 .Os BSD 4.4 .Sh NAME .Nm locate -.Nd find files +.Nd find filenames quickly .Sh SYNOPSIS -.Ar locate -pattern +.Nm +.Op Fl Scims +.Op Fl l Ar limit +.Op Fl d Ar database +pattern ... .Sh DESCRIPTION .Nm Locate searches a database for all pathnames which match the specified .Ar pattern . 
+The database is recomputed periodically (usually weekly or daily), +and contains the pathnames of all files which are publicly accessible. .Pp Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``['' @@ -59,12 +67,95 @@ including slashes (``/''). .Pp As a special case, a pattern containing no globbing characters (``foo'') is matched as though it were ``*foo*''. + +The following options are available: +.Bl -tag -width 10n indent +.It Fl S +Print some statistics about the database and exit. +.It Fl c +Suppress normal output; instead print a count of matching file names. +.It Fl d Ar database +Search in +.Ar database +instead of the default file name database. +Multiple +.Fl d +options are allowed. Each additional +.Fl d +option adds the specified database to the list +of databases to be searched. + +.Ar database +may be a colon-separated list of databases. A single colon is a reference +to the default database. + +$ locate -d $HOME/lib/mydb: foo + +will first search string ``foo'' in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database . + +$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo + +will first search string ``foo'' in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database +and then in +.Pa /cdrom/locate.database . + + +``$ locate -d db1 -d db2 -d db3 pattern'' is the same as + +``$ locate -d db1:db2:db3 pattern'' or + +``$ locate -d db1:db2 -d db3 pattern''. + +If +.Ar - +is given as the database name, standard input will be read instead. +For example, you can compress your database +and use: + +$ zcat database.gz | locate -d - pattern + +This might be useful on machines with a fast CPU and little RAM and slow +I/O. Note: you can only use +.Ar one +pattern for stdin. +.It Fl i +Ignore case distinctions in both the pattern and the database. +.It Fl l Ar number +Limit output to +.Ar number +of file names and exit.
+.It Fl m +Use +.Xr mmap 2 +instead of the +.Xr stdio 3 +library. This is the default behavior; it is usually faster. +.It Fl s +Use the +.Xr stdio 3 +library instead of +.Xr mmap 2 . .Sh FILES .Bl -tag -width /usr/libexec/locate.updatedb -compact .It Pa /var/db/locate.database -The actual database +locate database .It Pa /usr/libexec/locate.updatedb Script to update the locate database +.It Pa /etc/weekly +Script that usually starts the database rebuild +.El +.Sh ENVIRONMENT +.Bl -tag -width LOCATE_PATH -compact +.It Pa LOCATE_PATH +path to the locate database if set and not empty, ignored if the +.Fl d +option was specified. .El .Sh SEE ALSO .Xr find 1 , @@ -79,17 +170,49 @@ Script to update the locate database .%P pp. 8-10 .Re .Sh BUGS -.Nm Locate +.Nm may fail to list some files that are present, or may to list files that have been removed from the system. This is because locate only reports files that are present in the database, which is typically only regenerated once a week by the -.Nm /etc/weekly +.Pa /etc/weekly script. Use .Xr find 1 to locate files that are of a more transitory nature. + +.Nm +database was built by user +.Dq nobody . +.Xr find 1 +skips directories, +which are not readable for user +.Dq nobody , +group +.Dq nobody , +or +world. E.g. if your HOME directory is not world-readable, all your +files are +.Ar not +in the database. + +The +.Nm +database is not byte order independent. It is not possible +to share the databases between machines with different byte order. +The current +.Nm +implementation understands databases in host byte order or +network byte order. So you can read on a FreeBSD/i386 machine +(little endian) +a locate database which was built on a SunOS/sparc machine +(big endian, net). + .Sh HISTORY The .Nm locate command appears in .Bx 4.4 . +Many new features were +added in +.\".Fx 2.2 . +FreeBSD 2.2.
\"I assume diff --git a/usr.bin/locate/locate/locate.c b/usr.bin/locate/locate/locate.c index d4133e45b2f..46bd77db20e 100644 --- a/usr.bin/locate/locate/locate.c +++ b/usr.bin/locate/locate/locate.c @@ -1,8 +1,9 @@ -/* $OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp $ */ +/* $OpenBSD: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $ */ /* + * Copyright (c) 1995 Wolfram Schneider . Berlin. * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * James A. Woods. @@ -17,8 +18,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. + * This product includes software developed by the University of + * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -34,19 +35,22 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $Id: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $ */ #ifndef lint static char copyright[] = -"@(#) Copyright (c) 1989, 1993\n\ - The Regents of the University of California. All rights reserved.\n"; +"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ +@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 -static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; +static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp $"; +static char rcsid[] = "$OpenBSD: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $"; #endif #endif /* not lint */ @@ -60,10 +64,10 @@ static char rcsid[] = "$OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp * * The codes are: * - * 0-28 likeliest differential counts + offset to make nonnegative - * 30 switch code for out-of-range count to follow in next word - * 128-255 bigram codes (128 most common, as determined by 'updatedb') - * 32-127 single character (printable) ascii residue (ie, literal) + * 0-28 likeliest differential counts + offset to make nonnegative + * 30 switch code for out-of-range count to follow in next word + * 128-255 bigram codes (128 most common, as determined by 'updatedb') + * 32-127 single character (printable) ascii residue (ie, literal) * * A novel two-tiered string search technique is employed: * @@ -78,125 +82,291 @@ static char rcsid[] = "$OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp */ #include - #include #include #include #include +#include +#include +#ifdef MMAP +# include +# include +# include +# include +#endif +#include + +#ifdef sun +#include /* SunOS byteorder(3) htohl(3) */ +#ifndef __P +#define __P(x) x +#endif +#endif #include "locate.h" #include "pathnames.h" -FILE *fp; +#ifdef DEBUG +# include +# include +# include +#endif + +char *path_fcodes; /* locate database */ +int f_mmap; /* use mmap */ +int f_icase; /* ignore case */ +int f_stdin; /* read database from stdin */ +int f_statistic; /* print statistic */ +int f_silent; /* suppress output, show only count of matches */ +int f_limit; /* limit number of output lines, 0 == infinite */ +u_int counter; /* counter for matches [-c] */ + + +void usage __P((void)); +void statistic 
__P((FILE *, char *));
+void fastfind __P((FILE *, char *, char *));
+void fastfind_icase __P((FILE *, char *, char *));
+void fastfind_mmap __P((char *, caddr_t, int, char *));
+void fastfind_mmap_icase __P((char *, caddr_t, int, char *));
+void search_mmap __P((char *, char **));
+void search_fopen __P((char *, char **));
+unsigned long cputime __P((void));
+
+extern char **colon __P((char **, char*, char*));
+extern void print_matches __P((u_int));
+extern int getwm __P((caddr_t));
+extern int getwf __P((FILE *));
+extern u_char *tolower_word __P((u_char *));
+extern int check_bigram_char __P((int));
+extern char *patprep __P((char *));
+
+extern char *optarg;
+extern int optind;
+
+/*
+ * Parse the options, build the database vector (-d, then $LOCATE_PATH,
+ * then _PATH_FCODES) and run every remaining argument as a pattern
+ * against every database.
+ */
 int
 main(argc, argv)
-	int argc;
-	char *argv[];
+	int argc;
+	char **argv;
+{
+	register int ch;
+	char **dbv = NULL;
+#ifdef MMAP
+	f_mmap = 1;		/* mmap is default */
+#endif
+
+	while ((ch = getopt(argc, argv, "Scd:il:ms")) != EOF)
+		switch(ch) {
+		case 'S':	/* statistic lines */
+			f_statistic = 1;
+			break;
+		case 'l': /* limit number of output lines, 0 == infinite */
+			f_limit = atoi(optarg);
+			break;
+		case 'd':	/* database */
+			dbv = colon(dbv, optarg, _PATH_FCODES);
+			break;
+		case 'i':	/* ignore case */
+			f_icase = 1;
+			break;
+		case 'm':	/* mmap */
+#ifdef MMAP
+			f_mmap = 1;
+#else
+			(void)fprintf(stderr, "mmap(2) not implemented\n");
+#endif
+			break;
+		case 's':	/* stdio lib */
+			f_mmap = 0;
+			break;
+		case 'c': /* suppress output, show only count of matches */
+			f_silent = 1;
+			break;
+		default:
+			usage();
+		}
+	argv += optind;
+	argc -= optind;
+
+	/* too few arguments */
+	if (argc < 1 && !(f_statistic))
+		usage();
+
+	/* no (valid) database as argument */
+	if (dbv == NULL || *dbv == NULL) {
+		/* try to read database from environment */
+		if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
+		    *path_fcodes == '\0')
+			/* use default database */
+			dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
+		else		/* $LOCATE_PATH */
+			dbv = colon(dbv, path_fcodes, _PATH_FCODES);
+	}
+
+	if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
+		for (ch = 0; ch <= UCHAR_MAX; ch++)
+			myctype[ch] = tolower(ch);
+
+	/* foreach database ... */
+	while((path_fcodes = *dbv) != NULL) {
+		dbv++;
+
+		if (!strcmp(path_fcodes, "-"))
+			f_stdin = 1;
+		else
+			f_stdin = 0;
+
+#ifndef MMAP
+		f_mmap = 0;	/* be paranoid */
+#endif
+#ifdef MMAP
+		if (!f_mmap || f_stdin || f_statistic)
+			search_fopen(path_fcodes, argv);
+		else
+			search_mmap(path_fcodes, argv);
+#else
+		/* search_mmap() is only compiled with MMAP; never reference it here */
+		search_fopen(path_fcodes, argv);
+#endif
+	}
+
+	if (f_silent)
+		print_matches(counter);
+	exit(0);
+}
+
+
+/*
+ * Search one database through stdio.  The database actually opened is the
+ * global path_fcodes ("-" selects stdin via f_stdin); the db argument is
+ * not read.  With f_statistic set only statistic() is run.
+ */
+void
+search_fopen(db, s)
+	char *db; /* database */
+	char **s; /* search strings */
 {
-	if (argc != 2) {
-		(void)fprintf(stderr, "usage: locate pattern\n");
-		exit(1);
+	FILE *fp;
+#ifdef DEBUG
+	long t0;
+#endif
+
+	/* can only read stdin once */
+	if (f_stdin) {
+		fp = stdin;
+		/*
+		 * -S may be given without any pattern, in which case s
+		 * already points at the terminating NULL; never look at
+		 * or overwrite the slot behind it.
+		 */
+		if (*s != NULL && *(s+1) != NULL) {
+			(void)fprintf(stderr,
+			    "read database from stdin, use only");
+			(void)fprintf(stderr, " `%s' as pattern\n", *s);
+			*(s+1) = NULL;
+		}
+	}
+	else if ((fp = fopen(path_fcodes, "r")) == NULL)
+		err(1, "`%s'", path_fcodes);
+
+	/* count only chars or lines */
+	if (f_statistic) {
+		statistic(fp, path_fcodes);
+		(void)fclose(fp);
+		return;
 	}
-	if ((fp = fopen(_PATH_FCODES, "r")) == NULL) {
-		(void)fprintf(stderr, "locate: no database file %s.\n",
-		    _PATH_FCODES);
-		exit(1);
+
+	/* foreach search string ... */
+	while(*s != NULL) {
+#ifdef DEBUG
+		t0 = cputime();
+#endif
+		/* err() appends ": <strerror>\n" itself, so no newline here */
+		if (!f_stdin &&
+		    fseek(fp, (long)0, SEEK_SET) == -1)
+			err(1, "fseek to begin of ``%s''", path_fcodes);
+
+		if (f_icase)
+			fastfind_icase(fp, *s, path_fcodes);
+		else
+			fastfind(fp, *s, path_fcodes);
+#ifdef DEBUG
+		(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+		s++;
+	}
+	(void)fclose(fp);
+}
+
+#ifdef MMAP
+/*
+ * Search one database through a read-only mmap(2) of the whole file.
+ * As in search_fopen() the file opened is the global path_fcodes; the
+ * db argument is not read.
+ */
+void
+search_mmap(db, s)
+	char *db; /* database */
+	char **s; /* search strings */
+{
+	struct stat sb;
+	int fd;
+	caddr_t p;
+	off_t len;
+#ifdef DEBUG
+	long t0;
+#endif
+	if ((fd = open(path_fcodes, O_RDONLY)) == -1 ||
+	    fstat(fd, &sb) == -1)
+		err(1, "`%s'", path_fcodes);
+	len = sb.st_size;
+
+	if ((p = mmap((caddr_t)0, (size_t)len,
+	    PROT_READ, MAP_SHARED,
+	    fd, (off_t)0)) == (caddr_t)-1)
+		err(1, "mmap ``%s''", path_fcodes);
+
+	/* foreach search string ... */
+	while (*s != NULL) {
+#ifdef DEBUG
+		t0 = cputime();
+#endif
+		if (f_icase)
+			fastfind_mmap_icase(*s, p, (int)len, path_fcodes);
+		else
+			fastfind_mmap(*s, p, (int)len, path_fcodes);
+#ifdef DEBUG
+		(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+		s++;
 	}
-	while (*(++argv) != NUL)
-		fastfind(*argv);
+
+	/* warn() appends ": <strerror>\n" itself, so no newline here */
+	if (munmap(p, (size_t)len) == -1)
+		warn("munmap %s", path_fcodes);
-	return 0;
+	(void)close(fd);
 }
+#endif /* MMAP */
-fastfind(pathpart)
-	char *pathpart;
+#ifdef DEBUG
+unsigned long
+cputime ()
 {
-	register char *p, *s;
-	register int c;
-	int count, found, globflag;
-	char *cutoff, *patend, *q, *patprep();
-	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
-
-	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
-		p[c] = getc(fp), s[c] = getc(fp);
-
-	p = pathpart;
-	globflag = index(p, '*') || index(p, '?') || index(p, '[');
-	patend = patprep(p);
-
-	found = 0;
-	for (c = getc(fp), count = 0; c != EOF; ) {
-		count += ((c == SWITCH) ?
getw(fp) : c) - OFFSET; - /* overlay old path */ - for (p = path + count; (c = getc(fp)) > SWITCH;) - if (c < PARITY) - *p++ = c; - else { /* bigrams are parity-marked */ - c &= PARITY - 1; - *p++ = bigram1[c], *p++ = bigram2[c]; - } - *p-- = NUL; - cutoff = (found ? path : path + count); - for (found = 0, s = p; s >= cutoff; s--) - if (*s == *patend) { /* fast first char check */ - for (p = patend - 1, q = s - 1; *p != NUL; - p--, q--) - if (*q != *p) - break; - if (*p == NUL) { /* fast match success */ - found = 1; - if (!globflag || !fnmatch(pathpart, path, 0)) - (void)printf("%s\n", path); - break; - } - } - } -} + struct rusage rus; -/* - * extract last glob-free subpattern in name for fast pre-match; prepend - * '\0' for backwards match; return end of new pattern - */ -static char globfree[100]; + getrusage(0, &rus); + return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); +} +#endif /* DEBUG */ -char * -patprep(name) - char *name; +void +usage () { - register char *endmark, *p, *subp; - - subp = globfree; - *subp++ = '\0'; - p = name + strlen(name) - 1; - /* skip trailing metacharacters (and [] ranges) */ - for (; p >= name; p--) - if (index("*?", *p) == 0) - break; - if (p < name) - p = name; - if (*p == ']') - for (p--; p >= name; p--) - if (*p == '[') { - p--; - break; - } - if (p < name) - p = name; - /* - * if pattern has only metacharacters, check every path (force '/' - * search) - */ - if ((p == name) && index("?*[]", *p) != 0) - *subp++ = '/'; - else { - for (endmark = p; p >= name; p--) - if (index("]*?", *p) != 0) - break; - for (++p; - (p <= endmark) && subp < (globfree + sizeof(globfree));) - *subp++ = *p++; - } - *subp = '\0'; - return(--subp); + (void)fprintf(stderr, "usage: locate [-Scims] [-l limit] "); + (void)fprintf(stderr, "[-d database] pattern ...\n\n"); + (void)fprintf(stderr, "default database: `%s' or $LOCATE_PATH\n", + _PATH_FCODES); + exit(1); } + + +/* load fastfind functions */ + +/* statistic */ +/* fastfind_mmap, 
fastfind_mmap_icase */ +#ifdef MMAP +#undef FF_MMAP +#undef FF_ICASE + +#define FF_MMAP +#include +#define FF_ICASE +#include +#endif /* MMAP */ + +/* fopen */ +/* fastfind, fastfind_icase */ +#undef FF_MMAP +#undef FF_ICASE +#include +#define FF_ICASE +#include diff --git a/usr.bin/locate/locate/locate.h b/usr.bin/locate/locate/locate.h index f613855188b..b169027cc3f 100644 --- a/usr.bin/locate/locate/locate.h +++ b/usr.bin/locate/locate/locate.h @@ -1,4 +1,4 @@ -/* $OpenBSD: locate.h,v 1.4 1996/08/30 12:54:18 michaels Exp $ */ +/* $OpenBSD: locate.h,v 1.5 1996/09/15 16:50:39 michaels Exp $ */ /* * Copyright (c) 1989, 1993 @@ -67,6 +67,3 @@ u_char myctype[UCHAR_MAX + 1]; #endif #define INTSIZE (sizeof(int)) - -#define NUL '\0' - diff --git a/usr.bin/locate/locate/locate.updatedb.8 b/usr.bin/locate/locate/locate.updatedb.8 index eeaa9c00621..b6aee735a72 100644 --- a/usr.bin/locate/locate/locate.updatedb.8 +++ b/usr.bin/locate/locate/locate.updatedb.8 @@ -1,3 +1,5 @@ +.\" $OpenBSD: locate.updatedb.8,v 1.2 1996/09/15 16:50:39 michaels Exp $ +.\" .\" Copyright (c) 1996 .\" Mike Pritchard . All rights reserved. .\" @@ -42,9 +44,15 @@ updates the database used by .Xr locate 1 . It is typically run once a week by the .Nm /etc/weekly script. +.Pp +The contents of the newly built database can be controlled by the +.Nm /etc/locate.rc file. 
.Sh FILES .Bl -tag -width /var/db/locate.database -compact -.It Pa /var/db/locate.database the actual database +.It Pa /var/db/locate.database +the actual database +.It Pa /etc/locate.rc +the configuration file .El .Sh SEE ALSO .Xr locate 1 diff --git a/usr.bin/locate/locate/mklocatedb.sh b/usr.bin/locate/locate/mklocatedb.sh index 4ea9c8e1575..4f8142bca10 100644 --- a/usr.bin/locate/locate/mklocatedb.sh +++ b/usr.bin/locate/locate/mklocatedb.sh @@ -1,14 +1,36 @@ #!/bin/sh # -# $OpenBSD: mklocatedb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $ +# $OpenBSD: mklocatedb.sh,v 1.2 1996/09/15 16:50:40 michaels Exp $ # -# (c) Wolfram Schneider, September 1995. Public domain. +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. # # mklocatedb - build locate database # # usage: mklocatedb [-presort] < filelist > database # -# $Id: mklocatedb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $ +# $Id: mklocatedb.sh,v 1.2 1996/09/15 16:50:40 michaels Exp $ # The directory containing locate subprograms diff --git a/usr.bin/locate/locate/pathnames.h b/usr.bin/locate/locate/pathnames.h index 5ede34ba9b4..c06e3342679 100644 --- a/usr.bin/locate/locate/pathnames.h +++ b/usr.bin/locate/locate/pathnames.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pathnames.h,v 1.3 1996/08/16 22:00:13 michaels Exp $ */ +/* $OpenBSD: pathnames.h,v 1.4 1996/09/15 16:50:40 michaels Exp $ */ /* * Copyright (c) 1989, 1993 diff --git a/usr.bin/locate/locate/updatedb.sh b/usr.bin/locate/locate/updatedb.sh index e303b213257..ebe9e27ad01 100644 --- a/usr.bin/locate/locate/updatedb.sh +++ b/usr.bin/locate/locate/updatedb.sh @@ -1,12 +1,34 @@ #!/bin/sh # -# $OpenBSD: updatedb.sh,v 1.1 1996/08/17 09:37:47 michaels Exp $ +# $OpenBSD: updatedb.sh,v 1.2 1996/09/15 16:50:41 michaels Exp $ # -# (c) Wolfram Schneider, Berlin. September 1995. Public domain. +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. # # updatedb - update locate database for local mounted filesystems # -# $Id: updatedb.sh,v 1.1 1996/08/17 09:37:47 michaels Exp $ +# $Id: updatedb.sh,v 1.2 1996/09/15 16:50:41 michaels Exp $ LOCATE_CONFIG="/etc/locate.rc" if [ -f "$LOCATE_CONFIG" -a -r "$LOCATE_CONFIG" ]; then diff --git a/usr.bin/locate/locate/util.c b/usr.bin/locate/locate/util.c new file mode 100644 index 00000000000..a3ec0e9f703 --- /dev/null +++ b/usr.bin/locate/locate/util.c @@ -0,0 +1,268 @@ +/* $OpenBSD */ + +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: util.c,v 1.1 1996/09/15 16:50:41 michaels Exp $ + */ + +#include +#include +#include +#include +#include + +#include "locate.h" + +char **colon __P((char **, char*, char*)); +char *patprep __P((char *)); +void print_matches __P((u_int)); +u_char *tolower_word __P((u_char *)); +int getwm __P((caddr_t)); +int getwf __P((FILE *)); +int check_bigram_char __P((int)); + +/* + * Validate bigram chars. 
If the test failed the database is corrupt
+ * or the database is obviously not a locate database.
+ */
+int
+check_bigram_char(ch)
+	int ch;
+{
+	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
+	if (ch == 0 ||
+	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
+		return(ch);
+
+	(void)fprintf(stderr, "locate database header corrupt, bigram ");
+	(void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
+	    ASCII_MIN, ASCII_MAX, ch);
+	exit(1);
+}
+
+/* split a colon separated string into a char vector and append the
+ * pieces, in order, to the NULL terminated vector dbv (allocated here
+ * when dbv is NULL); an empty piece is replaced by dot
+ *
+ * "bla:foo" -> {"bla", "foo"}
+ * "bla:" -> {"bla", dot}
+ * ":" -> {dot}
+ * "bla" -> {"bla"}
+ * "" -> do nothing
+ *
+ */
+char **
+colon(dbv, path, dot)
+	char **dbv;
+	char *path;
+	char *dot; /* default for single ':' */
+{
+	int vlen, slen;
+	char *c, *ch, *p;
+	char **pv;
+
+	if (dbv == NULL) {
+		if ((dbv = malloc(sizeof(*dbv))) == NULL)
+			err(1, "malloc");
+		*dbv = NULL;
+	}
+
+	/* empty string */
+	if (*path == '\0') {
+		(void)fprintf(stderr, "empty database name, ignored\n");
+		return(dbv);
+	}
+
+	/* length of string vector */
+	for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++);
+
+	/* c marks the start of the current piece, ch scans for its end */
+	for (ch = c = path; ; ch++) {
+		if (*ch == ':' ||
+		    (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
+			/* single colon -> dot */
+			if (ch == c)
+				p = dot;
+			else {
+				/* a string */
+				slen = ch - c;
+				if ((p = malloc(sizeof(char) * (slen + 1)))
+				    == NULL)
+					err(1, "malloc");
+				bcopy(c, p, slen);
+				*(p + slen) = '\0';
+			}
+			/* increase dbv with element p */
+			if ((dbv = realloc(dbv, sizeof(*dbv) * (vlen + 2)))
+			    == NULL)
+				err(1, "realloc");
+			*(dbv + vlen) = p;
+			*(dbv + ++vlen) = NULL;
+			c = ch + 1;
+		}
+		if (*ch == '\0')
+			break;
+	}
+	return (dbv);
+}
+
+/* print the number of matches (counter is unsigned, hence %u) */
+void
+print_matches(counter)
+	u_int counter;
+{
+	(void)printf("%u\n", counter);
+}
+
+
+/*
+ * extract last glob-free subpattern in name for fast pre-match; prepend
+ * '\0' for backwards match; return end of new pattern
+ *
+ * The result lives in the static buffer globfree and is overwritten by
+ * the next call; subpatterns that do not fit are truncated.
+ */
+static char globfree[100];
+
+char *
+patprep(name)
+	char *name;
+{
+	register char *endmark, *p, *subp;
+
+	subp = globfree;
+	*subp++ = '\0';
+	p = name + strlen(name) - 1;
+	/* skip trailing metacharacters (and [] ranges) */
+	for (; p >= name; p--)
+		if (index("*?", *p) == 0)
+			break;
+	if (p < name)
+		p = name;
+	if (*p == ']')
+		for (p--; p >= name; p--)
+			if (*p == '[') {
+				p--;
+				break;
+			}
+	if (p < name)
+		p = name;
+	/*
+	 * if pattern has only metacharacters, check every path (force '/'
+	 * search)
+	 */
+	if ((p == name) && index("?*[]", *p) != 0)
+		*subp++ = '/';
+	else {
+		for (endmark = p; p >= name; p--)
+			if (index("]*?", *p) != 0)
+				break;
+		/* stop one short of the end: the last byte is for the '\0' */
+		for (++p;
+		    (p <= endmark) &&
+		    subp < (globfree + sizeof(globfree) - 1);)
+			*subp++ = *p++;
+	}
+	*subp = '\0';
+	return(--subp);
+}
+
+/* tolower word: lower-case word in place with TOLOWER and return it */
+u_char *
+tolower_word(word)
+	u_char *word;
+{
+	register u_char *p;
+
+	for(p = word; *p != '\0'; p++)
+		*p = TOLOWER(*p);
+
+	return(word);
+}
+
+
+/*
+ * Read integer from mmap pointer.
+ * Essentially a simple ``return *(int *)p'' but avoid sigbus
+ * for integer alignment (SunOS 4.x, 5.x).
+ *
+ * Convert network byte order to host byte order if necessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ *
+ * Exits with an error if the value is outside +-MAXPATHLEN in both
+ * byte orders.
+ */
+
+int
+getwm(p)
+	caddr_t p;
+{
+	/* the union gives the byte buffer the alignment of an int */
+	union {
+		char buf[INTSIZE];
+		int i;
+	} u;
+	register int i;
+
+	for (i = 0; i < INTSIZE; i++)
+		u.buf[i] = *p++;
+
+	i = u.i;
+
+	if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+		i = ntohl(i);
+		if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+			(void)fprintf(stderr,
+			    "integer out of +-MAXPATHLEN (%d): %d\n",
+			    MAXPATHLEN, i);
+			exit(1);
+		}
+	}
+	return(i);
+}
+
+/*
+ * Read integer from stream.
+ *
+ * Convert network byte order to host byte order if necessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ */ + +int +getwf(fp) + FILE *fp; +{ + register int word; + + word = getw(fp); + + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { + word = ntohl(word); + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { + (void)fprintf(stderr, + "integer out of +-MAXPATHLEN (%d): %d\n", + MAXPATHLEN, word); + exit(1); + } + } + return(word); +} -- 2.20.1