-# $OpenBSD: Makefile,v 1.3 1996/08/16 22:00:09 michaels Exp $
+# $OpenBSD: Makefile,v 1.4 1996/09/15 16:50:33 michaels Exp $
#
# @(#)Makefile 8.1 (Berkeley) 6/6/93
-# $Id: Makefile,v 1.3 1996/08/16 22:00:09 michaels Exp $
+# $Id: Makefile,v 1.4 1996/09/15 16:50:33 michaels Exp $
SUBDIR= bigram code locate
-# $OpenBSD: Makefile.inc,v 1.1 1996/08/17 11:03:54 michaels Exp $
+# $OpenBSD: Makefile.inc,v 1.2 1996/09/15 16:50:34 michaels Exp $
#
-# $Id: Makefile.inc,v 1.1 1996/08/17 11:03:54 michaels Exp $
-
-LIBEXECDIR?= /usr/libexec
+# $Id: Makefile.inc,v 1.2 1996/09/15 16:50:34 michaels Exp $
+LIBEXECDIR?= /usr/libexec
-# $OpenBSD: Makefile,v 1.5 1996/08/17 10:51:21 michaels Exp $
-#
+# $OpenBSD: Makefile,v 1.6 1996/09/15 16:50:34 michaels Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= locate.bigram
-/* $OpenBSD: locate.bigram.c,v 1.4 1996/08/30 12:54:16 michaels Exp $ */
+/* $OpenBSD: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $ */
+
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $Id: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $
*/
#ifndef lint
#if 0
static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
#else
-static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.4 1996/08/30 12:54:16 michaels Exp $";
+static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.5 1996/09/15 16:50:35 michaels Exp $";
#endif
#endif /* not lint */
/*
- * bigram < text > bigrams
+ * bigram < sorted_file_names | sort -nr |
+ * awk 'NR <= 128 { printf $2 }' > bigrams
*
* List bigrams for 'updatedb' script.
* Use 'code' to encode a file using this output.
#include <stdio.h>
#include <sys/param.h> /* for MAXPATHLEN */
-#include <string.h> /* memchr */
#include "locate.h"
u_char buf1[MAXPATHLEN] = " ";
u_char buf2[MAXPATHLEN];
-unsigned int bigram[UCHAR_MAX][UCHAR_MAX];
+u_int bigram[UCHAR_MAX][UCHAR_MAX];
-int main(void)
+int
+main(void)
{
register u_char *cp;
register u_char *oldpath = buf1, *path = buf2;
- register int i, j;
+ register u_int i, j;
+
+ while (fgets(path, sizeof(buf2), stdin) != NULL) {
- while (fgets(path, sizeof(buf2), stdin) != NULL) {
- /* skip empty lines */
+ /* skip empty lines */
if (*path == '\n')
continue;
/* Squelch characters that would botch the decoding. */
- for (cp = path; *cp != NUL; cp++) {
+ for (cp = path; *cp != '\0'; cp++) {
/* chop newline */
if (*cp == '\n')
- *cp = NUL;
+ *cp = '\0';
/* range */
else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
*cp = '?';
}
+
/* skip longest common prefix */
- for (cp = path; *cp == *oldpath && *cp != NUL; cp++, oldpath++)
- ;
- /*
- * output post-residue bigrams only
- */
+ for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
- /* check later for boundary */
- while ( *cp != NUL && *(cp+1) != NUL ) {
+ while (*cp != '\0' && *(cp+1) != '\0') {
bigram[*cp][*(cp+1)]++;
cp += 2;
}
- if ( path == buf1 ) { /* swap pointers */
+ /* swap pointers */
+ if (path == buf1) {
path = buf2;
oldpath = buf1;
- }
- else {
+ } else {
path = buf1;
oldpath = buf2;
}
- }
-
- /* output, boundary check */
+ }
+
+ /* output, (paranoid) boundary check */
for (i = ASCII_MIN; i <= ASCII_MAX; i++)
for (j = ASCII_MIN; j <= ASCII_MAX; j++)
if (bigram[i][j] != 0)
- fprintf(stdout, "%4d %c%c\n",
- bigram[i][j], i, j);
+ printf("%4u %c%c\n", bigram[i][j], i, j);
- return 0;
+ exit(0);
}
-# $OpenBSD: Makefile,v 1.5 1996/08/17 10:51:25 michaels Exp $
-#
+# $OpenBSD: Makefile,v 1.6 1996/09/15 16:50:36 michaels Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= locate.code
-CFLAGS+= -I${.CURDIR}/../locate
+CFLAGS+=-I${.CURDIR}/../locate
NOMAN= noman
BINDIR= ${LIBEXECDIR}
-/* $OpenBSD: locate.code.c,v 1.4 1996/08/30 12:54:17 michaels Exp $ */
+/* $OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */
/*
* Copyright (c) 1989, 1993
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $Id: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $
*/
#ifndef lint
#if 0
static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
#else
-static char rcsid[] = "$OpenBSD: locate.code.c,v 1.4 1996/08/30 12:54:17 michaels Exp $";
+static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $";
#endif
#endif /* not lint */
u_char buf2[MAXPATHLEN];
char bigrams[BGBUFSIZE + 1] = { 0 };
-#define LOOKUP 1
+#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
+
#ifdef LOOKUP
#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)])
typedef u_char bg_t;
bg_t big[UCHAR_MAX][UCHAR_MAX];
-
#else
#define BGINDEX(x) bgindex(x)
typedef int bg_t;
-#endif
-
int bgindex __P((char *));
+#endif /* LOOKUP */
+
+
void usage __P((void));
extern int optind;
extern int optopt;
/* First copy bigram array to stdout. */
(void)fgets(bigrams, BGBUFSIZE + 1, fp);
+
if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
err(1, "stdout");
(void)fclose(fp);
for (j = 0; j < UCHAR_MAX; j++)
big[i][j] = (bg_t)-1;
- for (cp = bigrams, i = 0; *cp != NUL; i += 2, cp += 2)
+ for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
big[(int)*cp][(int)*(cp + 1)] = (bg_t)i;
-#endif
+#endif /* LOOKUP */
oldpath = buf1;
path = buf2;
oldcount = 0;
+
while (fgets(path, sizeof(buf2), stdin) != NULL) {
/* skip empty lines */
continue;
/* Squelch characters that would botch the decoding. */
- for (cp = path; *cp != NUL; cp++) {
+ for (cp = path; *cp != '\0'; cp++) {
/* chop newline */
if (*cp == '\n')
- *cp = NUL;
+ *cp = '\0';
/* range */
else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
*cp = '?';
}
/* Skip longest common prefix. */
- for (cp = path; *cp == *oldpath && *cp; cp++, oldpath++);
+ for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
count = cp - path;
diffcount = count - oldcount + OFFSET;
if (putchar(diffcount) == EOF)
err(1, "stdout");
- while (*cp != NUL) {
- if (*(cp + 1) == NUL) {
+ while (*cp != '\0') {
+ if (*(cp + 1) == '\0') {
if (putchar(*cp) == EOF)
err(1, "stdout");
break;
/* Non-zero status if there were errors */
if (fflush(stdout) != 0 || ferror(stdout))
exit(1);
-
- return 0;
+ exit(0);
}
#ifndef LOOKUP
bg0 = bg[0];
bg1 = bg[1];
- for (p = bigrams; *p != NUL; p++)
+ for (p = bigrams; *p != NULL; p++)
if (*p++ == bg0 && *p == bg1)
break;
return (*p == NUL ? -1 : (--p - bigrams));
-# $OpenBSD: Makefile,v 1.6 1996/08/17 10:51:26 michaels Exp $
+# $OpenBSD: Makefile,v 1.7 1996/09/15 16:50:37 michaels Exp $
#
# @(#)Makefile 8.1 (Berkeley) 6/6/93
-# $Id: Makefile,v 1.6 1996/08/17 10:51:26 michaels Exp $
-
+# $Id: Makefile,v 1.7 1996/09/15 16:50:37 michaels Exp $
PROG= locate
-MAN= locate.1 locate.updatedb.8
+SRCS= util.c locate.c
+CFLAGS+= -I${.CURDIR} -DMMAP # -DDEBUG (print time) -O2 (10% faster)
+MAN= locate.1 locate.updatedb.8
SCRIPTS= updatedb mklocatedb concatdb
MLINKS+= locate.updatedb.8 updatedb.8
+
beforeinstall:
- -@for i in $(SCRIPTS); do \
- install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
- ${.CURDIR}/$$i.sh ${DESTDIR}${LIBEXECDIR}/locate.$$i; \
+ -@for i in ${SCRIPTS}; do \
+ install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
+ ${.CURDIR}/$$i.sh ${DESTDIR}${LIBEXECDIR}/locate.$$i;\
done
-.include "../Makefile.inc"
+# only /usr/src/etc/Makefile install files in /etc
+# ${INSTALL} -c -o root -g wheel -m 644 \
+# ${.CURDIR}/locate.rc ${DESTDIR}/etc
+
.include "../../Makefile.inc"
+.include "../Makefile.inc"
.include <bsd.prog.mk>
#!/bin/sh
#
-# $OpenBSD: concatdb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $
+# $OpenBSD: concatdb.sh,v 1.2 1996/09/15 16:50:37 michaels Exp $
#
-# (c) Wolfram Schneider, Berlin. September 1995. Public domain.
+# Copyright (c) September 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
#
# concatdb - concatenate locate databases
#
#
# Sequence of databases is important.
#
-# $Id: concatdb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $
+# $Id: concatdb.sh,v 1.2 1996/09/15 16:50:37 michaels Exp $
# The directory containing locate subprograms
: ${LIBEXECDIR=/usr/libexec}; export LIBEXECDIR
--- /dev/null
+/* $OpenBSD: fastfind.c,v 1.1 1996/09/15 16:50:38 michaels Exp $ */
+
+/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * James A. Woods.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: fastfind.c,v 1.1 1996/09/15 16:50:38 michaels Exp $
+ */
+
+#ifndef _LOCATE_STATISTIC_
+#define _LOCATE_STATISTIC_
+
+void
+statistic (fp, path_fcodes)
+ FILE *fp; /* open database */
+ char *path_fcodes; /* for error message */
+{
+ register int lines, chars, size, big;
+ register u_char *p, *s;
+ register int c;
+ int count;
+ u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
+ p[c] = check_bigram_char(getc(fp));
+ s[c] = check_bigram_char(getc(fp));
+ }
+
+ lines = chars = big = 0;
+ size = NBG + NBG;
+
+ for (c = getc(fp), count = 0; c != EOF; size++) {
+ if (c == SWITCH) {
+ count += getwf(fp) - OFFSET;
+ size += sizeof(int);
+ } else
+ count += c - OFFSET;
+
+ for (p = path + count; (c = getc(fp)) > SWITCH; size++)
+ if (c < PARITY)
+ p++;
+ else {
+ big++;
+ p += 2;
+ }
+
+ p++;
+ lines++;
+ chars += (p - path);
+ }
+
+ (void)printf("\nDatabase: %s\n", path_fcodes);
+ (void)printf("Compression: Front: %2.2f%%, ",
+ (float)(100 * (size + big)) / chars);
+ (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
+ (void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars);
+ (void)printf("Filenames: %d, ", lines);
+ (void)printf("Chars: %d\n", chars);
+ (void)printf("Database size: %d, ", size);
+ (void)printf("Bigram chars: %d\n", big);
+
+}
+#endif /* _LOCATE_STATISTIC_ */
+
+
+void
+#ifdef FF_MMAP
+
+
+#ifdef FF_ICASE
+fastfind_mmap_icase
+#else
+fastfind_mmap
+#endif
+(pathpart, paddr, len, database)
+ char *pathpart; /* search string */
+ caddr_t paddr; /* mmap pointer */
+ int len; /* length of database */
+ char *database; /* for error message */
+
+
+#else /* MMAP */
+
+
+#ifdef FF_ICASE
+fastfind_icase
+#else /* !FF_ICASE */
+fastfind
+#endif /* FF_ICASE */
+
+(fp, pathpart, database)
+ FILE *fp; /* open database */
+ char *pathpart; /* search string */
+ char *database; /* for error message */
+
+
+#endif /* MMAP */
+
+{
+ register u_char *p, *s, *patend, *q, *foundchar;
+ register int c, cc;
+ int count, found, globflag;
+ u_char *cutoff;
+ u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+
+#ifdef FF_ICASE
+ /* use a lookup table for case insensitive search */
+ u_char table[UCHAR_MAX];
+
+ tolower_word(pathpart);
+#endif
+
+ /* init bigram table */
+#ifdef FF_MMAP
+ if (len < (2*NBG)) {
+ (void)fprintf(stderr, "database to small: %s\n", database);
+ exit(1);
+ }
+
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
+ p[c] = check_bigram_char(*paddr++);
+ s[c] = check_bigram_char(*paddr++);
+ }
+#else
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
+ p[c] = check_bigram_char(getc(fp));
+ s[c] = check_bigram_char(getc(fp));
+ }
+#endif
+
+ /* find optimal (last) char for searching */
+ p = pathpart;
+ globflag = index(p, '*') || index(p, '?') || index(p, '[');
+ patend = patprep(p);
+ cc = *patend;
+
+#ifdef FF_ICASE
+ /* set patend char to true */
+ table[TOLOWER(*patend)] = 1;
+ table[toupper(*patend)] = 1;
+#endif
+
+
+ /* main loop */
+ found = count = 0;
+ foundchar = 0;
+
+#ifdef FF_MMAP
+ for (c = (u_char)*paddr++; len-- > 0; ) {
+#else
+ for (c = getc(fp); c != EOF; ) {
+#endif
+
+ /* go forward or backward */
+ if (c == SWITCH) { /* big step, an integer */
+#ifdef FF_MMAP
+ count += getwm(paddr) - OFFSET;
+ len -= INTSIZE; paddr += INTSIZE;
+#else
+ count += getwf(fp) - OFFSET;
+#endif
+ } else { /* slow step, =< 14 chars */
+ count += c - OFFSET;
+ }
+
+ /* overlay old path */
+ p = path + count;
+ foundchar = p - 1;
+#ifdef FF_MMAP
+ for (; (c = (u_char)*paddr++) > SWITCH; len--)
+#else
+ for (; (c = getc(fp)) > SWITCH; )
+#endif
+
+ if (c < PARITY) {
+#ifdef FF_ICASE
+ if (table[c])
+#else
+ if (c == cc)
+#endif
+ foundchar = p;
+ *p++ = c;
+ }
+ else {
+ /* bigrams are parity-marked */
+ TO7BIT(c);
+
+#ifndef FF_ICASE
+ if (bigram1[c] == cc ||
+ bigram2[c] == cc)
+#else
+
+ if (table[bigram1[c]] ||
+ table[bigram2[c]])
+#endif
+ foundchar = p + 1;
+
+ *p++ = bigram1[c];
+ *p++ = bigram2[c];
+ }
+
+
+ if (found) { /* previous line matched */
+ cutoff = path;
+ *p-- = '\0';
+ foundchar = p;
+ } else if (foundchar >= path + count) { /* a char matched */
+ *p-- = '\0';
+ cutoff = path + count;
+ } else /* nothing to do */
+ continue;
+
+ found = 0;
+ for (s = foundchar; s >= cutoff; s--) {
+ if (*s == cc
+#ifdef FF_ICASE
+ || TOLOWER(*s) == cc
+#endif
+ ) { /* fast first char check */
+ for (p = patend - 1, q = s - 1; *p != '\0';
+ p--, q--)
+ if (*q != *p
+#ifdef FF_ICASE
+ && TOLOWER(*q) != *p
+#endif
+ )
+ break;
+ if (*p == '\0') { /* fast match success */
+ found = 1;
+ if (!globflag || !fnmatch(pathpart, path, 0)) {
+ if (f_silent)
+ counter++;
+ else if (f_limit) {
+ counter++;
+ if (f_limit >= counter)
+ (void)puts(path);
+ else {
+ (void)fprintf(stderr, "[show only %d lines]\n", counter - 1);
+ exit(0);
+ }
+ } else
+ (void)puts(path);
+ }
+ break;
+ }
+ }
+ }
+ }
+}
+.\" $OpenBSD: locate.1,v 1.4 1996/09/15 16:50:38 michaels Exp $
+.\"
+.\" Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" SUCH DAMAGE.
.\"
.\" @(#)locate.1 8.1 (Berkeley) 6/6/93
+.\" $Id: locate.1,v 1.4 1996/09/15 16:50:38 michaels Exp $
.\"
.Dd June 6, 1993
.Dt LOCATE 1
.Os BSD 4.4
.Sh NAME
.Nm locate
-.Nd find files
+.Nd find filenames quickly
.Sh SYNOPSIS
-.Ar locate
-pattern
+.Nm
+.Op Fl Scims
+.Op Fl l Ar limit
+.Op Fl d Ar database
+pattern ...
.Sh DESCRIPTION
.Nm Locate
searches a database for all pathnames which match the specified
.Ar pattern .
-The database is recomputed periodically, and contains the pathnames
+The database is recomputed periodically (usually weekly or daily),
+and contains the pathnames
of all files which are publicly accessible.
.Pp
Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
.Pp
As a special case, a pattern containing no globbing characters (``foo'')
is matched as though it were ``*foo*''.
+
+The following options are available:
+.Bl -tag -width 10n indent
+.It Fl S
+Print some statistic about the database and exit.
+.It Fl c
+Suppress normal output; instead print a count of matching file names.
+.It Fl d Ar database
+Search in
+.Ar database
+instead the default file name database.
+Multiple
+.Fl d
+options are allowed. Each additional
+.Fl d
+option adds the specified database to the list
+of databases to be searched.
+
+.Ar database
+may be a colon-separated list of databases. A single colon is a reference
+to the default database.
+
+$ locate -d $HOME/lib/mydb: foo
+
+will first search string ``foo'' in
+.Pa $HOME/lib/mydb
+and then in
+.Pa /var/db/locate.database .
+
+$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo
+
+will first search string ``foo'' in
+.Pa $HOME/lib/mydb
+and then in
+.Pa /var/db/locate.database
+and then in
+.Pa /cdrom/locate.database .
+
+
+``$ locate -d db1 -d db2 -d db3 pattern'' is the same as
+
+``$ locate -d db1:db2:db3 pattern'' or
+
+``$ locate -d db1:db2 -d db3 pattern''.
+
+If
+.Ar -
+is given as the database name, standard input will be read instead.
+For example, you can compress your database
+and use:
+
+$ zcat database.gz | locate -d - pattern
+
+This might be useful on machines with a fast CPU and little RAM and slow
+I/O. Note: you can only use
+.Ar one
+pattern for stdin.
+.It Fl i
+Ignore case distinctions in both the pattern and the database.
+.It Fl l Ar number
+Limit output to
+.Ar number
+of file names and exit.
+.It Fl m
+Use
+.Xr mmap 2
+instead of the
+.Xr stdio 3
+library. This is the default behavior. Usually faster in most cases.
+.It Fl s
+Use the
+.Xr stdio 3
+library instead of
+.Xr mmap 2 .
.Sh FILES
.Bl -tag -width /usr/libexec/locate.updatedb -compact
.It Pa /var/db/locate.database
-The actual database
+locate database
.It Pa /usr/libexec/locate.updatedb
Script to update the locate database
+.It Pa /etc/weekly
+Script that usually starts the database rebuild
+.El
+.Sh ENVIRONMENT
+.Bl -tag -width LOCATE_PATH -compact
+.It Pa LOCATE_PATH
+path to the locate database if set and not empty, ignored if the
+.Fl d
+option was specified.
.El
.Sh SEE ALSO
.Xr find 1 ,
.%P pp. 8-10
.Re
.Sh BUGS
-.Nm Locate
+.Nm
may fail to list some files that are present, or may
to list files that have been removed from the system. This is because
locate only reports files that are present in the database, which is
typically only regenerated once a week by the
-.Nm /etc/weekly
+.Pa /etc/weekly
script. Use
.Xr find 1
to locate files that are of a more transitory nature.
+
+.Nm
+database was built by user
+.Dq nobody .
+.Xr find 1
+skip directories,
+which are not readable for user
+.Dq nobody ,
+group
+.Dq nobody ,
+or
+world. E.g. if your HOME directory ist not world-readable, all your
+files are
+.Ar not
+in the database.
+
+The
+.Nm
+database is not byte order independ. It is not possible
+to share the databases between machines with different byte order.
+The current
+.Nm
+implementation understand databases in host byte order or
+network byte order. So you can read on a FreeBSD/i386 machine
+(little endian)
+a locate database which was built on SunOS/sparc machine
+(big endian, net).
+
.Sh HISTORY
The
.Nm locate
command appears in
.Bx 4.4 .
+Many new features were
+added in
+.\".Fx 2.2 .
+FreeBSD 2.2. \"I assume
-/* $OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp $ */
+/* $OpenBSD: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $ */
/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* James A. Woods.
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $Id: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $
*/
#ifndef lint
static char copyright[] =
-"@(#) Copyright (c) 1989, 1993\n\
- The Regents of the University of California. All rights reserved.\n";
+"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\
+@(#) Copyright (c) 1989, 1993\n\
+ The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
#if 0
-static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
+static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
#else
-static char rcsid[] = "$OpenBSD: locate.c,v 1.4 1996/08/30 12:54:18 michaels Exp $";
+static char rcsid[] = "$OpenBSD: locate.c,v 1.5 1996/09/15 16:50:38 michaels Exp $";
#endif
#endif /* not lint */
*
* The codes are:
*
- * 0-28 likeliest differential counts + offset to make nonnegative
- * 30 switch code for out-of-range count to follow in next word
- * 128-255 bigram codes (128 most common, as determined by 'updatedb')
- * 32-127 single character (printable) ascii residue (ie, literal)
+ * 0-28 likeliest differential counts + offset to make nonnegative
+ * 30 switch code for out-of-range count to follow in next word
+ * 128-255 bigram codes (128 most common, as determined by 'updatedb')
+ * 32-127 single character (printable) ascii residue (ie, literal)
*
* A novel two-tiered string search technique is employed:
*
*/
#include <sys/param.h>
-
#include <fnmatch.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#ifdef MMAP
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <sys/mman.h>
+# include <fcntl.h>
+#endif
+#include <err.h>
+
+#ifdef sun
+#include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */
+#ifndef __P
+#define __P(x) x
+#endif
+#endif
#include "locate.h"
#include "pathnames.h"
-FILE *fp;
+#ifdef DEBUG
+# include <sys/time.h>
+# include <sys/types.h>
+# include <sys/resource.h>
+#endif
+
+char *path_fcodes; /* locate database */
+int f_mmap; /* use mmap */
+int f_icase; /* ignore case */
+int f_stdin; /* read database from stdin */
+int f_statistic; /* print statistic */
+int f_silent; /* suppress output, show only count of matches */
+int f_limit; /* limit number of output lines, 0 == infinite */
+u_int counter; /* counter for matches [-c] */
+
+
+void usage __P((void));
+void statistic __P((FILE *, char *));
+void fastfind __P((FILE *, char *, char *));
+void fastfind_icase __P((FILE *, char *, char *));
+void fastfind_mmap __P((char *, caddr_t, int, char *));
+void fastfind_mmap_icase __P((char *, caddr_t, int, char *));
+void search_mmap __P((char *, char **));
+void search_fopen __P((char *, char **));
+unsigned long cputime __P((void));
+
+extern char **colon __P((char **, char*, char*));
+extern void print_matches __P((u_int));
+extern int getwm __P((caddr_t));
+extern int getwf __P((FILE *));
+extern u_char *tolower_word __P((u_char *));
+extern int check_bigram_char __P((int));
+extern char *patprep __P((char *));
+
+extern char *optarg;
+extern int optind;
+
int
main(argc, argv)
- int argc;
- char *argv[];
+ int argc;
+ char **argv;
+{
+ register int ch;
+ char **dbv = NULL;
+#ifdef MMAP
+ f_mmap = 1; /* mmap is default */
+#endif
+
+ while ((ch = getopt(argc, argv, "Scd:il:ms")) != EOF)
+ switch(ch) {
+ case 'S': /* statistic lines */
+ f_statistic = 1;
+ break;
+ case 'l': /* limit number of output lines, 0 == infinite */
+ f_limit = atoi(optarg);
+ break;
+ case 'd': /* database */
+ dbv = colon(dbv, optarg, _PATH_FCODES);
+ break;
+ case 'i': /* ignore case */
+ f_icase = 1;
+ break;
+ case 'm': /* mmap */
+#ifdef MMAP
+ f_mmap = 1;
+#else
+ (void)fprintf(stderr, "mmap(2) not implemented\n");
+#endif
+ break;
+ case 's': /* stdio lib */
+ f_mmap = 0;
+ break;
+ case 'c': /* suppress output, show only count of matches */
+ f_silent = 1;
+ break;
+ default:
+ usage();
+ }
+ argv += optind;
+ argc -= optind;
+
+ /* to few arguments */
+ if (argc < 1 && !(f_statistic))
+ usage();
+
+ /* no (valid) database as argument */
+ if (dbv == NULL || *dbv == NULL) {
+ /* try to read database from enviroment */
+ if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
+ *path_fcodes == '\0')
+ /* use default database */
+ dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
+ else /* $LOCATE_PATH */
+ dbv = colon(dbv, path_fcodes, _PATH_FCODES);
+ }
+
+ if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
+ for (ch = 0; ch <= UCHAR_MAX; ch++)
+ myctype[ch] = tolower(ch);
+
+ /* foreach database ... */
+ while((path_fcodes = *dbv) != NULL) {
+ dbv++;
+
+ if (!strcmp(path_fcodes, "-"))
+ f_stdin = 1;
+ else
+ f_stdin = 0;
+
+#ifndef MMAP
+ f_mmap = 0; /* be paranoid */
+#endif
+ if (!f_mmap || f_stdin || f_statistic)
+ search_fopen(path_fcodes, argv);
+ else
+ search_mmap(path_fcodes, argv);
+ }
+
+ if (f_silent)
+ print_matches(counter);
+ exit(0);
+}
+
+
+void
+search_fopen(db, s)
+ char *db; /* database */
+ char **s; /* search strings */
{
- if (argc != 2) {
- (void)fprintf(stderr, "usage: locate pattern\n");
- exit(1);
+ FILE *fp;
+#ifdef DEBUG
+ long t0;
+#endif
+
+ /* can only read stdin once */
+ if (f_stdin) {
+ fp = stdin;
+ if (*(s+1) != NULL) {
+ (void)fprintf(stderr,
+ "read database from stdin, use only");
+ (void)fprintf(stderr, " `%s' as pattern\n", *s);
+ *(s+1) = NULL;
+ }
+ }
+ else if ((fp = fopen(path_fcodes, "r")) == NULL)
+ err(1, "`%s'", path_fcodes);
+
+ /* count only chars or lines */
+ if (f_statistic) {
+ statistic(fp, path_fcodes);
+ (void)fclose(fp);
+ return;
}
- if ((fp = fopen(_PATH_FCODES, "r")) == NULL) {
- (void)fprintf(stderr, "locate: no database file %s.\n",
- _PATH_FCODES);
- exit(1);
+
+ /* foreach search string ... */
+ while(*s != NULL) {
+#ifdef DEBUG
+ t0 = cputime();
+#endif
+ if (!f_stdin &&
+ fseek(fp, (long)0, SEEK_SET) == -1)
+ err(1, "fseek to begin of ``%s''\n", path_fcodes);
+
+ if (f_icase)
+ fastfind_icase(fp, *s, path_fcodes);
+ else
+ fastfind(fp, *s, path_fcodes);
+#ifdef DEBUG
+ (void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+ s++;
+ }
+ (void)fclose(fp);
+}
+
+#ifdef MMAP
+void
+search_mmap(db, s)
+ char *db; /* database */
+ char **s; /* search strings */
+{
+ struct stat sb;
+ int fd;
+ caddr_t p;
+ off_t len;
+#ifdef DEBUG
+ long t0;
+#endif
+ if ((fd = open(path_fcodes, O_RDONLY)) == -1 ||
+ fstat(fd, &sb) == -1)
+ err(1, "`%s'", path_fcodes);
+ len = sb.st_size;
+
+ if ((p = mmap((caddr_t)0, (size_t)len,
+ PROT_READ, MAP_SHARED,
+ fd, (off_t)0)) == (caddr_t)-1)
+ err(1, "mmap ``%s''", path_fcodes);
+
+ /* foreach search string ... */
+ while (*s != NULL) {
+#ifdef DEBUG
+ t0 = cputime();
+#endif
+ if (f_icase)
+ fastfind_mmap_icase(*s, p, (int)len, path_fcodes);
+ else
+ fastfind_mmap(*s, p, (int)len, path_fcodes);
+#ifdef DEBUG
+ (void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+ s++;
}
- while (*(++argv) != NUL)
- fastfind(*argv);
+
+ if (munmap(p, (size_t)len) == -1)
+ warn("munmap %s\n", path_fcodes);
- return 0;
+ (void)close(fd);
}
+#endif /* MMAP */
-fastfind(pathpart)
- char *pathpart;
+#ifdef DEBUG
+unsigned long
+cputime ()
{
- register char *p, *s;
- register int c;
- int count, found, globflag;
- char *cutoff, *patend, *q, *patprep();
- char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
-
- for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
- p[c] = getc(fp), s[c] = getc(fp);
-
- p = pathpart;
- globflag = index(p, '*') || index(p, '?') || index(p, '[');
- patend = patprep(p);
-
- found = 0;
- for (c = getc(fp), count = 0; c != EOF; ) {
- count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
- /* overlay old path */
- for (p = path + count; (c = getc(fp)) > SWITCH;)
- if (c < PARITY)
- *p++ = c;
- else { /* bigrams are parity-marked */
- c &= PARITY - 1;
- *p++ = bigram1[c], *p++ = bigram2[c];
- }
- *p-- = NUL;
- cutoff = (found ? path : path + count);
- for (found = 0, s = p; s >= cutoff; s--)
- if (*s == *patend) { /* fast first char check */
- for (p = patend - 1, q = s - 1; *p != NUL;
- p--, q--)
- if (*q != *p)
- break;
- if (*p == NUL) { /* fast match success */
- found = 1;
- if (!globflag || !fnmatch(pathpart, path, 0))
- (void)printf("%s\n", path);
- break;
- }
- }
- }
-}
+ struct rusage rus;
-/*
- * extract last glob-free subpattern in name for fast pre-match; prepend
- * '\0' for backwards match; return end of new pattern
- */
-static char globfree[100];
+ getrusage(0, &rus);
+ return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000);
+}
+#endif /* DEBUG */
-char *
-patprep(name)
- char *name;
+void
+usage ()
{
- register char *endmark, *p, *subp;
-
- subp = globfree;
- *subp++ = '\0';
- p = name + strlen(name) - 1;
- /* skip trailing metacharacters (and [] ranges) */
- for (; p >= name; p--)
- if (index("*?", *p) == 0)
- break;
- if (p < name)
- p = name;
- if (*p == ']')
- for (p--; p >= name; p--)
- if (*p == '[') {
- p--;
- break;
- }
- if (p < name)
- p = name;
- /*
- * if pattern has only metacharacters, check every path (force '/'
- * search)
- */
- if ((p == name) && index("?*[]", *p) != 0)
- *subp++ = '/';
- else {
- for (endmark = p; p >= name; p--)
- if (index("]*?", *p) != 0)
- break;
- for (++p;
- (p <= endmark) && subp < (globfree + sizeof(globfree));)
- *subp++ = *p++;
- }
- *subp = '\0';
- return(--subp);
+ (void)fprintf(stderr, "usage: locate [-Scims] [-l limit] ");
+ (void)fprintf(stderr, "[-d database] pattern ...\n\n");
+ (void)fprintf(stderr, "default database: `%s' or $LOCATE_PATH\n",
+ _PATH_FCODES);
+ exit(1);
}
+
+
+/* load fastfind functions */
+
+/* statistic */
+/* fastfind_mmap, fastfind_mmap_icase */
+#ifdef MMAP
+#undef FF_MMAP
+#undef FF_ICASE
+
+#define FF_MMAP
+#include <fastfind.c>
+#define FF_ICASE
+#include <fastfind.c>
+#endif /* MMAP */
+
+/* fopen */
+/* fastfind, fastfind_icase */
+#undef FF_MMAP
+#undef FF_ICASE
+#include <fastfind.c>
+#define FF_ICASE
+#include <fastfind.c>
-/* $OpenBSD: locate.h,v 1.4 1996/08/30 12:54:18 michaels Exp $ */
+/* $OpenBSD: locate.h,v 1.5 1996/09/15 16:50:39 michaels Exp $ */
/*
* Copyright (c) 1989, 1993
#endif
#define INTSIZE (sizeof(int))
-
-#define NUL '\0'
-
+.\" $OpenBSD: locate.updatedb.8,v 1.2 1996/09/15 16:50:39 michaels Exp $
+.\"
.\" Copyright (c) 1996
.\" Mike Pritchard <mpp@FreeBSD.org>. All rights reserved.
.\"
.Xr locate 1 .
It is typically run once a week by the
.Nm /etc/weekly script.
+.Pp
+The contents of the newly built database can be controlled by the
+.Nm /etc/locate.rc file.
.Sh FILES
.Bl -tag -width /var/db/locate.database -compact
-.It Pa /var/db/locate.database the actual database
+.It Pa /var/db/locate.database
+the actual database
+.It Pa /etc/locate.rc
+the configuration file
.El
.Sh SEE ALSO
.Xr locate 1
#!/bin/sh
#
-# $OpenBSD: mklocatedb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $
+# $OpenBSD: mklocatedb.sh,v 1.2 1996/09/15 16:50:40 michaels Exp $
#
-# (c) Wolfram Schneider, September 1995. Public domain.
+# Copyright (c) September 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
#
# mklocatedb - build locate database
#
# usage: mklocatedb [-presort] < filelist > database
#
-# $Id: mklocatedb.sh,v 1.1 1996/08/17 09:37:46 michaels Exp $
+# $Id: mklocatedb.sh,v 1.2 1996/09/15 16:50:40 michaels Exp $
# The directory containing locate subprograms
-/* $OpenBSD: pathnames.h,v 1.3 1996/08/16 22:00:13 michaels Exp $ */
+/* $OpenBSD: pathnames.h,v 1.4 1996/09/15 16:50:40 michaels Exp $ */
/*
* Copyright (c) 1989, 1993
#!/bin/sh
#
-# $OpenBSD: updatedb.sh,v 1.1 1996/08/17 09:37:47 michaels Exp $
+# $OpenBSD: updatedb.sh,v 1.2 1996/09/15 16:50:41 michaels Exp $
#
-# (c) Wolfram Schneider, Berlin. September 1995. Public domain.
+# Copyright (c) September 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
#
# updatedb - update locate database for local mounted filesystems
#
-# $Id: updatedb.sh,v 1.1 1996/08/17 09:37:47 michaels Exp $
+# $Id: updatedb.sh,v 1.2 1996/09/15 16:50:41 michaels Exp $
LOCATE_CONFIG="/etc/locate.rc"
if [ -f "$LOCATE_CONFIG" -a -r "$LOCATE_CONFIG" ]; then
--- /dev/null
+/* $OpenBSD */
+
+/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * James A. Woods.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: util.c,v 1.1 1996/09/15 16:50:41 michaels Exp $
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <sys/param.h>
+#include <stdio.h>
+
+#include "locate.h"
+
+char **colon __P((char **, char*, char*));
+char *patprep __P((char *));
+void print_matches __P((u_int));
+u_char *tolower_word __P((u_char *));
+int getwm __P((caddr_t));
+int getwf __P((FILE *));
+int check_bigram_char __P((int));
+
+/*
+ * Validate bigram chars. If the test failed the database is corrupt
+ * or the database is obviously not a locate database.
+ */
+int
+check_bigram_char(ch)
+ int ch;
+{
+ /* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
+ if (ch == 0 ||
+ (ch >= ASCII_MIN && ch <= ASCII_MAX))
+ return(ch);
+
+ (void)fprintf(stderr, "locate database header corrupt, bigram ");
+ (void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
+ ASCII_MIN, ASCII_MAX, ch);
+ exit(1);
+}
+
+/* split a colon separated string into a char vector
+ *
+ * "bla:foo" -> {"foo", "bla"}
+ * "bla:" -> {"foo", dot}
+ * "bla" -> {"bla"}
+ * "" -> do nothing
+ *
+ */
+char **
+colon(dbv, path, dot)
+ char **dbv;
+ char *path;
+ char *dot; /* default for single ':' */
+{
+ int vlen, slen;
+ char *c, *ch, *p;
+ char **pv;
+
+ if (dbv == NULL) {
+ if ((dbv = malloc(sizeof(char **))) == NULL)
+ err(1, "malloc");
+ *dbv = NULL;
+ }
+
+ /* empty string */
+ if (*path == '\0') {
+ (void)fprintf(stderr, "empty database name, ignored\n");
+ return(dbv);
+ }
+
+ /* length of string vector */
+ for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++);
+
+ for (ch = c = path; ; ch++) {
+ if (*ch == ':' ||
+ (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
+ /* single colon -> dot */
+ if (ch == c)
+ p = dot;
+ else {
+ /* a string */
+ slen = ch - c;
+ if ((p = malloc(sizeof(char) * (slen + 1)))
+ == NULL)
+ err(1, "malloc");
+ bcopy(c, p, slen);
+ *(p + slen) = '\0';
+ }
+ /* increase dbv with element p */
+ if ((dbv = realloc(dbv, sizeof(char **) * (vlen + 2)))
+ == NULL)
+ err(1, "realloc");
+ *(dbv + vlen) = p;
+ *(dbv + ++vlen) = NULL;
+ c = ch + 1;
+ }
+ if (*ch == '\0')
+ break;
+ }
+ return (dbv);
+}
+
+void
+print_matches(counter)
+ u_int counter;
+{
+ (void)printf("%d\n", counter);
+}
+
+
+/*
+ * extract last glob-free subpattern in name for fast pre-match; prepend
+ * '\0' for backwards match; return end of new pattern
+ */
+static char globfree[100];
+
+char *
+patprep(name)
+ char *name;
+{
+ register char *endmark, *p, *subp;
+
+ subp = globfree;
+ *subp++ = '\0';
+ p = name + strlen(name) - 1;
+ /* skip trailing metacharacters (and [] ranges) */
+ for (; p >= name; p--)
+ if (index("*?", *p) == 0)
+ break;
+ if (p < name)
+ p = name;
+ if (*p == ']')
+ for (p--; p >= name; p--)
+ if (*p == '[') {
+ p--;
+ break;
+ }
+ if (p < name)
+ p = name;
+ /*
+ * if pattern has only metacharacters, check every path (force '/'
+ * search)
+ */
+ if ((p == name) && index("?*[]", *p) != 0)
+ *subp++ = '/';
+ else {
+ for (endmark = p; p >= name; p--)
+ if (index("]*?", *p) != 0)
+ break;
+ for (++p;
+ (p <= endmark) && subp < (globfree + sizeof(globfree));)
+ *subp++ = *p++;
+ }
+ *subp = '\0';
+ return(--subp);
+}
+
+/* tolower word */
+u_char *
+tolower_word(word)
+ u_char *word;
+{
+ register u_char *p;
+
+ for(p = word; *p != '\0'; p++)
+ *p = TOLOWER(*p);
+
+ return(word);
+}
+
+
+/*
+ * Read integer from mmap pointer.
+ * Essential a simple ``return *(int *)p'' but avoid sigbus
+ * for integer alignment (SunOS 4.x, 5.x).
+ *
+ * Convert network byte order to host byte order if neccessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ */
+
+int
+getwm(p)
+ caddr_t p;
+{
+ static char buf[INTSIZE];
+ register int i;
+
+ for (i = 0; i < INTSIZE; i++)
+ buf[i] = *p++;
+
+ i = *(int *)buf;
+
+ if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ i = ntohl(i);
+ if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ (void)fprintf(stderr,
+ "integer out of +-MAXPATHLEN (%d): %d\n",
+ MAXPATHLEN, i);
+ exit(1);
+ }
+ }
+ return(i);
+}
+
+/*
+ * Read integer from stream.
+ *
+ * Convert network byte order to host byte order if neccessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ */
+
+int
+getwf(fp)
+ FILE *fp;
+{
+ register int word;
+
+ word = getw(fp);
+
+ if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ word = ntohl(word);
+ if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ (void)fprintf(stderr,
+ "integer out of +-MAXPATHLEN (%d): %d\n",
+ MAXPATHLEN, word);
+ exit(1);
+ }
+ }
+ return(word);
+}