Replace the Berkeley-DB based mandocdb(8) by an SQLite3-based version,
author schwarze <schwarze@openbsd.org>
Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
committer schwarze <schwarze@openbsd.org>
Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
aiming for more flexible development and optimization options.
Kristaps started this during the summer of 2012, I did some very heavy
bugfixing during t2k13 and finally, during the last few days,
got it to a state where it is ripe for in-tree development.
Beware, neither the user interfaces nor the database formats
are expected to be stable just yet.

Will not be installed or activated until further discussion.
No functional change to mandoc(1).

"As long as it remains off until we decide the cost, fine." deraadt@

usr.bin/mandoc/Makefile
usr.bin/mandoc/apropos.c
usr.bin/mandoc/apropos_db.c [deleted file]
usr.bin/mandoc/apropos_db.h [deleted file]
usr.bin/mandoc/mandocdb.c
usr.bin/mandoc/mandocdb.h [deleted file]
usr.bin/mandoc/mansearch.c [new file with mode: 0644]
usr.bin/mandoc/mansearch.h [new file with mode: 0644]

index 0c0c513..6f54636 100644 (file)
@@ -1,9 +1,10 @@
-#      $OpenBSD: Makefile,v 1.71 2013/10/06 23:59:59 schwarze Exp $
+#      $OpenBSD: Makefile,v 1.72 2013/12/31 00:40:19 schwarze Exp $
 
 .include <bsd.own.mk>
 
-CFLAGS+=-DVERSION=\"1.12.2\"
-CFLAGS+=-W -Wall -Wstrict-prototypes -Wno-unused-parameter
+CFLAGS  += -DVERSION=\"1.13.0\"
+CFLAGS  += -W -Wall -Wstrict-prototypes -Wno-unused-parameter
+LDFLAGS += -lsqlite3
 
 SRCS=  roff.c tbl.c tbl_opts.c tbl_layout.c tbl_data.c eqn.c mandoc.c read.c
 SRCS+= mdoc_macro.c mdoc.c mdoc_hash.c \
@@ -14,7 +15,7 @@ SRCS+=        main.c mdoc_term.c chars.c term.c tree.c man_term.c eqn_term.c
 SRCS+= mdoc_man.c
 SRCS+= html.c mdoc_html.c man_html.c out.c eqn_html.c
 SRCS+= term_ps.c term_ascii.c tbl_term.c tbl_html.c
-SRCS+= manpath.c mandocdb.c apropos_db.c apropos.c
+SRCS+= manpath.c mandocdb.c mansearch.c apropos.c
 
 PROG=  mandoc
 
index 3b58c06..d9cf3d1 100644 (file)
@@ -1,7 +1,6 @@
-/*     $Id: apropos.c,v 1.17 2013/07/12 11:01:42 schwarze Exp $ */
+/*     $Id: apropos.c,v 1.18 2013/12/31 00:40:19 schwarze Exp $ */
 /*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
 
 #include <assert.h>
 #include <getopt.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
-#include "apropos_db.h"
-#include "mandoc.h"
 #include "manpath.h"
-
-static int      cmp(const void *, const void *);
-static void     list(struct res *, size_t, void *);
-
-static char    *progname;
+#include "mansearch.h"
 
 int
 apropos(int argc, char *argv[])
 {
-       int              ch, rc, whatis;
-       struct res      *res;
+       int              ch, whatis;
+       struct mansearch search;
+       size_t           i, sz;
+       struct manpage  *res;
        struct manpaths  paths;
-       size_t           terms, ressz;
-       struct opts      opts;
-       struct expr     *e;
        char            *defpaths, *auxpaths;
        char            *conf_file;
+       char            *progname;
        extern char     *optarg;
        extern int       optind;
 
@@ -55,13 +50,10 @@ apropos(int argc, char *argv[])
        whatis = (0 == strncmp(progname, "whatis", 6));
 
        memset(&paths, 0, sizeof(struct manpaths));
-       memset(&opts, 0, sizeof(struct opts));
+       memset(&search, 0, sizeof(struct mansearch));
 
-       ressz = 0;
-       res = NULL;
        auxpaths = defpaths = NULL;
        conf_file = NULL;
-       e = NULL;
 
        while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:")))
                switch (ch) {
@@ -75,10 +67,10 @@ apropos(int argc, char *argv[])
                        auxpaths = optarg;
                        break;
                case ('S'):
-                       opts.arch = optarg;
+                       search.arch = optarg;
                        break;
                case ('s'):
-                       opts.cat = optarg;
+                       search.sec = optarg;
                        break;
                default:
                        goto usage;
@@ -90,64 +82,28 @@ apropos(int argc, char *argv[])
        if (0 == argc)
                goto usage;
 
-       rc = 0;
+       search.deftype = whatis ? TYPE_Nm : TYPE_Nm | TYPE_Nd;
+       search.flags = whatis ? MANSEARCH_WHATIS : 0;
 
        manpath_parse(&paths, conf_file, defpaths, auxpaths);
+       ch = mansearch(&search, &paths, argc, argv, &res, &sz);
+       manpath_free(&paths);
 
-       e = whatis ? termcomp(argc, argv, &terms) :
-                    exprcomp(argc, argv, &terms);
-               
-       if (NULL == e) {
-               fprintf(stderr, "%s: Bad expression\n", progname);
-               goto out;
-       }
-
-       rc = apropos_search
-               (paths.sz, paths.paths, &opts, 
-                e, terms, NULL, &ressz, &res, list);
+       if (0 == ch)
+               goto usage;
 
-       if (0 == rc) {
-               fprintf(stderr, "%s: Bad database\n", progname);
-               goto out;
+       for (i = 0; i < sz; i++) {
+               printf("%s - %s\n", res[i].names, res[i].desc);
+               free(res[i].file);
+               free(res[i].names);
+               free(res[i].desc);
        }
 
-out:
-       manpath_free(&paths);
-       resfree(res, ressz);
-       exprfree(e);
-       return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
-
+       free(res);
+       return(sz ? EXIT_SUCCESS : EXIT_FAILURE);
 usage:
        fprintf(stderr, "usage: %s [-C file] [-M path] [-m path] "
                        "[-S arch] [-s section]%s ...\n", progname,
                        whatis ? " name" : "\n               expression");
        return(EXIT_FAILURE);
 }
-
-/* ARGSUSED */
-static void
-list(struct res *res, size_t sz, void *arg)
-{
-       size_t           i;
-
-       qsort(res, sz, sizeof(struct res), cmp);
-
-       for (i = 0; i < sz; i++) {
-               if ( ! res[i].matched)
-                       continue;
-               printf("%s(%s%s%s) - %.70s\n",
-                               res[i].title,
-                               res[i].cat,
-                               *res[i].arch ? "/" : "",
-                               *res[i].arch ? res[i].arch : "",
-                               res[i].desc);
-       }
-}
-
-static int
-cmp(const void *p1, const void *p2)
-{
-
-       return(strcasecmp(((const struct res *)p1)->title,
-                               ((const struct res *)p2)->title));
-}
diff --git a/usr.bin/mandoc/apropos_db.c b/usr.bin/mandoc/apropos_db.c
deleted file mode 100644 (file)
index 6c2c0c6..0000000
+++ /dev/null
@@ -1,866 +0,0 @@
-/*     $Id: apropos_db.c,v 1.19 2013/10/05 21:17:29 schwarze Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#include <sys/param.h>
-#include <sys/types.h>
-
-#include <assert.h>
-#include <fcntl.h>
-#include <regex.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <db.h>
-
-#include "mandocdb.h"
-#include "apropos_db.h"
-#include "mandoc.h"
-
-#define        RESFREE(_x) \
-       do { \
-               free((_x)->file); \
-               free((_x)->cat); \
-               free((_x)->title); \
-               free((_x)->arch); \
-               free((_x)->desc); \
-               free((_x)->matches); \
-       } while (/*CONSTCOND*/0)
-
-struct expr {
-       int              regex; /* is regex? */
-       int              index; /* index in match array */
-       uint64_t         mask; /* type-mask */
-       int              and; /* is rhs of logical AND? */
-       char            *v; /* search value */
-       regex_t          re; /* compiled re, if regex */
-       struct expr     *next; /* next in sequence */
-       struct expr     *subexpr;
-};
-
-struct type {
-       uint64_t         mask;
-       const char      *name;
-};
-
-struct rectree {
-       struct res      *node; /* record array for dir tree */
-       int              len; /* length of record array */
-};
-
-static const struct type types[] = {
-       { TYPE_An, "An" },
-       { TYPE_Ar, "Ar" },
-       { TYPE_At, "At" },
-       { TYPE_Bsx, "Bsx" },
-       { TYPE_Bx, "Bx" },
-       { TYPE_Cd, "Cd" },
-       { TYPE_Cm, "Cm" },
-       { TYPE_Dv, "Dv" },
-       { TYPE_Dx, "Dx" },
-       { TYPE_Em, "Em" },
-       { TYPE_Er, "Er" },
-       { TYPE_Ev, "Ev" },
-       { TYPE_Fa, "Fa" },
-       { TYPE_Fl, "Fl" },
-       { TYPE_Fn, "Fn" },
-       { TYPE_Fn, "Fo" },
-       { TYPE_Ft, "Ft" },
-       { TYPE_Fx, "Fx" },
-       { TYPE_Ic, "Ic" },
-       { TYPE_In, "In" },
-       { TYPE_Lb, "Lb" },
-       { TYPE_Li, "Li" },
-       { TYPE_Lk, "Lk" },
-       { TYPE_Ms, "Ms" },
-       { TYPE_Mt, "Mt" },
-       { TYPE_Nd, "Nd" },
-       { TYPE_Nm, "Nm" },
-       { TYPE_Nx, "Nx" },
-       { TYPE_Ox, "Ox" },
-       { TYPE_Pa, "Pa" },
-       { TYPE_Rs, "Rs" },
-       { TYPE_Sh, "Sh" },
-       { TYPE_Ss, "Ss" },
-       { TYPE_St, "St" },
-       { TYPE_Sy, "Sy" },
-       { TYPE_Tn, "Tn" },
-       { TYPE_Va, "Va" },
-       { TYPE_Va, "Vt" },
-       { TYPE_Xr, "Xr" },
-       { UINT64_MAX, "any" },
-       { 0, NULL }
-};
-
-static DB      *btree_open(void);
-static int      btree_read(const DBT *, const DBT *,
-                       const struct mchars *,
-                       uint64_t *, recno_t *, char **);
-static int      expreval(const struct expr *, int *);
-static void     exprexec(const struct expr *,
-                       const char *, uint64_t, struct res *);
-static int      exprmark(const struct expr *,
-                       const char *, uint64_t, int *);
-static struct expr *exprexpr(int, char *[], int *, int *, size_t *);
-static struct expr *exprterm(char *, int);
-static DB      *index_open(void);
-static int      index_read(const DBT *, const DBT *, int,
-                       const struct mchars *, struct res *);
-static void     norm_string(const char *,
-                       const struct mchars *, char **);
-static size_t   norm_utf8(unsigned int, char[7]);
-static int      single_search(struct rectree *, const struct opts *,
-                       const struct expr *, size_t terms,
-                       struct mchars *, int);
-
-/*
- * Open the keyword mandoc-db database.
- */
-static DB *
-btree_open(void)
-{
-       BTREEINFO        info;
-       DB              *db;
-
-       memset(&info, 0, sizeof(BTREEINFO));
-       info.lorder = 4321;
-       info.flags = R_DUP;
-
-       db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
-       if (NULL != db)
-               return(db);
-
-       return(NULL);
-}
-
-/*
- * Read a keyword from the database and normalise it.
- * Return 0 if the database is insane, else 1.
- */
-static int
-btree_read(const DBT *k, const DBT *v, const struct mchars *mc,
-               uint64_t *mask, recno_t *rec, char **buf)
-{
-       uint64_t         vbuf[2];
-
-       /* Are our sizes sane? */
-       if (k->size < 2 || sizeof(vbuf) != v->size)
-               return(0);
-
-       /* Is our string nil-terminated? */
-       if ('\0' != ((const char *)k->data)[(int)k->size - 1])
-               return(0);
-
-       norm_string((const char *)k->data, mc, buf);
-       memcpy(vbuf, v->data, v->size);
-       *mask = betoh64(vbuf[0]);
-       *rec  = betoh64(vbuf[1]);
-       return(1);
-}
-
-/*
- * Take a Unicode codepoint and produce its UTF-8 encoding.
- * This isn't the best way to do this, but it works.
- * The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
- */
-static size_t
-norm_utf8(unsigned int cp, char out[7])
-{
-       int              rc;
-
-       rc = 0;
-
-       if (cp <= 0x0000007F) {
-               rc = 1;
-               out[0] = (char)cp;
-       } else if (cp <= 0x000007FF) {
-               rc = 2;
-               out[0] = (cp >> 6  & 31) | 192;
-               out[1] = (cp       & 63) | 128;
-       } else if (cp <= 0x0000FFFF) {
-               rc = 3;
-               out[0] = (cp >> 12 & 15) | 224;
-               out[1] = (cp >> 6  & 63) | 128;
-               out[2] = (cp       & 63) | 128;
-       } else if (cp <= 0x001FFFFF) {
-               rc = 4;
-               out[0] = (cp >> 18 & 7) | 240;
-               out[1] = (cp >> 12 & 63) | 128;
-               out[2] = (cp >> 6  & 63) | 128;
-               out[3] = (cp       & 63) | 128;
-       } else if (cp <= 0x03FFFFFF) {
-               rc = 5;
-               out[0] = (cp >> 24 & 3) | 248;
-               out[1] = (cp >> 18 & 63) | 128;
-               out[2] = (cp >> 12 & 63) | 128;
-               out[3] = (cp >> 6  & 63) | 128;
-               out[4] = (cp       & 63) | 128;
-       } else if (cp <= 0x7FFFFFFF) {
-               rc = 6;
-               out[0] = (cp >> 30 & 1) | 252;
-               out[1] = (cp >> 24 & 63) | 128;
-               out[2] = (cp >> 18 & 63) | 128;
-               out[3] = (cp >> 12 & 63) | 128;
-               out[4] = (cp >> 6  & 63) | 128;
-               out[5] = (cp       & 63) | 128;
-       } else
-               return(0);
-
-       out[rc] = '\0';
-       return((size_t)rc);
-}
-
-/*
- * Normalise strings from the index and database.
- * These strings are escaped as defined by mandoc_char(7) along with
- * other goop in mandoc.h (e.g., soft hyphens).
- * This function normalises these into a nice UTF-8 string.
- * Returns 0 if the database is fucked.
- */
-static void
-norm_string(const char *val, const struct mchars *mc, char **buf)
-{
-       size_t            sz, bsz;
-       char              utfbuf[7];
-       const char       *seq, *cpp;
-       int               len, u, pos;
-       enum mandoc_esc   esc;
-       static const char res[] = { '\\', '\t',
-                               ASCII_NBRSP, ASCII_HYPH, '\0' };
-
-       /* Pre-allocate by the length of the input */
-
-       bsz = strlen(val) + 1;
-       *buf = mandoc_realloc(*buf, bsz);
-       pos = 0;
-
-       while ('\0' != *val) {
-               /*
-                * Halt on the first escape sequence.
-                * This also halts on the end of string, in which case
-                * we just copy, fallthrough, and exit the loop.
-                */
-               if ((sz = strcspn(val, res)) > 0) {
-                       memcpy(&(*buf)[pos], val, sz);
-                       pos += (int)sz;
-                       val += (int)sz;
-               }
-
-               if (ASCII_HYPH == *val) {
-                       (*buf)[pos++] = '-';
-                       val++;
-                       continue;
-               } else if ('\t' == *val || ASCII_NBRSP == *val) {
-                       (*buf)[pos++] = ' ';
-                       val++;
-                       continue;
-               } else if ('\\' != *val)
-                       break;
-
-               /* Read past the slash. */
-
-               val++;
-               u = 0;
-
-               /*
-                * Parse the escape sequence and see if it's a
-                * predefined character or special character.
-                */
-
-               esc = mandoc_escape(&val, &seq, &len);
-               if (ESCAPE_ERROR == esc)
-                       break;
-
-               /*
-                * XXX - this just does UTF-8, but we need to know
-                * beforehand whether we should do text substitution.
-                */
-
-               switch (esc) {
-               case (ESCAPE_SPECIAL):
-                       if (0 != (u = mchars_spec2cp(mc, seq, len)))
-                               break;
-                       /* FALLTHROUGH */
-               default:
-                       continue;
-               }
-
-               /*
-                * If we have a Unicode codepoint, try to convert that
-                * to a UTF-8 byte string.
-                */
-
-               cpp = utfbuf;
-               if (0 == (sz = norm_utf8(u, utfbuf)))
-                       continue;
-
-               /* Copy the rendered glyph into the stream. */
-
-               sz = strlen(cpp);
-               bsz += sz;
-
-               *buf = mandoc_realloc(*buf, bsz);
-
-               memcpy(&(*buf)[pos], cpp, sz);
-               pos += (int)sz;
-       }
-
-       (*buf)[pos] = '\0';
-}
-
-/*
- * Open the filename-index mandoc-db database.
- * Returns NULL if opening failed.
- */
-static DB *
-index_open(void)
-{
-       DB              *db;
-
-       db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
-       if (NULL != db)
-               return(db);
-
-       return(NULL);
-}
-
-/*
- * Safely unpack from an index file record into the structure.
- * Returns 1 if an entry was unpacked, 0 if the database is insane.
- */
-static int
-index_read(const DBT *key, const DBT *val, int index,
-               const struct mchars *mc, struct res *rec)
-{
-       size_t           left;
-       char            *np, *cp;
-       char             type;
-
-#define        INDEX_BREAD(_dst) \
-       do { \
-               if (NULL == (np = memchr(cp, '\0', left))) \
-                       return(0); \
-               norm_string(cp, mc, &(_dst)); \
-               left -= (np - cp) + 1; \
-               cp = np + 1; \
-       } while (/* CONSTCOND */ 0)
-
-       if (0 == (left = val->size))
-               return(0);
-
-       cp = val->data;
-       assert(sizeof(recno_t) == key->size);
-       memcpy(&rec->rec, key->data, key->size);
-       rec->volume = index;
-
-       if ('d' == (type = *cp++))
-               rec->type = RESTYPE_MDOC;
-       else if ('a' == type)
-               rec->type = RESTYPE_MAN;
-       else if ('c' == type)
-               rec->type = RESTYPE_CAT;
-       else
-               return(0);
-
-       left--;
-       INDEX_BREAD(rec->file);
-       INDEX_BREAD(rec->cat);
-       INDEX_BREAD(rec->title);
-       INDEX_BREAD(rec->arch);
-       INDEX_BREAD(rec->desc);
-       return(1);
-}
-
-/*
- * Search mandocdb databases in paths for expression "expr".
- * Filter out by "opts".
- * Call "res" with the results, which may be zero.
- * Return 0 if there was a database error, else return 1.
- */
-int
-apropos_search(int pathsz, char **paths, const struct opts *opts,
-               const struct expr *expr, size_t terms, void *arg,
-               size_t *sz, struct res **resp,
-               void (*res)(struct res *, size_t, void *))
-{
-       struct rectree   tree;
-       struct mchars   *mc;
-       int              i;
-
-       memset(&tree, 0, sizeof(struct rectree));
-
-       mc = mchars_alloc();
-       *sz = 0;
-       *resp = NULL;
-
-       /*
-        * Main loop.  Change into the directory containing manpage
-        * databases.  Run our expession over each database in the set.
-        */
-
-       for (i = 0; i < pathsz; i++) {
-               assert('/' == paths[i][0]);
-               if (chdir(paths[i]))
-                       continue;
-               if (single_search(&tree, opts, expr, terms, mc, i))
-                       continue;
-
-               resfree(tree.node, tree.len);
-               mchars_free(mc);
-               return(0);
-       }
-
-       (*res)(tree.node, tree.len, arg);
-       *sz = tree.len;
-       *resp = tree.node;
-       mchars_free(mc);
-       return(1);
-}
-
-static int
-single_search(struct rectree *tree, const struct opts *opts,
-               const struct expr *expr, size_t terms,
-               struct mchars *mc, int vol)
-{
-       int              root, leaf, ch;
-       DBT              key, val;
-       DB              *btree, *idx;
-       char            *buf;
-       struct res      *rs;
-       struct res       r;
-       uint64_t         mask;
-       recno_t          rec;
-
-       root    = -1;
-       leaf    = -1;
-       btree   = NULL;
-       idx     = NULL;
-       buf     = NULL;
-       rs      = tree->node;
-
-       memset(&r, 0, sizeof(struct res));
-
-       if (NULL == (btree = btree_open()))
-               return(1);
-
-       if (NULL == (idx = index_open())) {
-               (*btree->close)(btree);
-               return(1);
-       }
-
-       while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
-               if ( ! btree_read(&key, &val, mc, &mask, &rec, &buf))
-                       break;
-
-               /*
-                * See if this keyword record matches any of the
-                * expressions we have stored.
-                */
-               if ( ! exprmark(expr, buf, mask, NULL))
-                       continue;
-
-               /*
-                * O(log n) scan for prior records.  Since a record
-                * number is unbounded, this has decent performance over
-                * a complex hash function.
-                */
-
-               for (leaf = root; leaf >= 0; )
-                       if (rec > rs[leaf].rec &&
-                                       rs[leaf].rhs >= 0)
-                               leaf = rs[leaf].rhs;
-                       else if (rec < rs[leaf].rec &&
-                                       rs[leaf].lhs >= 0)
-                               leaf = rs[leaf].lhs;
-                       else
-                               break;
-
-               /*
-                * If we find a record, see if it has already evaluated
-                * to true.  If it has, great, just keep going.  If not,
-                * try to evaluate it now and continue anyway.
-                */
-
-               if (leaf >= 0 && rs[leaf].rec == rec) {
-                       if (0 == rs[leaf].matched)
-                               exprexec(expr, buf, mask, &rs[leaf]);
-                       continue;
-               }
-
-               /*
-                * We have a new file to examine.
-                * Extract the manpage's metadata from the index
-                * database, then begin partial evaluation.
-                */
-
-               key.data = &rec;
-               key.size = sizeof(recno_t);
-
-               if (0 != (*idx->get)(idx, &key, &val, 0))
-                       break;
-
-               r.lhs = r.rhs = -1;
-               if ( ! index_read(&key, &val, vol, mc, &r))
-                       break;
-
-               /* XXX: this should be elsewhere, I guess? */
-
-               if (opts->cat && strcasecmp(opts->cat, r.cat))
-                       continue;
-
-               if (opts->arch && *r.arch)
-                       if (strcasecmp(opts->arch, r.arch))
-                               continue;
-
-               tree->node = rs = mandoc_realloc
-                       (rs, (tree->len + 1) * sizeof(struct res));
-
-               memcpy(&rs[tree->len], &r, sizeof(struct res));
-               memset(&r, 0, sizeof(struct res));
-               rs[tree->len].matches =
-                       mandoc_calloc(terms, sizeof(int));
-
-               exprexec(expr, buf, mask, &rs[tree->len]);
-
-               /* Append to our tree. */
-
-               if (leaf >= 0) {
-                       if (rec > rs[leaf].rec)
-                               rs[leaf].rhs = tree->len;
-                       else
-                               rs[leaf].lhs = tree->len;
-               } else
-                       root = tree->len;
-
-               tree->len++;
-       }
-
-       (*btree->close)(btree);
-       (*idx->close)(idx);
-
-       free(buf);
-       RESFREE(&r);
-       return(1 == ch);
-}
-
-void
-resfree(struct res *rec, size_t sz)
-{
-       size_t           i;
-
-       for (i = 0; i < sz; i++)
-               RESFREE(&rec[i]);
-       free(rec);
-}
-
-/*
- * Compile a list of straight-up terms.
- * The arguments are re-written into ~[[:<:]]term[[:>:]], or "term"
- * surrounded by word boundaries, then pumped through exprterm().
- * Terms are case-insensitive.
- * This emulates whatis(1) behaviour.
- */
-struct expr *
-termcomp(int argc, char *argv[], size_t *tt)
-{
-       char            *buf;
-       int              pos;
-       struct expr     *e, *next;
-       size_t           sz;
-
-       buf = NULL;
-       e = NULL;
-       *tt = 0;
-
-       for (pos = argc - 1; pos >= 0; pos--) {
-               sz = strlen(argv[pos]) + 18;
-               buf = mandoc_realloc(buf, sz);
-               strlcpy(buf, "Nm~[[:<:]]", sz);
-               strlcat(buf, argv[pos], sz);
-               strlcat(buf, "[[:>:]]", sz);
-               if (NULL == (next = exprterm(buf, 0))) {
-                       free(buf);
-                       exprfree(e);
-                       return(NULL);
-               }
-               next->next = e;
-               e = next;
-               (*tt)++;
-       }
-
-       free(buf);
-       return(e);
-}
-
-/*
- * Compile a sequence of logical expressions.
- * See apropos.1 for a grammar of this sequence.
- */
-struct expr *
-exprcomp(int argc, char *argv[], size_t *tt)
-{
-       int              pos, lvl;
-       struct expr     *e;
-
-       pos = lvl = 0;
-       *tt = 0;
-
-       e = exprexpr(argc, argv, &pos, &lvl, tt);
-
-       if (0 == lvl && pos >= argc)
-               return(e);
-
-       exprfree(e);
-       return(NULL);
-}
-
-/*
- * Compile an array of tokens into an expression.
- * An informal expression grammar is defined in apropos(1).
- * Return NULL if we fail doing so.  All memory will be cleaned up.
- * Return the root of the expression sequence if alright.
- */
-static struct expr *
-exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
-{
-       struct expr     *e, *first, *next;
-       int              log;
-
-       first = next = NULL;
-
-       for ( ; *pos < argc; (*pos)++) {
-               e = next;
-
-               /*
-                * Close out a subexpression.
-                */
-
-               if (NULL != e && 0 == strcmp(")", argv[*pos])) {
-                       if (--(*lvl) < 0)
-                               goto err;
-                       break;
-               }
-
-               /*
-                * Small note: if we're just starting, don't let "-a"
-                * and "-o" be considered logical operators: they're
-                * just tokens unless pairwise joining, in which case we
-                * record their existence (or assume "OR").
-                */
-               log = 0;
-
-               if (NULL != e && 0 == strcmp("-a", argv[*pos]))
-                       log = 1;
-               else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
-                       log = 2;
-
-               if (log > 0 && ++(*pos) >= argc)
-                       goto err;
-
-               /*
-                * Now we parse the term part.  This can begin with
-                * "-i", in which case the expression is case
-                * insensitive.
-                */
-
-               if (0 == strcmp("(", argv[*pos])) {
-                       ++(*pos);
-                       ++(*lvl);
-                       next = mandoc_calloc(1, sizeof(struct expr));
-                       next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
-                       if (NULL == next->subexpr) {
-                               free(next);
-                               next = NULL;
-                       }
-               } else if (0 == strcmp("-i", argv[*pos])) {
-                       if (++(*pos) >= argc)
-                               goto err;
-                       next = exprterm(argv[*pos], 0);
-               } else
-                       next = exprterm(argv[*pos], 1);
-
-               if (NULL == next)
-                       goto err;
-
-               next->and = log == 1;
-               next->index = (int)(*tt)++;
-
-               /* Append to our chain of expressions. */
-
-               if (NULL == first) {
-                       assert(NULL == e);
-                       first = next;
-               } else {
-                       assert(NULL != e);
-                       e->next = next;
-               }
-       }
-
-       return(first);
-err:
-       exprfree(first);
-       return(NULL);
-}
-
-/*
- * Parse a terminal expression with the grammar as defined in
- * apropos(1).
- * Return NULL if we fail the parse.
- */
-static struct expr *
-exprterm(char *buf, int cs)
-{
-       struct expr      e;
-       struct expr     *p;
-       char            *key;
-       int              i;
-
-       memset(&e, 0, sizeof(struct expr));
-
-       /* Choose regex or substring match. */
-
-       if (NULL == (e.v = strpbrk(buf, "=~"))) {
-               e.regex = 0;
-               e.v = buf;
-       } else {
-               e.regex = '~' == *e.v;
-               *e.v++ = '\0';
-       }
-
-       /* Determine the record types to search for. */
-
-       e.mask = 0;
-       if (buf < e.v) {
-               while (NULL != (key = strsep(&buf, ","))) {
-                       i = 0;
-                       while (types[i].mask &&
-                                       strcmp(types[i].name, key))
-                               i++;
-                       e.mask |= types[i].mask;
-               }
-       }
-       if (0 == e.mask)
-               e.mask = TYPE_Nm | TYPE_Nd;
-
-       if (e.regex) {
-               i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE);
-               if (regcomp(&e.re, e.v, i))
-                       return(NULL);
-       }
-
-       e.v = mandoc_strdup(e.v);
-
-       p = mandoc_calloc(1, sizeof(struct expr));
-       memcpy(p, &e, sizeof(struct expr));
-       return(p);
-}
-
-void
-exprfree(struct expr *p)
-{
-       struct expr     *pp;
-
-       while (NULL != p) {
-               if (p->subexpr)
-                       exprfree(p->subexpr);
-               if (p->regex)
-                       regfree(&p->re);
-               free(p->v);
-               pp = p->next;
-               free(p);
-               p = pp;
-       }
-}
-
-static int
-exprmark(const struct expr *p, const char *cp,
-               uint64_t mask, int *ms)
-{
-
-       for ( ; p; p = p->next) {
-               if (p->subexpr) {
-                       if (exprmark(p->subexpr, cp, mask, ms))
-                               return(1);
-                       continue;
-               } else if ( ! (mask & p->mask))
-                       continue;
-
-               if (p->regex) {
-                       if (regexec(&p->re, cp, 0, NULL, 0))
-                               continue;
-               } else if (NULL == strcasestr(cp, p->v))
-                       continue;
-
-               if (NULL == ms)
-                       return(1);
-               else
-                       ms[p->index] = 1;
-       }
-
-       return(0);
-}
-
-static int
-expreval(const struct expr *p, int *ms)
-{
-       int              match;
-
-       /*
-        * AND has precedence over OR.  Analysis is left-right, though
-        * it doesn't matter because there are no side-effects.
-        * Thus, step through pairwise ANDs and accumulate their Boolean
-        * evaluation.  If we encounter a single true AND collection or
-        * standalone term, the whole expression is true (by definition
-        * of OR).
-        */
-
-       for (match = 0; p && ! match; p = p->next) {
-               /* Evaluate a subexpression, if applicable. */
-               if (p->subexpr && ! ms[p->index])
-                       ms[p->index] = expreval(p->subexpr, ms);
-
-               match = ms[p->index];
-               for ( ; p->next && p->next->and; p = p->next) {
-                       /* Evaluate a subexpression, if applicable. */
-                       if (p->next->subexpr && ! ms[p->next->index])
-                               ms[p->next->index] =
-                                       expreval(p->next->subexpr, ms);
-                       match = match && ms[p->next->index];
-               }
-       }
-
-       return(match);
-}
-
-/*
- * First, update the array of terms for which this expression evaluates
- * to true.
- * Second, logically evaluate all terms over the updated array of truth
- * values.
- * If this evaluates to true, mark the expression as satisfied.
- */
-static void
-exprexec(const struct expr *e, const char *cp,
-               uint64_t mask, struct res *r)
-{
-
-       assert(0 == r->matched);
-       exprmark(e, cp, mask, r->matches);
-       r->matched = expreval(e, r->matches);
-}
diff --git a/usr.bin/mandoc/apropos_db.h b/usr.bin/mandoc/apropos_db.h
deleted file mode 100644 (file)
index 78cc155..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*     $Id: apropos_db.h,v 1.11 2012/04/15 11:54:47 schwarze Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef APROPOS_H
-#define APROPOS_H
-
-enum   restype {
-       RESTYPE_MAN, /* man(7) file */
-       RESTYPE_MDOC, /* mdoc(7) file */
-       RESTYPE_CAT /* pre-formatted file */
-};
-
-struct res {
-       enum restype     type; /* input file type */
-       char            *file; /* file in file-system */
-       char            *cat; /* category (3p, 3, etc.) */
-       char            *title; /* title (FOO, etc.) */
-       char            *arch; /* arch (or empty string) */
-       char            *desc; /* description (from Nd) */
-       unsigned int     rec; /* record in index */
-       /*
-        * The index volume.  This indexes into the array of directories
-        * searched for manual page databases.
-        */
-       unsigned int     volume;
-       /*
-        * The following fields are used internally.
-        *
-        * Maintain a binary tree for checking the uniqueness of `rec'
-        * when adding elements to the results array.
-        * Since the results array is dynamic, use offset in the array
-        * instead of a pointer to the structure.
-        */
-       int              lhs;
-       int              rhs;
-       int              matched; /* expression is true */
-       int             *matches; /* partial truth evaluations */
-};
-
-struct opts {
-       const char      *arch; /* restrict to architecture */
-       const char      *cat; /* restrict to manual section */
-};
-
-__BEGIN_DECLS
-
-struct expr;
-
-int             apropos_search(int, char **, const struct opts *,
-                       const struct expr *, size_t, 
-                       void *, size_t *, struct res **,
-                       void (*)(struct res *, size_t, void *));
-struct expr    *exprcomp(int, char *[], size_t *);
-void            exprfree(struct expr *);
-void            resfree(struct res *, size_t);
-struct expr    *termcomp(int, char *[], size_t *);
-
-__END_DECLS
-
-#endif /*!APROPOS_H*/
index 754c09a..84847d9 100644 (file)
@@ -1,7 +1,7 @@
-/*     $Id: mandocdb.c,v 1.46 2013/11/21 01:47:10 schwarze Exp $ */
+/*     $Id: mandocdb.c,v 1.47 2013/12/31 00:40:19 schwarze Exp $ */
 /*
- * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <sys/types.h>
+#include <sys/stat.h>
 
 #include <assert.h>
 #include <ctype.h>
-#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <fts.h>
 #include <getopt.h>
 #include <limits.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <db.h>
 
-#include "man.h"
+#include <ohash.h>
+#include <sqlite3.h>
+
 #include "mdoc.h"
+#include "man.h"
 #include "mandoc.h"
-#include "mandocdb.h"
 #include "manpath.h"
+#include "mansearch.h"
+
+/*
+ * Thin wrappers around the SQLite calls used to write the database.
+ * All of them act on the file-scope handle "db".  On failure they
+ * print sqlite3_errmsg(db) to stderr and carry on; no error status
+ * is handed back to the caller.
+ *
+ * Each macro expands to one do { } while (0) statement, so it can be
+ * used as the unbraced body of an if/else without capturing the else.
+ * The caller supplies the trailing semicolon.
+ *
+ * The SQL_BIND_* macros post-increment their index argument _i, which
+ * therefore has to be a modifiable lvalue and is evaluated for its
+ * side effect.  SQL_BIND_TEXT binds with SQLITE_STATIC: SQLite does
+ * not copy the text, so the string has to stay valid for as long as
+ * the binding is in use.
+ */
+#define        SQL_EXEC(_v) do { \
+       if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_TEXT(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_text \
+               ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_INT(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_int \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_INT64(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_int64 \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_STEP(_s) do { \
+       if (SQLITE_DONE != sqlite3_step((_s))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
 
-#define        MANDOC_BUFSZ      BUFSIZ
-#define        MANDOC_SLOP       1024
-
-#define        MANDOC_SRC        0x1
-#define        MANDOC_FORM       0x2
-
-/* Access to the mandoc database on disk. */
-
-struct mdb {
-       char              idxn[PATH_MAX]; /* index db filename */
-       char              dbn[PATH_MAX]; /* keyword db filename */
-       DB               *idx; /* index recno database */
-       DB               *db; /* keyword btree database */
+/*
+ * Operating mode of mandocdb().  It starts out as OP_DEFAULT; each of
+ * the options -C, -d, -u and -t selects one of the other modes, and
+ * the CHECKOP macro in mandocdb() rejects a second mode option.
+ */
+enum   op {
+       OP_DEFAULT = 0, /* new dbs from dir list or default config */
+       OP_CONFFILE, /* -C: new databases from custom config file */
+       OP_UPDATE, /* -d: delete/add entries in existing database */
+       OP_DELETE, /* -u: delete entries from existing database */
+       OP_TEST /* -t: change no databases, report potential problems */
 };
 
-/* Stack of temporarily unused index records. */
-
-struct recs {
-       recno_t          *stack; /* pointer to a malloc'ed array */
-       size_t            size; /* number of allocated slots */
-       size_t            cur; /* current number of empty records */
-       recno_t           last; /* last record number in the index */
+/*
+ * Format of a manual page.  The same enumeration is used for the
+ * format derived from the directory, from the file name suffix and
+ * from the file content (see the dform, fform and form members of
+ * struct mlink and struct mpage).
+ */
+enum   form {
+       FORM_NONE,  /* format is unknown */
+       FORM_SRC,   /* format is -man or -mdoc, i.e. manual source */
+       FORM_CAT    /* format is cat */
 };
 
-/* Tiny list for files.  No need to bring in QUEUE. */
+/*
+ * A key string together with its UTF-8 form and type mask.
+ * The string itself lives in the trailing flexible array member, so
+ * an object of this type has to be allocated with extra room for it.
+ * NOTE(review): presumably the element type of the "strings" hash
+ * table -- confirm in putkey().
+ */
+struct str {
+       char            *utf8; /* key in UTF-8 form */
+       const struct mpage *mpage; /* if set, the owning parse */
+       uint64_t         mask; /* bitmask in sequence */
+       char             key[]; /* the string itself */
+};
 
-struct of {
-       char             *fname; /* heap-allocated */
-       char             *sec;
-       char             *arch;
-       char             *title;
-       int               src_form;
-       struct of        *next; /* NULL for last one */
-       struct of        *first; /* first in list */
+/*
+ * Inode number and device number of a file.  Embedded at the start of
+ * struct mpage, where mandocdb() registers it as the hash key of the
+ * mpages table (key_offset = offsetof(struct mpage, inodev)).
+ */
+struct inodev {
+       ino_t            st_ino;
+       dev_t            st_dev;
 };
 
-/* Buffer for storing growable data. */
+/*
+ * One entry of the mpages table ("table of distinct manual pages"),
+ * keyed by its inodev member.  The sec, arch, title and desc members
+ * hold what was found in the file content; the file names under
+ * which the page is known hang off the mlinks list.
+ */
+struct mpage {
+       struct inodev    inodev;  /* hash key: inode and device number */
+       enum form        form;    /* format from file content */
+       char            *sec;     /* section from file content */
+       char            *arch;    /* architecture from file content */
+       char            *title;   /* title from file content */
+       char            *desc;    /* description from file content */
+       struct mlink    *mlinks;  /* singly linked list */
+};
 
-struct buf {
-       char             *cp;
-       size_t            len; /* current length */
-       size_t            size; /* total buffer size */
+/*
+ * One entry of the mlinks table ("table of directory entries").
+ * The file member is registered as the hash key of that table in
+ * mandocdb() (key_offset = offsetof(struct mlink, file)).
+ * The remaining members record what the directory and the file name
+ * say about the page, as opposed to the file content kept in
+ * struct mpage.
+ */
+struct mlink {
+       char             file[PATH_MAX]; /* filename rel. to manpath */
+       enum form        dform;   /* format from directory */
+       enum form        fform;   /* format from file name suffix */
+       char            *dsec;    /* section from directory */
+       char            *arch;    /* architecture from directory */
+       char            *name;    /* name from file name (not empty) */
+       char            *fsec;    /* section from file name suffix */
+       struct mlink    *next;    /* singly linked list */
 };
 
-/* Operation we're going to perform. */
+/*
+ * Pairs a title found inside a manual file with a file name.
+ * NOTE(review): judging by the member comments this is used to report
+ * titles that do not match any file name -- confirm at the point of
+ * use.
+ */
+struct title {
+       char            *title; /* name(sec/arch) given inside the file */
+       char            *file; /* file name in case of mismatch */
+};
 
-enum   op {
-       OP_DEFAULT = 0, /* new dbs from dir list or default config */
-       OP_CONFFILE, /* new databases from custom config file */
-       OP_UPDATE, /* delete/add entries in existing database */
-       OP_DELETE, /* delete entries from existing database */
-       OP_TEST /* change no databases, report potential problems */
+/*
+ * Indices into the file-scope stmts[] array of SQLite statements.
+ * STMT__MAX is not a statement of its own; it is the number of
+ * entries and is used to size and to clear that array.
+ */
+enum   stmt {
+       STMT_DELETE_PAGE = 0,   /* delete mpage */
+       STMT_INSERT_PAGE,       /* insert mpage */
+       STMT_INSERT_LINK,       /* insert mlink */
+       STMT_INSERT_KEY,        /* insert parsed key */
+       STMT__MAX               /* number of statements, keep last */
 };
 
-#define        MAN_ARGS          DB *hash, \
-                         struct buf *buf, \
-                         struct buf *dbuf, \
-                         const struct man_node *n
-#define        MDOC_ARGS         DB *hash, \
-                         struct buf *buf, \
-                         struct buf *dbuf, \
-                         const struct mdoc_node *n, \
-                         const struct mdoc_meta *m
-
-static void              buf_appendmdoc(struct buf *, 
-                               const struct mdoc_node *, int);
-static void              buf_append(struct buf *, const char *);
-static void              buf_appendb(struct buf *, 
-                               const void *, size_t);
-static void              dbt_put(DB *, const char *, DBT *, DBT *);
-static void              hash_put(DB *, const struct buf *, uint64_t);
-static void              hash_reset(DB **);
-static void              index_merge(const struct of *, struct mparse *,
-                               struct buf *, struct buf *, DB *,
-                               struct mdb *, struct recs *);
-static void              index_prune(const struct of *, struct mdb *,
-                               struct recs *);
-static void              ofile_argbuild(int, char *[], struct of **,
-                               const char *);
-static void              ofile_dirbuild(const char *, const char *,
-                               const char *, int, struct of **);
-static void              ofile_free(struct of *);
-static void              pformatted(DB *, struct buf *, 
-                               struct buf *, const struct of *);
-static int               pman_node(MAN_ARGS);
-static void              pmdoc_node(MDOC_ARGS);
-static int               pmdoc_head(MDOC_ARGS);
-static int               pmdoc_body(MDOC_ARGS);
-static int               pmdoc_Fd(MDOC_ARGS);
-static int               pmdoc_In(MDOC_ARGS);
-static int               pmdoc_Fn(MDOC_ARGS);
-static int               pmdoc_Nd(MDOC_ARGS);
-static int               pmdoc_Nm(MDOC_ARGS);
-static int               pmdoc_Sh(MDOC_ARGS);
-static int               pmdoc_St(MDOC_ARGS);
-static int               pmdoc_Xr(MDOC_ARGS);
-
-#define        MDOCF_CHILD       0x01  /* Automatically index child nodes. */
+/*
+ * Signature shared by the parse_mdoc_*() handlers: a handler takes
+ * the struct mpage and one mdoc node and returns an int.  How a zero
+ * return is interpreted is stated on the mask member below.
+ */
+typedef        int (*mdoc_fp)(struct mpage *, const struct mdoc_node *);
 
 struct mdoc_handler {
-       int             (*fp)(MDOC_ARGS);  /* Optional handler. */
-       uint64_t          mask;  /* Set unless handler returns 0. */
-       int               flags;  /* For use by pmdoc_node. */
+       mdoc_fp          fp; /* optional handler, NULL if there is none */
+       uint64_t         mask;  /* set unless handler returns 0 */
 };
 
+/* Forward declarations of the file-local helper functions. */
+static void     dbclose(int);
+static void     dbindex(const struct mpage *, struct mchars *);
+static int      dbopen(int);
+static void     dbprune(void);
+static void     filescan(const char *);
+static void    *hash_alloc(size_t, void *);
+static void     hash_free(void *, size_t, void *);
+static void    *hash_halloc(size_t, void *);
+static void     mlink_add(struct mlink *, const struct stat *);
+static void     mlink_free(struct mlink *);
+static void     mlinks_undupe(struct mpage *);
+static void     mpages_free(void);
+static void     mpages_merge(struct mchars *, struct mparse *, int);
+static void     parse_cat(struct mpage *);
+static void     parse_man(struct mpage *, const struct man_node *);
+static void     parse_mdoc(struct mpage *, const struct mdoc_node *);
+/* The parse_mdoc_*() functions below all have the mdoc_fp signature. */
+static int      parse_mdoc_body(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_head(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Fd(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Fn(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_In(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Nd(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_St(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
+static void     putkey(const struct mpage *,
+                       const char *, uint64_t);
+static void     putkeys(const struct mpage *,
+                       const char *, size_t, uint64_t);
+static void     putmdockey(const struct mpage *,
+                       const struct mdoc_node *, uint64_t);
+static void     say(const char *, const char *, ...);
+static int      set_basedir(const char *);
+static int      treescan(void);
+static size_t   utf8(unsigned int, char [7]);
+static void     utf8key(struct mchars *, struct str *);
+
+/* File-scope state of the database generator. */
+static char            *progname; /* set from argv[0] in mandocdb() */
+static int              use_all; /* use all found files (-a; forced for -d, -u, -t) */
+static int              nodb; /* no database changes (-n; implied by -t) */
+static int              verb; /* print what we're doing */
+static int              warnings; /* warn about potential problems (implied by -t) */
+static int              exitcode; /* to be returned by main; starts as MANDOCLEVEL_OK */
+static enum op          op; /* operational mode */
+static char             basedir[PATH_MAX]; /* current base directory */
+static struct ohash     mpages; /* table of distinct manual pages, keyed by inodev */
+static struct ohash     mlinks; /* table of directory entries, keyed by file */
+static struct ohash     strings; /* table of all strings */
+static sqlite3         *db = NULL; /* current database */
+static sqlite3_stmt    *stmts[STMT__MAX]; /* current statements, indexed by enum stmt */
+
+/*
+ * Handler table with one row per mdoc(7) macro; the trailing comment
+ * on each row names the macro the row belongs to, and the array is
+ * sized MDOC_MAX.  Each row gives the optional handler function and
+ * the TYPE_* key mask (see struct mdoc_handler); rows of { NULL, 0 }
+ * have neither.
+ * NOTE(review): the rows are presumably looked up by macro token, so
+ * their order has to match the macro enumeration -- confirm against
+ * the code that walks the mdoc tree.
+ */
 static const struct mdoc_handler mdocs[MDOC_MAX] = {
-       { NULL, 0, 0 },  /* Ap */
-       { NULL, 0, 0 },  /* Dd */
-       { NULL, 0, 0 },  /* Dt */
-       { NULL, 0, 0 },  /* Os */
-       { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
-       { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
-       { NULL, 0, 0 },  /* Pp */
-       { NULL, 0, 0 },  /* D1 */
-       { NULL, 0, 0 },  /* Dl */
-       { NULL, 0, 0 },  /* Bd */
-       { NULL, 0, 0 },  /* Ed */
-       { NULL, 0, 0 },  /* Bl */
-       { NULL, 0, 0 },  /* El */
-       { NULL, 0, 0 },  /* It */
-       { NULL, 0, 0 },  /* Ad */
-       { NULL, TYPE_An, MDOCF_CHILD },  /* An */
-       { NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
-       { NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
-       { NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
-       { NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
-       { NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
-       { NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
-       { NULL, 0, 0 },  /* Ex */
-       { NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
-       { pmdoc_Fd, TYPE_In, 0 },  /* Fd */
-       { NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
-       { pmdoc_Fn, 0, 0 },  /* Fn */
-       { NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
-       { NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
-       { pmdoc_In, TYPE_In, 0 },  /* In */
-       { NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
-       { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
-       { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
-       { NULL, 0, 0 },  /* Op */
-       { NULL, 0, 0 },  /* Ot */
-       { NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
-       { NULL, 0, 0 },  /* Rv */
-       { pmdoc_St, TYPE_St, 0 },  /* St */
-       { NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
-       { pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
-       { pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
-       { NULL, 0, 0 },  /* %A */
-       { NULL, 0, 0 },  /* %B */
-       { NULL, 0, 0 },  /* %D */
-       { NULL, 0, 0 },  /* %I */
-       { NULL, 0, 0 },  /* %J */
-       { NULL, 0, 0 },  /* %N */
-       { NULL, 0, 0 },  /* %O */
-       { NULL, 0, 0 },  /* %P */
-       { NULL, 0, 0 },  /* %R */
-       { NULL, 0, 0 },  /* %T */
-       { NULL, 0, 0 },  /* %V */
-       { NULL, 0, 0 },  /* Ac */
-       { NULL, 0, 0 },  /* Ao */
-       { NULL, 0, 0 },  /* Aq */
-       { NULL, TYPE_At, MDOCF_CHILD },  /* At */
-       { NULL, 0, 0 },  /* Bc */
-       { NULL, 0, 0 },  /* Bf */
-       { NULL, 0, 0 },  /* Bo */
-       { NULL, 0, 0 },  /* Bq */
-       { NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
-       { NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
-       { NULL, 0, 0 },  /* Db */
-       { NULL, 0, 0 },  /* Dc */
-       { NULL, 0, 0 },  /* Do */
-       { NULL, 0, 0 },  /* Dq */
-       { NULL, 0, 0 },  /* Ec */
-       { NULL, 0, 0 },  /* Ef */
-       { NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
-       { NULL, 0, 0 },  /* Eo */
-       { NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
-       { NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
-       { NULL, 0, 0 },  /* No */
-       { NULL, 0, 0 },  /* Ns */
-       { NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
-       { NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
-       { NULL, 0, 0 },  /* Pc */
-       { NULL, 0, 0 },  /* Pf */
-       { NULL, 0, 0 },  /* Po */
-       { NULL, 0, 0 },  /* Pq */
-       { NULL, 0, 0 },  /* Qc */
-       { NULL, 0, 0 },  /* Ql */
-       { NULL, 0, 0 },  /* Qo */
-       { NULL, 0, 0 },  /* Qq */
-       { NULL, 0, 0 },  /* Re */
-       { NULL, 0, 0 },  /* Rs */
-       { NULL, 0, 0 },  /* Sc */
-       { NULL, 0, 0 },  /* So */
-       { NULL, 0, 0 },  /* Sq */
-       { NULL, 0, 0 },  /* Sm */
-       { NULL, 0, 0 },  /* Sx */
-       { NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
-       { NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
-       { NULL, 0, 0 },  /* Ux */
-       { NULL, 0, 0 },  /* Xc */
-       { NULL, 0, 0 },  /* Xo */
-       { pmdoc_head, TYPE_Fn, 0 },  /* Fo */
-       { NULL, 0, 0 },  /* Fc */
-       { NULL, 0, 0 },  /* Oo */
-       { NULL, 0, 0 },  /* Oc */
-       { NULL, 0, 0 },  /* Bk */
-       { NULL, 0, 0 },  /* Ek */
-       { NULL, 0, 0 },  /* Bt */
-       { NULL, 0, 0 },  /* Hf */
-       { NULL, 0, 0 },  /* Fr */
-       { NULL, 0, 0 },  /* Ud */
-       { NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
-       { NULL, 0, 0 },  /* Lp */
-       { NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
-       { NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
-       { NULL, 0, 0 },  /* Brq */
-       { NULL, 0, 0 },  /* Bro */
-       { NULL, 0, 0 },  /* Brc */
-       { NULL, 0, 0 },  /* %C */
-       { NULL, 0, 0 },  /* Es */
-       { NULL, 0, 0 },  /* En */
-       { NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
-       { NULL, 0, 0 },  /* %Q */
-       { NULL, 0, 0 },  /* br */
-       { NULL, 0, 0 },  /* sp */
-       { NULL, 0, 0 },  /* %U */
-       { NULL, 0, 0 },  /* Ta */
+       { NULL, 0 },  /* Ap */
+       { NULL, 0 },  /* Dd */
+       { NULL, 0 },  /* Dt */
+       { NULL, 0 },  /* Os */
+       { parse_mdoc_Sh, TYPE_Sh }, /* Sh */
+       { parse_mdoc_head, TYPE_Ss }, /* Ss */
+       { NULL, 0 },  /* Pp */
+       { NULL, 0 },  /* D1 */
+       { NULL, 0 },  /* Dl */
+       { NULL, 0 },  /* Bd */
+       { NULL, 0 },  /* Ed */
+       { NULL, 0 },  /* Bl */
+       { NULL, 0 },  /* El */
+       { NULL, 0 },  /* It */
+       { NULL, 0 },  /* Ad */
+       { NULL, TYPE_An },  /* An */
+       { NULL, TYPE_Ar },  /* Ar */
+       { NULL, TYPE_Cd },  /* Cd */
+       { NULL, TYPE_Cm },  /* Cm */
+       { NULL, TYPE_Dv },  /* Dv */
+       { NULL, TYPE_Er },  /* Er */
+       { NULL, TYPE_Ev },  /* Ev */
+       { NULL, 0 },  /* Ex */
+       { NULL, TYPE_Fa },  /* Fa */
+       { parse_mdoc_Fd, 0 },  /* Fd */
+       { NULL, TYPE_Fl },  /* Fl */
+       { parse_mdoc_Fn, 0 },  /* Fn */
+       { NULL, TYPE_Ft },  /* Ft */
+       { NULL, TYPE_Ic },  /* Ic */
+       { parse_mdoc_In, TYPE_In },  /* In */
+       { NULL, TYPE_Li },  /* Li */
+       { parse_mdoc_Nd, TYPE_Nd },  /* Nd */
+       { parse_mdoc_Nm, TYPE_Nm },  /* Nm */
+       { NULL, 0 },  /* Op */
+       { NULL, 0 },  /* Ot */
+       { NULL, TYPE_Pa },  /* Pa */
+       { NULL, 0 },  /* Rv */
+       { parse_mdoc_St, 0 },  /* St */
+       { NULL, TYPE_Va },  /* Va */
+       { parse_mdoc_body, TYPE_Va },  /* Vt */
+       { parse_mdoc_Xr, 0 },  /* Xr */
+       { NULL, 0 },  /* %A */
+       { NULL, 0 },  /* %B */
+       { NULL, 0 },  /* %D */
+       { NULL, 0 },  /* %I */
+       { NULL, 0 },  /* %J */
+       { NULL, 0 },  /* %N */
+       { NULL, 0 },  /* %O */
+       { NULL, 0 },  /* %P */
+       { NULL, 0 },  /* %R */
+       { NULL, 0 },  /* %T */
+       { NULL, 0 },  /* %V */
+       { NULL, 0 },  /* Ac */
+       { NULL, 0 },  /* Ao */
+       { NULL, 0 },  /* Aq */
+       { NULL, TYPE_At },  /* At */
+       { NULL, 0 },  /* Bc */
+       { NULL, 0 },  /* Bf */
+       { NULL, 0 },  /* Bo */
+       { NULL, 0 },  /* Bq */
+       { NULL, TYPE_Bsx },  /* Bsx */
+       { NULL, TYPE_Bx },  /* Bx */
+       { NULL, 0 },  /* Db */
+       { NULL, 0 },  /* Dc */
+       { NULL, 0 },  /* Do */
+       { NULL, 0 },  /* Dq */
+       { NULL, 0 },  /* Ec */
+       { NULL, 0 },  /* Ef */
+       { NULL, TYPE_Em },  /* Em */
+       { NULL, 0 },  /* Eo */
+       { NULL, TYPE_Fx },  /* Fx */
+       { NULL, TYPE_Ms },  /* Ms */
+       { NULL, 0 },  /* No */
+       { NULL, 0 },  /* Ns */
+       { NULL, TYPE_Nx },  /* Nx */
+       { NULL, TYPE_Ox },  /* Ox */
+       { NULL, 0 },  /* Pc */
+       { NULL, 0 },  /* Pf */
+       { NULL, 0 },  /* Po */
+       { NULL, 0 },  /* Pq */
+       { NULL, 0 },  /* Qc */
+       { NULL, 0 },  /* Ql */
+       { NULL, 0 },  /* Qo */
+       { NULL, 0 },  /* Qq */
+       { NULL, 0 },  /* Re */
+       { NULL, 0 },  /* Rs */
+       { NULL, 0 },  /* Sc */
+       { NULL, 0 },  /* So */
+       { NULL, 0 },  /* Sq */
+       { NULL, 0 },  /* Sm */
+       { NULL, 0 },  /* Sx */
+       { NULL, TYPE_Sy },  /* Sy */
+       { NULL, TYPE_Tn },  /* Tn */
+       { NULL, 0 },  /* Ux */
+       { NULL, 0 },  /* Xc */
+       { NULL, 0 },  /* Xo */
+       { parse_mdoc_head, 0 },  /* Fo */
+       { NULL, 0 },  /* Fc */
+       { NULL, 0 },  /* Oo */
+       { NULL, 0 },  /* Oc */
+       { NULL, 0 },  /* Bk */
+       { NULL, 0 },  /* Ek */
+       { NULL, 0 },  /* Bt */
+       { NULL, 0 },  /* Hf */
+       { NULL, 0 },  /* Fr */
+       { NULL, 0 },  /* Ud */
+       { NULL, TYPE_Lb },  /* Lb */
+       { NULL, 0 },  /* Lp */
+       { NULL, TYPE_Lk },  /* Lk */
+       { NULL, TYPE_Mt },  /* Mt */
+       { NULL, 0 },  /* Brq */
+       { NULL, 0 },  /* Bro */
+       { NULL, 0 },  /* Brc */
+       { NULL, 0 },  /* %C */
+       { NULL, 0 },  /* Es */
+       { NULL, 0 },  /* En */
+       { NULL, TYPE_Dx },  /* Dx */
+       { NULL, 0 },  /* %Q */
+       { NULL, 0 },  /* br */
+       { NULL, 0 },  /* sp */
+       { NULL, 0 },  /* %U */
+       { NULL, 0 },  /* Ta */
 };
 
-static const char       *progname;
-static int               use_all;  /* Use all directories and files. */
-static int               verb;  /* Output verbosity level. */
-static int               warnings;  /* Potential problems in manuals. */
-
 int
 mandocdb(int argc, char *argv[])
 {
-       struct mparse   *mp; /* parse sequence */
-       struct manpaths  dirs;
-       struct mdb       mdb;
-       struct recs      recs;
-       enum op          op; /* current operation */
-       const char      *dir;
-       char            *cp;
-       char             pbuf[PATH_MAX];
-       int              ch, i, flags;
-       DB              *hash; /* temporary keyword hashtable */
-       BTREEINFO        info; /* btree configuration */
-       size_t           sz1, sz2, ipath;
-       struct buf       buf, /* keyword buffer */
-                        dbuf; /* description buffer */
-       struct of       *of; /* list of files for processing */
-       extern int       optind;
-       extern char     *optarg;
+       int               ch, i;
+       size_t            j, sz;
+       const char       *path_arg;
+       struct mchars    *mc;
+       struct manpaths   dirs;
+       struct mparse    *mp;
+       struct ohash_info mpages_info, mlinks_info;
+
+       memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));
+       memset(&dirs, 0, sizeof(struct manpaths));
+
+       mpages_info.alloc  = mlinks_info.alloc  = hash_alloc;
+       mpages_info.halloc = mlinks_info.halloc = hash_halloc;
+       mpages_info.hfree  = mlinks_info.hfree  = hash_free;
+
+       mpages_info.key_offset = offsetof(struct mpage, inodev);
+       mlinks_info.key_offset = offsetof(struct mlink, file);
 
        progname = strrchr(argv[0], '/');
        if (progname == NULL)
@@ -299,57 +330,47 @@ mandocdb(int argc, char *argv[])
        else
                ++progname;
 
-       memset(&dirs, 0, sizeof(struct manpaths));
-       memset(&mdb, 0, sizeof(struct mdb));
-       memset(&recs, 0, sizeof(struct recs));
+       /*
+        * We accept a few different invocations.  
+        * The CHECKOP macro makes sure that invocation styles don't
+        * clobber each other.
+        */
+#define        CHECKOP(_op, _ch) do \
+       if (OP_DEFAULT != (_op)) { \
+               fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \
+               goto usage; \
+       } while (/*CONSTCOND*/0)
 
-       of = NULL;
-       mp = NULL;
-       hash = NULL;
+       path_arg = NULL;
        op = OP_DEFAULT;
-       dir = NULL;
 
-       while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
+       while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))
                switch (ch) {
                case ('a'):
                        use_all = 1;
                        break;
                case ('C'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-C: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                        op = OP_CONFFILE;
                        break;
                case ('d'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-d: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                        op = OP_UPDATE;
                        break;
+               case ('n'):
+                       nodb = 1;
+                       break;
                case ('t'):
+                       CHECKOP(op, ch);
                        dup2(STDOUT_FILENO, STDERR_FILENO);
-                       if (op) {
-                               fprintf(stderr,
-                                   "-t: conflicting options\n");
-                               goto usage;
-                       }
                        op = OP_TEST;
-                       use_all = 1;
-                       warnings = 1;
+                       nodb = warnings = 1;
                        break;
                case ('u'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-u: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                        op = OP_DELETE;
                        break;
                case ('v'):
@@ -366,282 +387,584 @@ mandocdb(int argc, char *argv[])
        argv += optind;
 
        if (OP_CONFFILE == op && argc > 0) {
-               fprintf(stderr, "-C: too many arguments\n");
+               fprintf(stderr, "-C: Too many arguments\n");
                goto usage;
        }
 
-       memset(&info, 0, sizeof(BTREEINFO));
-       info.lorder = 4321;
-       info.flags = R_DUP;
-
-       mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+       exitcode = (int)MANDOCLEVEL_OK;
+       mp = mparse_alloc(MPARSE_AUTO, 
+               MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+       mc = mchars_alloc();
 
-       memset(&buf, 0, sizeof(struct buf));
-       memset(&dbuf, 0, sizeof(struct buf));
+       ohash_init(&mpages, 6, &mpages_info);
+       ohash_init(&mlinks, 6, &mlinks_info);
 
-       buf.size = dbuf.size = MANDOC_BUFSZ;
-
-       buf.cp = mandoc_malloc(buf.size);
-       dbuf.cp = mandoc_malloc(dbuf.size);
+       if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
+               /* 
+                * Force processing all files.
+                */
+               use_all = 1;
 
-       if (OP_TEST == op) {
-               ofile_argbuild(argc, argv, &of, NULL);
-               if (NULL == of)
+               /*
+                * All of these deal with a specific directory.
+                * Jump into that directory then collect files specified
+                * on the command-line.
+                */
+               if (0 == set_basedir(path_arg))
                        goto out;
-               index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
-               goto out;
-       }
+               for (i = 0; i < argc; i++)
+                       filescan(argv[i]);
+               if (0 == dbopen(1))
+                       goto out;
+               if (OP_TEST != op)
+                       dbprune();
+               if (OP_DELETE != op)
+                       mpages_merge(mc, mp, 0);
+               dbclose(1);
+       } else {
+               /*
+                * If we have arguments, use them as our manpaths.
+                * If we don't, grok from manpath(1) or however else
+                * manpath_parse() wants to do it.
+                */
+               if (argc > 0) {
+                       dirs.paths = mandoc_calloc
+                               (argc, sizeof(char *));
+                       dirs.sz = (size_t)argc;
+                       for (i = 0; i < argc; i++)
+                               dirs.paths[i] = mandoc_strdup(argv[i]);
+               } else
+                       manpath_parse(&dirs, path_arg, NULL, NULL);
 
-       if (OP_UPDATE == op || OP_DELETE == op) {
-               if (NULL == realpath(dir, pbuf)) {
-                       perror(dir);
-                       exit((int)MANDOCLEVEL_BADARG);
-               }
-               if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
-                       fprintf(stderr, "%s: path too long\n", pbuf);
-                       exit((int)MANDOCLEVEL_BADARG);
-               }
+               /*
+                * First scan the tree rooted at a base directory, then
+                * build a new database and finally move it into place.
+                * Ignore zero-length directories and strip trailing
+                * slashes.
+                */
+               for (j = 0; j < dirs.sz; j++) {
+                       sz = strlen(dirs.paths[j]);
+                       if (sz && '/' == dirs.paths[j][sz - 1])
+                               dirs.paths[j][--sz] = '\0';
+                       if (0 == sz)
+                               continue;
+
+                       if (j) {
+                               ohash_init(&mpages, 6, &mpages_info);
+                               ohash_init(&mlinks, 6, &mlinks_info);
+                       }
 
-               strlcat(mdb.dbn, pbuf, PATH_MAX);
-               sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX);
+                       if (0 == set_basedir(dirs.paths[j]))
+                               goto out;
+                       if (0 == treescan())
+                               goto out;
+                       if (0 == set_basedir(dirs.paths[j]))
+                               goto out;
+                       if (0 == dbopen(0))
+                               goto out;
 
-               strlcat(mdb.idxn, pbuf, PATH_MAX);
-               sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX);
+                       mpages_merge(mc, mp, warnings && !use_all);
+                       dbclose(0);
 
-               if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) {
-                       fprintf(stderr, "%s: path too long\n", mdb.idxn);
-                       exit((int)MANDOCLEVEL_BADARG);
+                       if (j + 1 < dirs.sz) {
+                               mpages_free();
+                               ohash_delete(&mpages);
+                               ohash_delete(&mlinks);
+                       }
                }
+       }
+out:
+       set_basedir(NULL);
+       manpath_free(&dirs);
+       mchars_free(mc);
+       mparse_free(mp);
+       mpages_free();
+       ohash_delete(&mpages);
+       ohash_delete(&mlinks);
+       return(exitcode);
+usage:
+       fprintf(stderr, "usage: %s [-anvW] [-C file]\n"
+                       "       %s [-anvW] dir ...\n"
+                       "       %s [-nvW] -d dir [file ...]\n"
+                       "       %s [-nvW] -u dir [file ...]\n"
+                       "       %s -t file ...\n",
+                      progname, progname, progname, 
+                      progname, progname);
 
-               flags = O_CREAT | O_RDWR;
-               mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
-               mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+       return((int)MANDOCLEVEL_BADARG);
+}
 
-               if (NULL == mdb.db) {
-                       perror(mdb.dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               } else if (NULL == mdb.idx) {
-                       perror(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+/*
+ * Scan a directory tree rooted at "basedir" for manpages.
+ * We use fts(), scanning directory parts along the way for clues to our
+ * section and architecture.
+ *
+ * If use_all has been specified, grok all files.
+ * If not, sanitise paths to the following:
+ *
+ *   [./]man*[/<arch>]/<name>.<section> 
+ *   or
+ *   [./]cat<section>[/<arch>]/<name>.0
+ *
+ * Every accepted file is handed to mlink_add() as a new struct mlink.
+ * Returns 1 once the walk is complete, or 0 (with exitcode set to
+ * MANDOCLEVEL_SYSERR) if fts_open() fails.
+ *
+ * TODO: accommodate multi-language directories.
+ */
+static int
+treescan(void)
+{
+       FTS             *f;
+       FTSENT          *ff;
+       struct mlink    *mlink;
+       int              dform;
+       char            *fsec;
+       const char      *dsec, *arch, *cp, *path;
+       const char      *argv[2];
 
-               ofile_argbuild(argc, argv, &of, pbuf);
+       argv[0] = ".";
+       argv[1] = (char *)NULL;
 
-               if (NULL == of)
-                       goto out;
+       /*
+        * Walk through all components under the current directory
+        * (".").  FTS_LOGICAL requests a logical walk, in which
+        * symbolic links are followed to their targets.
+        */
+       f = fts_open((char * const *)argv, FTS_LOGICAL, NULL);
+       if (NULL == f) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say("", NULL);
+               return(0);
+       }
 
-               index_prune(of, &mdb, &recs);
+       dsec = arch = NULL;     /* No section or arch directory seen yet. */
+       dform = FORM_NONE;
 
+       while (NULL != (ff = fts_read(f))) {
+               path = ff->fts_path + 2;        /* Drop the "./" prefix. */
                /*
-                * Go to the root of the respective manual tree.
-                * This must work or no manuals may be found (they're
-                * indexed relative to the root).
+                * If we're a regular file, add an mlink by using the
+                * stored directory data and handling the filename.
                 */
+               if (FTS_F == ff->fts_info) {
+                       if (0 == strcmp(path, MANDOC_DB))
+                               continue;
+                       if ( ! use_all && ff->fts_level < 2) {
+                               if (warnings)
+                                       say(path, "Extraneous file");
+                               continue;
+                       } else if (NULL == (fsec =
+                                       strrchr(ff->fts_name, '.'))) {
+                               if ( ! use_all) {
+                                       if (warnings)
+                                               say(path,
+                                                   "No filename suffix");
+                                       continue;
+                               }
+                       } else if (0 == strcmp(++fsec, "html")) {
+                               if (warnings)
+                                       say(path, "Skip html");
+                               continue;
+                       } else if (0 == strcmp(fsec, "gz")) {
+                               if (warnings)
+                                       say(path, "Skip gz");
+                               continue;
+                       } else if (0 == strcmp(fsec, "ps")) {
+                               if (warnings)
+                                       say(path, "Skip ps");
+                               continue;
+                       } else if (0 == strcmp(fsec, "pdf")) {
+                               if (warnings)
+                                       say(path, "Skip pdf");
+                               continue;
+                       } else if ( ! use_all &&
+                           ((FORM_SRC == dform && strcmp(fsec, dsec)) ||
+                            (FORM_CAT == dform && strcmp(fsec, "0")))) {
+                               if (warnings)
+                                       say(path, "Wrong filename suffix");
+                               continue;
+                       } else
+                               fsec[-1] = '\0';        /* Cut ".suffix" off fts_name. */
+                       mlink = mandoc_calloc(1, sizeof(struct mlink));
+                       strlcpy(mlink->file, path, sizeof(mlink->file));
+                       mlink->dform = dform;
+                       if (NULL != dsec)
+                               mlink->dsec = mandoc_strdup(dsec);
+                       if (NULL != arch)
+                               mlink->arch = mandoc_strdup(arch);
+                       mlink->name = mandoc_strdup(ff->fts_name);
+                       if (NULL != fsec)
+                               mlink->fsec = mandoc_strdup(fsec);
+                       mlink_add(mlink, ff->fts_statp);
+                       continue;
+               } else if (FTS_D != ff->fts_info &&
+                               FTS_DP != ff->fts_info) {
+                       if (warnings)
+                               say(path, "Not a regular file");
+                       continue;
+               }
 
-               if (OP_UPDATE == op) {
-                       if (-1 == chdir(dir)) {
-                               perror(dir);
-                               exit((int)MANDOCLEVEL_SYSERR);
+               switch (ff->fts_level) {
+               case (0):
+                       /* Ignore the root directory. */
+                       break;
+               case (1):
+                       /*
+                        * This might contain manX/ or catX/.
+                        * Try to infer this from the name.
+                        * If we're not in use_all, enforce it.
+                        * The section and form are reset first, so
+                        * they are also forgotten when fts returns to
+                        * the directory on the way out (FTS_DP).
+                        */
+                       dsec = NULL;
+                       dform = FORM_NONE;
+                       cp = ff->fts_name;
+                       if (FTS_DP == ff->fts_info)
+                               break;
+
+                       if (0 == strncmp(cp, "man", 3)) {
+                               dform = FORM_SRC;
+                               dsec = cp + 3;
+                       } else if (0 == strncmp(cp, "cat", 3)) {
+                               dform = FORM_CAT;
+                               dsec = cp + 3;
                        }
-                       index_merge(of, mp, &dbuf, &buf, hash,
-                                       &mdb, &recs);
+
+                       if (NULL != dsec || use_all) 
+                               break;
+
+                       if (warnings)
+                               say(path, "Unknown directory part");
+                       fts_set(f, ff, FTS_SKIP);
+                       break;
+               case (2):
+                       /*
+                        * Possibly our architecture.
+                        * If we're descending, keep tabs on it.
+                        */
+                       arch = NULL;
+                       if (FTS_DP != ff->fts_info && NULL != dsec)
+                               arch = ff->fts_name;
+                       break;
+               default:
+                       if (FTS_DP == ff->fts_info || use_all)
+                               break;
+                       if (warnings)
+                               say(path, "Extraneous directory part");
+                       fts_set(f, ff, FTS_SKIP);
+                       break;
                }
+       }
+
+       fts_close(f);
+       return(1);
+}
+
+/*
+ * Add a file to the mlinks table.
+ * Do not verify that it's a "valid" looking manpage (we'll do that
+ * later).
+ *
+ * Try to infer the manual section, architecture, and page name from the
+ * path, assuming it looks like
+ *
+ *   [./]man*[/<arch>]/<name>.<section> 
+ *   or
+ *   [./]cat<section>[/<arch>]/<name>.0
+ *
+ * The file is resolved with realpath(3) and, unless we are in test
+ * mode (OP_TEST), the result must start with "basedir".  A file that
+ * cannot be resolved, lies outside the base directory, cannot be
+ * stat(2)ed or is not a regular file is reported via say(), sets
+ * exitcode to MANDOCLEVEL_BADARG and is not added.
+ *
+ * See treescan() for the fts(3) version of this.
+ */
+static void
+filescan(const char *file)
+{
+       char             buf[PATH_MAX];
+       struct stat      st;
+       struct mlink    *mlink;
+       char            *p, *start;
+
+       assert(use_all);
 
-               goto out;
+       if (0 == strncmp(file, "./", 2))
+               file += 2;
+
+       if (NULL == realpath(file, buf)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, NULL);
+               return;
+       } else if (OP_TEST != op && strstr(buf, basedir) != buf) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say("", "%s: outside base directory", buf);
+               return;
+       } else if (-1 == stat(buf, &st)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, NULL);
+               return;
+       } else if ( ! S_ISREG(st.st_mode)) {
+               /*
+                * The file type is an enumerated bit field, not a
+                * flag: S_IFREG may share bits with other types, so
+                * the type has to be tested with S_ISREG().
+                */
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, "Not a regular file");
+               return;
        }
+       start = buf + strlen(basedir);
+       mlink = mandoc_calloc(1, sizeof(struct mlink));
+       strlcpy(mlink->file, start, sizeof(mlink->file));
 
        /*
-        * Configure the directories we're going to scan.
-        * If we have command-line arguments, use them.
-        * If not, we use man(1)'s method (see mandocdb.8).
+        * First try to guess our directory structure.
+        * If we find a separator, try to look for man* or cat*.
+        * If we find one of these and what's underneath is a directory,
+        * assume it's an architecture.
         */
+       if (NULL != (p = strchr(start, '/'))) {
+               *p++ = '\0';
+               if (0 == strncmp(start, "man", 3)) {
+                       mlink->dform = FORM_SRC;
+                       mlink->dsec = mandoc_strdup(start + 3);
+               } else if (0 == strncmp(start, "cat", 3)) {
+                       mlink->dform = FORM_CAT;
+                       mlink->dsec = mandoc_strdup(start + 3);
+               }
 
-       if (argc > 0) {
-               dirs.paths = mandoc_calloc(argc, sizeof(char *));
-               dirs.sz = argc;
-               for (i = 0; i < argc; i++) {
-                       if (NULL == (cp = realpath(argv[i], pbuf))) {
-                               perror(argv[i]);
-                               goto out;
-                       }
-                       dirs.paths[i] = mandoc_strdup(cp);
+               start = p;
+               if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
+                       *p++ = '\0';
+                       mlink->arch = mandoc_strdup(start);
+                       start = p;
                }
-       } else
-               manpath_parse(&dirs, dir, NULL, NULL);
+       }
 
-       for (ipath = 0; ipath < dirs.sz; ipath++) {
+       /*
+        * Now check the file suffix.
+        * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
+        * Scan backwards until "p" sits just behind the last '.' or
+        * '/', taking care never to look in front of "start".
+        */
+       p = strrchr(start, '\0');
+       while (p > start && '/' != p[-1] && '.' != p[-1])
+               p--;
 
-               /*
-                * Go to the root of the respective manual tree.
-                * This must work or no manuals may be found:
-                * They are indexed relative to the root.
-                */
+       if (p > start && '.' == p[-1]) {
+               p[-1] = '\0';
+               mlink->fsec = mandoc_strdup(p);
+       }
 
-               if (-1 == chdir(dirs.paths[ipath])) {
-                       perror(dirs.paths[ipath]);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+       /*
+        * Now try to parse the name.
+        * Use the filename portion of the path.
+        */
+       mlink->name = start;
+       if (NULL != (p = strrchr(start, '/'))) {
+               mlink->name = p + 1;
+               *p = '\0';
+       }
+       mlink->name = mandoc_strdup(mlink->name);
 
-               /* Create a new database in two temporary files. */
+       mlink_add(mlink, &st);
+}
 
-               flags = O_CREAT | O_EXCL | O_RDWR;
-               while (NULL == mdb.db) {
-                       strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX);
-                       strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX);
-                       if (NULL == mktemp(mdb.dbn)) {
-                               perror(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-                       mdb.db = dbopen(mdb.dbn, flags, 0644,
-                                       DB_BTREE, &info);
-                       if (NULL == mdb.db && EEXIST != errno) {
-                               perror(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-               }
-               while (NULL == mdb.idx) {
-                       strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX);
-                       strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX);
-                       if (NULL == mktemp(mdb.idxn)) {
-                               perror(mdb.idxn);
-                               unlink(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-                       mdb.idx = dbopen(mdb.idxn, flags, 0644,
-                                       DB_RECNO, NULL);
-                       if (NULL == mdb.idx && EEXIST != errno) {
-                               perror(mdb.idxn);
-                               unlink(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-               }
+/*
+ * Complete a freshly scanned mlink and enter it into the global tables.
+ *
+ * Missing dsec, arch, name and fsec strings default to "".
+ * The file form follows from the first character of the suffix:
+ * '0' means formatted (FORM_CAT, and fsec is replaced by a copy of
+ * dsec), '1' to '9' means source (FORM_SRC), anything else FORM_NONE.
+ *
+ * The link is stored in "mlinks" keyed by its path, which must not be
+ * registered yet, and is put at the head of the link list of the
+ * "mpages" entry for the file's device and inode from "st"; that
+ * entry is created when the inode is seen for the first time.
+ */
+static void
+mlink_add(struct mlink *mlink, const struct stat *st)
+{
+       struct inodev    inodev;
+       struct mpage    *mpage;
+       unsigned int     slot;
+
+       assert(NULL != mlink->file);
+
+       if (NULL == mlink->dsec)
+               mlink->dsec = mandoc_strdup("");
+       if (NULL == mlink->arch)
+               mlink->arch = mandoc_strdup("");
+       if (NULL == mlink->name)
+               mlink->name = mandoc_strdup("");
+       if (NULL == mlink->fsec)
+               mlink->fsec = mandoc_strdup("");
+
+       if ('0' == *mlink->fsec) {
+               free(mlink->fsec);
+               mlink->fsec = mandoc_strdup(mlink->dsec);
+               mlink->fform = FORM_CAT;
+       } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
+               mlink->fform = FORM_SRC;
+       else
+               mlink->fform = FORM_NONE;
+
+       slot = ohash_qlookup(&mlinks, mlink->file);
+       assert(NULL == ohash_find(&mlinks, slot));
+       ohash_insert(&mlinks, slot, mlink);
+
+       /*
+        * The struct as a whole serves as the hash key, so clear it
+        * before filling in the members: any padding bytes must not
+        * hold stack garbage, or hard links to one and the same file
+        * would fail to match the zero-allocated entry stored below.
+        */
+       memset(&inodev, 0, sizeof(inodev));
+       inodev.st_ino = st->st_ino;
+       inodev.st_dev = st->st_dev;
+       slot = ohash_lookup_memory(&mpages, (char *)&inodev,
+           sizeof(struct inodev), inodev.st_ino);
+       mpage = ohash_find(&mpages, slot);
+       if (NULL == mpage) {
+               mpage = mandoc_calloc(1, sizeof(struct mpage));
+               mpage->inodev.st_ino = inodev.st_ino;
+               mpage->inodev.st_dev = inodev.st_dev;
+               ohash_insert(&mpages, slot, mpage);
+       } else
+               mlink->next = mpage->mlinks;
+       mpage->mlinks = mlink;
+}
 
-               /*
-                * Search for manuals and fill the new database.
-                */
+static void
+mlink_free(struct mlink *mlink)
+{
 
-               ofile_dirbuild(".", "", "", 0, &of);
+       free(mlink->dsec);      /* Release the four heap strings ... */
+       free(mlink->arch);
+       free(mlink->name);
+       free(mlink->fsec);
+       free(mlink);            /* ... and then the link itself. */
+}
 
-               if (NULL != of) {
-                       index_merge(of, mp, &dbuf, &buf, hash,
-                            &mdb, &recs);
-                       ofile_free(of);
-                       of = NULL;
+static void
+mpages_free(void)
+{
+       struct mpage    *page;
+       struct mlink    *link;
+       unsigned int     idx;
+
+       /* Visit every page in the table and release all it owns. */
+       for (page = ohash_first(&mpages, &idx); NULL != page;
+           page = ohash_next(&mpages, &idx)) {
+               /* Pop the links off the list one by one. */
+               while (NULL != (link = page->mlinks)) {
+                       page->mlinks = link->next;
+                       mlink_free(link);
                }
+               free(page->sec);
+               free(page->arch);
+               free(page->title);
+               free(page->desc);
+               free(page);
+       }
+}
 
-               (*mdb.db->close)(mdb.db);
-               (*mdb.idx->close)(mdb.idx);
-               mdb.db = NULL;
-               mdb.idx = NULL;
-
-               /*
-                * Replace the old database with the new one.
-                * This is not perfectly atomic,
-                * but i cannot think of a better way.
-                */
-
-               if (-1 == rename(mdb.dbn, MANDOC_DB)) {
-                       perror(MANDOC_DB);
-                       unlink(mdb.dbn);
-                       unlink(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+/*
+ * For each mlink to the mpage, check whether the path looks like
+ * it is formatted, and if it does, check whether a source manual
+ * exists by the same name, ignoring the suffix.
+ * If both conditions hold, drop the mlink.
+ *
+ * The source name is built by turning the leading "cat" into "man"
+ * and replacing the file suffix with the directory section; it is
+ * then looked up in the "mlinks" table.  With use_all set, such
+ * duplicates are reported (if warnings are on) but kept.
+ *
+ * As a side effect, mpage->form is set to FORM_CAT if all links
+ * live in cat directories and to FORM_NONE otherwise.
+ */
+static void
+mlinks_undupe(struct mpage *mpage)
+{
+       char              buf[PATH_MAX];
+       struct mlink    **prev;
+       struct mlink     *mlink;
+       char             *bufp;
+
+       mpage->form = FORM_CAT;
+       prev = &mpage->mlinks;
+       while (NULL != (mlink = *prev)) {
+               if (FORM_CAT != mlink->dform) {
+                       mpage->form = FORM_NONE;
+                       goto nextlink;
                }
-               if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
-                       perror(MANDOC_IDX);
-                       unlink(MANDOC_DB);
-                       unlink(MANDOC_IDX);
-                       unlink(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+               if (strlcpy(buf, mlink->file, PATH_MAX) >= PATH_MAX) {
+                       if (warnings)
+                               say(mlink->file, "Filename too long");
+                       goto nextlink;
                }
+               bufp = strstr(buf, "cat");
+               assert(NULL != bufp);
+               memcpy(bufp, "man", 3);
+               if (NULL != (bufp = strrchr(buf, '.')))
+                       *++bufp = '\0';
+               /* A truncated name must not be looked up. */
+               if (strlcat(buf, mlink->dsec, PATH_MAX) >= PATH_MAX) {
+                       if (warnings)
+                               say(mlink->file, "Filename too long");
+                       goto nextlink;
+               }
+               if (NULL == ohash_find(&mlinks,
+                               ohash_qlookup(&mlinks, buf)))
+                       goto nextlink;
+               if (warnings)
+                       say(mlink->file, "Man source exists: %s", buf);
+               if (use_all)
+                       goto nextlink;
+               /*
+                * The link is registered in the "mlinks" table, too.
+                * Take it out there before freeing it, or the table
+                * would keep a pointer to freed memory that later
+                * lookups might follow.
+                */
+               ohash_remove(&mlinks,
+                   ohash_qlookup(&mlinks, mlink->file));
+               *prev = mlink->next;
+               mlink_free(mlink);
+               continue;
+nextlink:
+               prev = &(*prev)->next;
        }
-
-out:
-       if (mdb.db)
-               (*mdb.db->close)(mdb.db);
-       if (mdb.idx)
-               (*mdb.idx->close)(mdb.idx);
-       if (hash)
-               (*hash->close)(hash);
-       if (mp)
-               mparse_free(mp);
-
-       manpath_free(&dirs);
-       ofile_free(of);
-       free(buf.cp);
-       free(dbuf.cp);
-       free(recs.stack);
-
-       return(MANDOCLEVEL_OK);
-
-usage:
-       fprintf(stderr,
-               "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
-               "                        -d dir [file ...] | "
-               "-u dir [file ...]\n",
-               progname);
-
-       return((int)MANDOCLEVEL_BADARG);
 }
 
-void
-index_merge(const struct of *of, struct mparse *mp,
-               struct buf *dbuf, struct buf *buf, DB *hash,
-               struct mdb *mdb, struct recs *recs)
+/*
+ * Run through the files in the global vector "mpages"
+ * and add them to the database specified in "basedir".
+ *
+ * This handles the parsing scheme itself, using the cues of directory
+ * and filename to determine whether the file is parsable or not.
+ */
+static void
+mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable)
 {
-       recno_t          rec;
-       int              ch, skip;
-       DBT              key, val;
-       DB              *files;  /* temporary file name table */
-       struct mdoc     *mdoc;
-       struct man      *man;
-       const char      *fn, *msec, *march, *mtitle;
-       char            *p;
-       uint64_t         mask;
-       size_t           sv;
-       unsigned         seq;
-       uint64_t         vbuf[2];
-       char             type;
-
-       static char      emptystring[] = "";
-
-       if (warnings) {
-               files = NULL;
-               hash_reset(&files);
+       struct ohash             title_table;
+       struct ohash_info        title_info, str_info;
+       struct mpage            *mpage;
+       struct mdoc             *mdoc;
+       struct man              *man;
+       struct title            *title_entry;
+       char                    *title_str;
+       const char              *cp;
+       int                      match;
+       unsigned int             pslot, tslot;
+       enum mandoclevel         lvl;
+
+       str_info.alloc = hash_alloc;
+       str_info.halloc = hash_halloc;
+       str_info.hfree = hash_free;
+       str_info.key_offset = offsetof(struct str, key);
+
+       if (check_reachable) {
+               title_info.alloc = hash_alloc;
+               title_info.halloc = hash_halloc;
+               title_info.hfree = hash_free;
+               title_info.key_offset = offsetof(struct title, title);
+               ohash_init(&title_table, 6, &title_info);
        }
 
-       rec = 0;
-       for (of = of->first; of; of = of->next) {
-               fn = of->fname;
+       mpage = ohash_first(&mpages, &pslot);
+       while (NULL != mpage) {
+               mlinks_undupe(mpage);
+               if (NULL == mpage->mlinks) {
+                       mpage = ohash_next(&mpages, &pslot);
+                       continue;
+               }
+
+               ohash_init(&strings, 6, &str_info);
+               mparse_reset(mp);
+               mdoc = NULL;
+               man = NULL;
+               match = 1;
 
                /*
                 * Try interpreting the file as mdoc(7) or man(7)
                 * source code, unless it is already known to be
                 * formatted.  Fall back to formatted mode.
                 */
-
-               mparse_reset(mp);
-               mdoc = NULL;
-               man = NULL;
-
-               if ((MANDOC_SRC & of->src_form ||
-                   ! (MANDOC_FORM & of->src_form)) &&
-                   MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
-                       mparse_result(mp, &mdoc, &man);
+               if (FORM_CAT != mpage->mlinks->dform ||
+                   FORM_CAT != mpage->mlinks->fform) {
+                       lvl = mparse_readfd(mp, -1, mpage->mlinks->file);
+                       if (lvl < MANDOCLEVEL_FATAL)
+                               mparse_result(mp, &mdoc, &man);
+               }
 
                if (NULL != mdoc) {
-                       msec = mdoc_meta(mdoc)->msec;
-                       march = mdoc_meta(mdoc)->arch;
-                       if (NULL == march)
-                               march = "";
-                       mtitle = mdoc_meta(mdoc)->title;
+                       mpage->form = FORM_SRC;
+                       mpage->sec =
+                           mandoc_strdup(mdoc_meta(mdoc)->msec);
+                       mpage->arch = mdoc_meta(mdoc)->arch;
+                       mpage->arch = mandoc_strdup(
+                           NULL == mpage->arch ? "" : mpage->arch);
+                       mpage->title =
+                           mandoc_strdup(mdoc_meta(mdoc)->title);
                } else if (NULL != man) {
-                       msec = man_meta(man)->msec;
-                       march = "";
-                       mtitle = man_meta(man)->title;
+                       mpage->form = FORM_SRC;
+                       mpage->sec =
+                           mandoc_strdup(man_meta(man)->msec);
+                       mpage->arch =
+                           mandoc_strdup(mpage->mlinks->arch);
+                       mpage->title =
+                           mandoc_strdup(man_meta(man)->title);
                } else {
-                       msec = of->sec;
-                       march = of->arch;
-                       mtitle = of->title;
+                       mpage->form = FORM_CAT;
+                       mpage->sec =
+                           mandoc_strdup(mpage->mlinks->dsec);
+                       mpage->arch =
+                           mandoc_strdup(mpage->mlinks->arch);
+                       mpage->title =
+                           mandoc_strdup(mpage->mlinks->name);
                }
 
                /*
@@ -653,16 +976,13 @@ index_merge(const struct of *of, struct mparse *mp,
                 * section, like encrypt(1) = makekey(8).  Do not skip
                 * manuals for such reasons.
                 */
-
-               skip = 0;
-               assert(of->sec);
-               assert(msec);
-               if (warnings)
-                       if (strcasecmp(msec, of->sec))
-                               fprintf(stderr, "%s: "
-                                       "section \"%s\" manual "
-                                       "in \"%s\" directory\n",
-                                       fn, msec, of->sec);
+               if (warnings && !use_all && FORM_SRC == mpage->form &&
+                   strcasecmp(mpage->sec, mpage->mlinks->dsec)) {
+                       match = 0;
+                       say(mpage->mlinks->file, "Section \"%s\" "
+                               "manual in %s directory",
+                               mpage->sec, mpage->mlinks->dsec);
+               }
 
                /*
                 * Manual page directories exist for each kernel
@@ -678,26 +998,30 @@ index_merge(const struct of *of, struct mparse *mp,
                 * Thus, warn about architecture mismatches,
                 * but don't skip manuals for this reason.
                 */
+               if (warnings && !use_all &&
+                   strcasecmp(mpage->arch, mpage->mlinks->arch)) {
+                       match = 0;
+                       say(mpage->mlinks->file, "Architecture \"%s\" "
+                               "manual in \"%s\" directory",
+                               mpage->arch, mpage->mlinks->arch);
+               }
+               if (warnings && !use_all &&
+                   strcasecmp(mpage->title, mpage->mlinks->name))
+                       match = 0;
 
-               assert(of->arch);
-               assert(march);
-               if (warnings)
-                       if (strcasecmp(march, of->arch))
-                               fprintf(stderr, "%s: "
-                                       "architecture \"%s\" manual "
-                                       "in \"%s\" directory\n",
-                                       fn, march, of->arch);
-
-               /*
-                * By default, skip a file if the title given
-                * in the file disagrees with the file name.
-                * Do not warn, this happens for all MLINKs.
-                */
+               putkey(mpage, mpage->mlinks->name, TYPE_Nm);
 
-               assert(of->title);
-               assert(mtitle);
-               if (strcasecmp(mtitle, of->title))
-                       skip = 1;
+               if (NULL != mdoc) {
+                       if (NULL != (cp = mdoc_meta(mdoc)->name))
+                               putkey(mpage, cp, TYPE_Nm);
+                       assert(NULL == mpage->desc);
+                       parse_mdoc(mpage, mdoc_node(mdoc));
+                       putkey(mpage, NULL != mpage->desc ?
+                           mpage->desc : mpage->mlinks->name, TYPE_Nd);
+               } else if (NULL != man)
+                       parse_man(mpage, man_node(man));
+               else
+                       parse_cat(mpage);
 
                /*
                 * Build a title string for the file.  If it matches
@@ -705,390 +1029,386 @@ index_merge(const struct of *of, struct mparse *mp,
                 * found; else, remember it as missing.
                 */
 
-               if (warnings) {
-                       buf->len = 0;
-                       buf_appendb(buf, mtitle, strlen(mtitle));
-                       buf_appendb(buf, "(", 1);
-                       buf_appendb(buf, msec, strlen(msec));
-                       if ('\0' != *march) {
-                               buf_appendb(buf, "/", 1);
-                               buf_appendb(buf, march, strlen(march));
-                       }
-                       buf_appendb(buf, ")", 2);
-                       for (p = buf->cp; '\0' != *p; p++)
-                               *p = tolower((unsigned char)*p);
-                       key.data = buf->cp;
-                       key.size = buf->len;
-                       val.data = NULL;
-                       val.size = 0;
-                       if (0 == skip)
-                               val.data = emptystring;
-                       else {
-                               ch = (*files->get)(files, &key, &val, 0);
-                               if (ch < 0) {
-                                       perror("hash");
-                                       exit((int)MANDOCLEVEL_SYSERR);
-                               } else if (ch > 0) {
-                                       val.data = (void *)fn;
-                                       val.size = strlen(fn) + 1;
-                               } else
-                                       val.data = NULL;
-                       }
-                       if (NULL != val.data &&
-                           (*files->put)(files, &key, &val, 0) < 0) {
-                               perror("hash");
+               if (check_reachable) {
+                       if (-1 == asprintf(&title_str, "%s(%s%s%s)",
+                           mpage->title, mpage->sec,
+                           '\0' == *mpage->arch ? "" : "/",
+                           mpage->arch)) {
+                               perror(NULL);
                                exit((int)MANDOCLEVEL_SYSERR);
                        }
+                       tslot = ohash_qlookup(&title_table, title_str);
+                       title_entry = ohash_find(&title_table, tslot);
+                       if (NULL == title_entry) {
+                               title_entry = mandoc_malloc(
+                                               sizeof(struct title));
+                               title_entry->title = title_str;
+                               title_entry->file = mandoc_strdup(
+                                   match ? "" : mpage->mlinks->file);
+                               ohash_insert(&title_table, tslot,
+                                               title_entry);
+                       } else {
+                               if (match)
+                                       *title_entry->file = '\0';
+                               free(title_str);
+                       }
                }
 
-               if (skip && !use_all)
-                       continue;
+               dbindex(mpage, mc);
+               ohash_delete(&strings);
+               mpage = ohash_next(&mpages, &pslot);
+       }
 
-               /*
-                * The index record value consists of a nil-terminated
-                * filename, a nil-terminated manual section, and a
-                * nil-terminated description.  Use the actual
-                * location of the file, such that the user can find
-                * it with man(1).  Since the description may not be
-                * set, we set a sentinel to see if we're going to
-                * write a nil byte in its place.
-                */
+       if (check_reachable) {
+               title_entry = ohash_first(&title_table, &tslot);
+               while (NULL != title_entry) {
+                       if ('\0' != *title_entry->file)
+                               say(title_entry->file,
+                                   "Probably unreachable, title is %s",
+                                   title_entry->title);
+                       free(title_entry->title);
+                       free(title_entry->file);
+                       free(title_entry);
+                       title_entry = ohash_next(&title_table, &tslot);
+               }
+               ohash_delete(&title_table);
+       }
+}
 
-               dbuf->len = 0;
-               type = mdoc ? 'd' : (man ? 'a' : 'c');
-               buf_appendb(dbuf, &type, 1);
-               buf_appendb(dbuf, fn, strlen(fn) + 1);
-               buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
-               buf_appendb(dbuf, of->title, strlen(of->title) + 1);
-               buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
+/*
+ * Extract the one-line description from a preformatted (cat) page.
+ * The file named by mpage->mlinks->file is read line by line:
+ * everything up to the first blank line is skipped, the first
+ * unindented line after that is taken to be the first section
+ * header, and the indented lines following it are joined into one
+ * string.  The text after the first "- " in that string (or the
+ * whole string if there is none) becomes mpage->desc and is
+ * registered as a TYPE_Nd key.  If no such lines are found,
+ * mpage->mlinks->name is used instead.
+ */
+static void
+parse_cat(struct mpage *mpage)
+{
+       FILE            *stream;
+       char            *line, *p, *title;
+       size_t           len, plen, titlesz;
 
-               sv = dbuf->len;
+       if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) {
+               if (warnings)
+                       say(mpage->mlinks->file, NULL);
+               return;
+       }
 
-               /*
-                * Collect keyword/mask pairs.
-                * Each pair will become a new btree node.
-                */
+       /* Skip to first blank line. */
 
-               hash_reset(&hash);
-               if (mdoc)
-                       pmdoc_node(hash, buf, dbuf,
-                               mdoc_node(mdoc), mdoc_meta(mdoc));
-               else if (man)
-                       pman_node(hash, buf, dbuf, man_node(man));
-               else
-                       pformatted(hash, buf, dbuf, of);
+       while (NULL != (line = fgetln(stream, &len)))
+               if ('\n' == *line)
+                       break;
 
-               /* Test mode, do not access any database. */
+       /*
+        * Assume the first line that is not indented
+        * is the first section header.  Skip to it.
+        */
 
-               if (NULL == mdb->db || NULL == mdb->idx)
-                       continue;
+       while (NULL != (line = fgetln(stream, &len)))
+               if ('\n' != *line && ' ' != *line)
+                       break;
+
+       /*
+        * Read up until the next section into a buffer.
+        * Strip the leading and trailing newline from each read line,
+        * appending a trailing space.
+        * Ignore empty (whitespace-only) lines.
+        */
 
-               /*
-                * Make sure the file name is always registered
-                * as an .Nm search key.
-                */
-               buf->len = 0;
-               buf_append(buf, of->title);
-               hash_put(hash, buf, TYPE_Nm);
-
-               /*
-                * Reclaim an empty index record, if available.
-                * Use its record number for all new btree nodes.
-                */
-
-               if (recs->cur > 0) {
-                       recs->cur--;
-                       rec = recs->stack[(int)recs->cur];
-               } else if (recs->last > 0) {
-                       rec = recs->last;
-                       recs->last = 0;
-               } else
-                       rec++;
-               vbuf[1] = htobe64(rec);
-
-               /*
-                * Copy from the in-memory hashtable of pending
-                * keyword/mask pairs into the database.
-                */
+       titlesz = 0;
+       title = NULL;
 
-               seq = R_FIRST;
-               while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
-                       seq = R_NEXT;
-                       assert(sizeof(uint64_t) == val.size);
-                       memcpy(&mask, val.data, val.size);
-                       vbuf[0] = htobe64(mask);
-                       val.size = sizeof(vbuf);
-                       val.data = &vbuf;
-                       dbt_put(mdb->db, mdb->dbn, &key, &val);
-               }
-               if (ch < 0) {
-                       perror("hash");
-                       unlink(mdb->dbn);
-                       unlink(mdb->idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+       while (NULL != (line = fgetln(stream, &len))) {
+               if (' ' != *line || '\n' != line[len - 1])
+                       break;
+               while (len > 0 && isspace((unsigned char)*line)) {
+                       line++;
+                       len--;
                }
+               /*
+                * The terminating newline counts as whitespace, too,
+                * so a whitespace-only line leaves nothing at all.
+                * Skip it: appending zero bytes would make the
+                * assignment below write in front of the buffer
+                * while titlesz is still zero.
+                */
+               if (0 == len)
+                       continue;
+               title = mandoc_realloc(title, titlesz + len);
+               memcpy(title + titlesz, line, len);
+               titlesz += len;
+               /* Turn the copied newline into a word separator. */
+               title[titlesz - 1] = ' ';
+       }
 
-               /*
-                * Apply to the index.  If we haven't had a description
-                * set, put an empty one in now.
-                */
-
-               if (dbuf->len == sv)
-                       buf_appendb(dbuf, "", 1);
+       /*
+        * If no page content can be found, or the input line
+        * is already the next section header, or there is no
+        * trailing newline, reuse the page title as the page
+        * description.
+        */
 
-               key.data = &rec;
-               key.size = sizeof(recno_t);
+       if (NULL == title || '\0' == *title) {
+               if (warnings)
+                       say(mpage->mlinks->file,
+                           "Cannot find NAME section");
+               assert(NULL == mpage->desc);
+               mpage->desc = mandoc_strdup(mpage->mlinks->name);
+               putkey(mpage, mpage->mlinks->name, TYPE_Nd);
+               fclose(stream);
+               free(title);
+               return;
+       }
 
-               val.data = dbuf->cp;
-               val.size = dbuf->len;
+       title = mandoc_realloc(title, titlesz + 1);
+       title[titlesz] = '\0';
 
-               if (verb)
-                       printf("%s: adding to index\n", fn);
+       /*
+        * Skip to the first dash.
+        * Use the remaining line as the description (no more than 70
+        * bytes).
+        */
 
-               dbt_put(mdb->idx, mdb->idxn, &key, &val);
+       if (NULL != (p = strstr(title, "- "))) {
+               for (p += 2; ' ' == *p || '\b' == *p; p++)
+                       /* Skip to next word. */ ;
+       } else {
+               if (warnings)
+                       say(mpage->mlinks->file,
+                           "No dash in title line");
+               p = title;
        }
 
-       /*
-        * Iterate the remembered file titles and check that
-        * all files can be found by their main title.
-        */
+       plen = strlen(p);
 
-       if (warnings) {
-               seq = R_FIRST;
-               while (0 == (*files->seq)(files, &key, &val, seq)) {
-                       seq = R_NEXT;
-                       if (val.size)
-                               fprintf(stderr, "%s: probably "
-                                   "unreachable, title is %s\n",
-                                   (char *)val.data, (char *)key.data);
-               }
-               (*files->close)(files);
+       /* Strip backspace-encoding from line. */
+
+       while (NULL != (line = memchr(p, '\b', plen))) {
+               len = line - p;
+               if (0 == len) {
+                       /* Leading backspace: drop just that byte. */
+                       memmove(line, line + 1, plen--);
+                       continue;
+               }
+               /* Drop the backspace and the byte in front of it. */
+               memmove(line - 1, line + 1, plen - len);
+               plen -= 2;
        }
+
+       assert(NULL == mpage->desc);
+       mpage->desc = mandoc_strdup(p);
+       putkey(mpage, mpage->desc, TYPE_Nd);
+       fclose(stream);
+       free(title);
 }
 
 /*
- * Scan through all entries in the index file `idx' and prune those
- * entries in `ofile'.
- * Pruning consists of removing from `db', then invalidating the entry
- * in `idx' (zeroing its value size).
+ * Put a type/word pair into the word database for this particular file.
+ * Convenience wrapper around putkeys() for NUL-terminated strings:
+ * the value must not be NULL, and its full strlen() is passed on.
  */
 static void
-index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
+putkey(const struct mpage *mpage, const char *value, uint64_t type)
 {
-       const struct of *of;
-       const char      *fn;
-       uint64_t         vbuf[2];
-       unsigned         seq, sseq;
-       DBT              key, val;
-       int              ch;
-
-       recs->cur = 0;
-       seq = R_FIRST;
-       while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
-               seq = R_NEXT;
-               assert(sizeof(recno_t) == key.size);
-               memcpy(&recs->last, key.data, key.size);
 
-               /* Deleted records are zero-sized.  Skip them. */
+       assert(NULL != value);
+       putkeys(mpage, value, strlen(value), type);
+}
 
-               if (0 == val.size)
-                       goto cont;
+/*
+ * Grok all nodes at or below a certain mdoc node into putkey().
+ * Walks n and all of its following siblings; for each node, the
+ * children are visited first, then the node's own string is stored
+ * under the type mask m if the node is a text node.
+ */
+static void
+putmdockey(const struct mpage *mpage,
+       const struct mdoc_node *n, uint64_t m)
+{
 
-               /*
-                * Make sure we're sane.
-                * Read past our mdoc/man/cat type to the next string,
-                * then make sure it's bounded by a NUL.
-                * Failing any of these, we go into our error handler.
-                */
+       for ( ; NULL != n; n = n->next) {
+               if (NULL != n->child)
+                       putmdockey(mpage, n->child, m);
+               if (MDOC_TEXT == n->type)
+                       putkey(mpage, n->string, m);
+       }
+}
 
-               fn = (char *)val.data + 1;
-               if (NULL == memchr(fn, '\0', val.size - 1))
-                       break;
+/*
+ * Search a man(7) syntax tree for the body of the NAME section and
+ * take the manual names and the one-line description from its text.
+ * Each name is stored as a TYPE_Nm key; the description is stored
+ * in mpage->desc and as a TYPE_Nd key.  Nodes that are not the NAME
+ * body are searched recursively, child by child, until a
+ * description has been found.
+ */
+static void
+parse_man(struct mpage *mpage, const struct man_node *n)
+{
+       const struct man_node *head, *body;
+       char            *start, *sv, *title;
+       char             byte;
+       size_t           sz, titlesz;
 
-               /*
-                * Search for the file in those we care about.
-                * XXX: build this into a tree.  Too slow.
-                */
+       if (NULL == n)
+               return;
 
-               for (of = ofile->first; of; of = of->next)
-                       if (0 == strcmp(fn, of->fname))
-                               break;
+       /*
+        * We're only searching for one thing: the first text child in
+        * the BODY of a NAME section.  Since we don't keep track of
+        * sections in -man, run some hoops to find out whether we're in
+        * the correct section or not.
+        */
 
-               if (NULL == of)
-                       continue;
+       if (MAN_BODY == n->type && MAN_SH == n->tok) {
+               body = n;
+               assert(body->parent);
+               if (NULL != (head = body->parent->head) &&
+                               1 == head->nchild &&
+                               NULL != (head = (head->child)) &&
+                               MAN_TEXT == head->type &&
+                               0 == strcmp(head->string, "NAME") &&
+                               NULL != (body = body->child) &&
+                               MAN_TEXT == body->type) {
 
-               /*
-                * Search through the keyword database, throwing out all
-                * references to our file.
-                */
+                       title = NULL;
+                       titlesz = 0;
 
-               sseq = R_FIRST;
-               while (0 == (ch = (*mdb->db->seq)(mdb->db,
-                                       &key, &val, sseq))) {
-                       sseq = R_NEXT;
-                       if (sizeof(vbuf) != val.size)
-                               break;
+                       /*
+                        * Suck the entire NAME section into memory.
+                        * Yes, we might run away.
+                        * But too many manuals have big, spread-out
+                        * NAME sections over many lines.
+                        */
 
-                       memcpy(vbuf, val.data, val.size);
-                       if (recs->last != betoh64(vbuf[1]))
-                               continue;
+                       for ( ; NULL != body; body = body->next) {
+                               if (MAN_TEXT != body->type)
+                                       break;
+                               if (0 == (sz = strlen(body->string)))
+                                       continue;
+                               title = mandoc_realloc
+                                       (title, titlesz + sz + 1);
+                               memcpy(title + titlesz, body->string, sz);
+                               titlesz += sz + 1;
+                               title[titlesz - 1] = ' ';
+                       }
+                       if (NULL == title)
+                               return;
 
-                       if ((ch = (*mdb->db->del)(mdb->db,
-                                       &key, R_CURSOR)) < 0)
-                               break;
-               }
+                       title = mandoc_realloc(title, titlesz + 1);
+                       title[titlesz] = '\0';
 
-               if (ch < 0) {
-                       perror(mdb->dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               } else if (1 != ch) {
-                       fprintf(stderr, "%s: corrupt database\n",
-                                       mdb->dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+                       /* Skip leading space.  */
 
-               if (verb)
-                       printf("%s: deleting from index\n", fn);
+                       sv = title;
+                       while (isspace((unsigned char)*sv))
+                               sv++;
 
-               val.size = 0;
-               ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
+                       if (0 == (sz = strlen(sv))) {
+                               free(title);
+                               return;
+                       }
 
-               if (ch < 0)
-                       break;
-cont:
-               if (recs->cur >= recs->size) {
-                       recs->size += MANDOC_SLOP;
-                       recs->stack = mandoc_realloc(recs->stack,
-                                       recs->size * sizeof(recno_t));
-               }
+                       /* Erase trailing space. */
 
-               recs->stack[(int)recs->cur] = recs->last;
-               recs->cur++;
-       }
+                       start = &sv[sz - 1];
+                       while (start > sv && isspace((unsigned char)*start))
+                               *start-- = '\0';
 
-       if (ch < 0) {
-               perror(mdb->idxn);
-               exit((int)MANDOCLEVEL_SYSERR);
-       } else if (1 != ch) {
-               fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
-               exit((int)MANDOCLEVEL_SYSERR);
-       }
+                       /*
+                        * Only one character is left: give up.
+                        * NOTE(review): this discards a NAME text
+                        * that is a single character long - confirm
+                        * that this is intended.
+                        */
+                       if (start == sv) {
+                               free(title);
+                               return;
+                       }
 
-       recs->last++;
-}
+                       start = sv;
 
-/*
- * Grow the buffer (if necessary) and copy in a binary string.
- */
-static void
-buf_appendb(struct buf *buf, const void *cp, size_t sz)
-{
+                       /* 
+                        * Go through a special heuristic dance here.
+                        * Conventionally, one or more manual names are
+                        * comma-specified prior to a whitespace, then a
+                        * dash, then a description.  Try to puzzle out
+                        * the name parts here.
+                        */
 
-       /* Overshoot by MANDOC_BUFSZ. */
+                       for ( ;; ) {
+                               sz = strcspn(start, " ,");
+                               if ('\0' == start[sz])
+                                       break;
 
-       while (buf->len + sz >= buf->size) {
-               buf->size = buf->len + sz + MANDOC_BUFSZ;
-               buf->cp = mandoc_realloc(buf->cp, buf->size);
-       }
+                               byte = start[sz];
+                               start[sz] = '\0';
 
-       memcpy(buf->cp + (int)buf->len, cp, sz);
-       buf->len += sz;
-}
+                               putkey(mpage, start, TYPE_Nm);
 
-/*
- * Append a nil-terminated string to the buffer.  
- * This can be invoked multiple times.  
- * The buffer string will be nil-terminated.
- * If invoked multiple times, a space is put between strings.
- */
-static void
-buf_append(struct buf *buf, const char *cp)
-{
-       size_t           sz;
+                               if (' ' == byte) {
+                                       start += sz + 1;
+                                       break;
+                               }
 
-       if (0 == (sz = strlen(cp)))
-               return;
+                               assert(',' == byte);
+                               start += sz + 1;
+                               while (' ' == *start)
+                                       start++;
+                       }
 
-       if (buf->len)
-               buf->cp[(int)buf->len - 1] = ' ';
+                       /*
+                        * Neither a blank nor a comma was found:
+                        * take the whole text as a name and do not
+                        * set a description.
+                        */
+                       if (sv == start) {
+                               putkey(mpage, start, TYPE_Nm);
+                               free(title);
+                               return;
+                       }
 
-       buf_appendb(buf, cp, sz + 1);
-}
+                       while (isspace((unsigned char)*start))
+                               start++;
 
-/*
- * Recursively add all text from a given node.  
- * This is optimised for general mdoc nodes in this context, which do
- * not consist of subexpressions and having a recursive call for n->next
- * would be wasteful.
- * The "f" variable should be 0 unless called from pmdoc_Nd for the
- * description buffer, which does not start at the beginning of the
- * buffer.
- */
-static void
-buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
-{
+                       /* Step over one leading dash in any spelling. */
+                       if (0 == strncmp(start, "-", 1))
+                               start += 1;
+                       else if (0 == strncmp(start, "\\-\\-", 4))
+                               start += 4;
+                       else if (0 == strncmp(start, "\\-", 2))
+                               start += 2;
+                       else if (0 == strncmp(start, "\\(en", 4))
+                               start += 4;
+                       else if (0 == strncmp(start, "\\(em", 4))
+                               start += 4;
 
-       for ( ; n; n = n->next) {
-               if (n->child)
-                       buf_appendmdoc(buf, n->child, f);
+                       while (' ' == *start)
+                               start++;
 
-               if (MDOC_TEXT == n->type && f) {
-                       f = 0;
-                       buf_appendb(buf, n->string, 
-                                       strlen(n->string) + 1);
-               } else if (MDOC_TEXT == n->type)
-                       buf_append(buf, n->string);
+                       assert(NULL == mpage->desc);
+                       mpage->desc = mandoc_strdup(start);
+                       putkey(mpage, mpage->desc, TYPE_Nd);
+                       free(title);
+                       return;
+               }
+       }
 
+       for (n = n->child; n; n = n->next) {
+               if (NULL != mpage->desc)
+                       break;
+               parse_man(mpage, n);
        }
 }
 
+/*
+ * Walk the children of an mdoc(7) node and collect search keys.
+ * For element, block, head, body and tail nodes, the per-macro
+ * handler from the mdocs[] table is called if there is one; unless
+ * that handler returns 0, the text below the node is stored under
+ * the macro's mask, if the mask is non-zero.  Such nodes are then
+ * searched recursively.  Nodes of any other type are passed over
+ * without descending into them.
+ */
 static void
-hash_reset(DB **db)
+parse_mdoc(struct mpage *mpage, const struct mdoc_node *n)
 {
-       DB              *hash;
 
-       if (NULL != (hash = *db))
-               (*hash->close)(hash);
-
-       *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
-       if (NULL == *db) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
+       assert(NULL != n);
+       for (n = n->child; NULL != n; n = n->next) {
+               switch (n->type) {
+               case (MDOC_ELEM):
+                       /* FALLTHROUGH */
+               case (MDOC_BLOCK):
+                       /* FALLTHROUGH */
+               case (MDOC_HEAD):
+                       /* FALLTHROUGH */
+               case (MDOC_BODY):
+                       /* FALLTHROUGH */
+               case (MDOC_TAIL):
+                       /*
+                        * The break below only leaves the switch,
+                        * so the recursion at the end of the loop
+                        * body still takes place.
+                        */
+                       if (NULL != mdocs[n->tok].fp)
+                              if (0 == (*mdocs[n->tok].fp)(mpage, n))
+                                      break;
+                       if (mdocs[n->tok].mask)
+                               putmdockey(mpage, n->child,
+                                   mdocs[n->tok].mask);
+                       break;
+               default:
+                       assert(MDOC_ROOT != n->type);
+                       continue;
+               }
+               if (NULL != n->child)
+                       parse_mdoc(mpage, n);
        }
 }
 
-/* ARGSUSED */
-static int
-pmdoc_head(MDOC_ARGS)
-{
-
-       return(MDOC_HEAD == n->type);
-}
-
-/* ARGSUSED */
-static int
-pmdoc_body(MDOC_ARGS)
-{
-
-       return(MDOC_BODY == n->type);
-}
-
-/* ARGSUSED */
 static int
-pmdoc_Fd(MDOC_ARGS)
+parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n)
 {
        const char      *start, *end;
        size_t           sz;
 
-       if (SEC_SYNOPSIS != n->sec)
-               return(0);
-       if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+       if (SEC_SYNOPSIS != n->sec ||
+                       NULL == (n = n->child) || 
+                       MDOC_TEXT != n->type)
                return(0);
 
        /*
         * Only consider those `Fd' macro fields that begin with an
         * "inclusion" token (versus, e.g., #define).
         */
+
        if (strcmp("#include", n->string))
                return(0);
 
@@ -1111,121 +1431,121 @@ pmdoc_Fd(MDOC_ARGS)
        if ('>' == *end || '"' == *end)
                end--;
 
-       assert(end >= start);
-
-       buf_appendb(buf, start, (size_t)(end - start + 1));
-       buf_appendb(buf, "", 1);
+       if (end > start)
+               putkeys(mpage, start, end - start + 1, TYPE_In);
        return(1);
 }
 
-/* ARGSUSED */
+/*
+ * Handle an `In' macro: store the text of its first child as a
+ * TYPE_In key.  Returns 0 if there is no leading text child and
+ * nothing was stored, 1 otherwise.
+ */
 static int
-pmdoc_In(MDOC_ARGS)
+parse_mdoc_In(struct mpage *mpage, const struct mdoc_node *n)
 {
 
+       /*
+        * Bail out unless the first child exists and is text;
+        * the string of anything else must not be dereferenced.
+        */
        if (NULL == n->child || MDOC_TEXT != n->child->type)
                return(0);
 
-       buf_append(buf, n->child->string);
+       putkey(mpage, n->child->string, TYPE_In);
        return(1);
 }
 
-/* ARGSUSED */
+/*
+ * Handle an `Fn' macro: store the function name as a TYPE_Fn key,
+ * the function type, if any, as a TYPE_Ft key, and each further
+ * text argument as a TYPE_Fa key.  Always returns 0.
+ */
 static int
-pmdoc_Fn(MDOC_ARGS)
+parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
 {
-       struct mdoc_node *nn;
-       const char      *cp;
+       const char      *cp, *tend;
 
-       nn = n->child;
-
-       if (NULL == nn || MDOC_TEXT != nn->type)
+       if (NULL == (n = n->child) || MDOC_TEXT != n->type)
                return(0);
 
-       /* .Fn "struct type *name" "char *arg" */
-
-       cp = strrchr(nn->string, ' ');
-       if (NULL == cp)
-               cp = nn->string;
+       /*
+        * Parse: .Fn "struct type *name" "char *arg".
+        * The function name follows the last blank of the first
+        * argument, the type is everything before that blank.
+        * Step over the blank before stripping pointer symbols,
+        * or neither would ever be removed from the name.
+        * Without a blank, there is no type.
+        * Finally, store the arguments.
+        */
 
-       /* Strip away pointer symbol. */
+       if (NULL == (tend = strrchr(n->string, ' ')))
+               cp = tend = n->string;
+       else
+               cp = tend + 1;
 
        while ('*' == *cp)
                cp++;
 
-       /* Store the function name. */
-
-       buf_append(buf, cp);
-       hash_put(hash, buf, TYPE_Fn);
+       /* Nothing is left after a trailing blank or a bare "*". */
+       if ('\0' != *cp)
+               putkey(mpage, cp, TYPE_Fn);
 
-       /* Store the function type. */
+       if (n->string < tend)
+               putkeys(mpage, n->string, tend - n->string, TYPE_Ft);
 
-       if (nn->string < cp) {
-               buf->len = 0;
-               buf_appendb(buf, nn->string, cp - nn->string);
-               buf_appendb(buf, "", 1);
-               hash_put(hash, buf, TYPE_Ft);
-       }
-
-       /* Store the arguments. */
-
-       for (nn = nn->next; nn; nn = nn->next) {
-               if (MDOC_TEXT != nn->type)
-                       continue;
-               buf->len = 0;
-               buf_append(buf, nn->string);
-               hash_put(hash, buf, TYPE_Fa);
-       }
+       for (n = n->next; NULL != n; n = n->next)
+               if (MDOC_TEXT == n->type)
+                       putkey(mpage, n->string, TYPE_Fa);
 
        return(0);
 }
 
-/* ARGSUSED */
+/*
+ * Handle an `St' macro: store the text of its first child as a
+ * TYPE_St key.  Returns 0 if there is no leading text child and
+ * nothing was stored, 1 otherwise.
+ */
 static int
-pmdoc_St(MDOC_ARGS)
+parse_mdoc_St(struct mpage *mpage, const struct mdoc_node *n)
 {
 
        if (NULL == n->child || MDOC_TEXT != n->child->type)
                return(0);
 
-       buf_append(buf, n->child->string);
+       putkey(mpage, n->child->string, TYPE_St);
        return(1);
 }
 
-/* ARGSUSED */
+/*
+ * Handle an `Xr' macro: store the cross reference as a TYPE_Xr key.
+ * With a single child, the key is that child's string; with two or
+ * more, it is built as "first(second)".  Always returns 0.
+ * Exits the program if the temporary string cannot be allocated.
+ */
 static int
-pmdoc_Xr(MDOC_ARGS)
+parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n)
 {
+       char    *cp;
 
        if (NULL == (n = n->child))
                return(0);
 
-       buf_appendb(buf, n->string, strlen(n->string));
-
-       if (NULL != (n = n->next)) {
-               buf_appendb(buf, ".", 1);
-               buf_appendb(buf, n->string, strlen(n->string) + 1);
-       } else
-               buf_appendb(buf, ".", 2);
+       if (NULL == n->next) {
+               putkey(mpage, n->string, TYPE_Xr);
+               return(0);
+       }
 
-       return(1);
+       if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) {
+               perror(NULL);
+               exit((int)MANDOCLEVEL_SYSERR);
+       }
+       putkey(mpage, cp, TYPE_Xr);
+       free(cp);
+       return(0);
 }
 
-/* ARGSUSED */
+/*
+ * Handle an `Nd' macro: collect the text children of its body into
+ * mpage->desc, separated by single blanks.  Returns 0 without doing
+ * anything for nodes that are not bodies, 1 otherwise.
+ */
 static int
-pmdoc_Nd(MDOC_ARGS)
+parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)
 {
+       size_t           sz;
 
        if (MDOC_BODY != n->type)
                return(0);
 
-       buf_appendmdoc(dbuf, n->child, 1);
+       /*
+        * Special-case the `Nd' because we need to put the description
+        * into the document table.
+        */
+
+       for (n = n->child; NULL != n; n = n->next) {
+               if (MDOC_TEXT == n->type) {
+                       if (NULL != mpage->desc) {
+                               /* Room for a blank and the NUL. */
+                               sz = strlen(mpage->desc) +
+                                    strlen(n->string) + 2;
+                               mpage->desc = mandoc_realloc(
+                                   mpage->desc, sz);
+                               strlcat(mpage->desc, " ", sz);
+                               strlcat(mpage->desc, n->string, sz);
+                       } else
+                               mpage->desc = mandoc_strdup(n->string);
+               }
+               /*
+                * NOTE(review): the recursive call returns at once
+                * unless the child is itself a body node, so text
+                * below other kinds of children is not collected -
+                * confirm whether that is intended.
+                */
+               if (NULL != n->child)
+                       parse_mdoc_Nd(mpage, n);
+       }
        return(1);
 }
 
-/* ARGSUSED */
 static int
-pmdoc_Nm(MDOC_ARGS)
+parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)
 {
 
        if (SEC_NAME == n->sec)
@@ -1233,758 +1553,558 @@ pmdoc_Nm(MDOC_ARGS)
        else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
                return(0);
 
-       if (NULL == n->child)
-               buf_append(buf, m->name);
-
        return(1);
 }
 
-/* ARGSUSED */
 static int
-pmdoc_Sh(MDOC_ARGS)
+parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n)
 {
 
        return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
 }
 
-static void
-hash_put(DB *db, const struct buf *buf, uint64_t mask)
+static int
+parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n)
 {
-       uint64_t         oldmask;
-       DBT              key, val;
-       int              rc;
-
-       if (buf->len < 2)
-               return;
-
-       key.data = buf->cp;
-       key.size = buf->len;
-
-       if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
-       } else if (0 == rc) {
-               assert(sizeof(uint64_t) == val.size);
-               memcpy(&oldmask, val.data, val.size);
-               mask |= oldmask;
-       }
-
-       val.data = &mask;
-       val.size = sizeof(uint64_t); 
 
-       if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
-       } 
+       return(MDOC_HEAD == n->type);
 }
 
-static void
-dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+static int
+parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n)
 {
 
-       assert(key->size);
-       assert(val->size);
-
-       if (0 == (*db->put)(db, key, val, 0))
-               return;
-       
-       perror(dbn);
-       exit((int)MANDOCLEVEL_SYSERR);
-       /* NOTREACHED */
+       return(MDOC_BODY == n->type);
 }
 
 /*
- * Call out to per-macro handlers after clearing the persistent database
- * key.  If the macro sets the database key, flush it to the database.
+ * Add a string to the hash table for the current manual.
+ * Each string has a bitmask telling which macros it belongs to.
+ * When we finish the manual, we'll dump the table.
  */
 static void
-pmdoc_node(MDOC_ARGS)
+putkeys(const struct mpage *mpage,
+       const char *cp, size_t sz, uint64_t v)
 {
+       struct str      *s;
+       unsigned int     slot;
+       const char      *end;
 
-       if (NULL == n)
+       if (0 == sz)
                return;
 
-       switch (n->type) {
-       case (MDOC_HEAD):
-               /* FALLTHROUGH */
-       case (MDOC_BODY):
-               /* FALLTHROUGH */
-       case (MDOC_TAIL):
-               /* FALLTHROUGH */
-       case (MDOC_BLOCK):
-               /* FALLTHROUGH */
-       case (MDOC_ELEM):
-               buf->len = 0;
-
-               /*
-                * Both NULL handlers and handlers returning true
-                * request using the data.  Only skip the element
-                * when the handler returns false.
-                */
-
-               if (NULL != mdocs[n->tok].fp &&
-                   0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
-                       break;
+       end = cp + sz;
+       slot = ohash_qlookupi(&strings, cp, &end);
+       s = ohash_find(&strings, slot);
 
-               /*
-                * For many macros, use the text from all children.
-                * Set zero flags for macros not needing this.
-                * In that case, the handler must fill the buffer.
-                */
-
-               if (MDOCF_CHILD & mdocs[n->tok].flags)
-                       buf_appendmdoc(buf, n->child, 0);
-
-               /*
-                * Cover the most common case:
-                * Automatically stage one string per element.
-                * Set a zero mask for macros not needing this.
-                * Additional staging can be done in the handler.
-                */
-
-               if (mdocs[n->tok].mask)
-                       hash_put(hash, buf, mdocs[n->tok].mask);
-               break;
-       default:
-               break;
+       if (NULL != s && mpage == s->mpage) {
+               s->mask |= v;
+               return;
+       } else if (NULL == s) {
+               s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
+               memcpy(s->key, cp, sz);
+               ohash_insert(&strings, slot, s);
        }
-
-       pmdoc_node(hash, buf, dbuf, n->child, m);
-       pmdoc_node(hash, buf, dbuf, n->next, m);
+       s->mpage = mpage;
+       s->mask = v;
 }
 
-static int
-pman_node(MAN_ARGS)
+/*
+ * Take a Unicode codepoint and produce its UTF-8 encoding.
+ * This isn't the best way to do this, but it works.
+ * The magic numbers are from the UTF-8 packaging.
+ * They're not as scary as they seem: read the UTF-8 spec for details.
+ */
+static size_t
+utf8(unsigned int cp, char out[7])
 {
-       const struct man_node *head, *body;
-       char            *start, *sv, *title;
-       size_t           sz, titlesz;
-
-       if (NULL == n)
+       size_t           rc;
+
+       rc = 0;
+       if (cp <= 0x0000007F) {
+               rc = 1;
+               out[0] = (char)cp;
+       } else if (cp <= 0x000007FF) {
+               rc = 2;
+               out[0] = (cp >> 6  & 31) | 192;
+               out[1] = (cp       & 63) | 128;
+       } else if (cp <= 0x0000FFFF) {
+               rc = 3;
+               out[0] = (cp >> 12 & 15) | 224;
+               out[1] = (cp >> 6  & 63) | 128;
+               out[2] = (cp       & 63) | 128;
+       } else if (cp <= 0x001FFFFF) {
+               rc = 4;
+               out[0] = (cp >> 18 &  7) | 240;
+               out[1] = (cp >> 12 & 63) | 128;
+               out[2] = (cp >> 6  & 63) | 128;
+               out[3] = (cp       & 63) | 128;
+       } else if (cp <= 0x03FFFFFF) {
+               rc = 5;
+               out[0] = (cp >> 24 &  3) | 248;
+               out[1] = (cp >> 18 & 63) | 128;
+               out[2] = (cp >> 12 & 63) | 128;
+               out[3] = (cp >> 6  & 63) | 128;
+               out[4] = (cp       & 63) | 128;
+       } else if (cp <= 0x7FFFFFFF) {
+               rc = 6;
+               out[0] = (cp >> 30 &  1) | 252;
+               out[1] = (cp >> 24 & 63) | 128;
+               out[2] = (cp >> 18 & 63) | 128;
+               out[3] = (cp >> 12 & 63) | 128;
+               out[4] = (cp >> 6  & 63) | 128;
+               out[5] = (cp       & 63) | 128;
+       } else
                return(0);
 
-       /*
-        * We're only searching for one thing: the first text child in
-        * the BODY of a NAME section.  Since we don't keep track of
-        * sections in -man, run some hoops to find out whether we're in
-        * the correct section or not.
-        */
-
-       if (MAN_BODY == n->type && MAN_SH == n->tok) {
-               body = n;
-               assert(body->parent);
-               if (NULL != (head = body->parent->head) &&
-                               1 == head->nchild &&
-                               NULL != (head = (head->child)) &&
-                               MAN_TEXT == head->type &&
-                               0 == strcmp(head->string, "NAME") &&
-                               NULL != (body = body->child) &&
-                               MAN_TEXT == body->type) {
-
-                       title = NULL;
-                       titlesz = 0;
-                       /*
-                        * Suck the entire NAME section into memory.
-                        * Yes, we might run away.
-                        * But too many manuals have big, spread-out
-                        * NAME sections over many lines.
-                        */
-                       for ( ; NULL != body; body = body->next) {
-                               if (MAN_TEXT != body->type)
-                                       break;
-                               if (0 == (sz = strlen(body->string)))
-                                       continue;
-                               title = mandoc_realloc
-                                       (title, titlesz + sz + 1);
-                               memcpy(title + titlesz, body->string, sz);
-                               titlesz += sz + 1;
-                               title[(int)titlesz - 1] = ' ';
-                       }
-                       if (NULL == title)
-                               return(0);
-
-                       title = mandoc_realloc(title, titlesz + 1);
-                       title[(int)titlesz] = '\0';
-
-                       /* Skip leading space.  */
-
-                       sv = title;
-                       while (isspace((unsigned char)*sv))
-                               sv++;
+       out[rc] = '\0';
+       return(rc);
+}
 
-                       if (0 == (sz = strlen(sv))) {
-                               free(title);
-                               return(0);
-                       }
+/*
+ * Store the UTF-8 version of a key, or alias the pointer if the key has
+ * no UTF-8 transcription marks in it.
+ */
+static void
+utf8key(struct mchars *mc, struct str *key)
+{
+       size_t           sz, bsz, pos;
+       char             utfbuf[7], res[5];
+       char            *buf;
+       const char      *seq, *cpp, *val;
+       int              len, u;
+       enum mandoc_esc  esc;
 
-                       /* Erase trailing space. */
+       assert(NULL == key->utf8);
 
-                       start = &sv[sz - 1];
-                       while (start > sv && isspace((unsigned char)*start))
-                               *start-- = '\0';
+       res[0] = '\\';
+       res[1] = '\t';
+       res[2] = ASCII_NBRSP;
+       res[3] = ASCII_HYPH;
+       res[4] = '\0';
 
-                       if (start == sv) {
-                               free(title);
-                               return(0);
-                       }
+       val = key->key;
+       bsz = strlen(val);
 
-                       start = sv;
+       /*
+        * Pre-check: if we have no stop-characters, then set the
+        * pointer to ourselves and get out of here.
+        */
+       if (strcspn(val, res) == bsz) {
+               key->utf8 = key->key;
+               return;
+       } 
 
-                       /* 
-                        * Go through a special heuristic dance here.
-                        * This is why -man manuals are great!
-                        * (I'm being sarcastic: my eyes are bleeding.)
-                        * Conventionally, one or more manual names are
-                        * comma-specified prior to a whitespace, then a
-                        * dash, then a description.  Try to puzzle out
-                        * the name parts here.
-                        */
+       /* Pre-allocate by the length of the input */
 
-                       for ( ;; ) {
-                               sz = strcspn(start, " ,");
-                               if ('\0' == start[(int)sz])
-                                       break;
+       buf = mandoc_malloc(++bsz);
+       pos = 0;
 
-                               buf->len = 0;
-                               buf_appendb(buf, start, sz);
-                               buf_appendb(buf, "", 1);
+       while ('\0' != *val) {
+               /*
+                * Halt on the first escape sequence.
+                * This also halts on the end of string, in which case
+                * we just copy, fall through, and exit the loop.
+                */
+               if ((sz = strcspn(val, res)) > 0) {
+                       memcpy(&buf[pos], val, sz);
+                       pos += sz;
+                       val += sz;
+               }
 
-                               hash_put(hash, buf, TYPE_Nm);
+               if (ASCII_HYPH == *val) {
+                       buf[pos++] = '-';
+                       val++;
+                       continue;
+               } else if ('\t' == *val || ASCII_NBRSP == *val) {
+                       buf[pos++] = ' ';
+                       val++;
+                       continue;
+               } else if ('\\' != *val)
+                       break;
 
-                               if (' ' == start[(int)sz]) {
-                                       start += (int)sz + 1;
-                                       break;
-                               }
+               /* Read past the backslash. */
 
-                               assert(',' == start[(int)sz]);
-                               start += (int)sz + 1;
-                               while (' ' == *start)
-                                       start++;
-                       }
+               val++;
+               u = 0;
 
-                       buf->len = 0;
+               /*
+                * Parse the escape sequence and see if it's a
+                * predefined character or special character.
+                */
+               esc = mandoc_escape
+                       ((const char **)&val, &seq, &len);
+               if (ESCAPE_ERROR == esc)
+                       break;
 
-                       if (sv == start) {
-                               buf_append(buf, start);
-                               free(title);
-                               return(1);
-                       }
+               if (ESCAPE_SPECIAL != esc)
+                       continue;
+               if (0 == (u = mchars_spec2cp(mc, seq, len)))
+                       continue;
 
-                       while (isspace((unsigned char)*start))
-                               start++;
+               /*
+                * If we have a Unicode codepoint, try to convert that
+                * to a UTF-8 byte string.
+                */
+               cpp = utfbuf;
+               if (0 == (sz = utf8(u, utfbuf)))
+                       continue;
 
-                       if (0 == strncmp(start, "-", 1))
-                               start += 1;
-                       else if (0 == strncmp(start, "\\-\\-", 4))
-                               start += 4;
-                       else if (0 == strncmp(start, "\\-", 2))
-                               start += 2;
-                       else if (0 == strncmp(start, "\\(en", 4))
-                               start += 4;
-                       else if (0 == strncmp(start, "\\(em", 4))
-                               start += 4;
+               /* Copy the rendered glyph into the stream. */
 
-                       while (' ' == *start)
-                               start++;
+               sz = strlen(cpp);
+               bsz += sz;
 
-                       sz = strlen(start) + 1;
-                       buf_appendb(dbuf, start, sz);
-                       buf_appendb(buf, start, sz);
+               buf = mandoc_realloc(buf, bsz);
 
-                       hash_put(hash, buf, TYPE_Nd);
-                       free(title);
-               }
+               memcpy(&buf[pos], cpp, sz);
+               pos += sz;
        }
 
-       for (n = n->child; n; n = n->next)
-               if (pman_node(hash, buf, dbuf, n))
-                       return(1);
-
-       return(0);
+       buf[pos] = '\0';
+       key->utf8 = buf;
 }
 
 /*
- * Parse a formatted manual page.
- * By necessity, this involves rather crude guesswork.
+ * Flush the current page's terms (and their bits) into the database.
+ * Wrap the entire set of additions in a transaction to make sqlite be a
+ * little faster.
+ * Also, UTF-8-encode the description at the last possible moment.
  */
 static void
-pformatted(DB *hash, struct buf *buf, 
-               struct buf *dbuf, const struct of *of)
+dbindex(const struct mpage *mpage, struct mchars *mc)
 {
-       FILE            *stream;
-       char            *line, *p, *title;
-       size_t           len, plen, titlesz;
-
-       if (NULL == (stream = fopen(of->fname, "r"))) {
-               if (warnings)
-                       perror(of->fname);
-               return;
-       }
-
-       /*
-        * Always use the title derived from the filename up front,
-        * do not even try to find it in the file.  This also makes
-        * sure we don't end up with an orphan index record, even if
-        * the file content turns out to be completely unintelligible.
-        */
+       struct mlink    *mlink;
+       struct str      *key;
+       const char      *desc;
+       int64_t          recno;
+       size_t           i;
+       unsigned int     slot;
 
-       buf->len = 0;
-       buf_append(buf, of->title);
-       hash_put(hash, buf, TYPE_Nm);
-
-       /* Skip to first blank line. */
-
-       while (NULL != (line = fgetln(stream, &len)))
-               if ('\n' == *line)
-                       break;
+       if (verb)
+               say(mpage->mlinks->file, "Adding to index");
 
-       /*
-        * Assume the first line that is not indented
-        * is the first section header.  Skip to it.
-        */
-
-       while (NULL != (line = fgetln(stream, &len)))
-               if ('\n' != *line && ' ' != *line)
-                       break;
-       
-       /*
-        * Read up until the next section into a buffer.
-        * Strip the leading and trailing newline from each read line,
-        * appending a trailing space.
-        * Ignore empty (whitespace-only) lines.
-        */
-
-       titlesz = 0;
-       title = NULL;
-
-       while (NULL != (line = fgetln(stream, &len))) {
-               if (' ' != *line || '\n' != line[(int)len - 1])
-                       break;
-               while (len > 0 && isspace((unsigned char)*line)) {
-                       line++;
-                       len--;
-               }
-               if (1 == len)
-                       continue;
-               title = mandoc_realloc(title, titlesz + len);
-               memcpy(title + titlesz, line, len);
-               titlesz += len;
-               title[(int)titlesz - 1] = ' ';
-       }
-
-
-       /*
-        * If no page content can be found, or the input line
-        * is already the next section header, or there is no
-        * trailing newline, reuse the page title as the page
-        * description.
-        */
-
-       if (NULL == title || '\0' == *title) {
-               if (warnings)
-                       fprintf(stderr, "%s: cannot find NAME section\n",
-                                       of->fname);
-               buf_appendb(dbuf, buf->cp, buf->size);
-               hash_put(hash, buf, TYPE_Nd);
-               fclose(stream);
-               free(title);
+       if (nodb)
                return;
+
+       desc = "";
+       if (NULL != mpage->desc && '\0' != *mpage->desc) {
+               key = ohash_find(&strings,
+                       ohash_qlookup(&strings, mpage->desc));
+               assert(NULL != key);
+               if (NULL == key->utf8)
+                       utf8key(mc, key);
+               desc = key->utf8;
        }
 
-       title = mandoc_realloc(title, titlesz + 1);
-       title[(int)titlesz] = '\0';
+       SQL_EXEC("BEGIN TRANSACTION");
 
+       i = 1;
        /*
-        * Skip to the first dash.
-        * Use the remaining line as the description (no more than 70
-        * bytes).
+        * XXX The following three lines are obsolete
+        * and only kept for backward compatibility
+        * until apropos(1) and friends have caught up.
         */
-
-       if (NULL != (p = strstr(title, "- "))) {
-               for (p += 2; ' ' == *p || '\b' == *p; p++)
-                       /* Skip to next word. */ ;
-       } else {
-               if (warnings)
-                       fprintf(stderr, "%s: no dash in title line\n",
-                                       of->fname);
-               p = title;
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc);
+       SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
+       SQL_STEP(stmts[STMT_INSERT_PAGE]);
+       recno = sqlite3_last_insert_rowid(db);
+       sqlite3_reset(stmts[STMT_INSERT_PAGE]);
+
+       for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
+               i = 1;
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
+               SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, recno);
+               SQL_STEP(stmts[STMT_INSERT_LINK]);
+               sqlite3_reset(stmts[STMT_INSERT_LINK]);
        }
 
-       plen = strlen(p);
-
-       /* Strip backspace-encoding from line. */
-
-       while (NULL != (line = memchr(p, '\b', plen))) {
-               len = line - p;
-               if (0 == len) {
-                       memmove(line, line + 1, plen--);
-                       continue;
-               } 
-               memmove(line - 1, line + 1, plen - len);
-               plen -= 2;
+       for (key = ohash_first(&strings, &slot); NULL != key;
+            key = ohash_next(&strings, &slot)) {
+               assert(key->mpage == mpage);
+               if (NULL == key->utf8)
+                       utf8key(mc, key);
+               i = 1;
+               SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8);
+               SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno);
+               SQL_STEP(stmts[STMT_INSERT_KEY]);
+               sqlite3_reset(stmts[STMT_INSERT_KEY]);
+               if (key->utf8 != key->key)
+                       free(key->utf8);
+               free(key);
        }
 
-       buf_appendb(dbuf, p, plen + 1);
-       buf->len = 0;
-       buf_appendb(buf, p, plen + 1);
-       hash_put(hash, buf, TYPE_Nd);
-       fclose(stream);
-       free(title);
+       SQL_EXEC("END TRANSACTION");
 }
 
 static void
-ofile_argbuild(int argc, char *argv[], struct of **of,
-               const char *basedir)
+dbprune(void)
 {
-       char             buf[PATH_MAX];
-       char             pbuf[PATH_MAX];
-       const char      *sec, *arch, *title;
-       char            *relpath, *p;
-       int              i, src_form;
-       struct of       *nof;
-
-       for (i = 0; i < argc; i++) {
-               if (NULL == (relpath = realpath(argv[i], pbuf))) {
-                       perror(argv[i]);
-                       continue;
-               }
-               if (NULL != basedir) {
-                       if (strstr(pbuf, basedir) != pbuf) {
-                               fprintf(stderr, "%s: file outside "
-                                   "base directory %s\n",
-                                   pbuf, basedir);
-                               continue;
-                       }
-                       relpath = pbuf + strlen(basedir);
-               }
-
-               /*
-                * Try to infer the manual section, architecture and
-                * page title from the path, assuming it looks like
-                *   man*[/<arch>]/<title>.<section>   or
-                *   cat<section>[/<arch>]/<title>.0
-                */
-
-               if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
-                       fprintf(stderr, "%s: path too long\n", relpath);
-                       continue;
-               }
-               sec = arch = title = "";
-               src_form = 0;
-               p = strrchr(buf, '\0');
-               while (p-- > buf) {
-                       if ('\0' == *sec && '.' == *p) {
-                               sec = p + 1;
-                               *p = '\0';
-                               if ('0' == *sec)
-                                       src_form |= MANDOC_FORM;
-                               else if ('1' <= *sec && '9' >= *sec)
-                                       src_form |= MANDOC_SRC;
-                               continue;
-                       }
-                       if ('/' != *p)
-                               continue;
-                       if ('\0' == *title) {
-                               title = p + 1;
-                               *p = '\0';
-                               continue;
-                       }
-                       if (0 == strncmp("man", p + 1, 3))
-                               src_form |= MANDOC_SRC;
-                       else if (0 == strncmp("cat", p + 1, 3))
-                               src_form |= MANDOC_FORM;
-                       else
-                               arch = p + 1;
-                       break;
-               }
-               if ('\0' == *title) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s: cannot deduce title "
-                                   "from filename\n",
-                                   relpath);
-                       title = buf;
-               }
+       struct mpage    *mpage;
+       struct mlink    *mlink;
+       size_t           i;
+       unsigned int     slot;
 
-               /*
-                * Build the file structure.
-                */
-
-               nof = mandoc_calloc(1, sizeof(struct of));
-               nof->fname = mandoc_strdup(relpath);
-               nof->sec = mandoc_strdup(sec);
-               nof->arch = mandoc_strdup(arch);
-               nof->title = mandoc_strdup(title);
-               nof->src_form = src_form;
-
-               /*
-                * Add the structure to the list.
-                */
+       if (nodb)
+               return;
 
-               if (verb > 1)
-                       printf("%s: scheduling\n", relpath);
-               if (NULL == *of) {
-                       *of = nof;
-                       (*of)->first = nof;
-               } else {
-                       nof->first = (*of)->first;
-                       (*of)->next = nof;
-                       *of = nof;
-               }
+       mpage = ohash_first(&mpages, &slot);
+       while (NULL != mpage) {
+               mlink = mpage->mlinks;
+               i = 1;
+               SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file);
+               SQL_STEP(stmts[STMT_DELETE_PAGE]);
+               sqlite3_reset(stmts[STMT_DELETE_PAGE]);
+               if (verb)
+                       say(mlink->file, "Deleted from index");
+               mpage = ohash_next(&mpages, &slot);
        }
 }
 
 /*
- * Recursively build up a list of files to parse.
- * We use this instead of ftw() and so on because I don't want global
- * variables hanging around.
- * This ignores the mandoc.db and mandoc.index files, but assumes that
- * everything else is a manual.
- * Pass in a pointer to a NULL structure for the first invocation.
+ * Close an existing database and its prepared statements.
+ * If "real" is not set, rename the temporary file into the real one.
  */
 static void
-ofile_dirbuild(const char *dir, const char* psec, const char *parch,
-               int p_src_form, struct of **of)
+dbclose(int real)
 {
-       char             buf[PATH_MAX];
-       size_t           sz;
-       DIR             *d;
-       const char      *fn, *sec, *arch;
-       char            *p, *q, *suffix;
-       struct of       *nof;
-       struct dirent   *dp;
-       int              src_form;
-
-       if (NULL == (d = opendir(dir))) {
-               if (warnings)
-                       perror(dir);
+       size_t           i;
+
+       if (nodb)
                return;
+
+       for (i = 0; i < STMT__MAX; i++) {
+               sqlite3_finalize(stmts[i]);
+               stmts[i] = NULL;
        }
 
-       while (NULL != (dp = readdir(d))) {
-               fn = dp->d_name;
+       sqlite3_close(db);
+       db = NULL;
 
-               if ('.' == *fn)
-                       continue;
+       if (real)
+               return;
 
-               src_form = p_src_form;
+       if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(MANDOC_DB, NULL);
+       }
+}
 
-               if (DT_DIR == dp->d_type) {
-                       sec = psec;
-                       arch = parch;
+/*
+ * This is straightforward stuff.
+ * Open a database connection to a "temporary" database, then open a set
+ * of prepared statements we'll use over and over again.
+ * If "real" is set, we use the existing database; if not, we remove any
+ * stale temporary one and create it anew.
+ * Must be matched by dbclose().
+ */
+static int
+dbopen(int real)
+{
+       const char      *file, *sql;
+       int              rc, ofl;
 
-                       /*
-                        * By default, only use directories called:
-                        *   man<section>/[<arch>/]   or
-                        *   cat<section>/[<arch>/]
-                        */
+       if (nodb) 
+               return(1);
 
-                       if ('\0' == *sec) {
-                               if(0 == strncmp("man", fn, 3)) {
-                                       src_form |= MANDOC_SRC;
-                                       sec = fn + 3;
-                               } else if (0 == strncmp("cat", fn, 3)) {
-                                       src_form |= MANDOC_FORM;
-                                       sec = fn + 3;
-                               } else {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: bad section\n",
-                                           dir, fn);
-                                       if (use_all)
-                                               sec = fn;
-                                       else
-                                               continue;
-                               }
-                       } else if ('\0' == *arch) {
-                               if (NULL != strchr(fn, '.')) {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: bad architecture\n",
-                                           dir, fn);
-                                       if (0 == use_all)
-                                               continue;
-                               }
-                               arch = fn;
-                       } else {
-                               if (warnings) fprintf(stderr, "%s/%s: "
-                                   "excessive subdirectory\n", dir, fn);
-                               if (0 == use_all)
-                                       continue;
-                       }
+       ofl = SQLITE_OPEN_READWRITE;
+       if (0 == real) {
+               file = MANDOC_DB "~";
+               if (-1 == remove(file) && ENOENT != errno) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(file, NULL);
+                       return(0);
+               }
+               ofl |= SQLITE_OPEN_EXCLUSIVE;
+       } else
+               file = MANDOC_DB;
+
+       rc = sqlite3_open_v2(file, &db, ofl, NULL);
+       if (SQLITE_OK == rc) 
+               goto prepare_statements;
+       if (SQLITE_CANTOPEN != rc) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, NULL);
+               return(0);
+       }
 
-                       buf[0] = '\0';
-                       strlcat(buf, dir, PATH_MAX);
-                       strlcat(buf, "/", PATH_MAX);
-                       sz = strlcat(buf, fn, PATH_MAX);
+       sqlite3_close(db);
+       db = NULL;
 
-                       if (PATH_MAX <= sz) {
-                               if (warnings) fprintf(stderr, "%s/%s: "
-                                   "path too long\n", dir, fn);
-                               continue;
-                       }
+       if (SQLITE_OK != (rc = sqlite3_open(file, &db))) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, NULL);
+               return(0);
+       }
 
-                       if (verb > 1)
-                               printf("%s: scanning\n", buf);
+       /*
+        * XXX The first three columns in table mpages are obsolete
+        * and only kept for backward compatibility
+        * until apropos(1) and friends have caught up.
+        */
+       sql = "CREATE TABLE \"mpages\" (\n"
+             " \"file\" TEXT NOT NULL,\n"
+             " \"sec\" TEXT NOT NULL,\n"
+             " \"arch\" TEXT NOT NULL,\n"
+             " \"desc\" TEXT NOT NULL,\n"
+             " \"form\" INTEGER NOT NULL,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE TABLE \"mlinks\" (\n"
+             " \"file\" TEXT NOT NULL,\n"
+             " \"sec\" TEXT NOT NULL,\n"
+             " \"arch\" TEXT NOT NULL,\n"
+             " \"name\" TEXT NOT NULL,\n"
+             " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+               "ON DELETE CASCADE,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE TABLE \"keys\" (\n"
+             " \"bits\" INTEGER NOT NULL,\n"
+             " \"key\" TEXT NOT NULL,\n"
+             " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+               "ON DELETE CASCADE,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE INDEX \"key_index\" ON keys (key);\n";
+
+       if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, "%s", sqlite3_errmsg(db));
+               return(0);
+       }
 
-                       ofile_dirbuild(buf, sec, arch, src_form, of);
-                       continue;
-               }
+prepare_statements:
+       SQL_EXEC("PRAGMA foreign_keys = ON");
+       sql = "DELETE FROM mpages where file=?";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
+       sql = "INSERT INTO mpages "
+               "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
+       sql = "INSERT INTO mlinks "
+               "(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
+       sql = "INSERT INTO keys "
+               "(bits,key,pageid) VALUES (?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
 
-               if (DT_REG != dp->d_type) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: not a regular file\n",
-                                   dir, fn);
-                       continue;
-               }
-               if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
-                       continue;
-               if ('\0' == *psec) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: file outside section\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-               }
+       /*
+        * When opening a new database, we can turn off
+        * synchronous mode for much better performance.
+        */
 
-               /*
-                * By default, skip files where the file name suffix
-                * does not agree with the section directory
-                * they are located in.
-                */
+       if (real)
+               SQL_EXEC("PRAGMA synchronous = OFF");
 
-               suffix = strrchr(fn, '.');
-               if (NULL == suffix) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: no filename suffix\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-               } else if ((MANDOC_SRC & src_form &&
-                               strcmp(suffix + 1, psec)) ||
-                           (MANDOC_FORM & src_form &&
-                               strcmp(suffix + 1, "0"))) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: wrong filename suffix\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-                       if ('0' == suffix[1])
-                               src_form |= MANDOC_FORM;
-                       else if ('1' <= suffix[1] && '9' >= suffix[1])
-                               src_form |= MANDOC_SRC;
-               }
+       return(1);
+}
 
-               /*
-                * Skip formatted manuals if a source version is
-                * available.  Ignore the age: it is very unlikely
-                * that people install newer formatted base manuals
-                * when they used to have source manuals before,
-                * and in ports, old manuals get removed on update.
-                */
-               if (0 == use_all && MANDOC_FORM & src_form &&
-                               '\0' != *psec) {
-                       buf[0] = '\0';
-                       strlcat(buf, dir, PATH_MAX);
-                       p = strrchr(buf, '/');
-                       if ('\0' != *parch && NULL != p)
-                               for (p--; p > buf; p--)
-                                       if ('/' == *p)
-                                               break;
-                       if (NULL == p)
-                               p = buf;
-                       else
-                               p++;
-                       if (0 == strncmp("cat", p, 3))
-                               memcpy(p, "man", 3);
-                       strlcat(buf, "/", PATH_MAX);
-                       sz = strlcat(buf, fn, PATH_MAX);
-                       if (sz >= PATH_MAX) {
-                               if (warnings) fprintf(stderr,
-                                   "%s/%s: path too long\n",
-                                   dir, fn);
-                               continue;
-                       }
-                       q = strrchr(buf, '.');
-                       if (NULL != q && p < q++) {
-                               *q = '\0';
-                               sz = strlcat(buf, psec, PATH_MAX);
-                               if (sz >= PATH_MAX) {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: path too long\n",
-                                           dir, fn);
-                                       continue;
-                               }
-                               if (0 == access(buf, R_OK))
-                                       continue;
-                       }
-               }
+/*
+ * Allocator with the (size, argument) callback signature.
+ * Returns the result of mandoc_calloc() for sz elements of one byte
+ * each; the second argument is not looked at.
+ * NOTE(review): presumably installed as the "halloc" hook of a
+ * struct ohash_info - confirm where the hash tables are set up.
+ */
+static void *
+hash_halloc(size_t sz, void *arg)
+{
 
-               buf[0] = '\0';
-               assert('.' == dir[0]);
-               if ('/' == dir[1]) {
-                       strlcat(buf, dir + 2, PATH_MAX);
-                       strlcat(buf, "/", PATH_MAX);
-               }
-               sz = strlcat(buf, fn, PATH_MAX);
-               if (sz >= PATH_MAX) {
-                       if (warnings) fprintf(stderr,
-                           "%s/%s: path too long\n", dir, fn);
-                       continue;
-               }
+       void            *table;
+
+       table = mandoc_calloc(sz, 1);
+       return(table);
+}
 
-               nof = mandoc_calloc(1, sizeof(struct of));
-               nof->fname = mandoc_strdup(buf);
-               nof->sec = mandoc_strdup(psec);
-               nof->arch = mandoc_strdup(parch);
-               nof->src_form = src_form;
+/*
+ * Allocator with the (size, argument) callback signature.
+ * Returns the result of mandoc_malloc() for sz bytes; the second
+ * argument is not looked at.
+ * NOTE(review): presumably installed as the "alloc" hook of a
+ * struct ohash_info - confirm where the hash tables are set up.
+ */
+static void *
+hash_alloc(size_t sz, void *arg)
+{
 
-               /*
-                * Remember the file name without the extension,
-                * to be used as the page title in the database.
-                */
+       void            *entry;
+
+       entry = mandoc_malloc(sz);
+       return(entry);
+}
 
-               if (NULL != suffix)
-                       *suffix = '\0';
-               nof->title = mandoc_strdup(fn);
+/*
+ * Deallocator with the (pointer, size, argument) callback signature.
+ * Passes p to free(); the size and the argument are not needed.
+ */
+static void
+hash_free(void *p, size_t sz, void *arg)
+{
 
-               /*
-                * Add the structure to the list.
-                */
+       (void)sz;
+       (void)arg;
+       free(p);
+}
 
-               if (verb > 1)
-                       printf("%s: scheduling\n", buf);
+/*
+ * Make targetdir the current working directory and store its
+ * resolved path in basedir.
+ * The first call records the starting directory and keeps a file
+ * descriptor open on it; every later call first returns there with
+ * fchdir(), so a relative targetdir is always resolved from the
+ * starting directory.
+ * A NULL targetdir stands for the starting directory: on the first
+ * call it becomes the target, on later calls the function merely
+ * goes back to it, closes the descriptor, and returns.
+ * Returns 1 on success and 0 on failure.  New failures set exitcode
+ * and are normally reported with say(); once the descriptor is gone,
+ * further calls return 0 silently.
+ */
+static int
+set_basedir(const char *targetdir)
+{
+       static char      startdir[PATH_MAX];
+       static int       fd;
 
-               if (NULL == *of) {
-                       *of = nof;
-                       (*of)->first = nof;
-               } else {
-                       nof->first = (*of)->first;
-                       (*of)->next = nof;
-                       *of = nof;
+       /*
+        * Remember where we started by keeping a fd open to the
+        * original working directory: throughout this utility, we
+        * chdir() a lot to handle relative paths, and by doing this,
+        * we can return to the starting point.
+        */
+       if ('\0' == *startdir) {
+               if (NULL == getcwd(startdir, PATH_MAX)) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       if (NULL != targetdir)
+                               say(".", NULL);
+                       return(0);
+               }
+               if (-1 == (fd = open(startdir, O_RDONLY, 0))) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(startdir, NULL);
+                       return(0);
+               }
+               if (NULL == targetdir)
+                       targetdir = startdir;
+       } else {
+               if (-1 == fd)
+                       return(0);
+               if (-1 == fchdir(fd)) {
+                       /*
+                        * Mark the descriptor as gone, such that
+                        * later calls bail out above instead of
+                        * using or closing it a second time.
+                        */
+                       close(fd);
+                       fd = -1;
+                       basedir[0] = '\0';
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(startdir, NULL);
+                       return(0);
+               }
+               if (NULL == targetdir) {
+                       close(fd);
+                       fd = -1;
+                       return(1);
                 }
         }
-
-       closedir(d);
+       if (NULL == realpath(targetdir, basedir)) {
+               basedir[0] = '\0';
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(targetdir, NULL);
+               return(0);
+       } else if (-1 == chdir(basedir)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say("", NULL);
+               return(0);
+       }
+       return(1);
 }
 
 static void
-ofile_free(struct of *of)
+say(const char *file, const char *format, ...)
 {
-       struct of       *nof;
-
-       if (NULL != of)
-               of = of->first;
-
-       while (NULL != of) {
-               nof = of->next;
-               free(of->fname);
-               free(of->sec);
-               free(of->arch);
-               free(of->title);
-               free(of);
-               of = nof;
+       va_list          ap;
+
+       if ('\0' != *basedir)
+               fprintf(stderr, "%s", basedir);
+       if ('\0' != *basedir && '\0' != *file)
+               fputs("//", stderr);
+       if ('\0' != *file)
+               fprintf(stderr, "%s", file);
+       fputs(": ", stderr);
+
+       if (NULL == format) {
+               perror(NULL);
+               return;
        }
+
+       va_start(ap, format);
+       vfprintf(stderr, format, ap);
+       va_end(ap);
+
+       fputc('\n', stderr);
 }
diff --git a/usr.bin/mandoc/mandocdb.h b/usr.bin/mandoc/mandocdb.h
deleted file mode 100644 (file)
index af61f3f..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*      $Id: mandocdb.h,v 1.6 2012/01/09 01:59:08 schwarze Exp $ */
-/*
- * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef MANDOCDB_H
-#define MANDOCDB_H
-
-#define        MANDOC_DB       "mandoc.db"
-#define        MANDOC_IDX      "mandoc.index"
-
-#define        TYPE_An         0x0000000000000001ULL
-#define        TYPE_Ar         0x0000000000000002ULL
-#define        TYPE_At         0x0000000000000004ULL
-#define        TYPE_Bsx        0x0000000000000008ULL
-#define        TYPE_Bx         0x0000000000000010ULL
-#define        TYPE_Cd         0x0000000000000020ULL
-#define        TYPE_Cm         0x0000000000000040ULL
-#define        TYPE_Dv         0x0000000000000080ULL
-#define        TYPE_Dx         0x0000000000000100ULL
-#define        TYPE_Em         0x0000000000000200ULL
-#define        TYPE_Er         0x0000000000000400ULL
-#define        TYPE_Ev         0x0000000000000800ULL
-#define        TYPE_Fa         0x0000000000001000ULL
-#define        TYPE_Fl         0x0000000000002000ULL
-#define        TYPE_Fn         0x0000000000004000ULL
-#define        TYPE_Ft         0x0000000000008000ULL
-#define        TYPE_Fx         0x0000000000010000ULL
-#define        TYPE_Ic         0x0000000000020000ULL
-#define        TYPE_In         0x0000000000040000ULL
-#define        TYPE_Lb         0x0000000000080000ULL
-#define        TYPE_Li         0x0000000000100000ULL
-#define        TYPE_Lk         0x0000000000200000ULL
-#define        TYPE_Ms         0x0000000000400000ULL
-#define        TYPE_Mt         0x0000000000800000ULL
-#define        TYPE_Nd         0x0000000001000000ULL
-#define        TYPE_Nm         0x0000000002000000ULL
-#define        TYPE_Nx         0x0000000004000000ULL
-#define        TYPE_Ox         0x0000000008000000ULL
-#define        TYPE_Pa         0x0000000010000000ULL
-#define        TYPE_Rs         0x0000000020000000ULL
-#define        TYPE_Sh         0x0000000040000000ULL
-#define        TYPE_Ss         0x0000000080000000ULL
-#define        TYPE_St         0x0000000100000000ULL
-#define        TYPE_Sy         0x0000000200000000ULL
-#define        TYPE_Tn         0x0000000400000000ULL
-#define        TYPE_Va         0x0000000800000000ULL
-#define        TYPE_Vt         0x0000001000000000ULL
-#define        TYPE_Xr         0x0000002000000000ULL
-
-#endif /*!MANDOCDB_H */
diff --git a/usr.bin/mandoc/mansearch.c b/usr.bin/mandoc/mansearch.c
new file mode 100644 (file)
index 0000000..222fd58
--- /dev/null
@@ -0,0 +1,565 @@
+/*     $Id: mansearch.c,v 1.1 2013/12/31 00:40:19 schwarze Exp $ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <ohash.h>
+#include <sqlite3.h>
+
+#include "mandoc.h"
+#include "manpath.h"
+#include "mansearch.h"
+/*
+ * Helpers binding one value to the next parameter of the prepared
+ * statement _s.  Each macro uses _i as the parameter index and then
+ * increments it, so _i has to be a modifiable variable of the caller.
+ * A failed bind is only reported: the sqlite3_errmsg() text for _db
+ * is printed to stderr and execution goes on.
+ * Text and blobs are passed with SQLITE_STATIC, so the bound data
+ * has to remain valid and unchanged while the statement uses it.
+ * SQL_BIND_BLOB binds the object _v itself (its address and its
+ * sizeof), not memory that _v points to.
+ */
+#define        SQL_BIND_TEXT(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_text \
+               ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+#define        SQL_BIND_INT64(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_int64 \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+#define        SQL_BIND_BLOB(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_blob \
+               ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+
+struct expr {
+       uint64_t         bits;    /* TYPE_* bits of the key types to search */
+       const char      *substr;  /* substring to search for; NULL if regexp is used */
+       regex_t          regexp;  /* compiled regexp, used when substr is NULL */
+       struct expr     *next;    /* next term; terms are ORed in the SQL query */
+};
+
+struct match {
+       uint64_t         id; /* page identifier in database; hash table key */
+       char            *file; /* relative filepath of manpage */
+       char            *desc; /* description of manpage; handed on to the result */
+       int              form; /* 0 == catpage */
+};
+
+struct type {
+       uint64_t         bits; /* TYPE_* mask selected by this name */
+       const char      *name; /* key name in search terms, matched ignoring case */
+};
+
+static const struct type types[] = {
+       { TYPE_An,  "An" },
+       { TYPE_Ar,  "Ar" },
+       { TYPE_At,  "At" },
+       { TYPE_Bsx, "Bsx" },
+       { TYPE_Bx,  "Bx" },
+       { TYPE_Cd,  "Cd" },
+       { TYPE_Cm,  "Cm" },
+       { TYPE_Dv,  "Dv" },
+       { TYPE_Dx,  "Dx" },
+       { TYPE_Em,  "Em" },
+       { TYPE_Er,  "Er" },
+       { TYPE_Ev,  "Ev" },
+       { TYPE_Fa,  "Fa" },
+       { TYPE_Fl,  "Fl" },
+       { TYPE_Fn,  "Fn" },
+       { TYPE_Fn,  "Fo" }, /* same bit as "Fn" */
+       { TYPE_Ft,  "Ft" },
+       { TYPE_Fx,  "Fx" },
+       { TYPE_Ic,  "Ic" },
+       { TYPE_In,  "In" },
+       { TYPE_Lb,  "Lb" },
+       { TYPE_Li,  "Li" },
+       { TYPE_Lk,  "Lk" },
+       { TYPE_Ms,  "Ms" },
+       { TYPE_Mt,  "Mt" },
+       { TYPE_Nd,  "Nd" },
+       { TYPE_Nm,  "Nm" },
+       { TYPE_Nx,  "Nx" },
+       { TYPE_Ox,  "Ox" },
+       { TYPE_Pa,  "Pa" },
+       { TYPE_Rs,  "Rs" },
+       { TYPE_Sh,  "Sh" },
+       { TYPE_Ss,  "Ss" },
+       { TYPE_St,  "St" },
+       { TYPE_Sy,  "Sy" },
+       { TYPE_Tn,  "Tn" },
+       { TYPE_Va,  "Va" },
+       { TYPE_Va,  "Vt" }, /* NOTE(review): same bit as "Va" - confirm intended */
+       { ~0ULL,    "any" }, /* all bits set */
+       { 0ULL, NULL } /* sentinel: zero bits end the scan in exprterm() */
+};
+
+static void            *hash_alloc(size_t, void *);
+static void             hash_free(void *, size_t, void *);
+static void            *hash_halloc(size_t, void *);
+static struct expr     *exprcomp(const struct mansearch *, 
+                               int, char *[]);
+static void             exprfree(struct expr *);
+static struct expr     *exprterm(const struct mansearch *, char *, int);
+static void             sql_match(sqlite3_context *context,
+                               int argc, sqlite3_value **argv);
+static void             sql_regexp(sqlite3_context *context,
+                               int argc, sqlite3_value **argv);
+static char            *sql_statement(const struct expr *,
+                               const char *, const char *);
+
+/*
+ * Search the MANDOC_DB databases in all directories listed in
+ * "paths" for manual pages matching the search terms in argv.
+ *
+ * On return, *res points to an allocated array of *sz results, or
+ * is NULL with *sz being 0 if nothing was found.  The array and the
+ * file, names, and desc strings of its entries are allocated here
+ * and not freed, so releasing them is left to the caller.
+ *
+ * Returns 0 if argc is 0, if the terms cannot be compiled, or if
+ * the current directory cannot be determined or opened; returns 1
+ * otherwise.  Directories and databases that cannot be entered,
+ * opened, or queried are reported on stderr and skipped.  If
+ * returning to the starting directory fails, all results collected
+ * so far are discarded.
+ */
+int
+mansearch(const struct mansearch *search,
+               const struct manpaths *paths,
+               int argc, char *argv[],
+               struct manpage **res, size_t *sz)
+{
+       int              fd, rc, c;
+       int64_t          id;
+       char             buf[PATH_MAX];
+       char            *sql, *newnames;
+       const char      *oldnames, *sep1, *name, *sec, *sep2, *arch;
+       struct manpage  *mpage;
+       struct expr     *e, *ep;
+       sqlite3         *db;
+       sqlite3_stmt    *s;
+       struct match    *mp;
+       struct ohash_info info;
+       struct ohash     htab;
+       unsigned int     idx;
+       size_t           i, j, cur, maxres;
+
+       memset(&info, 0, sizeof(struct ohash_info));
+
+       info.halloc = hash_halloc;
+       info.alloc = hash_alloc;
+       info.hfree = hash_free;
+       info.key_offset = offsetof(struct match, id);
+
+       *sz = cur = maxres = 0;
+       sql = NULL;
+       *res = NULL;
+       fd = -1;
+       e = NULL;
+       rc = 0;
+
+       if (0 == argc)
+               goto out;
+       if (NULL == (e = exprcomp(search, argc, argv)))
+               goto out;
+
+       /*
+        * Save a descriptor to the current working directory.
+        * Since pathnames in the "paths" variable might be relative,
+        * and we'll be chdir()ing into them, we need to keep a handle
+        * on our current directory from which to start the chdir().
+        */
+
+       if (NULL == getcwd(buf, PATH_MAX)) {
+               perror(NULL);
+               goto out;
+       } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
+               perror(buf);
+               goto out;
+       }
+
+       sql = sql_statement(e, search->arch, search->sec);
+
+       /*
+        * Loop over the directories (containing databases) for us to
+        * search.
+        * Don't let missing/bad databases/directories faze us.
+        * In each, try to open the resident database and, if it opens,
+        * scan it for our match expression.
+        */
+
+       for (i = 0; i < paths->sz; i++) {
+               if (-1 == fchdir(fd)) {
+                       perror(buf);
+                       /*
+                        * Discard everything collected so far, and
+                        * leave the caller with an empty result
+                        * rather than a pointer to freed memory.
+                        */
+                       for (j = 0; j < cur; j++) {
+                               free((*res)[j].file);
+                               free((*res)[j].names);
+                               free((*res)[j].desc);
+                       }
+                       free(*res);
+                       *res = NULL;
+                       cur = 0;
+                       break;
+               } else if (-1 == chdir(paths->paths[i])) {
+                       perror(paths->paths[i]);
+                       continue;
+               }
+
+               c = sqlite3_open_v2(MANDOC_DB, &db,
+                   SQLITE_OPEN_READONLY, NULL);
+
+               if (SQLITE_OK != c) {
+                       perror(MANDOC_DB);
+                       sqlite3_close(db);
+                       continue;
+               }
+
+               /*
+                * Define the SQL functions for substring
+                * and regular expression matching.
+                */
+
+               c = sqlite3_create_function(db, "match", 2,
+                   SQLITE_ANY, NULL, sql_match, NULL, NULL);
+               assert(SQLITE_OK == c);
+               c = sqlite3_create_function(db, "regexp", 2,
+                   SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
+               assert(SQLITE_OK == c);
+
+               /*
+                * Without a usable statement, there is nothing
+                * to bind or step, so skip this database.
+                */
+
+               c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
+               if (SQLITE_OK != c) {
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+                       sqlite3_close(db);
+                       continue;
+               }
+
+               /*
+                * Bind the parameters in the order in which
+                * sql_statement() emitted the placeholders.
+                */
+
+               j = 1;
+               if (NULL != search->arch)
+                       SQL_BIND_TEXT(db, s, j, search->arch);
+               if (NULL != search->sec)
+                       SQL_BIND_TEXT(db, s, j, search->sec);
+
+               for (ep = e; NULL != ep; ep = ep->next) {
+                       if (NULL == ep->substr) {
+                               SQL_BIND_BLOB(db, s, j, ep->regexp);
+                       } else
+                               SQL_BIND_TEXT(db, s, j, ep->substr);
+                       SQL_BIND_INT64(db, s, j, ep->bits);
+               }
+
+               memset(&htab, 0, sizeof(struct ohash));
+               ohash_init(&htab, 4, &info);
+
+               /*
+                * Hash each entry on its [unique] document identifier.
+                * This is a uint64_t.
+                * Instead of using a hash function, simply convert the
+                * uint64_t to a uint32_t, the hash value's type.
+                * This gives good performance and preserves the
+                * distribution of buckets in the table.
+                */
+               while (SQLITE_ROW == (c = sqlite3_step(s))) {
+                       id = sqlite3_column_int64(s, 0);
+                       idx = ohash_lookup_memory
+                               (&htab, (char *)&id,
+                                sizeof(uint64_t), (uint32_t)id);
+
+                       /* Several keys may match on the same page. */
+                       if (NULL != ohash_find(&htab, idx))
+                               continue;
+
+                       mp = mandoc_calloc(1, sizeof(struct match));
+                       mp->id = id;
+                       mp->file = mandoc_strdup
+                               ((char *)sqlite3_column_text(s, 3));
+                       mp->desc = mandoc_strdup
+                               ((char *)sqlite3_column_text(s, 4));
+                       mp->form = sqlite3_column_int(s, 5);
+                       ohash_insert(&htab, idx, mp);
+               }
+
+               if (SQLITE_DONE != c)
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+
+               sqlite3_finalize(s);
+
+               c = sqlite3_prepare_v2(db,
+                   "SELECT * FROM mlinks WHERE pageid=?",
+                   -1, &s, NULL);
+               if (SQLITE_OK != c)
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+
+               /*
+                * Turn each match into a result entry
+                * and collect the names of all its links.
+                */
+
+               for (mp = ohash_first(&htab, &idx);
+                               NULL != mp;
+                               mp = ohash_next(&htab, &idx)) {
+                       if (cur + 1 > maxres) {
+                               maxres += 1024;
+                               *res = mandoc_realloc
+                                       (*res, maxres * sizeof(struct manpage));
+                       }
+                       mpage = *res + cur;
+                       if (-1 == asprintf(&mpage->file, "%s/%s",
+                           paths->paths[i], mp->file)) {
+                               perror(0);
+                               exit((int)MANDOCLEVEL_SYSERR);
+                       }
+                       mpage->names = NULL;
+                       mpage->desc = mp->desc;
+                       mpage->form = mp->form;
+
+                       j = 1;
+                       SQL_BIND_INT64(db, s, j, mp->id);
+                       while (SQLITE_ROW == (c = sqlite3_step(s))) {
+                               if (NULL == mpage->names) {
+                                       oldnames = "";
+                                       sep1 = "";
+                               } else {
+                                       oldnames = mpage->names;
+                                       sep1 = ", ";
+                               }
+                               sec = (const char *)
+                                   sqlite3_column_text(s, 1);
+                               arch = (const char *)
+                                   sqlite3_column_text(s, 2);
+                               name = (const char *)
+                                   sqlite3_column_text(s, 3);
+                               sep2 = '\0' == *arch ? "" : "/";
+                               if (-1 == asprintf(&newnames,
+                                   "%s%s%s(%s%s%s)", oldnames, sep1,
+                                   name, sec, sep2, arch)) {
+                                       perror(0);
+                                       exit((int)MANDOCLEVEL_SYSERR);
+                               }
+                               free(mpage->names);
+                               mpage->names = newnames;
+                       }
+                       if (SQLITE_DONE != c)
+                               fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+                       sqlite3_reset(s);
+
+                       /* The desc string now belongs to the result. */
+                       free(mp->file);
+                       free(mp);
+                       cur++;
+               }
+
+               sqlite3_finalize(s);
+               sqlite3_close(db);
+               ohash_delete(&htab);
+       }
+       rc = 1;
+out:
+       exprfree(e);
+       if (-1 != fd)
+               close(fd);
+       free(sql);
+       *sz = cur;
+       return(rc);
+}
+
+/*
+ * Application-defined SQL function for substring matching.
+ * The result is 1 if the text of the first argument occurs in the
+ * text of the second argument, ignoring case, and 0 otherwise.
+ * The SQL LIKE and GLOB operators are deliberately not used because
+ * they would require escaping the metacharacters in the string
+ * being searched for.
+ */
+static void
+sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+       const char      *needle, *haystack;
+
+       assert(2 == argc);
+       haystack = (const char *)sqlite3_value_text(argv[1]);
+       needle = (const char *)sqlite3_value_text(argv[0]);
+       sqlite3_result_int(context, NULL != strcasestr(haystack, needle));
+}
+
+/*
+ * Application-defined SQL function for regular expression matching.
+ * The first argument is a blob holding a compiled regex_t, the
+ * second one the text to examine; the result is 1 if regexec()
+ * reports a match and 0 otherwise.
+ */
+static void
+sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+       const regex_t   *re;
+       const char      *text;
+       int              nomatch;
+
+       assert(2 == argc);
+       re = sqlite3_value_blob(argv[0]);
+       text = (const char *)sqlite3_value_text(argv[1]);
+       nomatch = regexec(re, text, 0, NULL, 0);
+       sqlite3_result_int(context, 0 == nomatch);
+}
+
+/*
+ * Build the text of the search SQL statement.
+ * The statement selects all keys matching any one of the terms in
+ * the expression list; an architecture and a section filter are
+ * put in front if arch and sec are not NULL, respectively.
+ * The per-document AND expressions are filtered later, when
+ * collecting results.
+ * Returns an allocated string that the caller has to free.
+ */
+static char *
+sql_statement(const struct expr *e, const char *arch, const char *sec)
+{
+       static const char head[] =
+               "SELECT pageid,bits,key,file,desc,form,sec,arch "
+               "FROM keys "
+               "INNER JOIN mpages ON mpages.id=keys.pageid "
+               "WHERE ";
+       static const char substr[] = "(key MATCH ? AND bits & ?)";
+       static const char regexp[] = "(key REGEXP ? AND bits & ?)";
+       static const char andarch[] = "arch = ? AND ";
+       static const char andsec[] = "sec = ? AND ";
+       const struct expr *ep;
+       char            *sql;
+       size_t           need;
+
+       /* First pass: add up the sizes of all the pieces. */
+
+       need = sizeof(head) + 1;  /* includes the NUL; 1 for "(" */
+       if (NULL != arch)
+               need += sizeof(andarch) - 1;
+       if (NULL != sec)
+               need += sizeof(andsec) - 1;
+       for (ep = e; NULL != ep; ep = ep->next) {
+               if (NULL == ep->substr)
+                       need += sizeof(regexp) - 1;
+               else
+                       need += sizeof(substr) - 1;
+               need += NULL == ep->next ? 2 : 4;
+       }
+
+       /* Second pass: glue the pieces together. */
+
+       sql = mandoc_malloc(need);
+       sql[0] = '\0';
+       strlcat(sql, head, need);
+       if (NULL != arch)
+               strlcat(sql, andarch, need);
+       if (NULL != sec)
+               strlcat(sql, andsec, need);
+       strlcat(sql, "(", need);
+
+       for (ep = e; NULL != ep; ep = ep->next) {
+               if (NULL == ep->substr)
+                       strlcat(sql, regexp, need);
+               else
+                       strlcat(sql, substr, need);
+               if (NULL == ep->next)
+                       strlcat(sql, ");", need);
+               else
+                       strlcat(sql, " OR ", need);
+       }
+
+       return(sql);
+}
+
+/*
+ * Compile a set of string tokens into a list of search terms.
+ * Each token in "argv" is handed to exprterm() as one term; a token
+ * of "-i" is not a term itself but makes the following term case
+ * insensitive.
+ * Returns the first term of the list, or NULL if argc is 0, if a
+ * term is invalid, or if "-i" is the last token; in the failure
+ * cases, all terms compiled so far are released.
+ */
+static struct expr *
+exprcomp(const struct mansearch *search, int argc, char *argv[])
+{
+       int              i, cs;
+       struct expr     *first, *next, *cur;
+
+       first = cur = NULL;
+
+       for (i = 0; i < argc; i++) {
+               if (0 == strcmp("-i", argv[i])) {
+                       if (++i >= argc) {
+                               /* Nothing follows: do not leak the list. */
+                               exprfree(first);
+                               return(NULL);
+                       }
+                       cs = 0;
+               } else
+                       cs = 1;
+               next = exprterm(search, argv[i], cs);
+               if (NULL == next) {
+                       exprfree(first);
+                       return(NULL);
+               }
+               if (NULL != first) {
+                       cur->next = next;
+                       cur = next;
+               } else
+                       cur = first = next;
+       }
+
+       return(first);
+}
+
+/*
+ * Compile one search term of the form [keys](=substring|~regexp),
+ * where "keys" is a comma-separated list of names from the types
+ * table, or of a plain string without '=' and '~'.
+ * The buffer is modified in place, and the substring stored in the
+ * term points into it, so the buffer has to outlive the term.
+ * A regular expression is compiled case-insensitively if cs is 0.
+ * Returns the allocated term, or NULL if the buffer is empty, the
+ * regular expression does not compile, or a key name is unknown.
+ */
+static struct expr *
+exprterm(const struct mansearch *search, char *buf, int cs)
+{
+       struct expr     *e;
+       char            *key, *v;
+       size_t           i;
+
+       if ('\0' == *buf)
+               return(NULL);
+
+       e = mandoc_calloc(1, sizeof(struct expr));
+
+       /* "whatis" mode uses an opaque string and default fields. */
+
+       if (MANSEARCH_WHATIS & search->flags) {
+               e->substr = buf;
+               e->bits = search->deftype;
+               return(e);
+       }
+
+       /*
+        * If neither '=' nor '~' is present, search for the whole
+        * string as a substring in the default fields.
+        * If the term starts with '=' or '~', no keys are given,
+        * so use the default fields as well.
+        */
+
+       if (NULL == (v = strpbrk(buf, "=~"))) {
+               e->substr = buf;
+               e->bits = search->deftype;
+               return(e);
+       } else if (v == buf)
+               e->bits = search->deftype;
+
+       if ('~' == *v++) {
+               if (regcomp(&e->regexp, v,
+                   REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
+                       free(e);
+                       return(NULL);
+               }
+       } else
+               e->substr = v;
+
+       /* Cut off the key list in front of the operator. */
+
+       v[-1] = '\0';
+
+       /*
+        * Parse out all possible fields.
+        * If the field doesn't resolve, bail.
+        */
+
+       while (NULL != (key = strsep(&buf, ","))) {
+               if ('\0' == *key)
+                       continue;
+               i = 0;
+               while (types[i].bits &&
+                       strcasecmp(types[i].name, key))
+                       i++;
+               if (0 == types[i].bits) {
+                       /* No substring means a regexp was compiled. */
+                       if (NULL == e->substr)
+                               regfree(&e->regexp);
+                       free(e);
+                       return(NULL);
+               }
+               e->bits |= types[i].bits;
+       }
+
+       return(e);
+}
+
+/*
+ * Release a list of search terms; a NULL list is fine.
+ * A term without a substring holds a compiled regular expression,
+ * which is released as well.  The substrings themselves are not
+ * freed because exprterm() does not allocate them.
+ */
+static void
+exprfree(struct expr *p)
+{
+       struct expr     *pp;
+
+       while (NULL != p) {
+               pp = p->next;
+               if (NULL == p->substr)
+                       regfree(&p->regexp);
+               free(p);
+               p = pp;
+       }
+}
+
+/*
+ * The "halloc" callback put into struct ohash_info by mansearch().
+ * Returns the result of mandoc_calloc() for sz elements of one byte
+ * each; the second argument is not looked at.
+ */
+static void *
+hash_halloc(size_t sz, void *arg)
+{
+       void            *table;
+
+       table = mandoc_calloc(sz, 1);
+       return(table);
+}
+
+/*
+ * The "alloc" callback put into struct ohash_info by mansearch().
+ * Returns the result of mandoc_malloc() for sz bytes; the second
+ * argument is not looked at.
+ */
+static void *
+hash_alloc(size_t sz, void *arg)
+{
+       void            *entry;
+
+       entry = mandoc_malloc(sz);
+       return(entry);
+}
+
+/*
+ * The "hfree" callback put into struct ohash_info by mansearch().
+ * Passes p to free(); the size and the argument are not needed.
+ */
+static void
+hash_free(void *p, size_t sz, void *arg)
+{
+       (void)sz;
+       (void)arg;
+       free(p);
+}
diff --git a/usr.bin/mandoc/mansearch.h b/usr.bin/mandoc/mansearch.h
new file mode 100644 (file)
index 0000000..bcb0696
--- /dev/null
@@ -0,0 +1,87 @@
+/*     $Id: mansearch.h,v 1.1 2013/12/31 00:40:19 schwarze Exp $ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANSEARCH_H
+#define MANSEARCH_H
+
+#define        MANDOC_DB        "mandoc.db"
+/* Search key types: each one owns a single bit, numbered from 0 upward. */
+#define        TYPE_An          (1ULL <<  0)
+#define        TYPE_Ar          (1ULL <<  1)
+#define        TYPE_At          (1ULL <<  2)
+#define        TYPE_Bsx         (1ULL <<  3)
+#define        TYPE_Bx          (1ULL <<  4)
+#define        TYPE_Cd          (1ULL <<  5)
+#define        TYPE_Cm          (1ULL <<  6)
+#define        TYPE_Dv          (1ULL <<  7)
+#define        TYPE_Dx          (1ULL <<  8)
+#define        TYPE_Em          (1ULL <<  9)
+#define        TYPE_Er          (1ULL << 10)
+#define        TYPE_Ev          (1ULL << 11)
+#define        TYPE_Fa          (1ULL << 12)
+#define        TYPE_Fl          (1ULL << 13)
+#define        TYPE_Fn          (1ULL << 14)
+#define        TYPE_Ft          (1ULL << 15)
+#define        TYPE_Fx          (1ULL << 16)
+#define        TYPE_Ic          (1ULL << 17)
+#define        TYPE_In          (1ULL << 18)
+#define        TYPE_Lb          (1ULL << 19)
+#define        TYPE_Li          (1ULL << 20)
+#define        TYPE_Lk          (1ULL << 21)
+#define        TYPE_Ms          (1ULL << 22)
+#define        TYPE_Mt          (1ULL << 23)
+#define        TYPE_Nd          (1ULL << 24)
+#define        TYPE_Nm          (1ULL << 25)
+#define        TYPE_Nx          (1ULL << 26)
+#define        TYPE_Ox          (1ULL << 27)
+#define        TYPE_Pa          (1ULL << 28)
+#define        TYPE_Rs          (1ULL << 29)
+#define        TYPE_Sh          (1ULL << 30)
+#define        TYPE_Ss          (1ULL << 31)
+#define        TYPE_St          (1ULL << 32)
+#define        TYPE_Sy          (1ULL << 33)
+#define        TYPE_Tn          (1ULL << 34)
+#define        TYPE_Va          (1ULL << 35)
+#define        TYPE_Vt          (1ULL << 36)
+#define        TYPE_Xr          (1ULL << 37)
+
+__BEGIN_DECLS
+
+struct manpage { /* one search result, see the res argument of mansearch() */
+       char            *file; /* file name, to be prefixed by manpath */
+       char            *names; /* a list of names with sections */
+       char            *desc; /* description of manpage */
+       int              form; /* 0 == catpage; NOTE(review): confirm other values */
+};
+
+struct mansearch { /* search options, passed to mansearch() as cfg */
+       const char      *arch; /* architecture, or NULL */
+       const char      *sec; /* manual section, or NULL */
+       uint64_t         deftype; /* type bits used if a term has no key */
+       int              flags; /* MANSEARCH_* bits, defined right below */
+#define        MANSEARCH_WHATIS 0x01 /* whatis mode: equality, no key */
+};
+
+int    mansearch(const struct mansearch *cfg, /* search options */
+               const struct manpaths *paths, /* manpaths */
+               int argc, /* number of entries in argv */
+               char *argv[], /* search terms */
+               struct manpage **res, /* results; NOTE(review): ownership unstated */
+               size_t *ressz); /* number of results returned */
+
+__END_DECLS
+
+#endif /*!MANSEARCH_H*/