Replace the Berkeley-DB based mandocdb(8) by an SQLite3-based version.

author schwarze <schwarze@openbsd.org>
Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
committer schwarze <schwarze@openbsd.org>
Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile
index 0c0c513..6f54636 100644
--- a/usr.bin/mandoc/Makefile
+++ b/usr.bin/mandoc/Makefile
@@ -1,9 +1,10 @@
-#      $OpenBSD: Makefile,v 1.71 2013/10/06 23:59:59 schwarze Exp $
+#      $OpenBSD: Makefile,v 1.72 2013/12/31 00:40:19 schwarze Exp $
  
  .include <bsd.own.mk>
  
-CFLAGS+=-DVERSION=\"1.12.2\"
-CFLAGS+=-W -Wall -Wstrict-prototypes -Wno-unused-parameter
+CFLAGS  += -DVERSION=\"1.13.0\"
+CFLAGS  += -W -Wall -Wstrict-prototypes -Wno-unused-parameter
+LDFLAGS += -lsqlite3
  
  SRCS=  roff.c tbl.c tbl_opts.c tbl_layout.c tbl_data.c eqn.c mandoc.c read.c
  SRCS+= mdoc_macro.c mdoc.c mdoc_hash.c \
@@ -14,7 +15,7 @@ SRCS+=        main.c mdoc_term.c chars.c term.c tree.c man_term.c eqn_term.c
  SRCS+= mdoc_man.c
  SRCS+= html.c mdoc_html.c man_html.c out.c eqn_html.c
  SRCS+= term_ps.c term_ascii.c tbl_term.c tbl_html.c
-SRCS+= manpath.c mandocdb.c apropos_db.c apropos.c
+SRCS+= manpath.c mandocdb.c mansearch.c apropos.c
  
  PROG=  mandoc
  
diff --git a/usr.bin/mandoc/apropos.c b/usr.bin/mandoc/apropos.c
index 3b58c06..d9cf3d1 100644
--- a/usr.bin/mandoc/apropos.c
+++ b/usr.bin/mandoc/apropos.c
@@ -1,7 +1,6 @@
-/*     $Id: apropos.c,v 1.17 2013/07/12 11:01:42 schwarze Exp $ */
+/*     $Id: apropos.c,v 1.18 2013/12/31 00:40:19 schwarze Exp $ */
  /*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
@@ -19,30 +18,26 @@
  
  #include <assert.h>
  #include <getopt.h>
+#include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+#include <unistd.h>
  
-#include "apropos_db.h"
-#include "mandoc.h"
  #include "manpath.h"
-
-static int      cmp(const void *, const void *);
-static void     list(struct res *, size_t, void *);
-
-static char    *progname;
+#include "mansearch.h"
  
  int
  apropos(int argc, char *argv[])
  {
-       int              ch, rc, whatis;
-       struct res      *res;
+       int              ch, whatis;
+       struct mansearch search;
+       size_t           i, sz;
+       struct manpage  *res;
         struct manpaths  paths;
-       size_t           terms, ressz;
-       struct opts      opts;
-       struct expr     *e;
         char            *defpaths, *auxpaths;
         char            *conf_file;
+       char            *progname;
         extern char     *optarg;
         extern int       optind;
  
@@ -55,13 +50,10 @@ apropos(int argc, char *argv[])
         whatis = (0 == strncmp(progname, "whatis", 6));
  
         memset(&paths, 0, sizeof(struct manpaths));
-       memset(&opts, 0, sizeof(struct opts));
+       memset(&search, 0, sizeof(struct mansearch));
  
-       ressz = 0;
-       res = NULL;
         auxpaths = defpaths = NULL;
         conf_file = NULL;
-       e = NULL;
  
         while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:")))
                 switch (ch) {
@@ -75,10 +67,10 @@ apropos(int argc, char *argv[])
                         auxpaths = optarg;
                         break;
                 case ('S'):
-                       opts.arch = optarg;
+                       search.arch = optarg;
                         break;
                 case ('s'):
-                       opts.cat = optarg;
+                       search.sec = optarg;
                         break;
                 default:
                         goto usage;
@@ -90,64 +82,28 @@ apropos(int argc, char *argv[])
         if (0 == argc)
                 goto usage;
  
-       rc = 0;
+       search.deftype = whatis ? TYPE_Nm : TYPE_Nm | TYPE_Nd;
+       search.flags = whatis ? MANSEARCH_WHATIS : 0;
  
         manpath_parse(&paths, conf_file, defpaths, auxpaths);
+       ch = mansearch(&search, &paths, argc, argv, &res, &sz);
+       manpath_free(&paths);
  
-       e = whatis ? termcomp(argc, argv, &terms) :
-                    exprcomp(argc, argv, &terms);
-               
-       if (NULL == e) {
-               fprintf(stderr, "%s: Bad expression\n", progname);
-               goto out;
-       }
-
-       rc = apropos_search
-               (paths.sz, paths.paths, &opts, 
-                e, terms, NULL, &ressz, &res, list);
+       if (0 == ch)
+               goto usage;
  
-       if (0 == rc) {
-               fprintf(stderr, "%s: Bad database\n", progname);
-               goto out;
+       for (i = 0; i < sz; i++) {
+               printf("%s - %s\n", res[i].names, res[i].desc);
+               free(res[i].file);
+               free(res[i].names);
+               free(res[i].desc);
         }
  
-out:
-       manpath_free(&paths);
-       resfree(res, ressz);
-       exprfree(e);
-       return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
-
+       free(res);
+       return(sz ? EXIT_SUCCESS : EXIT_FAILURE);
  usage:
         fprintf(stderr, "usage: %s [-C file] [-M path] [-m path] "
                         "[-S arch] [-s section]%s ...\n", progname,
                         whatis ? " name" : "\n               expression");
         return(EXIT_FAILURE);
  }
-
-/* ARGSUSED */
-static void
-list(struct res *res, size_t sz, void *arg)
-{
-       size_t           i;
-
-       qsort(res, sz, sizeof(struct res), cmp);
-
-       for (i = 0; i < sz; i++) {
-               if ( ! res[i].matched)
-                       continue;
-               printf("%s(%s%s%s) - %.70s\n",
-                               res[i].title,
-                               res[i].cat,
-                               *res[i].arch ? "/" : "",
-                               *res[i].arch ? res[i].arch : "",
-                               res[i].desc);
-       }
-}
-
-static int
-cmp(const void *p1, const void *p2)
-{
-
-       return(strcasecmp(((const struct res *)p1)->title,
-                               ((const struct res *)p2)->title));
-}
diff --git a/usr.bin/mandoc/apropos_db.c b/usr.bin/mandoc/apropos_db.c
deleted file mode 100644
index 6c2c0c6..0000000
--- a/usr.bin/mandoc/apropos_db.c
+++ /dev/null
@@ -1,866 +0,0 @@
-/*     $Id: apropos_db.c,v 1.19 2013/10/05 21:17:29 schwarze Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#include <sys/param.h>
-#include <sys/types.h>
-
-#include <assert.h>
-#include <fcntl.h>
-#include <regex.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <db.h>
-
-#include "mandocdb.h"
-#include "apropos_db.h"
-#include "mandoc.h"
-
-#define        RESFREE(_x) \
-       do { \
-               free((_x)->file); \
-               free((_x)->cat); \
-               free((_x)->title); \
-               free((_x)->arch); \
-               free((_x)->desc); \
-               free((_x)->matches); \
-       } while (/*CONSTCOND*/0)
-
-struct expr {
-       int              regex; /* is regex? */
-       int              index; /* index in match array */
-       uint64_t         mask; /* type-mask */
-       int              and; /* is rhs of logical AND? */
-       char            *v; /* search value */
-       regex_t          re; /* compiled re, if regex */
-       struct expr     *next; /* next in sequence */
-       struct expr     *subexpr;
-};
-
-struct type {
-       uint64_t         mask;
-       const char      *name;
-};
-
-struct rectree {
-       struct res      *node; /* record array for dir tree */
-       int              len; /* length of record array */
-};
-
-static const struct type types[] = {
-       { TYPE_An, "An" },
-       { TYPE_Ar, "Ar" },
-       { TYPE_At, "At" },
-       { TYPE_Bsx, "Bsx" },
-       { TYPE_Bx, "Bx" },
-       { TYPE_Cd, "Cd" },
-       { TYPE_Cm, "Cm" },
-       { TYPE_Dv, "Dv" },
-       { TYPE_Dx, "Dx" },
-       { TYPE_Em, "Em" },
-       { TYPE_Er, "Er" },
-       { TYPE_Ev, "Ev" },
-       { TYPE_Fa, "Fa" },
-       { TYPE_Fl, "Fl" },
-       { TYPE_Fn, "Fn" },
-       { TYPE_Fn, "Fo" },
-       { TYPE_Ft, "Ft" },
-       { TYPE_Fx, "Fx" },
-       { TYPE_Ic, "Ic" },
-       { TYPE_In, "In" },
-       { TYPE_Lb, "Lb" },
-       { TYPE_Li, "Li" },
-       { TYPE_Lk, "Lk" },
-       { TYPE_Ms, "Ms" },
-       { TYPE_Mt, "Mt" },
-       { TYPE_Nd, "Nd" },
-       { TYPE_Nm, "Nm" },
-       { TYPE_Nx, "Nx" },
-       { TYPE_Ox, "Ox" },
-       { TYPE_Pa, "Pa" },
-       { TYPE_Rs, "Rs" },
-       { TYPE_Sh, "Sh" },
-       { TYPE_Ss, "Ss" },
-       { TYPE_St, "St" },
-       { TYPE_Sy, "Sy" },
-       { TYPE_Tn, "Tn" },
-       { TYPE_Va, "Va" },
-       { TYPE_Va, "Vt" },
-       { TYPE_Xr, "Xr" },
-       { UINT64_MAX, "any" },
-       { 0, NULL }
-};
-
-static DB      *btree_open(void);
-static int      btree_read(const DBT *, const DBT *,
-                       const struct mchars *,
-                       uint64_t *, recno_t *, char **);
-static int      expreval(const struct expr *, int *);
-static void     exprexec(const struct expr *,
-                       const char *, uint64_t, struct res *);
-static int      exprmark(const struct expr *,
-                       const char *, uint64_t, int *);
-static struct expr *exprexpr(int, char *[], int *, int *, size_t *);
-static struct expr *exprterm(char *, int);
-static DB      *index_open(void);
-static int      index_read(const DBT *, const DBT *, int,
-                       const struct mchars *, struct res *);
-static void     norm_string(const char *,
-                       const struct mchars *, char **);
-static size_t   norm_utf8(unsigned int, char[7]);
-static int      single_search(struct rectree *, const struct opts *,
-                       const struct expr *, size_t terms,
-                       struct mchars *, int);
-
-/*
- * Open the keyword mandoc-db database.
- */
-static DB *
-btree_open(void)
-{
-       BTREEINFO        info;
-       DB              *db;
-
-       memset(&info, 0, sizeof(BTREEINFO));
-       info.lorder = 4321;
-       info.flags = R_DUP;
-
-       db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
-       if (NULL != db)
-               return(db);
-
-       return(NULL);
-}
-
-/*
- * Read a keyword from the database and normalise it.
- * Return 0 if the database is insane, else 1.
- */
-static int
-btree_read(const DBT *k, const DBT *v, const struct mchars *mc,
-               uint64_t *mask, recno_t *rec, char **buf)
-{
-       uint64_t         vbuf[2];
-
-       /* Are our sizes sane? */
-       if (k->size < 2 || sizeof(vbuf) != v->size)
-               return(0);
-
-       /* Is our string nil-terminated? */
-       if ('\0' != ((const char *)k->data)[(int)k->size - 1])
-               return(0);
-
-       norm_string((const char *)k->data, mc, buf);
-       memcpy(vbuf, v->data, v->size);
-       *mask = betoh64(vbuf[0]);
-       *rec  = betoh64(vbuf[1]);
-       return(1);
-}
-
-/*
- * Take a Unicode codepoint and produce its UTF-8 encoding.
- * This isn't the best way to do this, but it works.
- * The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
- */
-static size_t
-norm_utf8(unsigned int cp, char out[7])
-{
-       int              rc;
-
-       rc = 0;
-
-       if (cp <= 0x0000007F) {
-               rc = 1;
-               out[0] = (char)cp;
-       } else if (cp <= 0x000007FF) {
-               rc = 2;
-               out[0] = (cp >> 6  & 31) | 192;
-               out[1] = (cp       & 63) | 128;
-       } else if (cp <= 0x0000FFFF) {
-               rc = 3;
-               out[0] = (cp >> 12 & 15) | 224;
-               out[1] = (cp >> 6  & 63) | 128;
-               out[2] = (cp       & 63) | 128;
-       } else if (cp <= 0x001FFFFF) {
-               rc = 4;
-               out[0] = (cp >> 18 & 7) | 240;
-               out[1] = (cp >> 12 & 63) | 128;
-               out[2] = (cp >> 6  & 63) | 128;
-               out[3] = (cp       & 63) | 128;
-       } else if (cp <= 0x03FFFFFF) {
-               rc = 5;
-               out[0] = (cp >> 24 & 3) | 248;
-               out[1] = (cp >> 18 & 63) | 128;
-               out[2] = (cp >> 12 & 63) | 128;
-               out[3] = (cp >> 6  & 63) | 128;
-               out[4] = (cp       & 63) | 128;
-       } else if (cp <= 0x7FFFFFFF) {
-               rc = 6;
-               out[0] = (cp >> 30 & 1) | 252;
-               out[1] = (cp >> 24 & 63) | 128;
-               out[2] = (cp >> 18 & 63) | 128;
-               out[3] = (cp >> 12 & 63) | 128;
-               out[4] = (cp >> 6  & 63) | 128;
-               out[5] = (cp       & 63) | 128;
-       } else
-               return(0);
-
-       out[rc] = '\0';
-       return((size_t)rc);
-}
-
-/*
- * Normalise strings from the index and database.
- * These strings are escaped as defined by mandoc_char(7) along with
- * other goop in mandoc.h (e.g., soft hyphens).
- * This function normalises these into a nice UTF-8 string.
- * Returns 0 if the database is fucked.
- */
-static void
-norm_string(const char *val, const struct mchars *mc, char **buf)
-{
-       size_t            sz, bsz;
-       char              utfbuf[7];
-       const char       *seq, *cpp;
-       int               len, u, pos;
-       enum mandoc_esc   esc;
-       static const char res[] = { '\\', '\t',
-                               ASCII_NBRSP, ASCII_HYPH, '\0' };
-
-       /* Pre-allocate by the length of the input */
-
-       bsz = strlen(val) + 1;
-       *buf = mandoc_realloc(*buf, bsz);
-       pos = 0;
-
-       while ('\0' != *val) {
-               /*
-                * Halt on the first escape sequence.
-                * This also halts on the end of string, in which case
-                * we just copy, fallthrough, and exit the loop.
-                */
-               if ((sz = strcspn(val, res)) > 0) {
-                       memcpy(&(*buf)[pos], val, sz);
-                       pos += (int)sz;
-                       val += (int)sz;
-               }
-
-               if (ASCII_HYPH == *val) {
-                       (*buf)[pos++] = '-';
-                       val++;
-                       continue;
-               } else if ('\t' == *val || ASCII_NBRSP == *val) {
-                       (*buf)[pos++] = ' ';
-                       val++;
-                       continue;
-               } else if ('\\' != *val)
-                       break;
-
-               /* Read past the slash. */
-
-               val++;
-               u = 0;
-
-               /*
-                * Parse the escape sequence and see if it's a
-                * predefined character or special character.
-                */
-
-               esc = mandoc_escape(&val, &seq, &len);
-               if (ESCAPE_ERROR == esc)
-                       break;
-
-               /*
-                * XXX - this just does UTF-8, but we need to know
-                * beforehand whether we should do text substitution.
-                */
-
-               switch (esc) {
-               case (ESCAPE_SPECIAL):
-                       if (0 != (u = mchars_spec2cp(mc, seq, len)))
-                               break;
-                       /* FALLTHROUGH */
-               default:
-                       continue;
-               }
-
-               /*
-                * If we have a Unicode codepoint, try to convert that
-                * to a UTF-8 byte string.
-                */
-
-               cpp = utfbuf;
-               if (0 == (sz = norm_utf8(u, utfbuf)))
-                       continue;
-
-               /* Copy the rendered glyph into the stream. */
-
-               sz = strlen(cpp);
-               bsz += sz;
-
-               *buf = mandoc_realloc(*buf, bsz);
-
-               memcpy(&(*buf)[pos], cpp, sz);
-               pos += (int)sz;
-       }
-
-       (*buf)[pos] = '\0';
-}
-
-/*
- * Open the filename-index mandoc-db database.
- * Returns NULL if opening failed.
- */
-static DB *
-index_open(void)
-{
-       DB              *db;
-
-       db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
-       if (NULL != db)
-               return(db);
-
-       return(NULL);
-}
-
-/*
- * Safely unpack from an index file record into the structure.
- * Returns 1 if an entry was unpacked, 0 if the database is insane.
- */
-static int
-index_read(const DBT *key, const DBT *val, int index,
-               const struct mchars *mc, struct res *rec)
-{
-       size_t           left;
-       char            *np, *cp;
-       char             type;
-
-#define        INDEX_BREAD(_dst) \
-       do { \
-               if (NULL == (np = memchr(cp, '\0', left))) \
-                       return(0); \
-               norm_string(cp, mc, &(_dst)); \
-               left -= (np - cp) + 1; \
-               cp = np + 1; \
-       } while (/* CONSTCOND */ 0)
-
-       if (0 == (left = val->size))
-               return(0);
-
-       cp = val->data;
-       assert(sizeof(recno_t) == key->size);
-       memcpy(&rec->rec, key->data, key->size);
-       rec->volume = index;
-
-       if ('d' == (type = *cp++))
-               rec->type = RESTYPE_MDOC;
-       else if ('a' == type)
-               rec->type = RESTYPE_MAN;
-       else if ('c' == type)
-               rec->type = RESTYPE_CAT;
-       else
-               return(0);
-
-       left--;
-       INDEX_BREAD(rec->file);
-       INDEX_BREAD(rec->cat);
-       INDEX_BREAD(rec->title);
-       INDEX_BREAD(rec->arch);
-       INDEX_BREAD(rec->desc);
-       return(1);
-}
-
-/*
- * Search mandocdb databases in paths for expression "expr".
- * Filter out by "opts".
- * Call "res" with the results, which may be zero.
- * Return 0 if there was a database error, else return 1.
- */
-int
-apropos_search(int pathsz, char **paths, const struct opts *opts,
-               const struct expr *expr, size_t terms, void *arg,
-               size_t *sz, struct res **resp,
-               void (*res)(struct res *, size_t, void *))
-{
-       struct rectree   tree;
-       struct mchars   *mc;
-       int              i;
-
-       memset(&tree, 0, sizeof(struct rectree));
-
-       mc = mchars_alloc();
-       *sz = 0;
-       *resp = NULL;
-
-       /*
-        * Main loop.  Change into the directory containing manpage
-        * databases.  Run our expession over each database in the set.
-        */
-
-       for (i = 0; i < pathsz; i++) {
-               assert('/' == paths[i][0]);
-               if (chdir(paths[i]))
-                       continue;
-               if (single_search(&tree, opts, expr, terms, mc, i))
-                       continue;
-
-               resfree(tree.node, tree.len);
-               mchars_free(mc);
-               return(0);
-       }
-
-       (*res)(tree.node, tree.len, arg);
-       *sz = tree.len;
-       *resp = tree.node;
-       mchars_free(mc);
-       return(1);
-}
-
-static int
-single_search(struct rectree *tree, const struct opts *opts,
-               const struct expr *expr, size_t terms,
-               struct mchars *mc, int vol)
-{
-       int              root, leaf, ch;
-       DBT              key, val;
-       DB              *btree, *idx;
-       char            *buf;
-       struct res      *rs;
-       struct res       r;
-       uint64_t         mask;
-       recno_t          rec;
-
-       root    = -1;
-       leaf    = -1;
-       btree   = NULL;
-       idx     = NULL;
-       buf     = NULL;
-       rs      = tree->node;
-
-       memset(&r, 0, sizeof(struct res));
-
-       if (NULL == (btree = btree_open()))
-               return(1);
-
-       if (NULL == (idx = index_open())) {
-               (*btree->close)(btree);
-               return(1);
-       }
-
-       while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
-               if ( ! btree_read(&key, &val, mc, &mask, &rec, &buf))
-                       break;
-
-               /*
-                * See if this keyword record matches any of the
-                * expressions we have stored.
-                */
-               if ( ! exprmark(expr, buf, mask, NULL))
-                       continue;
-
-               /*
-                * O(log n) scan for prior records.  Since a record
-                * number is unbounded, this has decent performance over
-                * a complex hash function.
-                */
-
-               for (leaf = root; leaf >= 0; )
-                       if (rec > rs[leaf].rec &&
-                                       rs[leaf].rhs >= 0)
-                               leaf = rs[leaf].rhs;
-                       else if (rec < rs[leaf].rec &&
-                                       rs[leaf].lhs >= 0)
-                               leaf = rs[leaf].lhs;
-                       else
-                               break;
-
-               /*
-                * If we find a record, see if it has already evaluated
-                * to true.  If it has, great, just keep going.  If not,
-                * try to evaluate it now and continue anyway.
-                */
-
-               if (leaf >= 0 && rs[leaf].rec == rec) {
-                       if (0 == rs[leaf].matched)
-                               exprexec(expr, buf, mask, &rs[leaf]);
-                       continue;
-               }
-
-               /*
-                * We have a new file to examine.
-                * Extract the manpage's metadata from the index
-                * database, then begin partial evaluation.
-                */
-
-               key.data = &rec;
-               key.size = sizeof(recno_t);
-
-               if (0 != (*idx->get)(idx, &key, &val, 0))
-                       break;
-
-               r.lhs = r.rhs = -1;
-               if ( ! index_read(&key, &val, vol, mc, &r))
-                       break;
-
-               /* XXX: this should be elsewhere, I guess? */
-
-               if (opts->cat && strcasecmp(opts->cat, r.cat))
-                       continue;
-
-               if (opts->arch && *r.arch)
-                       if (strcasecmp(opts->arch, r.arch))
-                               continue;
-
-               tree->node = rs = mandoc_realloc
-                       (rs, (tree->len + 1) * sizeof(struct res));
-
-               memcpy(&rs[tree->len], &r, sizeof(struct res));
-               memset(&r, 0, sizeof(struct res));
-               rs[tree->len].matches =
-                       mandoc_calloc(terms, sizeof(int));
-
-               exprexec(expr, buf, mask, &rs[tree->len]);
-
-               /* Append to our tree. */
-
-               if (leaf >= 0) {
-                       if (rec > rs[leaf].rec)
-                               rs[leaf].rhs = tree->len;
-                       else
-                               rs[leaf].lhs = tree->len;
-               } else
-                       root = tree->len;
-
-               tree->len++;
-       }
-
-       (*btree->close)(btree);
-       (*idx->close)(idx);
-
-       free(buf);
-       RESFREE(&r);
-       return(1 == ch);
-}
-
-void
-resfree(struct res *rec, size_t sz)
-{
-       size_t           i;
-
-       for (i = 0; i < sz; i++)
-               RESFREE(&rec[i]);
-       free(rec);
-}
-
-/*
- * Compile a list of straight-up terms.
- * The arguments are re-written into ~[[:<:]]term[[:>:]], or "term"
- * surrounded by word boundaries, then pumped through exprterm().
- * Terms are case-insensitive.
- * This emulates whatis(1) behaviour.
- */
-struct expr *
-termcomp(int argc, char *argv[], size_t *tt)
-{
-       char            *buf;
-       int              pos;
-       struct expr     *e, *next;
-       size_t           sz;
-
-       buf = NULL;
-       e = NULL;
-       *tt = 0;
-
-       for (pos = argc - 1; pos >= 0; pos--) {
-               sz = strlen(argv[pos]) + 18;
-               buf = mandoc_realloc(buf, sz);
-               strlcpy(buf, "Nm~[[:<:]]", sz);
-               strlcat(buf, argv[pos], sz);
-               strlcat(buf, "[[:>:]]", sz);
-               if (NULL == (next = exprterm(buf, 0))) {
-                       free(buf);
-                       exprfree(e);
-                       return(NULL);
-               }
-               next->next = e;
-               e = next;
-               (*tt)++;
-       }
-
-       free(buf);
-       return(e);
-}
-
-/*
- * Compile a sequence of logical expressions.
- * See apropos.1 for a grammar of this sequence.
- */
-struct expr *
-exprcomp(int argc, char *argv[], size_t *tt)
-{
-       int              pos, lvl;
-       struct expr     *e;
-
-       pos = lvl = 0;
-       *tt = 0;
-
-       e = exprexpr(argc, argv, &pos, &lvl, tt);
-
-       if (0 == lvl && pos >= argc)
-               return(e);
-
-       exprfree(e);
-       return(NULL);
-}
-
-/*
- * Compile an array of tokens into an expression.
- * An informal expression grammar is defined in apropos(1).
- * Return NULL if we fail doing so.  All memory will be cleaned up.
- * Return the root of the expression sequence if alright.
- */
-static struct expr *
-exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
-{
-       struct expr     *e, *first, *next;
-       int              log;
-
-       first = next = NULL;
-
-       for ( ; *pos < argc; (*pos)++) {
-               e = next;
-
-               /*
-                * Close out a subexpression.
-                */
-
-               if (NULL != e && 0 == strcmp(")", argv[*pos])) {
-                       if (--(*lvl) < 0)
-                               goto err;
-                       break;
-               }
-
-               /*
-                * Small note: if we're just starting, don't let "-a"
-                * and "-o" be considered logical operators: they're
-                * just tokens unless pairwise joining, in which case we
-                * record their existence (or assume "OR").
-                */
-               log = 0;
-
-               if (NULL != e && 0 == strcmp("-a", argv[*pos]))
-                       log = 1;
-               else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
-                       log = 2;
-
-               if (log > 0 && ++(*pos) >= argc)
-                       goto err;
-
-               /*
-                * Now we parse the term part.  This can begin with
-                * "-i", in which case the expression is case
-                * insensitive.
-                */
-
-               if (0 == strcmp("(", argv[*pos])) {
-                       ++(*pos);
-                       ++(*lvl);
-                       next = mandoc_calloc(1, sizeof(struct expr));
-                       next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
-                       if (NULL == next->subexpr) {
-                               free(next);
-                               next = NULL;
-                       }
-               } else if (0 == strcmp("-i", argv[*pos])) {
-                       if (++(*pos) >= argc)
-                               goto err;
-                       next = exprterm(argv[*pos], 0);
-               } else
-                       next = exprterm(argv[*pos], 1);
-
-               if (NULL == next)
-                       goto err;
-
-               next->and = log == 1;
-               next->index = (int)(*tt)++;
-
-               /* Append to our chain of expressions. */
-
-               if (NULL == first) {
-                       assert(NULL == e);
-                       first = next;
-               } else {
-                       assert(NULL != e);
-                       e->next = next;
-               }
-       }
-
-       return(first);
-err:
-       exprfree(first);
-       return(NULL);
-}
-
-/*
- * Parse a terminal expression with the grammar as defined in
- * apropos(1).
- * Return NULL if we fail the parse.
- */
-static struct expr *
-exprterm(char *buf, int cs)
-{
-       struct expr      e;
-       struct expr     *p;
-       char            *key;
-       int              i;
-
-       memset(&e, 0, sizeof(struct expr));
-
-       /* Choose regex or substring match. */
-
-       if (NULL == (e.v = strpbrk(buf, "=~"))) {
-               e.regex = 0;
-               e.v = buf;
-       } else {
-               e.regex = '~' == *e.v;
-               *e.v++ = '\0';
-       }
-
-       /* Determine the record types to search for. */
-
-       e.mask = 0;
-       if (buf < e.v) {
-               while (NULL != (key = strsep(&buf, ","))) {
-                       i = 0;
-                       while (types[i].mask &&
-                                       strcmp(types[i].name, key))
-                               i++;
-                       e.mask |= types[i].mask;
-               }
-       }
-       if (0 == e.mask)
-               e.mask = TYPE_Nm | TYPE_Nd;
-
-       if (e.regex) {
-               i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE);
-               if (regcomp(&e.re, e.v, i))
-                       return(NULL);
-       }
-
-       e.v = mandoc_strdup(e.v);
-
-       p = mandoc_calloc(1, sizeof(struct expr));
-       memcpy(p, &e, sizeof(struct expr));
-       return(p);
-}
-
-void
-exprfree(struct expr *p)
-{
-       struct expr     *pp;
-
-       while (NULL != p) {
-               if (p->subexpr)
-                       exprfree(p->subexpr);
-               if (p->regex)
-                       regfree(&p->re);
-               free(p->v);
-               pp = p->next;
-               free(p);
-               p = pp;
-       }
-}
-
-static int
-exprmark(const struct expr *p, const char *cp,
-               uint64_t mask, int *ms)
-{
-
-       for ( ; p; p = p->next) {
-               if (p->subexpr) {
-                       if (exprmark(p->subexpr, cp, mask, ms))
-                               return(1);
-                       continue;
-               } else if ( ! (mask & p->mask))
-                       continue;
-
-               if (p->regex) {
-                       if (regexec(&p->re, cp, 0, NULL, 0))
-                               continue;
-               } else if (NULL == strcasestr(cp, p->v))
-                       continue;
-
-               if (NULL == ms)
-                       return(1);
-               else
-                       ms[p->index] = 1;
-       }
-
-       return(0);
-}
-
-static int
-expreval(const struct expr *p, int *ms)
-{
-       int              match;
-
-       /*
-        * AND has precedence over OR.  Analysis is left-right, though
-        * it doesn't matter because there are no side-effects.
-        * Thus, step through pairwise ANDs and accumulate their Boolean
-        * evaluation.  If we encounter a single true AND collection or
-        * standalone term, the whole expression is true (by definition
-        * of OR).
-        */
-
-       for (match = 0; p && ! match; p = p->next) {
-               /* Evaluate a subexpression, if applicable. */
-               if (p->subexpr && ! ms[p->index])
-                       ms[p->index] = expreval(p->subexpr, ms);
-
-               match = ms[p->index];
-               for ( ; p->next && p->next->and; p = p->next) {
-                       /* Evaluate a subexpression, if applicable. */
-                       if (p->next->subexpr && ! ms[p->next->index])
-                               ms[p->next->index] =
-                                       expreval(p->next->subexpr, ms);
-                       match = match && ms[p->next->index];
-               }
-       }
-
-       return(match);
-}
-
-/*
- * First, update the array of terms for which this expression evaluates
- * to true.
- * Second, logically evaluate all terms over the updated array of truth
- * values.
- * If this evaluates to true, mark the expression as satisfied.
- */
-static void
-exprexec(const struct expr *e, const char *cp,
-               uint64_t mask, struct res *r)
-{
-
-       assert(0 == r->matched);
-       exprmark(e, cp, mask, r->matches);
-       r->matched = expreval(e, r->matches);
-}
diff --git a/usr.bin/mandoc/apropos_db.h b/usr.bin/mandoc/apropos_db.h
deleted file mode 100644
index 78cc155..0000000
--- a/usr.bin/mandoc/apropos_db.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*     $Id: apropos_db.h,v 1.11 2012/04/15 11:54:47 schwarze Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef APROPOS_H
-#define APROPOS_H
-
-enum   restype {
-       RESTYPE_MAN, /* man(7) file */
-       RESTYPE_MDOC, /* mdoc(7) file */
-       RESTYPE_CAT /* pre-formatted file */
-};
-
-struct res {
-       enum restype     type; /* input file type */
-       char            *file; /* file in file-system */
-       char            *cat; /* category (3p, 3, etc.) */
-       char            *title; /* title (FOO, etc.) */
-       char            *arch; /* arch (or empty string) */
-       char            *desc; /* description (from Nd) */
-       unsigned int     rec; /* record in index */
-       /*
-        * The index volume.  This indexes into the array of directories
-        * searched for manual page databases.
-        */
-       unsigned int     volume;
-       /*
-        * The following fields are used internally.
-        *
-        * Maintain a binary tree for checking the uniqueness of `rec'
-        * when adding elements to the results array.
-        * Since the results array is dynamic, use offset in the array
-        * instead of a pointer to the structure.
-        */
-       int              lhs;
-       int              rhs;
-       int              matched; /* expression is true */
-       int             *matches; /* partial truth evaluations */
-};
-
-struct opts {
-       const char      *arch; /* restrict to architecture */
-       const char      *cat; /* restrict to manual section */
-};
-
-__BEGIN_DECLS
-
-struct expr;
-
-int             apropos_search(int, char **, const struct opts *,
-                       const struct expr *, size_t, 
-                       void *, size_t *, struct res **,
-                       void (*)(struct res *, size_t, void *));
-struct expr    *exprcomp(int, char *[], size_t *);
-void            exprfree(struct expr *);
-void            resfree(struct res *, size_t);
-struct expr    *termcomp(int, char *[], size_t *);
-
-__END_DECLS
-
-#endif /*!APROPOS_H*/
diff --git a/usr.bin/mandoc/mandocdb.c b/usr.bin/mandoc/mandocdb.c

index 754c09a..84847d9 100644 (file)
--- a/usr.bin/mandoc/mandocdb.c
+++ b/usr.bin/mandoc/mandocdb.c
@@ -1,7 +1,7 @@
-/*     $Id: mandocdb.c,v 1.46 2013/11/21 01:47:10 schwarze Exp $ */
+/*     $Id: mandocdb.c,v 1.47 2013/12/31 00:40:19 schwarze Exp $ */
  /*
- * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
@@ -15,283 +15,314 @@
   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   */
-#include <sys/types.h>
+#include <sys/stat.h>
  
  #include <assert.h>
  #include <ctype.h>
-#include <dirent.h>
  #include <errno.h>
  #include <fcntl.h>
+#include <fts.h>
  #include <getopt.h>
  #include <limits.h>
+#include <stddef.h>
  #include <stdio.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
-#include <db.h>
  
-#include "man.h"
+#include <ohash.h>
+#include <sqlite3.h>
+
  #include "mdoc.h"
+#include "man.h"
  #include "mandoc.h"
-#include "mandocdb.h"
  #include "manpath.h"
+#include "mansearch.h"
+
+/*
+ * Thin wrappers around the SQLite calls used in this file.
+ * Each one prints sqlite3_errmsg() to stderr when the call does not
+ * return the expected status and then simply continues; none aborts.
+ * All of them operate on the file-scope handle `db'.
+ * The SQL_BIND_* macros post-increment their index argument (_i), so
+ * it must be a modifiable lvalue; successive invocations therefore
+ * bind successive parameters.
+ * Every body is wrapped in do { } while (0) so that an invocation is
+ * exactly one statement and cannot capture a following `else'.
+ */
+#define        SQL_EXEC(_v) do { \
+       if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_TEXT(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_text \
+               ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_INT(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_int \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_BIND_INT64(_s, _i, _v) do { \
+       if (SQLITE_OK != sqlite3_bind_int64 \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
+#define        SQL_STEP(_s) do { \
+       if (SQLITE_DONE != sqlite3_step((_s))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg(db)); \
+} while (/*CONSTCOND*/0)
  
-#define        MANDOC_BUFSZ      BUFSIZ
-#define        MANDOC_SLOP       1024
-
-#define        MANDOC_SRC        0x1
-#define        MANDOC_FORM       0x2
-
-/* Access to the mandoc database on disk. */
-
-struct mdb {
-       char              idxn[PATH_MAX]; /* index db filename */
-       char              dbn[PATH_MAX]; /* keyword db filename */
-       DB               *idx; /* index recno database */
-       DB               *db; /* keyword btree database */
+enum   op { /* operational mode; set by the option parser in mandocdb() */
+       OP_DEFAULT = 0, /* new dbs from dir list or default config */
+       OP_CONFFILE, /* new databases from custom config file (-C) */
+       OP_UPDATE, /* delete/add entries in existing database (-d) */
+       OP_DELETE, /* delete entries from existing database (-u) */
+       OP_TEST /* change no databases, report potential problems (-t) */
  };
  
-/* Stack of temporarily unused index records. */
-
-struct recs {
-       recno_t          *stack; /* pointer to a malloc'ed array */
-       size_t            size; /* number of allocated slots */
-       size_t            cur; /* current number of empty records */
-       recno_t           last; /* last record number in the index */
+enum   form { /* file format; type of mpage.form, mlink.dform and mlink.fform */
+       FORM_NONE,  /* format is unknown */
+       FORM_SRC,   /* format is -man or -mdoc */
+       FORM_CAT    /* format is cat */
  };
  
-/* Tiny list for files.  No need to bring in QUEUE. */
+struct str { /* presumably the element type of the `strings' table -- confirm */
+       char            *utf8; /* key in UTF-8 form */
+       const struct mpage *mpage; /* if set, the owning parse */
+       uint64_t         mask; /* bitmask in sequence */
+       char             key[]; /* the string itself; flexible array member, must stay last */
+};
  
-struct of {
-       char             *fname; /* heap-allocated */
-       char             *sec;
-       char             *arch;
-       char             *title;
-       int               src_form;
-       struct of        *next; /* NULL for last one */
-       struct of        *first; /* first in list */
+struct inodev { /* embedded in struct mpage, where it serves as the hash key */
+       ino_t            st_ino; /* inode number; presumably copied from struct stat -- confirm */
+       dev_t            st_dev; /* device number; presumably copied from struct stat -- confirm */
  };
  
-/* Buffer for storing growable data. */
+struct mpage { /* one manual page; stored in the `mpages' hash table */
+       struct inodev    inodev;  /* used for hashing routine: mpages_info.key_offset points here */
+       enum form        form;    /* format from file content */
+       char            *sec;     /* section from file content */
+       char            *arch;    /* architecture from file content */
+       char            *title;   /* title from file content */
+       char            *desc;    /* description from file content */
+       struct mlink    *mlinks;  /* head of a singly linked list of struct mlink */
+};
  
-struct buf {
-       char             *cp;
-       size_t            len; /* current length */
-       size_t            size; /* total buffer size */
+struct mlink { /* one directory entry; stored in the `mlinks' hash table */
+       char             file[PATH_MAX]; /* filename rel. to manpath; hash key (mlinks_info.key_offset) */
+       enum form        dform;   /* format from directory */
+       enum form        fform;   /* format from file name suffix */
+       char            *dsec;    /* section from directory */
+       char            *arch;    /* architecture from directory */
+       char            *name;    /* name from file name (not empty) */
+       char            *fsec;    /* section from file name suffix */
+       struct mlink    *next;    /* next entry in the singly linked list, if any */
  };
  
-/* Operation we're going to perform. */
+struct title { /* a page title paired with a file name; presumably for mismatch reporting -- confirm against its users */
+       char            *title; /* name(sec/arch) given inside the file */
+       char            *file; /* file name in case of mismatch */
+};
  
-enum   op {
-       OP_DEFAULT = 0, /* new dbs from dir list or default config */
-       OP_CONFFILE, /* new databases from custom config file */
-       OP_UPDATE, /* delete/add entries in existing database */
-       OP_DELETE, /* delete entries from existing database */
-       OP_TEST /* change no databases, report potential problems */
+enum   stmt { /* indices into the file-scope stmts[] array */
+       STMT_DELETE_PAGE = 0,   /* delete mpage */
+       STMT_INSERT_PAGE,       /* insert mpage */
+       STMT_INSERT_LINK,       /* insert mlink */
+       STMT_INSERT_KEY,        /* insert parsed key */
+       STMT__MAX               /* number of statements; dimension of stmts[] */
  };
  
-#define        MAN_ARGS          DB *hash, \
-                         struct buf *buf, \
-                         struct buf *dbuf, \
-                         const struct man_node *n
-#define        MDOC_ARGS         DB *hash, \
-                         struct buf *buf, \
-                         struct buf *dbuf, \
-                         const struct mdoc_node *n, \
-                         const struct mdoc_meta *m
-
-static void              buf_appendmdoc(struct buf *, 
-                               const struct mdoc_node *, int);
-static void              buf_append(struct buf *, const char *);
-static void              buf_appendb(struct buf *, 
-                               const void *, size_t);
-static void              dbt_put(DB *, const char *, DBT *, DBT *);
-static void              hash_put(DB *, const struct buf *, uint64_t);
-static void              hash_reset(DB **);
-static void              index_merge(const struct of *, struct mparse *,
-                               struct buf *, struct buf *, DB *,
-                               struct mdb *, struct recs *);
-static void              index_prune(const struct of *, struct mdb *,
-                               struct recs *);
-static void              ofile_argbuild(int, char *[], struct of **,
-                               const char *);
-static void              ofile_dirbuild(const char *, const char *,
-                               const char *, int, struct of **);
-static void              ofile_free(struct of *);
-static void              pformatted(DB *, struct buf *, 
-                               struct buf *, const struct of *);
-static int               pman_node(MAN_ARGS);
-static void              pmdoc_node(MDOC_ARGS);
-static int               pmdoc_head(MDOC_ARGS);
-static int               pmdoc_body(MDOC_ARGS);
-static int               pmdoc_Fd(MDOC_ARGS);
-static int               pmdoc_In(MDOC_ARGS);
-static int               pmdoc_Fn(MDOC_ARGS);
-static int               pmdoc_Nd(MDOC_ARGS);
-static int               pmdoc_Nm(MDOC_ARGS);
-static int               pmdoc_Sh(MDOC_ARGS);
-static int               pmdoc_St(MDOC_ARGS);
-static int               pmdoc_Xr(MDOC_ARGS);
-
-#define        MDOCF_CHILD       0x01  /* Automatically index child nodes. */
+typedef        int (*mdoc_fp)(struct mpage *, const struct mdoc_node *);
  
  struct mdoc_handler {
-       int             (*fp)(MDOC_ARGS);  /* Optional handler. */
-       uint64_t          mask;  /* Set unless handler returns 0. */
-       int               flags;  /* For use by pmdoc_node. */
+       mdoc_fp          fp; /* optional handler */
+       uint64_t         mask;  /* set unless handler returns 0 */
  };
  
+static void     dbclose(int);
+static void     dbindex(const struct mpage *, struct mchars *);
+static int      dbopen(int);
+static void     dbprune(void);
+static void     filescan(const char *);
+static void    *hash_alloc(size_t, void *);
+static void     hash_free(void *, size_t, void *);
+static void    *hash_halloc(size_t, void *);
+static void     mlink_add(struct mlink *, const struct stat *);
+static void     mlink_free(struct mlink *);
+static void     mlinks_undupe(struct mpage *);
+static void     mpages_free(void);
+static void     mpages_merge(struct mchars *, struct mparse *, int);
+static void     parse_cat(struct mpage *);
+static void     parse_man(struct mpage *, const struct man_node *);
+static void     parse_mdoc(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_body(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_head(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Fd(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Fn(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_In(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Nd(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_St(struct mpage *, const struct mdoc_node *);
+static int      parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
+static void     putkey(const struct mpage *,
+                       const char *, uint64_t);
+static void     putkeys(const struct mpage *,
+                       const char *, size_t, uint64_t);
+static void     putmdockey(const struct mpage *,
+                       const struct mdoc_node *, uint64_t);
+static void     say(const char *, const char *, ...);
+static int      set_basedir(const char *);
+static int      treescan(void);
+static size_t   utf8(unsigned int, char [7]);
+static void     utf8key(struct mchars *, struct str *);
+
+static char            *progname;
+static int              use_all; /* use all found files */
+static int              nodb; /* no database changes */
+static int              verb; /* print what we're doing */
+static int              warnings; /* warn about crap */
+static int              exitcode; /* to be returned by main */
+static enum op          op; /* operational mode */
+static char             basedir[PATH_MAX]; /* current base directory */
+static struct ohash     mpages; /* table of distinct manual pages */
+static struct ohash     mlinks; /* table of directory entries */
+static struct ohash     strings; /* table of all strings */
+static sqlite3         *db = NULL; /* current database */
+static sqlite3_stmt    *stmts[STMT__MAX]; /* current statements */
+
  static const struct mdoc_handler mdocs[MDOC_MAX] = {
-       { NULL, 0, 0 },  /* Ap */
-       { NULL, 0, 0 },  /* Dd */
-       { NULL, 0, 0 },  /* Dt */
-       { NULL, 0, 0 },  /* Os */
-       { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
-       { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
-       { NULL, 0, 0 },  /* Pp */
-       { NULL, 0, 0 },  /* D1 */
-       { NULL, 0, 0 },  /* Dl */
-       { NULL, 0, 0 },  /* Bd */
-       { NULL, 0, 0 },  /* Ed */
-       { NULL, 0, 0 },  /* Bl */
-       { NULL, 0, 0 },  /* El */
-       { NULL, 0, 0 },  /* It */
-       { NULL, 0, 0 },  /* Ad */
-       { NULL, TYPE_An, MDOCF_CHILD },  /* An */
-       { NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
-       { NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
-       { NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
-       { NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
-       { NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
-       { NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
-       { NULL, 0, 0 },  /* Ex */
-       { NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
-       { pmdoc_Fd, TYPE_In, 0 },  /* Fd */
-       { NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
-       { pmdoc_Fn, 0, 0 },  /* Fn */
-       { NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
-       { NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
-       { pmdoc_In, TYPE_In, 0 },  /* In */
-       { NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
-       { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
-       { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
-       { NULL, 0, 0 },  /* Op */
-       { NULL, 0, 0 },  /* Ot */
-       { NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
-       { NULL, 0, 0 },  /* Rv */
-       { pmdoc_St, TYPE_St, 0 },  /* St */
-       { NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
-       { pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
-       { pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
-       { NULL, 0, 0 },  /* %A */
-       { NULL, 0, 0 },  /* %B */
-       { NULL, 0, 0 },  /* %D */
-       { NULL, 0, 0 },  /* %I */
-       { NULL, 0, 0 },  /* %J */
-       { NULL, 0, 0 },  /* %N */
-       { NULL, 0, 0 },  /* %O */
-       { NULL, 0, 0 },  /* %P */
-       { NULL, 0, 0 },  /* %R */
-       { NULL, 0, 0 },  /* %T */
-       { NULL, 0, 0 },  /* %V */
-       { NULL, 0, 0 },  /* Ac */
-       { NULL, 0, 0 },  /* Ao */
-       { NULL, 0, 0 },  /* Aq */
-       { NULL, TYPE_At, MDOCF_CHILD },  /* At */
-       { NULL, 0, 0 },  /* Bc */
-       { NULL, 0, 0 },  /* Bf */
-       { NULL, 0, 0 },  /* Bo */
-       { NULL, 0, 0 },  /* Bq */
-       { NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
-       { NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
-       { NULL, 0, 0 },  /* Db */
-       { NULL, 0, 0 },  /* Dc */
-       { NULL, 0, 0 },  /* Do */
-       { NULL, 0, 0 },  /* Dq */
-       { NULL, 0, 0 },  /* Ec */
-       { NULL, 0, 0 },  /* Ef */
-       { NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
-       { NULL, 0, 0 },  /* Eo */
-       { NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
-       { NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
-       { NULL, 0, 0 },  /* No */
-       { NULL, 0, 0 },  /* Ns */
-       { NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
-       { NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
-       { NULL, 0, 0 },  /* Pc */
-       { NULL, 0, 0 },  /* Pf */
-       { NULL, 0, 0 },  /* Po */
-       { NULL, 0, 0 },  /* Pq */
-       { NULL, 0, 0 },  /* Qc */
-       { NULL, 0, 0 },  /* Ql */
-       { NULL, 0, 0 },  /* Qo */
-       { NULL, 0, 0 },  /* Qq */
-       { NULL, 0, 0 },  /* Re */
-       { NULL, 0, 0 },  /* Rs */
-       { NULL, 0, 0 },  /* Sc */
-       { NULL, 0, 0 },  /* So */
-       { NULL, 0, 0 },  /* Sq */
-       { NULL, 0, 0 },  /* Sm */
-       { NULL, 0, 0 },  /* Sx */
-       { NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
-       { NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
-       { NULL, 0, 0 },  /* Ux */
-       { NULL, 0, 0 },  /* Xc */
-       { NULL, 0, 0 },  /* Xo */
-       { pmdoc_head, TYPE_Fn, 0 },  /* Fo */
-       { NULL, 0, 0 },  /* Fc */
-       { NULL, 0, 0 },  /* Oo */
-       { NULL, 0, 0 },  /* Oc */
-       { NULL, 0, 0 },  /* Bk */
-       { NULL, 0, 0 },  /* Ek */
-       { NULL, 0, 0 },  /* Bt */
-       { NULL, 0, 0 },  /* Hf */
-       { NULL, 0, 0 },  /* Fr */
-       { NULL, 0, 0 },  /* Ud */
-       { NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
-       { NULL, 0, 0 },  /* Lp */
-       { NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
-       { NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
-       { NULL, 0, 0 },  /* Brq */
-       { NULL, 0, 0 },  /* Bro */
-       { NULL, 0, 0 },  /* Brc */
-       { NULL, 0, 0 },  /* %C */
-       { NULL, 0, 0 },  /* Es */
-       { NULL, 0, 0 },  /* En */
-       { NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
-       { NULL, 0, 0 },  /* %Q */
-       { NULL, 0, 0 },  /* br */
-       { NULL, 0, 0 },  /* sp */
-       { NULL, 0, 0 },  /* %U */
-       { NULL, 0, 0 },  /* Ta */
+       { NULL, 0 },  /* Ap */
+       { NULL, 0 },  /* Dd */
+       { NULL, 0 },  /* Dt */
+       { NULL, 0 },  /* Os */
+       { parse_mdoc_Sh, TYPE_Sh }, /* Sh */
+       { parse_mdoc_head, TYPE_Ss }, /* Ss */
+       { NULL, 0 },  /* Pp */
+       { NULL, 0 },  /* D1 */
+       { NULL, 0 },  /* Dl */
+       { NULL, 0 },  /* Bd */
+       { NULL, 0 },  /* Ed */
+       { NULL, 0 },  /* Bl */
+       { NULL, 0 },  /* El */
+       { NULL, 0 },  /* It */
+       { NULL, 0 },  /* Ad */
+       { NULL, TYPE_An },  /* An */
+       { NULL, TYPE_Ar },  /* Ar */
+       { NULL, TYPE_Cd },  /* Cd */
+       { NULL, TYPE_Cm },  /* Cm */
+       { NULL, TYPE_Dv },  /* Dv */
+       { NULL, TYPE_Er },  /* Er */
+       { NULL, TYPE_Ev },  /* Ev */
+       { NULL, 0 },  /* Ex */
+       { NULL, TYPE_Fa },  /* Fa */
+       { parse_mdoc_Fd, 0 },  /* Fd */
+       { NULL, TYPE_Fl },  /* Fl */
+       { parse_mdoc_Fn, 0 },  /* Fn */
+       { NULL, TYPE_Ft },  /* Ft */
+       { NULL, TYPE_Ic },  /* Ic */
+       { parse_mdoc_In, TYPE_In },  /* In */
+       { NULL, TYPE_Li },  /* Li */
+       { parse_mdoc_Nd, TYPE_Nd },  /* Nd */
+       { parse_mdoc_Nm, TYPE_Nm },  /* Nm */
+       { NULL, 0 },  /* Op */
+       { NULL, 0 },  /* Ot */
+       { NULL, TYPE_Pa },  /* Pa */
+       { NULL, 0 },  /* Rv */
+       { parse_mdoc_St, 0 },  /* St */
+       { NULL, TYPE_Va },  /* Va */
+       { parse_mdoc_body, TYPE_Va },  /* Vt */
+       { parse_mdoc_Xr, 0 },  /* Xr */
+       { NULL, 0 },  /* %A */
+       { NULL, 0 },  /* %B */
+       { NULL, 0 },  /* %D */
+       { NULL, 0 },  /* %I */
+       { NULL, 0 },  /* %J */
+       { NULL, 0 },  /* %N */
+       { NULL, 0 },  /* %O */
+       { NULL, 0 },  /* %P */
+       { NULL, 0 },  /* %R */
+       { NULL, 0 },  /* %T */
+       { NULL, 0 },  /* %V */
+       { NULL, 0 },  /* Ac */
+       { NULL, 0 },  /* Ao */
+       { NULL, 0 },  /* Aq */
+       { NULL, TYPE_At },  /* At */
+       { NULL, 0 },  /* Bc */
+       { NULL, 0 },  /* Bf */
+       { NULL, 0 },  /* Bo */
+       { NULL, 0 },  /* Bq */
+       { NULL, TYPE_Bsx },  /* Bsx */
+       { NULL, TYPE_Bx },  /* Bx */
+       { NULL, 0 },  /* Db */
+       { NULL, 0 },  /* Dc */
+       { NULL, 0 },  /* Do */
+       { NULL, 0 },  /* Dq */
+       { NULL, 0 },  /* Ec */
+       { NULL, 0 },  /* Ef */
+       { NULL, TYPE_Em },  /* Em */
+       { NULL, 0 },  /* Eo */
+       { NULL, TYPE_Fx },  /* Fx */
+       { NULL, TYPE_Ms },  /* Ms */
+       { NULL, 0 },  /* No */
+       { NULL, 0 },  /* Ns */
+       { NULL, TYPE_Nx },  /* Nx */
+       { NULL, TYPE_Ox },  /* Ox */
+       { NULL, 0 },  /* Pc */
+       { NULL, 0 },  /* Pf */
+       { NULL, 0 },  /* Po */
+       { NULL, 0 },  /* Pq */
+       { NULL, 0 },  /* Qc */
+       { NULL, 0 },  /* Ql */
+       { NULL, 0 },  /* Qo */
+       { NULL, 0 },  /* Qq */
+       { NULL, 0 },  /* Re */
+       { NULL, 0 },  /* Rs */
+       { NULL, 0 },  /* Sc */
+       { NULL, 0 },  /* So */
+       { NULL, 0 },  /* Sq */
+       { NULL, 0 },  /* Sm */
+       { NULL, 0 },  /* Sx */
+       { NULL, TYPE_Sy },  /* Sy */
+       { NULL, TYPE_Tn },  /* Tn */
+       { NULL, 0 },  /* Ux */
+       { NULL, 0 },  /* Xc */
+       { NULL, 0 },  /* Xo */
+       { parse_mdoc_head, 0 },  /* Fo */
+       { NULL, 0 },  /* Fc */
+       { NULL, 0 },  /* Oo */
+       { NULL, 0 },  /* Oc */
+       { NULL, 0 },  /* Bk */
+       { NULL, 0 },  /* Ek */
+       { NULL, 0 },  /* Bt */
+       { NULL, 0 },  /* Hf */
+       { NULL, 0 },  /* Fr */
+       { NULL, 0 },  /* Ud */
+       { NULL, TYPE_Lb },  /* Lb */
+       { NULL, 0 },  /* Lp */
+       { NULL, TYPE_Lk },  /* Lk */
+       { NULL, TYPE_Mt },  /* Mt */
+       { NULL, 0 },  /* Brq */
+       { NULL, 0 },  /* Bro */
+       { NULL, 0 },  /* Brc */
+       { NULL, 0 },  /* %C */
+       { NULL, 0 },  /* Es */
+       { NULL, 0 },  /* En */
+       { NULL, TYPE_Dx },  /* Dx */
+       { NULL, 0 },  /* %Q */
+       { NULL, 0 },  /* br */
+       { NULL, 0 },  /* sp */
+       { NULL, 0 },  /* %U */
+       { NULL, 0 },  /* Ta */
  };
  
-static const char       *progname;
-static int               use_all;  /* Use all directories and files. */
-static int               verb;  /* Output verbosity level. */
-static int               warnings;  /* Potential problems in manuals. */
-
  int
  mandocdb(int argc, char *argv[])
  {
-       struct mparse   *mp; /* parse sequence */
-       struct manpaths  dirs;
-       struct mdb       mdb;
-       struct recs      recs;
-       enum op          op; /* current operation */
-       const char      *dir;
-       char            *cp;
-       char             pbuf[PATH_MAX];
-       int              ch, i, flags;
-       DB              *hash; /* temporary keyword hashtable */
-       BTREEINFO        info; /* btree configuration */
-       size_t           sz1, sz2, ipath;
-       struct buf       buf, /* keyword buffer */
-                        dbuf; /* description buffer */
-       struct of       *of; /* list of files for processing */
-       extern int       optind;
-       extern char     *optarg;
+       int               ch, i;
+       size_t            j, sz;
+       const char       *path_arg;
+       struct mchars    *mc;
+       struct manpaths   dirs;
+       struct mparse    *mp;
+       struct ohash_info mpages_info, mlinks_info;
+
+       memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));
+       memset(&dirs, 0, sizeof(struct manpaths));
+
+       mpages_info.alloc  = mlinks_info.alloc  = hash_alloc;
+       mpages_info.halloc = mlinks_info.halloc = hash_halloc;
+       mpages_info.hfree  = mlinks_info.hfree  = hash_free;
+
+       mpages_info.key_offset = offsetof(struct mpage, inodev);
+       mlinks_info.key_offset = offsetof(struct mlink, file);
  
         progname = strrchr(argv[0], '/');
         if (progname == NULL)
@@ -299,57 +330,47 @@ mandocdb(int argc, char *argv[])
         else
                 ++progname;
  
-       memset(&dirs, 0, sizeof(struct manpaths));
-       memset(&mdb, 0, sizeof(struct mdb));
-       memset(&recs, 0, sizeof(struct recs));
+       /*
+        * We accept a few different invocations.  
+        * The CHECKOP macro makes sure that invocation styles don't
+        * clobber each other.
+        */
+#define        CHECKOP(_op, _ch) do \
+       if (OP_DEFAULT != (_op)) { \
+               fprintf(stderr, "-%c: Conflicting option\n", (_ch)); \
+               goto usage; \
+       } while (/*CONSTCOND*/0)
  
-       of = NULL;
-       mp = NULL;
-       hash = NULL;
+       path_arg = NULL;
         op = OP_DEFAULT;
-       dir = NULL;
  
-       while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
+       while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))
                 switch (ch) {
                 case ('a'):
                         use_all = 1;
                         break;
                 case ('C'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-C: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                         op = OP_CONFFILE;
                         break;
                 case ('d'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-d: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                         op = OP_UPDATE;
                         break;
+               case ('n'):
+                       nodb = 1;
+                       break;
                 case ('t'):
+                       CHECKOP(op, ch);
                         dup2(STDOUT_FILENO, STDERR_FILENO);
-                       if (op) {
-                               fprintf(stderr,
-                                   "-t: conflicting options\n");
-                               goto usage;
-                       }
                         op = OP_TEST;
-                       use_all = 1;
-                       warnings = 1;
+                       nodb = warnings = 1;
                         break;
                 case ('u'):
-                       if (op) {
-                               fprintf(stderr,
-                                   "-u: conflicting options\n");
-                               goto usage;
-                       }
-                       dir = optarg;
+                       CHECKOP(op, ch);
+                       path_arg = optarg;
                         op = OP_DELETE;
                         break;
                 case ('v'):
@@ -366,282 +387,584 @@ mandocdb(int argc, char *argv[])
         argv += optind;
  
         if (OP_CONFFILE == op && argc > 0) {
-               fprintf(stderr, "-C: too many arguments\n");
+               fprintf(stderr, "-C: Too many arguments\n");
                 goto usage;
         }
  
-       memset(&info, 0, sizeof(BTREEINFO));
-       info.lorder = 4321;
-       info.flags = R_DUP;
-
-       mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+       exitcode = (int)MANDOCLEVEL_OK;
+       mp = mparse_alloc(MPARSE_AUTO, 
+               MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+       mc = mchars_alloc();
  
-       memset(&buf, 0, sizeof(struct buf));
-       memset(&dbuf, 0, sizeof(struct buf));
+       ohash_init(&mpages, 6, &mpages_info);
+       ohash_init(&mlinks, 6, &mlinks_info);
  
-       buf.size = dbuf.size = MANDOC_BUFSZ;
-
-       buf.cp = mandoc_malloc(buf.size);
-       dbuf.cp = mandoc_malloc(dbuf.size);
+       if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
+               /* 
+                * Force processing all files.
+                */
+               use_all = 1;
  
-       if (OP_TEST == op) {
-               ofile_argbuild(argc, argv, &of, NULL);
-               if (NULL == of)
+               /*
+                * All of these deal with a specific directory.
+                * Jump into that directory then collect files specified
+                * on the command-line.
+                */
+               if (0 == set_basedir(path_arg))
                         goto out;
-               index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
-               goto out;
-       }
+               for (i = 0; i < argc; i++)
+                       filescan(argv[i]);
+               if (0 == dbopen(1))
+                       goto out;
+               if (OP_TEST != op)
+                       dbprune();
+               if (OP_DELETE != op)
+                       mpages_merge(mc, mp, 0);
+               dbclose(1);
+       } else {
+               /*
+                * If we have arguments, use them as our manpaths.
+                * If we don't, grok from manpath(1) or however else
+                * manpath_parse() wants to do it.
+                */
+               if (argc > 0) {
+                       dirs.paths = mandoc_calloc
+                               (argc, sizeof(char *));
+                       dirs.sz = (size_t)argc;
+                       for (i = 0; i < argc; i++)
+                               dirs.paths[i] = mandoc_strdup(argv[i]);
+               } else
+                       manpath_parse(&dirs, path_arg, NULL, NULL);
  
-       if (OP_UPDATE == op || OP_DELETE == op) {
-               if (NULL == realpath(dir, pbuf)) {
-                       perror(dir);
-                       exit((int)MANDOCLEVEL_BADARG);
-               }
-               if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
-                       fprintf(stderr, "%s: path too long\n", pbuf);
-                       exit((int)MANDOCLEVEL_BADARG);
-               }
+               /*
+                * First scan the tree rooted at a base directory, then
+                * build a new database and finally move it into place.
+                * Ignore zero-length directories and strip trailing
+                * slashes.
+                */
+               for (j = 0; j < dirs.sz; j++) {
+                       sz = strlen(dirs.paths[j]);
+                       if (sz && '/' == dirs.paths[j][sz - 1])
+                               dirs.paths[j][--sz] = '\0';
+                       if (0 == sz)
+                               continue;
+
+                       if (j) {
+                               ohash_init(&mpages, 6, &mpages_info);
+                               ohash_init(&mlinks, 6, &mlinks_info);
+                       }
  
-               strlcat(mdb.dbn, pbuf, PATH_MAX);
-               sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX);
+                       if (0 == set_basedir(dirs.paths[j]))
+                               goto out;
+                       if (0 == treescan())
+                               goto out;
+                       if (0 == set_basedir(dirs.paths[j]))
+                               goto out;
+                       if (0 == dbopen(0))
+                               goto out;
  
-               strlcat(mdb.idxn, pbuf, PATH_MAX);
-               sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX);
+                       mpages_merge(mc, mp, warnings && !use_all);
+                       dbclose(0);
  
-               if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) {
-                       fprintf(stderr, "%s: path too long\n", mdb.idxn);
-                       exit((int)MANDOCLEVEL_BADARG);
+                       if (j + 1 < dirs.sz) {
+                               mpages_free();
+                               ohash_delete(&mpages);
+                               ohash_delete(&mlinks);
+                       }
                 }
+       }
+out:
+       set_basedir(NULL);
+       manpath_free(&dirs);
+       mchars_free(mc);
+       mparse_free(mp);
+       mpages_free();
+       ohash_delete(&mpages);
+       ohash_delete(&mlinks);
+       return(exitcode);
+usage:
+       fprintf(stderr, "usage: %s [-anvW] [-C file]\n"
+                       "       %s [-anvW] dir ...\n"
+                       "       %s [-nvW] -d dir [file ...]\n"
+                       "       %s [-nvW] -u dir [file ...]\n"
+                       "       %s -t file ...\n",
+                      progname, progname, progname, 
+                      progname, progname);
  
-               flags = O_CREAT | O_RDWR;
-               mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
-               mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+       return((int)MANDOCLEVEL_BADARG);
+}
  
-               if (NULL == mdb.db) {
-                       perror(mdb.dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               } else if (NULL == mdb.idx) {
-                       perror(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+/*
+ * Scan a directory tree rooted at "basedir" for manpages.
+ * We use fts(), scanning directory parts along the way for clues to our
+ * section and architecture.
+ *
+ * If use_all has been specified, grok all files.
+ * If not, sanitise paths to the following:
+ *
+ *   [./]man*[/<arch>]/<name>.<section> 
+ *   or
+ *   [./]cat<section>[/<arch>]/<name>.0
+ *
+ * TODO: accommodate multi-language directories.
+ */
+static int
+treescan(void)
+{
+       FTS             *f;
+       FTSENT          *ff;
+       struct mlink    *mlink;
+       int              dform;
+       char            *fsec;
+       const char      *dsec, *arch, *cp, *path;
+       const char      *argv[2];
  
-               ofile_argbuild(argc, argv, &of, pbuf);
+       argv[0] = ".";
+       argv[1] = (char *)NULL;
  
-               if (NULL == of)
-                       goto out;
+       /*
+        * Walk through all components under the directory, following
+        * symbolic links to their targets (FTS_LOGICAL).
+        */
+       f = fts_open((char * const *)argv, FTS_LOGICAL, NULL);
+       if (NULL == f) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say("", NULL);
+               return(0);
+       }
  
-               index_prune(of, &mdb, &recs);
+       dsec = arch = NULL;
+       dform = FORM_NONE;
  
+       while (NULL != (ff = fts_read(f))) {
+               path = ff->fts_path + 2;
                 /*
-                * Go to the root of the respective manual tree.
-                * This must work or no manuals may be found (they're
-                * indexed relative to the root).
+                * If we're a regular file, add an mlink by using the
+                * stored directory data and handling the filename.
                  */
+               if (FTS_F == ff->fts_info) {
+                       if (0 == strcmp(path, MANDOC_DB))
+                               continue;
+                       if ( ! use_all && ff->fts_level < 2) {
+                               if (warnings)
+                                       say(path, "Extraneous file");
+                               continue;
+                       } else if (NULL == (fsec =
+                                       strrchr(ff->fts_name, '.'))) {
+                               if ( ! use_all) {
+                                       if (warnings)
+                                               say(path,
+                                                   "No filename suffix");
+                                       continue;
+                               }
+                       } else if (0 == strcmp(++fsec, "html")) {
+                               if (warnings)
+                                       say(path, "Skip html");
+                               continue;
+                       } else if (0 == strcmp(fsec, "gz")) {
+                               if (warnings)
+                                       say(path, "Skip gz");
+                               continue;
+                       } else if (0 == strcmp(fsec, "ps")) {
+                               if (warnings)
+                                       say(path, "Skip ps");
+                               continue;
+                       } else if (0 == strcmp(fsec, "pdf")) {
+                               if (warnings)
+                                       say(path, "Skip pdf");
+                               continue;
+                       } else if ( ! use_all &&
+                           ((FORM_SRC == dform && strcmp(fsec, dsec)) ||
+                            (FORM_CAT == dform && strcmp(fsec, "0")))) {
+                               if (warnings)
+                                       say(path, "Wrong filename suffix");
+                               continue;
+                       } else
+                               fsec[-1] = '\0';
+                       mlink = mandoc_calloc(1, sizeof(struct mlink));
+                       strlcpy(mlink->file, path, sizeof(mlink->file));
+                       mlink->dform = dform;
+                       if (NULL != dsec)
+                               mlink->dsec = mandoc_strdup(dsec);
+                       if (NULL != arch)
+                               mlink->arch = mandoc_strdup(arch);
+                       mlink->name = mandoc_strdup(ff->fts_name);
+                       if (NULL != fsec)
+                               mlink->fsec = mandoc_strdup(fsec);
+                       mlink_add(mlink, ff->fts_statp);
+                       continue;
+               } else if (FTS_D != ff->fts_info &&
+                               FTS_DP != ff->fts_info) {
+                       if (warnings)
+                               say(path, "Not a regular file");
+                       continue;
+               }
  
-               if (OP_UPDATE == op) {
-                       if (-1 == chdir(dir)) {
-                               perror(dir);
-                               exit((int)MANDOCLEVEL_SYSERR);
+               switch (ff->fts_level) {
+               case (0):
+                       /* Ignore the root directory. */
+                       break;
+               case (1):
+                       /*
+                        * This might contain manX/ or catX/.
+                        * Try to infer this from the name.
+                        * If we're not in use_all, enforce it.
+                        */
+                       dsec = NULL;
+                       dform = FORM_NONE;
+                       cp = ff->fts_name;
+                       if (FTS_DP == ff->fts_info)
+                               break;
+
+                       if (0 == strncmp(cp, "man", 3)) {
+                               dform = FORM_SRC;
+                               dsec = cp + 3;
+                       } else if (0 == strncmp(cp, "cat", 3)) {
+                               dform = FORM_CAT;
+                               dsec = cp + 3;
                         }
-                       index_merge(of, mp, &dbuf, &buf, hash,
-                                       &mdb, &recs);
+
+                       if (NULL != dsec || use_all) 
+                               break;
+
+                       if (warnings)
+                               say(path, "Unknown directory part");
+                       fts_set(f, ff, FTS_SKIP);
+                       break;
+               case (2):
+                       /*
+                        * Possibly our architecture.
+                        * If we're descending, keep tabs on it.
+                        */
+                       arch = NULL;
+                       if (FTS_DP != ff->fts_info && NULL != dsec)
+                               arch = ff->fts_name;
+                       break;
+               default:
+                       if (FTS_DP == ff->fts_info || use_all)
+                               break;
+                       if (warnings)
+                               say(path, "Extraneous directory part");
+                       fts_set(f, ff, FTS_SKIP);
+                       break;
                 }
+       }
+
+       fts_close(f);
+       return(1);
+}
+
+/*
+ * Add a file to the mlinks table.
+ * Do not verify that it's a "valid" looking manpage (we'll do that
+ * later).
+ *
+ * Try to infer the manual section, architecture, and page name from the
+ * path, assuming it looks like
+ *
+ *   [./]man*[/<arch>]/<name>.<section> 
+ *   or
+ *   [./]cat<section>[/<arch>]/<name>.0
+ *
+ * See treescan() for the fts(3) version of this.
+ */
+static void
+filescan(const char *file)
+{
+       char             buf[PATH_MAX];
+       struct stat      st;
+       struct mlink    *mlink;
+       char            *p, *start;
+
+       assert(use_all);
  
-               goto out;
+       if (0 == strncmp(file, "./", 2))
+               file += 2;
+
+       if (NULL == realpath(file, buf)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, NULL);
+               return;
+       } else if (OP_TEST != op && strstr(buf, basedir) != buf) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say("", "%s: outside base directory", buf);
+               return;
+       } else if (-1 == stat(buf, &st)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, NULL);
+               return;
+       } else if ( ! (S_IFREG & st.st_mode)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(file, "Not a regular file");
+               return;
         }
+       start = buf + strlen(basedir);
+       mlink = mandoc_calloc(1, sizeof(struct mlink));
+       strlcpy(mlink->file, start, sizeof(mlink->file));
  
         /*
-        * Configure the directories we're going to scan.
-        * If we have command-line arguments, use them.
-        * If not, we use man(1)'s method (see mandocdb.8).
+        * First try to guess our directory structure.
+        * If we find a separator, try to look for man* or cat*.
+        * If we find one of these and what's underneath is a directory,
+        * assume it's an architecture.
          */
+       if (NULL != (p = strchr(start, '/'))) {
+               *p++ = '\0';
+               if (0 == strncmp(start, "man", 3)) {
+                       mlink->dform = FORM_SRC;
+                       mlink->dsec = mandoc_strdup(start + 3);
+               } else if (0 == strncmp(start, "cat", 3)) {
+                       mlink->dform = FORM_CAT;
+                       mlink->dsec = mandoc_strdup(start + 3);
+               }
  
-       if (argc > 0) {
-               dirs.paths = mandoc_calloc(argc, sizeof(char *));
-               dirs.sz = argc;
-               for (i = 0; i < argc; i++) {
-                       if (NULL == (cp = realpath(argv[i], pbuf))) {
-                               perror(argv[i]);
-                               goto out;
-                       }
-                       dirs.paths[i] = mandoc_strdup(cp);
+               start = p;
+               if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
+                       *p++ = '\0';
+                       mlink->arch = mandoc_strdup(start);
+                       start = p;
                 }
-       } else
-               manpath_parse(&dirs, dir, NULL, NULL);
+       }
  
-       for (ipath = 0; ipath < dirs.sz; ipath++) {
+       /*
+        * Now check the file suffix.
+        * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
+        */
+       p = strrchr(start, '\0');
+       while (p-- > start && '/' != *p && '.' != *p)
+               /* Loop. */ ;
  
-               /*
-                * Go to the root of the respective manual tree.
-                * This must work or no manuals may be found:
-                * They are indexed relative to the root.
-                */
+       if ('.' == *p) {
+               *p++ = '\0';
+               mlink->fsec = mandoc_strdup(p);
+       }
  
-               if (-1 == chdir(dirs.paths[ipath])) {
-                       perror(dirs.paths[ipath]);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+       /*
+        * Now try to parse the name.
+        * Use the filename portion of the path.
+        */
+       mlink->name = start;
+       if (NULL != (p = strrchr(start, '/'))) {
+               mlink->name = p + 1;
+               *p = '\0';
+       }
+       mlink->name = mandoc_strdup(mlink->name);
  
-               /* Create a new database in two temporary files. */
+       mlink_add(mlink, &st);
+}
  
-               flags = O_CREAT | O_EXCL | O_RDWR;
-               while (NULL == mdb.db) {
-                       strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX);
-                       strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX);
-                       if (NULL == mktemp(mdb.dbn)) {
-                               perror(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-                       mdb.db = dbopen(mdb.dbn, flags, 0644,
-                                       DB_BTREE, &info);
-                       if (NULL == mdb.db && EEXIST != errno) {
-                               perror(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-               }
-               while (NULL == mdb.idx) {
-                       strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX);
-                       strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX);
-                       if (NULL == mktemp(mdb.idxn)) {
-                               perror(mdb.idxn);
-                               unlink(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-                       mdb.idx = dbopen(mdb.idxn, flags, 0644,
-                                       DB_RECNO, NULL);
-                       if (NULL == mdb.idx && EEXIST != errno) {
-                               perror(mdb.idxn);
-                               unlink(mdb.dbn);
-                               exit((int)MANDOCLEVEL_SYSERR);
-                       }
-               }
+static void
+mlink_add(struct mlink *mlink, const struct stat *st)
+{
+       struct inodev    inodev;
+       struct mpage    *mpage;
+       unsigned int     slot;
+
+       assert(NULL != mlink->file);
+
+       if (NULL == mlink->dsec)
+               mlink->dsec = mandoc_strdup("");
+       if (NULL == mlink->arch)
+               mlink->arch = mandoc_strdup("");
+       if (NULL == mlink->name)
+               mlink->name = mandoc_strdup("");
+       if (NULL == mlink->fsec)
+               mlink->fsec = mandoc_strdup("");
+
+       if ('0' == *mlink->fsec) {
+               free(mlink->fsec);
+               mlink->fsec = mandoc_strdup(mlink->dsec);
+               mlink->fform = FORM_CAT;
+       } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
+               mlink->fform = FORM_SRC;
+       else
+               mlink->fform = FORM_NONE;
+
+       slot = ohash_qlookup(&mlinks, mlink->file);
+       assert(NULL == ohash_find(&mlinks, slot));
+       ohash_insert(&mlinks, slot, mlink);
+
+       inodev.st_ino = st->st_ino;
+       inodev.st_dev = st->st_dev;
+       slot = ohash_lookup_memory(&mpages, (char *)&inodev,
+           sizeof(struct inodev), inodev.st_ino);
+       mpage = ohash_find(&mpages, slot);
+       if (NULL == mpage) {
+               mpage = mandoc_calloc(1, sizeof(struct mpage));
+               mpage->inodev.st_ino = inodev.st_ino;
+               mpage->inodev.st_dev = inodev.st_dev;
+               ohash_insert(&mpages, slot, mpage);
+       } else
+               mlink->next = mpage->mlinks;
+       mpage->mlinks = mlink;
+}
  
-               /*
-                * Search for manuals and fill the new database.
-                */
+static void
+mlink_free(struct mlink *mlink)
+{
  
-               ofile_dirbuild(".", "", "", 0, &of);
+       free(mlink->dsec);
+       free(mlink->arch);
+       free(mlink->name);
+       free(mlink->fsec);
+       free(mlink);
+}
  
-               if (NULL != of) {
-                       index_merge(of, mp, &dbuf, &buf, hash,
-                            &mdb, &recs);
-                       ofile_free(of);
-                       of = NULL;
+static void
+mpages_free(void)
+{
+       struct mpage    *mpage;
+       struct mlink    *mlink;
+       unsigned int     slot;
+
+       mpage = ohash_first(&mpages, &slot);
+       while (NULL != mpage) {
+               while (NULL != (mlink = mpage->mlinks)) {
+                       mpage->mlinks = mlink->next;
+                       mlink_free(mlink);
                 }
+               free(mpage->sec);
+               free(mpage->arch);
+               free(mpage->title);
+               free(mpage->desc);
+               free(mpage);
+               mpage = ohash_next(&mpages, &slot);
+       }
+}
  
-               (*mdb.db->close)(mdb.db);
-               (*mdb.idx->close)(mdb.idx);
-               mdb.db = NULL;
-               mdb.idx = NULL;
-
-               /*
-                * Replace the old database with the new one.
-                * This is not perfectly atomic,
-                * but i cannot think of a better way.
-                */
-
-               if (-1 == rename(mdb.dbn, MANDOC_DB)) {
-                       perror(MANDOC_DB);
-                       unlink(mdb.dbn);
-                       unlink(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+/*
+ * For each mlink to the mpage, check whether the path looks like
+ * it is formatted, and if it does, check whether a source manual
+ * exists by the same name, ignoring the suffix.
+ * If both conditions hold, drop the mlink unless use_all is set.
+ */
+static void
+mlinks_undupe(struct mpage *mpage)
+{
+       char              buf[PATH_MAX];
+       struct mlink    **prev;
+       struct mlink     *mlink;
+       char             *bufp;
+
+       mpage->form = FORM_CAT;
+       prev = &mpage->mlinks;
+       while (NULL != (mlink = *prev)) {
+               if (FORM_CAT != mlink->dform) {
+                       mpage->form = FORM_NONE;
+                       goto nextlink;
                 }
-               if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
-                       perror(MANDOC_IDX);
-                       unlink(MANDOC_DB);
-                       unlink(MANDOC_IDX);
-                       unlink(mdb.idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+               if (strlcpy(buf, mlink->file, PATH_MAX) >= PATH_MAX) {
+                       if (warnings)
+                               say(mlink->file, "Filename too long");
+                       goto nextlink;
                 }
+               bufp = strstr(buf, "cat");
+               assert(NULL != bufp);
+               memcpy(bufp, "man", 3);
+               if (NULL != (bufp = strrchr(buf, '.')))
+                       *++bufp = '\0';
+               strlcat(buf, mlink->dsec, PATH_MAX);
+               if (NULL == ohash_find(&mlinks,
+                               ohash_qlookup(&mlinks, buf)))
+                       goto nextlink;
+               if (warnings)
+                       say(mlink->file, "Man source exists: %s", buf);
+               if (use_all)
+                       goto nextlink;
+               *prev = mlink->next;
+               mlink_free(mlink);
+               continue;
+nextlink:
+               prev = &(*prev)->next;
         }
-
-out:
-       if (mdb.db)
-               (*mdb.db->close)(mdb.db);
-       if (mdb.idx)
-               (*mdb.idx->close)(mdb.idx);
-       if (hash)
-               (*hash->close)(hash);
-       if (mp)
-               mparse_free(mp);
-
-       manpath_free(&dirs);
-       ofile_free(of);
-       free(buf.cp);
-       free(dbuf.cp);
-       free(recs.stack);
-
-       return(MANDOCLEVEL_OK);
-
-usage:
-       fprintf(stderr,
-               "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
-               "                        -d dir [file ...] | "
-               "-u dir [file ...]\n",
-               progname);
-
-       return((int)MANDOCLEVEL_BADARG);
  }
  
-void
-index_merge(const struct of *of, struct mparse *mp,
-               struct buf *dbuf, struct buf *buf, DB *hash,
-               struct mdb *mdb, struct recs *recs)
+/*
+ * Run through the files in the global hash table "mpages"
+ * and add them to the database specified in "basedir".
+ *
+ * This handles the parsing scheme itself, using the cues of directory
+ * and filename to determine whether the file is parsable or not.
+ */
+static void
+mpages_merge(struct mchars *mc, struct mparse *mp, int check_reachable)
  {
-       recno_t          rec;
-       int              ch, skip;
-       DBT              key, val;
-       DB              *files;  /* temporary file name table */
-       struct mdoc     *mdoc;
-       struct man      *man;
-       const char      *fn, *msec, *march, *mtitle;
-       char            *p;
-       uint64_t         mask;
-       size_t           sv;
-       unsigned         seq;
-       uint64_t         vbuf[2];
-       char             type;
-
-       static char      emptystring[] = "";
-
-       if (warnings) {
-               files = NULL;
-               hash_reset(&files);
+       struct ohash             title_table;
+       struct ohash_info        title_info, str_info;
+       struct mpage            *mpage;
+       struct mdoc             *mdoc;
+       struct man              *man;
+       struct title            *title_entry;
+       char                    *title_str;
+       const char              *cp;
+       int                      match;
+       unsigned int             pslot, tslot;
+       enum mandoclevel         lvl;
+
+       str_info.alloc = hash_alloc;
+       str_info.halloc = hash_halloc;
+       str_info.hfree = hash_free;
+       str_info.key_offset = offsetof(struct str, key);
+
+       if (check_reachable) {
+               title_info.alloc = hash_alloc;
+               title_info.halloc = hash_halloc;
+               title_info.hfree = hash_free;
+               title_info.key_offset = offsetof(struct title, title);
+               ohash_init(&title_table, 6, &title_info);
         }
  
-       rec = 0;
-       for (of = of->first; of; of = of->next) {
-               fn = of->fname;
+       mpage = ohash_first(&mpages, &pslot);
+       while (NULL != mpage) {
+               mlinks_undupe(mpage);
+               if (NULL == mpage->mlinks) {
+                       mpage = ohash_next(&mpages, &pslot);
+                       continue;
+               }
+
+               ohash_init(&strings, 6, &str_info);
+               mparse_reset(mp);
+               mdoc = NULL;
+               man = NULL;
+               match = 1;
  
                 /*
                  * Try interpreting the file as mdoc(7) or man(7)
                  * source code, unless it is already known to be
                  * formatted.  Fall back to formatted mode.
                  */
-
-               mparse_reset(mp);
-               mdoc = NULL;
-               man = NULL;
-
-               if ((MANDOC_SRC & of->src_form ||
-                   ! (MANDOC_FORM & of->src_form)) &&
-                   MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
-                       mparse_result(mp, &mdoc, &man);
+               if (FORM_CAT != mpage->mlinks->dform ||
+                   FORM_CAT != mpage->mlinks->fform) {
+                       lvl = mparse_readfd(mp, -1, mpage->mlinks->file);
+                       if (lvl < MANDOCLEVEL_FATAL)
+                               mparse_result(mp, &mdoc, &man);
+               }
  
                 if (NULL != mdoc) {
-                       msec = mdoc_meta(mdoc)->msec;
-                       march = mdoc_meta(mdoc)->arch;
-                       if (NULL == march)
-                               march = "";
-                       mtitle = mdoc_meta(mdoc)->title;
+                       mpage->form = FORM_SRC;
+                       mpage->sec =
+                           mandoc_strdup(mdoc_meta(mdoc)->msec);
+                       mpage->arch = mdoc_meta(mdoc)->arch;
+                       mpage->arch = mandoc_strdup(
+                           NULL == mpage->arch ? "" : mpage->arch);
+                       mpage->title =
+                           mandoc_strdup(mdoc_meta(mdoc)->title);
                 } else if (NULL != man) {
-                       msec = man_meta(man)->msec;
-                       march = "";
-                       mtitle = man_meta(man)->title;
+                       mpage->form = FORM_SRC;
+                       mpage->sec =
+                           mandoc_strdup(man_meta(man)->msec);
+                       mpage->arch =
+                           mandoc_strdup(mpage->mlinks->arch);
+                       mpage->title =
+                           mandoc_strdup(man_meta(man)->title);
                 } else {
-                       msec = of->sec;
-                       march = of->arch;
-                       mtitle = of->title;
+                       mpage->form = FORM_CAT;
+                       mpage->sec =
+                           mandoc_strdup(mpage->mlinks->dsec);
+                       mpage->arch =
+                           mandoc_strdup(mpage->mlinks->arch);
+                       mpage->title =
+                           mandoc_strdup(mpage->mlinks->name);
                 }
  
                 /*
@@ -653,16 +976,13 @@ index_merge(const struct of *of, struct mparse *mp,
                  * section, like encrypt(1) = makekey(8).  Do not skip
                  * manuals for such reasons.
                  */
-
-               skip = 0;
-               assert(of->sec);
-               assert(msec);
-               if (warnings)
-                       if (strcasecmp(msec, of->sec))
-                               fprintf(stderr, "%s: "
-                                       "section \"%s\" manual "
-                                       "in \"%s\" directory\n",
-                                       fn, msec, of->sec);
+               if (warnings && !use_all && FORM_SRC == mpage->form &&
+                   strcasecmp(mpage->sec, mpage->mlinks->dsec)) {
+                       match = 0;
+                       say(mpage->mlinks->file, "Section \"%s\" "
+                               "manual in %s directory",
+                               mpage->sec, mpage->mlinks->dsec);
+               }
  
                 /*
                  * Manual page directories exist for each kernel
@@ -678,26 +998,30 @@ index_merge(const struct of *of, struct mparse *mp,
                  * Thus, warn about architecture mismatches,
                  * but don't skip manuals for this reason.
                  */
+               if (warnings && !use_all &&
+                   strcasecmp(mpage->arch, mpage->mlinks->arch)) {
+                       match = 0;
+                       say(mpage->mlinks->file, "Architecture \"%s\" "
+                               "manual in \"%s\" directory",
+                               mpage->arch, mpage->mlinks->arch);
+               }
+               if (warnings && !use_all &&
+                   strcasecmp(mpage->title, mpage->mlinks->name))
+                       match = 0;
  
-               assert(of->arch);
-               assert(march);
-               if (warnings)
-                       if (strcasecmp(march, of->arch))
-                               fprintf(stderr, "%s: "
-                                       "architecture \"%s\" manual "
-                                       "in \"%s\" directory\n",
-                                       fn, march, of->arch);
-
-               /*
-                * By default, skip a file if the title given
-                * in the file disagrees with the file name.
-                * Do not warn, this happens for all MLINKs.
-                */
+               putkey(mpage, mpage->mlinks->name, TYPE_Nm);
  
-               assert(of->title);
-               assert(mtitle);
-               if (strcasecmp(mtitle, of->title))
-                       skip = 1;
+               if (NULL != mdoc) {
+                       if (NULL != (cp = mdoc_meta(mdoc)->name))
+                               putkey(mpage, cp, TYPE_Nm);
+                       assert(NULL == mpage->desc);
+                       parse_mdoc(mpage, mdoc_node(mdoc));
+                       putkey(mpage, NULL != mpage->desc ?
+                           mpage->desc : mpage->mlinks->name, TYPE_Nd);
+               } else if (NULL != man)
+                       parse_man(mpage, man_node(man));
+               else
+                       parse_cat(mpage);
  
                 /*
                  * Build a title string for the file.  If it matches
@@ -705,390 +1029,386 @@ index_merge(const struct of *of, struct mparse *mp,
                  * found; else, remember it as missing.
                  */
  
-               if (warnings) {
-                       buf->len = 0;
-                       buf_appendb(buf, mtitle, strlen(mtitle));
-                       buf_appendb(buf, "(", 1);
-                       buf_appendb(buf, msec, strlen(msec));
-                       if ('\0' != *march) {
-                               buf_appendb(buf, "/", 1);
-                               buf_appendb(buf, march, strlen(march));
-                       }
-                       buf_appendb(buf, ")", 2);
-                       for (p = buf->cp; '\0' != *p; p++)
-                               *p = tolower((unsigned char)*p);
-                       key.data = buf->cp;
-                       key.size = buf->len;
-                       val.data = NULL;
-                       val.size = 0;
-                       if (0 == skip)
-                               val.data = emptystring;
-                       else {
-                               ch = (*files->get)(files, &key, &val, 0);
-                               if (ch < 0) {
-                                       perror("hash");
-                                       exit((int)MANDOCLEVEL_SYSERR);
-                               } else if (ch > 0) {
-                                       val.data = (void *)fn;
-                                       val.size = strlen(fn) + 1;
-                               } else
-                                       val.data = NULL;
-                       }
-                       if (NULL != val.data &&
-                           (*files->put)(files, &key, &val, 0) < 0) {
-                               perror("hash");
+               if (check_reachable) {
+                       if (-1 == asprintf(&title_str, "%s(%s%s%s)",
+                           mpage->title, mpage->sec,
+                           '\0' == *mpage->arch ? "" : "/",
+                           mpage->arch)) {
+                               perror(NULL);
                                 exit((int)MANDOCLEVEL_SYSERR);
                         }
+                       tslot = ohash_qlookup(&title_table, title_str);
+                       title_entry = ohash_find(&title_table, tslot);
+                       if (NULL == title_entry) {
+                               title_entry = mandoc_malloc(
+                                               sizeof(struct title));
+                               title_entry->title = title_str;
+                               title_entry->file = mandoc_strdup(
+                                   match ? "" : mpage->mlinks->file);
+                               ohash_insert(&title_table, tslot,
+                                               title_entry);
+                       } else {
+                               if (match)
+                                       *title_entry->file = '\0';
+                               free(title_str);
+                       }
                 }
  
-               if (skip && !use_all)
-                       continue;
+               dbindex(mpage, mc);
+               ohash_delete(&strings);
+               mpage = ohash_next(&mpages, &pslot);
+       }
  
-               /*
-                * The index record value consists of a nil-terminated
-                * filename, a nil-terminated manual section, and a
-                * nil-terminated description.  Use the actual
-                * location of the file, such that the user can find
-                * it with man(1).  Since the description may not be
-                * set, we set a sentinel to see if we're going to
-                * write a nil byte in its place.
-                */
+       if (check_reachable) {
+               title_entry = ohash_first(&title_table, &tslot);
+               while (NULL != title_entry) {
+                       if ('\0' != *title_entry->file)
+                               say(title_entry->file,
+                                   "Probably unreachable, title is %s",
+                                   title_entry->title);
+                       free(title_entry->title);
+                       free(title_entry->file);
+                       free(title_entry);
+                       title_entry = ohash_next(&title_table, &tslot);
+               }
+               ohash_delete(&title_table);
+       }
+}
  
-               dbuf->len = 0;
-               type = mdoc ? 'd' : (man ? 'a' : 'c');
-               buf_appendb(dbuf, &type, 1);
-               buf_appendb(dbuf, fn, strlen(fn) + 1);
-               buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
-               buf_appendb(dbuf, of->title, strlen(of->title) + 1);
-               buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
+static void
+parse_cat(struct mpage *mpage)
+{
+       FILE            *stream;
+       char            *line, *p, *title;
+       size_t           len, plen, titlesz;
  
-               sv = dbuf->len;
+       if (NULL == (stream = fopen(mpage->mlinks->file, "r"))) {
+               if (warnings)
+                       say(mpage->mlinks->file, NULL);
+               return;
+       }
  
-               /*
-                * Collect keyword/mask pairs.
-                * Each pair will become a new btree node.
-                */
+       /* Skip to first blank line. */
  
-               hash_reset(&hash);
-               if (mdoc)
-                       pmdoc_node(hash, buf, dbuf,
-                               mdoc_node(mdoc), mdoc_meta(mdoc));
-               else if (man)
-                       pman_node(hash, buf, dbuf, man_node(man));
-               else
-                       pformatted(hash, buf, dbuf, of);
+       while (NULL != (line = fgetln(stream, &len)))
+               if ('\n' == *line)
+                       break;
  
-               /* Test mode, do not access any database. */
+       /*
+        * Assume the first line that is not indented
+        * is the first section header.  Skip to it.
+        */
  
-               if (NULL == mdb->db || NULL == mdb->idx)
-                       continue;
+       while (NULL != (line = fgetln(stream, &len)))
+               if ('\n' != *line && ' ' != *line)
+                       break;
+       
+       /*
+        * Read up until the next section into a buffer.
+        * Strip the leading whitespace from each line read and
+        * replace its trailing newline with a space.
+        * Ignore empty (whitespace-only) lines.
+        */
  
-               /*
-                * Make sure the file name is always registered
-                * as an .Nm search key.
-                */
-               buf->len = 0;
-               buf_append(buf, of->title);
-               hash_put(hash, buf, TYPE_Nm);
-
-               /*
-                * Reclaim an empty index record, if available.
-                * Use its record number for all new btree nodes.
-                */
-
-               if (recs->cur > 0) {
-                       recs->cur--;
-                       rec = recs->stack[(int)recs->cur];
-               } else if (recs->last > 0) {
-                       rec = recs->last;
-                       recs->last = 0;
-               } else
-                       rec++;
-               vbuf[1] = htobe64(rec);
-
-               /*
-                * Copy from the in-memory hashtable of pending
-                * keyword/mask pairs into the database.
-                */
+       titlesz = 0;
+       title = NULL;
  
-               seq = R_FIRST;
-               while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
-                       seq = R_NEXT;
-                       assert(sizeof(uint64_t) == val.size);
-                       memcpy(&mask, val.data, val.size);
-                       vbuf[0] = htobe64(mask);
-                       val.size = sizeof(vbuf);
-                       val.data = &vbuf;
-                       dbt_put(mdb->db, mdb->dbn, &key, &val);
-               }
-               if (ch < 0) {
-                       perror("hash");
-                       unlink(mdb->dbn);
-                       unlink(mdb->idxn);
-                       exit((int)MANDOCLEVEL_SYSERR);
+       while (NULL != (line = fgetln(stream, &len))) {
+               if (' ' != *line || '\n' != line[len - 1])
+                       break;
+               while (len > 0 && isspace((unsigned char)*line)) {
+                       line++;
+                       len--;
                 }
+               /*
+                * The newline counts as whitespace, so a blank line
+                * is stripped to length zero here, never to length
+                * one.  Skip it: appending nothing and then patching
+                * title[titlesz - 1] would write before the buffer
+                * while the title is still empty.
+                */
+               if (0 == len)
+                       continue;
+               title = mandoc_realloc(title, titlesz + len);
+               memcpy(title + titlesz, line, len);
+               titlesz += len;
+               title[titlesz - 1] = ' ';
+       }
  
-               /*
-                * Apply to the index.  If we haven't had a description
-                * set, put an empty one in now.
-                */
-
-               if (dbuf->len == sv)
-                       buf_appendb(dbuf, "", 1);
+       /*
+        * If no page content can be found, or the input line
+        * is already the next section header, or there is no
+        * trailing newline, reuse the page title as the page
+        * description.
+        */
  
-               key.data = &rec;
-               key.size = sizeof(recno_t);
+       if (NULL == title || '\0' == *title) {
+               if (warnings)
+                       say(mpage->mlinks->file,
+                           "Cannot find NAME section");
+               assert(NULL == mpage->desc);
+               mpage->desc = mandoc_strdup(mpage->mlinks->name);
+               putkey(mpage, mpage->mlinks->name, TYPE_Nd);
+               fclose(stream);
+               free(title);
+               return;
+       }
  
-               val.data = dbuf->cp;
-               val.size = dbuf->len;
+       title = mandoc_realloc(title, titlesz + 1);
+       title[titlesz] = '\0';
  
-               if (verb)
-                       printf("%s: adding to index\n", fn);
+       /*
+        * Skip to the first dash.
+        * Use the rest of the line as the description; its length
+        * is not limited.
+        */
  
-               dbt_put(mdb->idx, mdb->idxn, &key, &val);
+       if (NULL != (p = strstr(title, "- "))) {
+               for (p += 2; ' ' == *p || '\b' == *p; p++)
+                       /* Skip to next word. */ ;
+       } else {
+               if (warnings)
+                       say(mpage->mlinks->file,
+                           "No dash in title line");
+               p = title;
         }
  
-       /*
-        * Iterate the remembered file titles and check that
-        * all files can be found by their main title.
-        */
+       plen = strlen(p);
  
-       if (warnings) {
-               seq = R_FIRST;
-               while (0 == (*files->seq)(files, &key, &val, seq)) {
-                       seq = R_NEXT;
-                       if (val.size)
-                               fprintf(stderr, "%s: probably "
-                                   "unreachable, title is %s\n",
-                                   (char *)val.data, (char *)key.data);
-               }
-               (*files->close)(files);
+       /* Strip backspace-encoding from line. */
+
+       while (NULL != (line = memchr(p, '\b', plen))) {
+               len = line - p;
+               if (0 == len) {
+                       memmove(line, line + 1, plen--);
+                       continue;
+               } 
+               memmove(line - 1, line + 1, plen - len);
+               plen -= 2;
         }
+
+       assert(NULL == mpage->desc);
+       mpage->desc = mandoc_strdup(p);
+       putkey(mpage, mpage->desc, TYPE_Nd);
+       fclose(stream);
+       free(title);
  }
  
  /*
- * Scan through all entries in the index file `idx' and prune those
- * entries in `ofile'.
- * Pruning consists of removing from `db', then invalidating the entry
- * in `idx' (zeroing its value size).
+ * Put a type/word pair into the word database for this particular file.
   */
  static void
-index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
+putkey(const struct mpage *mpage, const char *value, uint64_t type)
  {
-       const struct of *of;
-       const char      *fn;
-       uint64_t         vbuf[2];
-       unsigned         seq, sseq;
-       DBT              key, val;
-       int              ch;
-
-       recs->cur = 0;
-       seq = R_FIRST;
-       while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
-               seq = R_NEXT;
-               assert(sizeof(recno_t) == key.size);
-               memcpy(&recs->last, key.data, key.size);
  
-               /* Deleted records are zero-sized.  Skip them. */
+       assert(NULL != value);
+       putkeys(mpage, value, strlen(value), type);
+}
  
-               if (0 == val.size)
-                       goto cont;
+/*
+ * Grok all nodes at or below a certain mdoc node into putkey().
+ */
+static void
+putmdockey(const struct mpage *mpage,
+       const struct mdoc_node *n, uint64_t m)
+{
  
-               /*
-                * Make sure we're sane.
-                * Read past our mdoc/man/cat type to the next string,
-                * then make sure it's bounded by a NUL.
-                * Failing any of these, we go into our error handler.
-                */
+       for ( ; NULL != n; n = n->next) {
+               if (NULL != n->child)
+                       putmdockey(mpage, n->child, m);
+               if (MDOC_TEXT == n->type)
+                       putkey(mpage, n->string, m);
+       }
+}
  
-               fn = (char *)val.data + 1;
-               if (NULL == memchr(fn, '\0', val.size - 1))
-                       break;
+static void
+parse_man(struct mpage *mpage, const struct man_node *n)
+{
+       const struct man_node *head, *body;
+       char            *start, *sv, *title;
+       char             byte;
+       size_t           sz, titlesz;
  
-               /*
-                * Search for the file in those we care about.
-                * XXX: build this into a tree.  Too slow.
-                */
+       if (NULL == n)
+               return;
  
-               for (of = ofile->first; of; of = of->next)
-                       if (0 == strcmp(fn, of->fname))
-                               break;
+       /*
+        * We're only searching for one thing: the first text child in
+        * the BODY of a NAME section.  Since we don't keep track of
+        * sections in -man, jump through some hoops to find out whether
+        * we're in the correct section or not.
+        */
  
-               if (NULL == of)
-                       continue;
+       if (MAN_BODY == n->type && MAN_SH == n->tok) {
+               body = n;
+               assert(body->parent);
+               if (NULL != (head = body->parent->head) &&
+                               1 == head->nchild &&
+                               NULL != (head = (head->child)) &&
+                               MAN_TEXT == head->type &&
+                               0 == strcmp(head->string, "NAME") &&
+                               NULL != (body = body->child) &&
+                               MAN_TEXT == body->type) {
  
-               /*
-                * Search through the keyword database, throwing out all
-                * references to our file.
-                */
+                       title = NULL;
+                       titlesz = 0;
  
-               sseq = R_FIRST;
-               while (0 == (ch = (*mdb->db->seq)(mdb->db,
-                                       &key, &val, sseq))) {
-                       sseq = R_NEXT;
-                       if (sizeof(vbuf) != val.size)
-                               break;
+                       /*
+                        * Suck the entire NAME section into memory.
+                        * Yes, we might run away.
+                        * But too many manuals have big, spread-out
+                        * NAME sections over many lines.
+                        */
  
-                       memcpy(vbuf, val.data, val.size);
-                       if (recs->last != betoh64(vbuf[1]))
-                               continue;
+                       for ( ; NULL != body; body = body->next) {
+                               if (MAN_TEXT != body->type)
+                                       break;
+                               if (0 == (sz = strlen(body->string)))
+                                       continue;
+                               title = mandoc_realloc
+                                       (title, titlesz + sz + 1);
+                               memcpy(title + titlesz, body->string, sz);
+                               titlesz += sz + 1;
+                               title[titlesz - 1] = ' ';
+                       }
+                       if (NULL == title)
+                               return;
  
-                       if ((ch = (*mdb->db->del)(mdb->db,
-                                       &key, R_CURSOR)) < 0)
-                               break;
-               }
+                       title = mandoc_realloc(title, titlesz + 1);
+                       title[titlesz] = '\0';
  
-               if (ch < 0) {
-                       perror(mdb->dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               } else if (1 != ch) {
-                       fprintf(stderr, "%s: corrupt database\n",
-                                       mdb->dbn);
-                       exit((int)MANDOCLEVEL_SYSERR);
-               }
+                       /* Skip leading space.  */
  
-               if (verb)
-                       printf("%s: deleting from index\n", fn);
+                       sv = title;
+                       while (isspace((unsigned char)*sv))
+                               sv++;
  
-               val.size = 0;
-               ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
+                       if (0 == (sz = strlen(sv))) {
+                               free(title);
+                               return;
+                       }
  
-               if (ch < 0)
-                       break;
-cont:
-               if (recs->cur >= recs->size) {
-                       recs->size += MANDOC_SLOP;
-                       recs->stack = mandoc_realloc(recs->stack,
-                                       recs->size * sizeof(recno_t));
-               }
+                       /* Erase trailing space. */
  
-               recs->stack[(int)recs->cur] = recs->last;
-               recs->cur++;
-       }
+                       start = &sv[sz - 1];
+                       while (start > sv && isspace((unsigned char)*start))
+                               *start-- = '\0';
  
-       if (ch < 0) {
-               perror(mdb->idxn);
-               exit((int)MANDOCLEVEL_SYSERR);
-       } else if (1 != ch) {
-               fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
-               exit((int)MANDOCLEVEL_SYSERR);
-       }
+                       if (start == sv) {
+                               free(title);
+                               return;
+                       }
  
-       recs->last++;
-}
+                       start = sv;
  
-/*
- * Grow the buffer (if necessary) and copy in a binary string.
- */
-static void
-buf_appendb(struct buf *buf, const void *cp, size_t sz)
-{
+                       /* 
+                        * Go through a special heuristic dance here.
+                        * Conventionally, one or more manual names are
+                        * comma-specified prior to a whitespace, then a
+                        * dash, then a description.  Try to puzzle out
+                        * the name parts here.
+                        */
  
-       /* Overshoot by MANDOC_BUFSZ. */
+                       for ( ;; ) {
+                               sz = strcspn(start, " ,");
+                               if ('\0' == start[sz])
+                                       break;
  
-       while (buf->len + sz >= buf->size) {
-               buf->size = buf->len + sz + MANDOC_BUFSZ;
-               buf->cp = mandoc_realloc(buf->cp, buf->size);
-       }
+                               byte = start[sz];
+                               start[sz] = '\0';
  
-       memcpy(buf->cp + (int)buf->len, cp, sz);
-       buf->len += sz;
-}
+                               putkey(mpage, start, TYPE_Nm);
  
-/*
- * Append a nil-terminated string to the buffer.  
- * This can be invoked multiple times.  
- * The buffer string will be nil-terminated.
- * If invoked multiple times, a space is put between strings.
- */
-static void
-buf_append(struct buf *buf, const char *cp)
-{
-       size_t           sz;
+                               if (' ' == byte) {
+                                       start += sz + 1;
+                                       break;
+                               }
  
-       if (0 == (sz = strlen(cp)))
-               return;
+                               assert(',' == byte);
+                               start += sz + 1;
+                               while (' ' == *start)
+                                       start++;
+                       }
  
-       if (buf->len)
-               buf->cp[(int)buf->len - 1] = ' ';
+                       if (sv == start) {
+                               putkey(mpage, start, TYPE_Nm);
+                               free(title);
+                               return;
+                       }
  
-       buf_appendb(buf, cp, sz + 1);
-}
+                       while (isspace((unsigned char)*start))
+                               start++;
  
-/*
- * Recursively add all text from a given node.  
- * This is optimised for general mdoc nodes in this context, which do
- * not consist of subexpressions and having a recursive call for n->next
- * would be wasteful.
- * The "f" variable should be 0 unless called from pmdoc_Nd for the
- * description buffer, which does not start at the beginning of the
- * buffer.
- */
-static void
-buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
-{
+                       if (0 == strncmp(start, "-", 1))
+                               start += 1;
+                       else if (0 == strncmp(start, "\\-\\-", 4))
+                               start += 4;
+                       else if (0 == strncmp(start, "\\-", 2))
+                               start += 2;
+                       else if (0 == strncmp(start, "\\(en", 4))
+                               start += 4;
+                       else if (0 == strncmp(start, "\\(em", 4))
+                               start += 4;
  
-       for ( ; n; n = n->next) {
-               if (n->child)
-                       buf_appendmdoc(buf, n->child, f);
+                       while (' ' == *start)
+                               start++;
  
-               if (MDOC_TEXT == n->type && f) {
-                       f = 0;
-                       buf_appendb(buf, n->string, 
-                                       strlen(n->string) + 1);
-               } else if (MDOC_TEXT == n->type)
-                       buf_append(buf, n->string);
+                       assert(NULL == mpage->desc);
+                       mpage->desc = mandoc_strdup(start);
+                       putkey(mpage, mpage->desc, TYPE_Nd);
+                       free(title);
+                       return;
+               }
+       }
  
+       for (n = n->child; n; n = n->next) {
+               if (NULL != mpage->desc)
+                       break;
+               parse_man(mpage, n);
         }
  }
  
  static void
-hash_reset(DB **db)
+parse_mdoc(struct mpage *mpage, const struct mdoc_node *n)
  {
-       DB              *hash;
  
-       if (NULL != (hash = *db))
-               (*hash->close)(hash);
-
-       *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
-       if (NULL == *db) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
+       assert(NULL != n);
+       for (n = n->child; NULL != n; n = n->next) {
+               switch (n->type) {
+               case (MDOC_ELEM):
+                       /* FALLTHROUGH */
+               case (MDOC_BLOCK):
+                       /* FALLTHROUGH */
+               case (MDOC_HEAD):
+                       /* FALLTHROUGH */
+               case (MDOC_BODY):
+                       /* FALLTHROUGH */
+               case (MDOC_TAIL):
+                       if (NULL != mdocs[n->tok].fp)
+                              if (0 == (*mdocs[n->tok].fp)(mpage, n))
+                                      break;
+                       if (mdocs[n->tok].mask)
+                               putmdockey(mpage, n->child,
+                                   mdocs[n->tok].mask);
+                       break;
+               default:
+                       assert(MDOC_ROOT != n->type);
+                       continue;
+               }
+               if (NULL != n->child)
+                       parse_mdoc(mpage, n);
         }
  }
  
-/* ARGSUSED */
-static int
-pmdoc_head(MDOC_ARGS)
-{
-
-       return(MDOC_HEAD == n->type);
-}
-
-/* ARGSUSED */
-static int
-pmdoc_body(MDOC_ARGS)
-{
-
-       return(MDOC_BODY == n->type);
-}
-
-/* ARGSUSED */
  static int
-pmdoc_Fd(MDOC_ARGS)
+parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n)
  {
         const char      *start, *end;
         size_t           sz;
  
-       if (SEC_SYNOPSIS != n->sec)
-               return(0);
-       if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+       if (SEC_SYNOPSIS != n->sec ||
+                       NULL == (n = n->child) || 
+                       MDOC_TEXT != n->type)
                 return(0);
  
         /*
          * Only consider those `Fd' macro fields that begin with an
          * "inclusion" token (versus, e.g., #define).
          */
+
         if (strcmp("#include", n->string))
                 return(0);
  
@@ -1111,121 +1431,121 @@ pmdoc_Fd(MDOC_ARGS)
         if ('>' == *end || '"' == *end)
                 end--;
  
-       assert(end >= start);
-
-       buf_appendb(buf, start, (size_t)(end - start + 1));
-       buf_appendb(buf, "", 1);
+       if (end > start)
+               putkeys(mpage, start, end - start + 1, TYPE_In);
         return(1);
  }
  
-/* ARGSUSED */
+/*
+ * Store the first text child of node n as a TYPE_In key.
+ * Return 1 after storing it.  Return 0 without storing anything
+ * if n has no children or its first child is not a text node;
+ * this guard also keeps n->child from being dereferenced when
+ * it is NULL.
+ */
  static int
-pmdoc_In(MDOC_ARGS)
+parse_mdoc_In(struct mpage *mpage, const struct mdoc_node *n)
  {
  
         if (NULL == n->child || MDOC_TEXT != n->child->type)
                 return(0);
  
-       buf_append(buf, n->child->string);
+       putkey(mpage, n->child->string, TYPE_In);
         return(1);
  }
  
-/* ARGSUSED */
  static int
-pmdoc_Fn(MDOC_ARGS)
+parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
  {
-       struct mdoc_node *nn;
         const char      *cp;
  
-       nn = n->child;
-
-       if (NULL == nn || MDOC_TEXT != nn->type)
+       if (NULL == (n = n->child) || MDOC_TEXT != n->type)
                 return(0);
  
-       /* .Fn "struct type *name" "char *arg" */
-
-       cp = strrchr(nn->string, ' ');
-       if (NULL == cp)
-               cp = nn->string;
+       /* 
+        * Parse: .Fn "struct type *name" "char *arg".
+        * First strip away pointer symbol. 
+        * Then store the function name, then type.
+        * Finally, store the arguments. 
+        */
  
-       /* Strip away pointer symbol. */
+       if (NULL == (cp = strrchr(n->string, ' ')))
+               cp = n->string;
  
         while ('*' == *cp)
                 cp++;
  
-       /* Store the function name. */
-
-       buf_append(buf, cp);
-       hash_put(hash, buf, TYPE_Fn);
+       putkey(mpage, cp, TYPE_Fn);
  
-       /* Store the function type. */
+       if (n->string < cp)
+               putkeys(mpage, n->string, cp - n->string, TYPE_Ft);
  
-       if (nn->string < cp) {
-               buf->len = 0;
-               buf_appendb(buf, nn->string, cp - nn->string);
-               buf_appendb(buf, "", 1);
-               hash_put(hash, buf, TYPE_Ft);
-       }
-
-       /* Store the arguments. */
-
-       for (nn = nn->next; nn; nn = nn->next) {
-               if (MDOC_TEXT != nn->type)
-                       continue;
-               buf->len = 0;
-               buf_append(buf, nn->string);
-               hash_put(hash, buf, TYPE_Fa);
-       }
+       for (n = n->next; NULL != n; n = n->next)
+               if (MDOC_TEXT == n->type)
+                       putkey(mpage, n->string, TYPE_Fa);
  
         return(0);
  }
  
-/* ARGSUSED */
  static int
-pmdoc_St(MDOC_ARGS)
+parse_mdoc_St(struct mpage *mpage, const struct mdoc_node *n)
  {
  
         if (NULL == n->child || MDOC_TEXT != n->child->type)
                 return(0);
  
-       buf_append(buf, n->child->string);
+       putkey(mpage, n->child->string, TYPE_St);
         return(1);
  }
  
-/* ARGSUSED */
  static int
-pmdoc_Xr(MDOC_ARGS)
+parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n)
  {
+       char    *cp;
  
         if (NULL == (n = n->child))
                 return(0);
  
-       buf_appendb(buf, n->string, strlen(n->string));
-
-       if (NULL != (n = n->next)) {
-               buf_appendb(buf, ".", 1);
-               buf_appendb(buf, n->string, strlen(n->string) + 1);
-       } else
-               buf_appendb(buf, ".", 2);
+       if (NULL == n->next) {
+               putkey(mpage, n->string, TYPE_Xr);
+               return(0);
+       }
  
-       return(1);
+       if (-1 == asprintf(&cp, "%s(%s)", n->string, n->next->string)) {
+               perror(NULL);
+               exit((int)MANDOCLEVEL_SYSERR);
+       }
+       putkey(mpage, cp, TYPE_Xr);
+       free(cp);
+       return(0);
  }
  
-/* ARGSUSED */
+/*
+ * Append the text of node n, of all nodes below it, and of its
+ * following siblings to mpage->desc, in document order.
+ * The first piece is copied as is; every later piece is joined
+ * to the existing description with a single space.
+ */
+static void
+parse_mdoc_Nd_text(struct mpage *mpage, const struct mdoc_node *n)
+{
+       size_t           sz;
+
+       for ( ; NULL != n; n = n->next) {
+               if (MDOC_TEXT == n->type) {
+                       if (NULL != mpage->desc) {
+                               /* Room for both strings, ' ' and NUL. */
+                               sz = strlen(mpage->desc) +
+                                    strlen(n->string) + 2;
+                               mpage->desc = mandoc_realloc(
+                                   mpage->desc, sz);
+                               strlcat(mpage->desc, " ", sz);
+                               strlcat(mpage->desc, n->string, sz);
+                       } else
+                               mpage->desc = mandoc_strdup(n->string);
+               }
+               if (NULL != n->child)
+                       parse_mdoc_Nd_text(mpage, n->child);
+       }
+}
+
+/*
+ * Collect the page description from the body of an `Nd' block.
+ * Return 0 for any node that is not of type MDOC_BODY, else 1.
+ */
  static int
-pmdoc_Nd(MDOC_ARGS)
+parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)
  {
  
         if (MDOC_BODY != n->type)
                 return(0);
  
-       buf_appendmdoc(dbuf, n->child, 1);
+       /*
+        * Special-case the `Nd' because we need to put the description
+        * into the document table.  The MDOC_BODY test above only
+        * holds for the top-level node, so the descent into nested
+        * macros is done by a helper that does not repeat that test.
+        */
+
+       parse_mdoc_Nd_text(mpage, n->child);
         return(1);
  }
  
-/* ARGSUSED */
  static int
-pmdoc_Nm(MDOC_ARGS)
+parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)
  {
  
         if (SEC_NAME == n->sec)
@@ -1233,758 +1553,558 @@ pmdoc_Nm(MDOC_ARGS)
         else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
                 return(0);
  
-       if (NULL == n->child)
-               buf_append(buf, m->name);
-
         return(1);
  }
  
-/* ARGSUSED */
  static int
-pmdoc_Sh(MDOC_ARGS)
+parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n)
  {
  
         return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
  }
  
-static void
-hash_put(DB *db, const struct buf *buf, uint64_t mask)
+static int
+parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n)
  {
-       uint64_t         oldmask;
-       DBT              key, val;
-       int              rc;
-
-       if (buf->len < 2)
-               return;
-
-       key.data = buf->cp;
-       key.size = buf->len;
-
-       if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
-       } else if (0 == rc) {
-               assert(sizeof(uint64_t) == val.size);
-               memcpy(&oldmask, val.data, val.size);
-               mask |= oldmask;
-       }
-
-       val.data = &mask;
-       val.size = sizeof(uint64_t); 
  
-       if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
-               perror("hash");
-               exit((int)MANDOCLEVEL_SYSERR);
-       } 
+       return(MDOC_HEAD == n->type);
  }
  
-static void
-dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+static int
+parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n)
  {
  
-       assert(key->size);
-       assert(val->size);
-
-       if (0 == (*db->put)(db, key, val, 0))
-               return;
-       
-       perror(dbn);
-       exit((int)MANDOCLEVEL_SYSERR);
-       /* NOTREACHED */
+       return(MDOC_BODY == n->type);
  }
  
  /*
- * Call out to per-macro handlers after clearing the persistent database
- * key.  If the macro sets the database key, flush it to the database.
+ * Add a string to the hash table for the current manual.
+ * Each string has a bitmask telling which macros it belongs to.
+ * When we finish the manual, we'll dump the table.
   */
  static void
-pmdoc_node(MDOC_ARGS)
+putkeys(const struct mpage *mpage,
+       const char *cp, size_t sz, uint64_t v)
  {
+       struct str      *s;
+       unsigned int     slot;
+       const char      *end;
  
-       if (NULL == n)
+       if (0 == sz)
                 return;
  
-       switch (n->type) {
-       case (MDOC_HEAD):
-               /* FALLTHROUGH */
-       case (MDOC_BODY):
-               /* FALLTHROUGH */
-       case (MDOC_TAIL):
-               /* FALLTHROUGH */
-       case (MDOC_BLOCK):
-               /* FALLTHROUGH */
-       case (MDOC_ELEM):
-               buf->len = 0;
-
-               /*
-                * Both NULL handlers and handlers returning true
-                * request using the data.  Only skip the element
-                * when the handler returns false.
-                */
-
-               if (NULL != mdocs[n->tok].fp &&
-                   0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
-                       break;
+       end = cp + sz;
+       slot = ohash_qlookupi(&strings, cp, &end);
+       s = ohash_find(&strings, slot);
  
-               /*
-                * For many macros, use the text from all children.
-                * Set zero flags for macros not needing this.
-                * In that case, the handler must fill the buffer.
-                */
-
-               if (MDOCF_CHILD & mdocs[n->tok].flags)
-                       buf_appendmdoc(buf, n->child, 0);
-
-               /*
-                * Cover the most common case:
-                * Automatically stage one string per element.
-                * Set a zero mask for macros not needing this.
-                * Additional staging can be done in the handler.
-                */
-
-               if (mdocs[n->tok].mask)
-                       hash_put(hash, buf, mdocs[n->tok].mask);
-               break;
-       default:
-               break;
+       if (NULL != s && mpage == s->mpage) {
+               s->mask |= v;
+               return;
+       } else if (NULL == s) {
+               s = mandoc_calloc(sizeof(struct str) + sz + 1, 1);
+               memcpy(s->key, cp, sz);
+               ohash_insert(&strings, slot, s);
         }
-
-       pmdoc_node(hash, buf, dbuf, n->child, m);
-       pmdoc_node(hash, buf, dbuf, n->next, m);
+       s->mpage = mpage;
+       s->mask = v;
  }
  
-static int
-pman_node(MAN_ARGS)
+/*
+ * Take a Unicode codepoint and produce its UTF-8 encoding.
+ * This isn't the best way to do this, but it works.
+ * The magic numbers are from the UTF-8 packaging.
+ * They're not as scary as they seem: read the UTF-8 spec for details.
+ */
+static size_t
+utf8(unsigned int cp, char out[7])
  {
-       const struct man_node *head, *body;
-       char            *start, *sv, *title;
-       size_t           sz, titlesz;
-
-       if (NULL == n)
+       size_t           rc;
+
+       rc = 0;
+       if (cp <= 0x0000007F) {
+               rc = 1;
+               out[0] = (char)cp;
+       } else if (cp <= 0x000007FF) {
+               rc = 2;
+               out[0] = (cp >> 6  & 31) | 192;
+               out[1] = (cp       & 63) | 128;
+       } else if (cp <= 0x0000FFFF) {
+               rc = 3;
+               out[0] = (cp >> 12 & 15) | 224;
+               out[1] = (cp >> 6  & 63) | 128;
+               out[2] = (cp       & 63) | 128;
+       } else if (cp <= 0x001FFFFF) {
+               rc = 4;
+               out[0] = (cp >> 18 &  7) | 240;
+               out[1] = (cp >> 12 & 63) | 128;
+               out[2] = (cp >> 6  & 63) | 128;
+               out[3] = (cp       & 63) | 128;
+       } else if (cp <= 0x03FFFFFF) {
+               rc = 5;
+               out[0] = (cp >> 24 &  3) | 248;
+               out[1] = (cp >> 18 & 63) | 128;
+               out[2] = (cp >> 12 & 63) | 128;
+               out[3] = (cp >> 6  & 63) | 128;
+               out[4] = (cp       & 63) | 128;
+       } else if (cp <= 0x7FFFFFFF) {
+               rc = 6;
+               out[0] = (cp >> 30 &  1) | 252;
+               out[1] = (cp >> 24 & 63) | 128;
+               out[2] = (cp >> 18 & 63) | 128;
+               out[3] = (cp >> 12 & 63) | 128;
+               out[4] = (cp >> 6  & 63) | 128;
+               out[5] = (cp       & 63) | 128;
+       } else
                 return(0);
  
-       /*
-        * We're only searching for one thing: the first text child in
-        * the BODY of a NAME section.  Since we don't keep track of
-        * sections in -man, run some hoops to find out whether we're in
-        * the correct section or not.
-        */
-
-       if (MAN_BODY == n->type && MAN_SH == n->tok) {
-               body = n;
-               assert(body->parent);
-               if (NULL != (head = body->parent->head) &&
-                               1 == head->nchild &&
-                               NULL != (head = (head->child)) &&
-                               MAN_TEXT == head->type &&
-                               0 == strcmp(head->string, "NAME") &&
-                               NULL != (body = body->child) &&
-                               MAN_TEXT == body->type) {
-
-                       title = NULL;
-                       titlesz = 0;
-                       /*
-                        * Suck the entire NAME section into memory.
-                        * Yes, we might run away.
-                        * But too many manuals have big, spread-out
-                        * NAME sections over many lines.
-                        */
-                       for ( ; NULL != body; body = body->next) {
-                               if (MAN_TEXT != body->type)
-                                       break;
-                               if (0 == (sz = strlen(body->string)))
-                                       continue;
-                               title = mandoc_realloc
-                                       (title, titlesz + sz + 1);
-                               memcpy(title + titlesz, body->string, sz);
-                               titlesz += sz + 1;
-                               title[(int)titlesz - 1] = ' ';
-                       }
-                       if (NULL == title)
-                               return(0);
-
-                       title = mandoc_realloc(title, titlesz + 1);
-                       title[(int)titlesz] = '\0';
-
-                       /* Skip leading space.  */
-
-                       sv = title;
-                       while (isspace((unsigned char)*sv))
-                               sv++;
+       out[rc] = '\0';
+       return(rc);
+}
  
-                       if (0 == (sz = strlen(sv))) {
-                               free(title);
-                               return(0);
-                       }
+/*
+ * Store the UTF-8 version of a key, or alias the pointer if the key has
+ * no UTF-8 transcription marks in it.
+ */
+static void
+utf8key(struct mchars *mc, struct str *key)
+{
+       size_t           sz, bsz, pos;
+       char             utfbuf[7], res[5];
+       char            *buf;
+       const char      *seq, *cpp, *val;
+       int              len, u;
+       enum mandoc_esc  esc;
  
-                       /* Erase trailing space. */
+       assert(NULL == key->utf8);
  
-                       start = &sv[sz - 1];
-                       while (start > sv && isspace((unsigned char)*start))
-                               *start-- = '\0';
+       res[0] = '\\';
+       res[1] = '\t';
+       res[2] = ASCII_NBRSP;
+       res[3] = ASCII_HYPH;
+       res[4] = '\0';
  
-                       if (start == sv) {
-                               free(title);
-                               return(0);
-                       }
+       val = key->key;
+       bsz = strlen(val);
  
-                       start = sv;
+       /*
+        * Pre-check: if we have no stop-characters, then set the
+        * pointer as ourselves and get out of here.
+        */
+       if (strcspn(val, res) == bsz) {
+               key->utf8 = key->key;
+               return;
+       } 
  
-                       /* 
-                        * Go through a special heuristic dance here.
-                        * This is why -man manuals are great!
-                        * (I'm being sarcastic: my eyes are bleeding.)
-                        * Conventionally, one or more manual names are
-                        * comma-specified prior to a whitespace, then a
-                        * dash, then a description.  Try to puzzle out
-                        * the name parts here.
-                        */
+       /* Pre-allocate by the length of the input */
  
-                       for ( ;; ) {
-                               sz = strcspn(start, " ,");
-                               if ('\0' == start[(int)sz])
-                                       break;
+       buf = mandoc_malloc(++bsz);
+       pos = 0;
  
-                               buf->len = 0;
-                               buf_appendb(buf, start, sz);
-                               buf_appendb(buf, "", 1);
+       while ('\0' != *val) {
+               /*
+                * Halt on the first escape sequence.
+                * This also halts on the end of string, in which case
+                * we just copy, fall through, and exit the loop.
+                */
+               if ((sz = strcspn(val, res)) > 0) {
+                       memcpy(&buf[pos], val, sz);
+                       pos += sz;
+                       val += sz;
+               }
  
-                               hash_put(hash, buf, TYPE_Nm);
+               if (ASCII_HYPH == *val) {
+                       buf[pos++] = '-';
+                       val++;
+                       continue;
+               } else if ('\t' == *val || ASCII_NBRSP == *val) {
+                       buf[pos++] = ' ';
+                       val++;
+                       continue;
+               } else if ('\\' != *val)
+                       break;
  
-                               if (' ' == start[(int)sz]) {
-                                       start += (int)sz + 1;
-                                       break;
-                               }
+               /* Read past the slash. */
  
-                               assert(',' == start[(int)sz]);
-                               start += (int)sz + 1;
-                               while (' ' == *start)
-                                       start++;
-                       }
+               val++;
+               u = 0;
  
-                       buf->len = 0;
+               /*
+                * Parse the escape sequence and see if it's a
+                * predefined character or special character.
+                */
+               esc = mandoc_escape
+                       ((const char **)&val, &seq, &len);
+               if (ESCAPE_ERROR == esc)
+                       break;
  
-                       if (sv == start) {
-                               buf_append(buf, start);
-                               free(title);
-                               return(1);
-                       }
+               if (ESCAPE_SPECIAL != esc)
+                       continue;
+               if (0 == (u = mchars_spec2cp(mc, seq, len)))
+                       continue;
  
-                       while (isspace((unsigned char)*start))
-                               start++;
+               /*
+                * If we have a Unicode codepoint, try to convert that
+                * to a UTF-8 byte string.
+                */
+               cpp = utfbuf;
+               if (0 == (sz = utf8(u, utfbuf)))
+                       continue;
  
-                       if (0 == strncmp(start, "-", 1))
-                               start += 1;
-                       else if (0 == strncmp(start, "\\-\\-", 4))
-                               start += 4;
-                       else if (0 == strncmp(start, "\\-", 2))
-                               start += 2;
-                       else if (0 == strncmp(start, "\\(en", 4))
-                               start += 4;
-                       else if (0 == strncmp(start, "\\(em", 4))
-                               start += 4;
+               /* Copy the rendered glyph into the stream. */
  
-                       while (' ' == *start)
-                               start++;
+               sz = strlen(cpp);
+               bsz += sz;
  
-                       sz = strlen(start) + 1;
-                       buf_appendb(dbuf, start, sz);
-                       buf_appendb(buf, start, sz);
+               buf = mandoc_realloc(buf, bsz);
  
-                       hash_put(hash, buf, TYPE_Nd);
-                       free(title);
-               }
+               memcpy(&buf[pos], cpp, sz);
+               pos += sz;
         }
  
-       for (n = n->child; n; n = n->next)
-               if (pman_node(hash, buf, dbuf, n))
-                       return(1);
-
-       return(0);
+       buf[pos] = '\0';
+       key->utf8 = buf;
  }
  
  /*
- * Parse a formatted manual page.
- * By necessity, this involves rather crude guesswork.
+ * Flush the current page's terms (and their bits) into the database.
+ * Wrap the entire set of additions in a transaction to make sqlite be a
+ * little faster.
+ * Also, UTF-8-encode the description at the last possible moment.
   */
  static void
-pformatted(DB *hash, struct buf *buf, 
-               struct buf *dbuf, const struct of *of)
+dbindex(const struct mpage *mpage, struct mchars *mc)
  {
-       FILE            *stream;
-       char            *line, *p, *title;
-       size_t           len, plen, titlesz;
-
-       if (NULL == (stream = fopen(of->fname, "r"))) {
-               if (warnings)
-                       perror(of->fname);
-               return;
-       }
-
-       /*
-        * Always use the title derived from the filename up front,
-        * do not even try to find it in the file.  This also makes
-        * sure we don't end up with an orphan index record, even if
-        * the file content turns out to be completely unintelligible.
-        */
+       struct mlink    *mlink;
+       struct str      *key;
+       const char      *desc;
+       int64_t          recno;
+       size_t           i;
+       unsigned int     slot;
  
-       buf->len = 0;
-       buf_append(buf, of->title);
-       hash_put(hash, buf, TYPE_Nm);
-
-       /* Skip to first blank line. */
-
-       while (NULL != (line = fgetln(stream, &len)))
-               if ('\n' == *line)
-                       break;
+       if (verb)
+               say(mpage->mlinks->file, "Adding to index");
  
-       /*
-        * Assume the first line that is not indented
-        * is the first section header.  Skip to it.
-        */
-
-       while (NULL != (line = fgetln(stream, &len)))
-               if ('\n' != *line && ' ' != *line)
-                       break;
-       
-       /*
-        * Read up until the next section into a buffer.
-        * Strip the leading and trailing newline from each read line,
-        * appending a trailing space.
-        * Ignore empty (whitespace-only) lines.
-        */
-
-       titlesz = 0;
-       title = NULL;
-
-       while (NULL != (line = fgetln(stream, &len))) {
-               if (' ' != *line || '\n' != line[(int)len - 1])
-                       break;
-               while (len > 0 && isspace((unsigned char)*line)) {
-                       line++;
-                       len--;
-               }
-               if (1 == len)
-                       continue;
-               title = mandoc_realloc(title, titlesz + len);
-               memcpy(title + titlesz, line, len);
-               titlesz += len;
-               title[(int)titlesz - 1] = ' ';
-       }
-
-
-       /*
-        * If no page content can be found, or the input line
-        * is already the next section header, or there is no
-        * trailing newline, reuse the page title as the page
-        * description.
-        */
-
-       if (NULL == title || '\0' == *title) {
-               if (warnings)
-                       fprintf(stderr, "%s: cannot find NAME section\n",
-                                       of->fname);
-               buf_appendb(dbuf, buf->cp, buf->size);
-               hash_put(hash, buf, TYPE_Nd);
-               fclose(stream);
-               free(title);
+       if (nodb)
                 return;
+
+       desc = "";
+       if (NULL != mpage->desc && '\0' != *mpage->desc) {
+               key = ohash_find(&strings,
+                       ohash_qlookup(&strings, mpage->desc));
+               assert(NULL != key);
+               if (NULL == key->utf8)
+                       utf8key(mc, key);
+               desc = key->utf8;
         }
  
-       title = mandoc_realloc(title, titlesz + 1);
-       title[(int)titlesz] = '\0';
+       SQL_EXEC("BEGIN TRANSACTION");
  
+       i = 1;
         /*
-        * Skip to the first dash.
-        * Use the remaining line as the description (no more than 70
-        * bytes).
+        * XXX The following three lines are obsolete
+        * and only kept for backward compatibility
+        * until apropos(1) and friends have caught up.
          */
-
-       if (NULL != (p = strstr(title, "- "))) {
-               for (p += 2; ' ' == *p || '\b' == *p; p++)
-                       /* Skip to next word. */ ;
-       } else {
-               if (warnings)
-                       fprintf(stderr, "%s: no dash in title line\n",
-                                       of->fname);
-               p = title;
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->file);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->dsec);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, mpage->mlinks->arch);
+       SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, desc);
+       SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
+       SQL_STEP(stmts[STMT_INSERT_PAGE]);
+       recno = sqlite3_last_insert_rowid(db);
+       sqlite3_reset(stmts[STMT_INSERT_PAGE]);
+
+       for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
+               i = 1;
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->file);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
+               SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, recno);
+               SQL_STEP(stmts[STMT_INSERT_LINK]);
+               sqlite3_reset(stmts[STMT_INSERT_LINK]);
         }
  
-       plen = strlen(p);
-
-       /* Strip backspace-encoding from line. */
-
-       while (NULL != (line = memchr(p, '\b', plen))) {
-               len = line - p;
-               if (0 == len) {
-                       memmove(line, line + 1, plen--);
-                       continue;
-               } 
-               memmove(line - 1, line + 1, plen - len);
-               plen -= 2;
+       for (key = ohash_first(&strings, &slot); NULL != key;
+            key = ohash_next(&strings, &slot)) {
+               assert(key->mpage == mpage);
+               if (NULL == key->utf8)
+                       utf8key(mc, key);
+               i = 1;
+               SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
+               SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->utf8);
+               SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, recno);
+               SQL_STEP(stmts[STMT_INSERT_KEY]);
+               sqlite3_reset(stmts[STMT_INSERT_KEY]);
+               if (key->utf8 != key->key)
+                       free(key->utf8);
+               free(key);
         }
  
-       buf_appendb(dbuf, p, plen + 1);
-       buf->len = 0;
-       buf_appendb(buf, p, plen + 1);
-       hash_put(hash, buf, TYPE_Nd);
-       fclose(stream);
-       free(title);
+       SQL_EXEC("END TRANSACTION");
  }
  
  static void
-ofile_argbuild(int argc, char *argv[], struct of **of,
-               const char *basedir)
+dbprune(void)
  {
-       char             buf[PATH_MAX];
-       char             pbuf[PATH_MAX];
-       const char      *sec, *arch, *title;
-       char            *relpath, *p;
-       int              i, src_form;
-       struct of       *nof;
-
-       for (i = 0; i < argc; i++) {
-               if (NULL == (relpath = realpath(argv[i], pbuf))) {
-                       perror(argv[i]);
-                       continue;
-               }
-               if (NULL != basedir) {
-                       if (strstr(pbuf, basedir) != pbuf) {
-                               fprintf(stderr, "%s: file outside "
-                                   "base directory %s\n",
-                                   pbuf, basedir);
-                               continue;
-                       }
-                       relpath = pbuf + strlen(basedir);
-               }
-
-               /*
-                * Try to infer the manual section, architecture and
-                * page title from the path, assuming it looks like
-                *   man*[/<arch>]/<title>.<section>   or
-                *   cat<section>[/<arch>]/<title>.0
-                */
-
-               if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
-                       fprintf(stderr, "%s: path too long\n", relpath);
-                       continue;
-               }
-               sec = arch = title = "";
-               src_form = 0;
-               p = strrchr(buf, '\0');
-               while (p-- > buf) {
-                       if ('\0' == *sec && '.' == *p) {
-                               sec = p + 1;
-                               *p = '\0';
-                               if ('0' == *sec)
-                                       src_form |= MANDOC_FORM;
-                               else if ('1' <= *sec && '9' >= *sec)
-                                       src_form |= MANDOC_SRC;
-                               continue;
-                       }
-                       if ('/' != *p)
-                               continue;
-                       if ('\0' == *title) {
-                               title = p + 1;
-                               *p = '\0';
-                               continue;
-                       }
-                       if (0 == strncmp("man", p + 1, 3))
-                               src_form |= MANDOC_SRC;
-                       else if (0 == strncmp("cat", p + 1, 3))
-                               src_form |= MANDOC_FORM;
-                       else
-                               arch = p + 1;
-                       break;
-               }
-               if ('\0' == *title) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s: cannot deduce title "
-                                   "from filename\n",
-                                   relpath);
-                       title = buf;
-               }
+       struct mpage    *mpage;
+       struct mlink    *mlink;
+       size_t           i;
+       unsigned int     slot;
  
-               /*
-                * Build the file structure.
-                */
-
-               nof = mandoc_calloc(1, sizeof(struct of));
-               nof->fname = mandoc_strdup(relpath);
-               nof->sec = mandoc_strdup(sec);
-               nof->arch = mandoc_strdup(arch);
-               nof->title = mandoc_strdup(title);
-               nof->src_form = src_form;
-
-               /*
-                * Add the structure to the list.
-                */
+       if (nodb)
+               return;
  
-               if (verb > 1)
-                       printf("%s: scheduling\n", relpath);
-               if (NULL == *of) {
-                       *of = nof;
-                       (*of)->first = nof;
-               } else {
-                       nof->first = (*of)->first;
-                       (*of)->next = nof;
-                       *of = nof;
-               }
+       mpage = ohash_first(&mpages, &slot);
+       while (NULL != mpage) {
+               mlink = mpage->mlinks;
+               i = 1;
+               SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], i, mlink->file);
+               SQL_STEP(stmts[STMT_DELETE_PAGE]);
+               sqlite3_reset(stmts[STMT_DELETE_PAGE]);
+               if (verb)
+                       say(mlink->file, "Deleted from index");
+               mpage = ohash_next(&mpages, &slot);
         }
  }
  
  /*
- * Recursively build up a list of files to parse.
- * We use this instead of ftw() and so on because I don't want global
- * variables hanging around.
- * This ignores the mandoc.db and mandoc.index files, but assumes that
- * everything else is a manual.
- * Pass in a pointer to a NULL structure for the first invocation.
+ * Close an existing database and its prepared statements.
+ * If "real" is not set, rename the temporary file into the real one.
   */
  static void
-ofile_dirbuild(const char *dir, const char* psec, const char *parch,
-               int p_src_form, struct of **of)
+dbclose(int real)
  {
-       char             buf[PATH_MAX];
-       size_t           sz;
-       DIR             *d;
-       const char      *fn, *sec, *arch;
-       char            *p, *q, *suffix;
-       struct of       *nof;
-       struct dirent   *dp;
-       int              src_form;
-
-       if (NULL == (d = opendir(dir))) {
-               if (warnings)
-                       perror(dir);
+       size_t           i;
+
+       if (nodb)
                 return;
+
+       for (i = 0; i < STMT__MAX; i++) {
+               sqlite3_finalize(stmts[i]);
+               stmts[i] = NULL;
         }
  
-       while (NULL != (dp = readdir(d))) {
-               fn = dp->d_name;
+       sqlite3_close(db);
+       db = NULL;
  
-               if ('.' == *fn)
-                       continue;
+       if (real)
+               return;
  
-               src_form = p_src_form;
+       if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(MANDOC_DB, NULL);
+       }
+}
  
-               if (DT_DIR == dp->d_type) {
-                       sec = psec;
-                       arch = parch;
+/*
+ * This is straightforward stuff.
+ * Open a database connection to a "temporary" database, then open a set
+ * of prepared statements we'll use over and over again.
+ * If "real" is set, we use the existing database; if not, we truncate a
+ * temporary one.
+ * Must be matched by dbclose().
+ */
+static int
+dbopen(int real)
+{
+       const char      *file, *sql;
+       int              rc, ofl;
  
-                       /*
-                        * By default, only use directories called:
-                        *   man<section>/[<arch>/]   or
-                        *   cat<section>/[<arch>/]
-                        */
+       if (nodb) 
+               return(1);
  
-                       if ('\0' == *sec) {
-                               if(0 == strncmp("man", fn, 3)) {
-                                       src_form |= MANDOC_SRC;
-                                       sec = fn + 3;
-                               } else if (0 == strncmp("cat", fn, 3)) {
-                                       src_form |= MANDOC_FORM;
-                                       sec = fn + 3;
-                               } else {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: bad section\n",
-                                           dir, fn);
-                                       if (use_all)
-                                               sec = fn;
-                                       else
-                                               continue;
-                               }
-                       } else if ('\0' == *arch) {
-                               if (NULL != strchr(fn, '.')) {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: bad architecture\n",
-                                           dir, fn);
-                                       if (0 == use_all)
-                                               continue;
-                               }
-                               arch = fn;
-                       } else {
-                               if (warnings) fprintf(stderr, "%s/%s: "
-                                   "excessive subdirectory\n", dir, fn);
-                               if (0 == use_all)
-                                       continue;
-                       }
+       ofl = SQLITE_OPEN_READWRITE;
+       if (0 == real) {
+               file = MANDOC_DB "~";
+               if (-1 == remove(file) && ENOENT != errno) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(file, NULL);
+                       return(0);
+               }
+               ofl |= SQLITE_OPEN_EXCLUSIVE;
+       } else
+               file = MANDOC_DB;
+
+       rc = sqlite3_open_v2(file, &db, ofl, NULL);
+       if (SQLITE_OK == rc) 
+               goto prepare_statements;
+       if (SQLITE_CANTOPEN != rc) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, NULL);
+               return(0);
+       }
  
-                       buf[0] = '\0';
-                       strlcat(buf, dir, PATH_MAX);
-                       strlcat(buf, "/", PATH_MAX);
-                       sz = strlcat(buf, fn, PATH_MAX);
+       sqlite3_close(db);
+       db = NULL;
  
-                       if (PATH_MAX <= sz) {
-                               if (warnings) fprintf(stderr, "%s/%s: "
-                                   "path too long\n", dir, fn);
-                               continue;
-                       }
+       if (SQLITE_OK != (rc = sqlite3_open(file, &db))) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, NULL);
+               return(0);
+       }
  
-                       if (verb > 1)
-                               printf("%s: scanning\n", buf);
+       /*
+        * XXX The first three columns in table mpages are obsolete
+        * and only kept for backward compatibility
+        * until apropos(1) and friends have caught up.
+        */
+       sql = "CREATE TABLE \"mpages\" (\n"
+             " \"file\" TEXT NOT NULL,\n"
+             " \"sec\" TEXT NOT NULL,\n"
+             " \"arch\" TEXT NOT NULL,\n"
+             " \"desc\" TEXT NOT NULL,\n"
+             " \"form\" INTEGER NOT NULL,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE TABLE \"mlinks\" (\n"
+             " \"file\" TEXT NOT NULL,\n"
+             " \"sec\" TEXT NOT NULL,\n"
+             " \"arch\" TEXT NOT NULL,\n"
+             " \"name\" TEXT NOT NULL,\n"
+             " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+               "ON DELETE CASCADE,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE TABLE \"keys\" (\n"
+             " \"bits\" INTEGER NOT NULL,\n"
+             " \"key\" TEXT NOT NULL,\n"
+             " \"pageid\" INTEGER NOT NULL REFERENCES mpages(id) "
+               "ON DELETE CASCADE,\n"
+             " \"id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+             ");\n"
+             "\n"
+             "CREATE INDEX \"key_index\" ON keys (key);\n";
+
+       if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
+               exitcode = (int)MANDOCLEVEL_SYSERR;
+               say(file, "%s", sqlite3_errmsg(db));
+               return(0);
+       }
  
-                       ofile_dirbuild(buf, sec, arch, src_form, of);
-                       continue;
-               }
+prepare_statements:
+       SQL_EXEC("PRAGMA foreign_keys = ON");
+       sql = "DELETE FROM mpages where file=?";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
+       sql = "INSERT INTO mpages "
+               "(file,sec,arch,desc,form) VALUES (?,?,?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
+       sql = "INSERT INTO mlinks "
+               "(file,sec,arch,name,pageid) VALUES (?,?,?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
+       sql = "INSERT INTO keys "
+               "(bits,key,pageid) VALUES (?,?,?)";
+       sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
  
-               if (DT_REG != dp->d_type) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: not a regular file\n",
-                                   dir, fn);
-                       continue;
-               }
-               if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
-                       continue;
-               if ('\0' == *psec) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: file outside section\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-               }
+       /*
+        * When opening a new database, we can turn off
+        * synchronous mode for much better performance.
+        */
  
-               /*
-                * By default, skip files where the file name suffix
-                * does not agree with the section directory
-                * they are located in.
-                */
+       if (real)
+               SQL_EXEC("PRAGMA synchronous = OFF");
  
-               suffix = strrchr(fn, '.');
-               if (NULL == suffix) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: no filename suffix\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-               } else if ((MANDOC_SRC & src_form &&
-                               strcmp(suffix + 1, psec)) ||
-                           (MANDOC_FORM & src_form &&
-                               strcmp(suffix + 1, "0"))) {
-                       if (warnings)
-                               fprintf(stderr,
-                                   "%s/%s: wrong filename suffix\n",
-                                   dir, fn);
-                       if (0 == use_all)
-                               continue;
-                       if ('0' == suffix[1])
-                               src_form |= MANDOC_FORM;
-                       else if ('1' <= suffix[1] && '9' >= suffix[1])
-                               src_form |= MANDOC_SRC;
-               }
+       return(1);
+}
  
-               /*
-                * Skip formatted manuals if a source version is
-                * available.  Ignore the age: it is very unlikely
-                * that people install newer formatted base manuals
-                * when they used to have source manuals before,
-                * and in ports, old manuals get removed on update.
-                */
-               if (0 == use_all && MANDOC_FORM & src_form &&
-                               '\0' != *psec) {
-                       buf[0] = '\0';
-                       strlcat(buf, dir, PATH_MAX);
-                       p = strrchr(buf, '/');
-                       if ('\0' != *parch && NULL != p)
-                               for (p--; p > buf; p--)
-                                       if ('/' == *p)
-                                               break;
-                       if (NULL == p)
-                               p = buf;
-                       else
-                               p++;
-                       if (0 == strncmp("cat", p, 3))
-                               memcpy(p, "man", 3);
-                       strlcat(buf, "/", PATH_MAX);
-                       sz = strlcat(buf, fn, PATH_MAX);
-                       if (sz >= PATH_MAX) {
-                               if (warnings) fprintf(stderr,
-                                   "%s/%s: path too long\n",
-                                   dir, fn);
-                               continue;
-                       }
-                       q = strrchr(buf, '.');
-                       if (NULL != q && p < q++) {
-                               *q = '\0';
-                               sz = strlcat(buf, psec, PATH_MAX);
-                               if (sz >= PATH_MAX) {
-                                       if (warnings) fprintf(stderr,
-                                           "%s/%s: path too long\n",
-                                           dir, fn);
-                                       continue;
-                               }
-                               if (0 == access(buf, R_OK))
-                                       continue;
-                       }
-               }
+/*
+ * Zero-filling allocator: returns sz bytes obtained from
+ * mandoc_calloc().  The second argument is accepted but not used.
+ * NOTE(review): the three-function set hash_halloc/hash_alloc/hash_free
+ * looks like the callback trio of an ohash_info; the registration is
+ * not visible here, confirm against the caller.
+ */
+static void *
+hash_halloc(size_t sz, void *arg)
+{
  
-               buf[0] = '\0';
-               assert('.' == dir[0]);
-               if ('/' == dir[1]) {
-                       strlcat(buf, dir + 2, PATH_MAX);
-                       strlcat(buf, "/", PATH_MAX);
-               }
-               sz = strlcat(buf, fn, PATH_MAX);
-               if (sz >= PATH_MAX) {
-                       if (warnings) fprintf(stderr,
-                           "%s/%s: path too long\n", dir, fn);
-                       continue;
-               }
+       return(mandoc_calloc(sz, 1));
+}
  
-               nof = mandoc_calloc(1, sizeof(struct of));
-               nof->fname = mandoc_strdup(buf);
-               nof->sec = mandoc_strdup(psec);
-               nof->arch = mandoc_strdup(parch);
-               nof->src_form = src_form;
+/*
+ * Plain allocator: returns sz uninitialised bytes obtained from
+ * mandoc_malloc().  The second argument is accepted but not used.
+ */
+static void *
+hash_alloc(size_t sz, void *arg)
+{
  
-               /*
-                * Remember the file name without the extension,
-                * to be used as the page title in the database.
-                */
+       return(mandoc_malloc(sz));
+}
  
-               if (NULL != suffix)
-                       *suffix = '\0';
-               nof->title = mandoc_strdup(fn);
+/*
+ * Release memory with free(3).  The size and the extra argument are
+ * accepted but not used.
+ */
+static void
+hash_free(void *p, size_t sz, void *arg)
+{
  
-               /*
-                * Add the structure to the list.
-                */
+       free(p);
+}
  
-               if (verb > 1)
-                       printf("%s: scheduling\n", buf);
+/*
+ * Change the working directory to targetdir and record its resolved
+ * path in basedir.
+ *
+ * On the first call, the current directory is remembered in startdir
+ * and a descriptor to it is kept open; a NULL targetdir then means
+ * "use the starting directory".  On every later call, the function
+ * first returns to the starting directory with fchdir(); a NULL
+ * targetdir then means "stay there, close the descriptor, and return".
+ *
+ * Returns 1 on success and 0 on failure.  Failures set exitcode and
+ * are reported through say(), except when the descriptor is already
+ * unusable (-1) or when getcwd() fails with a NULL targetdir.
+ * Once the descriptor has been closed it is marked as -1, so that
+ * further calls fail cleanly instead of touching a stale descriptor.
+ */
+static int
+set_basedir(const char *targetdir)
+{
+       static char      startdir[PATH_MAX];
+       static int       fd;
  
-               if (NULL == *of) {
-                       *of = nof;
-                       (*of)->first = nof;
-               } else {
-                       nof->first = (*of)->first;
-                       (*of)->next = nof;
-                       *of = nof;
+       /*
+        * Remember where we started by keeping a fd open to the origin
+        * path component: throughout this utility, we chdir() a lot to
+        * handle relative paths, and by doing this, we can return to
+        * the starting point.
+        */
+       if ('\0' == *startdir) {
+               if (NULL == getcwd(startdir, PATH_MAX)) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       if (NULL != targetdir)
+                               say(".", NULL);
+                       return(0);
+               }
+               if (-1 == (fd = open(startdir, O_RDONLY, 0))) {
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(startdir, NULL);
+                       return(0);
+               }
+               if (NULL == targetdir)
+                       targetdir = startdir;
+       } else {
+               if (-1 == fd)
+                       return(0);
+               if (-1 == fchdir(fd)) {
+                       close(fd);
+                       /* Never reuse the closed descriptor. */
+                       fd = -1;
+                       basedir[0] = '\0';
+                       exitcode = (int)MANDOCLEVEL_SYSERR;
+                       say(startdir, NULL);
+                       return(0);
+               }
+               if (NULL == targetdir) {
+                       close(fd);
+                       /* Never reuse the closed descriptor. */
+                       fd = -1;
+                       return(1);
                 }
         }
-
-       closedir(d);
+       if (NULL == realpath(targetdir, basedir)) {
+               basedir[0] = '\0';
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say(targetdir, NULL);
+               return(0);
+       } else if (-1 == chdir(basedir)) {
+               exitcode = (int)MANDOCLEVEL_BADARG;
+               say("", NULL);
+               return(0);
+       }
+       return(1);
  }
  
+/*
+ * Print a diagnostic message to stderr.
+ * The message is prefixed with basedir and file (each only if it is
+ * not the empty string) followed by ": ".  If format is NULL, the
+ * text produced by perror(3) for the current errno follows; otherwise
+ * format and the remaining arguments are printed with vfprintf(3)
+ * and a newline is appended.
+ * file must not be NULL because it is dereferenced unconditionally.
+ * NOTE(review): errno is read by perror() only after several stdio
+ * calls; confirm that these cannot change errno on this platform.
+ */
  static void
-ofile_free(struct of *of)
+say(const char *file, const char *format, ...)
  {
-       struct of       *nof;
-
-       if (NULL != of)
-               of = of->first;
-
-       while (NULL != of) {
-               nof = of->next;
-               free(of->fname);
-               free(of->sec);
-               free(of->arch);
-               free(of->title);
-               free(of);
-               of = nof;
+       va_list          ap;
+
+       if ('\0' != *basedir)
+               fprintf(stderr, "%s", basedir);
+       /* A double slash separates basedir from file. */
+       if ('\0' != *basedir && '\0' != *file)
+               fputs("//", stderr);
+       if ('\0' != *file)
+               fprintf(stderr, "%s", file);
+       fputs(": ", stderr);
+
+       /* No format: report the system error text instead. */
+       if (NULL == format) {
+               perror(NULL);
+               return;
         }
+
+       va_start(ap, format);
+       vfprintf(stderr, format, ap);
+       va_end(ap);
+
+       fputc('\n', stderr);
  }
diff --git a/usr.bin/mandoc/mandocdb.h b/usr.bin/mandoc/mandocdb.h

deleted file mode 100644 (file)

index af61f3f..0000000
--- a/usr.bin/mandoc/mandocdb.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*      $Id: mandocdb.h,v 1.6 2012/01/09 01:59:08 schwarze Exp $ */
-/*
- * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef MANDOCDB_H
-#define MANDOCDB_H
-
-#define        MANDOC_DB       "mandoc.db"
-#define        MANDOC_IDX      "mandoc.index"
-
-#define        TYPE_An         0x0000000000000001ULL
-#define        TYPE_Ar         0x0000000000000002ULL
-#define        TYPE_At         0x0000000000000004ULL
-#define        TYPE_Bsx        0x0000000000000008ULL
-#define        TYPE_Bx         0x0000000000000010ULL
-#define        TYPE_Cd         0x0000000000000020ULL
-#define        TYPE_Cm         0x0000000000000040ULL
-#define        TYPE_Dv         0x0000000000000080ULL
-#define        TYPE_Dx         0x0000000000000100ULL
-#define        TYPE_Em         0x0000000000000200ULL
-#define        TYPE_Er         0x0000000000000400ULL
-#define        TYPE_Ev         0x0000000000000800ULL
-#define        TYPE_Fa         0x0000000000001000ULL
-#define        TYPE_Fl         0x0000000000002000ULL
-#define        TYPE_Fn         0x0000000000004000ULL
-#define        TYPE_Ft         0x0000000000008000ULL
-#define        TYPE_Fx         0x0000000000010000ULL
-#define        TYPE_Ic         0x0000000000020000ULL
-#define        TYPE_In         0x0000000000040000ULL
-#define        TYPE_Lb         0x0000000000080000ULL
-#define        TYPE_Li         0x0000000000100000ULL
-#define        TYPE_Lk         0x0000000000200000ULL
-#define        TYPE_Ms         0x0000000000400000ULL
-#define        TYPE_Mt         0x0000000000800000ULL
-#define        TYPE_Nd         0x0000000001000000ULL
-#define        TYPE_Nm         0x0000000002000000ULL
-#define        TYPE_Nx         0x0000000004000000ULL
-#define        TYPE_Ox         0x0000000008000000ULL
-#define        TYPE_Pa         0x0000000010000000ULL
-#define        TYPE_Rs         0x0000000020000000ULL
-#define        TYPE_Sh         0x0000000040000000ULL
-#define        TYPE_Ss         0x0000000080000000ULL
-#define        TYPE_St         0x0000000100000000ULL
-#define        TYPE_Sy         0x0000000200000000ULL
-#define        TYPE_Tn         0x0000000400000000ULL
-#define        TYPE_Va         0x0000000800000000ULL
-#define        TYPE_Vt         0x0000001000000000ULL
-#define        TYPE_Xr         0x0000002000000000ULL
-
-#endif /*!MANDOCDB_H */
diff --git a/usr.bin/mandoc/mansearch.c b/usr.bin/mandoc/mansearch.c

new file mode 100644 (file)

index 0000000..222fd58
--- /dev/null
+++ b/usr.bin/mandoc/mansearch.c
@@ -0,0 +1,565 @@
+/*     $Id: mansearch.c,v 1.1 2013/12/31 00:40:19 schwarze Exp $ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <ohash.h>
+#include <sqlite3.h>
+
+#include "mandoc.h"
+#include "manpath.h"
+#include "mansearch.h"
+
+/*
+ * Helpers for binding parameters of a prepared statement.
+ * Each macro binds the value _v to parameter number _i of statement
+ * _s and then increments _i, so _i has to be a modifiable variable.
+ * A failed bind is only reported on stderr, using the error message
+ * of the database handle _db; execution continues.
+ * SQL_BIND_TEXT and SQL_BIND_BLOB pass SQLITE_STATIC, so the bound
+ * memory is not copied by these calls and has to remain valid while
+ * the binding is in use.
+ * SQL_BIND_BLOB binds the bytes of the object _v itself
+ * (address &_v, length sizeof(_v)).
+ */
+#define        SQL_BIND_TEXT(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_text \
+               ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+#define        SQL_BIND_INT64(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_int64 \
+               ((_s), (_i)++, (_v))) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+#define        SQL_BIND_BLOB(_db, _s, _i, _v) \
+       do { if (SQLITE_OK != sqlite3_bind_blob \
+               ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \
+               fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
+       } while (0)
+
+/*
+ * One term of a search expression; terms form a singly linked list.
+ * A term is either a substring search (substr is not NULL) or a
+ * regular expression search (substr is NULL and regexp is compiled).
+ */
+struct expr {
+       uint64_t         bits;    /* type-mask */
+       const char      *substr;  /* to search for, if applicable */
+       regex_t          regexp;  /* compiled regexp, if applicable */
+       struct expr     *next;    /* next in sequence */
+};
+
+/*
+ * One page found by the search query.
+ * Entries are stored in a hash table keyed on the id member.
+ */
+struct match {
+       uint64_t         id; /* identifier in database */
+       char            *file; /* relative filepath of manpage */
+       char            *desc; /* description of manpage */
+       int              form; /* 0 == catpage */
+};
+
+/*
+ * Association of a field name, as it may be given in a search
+ * expression, with the corresponding TYPE_* bit mask.
+ */
+struct type {
+       uint64_t         bits; /* TYPE_* mask selected by the name */
+       const char      *name; /* field name, compared ignoring case */
+};
+
+/*
+ * Table of the field names accepted in search expressions.
+ * "Fo" deliberately shares the bit of "Fn", "any" selects all bits,
+ * and the entry with zero bits and a NULL name terminates the table.
+ */
+static const struct type types[] = {
+       { TYPE_An,  "An" },
+       { TYPE_Ar,  "Ar" },
+       { TYPE_At,  "At" },
+       { TYPE_Bsx, "Bsx" },
+       { TYPE_Bx,  "Bx" },
+       { TYPE_Cd,  "Cd" },
+       { TYPE_Cm,  "Cm" },
+       { TYPE_Dv,  "Dv" },
+       { TYPE_Dx,  "Dx" },
+       { TYPE_Em,  "Em" },
+       { TYPE_Er,  "Er" },
+       { TYPE_Ev,  "Ev" },
+       { TYPE_Fa,  "Fa" },
+       { TYPE_Fl,  "Fl" },
+       { TYPE_Fn,  "Fn" },
+       { TYPE_Fn,  "Fo" },
+       { TYPE_Ft,  "Ft" },
+       { TYPE_Fx,  "Fx" },
+       { TYPE_Ic,  "Ic" },
+       { TYPE_In,  "In" },
+       { TYPE_Lb,  "Lb" },
+       { TYPE_Li,  "Li" },
+       { TYPE_Lk,  "Lk" },
+       { TYPE_Ms,  "Ms" },
+       { TYPE_Mt,  "Mt" },
+       { TYPE_Nd,  "Nd" },
+       { TYPE_Nm,  "Nm" },
+       { TYPE_Nx,  "Nx" },
+       { TYPE_Ox,  "Ox" },
+       { TYPE_Pa,  "Pa" },
+       { TYPE_Rs,  "Rs" },
+       { TYPE_Sh,  "Sh" },
+       { TYPE_Ss,  "Ss" },
+       { TYPE_St,  "St" },
+       { TYPE_Sy,  "Sy" },
+       { TYPE_Tn,  "Tn" },
+       { TYPE_Va,  "Va" },
+       { TYPE_Vt,  "Vt" },
+       { TYPE_Xr,  "Xr" },
+       { ~0ULL,    "any" },
+       { 0ULL, NULL }
+};
+
+static void            *hash_alloc(size_t, void *);
+static void             hash_free(void *, size_t, void *);
+static void            *hash_halloc(size_t, void *);
+static struct expr     *exprcomp(const struct mansearch *, 
+                               int, char *[]);
+static void             exprfree(struct expr *);
+static struct expr     *exprterm(const struct mansearch *, char *, int);
+static void             sql_match(sqlite3_context *context,
+                               int argc, sqlite3_value **argv);
+static void             sql_regexp(sqlite3_context *context,
+                               int argc, sqlite3_value **argv);
+static char            *sql_statement(const struct expr *,
+                               const char *, const char *);
+
+/*
+ * Search the mandoc.db databases located in the directories listed
+ * in "paths" for manual pages matching the expression in argc/argv.
+ *
+ * search:     search options (arch, sec, deftype, flags)
+ * paths:      directories to visit; relative ones are resolved
+ *             against the working directory at the time of the call
+ * argc/argv:  expression tokens, see exprcomp()
+ * res:        set to a newly allocated array of results, or NULL
+ * sz:         set to the number of entries in *res
+ *
+ * Each result carries newly allocated file, names and desc strings;
+ * nothing in here releases them, so that is left to the caller.
+ * Directories or databases that cannot be entered or opened are
+ * reported on stderr and skipped.
+ *
+ * Returns 0 if there are no arguments, if the expression cannot be
+ * compiled, or if the working directory cannot be determined and
+ * opened; returns 1 otherwise.  The working directory is not
+ * restored before returning.
+ */
+int
+mansearch(const struct mansearch *search,
+               const struct manpaths *paths,
+               int argc, char *argv[],
+               struct manpage **res, size_t *sz)
+{
+       int              fd, rc, c;
+       int64_t          id;
+       char             buf[PATH_MAX];
+       char            *sql, *newnames;
+       const char      *oldnames, *sep1, *name, *sec, *sep2, *arch;
+       struct manpage  *mpage;
+       struct expr     *e, *ep;
+       sqlite3         *db;
+       sqlite3_stmt    *s;
+       struct match    *mp;
+       struct ohash_info info;
+       struct ohash     htab;
+       unsigned int     idx;
+       size_t           i, j, cur, maxres;
+
+       memset(&info, 0, sizeof(struct ohash_info));
+
+       info.halloc = hash_halloc;
+       info.alloc = hash_alloc;
+       info.hfree = hash_free;
+       info.key_offset = offsetof(struct match, id);
+
+       *sz = cur = maxres = 0;
+       sql = NULL;
+       *res = NULL;
+       fd = -1;
+       e = NULL;
+       rc = 0;
+
+       if (0 == argc)
+               goto out;
+       if (NULL == (e = exprcomp(search, argc, argv)))
+               goto out;
+
+       /*
+        * Save a descriptor to the current working directory.
+        * Since pathnames in the "paths" variable might be relative,
+        * and we'll be chdir()ing into them, we need to keep a handle
+        * on our current directory from which to start the chdir().
+        */
+
+       if (NULL == getcwd(buf, PATH_MAX)) {
+               perror(NULL);
+               goto out;
+       } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
+               perror(buf);
+               goto out;
+       }
+
+       sql = sql_statement(e, search->arch, search->sec);
+
+       /*
+        * Loop over the directories (containing databases) for us to
+        * search.
+        * Don't let missing/bad databases/directories faze us.
+        * In each, try to open the resident database and, if it opens,
+        * scan it for our match expression.
+        */
+
+       for (i = 0; i < paths->sz; i++) {
+               if (-1 == fchdir(fd)) {
+                       perror(buf);
+                       /*
+                        * Discard all results collected so far,
+                        * including the strings they own, and make
+                        * sure the caller sees an empty result set
+                        * rather than a pointer to freed memory.
+                        */
+                       for (j = 0; j < cur; j++) {
+                               free((*res)[j].file);
+                               free((*res)[j].names);
+                               free((*res)[j].desc);
+                       }
+                       free(*res);
+                       *res = NULL;
+                       cur = maxres = 0;
+                       break;
+               } else if (-1 == chdir(paths->paths[i])) {
+                       perror(paths->paths[i]);
+                       continue;
+               }
+
+               c =  sqlite3_open_v2
+                       (MANDOC_DB, &db,
+                        SQLITE_OPEN_READONLY, NULL);
+
+               if (SQLITE_OK != c) {
+                       perror(MANDOC_DB);
+                       sqlite3_close(db);
+                       continue;
+               }
+
+               /*
+                * Define the SQL functions for substring
+                * and regular expression matching.
+                */
+
+               c = sqlite3_create_function(db, "match", 2,
+                   SQLITE_ANY, NULL, sql_match, NULL, NULL);
+               assert(SQLITE_OK == c);
+               c = sqlite3_create_function(db, "regexp", 2,
+                   SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
+               assert(SQLITE_OK == c);
+
+               /*
+                * Without a usable statement there is nothing to
+                * search in this database, so skip it.
+                */
+
+               j = 1;
+               c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
+               if (SQLITE_OK != c) {
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+                       sqlite3_close(db);
+                       continue;
+               }
+
+               /* The order has to match sql_statement(). */
+
+               if (NULL != search->arch)
+                       SQL_BIND_TEXT(db, s, j, search->arch);
+               if (NULL != search->sec)
+                       SQL_BIND_TEXT(db, s, j, search->sec);
+
+               for (ep = e; NULL != ep; ep = ep->next) {
+                       if (NULL == ep->substr) {
+                               SQL_BIND_BLOB(db, s, j, ep->regexp);
+                       } else
+                               SQL_BIND_TEXT(db, s, j, ep->substr);
+                       SQL_BIND_INT64(db, s, j, ep->bits);
+               }
+
+               memset(&htab, 0, sizeof(struct ohash));
+               ohash_init(&htab, 4, &info);
+
+               /*
+                * Hash each entry on its [unique] document identifier.
+                * This is a uint64_t.
+                * Instead of using a hash function, simply convert the
+                * uint64_t to a uint32_t, the hash value's type.
+                * This gives good performance and preserves the
+                * distribution of buckets in the table.
+                */
+               while (SQLITE_ROW == (c = sqlite3_step(s))) {
+                       id = sqlite3_column_int64(s, 0);
+                       idx = ohash_lookup_memory
+                               (&htab, (char *)&id,
+                                sizeof(uint64_t), (uint32_t)id);
+
+                       /* Several keys may match the same page. */
+                       if (NULL != ohash_find(&htab, idx))
+                               continue;
+
+                       mp = mandoc_calloc(1, sizeof(struct match));
+                       mp->id = id;
+                       mp->file = mandoc_strdup
+                               ((char *)sqlite3_column_text(s, 3));
+                       mp->desc = mandoc_strdup
+                               ((char *)sqlite3_column_text(s, 4));
+                       mp->form = sqlite3_column_int(s, 5);
+                       ohash_insert(&htab, idx, mp);
+               }
+
+               if (SQLITE_DONE != c)
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+
+               sqlite3_finalize(s);
+
+               c = sqlite3_prepare_v2(db,
+                   "SELECT * FROM mlinks WHERE pageid=?",
+                   -1, &s, NULL);
+               if (SQLITE_OK != c)
+                       fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+
+               /*
+                * Turn each match into a result entry and
+                * collect the names of all links to the page.
+                */
+
+               for (mp = ohash_first(&htab, &idx);
+                               NULL != mp;
+                               mp = ohash_next(&htab, &idx)) {
+                       if (cur + 1 > maxres) {
+                               maxres += 1024;
+                               *res = mandoc_realloc
+                                       (*res, maxres * sizeof(struct manpage));
+                       }
+                       mpage = *res + cur;
+                       if (-1 == asprintf(&mpage->file, "%s/%s",
+                           paths->paths[i], mp->file)) {
+                               perror(0);
+                               exit((int)MANDOCLEVEL_SYSERR);
+                       }
+                       mpage->names = NULL;
+                       /* The description string changes owner. */
+                       mpage->desc = mp->desc;
+                       mpage->form = mp->form;
+
+                       j = 1;
+                       SQL_BIND_INT64(db, s, j, mp->id);
+                       while (SQLITE_ROW == (c = sqlite3_step(s))) {
+                               if (NULL == mpage->names) {
+                                       oldnames = "";
+                                       sep1 = "";
+                               } else {
+                                       oldnames = mpage->names;
+                                       sep1 = ", ";
+                               }
+                               sec = (const char *)
+                                   sqlite3_column_text(s, 1);
+                               arch = (const char *)
+                                   sqlite3_column_text(s, 2);
+                               name = (const char *)
+                                   sqlite3_column_text(s, 3);
+                               sep2 = '\0' == *arch ? "" : "/";
+                               if (-1 == asprintf(&newnames,
+                                   "%s%s%s(%s%s%s)", oldnames, sep1,
+                                   name, sec, sep2, arch)) {
+                                       perror(0);
+                                       exit((int)MANDOCLEVEL_SYSERR);
+                               }
+                               free(mpage->names);
+                               mpage->names = newnames;
+                       }
+                       if (SQLITE_DONE != c)
+                               fprintf(stderr, "%s\n", sqlite3_errmsg(db));
+                       sqlite3_reset(s);
+
+                       free(mp->file);
+                       free(mp);
+                       cur++;
+               }
+
+               sqlite3_finalize(s);
+               sqlite3_close(db);
+               ohash_delete(&htab);
+       }
+       rc = 1;
+out:
+       exprfree(e);
+       if (-1 != fd)
+               close(fd);
+       free(sql);
+       *sz = cur;
+       return(rc);
+}
+
+/*
+ * Application-defined SQL function providing substring search.
+ * The first argument is the text to look for, the second argument
+ * is the text to look in; the comparison ignores case.
+ * The SQL LIKE and GLOB operators are not used for this purpose
+ * because they would require escaping metacharacters in the
+ * string being searched for.
+ */
+static void
+sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+       const char      *haystack, *needle;
+
+       assert(2 == argc);
+       haystack = (const char *)sqlite3_value_text(argv[1]);
+       needle = (const char *)sqlite3_value_text(argv[0]);
+       sqlite3_result_int(context,
+           NULL != strcasestr(haystack, needle));
+}
+
+/*
+ * Application-defined SQL function providing regular expression
+ * search.  The first argument is a blob holding a compiled regex_t,
+ * the second argument is the text to be matched against it.
+ * The result is 1 if the expression matches and 0 otherwise.
+ */
+static void
+sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+       const regex_t   *re;
+       const char      *text;
+
+       assert(2 == argc);
+       re = sqlite3_value_blob(argv[0]);
+       text = (const char *)sqlite3_value_text(argv[1]);
+       sqlite3_result_int(context, !regexec(re, text, 0, NULL, 0));
+}
+
+/*
+ * Build the text of the search query.
+ * The query joins the keys with their pages, optionally restricts
+ * the architecture and the section, and then accepts any row that
+ * matches at least one of the terms of the expression "e".
+ * Each term contributes two parameters, the pattern and the type
+ * mask, in list order.
+ * The string returned is allocated and has to be freed by the caller.
+ */
+static char *
+sql_statement(const struct expr *e, const char *arch, const char *sec)
+{
+       static const char by_substr[] = "(key MATCH ? AND bits & ?)";
+       static const char by_regexp[] = "(key REGEXP ? AND bits & ?)";
+       static const char by_arch[] = "arch = ? AND ";
+       static const char by_sec[] = "sec = ? AND ";
+       const char      *term, *glue;
+       char            *sql;
+       size_t           cap;
+
+       sql = mandoc_strdup
+               ("SELECT pageid,bits,key,file,desc,form,sec,arch "
+                "FROM keys "
+                "INNER JOIN mpages ON mpages.id=keys.pageid "
+                "WHERE ");
+
+       /* Capacity needed so far, terminating NUL included. */
+       cap = strlen(sql) + 1;
+
+       if (NULL != arch) {
+               cap += strlen(by_arch);
+               sql = mandoc_realloc(sql, cap);
+               strlcat(sql, by_arch, cap);
+       }
+
+       if (NULL != sec) {
+               cap += strlen(by_sec);
+               sql = mandoc_realloc(sql, cap);
+               strlcat(sql, by_sec, cap);
+       }
+
+       cap += 1;
+       sql = mandoc_realloc(sql, cap);
+       strlcat(sql, "(", cap);
+
+       /* The last term closes the group, the others add " OR ". */
+       while (NULL != e) {
+               term = NULL == e->substr ? by_regexp : by_substr;
+               glue = NULL == e->next ? ");" : " OR ";
+               cap += strlen(term) + strlen(glue);
+               sql = mandoc_realloc(sql, cap);
+               strlcat(sql, term, cap);
+               strlcat(sql, glue, cap);
+               e = e->next;
+       }
+
+       return(sql);
+}
+
+/*
+ * Compile a set of string tokens into an expression.
+ * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
+ * "(", "foo=bar", etc.).
+ * A token "-i" is not an atom itself: it requests case-insensitive
+ * matching for the atom that follows it.
+ * Returns the first term of the resulting list, or NULL if there are
+ * no tokens, if "-i" is the last token, or if a term fails to compile.
+ * Terms built before a failure are released again.
+ */
+static struct expr *
+exprcomp(const struct mansearch *search, int argc, char *argv[])
+{
+       int              i, cs;
+       struct expr     *first, *next, *cur;
+
+       first = cur = NULL;
+
+       for (i = 0; i < argc; i++) {
+               if (0 == strcmp("-i", argv[i])) {
+                       if (++i >= argc) {
+                               /* Do not leak the terms built so far. */
+                               exprfree(first);
+                               return(NULL);
+                       }
+                       cs = 0;
+               } else
+                       cs = 1;
+               next = exprterm(search, argv[i], cs);
+               if (NULL == next) {
+                       exprfree(first);
+                       return(NULL);
+               }
+               if (NULL != first) {
+                       cur->next = next;
+                       cur = next;
+               } else
+                       cur = first = next;
+       }
+
+       return(first);
+}
+
+/*
+ * Compile a single expression atom into a term.
+ * "buf" is modified in place, and the substring stored in the term
+ * points into it, so it has to outlive the term.
+ * "cs" selects case-sensitive (non-zero) or case-insensitive (zero)
+ * matching for regular expressions.
+ * Returns a newly allocated term, or NULL if the atom is empty,
+ * if a regular expression does not compile, or if a field name
+ * is not known.
+ */
+static struct expr *
+exprterm(const struct mansearch *search, char *buf, int cs)
+{
+       struct expr     *e;
+       char            *key, *v;
+       size_t           i;
+
+       if ('\0' == *buf)
+               return(NULL);
+
+       e = mandoc_calloc(1, sizeof(struct expr));
+
+       /* "whatis" mode uses an opaque string and default fields. */
+
+       if (MANSEARCH_WHATIS & search->flags) {
+               e->substr = buf;
+               e->bits = search->deftype;
+               return(e);
+       }
+
+       /*
+        * If neither '=' nor '~' is present, do a substring search
+        * for the whole atom in the default fields.
+        * If the atom begins with '=' or '~', use the default fields
+        * for the search given after that character.
+        */
+
+       if (NULL == (v = strpbrk(buf, "=~"))) {
+               e->substr = buf;
+               e->bits = search->deftype;
+               return(e);
+       } else if (v == buf)
+               e->bits = search->deftype;
+
+       /* '~' introduces a regular expression, '=' a substring. */
+
+       if ('~' == *v++) {
+               if (regcomp(&e->regexp, v,
+                   REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE))) {
+                       free(e);
+                       return(NULL);
+               }
+       } else
+               e->substr = v;
+
+       /* Cut off the field list in front of the operator. */
+       v[-1] = '\0';
+
+       /*
+        * Parse out all possible fields.
+        * If the field doesn't resolve, bail.
+        */
+
+       while (NULL != (key = strsep(&buf, ","))) {
+               if ('\0' == *key)
+                       continue;
+               i = 0;
+               while (types[i].bits &&
+                       strcasecmp(types[i].name, key))
+                       i++;
+               if (0 == types[i].bits) {
+                       /*
+                        * A compiled regular expression owns memory
+                        * that free() alone would not release.
+                        */
+                       if (NULL == e->substr)
+                               regfree(&e->regexp);
+                       free(e);
+                       return(NULL);
+               }
+               e->bits |= types[i].bits;
+       }
+
+       return(e);
+}
+
+/*
+ * Release a list of expression terms; a NULL list is accepted.
+ * Terms without a substring carry a compiled regular expression,
+ * which is released with regfree() before the term itself.
+ * The substrings are not freed: they point into the strings
+ * the terms were compiled from.
+ */
+static void
+exprfree(struct expr *p)
+{
+       struct expr     *pp;
+
+       while (NULL != p) {
+               pp = p->next;
+               if (NULL == p->substr)
+                       regfree(&p->regexp);
+               free(p);
+               p = pp;
+       }
+}
+
+/*
+ * Hash table callback (ohash_info.halloc): hand out sz zeroed bytes.
+ * The callback argument is not needed.
+ */
+static void *
+hash_halloc(size_t sz, void *arg)
+{
+       void            *p;
+
+       (void)arg;
+       p = mandoc_calloc(sz, 1);
+       return(p);
+}
+
+/*
+ * Hash table callback (ohash_info.alloc): hand out sz bytes.
+ * The callback argument is not needed.
+ */
+static void *
+hash_alloc(size_t sz, void *arg)
+{
+       void            *p;
+
+       (void)arg;
+       p = mandoc_malloc(sz);
+       return(p);
+}
+
+/*
+ * Hash table callback (ohash_info.hfree): release memory.
+ * Neither the size nor the callback argument is needed.
+ */
+static void
+hash_free(void *p, size_t sz, void *arg)
+{
+
+       (void)sz;
+       (void)arg;
+       free(p);
+}
diff --git a/usr.bin/mandoc/mansearch.h b/usr.bin/mandoc/mansearch.h

new file mode 100644 (file)

index 0000000..bcb0696
--- /dev/null
+++ b/usr.bin/mandoc/mansearch.h
@@ -0,0 +1,87 @@
+/*     $Id: mansearch.h,v 1.1 2013/12/31 00:40:19 schwarze Exp $ */
+/*
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANSEARCH_H
+#define MANSEARCH_H
+
+#define        MANDOC_DB        "mandoc.db" /* presumably the database file name; its use is not visible in this header */
+
+#define        TYPE_An          0x0000000000000001ULL /* each TYPE_* is one distinct bit, so values can be OR-ed into a 64-bit mask */
+#define        TYPE_Ar          0x0000000000000002ULL
+#define        TYPE_At          0x0000000000000004ULL
+#define        TYPE_Bsx         0x0000000000000008ULL
+#define        TYPE_Bx          0x0000000000000010ULL
+#define        TYPE_Cd          0x0000000000000020ULL
+#define        TYPE_Cm          0x0000000000000040ULL
+#define        TYPE_Dv          0x0000000000000080ULL
+#define        TYPE_Dx          0x0000000000000100ULL
+#define        TYPE_Em          0x0000000000000200ULL
+#define        TYPE_Er          0x0000000000000400ULL
+#define        TYPE_Ev          0x0000000000000800ULL
+#define        TYPE_Fa          0x0000000000001000ULL
+#define        TYPE_Fl          0x0000000000002000ULL
+#define        TYPE_Fn          0x0000000000004000ULL
+#define        TYPE_Ft          0x0000000000008000ULL
+#define        TYPE_Fx          0x0000000000010000ULL
+#define        TYPE_Ic          0x0000000000020000ULL
+#define        TYPE_In          0x0000000000040000ULL
+#define        TYPE_Lb          0x0000000000080000ULL
+#define        TYPE_Li          0x0000000000100000ULL
+#define        TYPE_Lk          0x0000000000200000ULL
+#define        TYPE_Ms          0x0000000000400000ULL
+#define        TYPE_Mt          0x0000000000800000ULL
+#define        TYPE_Nd          0x0000000001000000ULL
+#define        TYPE_Nm          0x0000000002000000ULL
+#define        TYPE_Nx          0x0000000004000000ULL
+#define        TYPE_Ox          0x0000000008000000ULL
+#define        TYPE_Pa          0x0000000010000000ULL
+#define        TYPE_Rs          0x0000000020000000ULL
+#define        TYPE_Sh          0x0000000040000000ULL
+#define        TYPE_Ss          0x0000000080000000ULL
+#define        TYPE_St          0x0000000100000000ULL /* from here on the values no longer fit in 32 bits */
+#define        TYPE_Sy          0x0000000200000000ULL
+#define        TYPE_Tn          0x0000000400000000ULL
+#define        TYPE_Va          0x0000000800000000ULL
+#define        TYPE_Vt          0x0000001000000000ULL
+#define        TYPE_Xr          0x0000002000000000ULL /* bit 37, the last of the 38 bits defined here */
+
+__BEGIN_DECLS
+
+struct manpage { /* one search result; mansearch() hands these back through its res parameter */
+       char            *file; /* file name; to be prefixed by manpath by the consumer */
+       char            *names; /* a list of names with sections */
+       char            *desc; /* description of manpage */
+       int              form; /* 0 == catpage; NOTE(review): meaning of other values is not stated here */
+};
+
+struct mansearch { /* search options, passed to mansearch() as cfg */
+       const char      *arch; /* architecture, or NULL (presumably meaning no restriction) */
+       const char      *sec; /* manual section, or NULL (presumably meaning no restriction) */
+       uint64_t         deftype; /* TYPE_* bits applied to a search term that names no key */
+       int              flags; /* holds the MANSEARCH_* flag bits defined below */
+#define        MANSEARCH_WHATIS 0x01 /* whatis mode: equality, no key */
+};
+
+int    mansearch(const struct mansearch *cfg, /* search options; NOTE(review): meaning of the int result is not stated in this header */
+               const struct manpaths *paths, /* manpaths; struct manpaths is declared outside this header */
+               int argc, /* number of entries in argv */
+               char *argv[],  /* search terms */
+               struct manpage **res, /* out: results */
+               size_t *ressz); /* out: number of results returned (presumably the element count of *res) */
+
+__END_DECLS
+
+#endif /*!MANSEARCH_H*/
author	schwarze <schwarze@openbsd.org>
	Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
committer	schwarze <schwarze@openbsd.org>
	Tue, 31 Dec 2013 00:40:19 +0000 (00:40 +0000)
usr.bin/mandoc/Makefile		patch \| blob \| history
usr.bin/mandoc/apropos.c		patch \| blob \| history
usr.bin/mandoc/apropos_db.c	[deleted file]	patch \| blob \| history
usr.bin/mandoc/apropos_db.h	[deleted file]	patch \| blob \| history
usr.bin/mandoc/mandocdb.c		patch \| blob \| history
usr.bin/mandoc/mandocdb.h	[deleted file]	patch \| blob \| history
usr.bin/mandoc/mansearch.c	[new file with mode: 0644]	patch \| blob
usr.bin/mandoc/mansearch.h	[new file with mode: 0644]	patch \| blob