Major character table cleanup:
author schwarze <schwarze@openbsd.org>
Tue, 13 Oct 2015 22:57:49 +0000 (22:57 +0000)
committer schwarze <schwarze@openbsd.org>
Tue, 13 Oct 2015 22:57:49 +0000 (22:57 +0000)
* Use ohash(3) rather than a hand-rolled hash table.
* Make the character table static in the chars.c module:
There is no need to pass a pointer around, we most certainly
never want to use two different character tables concurrently.
* No need to keep the characters in a separate file chars.in;
that merely encourages downstream porters to mess with them.
* Sort the characters to agree with the mandoc_chars(7) manual page.
* Specify Unicode codepoints in hex, not decimal (that's the detail
that originally triggered this patch).
No functional change, minus 100 LOC, and I don't see a performance change.

16 files changed:
usr.bin/mandoc/cgi.c
usr.bin/mandoc/chars.c
usr.bin/mandoc/chars.in [deleted file]
usr.bin/mandoc/html.c
usr.bin/mandoc/html.h
usr.bin/mandoc/libmandoc.h
usr.bin/mandoc/main.c
usr.bin/mandoc/main.h
usr.bin/mandoc/mandoc.h
usr.bin/mandoc/mandocdb.c
usr.bin/mandoc/read.c
usr.bin/mandoc/roff.c
usr.bin/mandoc/term.c
usr.bin/mandoc/term.h
usr.bin/mandoc/term_ascii.c
usr.bin/mandoc/term_ps.c

index ffa6b6b..b2fc30c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cgi.c,v 1.48 2015/10/06 18:30:43 schwarze Exp $ */
+/*     $OpenBSD: cgi.c,v 1.49 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
@@ -817,7 +817,6 @@ format(const struct req *req, const char *file)
 {
        struct manoutput conf;
        struct mparse   *mp;
-       struct mchars   *mchars;
        struct roff_man *man;
        void            *vp;
        int              fd;
@@ -828,9 +827,8 @@ format(const struct req *req, const char *file)
                return;
        }
 
-       mchars = mchars_alloc();
-       mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL,
-           mchars, req->q.manpath);
+       mchars_alloc();
+       mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
        mparse_readfd(mp, fd, file);
        close(fd);
 
@@ -850,11 +848,11 @@ format(const struct req *req, const char *file)
                    req->q.manpath, file);
                pg_error_internal();
                mparse_free(mp);
-               mchars_free(mchars);
+               mchars_free();
                return;
        }
 
-       vp = html_alloc(mchars, &conf);
+       vp = html_alloc(&conf);
 
        if (man->macroset == MACROSET_MDOC)
                html_mdoc(vp, man);
@@ -863,7 +861,7 @@ format(const struct req *req, const char *file)
 
        html_free(vp);
        mparse_free(mp);
-       mchars_free(mchars);
+       mchars_free();
        free(conf.man);
 }
 
index 9522fc7..c4cff4b 100644 (file)
@@ -1,7 +1,7 @@
-/*     $OpenBSD: chars.c,v 1.37 2015/10/06 18:30:43 schwarze Exp $ */
+/*     $OpenBSD: chars.c,v 1.38 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
 
 #include <assert.h>
 #include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "mandoc.h"
 #include "mandoc_aux.h"
+#include "mandoc_ohash.h"
 #include "libmandoc.h"
 
-#define        PRINT_HI         126
-#define        PRINT_LO         32
-
 struct ln {
-       struct ln        *next;
-       const char       *code;
+       const char        roffcode[16];
        const char       *ascii;
        int               unicode;
 };
 
-#define        LINES_MAX         332
-
-#define CHAR(in, ch, code) \
-       { NULL, (in), (ch), (code) },
-
-#define        CHAR_TBL_START    static struct ln lines[LINES_MAX] = {
-#define        CHAR_TBL_END      };
-
-#include "chars.in"
-
-struct mchars {
-       struct ln       **htab;
+/* Special break control characters. */
+static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
+static const char ascii_break[2] = { ASCII_BREAK, '\0' };
+
+static struct ln lines[] = {
+
+       /* Spacing. */
+       { " ",                  ascii_nbrsp,    0x00a0  },
+       { "~",                  ascii_nbrsp,    0x00a0  },
+       { "0",                  " ",            0x2002  },
+       { "|",                  "",             0       },
+       { "^",                  "",             0       },
+       { "&",                  "",             0       },
+       { "%",                  "",             0       },
+       { ":",                  ascii_break,    0       },
+       /* XXX The following three do not really belong here. */
+       { "t",                  "",             0       },
+       { "c",                  "",             0       },
+       { "}",                  "",             0       },
+
+       /* Lines. */
+       { "ba",                 "|",            0x007c  },
+       { "br",                 "|",            0x2502  },
+       { "ul",                 "_",            0x005f  },
+       { "rn",                 "-",            0x203e  },
+       { "bb",                 "|",            0x00a6  },
+       { "sl",                 "/",            0x002f  },
+       { "rs",                 "\\",           0x005c  },
+
+       /* Text markers. */
+       { "ci",                 "O",            0x25cb  },
+       { "bu",                 "+\bo",         0x2022  },
+       { "dd",                 "|\b=",         0x2021  },
+       { "dg",                 "|\b-",         0x2020  },
+       { "lz",                 "<>",           0x25ca  },
+       { "sq",                 "[]",           0x25a1  },
+       { "ps",                 "<par>",        0x00b6  },
+       { "sc",                 "<sec>",        0x00a7  },
+       { "lh",                 "<=",           0x261c  },
+       { "rh",                 "=>",           0x261e  },
+       { "at",                 "@",            0x0040  },
+       { "sh",                 "#",            0x0023  },
+       { "CR",                 "_|",           0x21b5  },
+       { "OK",                 "\\/",          0x2713  },
+
+       /* Legal symbols. */
+       { "co",                 "(C)",          0x00a9  },
+       { "rg",                 "(R)",          0x00ae  },
+       { "tm",                 "tm",           0x2122  },
+
+       /* Punctuation. */
+       { "em",                 "--",           0x2014  },
+       { "en",                 "-",            0x2013  },
+       { "hy",                 "-",            0x2010  },
+       { "e",                  "\\",           0x005c  },
+       { ".",                  ".",            0x002e  },
+       { "r!",                 "!",            0x00a1  },
+       { "r?",                 "?",            0x00bf  },
+
+       /* Quotes. */
+       { "Bq",                 ",,",           0x201e  },
+       { "bq",                 ",",            0x201a  },
+       { "lq",                 "\"",           0x201c  },
+       { "rq",                 "\"",           0x201d  },
+       { "Lq",                 "``",           0x201c  },
+       { "Rq",                 "''",           0x201d  },
+       { "oq",                 "`",            0x2018  },
+       { "cq",                 "\'",           0x2019  },
+       { "aq",                 "\'",           0x0027  },
+       { "dq",                 "\"",           0x0022  },
+       { "Fo",                 "<<",           0x00ab  },
+       { "Fc",                 ">>",           0x00bb  },
+       { "fo",                 "<",            0x2039  },
+       { "fc",                 ">",            0x203a  },
+
+       /* Brackets. */
+       { "lB",                 "[",            0x005b  },
+       { "rB",                 "]",            0x005d  },
+       { "lC",                 "{",            0x007b  },
+       { "rC",                 "}",            0x007d  },
+       { "la",                 "<",            0x27e8  },
+       { "ra",                 ">",            0x27e9  },
+       { "bv",                 "|",            0x23aa  },
+       { "braceex",            "|",            0x23aa  },
+       { "bracketlefttp",      "|",            0x23a1  },
+       { "bracketleftbt",      "|",            0x23a3  },
+       { "bracketleftex",      "|",            0x23a2  },
+       { "bracketrighttp",     "|",            0x23a4  },
+       { "bracketrightbt",     "|",            0x23a6  },
+       { "bracketrightex",     "|",            0x23a5  },
+       { "lt",                 ",-",           0x23a7  },
+       { "bracelefttp",        ",-",           0x23a7  },
+       { "lk",                 "{",            0x23a8  },
+       { "braceleftmid",       "{",            0x23a8  },
+       { "lb",                 "`-",           0x23a9  },
+       { "braceleftbt",        "`-",           0x23a9  },
+       { "braceleftex",        "|",            0x23aa  },
+       { "rt",                 "-.",           0x23ab  },
+       { "bracerighttp",       "-.",           0x23ab  },
+       { "rk",                 "}",            0x23ac  },
+       { "bracerightmid",      "}",            0x23ac  },
+       { "rb",                 "-\'",          0x23ad  },
+       { "bracerightbt",       "-\'",          0x23ad  },
+       { "bracerightex",       "|",            0x23aa  },
+       { "parenlefttp",        "/",            0x239b  },
+       { "parenleftbt",        "\\",           0x239d  },
+       { "parenleftex",        "|",            0x239c  },
+       { "parenrighttp",       "\\",           0x239e  },
+       { "parenrightbt",       "/",            0x23a0  },
+       { "parenrightex",       "|",            0x239f  },
+
+       /* Arrows and lines. */
+       { "<-",                 "<-",           0x2190  },
+       { "->",                 "->",           0x2192  },
+       { "<>",                 "<->",          0x2194  },
+       { "da",                 "|\bv",         0x2193  },
+       { "ua",                 "|\b^",         0x2191  },
+       { "va",                 "^v",           0x2195  },
+       { "lA",                 "<=",           0x21d0  },
+       { "rA",                 "=>",           0x21d2  },
+       { "hA",                 "<=>",          0x21d4  },
+       { "uA",                 "=\b^",         0x21d1  },
+       { "dA",                 "=\bv",         0x21d3  },
+       { "vA",                 "^=v",          0x21d5  },
+
+       /* Logic. */
+       { "AN",                 "^",            0x2227  },
+       { "OR",                 "v",            0x2228  },
+       { "no",                 "~",            0x00ac  },
+       { "tno",                "~",            0x00ac  },
+       { "te",                 "3",            0x2203  },
+       { "fa",                 "-\bV",         0x2200  },
+       { "st",                 "-)",           0x220b  },
+       { "tf",                 ".:.",          0x2234  },
+       { "3d",                 ".:.",          0x2234  },
+       { "or",                 "|",            0x007c  },
+
+       /* Mathematicals. */
+       { "pl",                 "+",            0x002b  },
+       { "mi",                 "-",            0x2212  },
+       { "-",                  "-",            0x002d  },
+       { "-+",                 "-+",           0x2213  },
+       { "+-",                 "+-",           0x00b1  },
+       { "t+-",                "+-",           0x00b1  },
+       { "pc",                 ".",            0x00b7  },
+       { "md",                 ".",            0x22c5  },
+       { "mu",                 "x",            0x00d7  },
+       { "tmu",                "x",            0x00d7  },
+       { "c*",                 "O\bx",         0x2297  },
+       { "c+",                 "O\b+",         0x2295  },
+       { "di",                 "-:-",          0x00f7  },
+       { "tdi",                "-:-",          0x00f7  },
+       { "f/",                 "/",            0x2044  },
+       { "**",                 "*",            0x2217  },
+       { "<=",                 "<=",           0x2264  },
+       { ">=",                 ">=",           0x2265  },
+       { "<<",                 "<<",           0x226a  },
+       { ">>",                 ">>",           0x226b  },
+       { "eq",                 "=",            0x003d  },
+       { "!=",                 "!=",           0x2260  },
+       { "==",                 "==",           0x2261  },
+       { "ne",                 "!==",          0x2262  },
+       { "ap",                 "~",            0x223c  },
+       { "|=",                 "-~",           0x2243  },
+       { "=~",                 "=~",           0x2245  },
+       { "~~",                 "~~",           0x2248  },
+       { "~=",                 "~=",           0x2248  },
+       { "pt",                 "oc",           0x221d  },
+       { "es",                 "{}",           0x2205  },
+       { "mo",                 "E",            0x2208  },
+       { "nm",                 "!E",           0x2209  },
+       { "sb",                 "(=",           0x2282  },
+       { "nb",                 "(!=",          0x2284  },
+       { "sp",                 "=)",           0x2283  },
+       { "nc",                 "!=)",          0x2285  },
+       { "ib",                 "(=\b_",        0x2286  },
+       { "ip",                 "=\b_)",        0x2287  },
+       { "ca",                 "(^)",          0x2229  },
+       { "cu",                 "U",            0x222a  },
+       { "/_",                 "_\b/",         0x2220  },
+       { "pp",                 "_\b|",         0x22a5  },
+       { "is",                 "'\b,\bI",      0x222b  },
+       { "integral",           "'\b,\bI",      0x222b  },
+       { "sum",                "E",            0x2211  },
+       { "product",            "TT",           0x220f  },
+       { "coproduct",          "U",            0x2210  },
+       { "gr",                 "V",            0x2207  },
+       { "sr",                 "\\/",          0x221a  },
+       { "sqrt",               "\\/",          0x221a  },
+       { "lc",                 "|~",           0x2308  },
+       { "rc",                 "~|",           0x2309  },
+       { "lf",                 "|_",           0x230a  },
+       { "rf",                 "_|",           0x230b  },
+       { "if",                 "oo",           0x221e  },
+       { "Ah",                 "N",            0x2135  },
+       { "Im",                 "I",            0x2111  },
+       { "Re",                 "R",            0x211c  },
+       { "pd",                 "a",            0x2202  },
+       { "-h",                 "/h",           0x210f  },
+       { "12",                 "1/2",          0x00bd  },
+       { "14",                 "1/4",          0x00bc  },
+       { "34",                 "3/4",          0x00be  },
+
+       /* Ligatures. */
+       { "ff",                 "ff",           0xfb00  },
+       { "fi",                 "fi",           0xfb01  },
+       { "fl",                 "fl",           0xfb02  },
+       { "Fi",                 "ffi",          0xfb03  },
+       { "Fl",                 "ffl",          0xfb04  },
+       { "AE",                 "AE",           0x00c6  },
+       { "ae",                 "ae",           0x00e6  },
+       { "OE",                 "OE",           0x0152  },
+       { "oe",                 "oe",           0x0153  },
+       { "ss",                 "ss",           0x00df  },
+       { "IJ",                 "IJ",           0x0132  },
+       { "ij",                 "ij",           0x0133  },
+
+       /* Accents. */
+       { "a\"",                "\"",           0x02dd  },
+       { "a-",                 "-",            0x00af  },
+       { "a.",                 ".",            0x02d9  },
+       { "a^",                 "^",            0x005e  },
+       { "aa",                 "\'",           0x00b4  },
+       { "\'",                 "\'",           0x00b4  },
+       { "ga",                 "`",            0x0060  },
+       { "`",                  "`",            0x0060  },
+       { "ab",                 "'\b`",         0x02d8  },
+       { "ac",                 ",",            0x00b8  },
+       { "ad",                 "\"",           0x00a8  },
+       { "ah",                 "v",            0x02c7  },
+       { "ao",                 "o",            0x02da  },
+       { "a~",                 "~",            0x007e  },
+       { "ho",                 ",",            0x02db  },
+       { "ha",                 "^",            0x005e  },
+       { "ti",                 "~",            0x007e  },
+
+       /* Accented letters. */
+       { "'A",                 "'\bA",         0x00c1  },
+       { "'E",                 "'\bE",         0x00c9  },
+       { "'I",                 "'\bI",         0x00cd  },
+       { "'O",                 "'\bO",         0x00d3  },
+       { "'U",                 "'\bU",         0x00da  },
+       { "'a",                 "'\ba",         0x00e1  },
+       { "'e",                 "'\be",         0x00e9  },
+       { "'i",                 "'\bi",         0x00ed  },
+       { "'o",                 "'\bo",         0x00f3  },
+       { "'u",                 "'\bu",         0x00fa  },
+       { "`A",                 "`\bA",         0x00c0  },
+       { "`E",                 "`\bE",         0x00c8  },
+       { "`I",                 "`\bI",         0x00cc  },
+       { "`O",                 "`\bO",         0x00d2  },
+       { "`U",                 "`\bU",         0x00d9  },
+       { "`a",                 "`\ba",         0x00e0  },
+       { "`e",                 "`\be",         0x00e8  },
+       { "`i",                 "`\bi",         0x00ec  },
+       { "`o",                 "`\bo",         0x00f2  },
+       { "`u",                 "`\bu",         0x00f9  },
+       { "~A",                 "~\bA",         0x00c3  },
+       { "~N",                 "~\bN",         0x00d1  },
+       { "~O",                 "~\bO",         0x00d5  },
+       { "~a",                 "~\ba",         0x00e3  },
+       { "~n",                 "~\bn",         0x00f1  },
+       { "~o",                 "~\bo",         0x00f5  },
+       { ":A",                 "\"\bA",        0x00c4  },
+       { ":E",                 "\"\bE",        0x00cb  },
+       { ":I",                 "\"\bI",        0x00cf  },
+       { ":O",                 "\"\bO",        0x00d6  },
+       { ":U",                 "\"\bU",        0x00dc  },
+       { ":a",                 "\"\ba",        0x00e4  },
+       { ":e",                 "\"\be",        0x00eb  },
+       { ":i",                 "\"\bi",        0x00ef  },
+       { ":o",                 "\"\bo",        0x00f6  },
+       { ":u",                 "\"\bu",        0x00fc  },
+       { ":y",                 "\"\by",        0x00ff  },
+       { "^A",                 "^\bA",         0x00c2  },
+       { "^E",                 "^\bE",         0x00ca  },
+       { "^I",                 "^\bI",         0x00ce  },
+       { "^O",                 "^\bO",         0x00d4  },
+       { "^U",                 "^\bU",         0x00db  },
+       { "^a",                 "^\ba",         0x00e2  },
+       { "^e",                 "^\be",         0x00ea  },
+       { "^i",                 "^\bi",         0x00ee  },
+       { "^o",                 "^\bo",         0x00f4  },
+       { "^u",                 "^\bu",         0x00fb  },
+       { ",C",                 ",\bC",         0x00c7  },
+       { ",c",                 ",\bc",         0x00e7  },
+       { "/L",                 "/\bL",         0x0141  },
+       { "/l",                 "/\bl",         0x0142  },
+       { "/O",                 "/\bO",         0x00d8  },
+       { "/o",                 "/\bo",         0x00f8  },
+       { "oA",                 "o\bA",         0x00c5  },
+       { "oa",                 "o\ba",         0x00e5  },
+
+       /* Special letters. */
+       { "-D",                 "-\bD",         0x00d0  },
+       { "Sd",                 "d",            0x00f0  },
+       { "TP",                 "Th",           0x00de  },
+       { "Tp",                 "th",           0x00fe  },
+       { ".i",                 "i",            0x0131  },
+       { ".j",                 "j",            0x0237  },
+
+       /* Currency. */
+       { "Do",                 "$",            0x0024  },
+       { "ct",                 "/\bc",         0x00a2  },
+       { "Eu",                 "EUR",          0x20ac  },
+       { "eu",                 "EUR",          0x20ac  },
+       { "Ye",                 "=\bY",         0x00a5  },
+       { "Po",                 "GBP",          0x00a3  },
+       { "Cs",                 "o\bx",         0x00a4  },
+       { "Fn",                 ",\bf",         0x0192  },
+
+       /* Units. */
+       { "de",                 "<deg>",        0x00b0  },
+       { "%0",                 "%o",           0x2030  },
+       { "fm",                 "\'",           0x2032  },
+       { "sd",                 "''",           0x2033  },
+       { "mc",                 ",\bu",         0x00b5  },
+
+       /* Greek characters. */
+       { "*A",                 "A",            0x0391  },
+       { "*B",                 "B",            0x0392  },
+       { "*G",                 "G",            0x0393  },
+       { "*D",                 "_\b/_\b\\",    0x0394  },
+       { "*E",                 "E",            0x0395  },
+       { "*Z",                 "Z",            0x0396  },
+       { "*Y",                 "H",            0x0397  },
+       { "*H",                 "-\bO",         0x0398  },
+       { "*I",                 "I",            0x0399  },
+       { "*K",                 "K",            0x039a  },
+       { "*L",                 "/\\",          0x039b  },
+       { "*M",                 "M",            0x039c  },
+       { "*N",                 "N",            0x039d  },
+       { "*C",                 "_\bH",         0x039e  },
+       { "*O",                 "O",            0x039f  },
+       { "*P",                 "TT",           0x03a0  },
+       { "*R",                 "P",            0x03a1  },
+       { "*S",                 "S",            0x03a3  },
+       { "*T",                 "T",            0x03a4  },
+       { "*U",                 "Y",            0x03a5  },
+       { "*F",                 "I\bO",         0x03a6  },
+       { "*X",                 "X",            0x03a7  },
+       { "*Q",                 "I\bY",         0x03a8  },
+       { "*W",                 "_\bO",         0x03a9  },
+       { "*a",                 "a",            0x03b1  },
+       { "*b",                 "B",            0x03b2  },
+       { "*g",                 "y",            0x03b3  },
+       { "*d",                 "d",            0x03b4  },
+       { "*e",                 "e",            0x03b5  },
+       { "*z",                 ",\bC",         0x03b6  },
+       { "*y",                 "n",            0x03b7  },
+       { "*h",                 "-\b0",         0x03b8  },
+       { "*i",                 "i",            0x03b9  },
+       { "*k",                 "k",            0x03ba  },
+       { "*l",                 ">\b\\",        0x03bb  },
+       { "*m",                 ",\bu",         0x03bc  },
+       { "*n",                 "v",            0x03bd  },
+       { "*c",                 ",\bE",         0x03be  },
+       { "*o",                 "o",            0x03bf  },
+       { "*p",                 "-\bn",         0x03c0  },
+       { "*r",                 "p",            0x03c1  },
+       { "*s",                 "-\bo",         0x03c3  },
+       { "*t",                 "~\bt",         0x03c4  },
+       { "*u",                 "u",            0x03c5  },
+       { "*f",                 "|\bo",         0x03d5  },
+       { "*x",                 "x",            0x03c7  },
+       { "*q",                 "|\bu",         0x03c8  },
+       { "*w",                 "w",            0x03c9  },
+       { "+h",                 "-\b0",         0x03d1  },
+       { "+f",                 "|\bo",         0x03c6  },
+       { "+p",                 "-\bw",         0x03d6  },
+       { "+e",                 "e",            0x03f5  },
+       { "ts",                 "s",            0x03c2  },
 };
 
-static const struct ln  *find(const struct mchars *,
-                               const char *, size_t);
+static struct ohash      mchars;
 
 
 void
-mchars_free(struct mchars *arg)
+mchars_free(void)
 {
 
-       free(arg->htab);
-       free(arg);
+       ohash_delete(&mchars);
 }
 
-struct mchars *
+void
 mchars_alloc(void)
 {
-       struct mchars    *tab;
-       struct ln       **htab;
-       struct ln        *pp;
-       int               i, hash;
-
-       /*
-        * Constructs a very basic chaining hashtable.  The hash routine
-        * is simply the integral value of the first character.
-        * Subsequent entries are chained in the order they're processed.
-        */
-
-       tab = mandoc_malloc(sizeof(struct mchars));
-       htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
-
-       for (i = 0; i < LINES_MAX; i++) {
-               hash = (int)lines[i].code[0] - PRINT_LO;
-
-               if (NULL == (pp = htab[hash])) {
-                       htab[hash] = &lines[i];
-                       continue;
-               }
-
-               for ( ; pp->next; pp = pp->next)
-                       /* Scan ahead. */ ;
-               pp->next = &lines[i];
+       size_t            i;
+       unsigned int      slot;
+
+       mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
+       for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
+               slot = ohash_qlookup(&mchars, lines[i].roffcode);
+               assert(ohash_find(&mchars, slot) == NULL);
+               ohash_insert(&mchars, slot, lines + i);
        }
-
-       tab->htab = htab;
-       return tab;
 }
 
 int
-mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
+mchars_spec2cp(const char *p, size_t sz)
 {
        const struct ln *ln;
+       const char      *end;
 
-       ln = find(arg, p, sz);
+       end = p + sz;
+       ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
        return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1;
 }
 
@@ -125,12 +464,13 @@ mchars_num2uc(const char *p, size_t sz)
 }
 
 const char *
-mchars_spec2str(const struct mchars *arg,
-               const char *p, size_t sz, size_t *rsz)
+mchars_spec2str(const char *p, size_t sz, size_t *rsz)
 {
        const struct ln *ln;
+       const char      *end;
 
-       ln = find(arg, p, sz);
+       end = p + sz;
+       ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
        if (ln == NULL) {
                *rsz = 1;
                return sz == 1 ? p : NULL;
@@ -143,31 +483,10 @@ mchars_spec2str(const struct mchars *arg,
 const char *
 mchars_uc2str(int uc)
 {
-       int      i;
+       size_t    i;
 
-       for (i = 0; i < LINES_MAX; i++)
+       for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
                if (uc == lines[i].unicode)
                        return lines[i].ascii;
        return "<?>";
 }
-
-static const struct ln *
-find(const struct mchars *tab, const char *p, size_t sz)
-{
-       const struct ln  *pp;
-       int               hash;
-
-       assert(p);
-
-       if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
-               return NULL;
-
-       hash = (int)p[0] - PRINT_LO;
-
-       for (pp = tab->htab[hash]; pp; pp = pp->next)
-               if (0 == strncmp(pp->code, p, sz) &&
-                   '\0' == pp->code[(int)sz])
-                       return pp;
-
-       return NULL;
-}
diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in
deleted file mode 100644 (file)
index 813faa8..0000000
+++ /dev/null
@@ -1,404 +0,0 @@
-/*     $OpenBSD: chars.in,v 1.28 2015/02/17 20:33:44 schwarze Exp $ */
-/*
- * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * The ASCII translation tables.
- *
- * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx
- * and so on) whose length is listed second element.  The right-hand
- * side is what's produced by the front-end, with the fourth element
- * being its length.
- *
- * XXX - C-escape strings!
- * XXX - update LINES_MAX if adding more!
- */
-
-/* Special break control characters. */
-static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
-static const char ascii_break[2] = { ASCII_BREAK, '\0' };
-
-CHAR_TBL_START
-
-/* Spacing. */
-CHAR(" ",                      ascii_nbrsp,    160)
-CHAR("~",                      ascii_nbrsp,    160)
-CHAR("0",                      " ",            8194)
-CHAR("|",                      "",             0)
-CHAR("^",                      "",             0)
-CHAR("&",                      "",             0)
-CHAR("%",                      "",             0)
-CHAR(":",                      ascii_break,    0)
-/* XXX The following three do not really belong into this file. */
-CHAR("t",                      "",             0)
-CHAR("c",                      "",             0)
-CHAR("}",                      "",             0)
-
-/* Accents. */
-CHAR("a\"",                    "\"",           733)
-CHAR("a-",                     "-",            175)
-CHAR("a.",                     ".",            729)
-CHAR("a^",                     "^",            94)
-CHAR("\'",                     "\'",           180)
-CHAR("aa",                     "\'",           180)
-CHAR("ga",                     "`",            96)
-CHAR("`",                      "`",            96)
-CHAR("ab",                     "'\b`",         728)
-CHAR("ac",                     ",",            184)
-CHAR("ad",                     "\"",           168)
-CHAR("ah",                     "v",            711)
-CHAR("ao",                     "o",            730)
-CHAR("a~",                     "~",            126)
-CHAR("ho",                     ",",            731)
-CHAR("ha",                     "^",            94)
-CHAR("ti",                     "~",            126)
-
-/* Quotes. */
-CHAR("Bq",                     ",,",           8222)
-CHAR("bq",                     ",",            8218)
-CHAR("lq",                     "\"",           8220)
-CHAR("rq",                     "\"",           8221)
-CHAR("Lq",                     "``",           8220)
-CHAR("Rq",                     "''",           8221)
-CHAR("oq",                     "`",            8216)
-CHAR("cq",                     "\'",           8217)
-CHAR("aq",                     "\'",           39)
-CHAR("dq",                     "\"",           34)
-CHAR("Fo",                     "<<",           171)
-CHAR("Fc",                     ">>",           187)
-CHAR("fo",                     "<",            8249)
-CHAR("fc",                     ">",            8250)
-
-/* Brackets. */
-CHAR("lB",                     "[",            91)
-CHAR("rB",                     "]",            93)
-CHAR("lC",                     "{",            123)
-CHAR("rC",                     "}",            125)
-CHAR("la",                     "<",            10216)
-CHAR("ra",                     ">",            10217)
-CHAR("bv",                     "|",            9130)
-CHAR("braceex",                        "|",            9130)
-CHAR("bracketlefttp",          "|",            9121)
-CHAR("bracketleftbt",          "|",            9123)
-CHAR("bracketleftex",          "|",            9122)
-CHAR("bracketrighttp",         "|",            9124)
-CHAR("bracketrightbt",         "|",            9126)
-CHAR("bracketrightex",         "|",            9125)
-CHAR("lt",                     ",-",           9127)
-CHAR("bracelefttp",            ",-",           9127)
-CHAR("lk",                     "{",            9128)
-CHAR("braceleftmid",           "{",            9128)
-CHAR("lb",                     "`-",           9129)
-CHAR("braceleftbt",            "`-",           9129)
-CHAR("braceleftex",            "|",            9130)
-CHAR("rt",                     "-.",           9131)
-CHAR("bracerighttp",           "-.",           9131)
-CHAR("rk",                     "}",            9132)
-CHAR("bracerightmid",          "}",            9132)
-CHAR("rb",                     "-\'",          9133)
-CHAR("bracerightbt",           "-\'",          9133)
-CHAR("bracerightex",           "|",            9130)
-CHAR("parenlefttp",            "/",            9115)
-CHAR("parenleftbt",            "\\",           9117)
-CHAR("parenleftex",            "|",            9116)
-CHAR("parenrighttp",           "\\",           9118)
-CHAR("parenrightbt",           "/",            9120)
-CHAR("parenrightex",           "|",            9119)
-
-/* Greek characters. */
-CHAR("*A",                     "A",            913)
-CHAR("*B",                     "B",            914)
-CHAR("*G",                     "G",            915)
-CHAR("*D",                     "_\b/_\b\\",    916)
-CHAR("*E",                     "E",            917)
-CHAR("*Z",                     "Z",            918)
-CHAR("*Y",                     "H",            919)
-CHAR("*H",                     "-\bO",         920)
-CHAR("*I",                     "I",            921)
-CHAR("*K",                     "K",            922)
-CHAR("*L",                     "/\\",          923)
-CHAR("*M",                     "M",            924)
-CHAR("*N",                     "N",            925)
-CHAR("*C",                     "_\bH",         926)
-CHAR("*O",                     "O",            927)
-CHAR("*P",                     "TT",           928)
-CHAR("*R",                     "P",            929)
-CHAR("*S",                     "S",            931)
-CHAR("*T",                     "T",            932)
-CHAR("*U",                     "Y",            933)
-CHAR("*F",                     "I\bO",         934)
-CHAR("*X",                     "X",            935)
-CHAR("*Q",                     "I\bY",         936)
-CHAR("*W",                     "_\bO",         937)
-CHAR("*a",                     "a",            945)
-CHAR("*b",                     "B",            946)
-CHAR("*g",                     "y",            947)
-CHAR("*d",                     "d",            948)
-CHAR("*e",                     "e",            949)
-CHAR("*z",                     ",\bC",         950)
-CHAR("*y",                     "n",            951)
-CHAR("*h",                     "-\b0",         952)
-CHAR("*i",                     "i",            953)
-CHAR("*k",                     "k",            954)
-CHAR("*l",                     ">\b\\",        955)
-CHAR("*m",                     ",\bu",         956)
-CHAR("*n",                     "v",            957)
-CHAR("*c",                     ",\bE",         958)
-CHAR("*o",                     "o",            959)
-CHAR("*p",                     "-\bn",         960)
-CHAR("*r",                     "p",            961)
-CHAR("*s",                     "-\bo",         963)
-CHAR("*t",                     "~\bt",         964)
-CHAR("*u",                     "u",            965)
-CHAR("*f",                     "|\bo",         981)
-CHAR("*x",                     "x",            967)
-CHAR("*q",                     "|\bu",         968)
-CHAR("*w",                     "w",            969)
-CHAR("+h",                     "-\b0",         977)
-CHAR("+f",                     "|\bo",         966)
-CHAR("+p",                     "-\bw",         982)
-CHAR("+e",                     "e",            1013)
-CHAR("ts",                     "s",            962)
-
-/* Accented letters. */
-CHAR(",C",                     ",\bC",         199)
-CHAR(",c",                     ",\bc",         231)
-CHAR("/L",                     "/\bL",         321)
-CHAR("/O",                     "/\bO",         216)
-CHAR("/l",                     "/\bl",         322)
-CHAR("/o",                     "/\bo",         248)
-CHAR("oA",                     "o\bA",         197)
-CHAR("oa",                     "o\ba",         229)
-CHAR(":A",                     "\"\bA",        196)
-CHAR(":E",                     "\"\bE",        203)
-CHAR(":I",                     "\"\bI",        207)
-CHAR(":O",                     "\"\bO",        214)
-CHAR(":U",                     "\"\bU",        220)
-CHAR(":a",                     "\"\ba",        228)
-CHAR(":e",                     "\"\be",        235)
-CHAR(":i",                     "\"\bi",        239)
-CHAR(":o",                     "\"\bo",        246)
-CHAR(":u",                     "\"\bu",        252)
-CHAR(":y",                     "\"\by",        255)
-CHAR("'A",                     "'\bA",         193)
-CHAR("'E",                     "'\bE",         201)
-CHAR("'I",                     "'\bI",         205)
-CHAR("'O",                     "'\bO",         211)
-CHAR("'U",                     "'\bU",         218)
-CHAR("'a",                     "'\ba",         225)
-CHAR("'e",                     "'\be",         233)
-CHAR("'i",                     "'\bi",         237)
-CHAR("'o",                     "'\bo",         243)
-CHAR("'u",                     "'\bu",         250)
-CHAR("^A",                     "^\bA",         194)
-CHAR("^E",                     "^\bE",         202)
-CHAR("^I",                     "^\bI",         206)
-CHAR("^O",                     "^\bO",         212)
-CHAR("^U",                     "^\bU",         219)
-CHAR("^a",                     "^\ba",         226)
-CHAR("^e",                     "^\be",         234)
-CHAR("^i",                     "^\bi",         238)
-CHAR("^o",                     "^\bo",         244)
-CHAR("^u",                     "^\bu",         251)
-CHAR("`A",                     "`\bA",         192)
-CHAR("`E",                     "`\bE",         200)
-CHAR("`I",                     "`\bI",         204)
-CHAR("`O",                     "`\bO",         210)
-CHAR("`U",                     "`\bU",         217)
-CHAR("`a",                     "`\ba",         224)
-CHAR("`e",                     "`\be",         232)
-CHAR("`i",                     "`\bi",         236)
-CHAR("`o",                     "`\bo",         242)
-CHAR("`u",                     "`\bu",         249)
-CHAR("~A",                     "~\bA",         195)
-CHAR("~N",                     "~\bN",         209)
-CHAR("~O",                     "~\bO",         213)
-CHAR("~a",                     "~\ba",         227)
-CHAR("~n",                     "~\bn",         241)
-CHAR("~o",                     "~\bo",         245)
-
-/* Arrows and lines. */
-CHAR("<-",                     "<-",           8592)
-CHAR("->",                     "->",           8594)
-CHAR("<>",                     "<->",          8596)
-CHAR("da",                     "|\bv",         8595)
-CHAR("ua",                     "|\b^",         8593)
-CHAR("va",                     "^v",           8597)
-CHAR("lA",                     "<=",           8656)
-CHAR("rA",                     "=>",           8658)
-CHAR("hA",                     "<=>",          8660)
-CHAR("dA",                     "=\bv",         8659)
-CHAR("uA",                     "=\b^",         8657)
-CHAR("vA",                     "^=v",          8661)
-
-/* Logic. */
-CHAR("AN",                     "^",            8743)
-CHAR("OR",                     "v",            8744)
-CHAR("no",                     "~",            172)
-CHAR("tno",                    "~",            172)
-CHAR("te",                     "3",            8707)
-CHAR("fa",                     "-\bV",         8704)
-CHAR("st",                     "-)",           8715)
-CHAR("tf",                     ".:.",          8756)
-CHAR("3d",                     ".:.",          8756)
-CHAR("or",                     "|",            124)
-
-/* Mathematicals. */
-CHAR("pl",                     "+",            43)
-CHAR("mi",                     "-",            8722)
-CHAR("-",                      "-",            45)
-CHAR("-+",                     "-+",           8723)
-CHAR("+-",                     "+-",           177)
-CHAR("t+-",                    "+-",           177)
-CHAR("pc",                     ".",            183)
-CHAR("md",                     ".",            8901)
-CHAR("mu",                     "x",            215)
-CHAR("tmu",                    "x",            215)
-CHAR("c*",                     "O\bx",         8855)
-CHAR("c+",                     "O\b+",         8853)
-CHAR("di",                     "-:-",          247)
-CHAR("tdi",                    "-:-",          247)
-CHAR("f/",                     "/",            8260)
-CHAR("**",                     "*",            8727)
-CHAR("<=",                     "<=",           8804)
-CHAR(">=",                     ">=",           8805)
-CHAR("<<",                     "<<",           8810)
-CHAR(">>",                     ">>",           8811)
-CHAR("eq",                     "=",            61)
-CHAR("!=",                     "!=",           8800)
-CHAR("==",                     "==",           8801)
-CHAR("ne",                     "!==",          8802)
-CHAR("=~",                     "=~",           8773)
-CHAR("|=",                     "-~",           8771)
-CHAR("ap",                     "~",            8764)
-CHAR("~~",                     "~~",           8776)
-CHAR("~=",                     "~=",           8776)
-CHAR("pt",                     "oc",           8733)
-CHAR("es",                     "{}",           8709)
-CHAR("mo",                     "E",            8712)
-CHAR("nm",                     "!E",           8713)
-CHAR("sb",                     "(=",           8834)
-CHAR("nb",                     "(!=",          8836)
-CHAR("sp",                     "=)",           8835)
-CHAR("nc",                     "!=)",          8837)
-CHAR("ib",                     "(=\b_",        8838)
-CHAR("ip",                     "=\b_)",        8839)
-CHAR("ca",                     "(^)",          8745)
-CHAR("cu",                     "U",            8746)
-CHAR("/_",                     "_\b/",         8736)
-CHAR("pp",                     "_\b|",         8869)
-CHAR("is",                     "'\b,\bI",      8747)
-CHAR("integral",               "'\b,\bI",      8747)
-CHAR("sum",                    "E",            8721)
-CHAR("product",                        "TT",           8719)
-CHAR("coproduct",              "U",            8720)
-CHAR("gr",                     "V",            8711)
-CHAR("sr",                     "\\/",          8730)
-CHAR("sqrt",                   "\\/",          8730)
-CHAR("lc",                     "|~",           8968)
-CHAR("rc",                     "~|",           8969)
-CHAR("lf",                     "|_",           8970)
-CHAR("rf",                     "_|",           8971)
-CHAR("if",                     "oo",           8734)
-CHAR("Ah",                     "N",            8501)
-CHAR("Im",                     "I",            8465)
-CHAR("Re",                     "R",            8476)
-CHAR("pd",                     "a",            8706)
-CHAR("-h",                     "/h",           8463)
-CHAR("12",                     "1/2",          189)
-CHAR("14",                     "1/4",          188)
-CHAR("34",                     "3/4",          190)
-
-/* Ligatures. */
-CHAR("ff",                     "ff",           64256)
-CHAR("fi",                     "fi",           64257)
-CHAR("fl",                     "fl",           64258)
-CHAR("Fi",                     "ffi",          64259)
-CHAR("Fl",                     "ffl",          64260)
-CHAR("AE",                     "AE",           198)
-CHAR("ae",                     "ae",           230)
-CHAR("OE",                     "OE",           338)
-CHAR("oe",                     "oe",           339)
-CHAR("ss",                     "ss",           223)
-CHAR("IJ",                     "IJ",           306)
-CHAR("ij",                     "ij",           307)
-
-/* Special letters. */
-CHAR("-D",                     "-\bD",         208)
-CHAR("Sd",                     "d",            240)
-CHAR("TP",                     "Th",           222)
-CHAR("Tp",                     "th",           254)
-CHAR(".i",                     "i",            305)
-CHAR(".j",                     "j",            567)
-
-/* Currency. */
-CHAR("Do",                     "$",            36)
-CHAR("ct",                     "/\bc",         162)
-CHAR("Eu",                     "EUR",          8364)
-CHAR("eu",                     "EUR",          8364)
-CHAR("Ye",                     "=\bY",         165)
-CHAR("Po",                     "GBP",          163)
-CHAR("Cs",                     "o\bx",         164)
-CHAR("Fn",                     ",\bf",         402)
-
-/* Lines. */
-CHAR("ba",                     "|",            124)
-CHAR("br",                     "|",            9474)
-CHAR("ul",                     "_",            95)
-CHAR("rn",                     "-",            8254)
-CHAR("bb",                     "|",            166)
-CHAR("sl",                     "/",            47)
-CHAR("rs",                     "\\",           92)
-
-/* Text markers. */
-CHAR("ci",                     "O",            9675)
-CHAR("bu",                     "+\bo",         8226)
-CHAR("dd",                     "|\b=",         8225)
-CHAR("dg",                     "|\b-",         8224)
-CHAR("lz",                     "<>",           9674)
-CHAR("sq",                     "[]",           9633)
-CHAR("ps",                     "<par>",        182)
-CHAR("sc",                     "<sec>",        167)
-CHAR("lh",                     "<=",           9756)
-CHAR("rh",                     "=>",           9758)
-CHAR("at",                     "@",            64)
-CHAR("sh",                     "#",            35)
-CHAR("CR",                     "_|",           8629)
-CHAR("OK",                     "\\/",          10003)
-
-/* Legal symbols. */
-CHAR("co",                     "(C)",          169)
-CHAR("rg",                     "(R)",          174)
-CHAR("tm",                     "tm",           8482)
-
-/* Punctuation. */
-CHAR(".",                      ".",            46)
-CHAR("r!",                     "!",            161)
-CHAR("r?",                     "?",            191)
-CHAR("em",                     "--",           8212)
-CHAR("en",                     "-",            8211)
-CHAR("hy",                     "-",            8208)
-CHAR("e",                      "\\",           92)
-
-/* Units. */
-CHAR("de",                     "<deg>",        176)
-CHAR("%0",                     "%o",           8240)
-CHAR("fm",                     "\'",           8242)
-CHAR("sd",                     "''",           8243)
-CHAR("mc",                     ",\bu",         181)
-
-CHAR_TBL_END
index 348b43f..3502ea9 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: html.c,v 1.60 2015/10/12 00:14:41 schwarze Exp $ */
+/*     $OpenBSD: html.c,v 1.61 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -128,15 +128,13 @@ static    void     print_attr(struct html *, const char *, const char *);
 
 
 void *
-html_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+html_alloc(const struct manoutput *outopts)
 {
        struct html     *h;
 
        h = mandoc_calloc(1, sizeof(struct html));
 
        h->tags.head = NULL;
-       h->symtab = mchars;
-
        h->style = outopts->style;
        h->base_man = outopts->man;
        h->base_includes = outopts->includes;
@@ -396,7 +394,7 @@ print_encode(struct html *h, const char *p, int norecurse)
                                continue;
                        break;
                case ESCAPE_SPECIAL:
-                       c = mchars_spec2cp(h->symtab, seq, len);
+                       c = mchars_spec2cp(seq, len);
                        if (c <= 0)
                                continue;
                        break;
index 3a340fb..d8b22aa 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: html.h,v 1.32 2014/12/02 10:07:17 schwarze Exp $ */
+/*     $OpenBSD: html.h,v 1.33 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -130,7 +130,6 @@ struct      html {
        struct tagq       tags; /* stack of open tags */
        struct rofftbl    tbl; /* current table */
        struct tag       *tblt; /* current open table scope */
-       const struct mchars *symtab; /* character table */
        char             *base_man; /* base for manpage href */
        char             *base_includes; /* base for include href */
        char             *style; /* style-sheet URI */
index a5bb74b..104d075 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: libmandoc.h,v 1.46 2015/04/19 14:25:05 schwarze Exp $ */
+/*     $OpenBSD: libmandoc.h,v 1.47 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -35,7 +35,6 @@ struct        buf {
 __BEGIN_DECLS
 
 struct mparse;
-struct mchars;
 struct tbl_span;
 struct eqn;
 struct roff;
@@ -65,7 +64,7 @@ int            preconv_encode(struct buf *, size_t *,
                        struct buf *, size_t *, int *);
 
 void            roff_free(struct roff *);
-struct roff    *roff_alloc(struct mparse *, const struct mchars *, int);
+struct roff    *roff_alloc(struct mparse *, int);
 void            roff_reset(struct roff *);
 void            roff_man_free(struct roff_man *);
 struct roff_man        *roff_man_alloc(struct roff *, struct mparse *,
index a74303a..6dab7fa 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: main.c,v 1.154 2015/10/12 22:41:18 schwarze Exp $ */
+/*     $OpenBSD: main.c,v 1.155 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -70,7 +70,6 @@ enum  outt {
 
 struct curparse {
        struct mparse    *mp;
-       struct mchars    *mchars;       /* character table */
        enum mandoclevel  wlevel;       /* ignore messages below this */
        int               wstop;        /* stop after a file with a warning */
        enum outt         outtype;      /* which output to use */
@@ -398,9 +397,8 @@ main(int argc, char *argv[])
        if (search.argmode == ARG_FILE && ! moptions(&options, auxpaths))
                return (int)MANDOCLEVEL_BADARG;
 
-       curp.mchars = mchars_alloc();
-       curp.mp = mparse_alloc(options, curp.wlevel, mmsg,
-           curp.mchars, defos);
+       mchars_alloc();
+       curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos);
 
        /*
         * Conditionally start up the lookaside buffer before parsing.
@@ -454,7 +452,7 @@ main(int argc, char *argv[])
        if (curp.outfree)
                (*curp.outfree)(curp.outdata);
        mparse_free(curp.mp);
-       mchars_free(curp.mchars);
+       mchars_free();
 
 out:
        if (search.argmode != ARG_FILE) {
@@ -634,33 +632,27 @@ parse(struct curparse *curp, int fd, const char *file)
        if ( ! (curp->outman && curp->outmdoc)) {
                switch (curp->outtype) {
                case OUTT_HTML:
-                       curp->outdata = html_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = html_alloc(curp->outopts);
                        curp->outfree = html_free;
                        break;
                case OUTT_UTF8:
-                       curp->outdata = utf8_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = utf8_alloc(curp->outopts);
                        curp->outfree = ascii_free;
                        break;
                case OUTT_LOCALE:
-                       curp->outdata = locale_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = locale_alloc(curp->outopts);
                        curp->outfree = ascii_free;
                        break;
                case OUTT_ASCII:
-                       curp->outdata = ascii_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = ascii_alloc(curp->outopts);
                        curp->outfree = ascii_free;
                        break;
                case OUTT_PDF:
-                       curp->outdata = pdf_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = pdf_alloc(curp->outopts);
                        curp->outfree = pspdf_free;
                        break;
                case OUTT_PS:
-                       curp->outdata = ps_alloc(curp->mchars,
-                           curp->outopts);
+                       curp->outdata = ps_alloc(curp->outopts);
                        curp->outfree = pspdf_free;
                        break;
                default:
index 3c9cb56..8685ef3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: main.h,v 1.17 2015/04/18 16:04:40 schwarze Exp $ */
+/*     $OpenBSD: main.h,v 1.18 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -20,7 +20,6 @@
 
 __BEGIN_DECLS
 
-struct mchars;
 struct roff_man;
 struct manoutput;
 
@@ -31,8 +30,7 @@ struct        manoutput;
  * terminal output routines with different character settings.
  */
 
-void            *html_alloc(const struct mchars *,
-                       const struct manoutput *);
+void            *html_alloc(const struct manoutput *);
 void             html_mdoc(void *, const struct roff_man *);
 void             html_man(void *, const struct roff_man *);
 void             html_free(void *);
@@ -43,19 +41,14 @@ void                  tree_man(void *, const struct roff_man *);
 void             man_mdoc(void *, const struct roff_man *);
 void             man_man(void *, const struct roff_man *);
 
-void            *locale_alloc(const struct mchars *,
-                       const struct manoutput *);
-void            *utf8_alloc(const struct mchars *,
-                       const struct manoutput *);
-void            *ascii_alloc(const struct mchars *,
-                       const struct manoutput *);
+void            *locale_alloc(const struct manoutput *);
+void            *utf8_alloc(const struct manoutput *);
+void            *ascii_alloc(const struct manoutput *);
 void             ascii_free(void *);
 void             ascii_sepline(void *);
 
-void            *pdf_alloc(const struct mchars *,
-                       const struct manoutput *);
-void            *ps_alloc(const struct mchars *,
-                       const struct manoutput *);
+void            *pdf_alloc(const struct manoutput *);
+void            *ps_alloc(const struct manoutput *);
 void             pspdf_free(void *);
 
 void             terminal_mdoc(void *, const struct roff_man *);
index fc01785..25490fd 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: mandoc.h,v 1.147 2015/09/14 15:35:47 schwarze Exp $ */
+/*     $OpenBSD: mandoc.h,v 1.148 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -410,21 +410,17 @@ typedef   void    (*mandocmsg)(enum mandocerr, enum mandoclevel,
 __BEGIN_DECLS
 
 struct mparse;
-struct mchars;
 struct roff_man;
 
 enum mandoc_esc          mandoc_escape(const char **, const char **, int *);
-struct mchars   *mchars_alloc(void);
-void             mchars_free(struct mchars *);
+void             mchars_alloc(void);
+void             mchars_free(void);
 int              mchars_num2char(const char *, size_t);
 const char      *mchars_uc2str(int);
 int              mchars_num2uc(const char *, size_t);
-int              mchars_spec2cp(const struct mchars *,
-                       const char *, size_t);
-const char      *mchars_spec2str(const struct mchars *,
-                       const char *, size_t, size_t *);
-struct mparse   *mparse_alloc(int, enum mandoclevel, mandocmsg,
-                       const struct mchars *, const char *);
+int              mchars_spec2cp(const char *, size_t);
+const char      *mchars_spec2str(const char *, size_t, size_t *);
+struct mparse   *mparse_alloc(int, enum mandoclevel, mandocmsg, const char *);
 void             mparse_free(struct mparse *);
 void             mparse_keep(struct mparse *);
 enum mandoclevel  mparse_open(struct mparse *, int *, const char *);
index 9c0bd93..a1a4af7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: mandocdb.c,v 1.156 2015/10/13 15:50:15 schwarze Exp $ */
+/*     $OpenBSD: mandocdb.c,v 1.157 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -189,7 +189,6 @@ static      int              write_utf8; /* write UTF-8 output; else ASCII */
 static int              exitcode; /* to be returned by main */
 static enum op          op; /* operational mode */
 static char             basedir[PATH_MAX]; /* current base directory */
-static struct mchars   *mchars; /* table of named characters */
 static struct ohash     mpages; /* table of distinct manual pages */
 static struct ohash     mlinks; /* table of directory entries */
 static struct ohash     names; /* table of all names */
@@ -423,9 +422,8 @@ mandocdb(int argc, char *argv[])
        }
 
        exitcode = (int)MANDOCLEVEL_OK;
-       mchars = mchars_alloc();
-       mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL,
-           mchars, NULL);
+       mchars_alloc();
+       mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL);
        mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
        mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
 
@@ -532,7 +530,7 @@ mandocdb(int argc, char *argv[])
 out:
        manconf_free(&conf);
        mparse_free(mp);
-       mchars_free(mchars);
+       mchars_free();
        mpages_free();
        ohash_delete(&mpages);
        ohash_delete(&mlinks);
@@ -1930,7 +1928,7 @@ render_string(char **public, size_t *psz)
                 */
 
                if (write_utf8) {
-                       unicode = mchars_spec2cp(mchars, seq, seqlen);
+                       unicode = mchars_spec2cp(seq, seqlen);
                        if (unicode <= 0)
                                continue;
                        addsz = utf8(unicode, utfbuf);
@@ -1938,7 +1936,7 @@ render_string(char **public, size_t *psz)
                                continue;
                        addcp = utfbuf;
                } else {
-                       addcp = mchars_spec2str(mchars, seq, seqlen, &addsz);
+                       addcp = mchars_spec2str(seq, seqlen, &addsz);
                        if (addcp == NULL)
                                continue;
                        if (*addcp == ASCII_NBRSP) {
index 4f5f722..830c73e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: read.c,v 1.118 2015/10/11 21:06:59 schwarze Exp $ */
+/*     $OpenBSD: read.c,v 1.119 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -45,7 +45,6 @@
 struct mparse {
        struct roff_man  *man; /* man parser */
        struct roff      *roff; /* roff parser (!NULL) */
-       const struct mchars *mchars; /* character table */
        char             *sodest; /* filename pointed to by .so */
        const char       *file; /* filename of current input file */
        struct buf       *primary; /* buffer currently being parsed */
@@ -792,7 +791,7 @@ mparse_open(struct mparse *curp, int *fd, const char *file)
 
 struct mparse *
 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
-    const struct mchars *mchars, const char *defos)
+    const char *defos)
 {
        struct mparse   *curp;
 
@@ -803,8 +802,7 @@ mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
        curp->mmsg = mmsg;
        curp->defos = defos;
 
-       curp->mchars = mchars;
-       curp->roff = roff_alloc(curp, curp->mchars, options);
+       curp->roff = roff_alloc(curp, options);
        curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
                curp->options & MPARSE_QUICK ? 1 : 0);
        if (curp->options & MPARSE_MDOC) {
index a991461..a32d474 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: roff.c,v 1.150 2015/10/12 00:07:27 schwarze Exp $ */
+/*     $OpenBSD: roff.c,v 1.151 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -314,7 +314,6 @@ struct      roffreg {
 
 struct roff {
        struct mparse   *parse; /* parse point */
-       const struct mchars *mchars; /* character table */
        struct roffnode *last; /* leaf of stack */
        int             *rstack; /* stack of inverted `ie' values */
        struct roffreg  *regtab; /* number registers */
@@ -899,13 +898,12 @@ roff_free(struct roff *r)
 }
 
 struct roff *
-roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
+roff_alloc(struct mparse *parse, int options)
 {
        struct roff     *r;
 
        r = mandoc_calloc(1, sizeof(struct roff));
        r->parse = parse;
-       r->mchars = mchars;
        r->options = options;
        r->format = options & (MPARSE_MDOC | MPARSE_MAN);
        r->rstackpos = -1;
@@ -1342,7 +1340,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                        esc = mandoc_escape(&cp, &stnam, &inaml);
                        if (esc == ESCAPE_ERROR ||
                            (esc == ESCAPE_SPECIAL &&
-                            mchars_spec2cp(r->mchars, stnam, inaml) < 0))
+                            mchars_spec2cp(stnam, inaml) < 0))
                                mandoc_vmsg(MANDOCERR_ESC_BAD,
                                    r->parse, ln, (int)(stesc - buf->buf),
                                    "%.*s", (int)(cp - stesc), stesc);
index b454148..3cb7cdf 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: term.c,v 1.113 2015/10/12 00:07:27 schwarze Exp $ */
+/*     $OpenBSD: term.c,v 1.114 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -452,12 +452,11 @@ term_word(struct termp *p, const char *word)
                        break;
                case ESCAPE_SPECIAL:
                        if (p->enc == TERMENC_ASCII) {
-                               cp = mchars_spec2str(p->symtab,
-                                   seq, sz, &ssz);
+                               cp = mchars_spec2str(seq, sz, &ssz);
                                if (cp != NULL)
                                        encode(p, cp, ssz);
                        } else {
-                               uc = mchars_spec2cp(p->symtab, seq, sz);
+                               uc = mchars_spec2cp(seq, sz);
                                if (uc > 0)
                                        encode1(p, uc);
                        }
@@ -698,13 +697,11 @@ term_strlen(const struct termp *p, const char *cp)
                                break;
                        case ESCAPE_SPECIAL:
                                if (p->enc == TERMENC_ASCII) {
-                                       rhs = mchars_spec2str(p->symtab,
-                                           seq, ssz, &rsz);
+                                       rhs = mchars_spec2str(seq, ssz, &rsz);
                                        if (rhs != NULL)
                                                break;
                                } else {
-                                       uc = mchars_spec2cp(p->symtab,
-                                           seq, ssz);
+                                       uc = mchars_spec2cp(seq, ssz);
                                        if (uc > 0)
                                                sz += cond_width(p, uc, &skip);
                                }
index 7e59a3b..e2f3add 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: term.h,v 1.60 2015/09/21 13:24:32 schwarze Exp $ */
+/*     $OpenBSD: term.h,v 1.61 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -86,7 +86,6 @@ struct        termp {
 #define        TERMP_NONEWLINE  (1 << 15)      /* No line break in nofill mode. */
        int              *buf;          /* Output buffer. */
        enum termenc      enc;          /* Type of encoding. */
-       const struct mchars *symtab;    /* Character table. */
        enum termfont     fontl;        /* Last font set. */
        enum termfont    *fontq;        /* Symmetric fonts. */
        int               fontsz;       /* Allocated size of font stack */
index 03fcb92..3fcc825 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: term_ascii.c,v 1.36 2015/10/12 00:07:27 schwarze Exp $ */
+/*     $OpenBSD: term_ascii.c,v 1.37 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -32,8 +32,7 @@
 #include "manconf.h"
 #include "main.h"
 
-static struct termp     *ascii_init(enum termenc, const struct mchars *,
-                               const struct manoutput *);
+static struct termp     *ascii_init(enum termenc, const struct manoutput *);
 static int               ascii_hspan(const struct termp *,
                                const struct roffsu *);
 static size_t            ascii_width(const struct termp *, int);
@@ -51,15 +50,13 @@ static      size_t            locale_width(const struct termp *, int);
 
 
 static struct termp *
-ascii_init(enum termenc enc, const struct mchars *mchars,
-       const struct manoutput *outopts)
+ascii_init(enum termenc enc, const struct manoutput *outopts)
 {
        char            *v;
        struct termp    *p;
 
        p = mandoc_calloc(1, sizeof(struct termp));
 
-       p->symtab = mchars;
        p->line = 1;
        p->tabwidth = 5;
        p->defrmargin = p->lastrmargin = 78;
@@ -107,24 +104,24 @@ ascii_init(enum termenc enc, const struct mchars *mchars,
 }
 
 void *
-ascii_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+ascii_alloc(const struct manoutput *outopts)
 {
 
-       return ascii_init(TERMENC_ASCII, mchars, outopts);
+       return ascii_init(TERMENC_ASCII, outopts);
 }
 
 void *
-utf8_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+utf8_alloc(const struct manoutput *outopts)
 {
 
-       return ascii_init(TERMENC_UTF8, mchars, outopts);
+       return ascii_init(TERMENC_UTF8, outopts);
 }
 
 void *
-locale_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+locale_alloc(const struct manoutput *outopts)
 {
 
-       return ascii_init(TERMENC_LOCALE, mchars, outopts);
+       return ascii_init(TERMENC_LOCALE, outopts);
 }
 
 static void
index 09c6610..ed7916e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: term_ps.c,v 1.42 2015/10/12 00:07:27 schwarze Exp $ */
+/*     $OpenBSD: term_ps.c,v 1.43 2015/10/13 22:57:49 schwarze Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -103,8 +103,7 @@ static      void              ps_printf(struct termp *, const char *, ...);
 static void              ps_putchar(struct termp *, char);
 static void              ps_setfont(struct termp *, enum termfont);
 static void              ps_setwidth(struct termp *, int, int);
-static struct termp     *pspdf_alloc(const struct mchars *,
-                               const struct manoutput *);
+static struct termp     *pspdf_alloc(const struct manoutput *);
 static void              pdf_obj(struct termp *, size_t);
 
 /*
@@ -505,29 +504,29 @@ static    const struct font fonts[TERMFONT__MAX] = {
 };
 
 void *
-pdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+pdf_alloc(const struct manoutput *outopts)
 {
        struct termp    *p;
 
-       if (NULL != (p = pspdf_alloc(mchars, outopts)))
+       if (NULL != (p = pspdf_alloc(outopts)))
                p->type = TERMTYPE_PDF;
 
        return p;
 }
 
 void *
-ps_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+ps_alloc(const struct manoutput *outopts)
 {
        struct termp    *p;
 
-       if (NULL != (p = pspdf_alloc(mchars, outopts)))
+       if (NULL != (p = pspdf_alloc(outopts)))
                p->type = TERMTYPE_PS;
 
        return p;
 }
 
 static struct termp *
-pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
+pspdf_alloc(const struct manoutput *outopts)
 {
        struct termp    *p;
        unsigned int     pagex, pagey;
@@ -535,7 +534,6 @@ pspdf_alloc(const struct mchars *mchars, const struct manoutput *outopts)
        const char      *pp;
 
        p = mandoc_calloc(1, sizeof(struct termp));
-       p->symtab = mchars;
        p->enc = TERMENC_ASCII;
        p->fontq = mandoc_reallocarray(NULL,
            (p->fontsz = 8), sizeof(enum termfont));