From bdf8ed0fca3f7b8218aa0cc77004b335958dc97a Mon Sep 17 00:00:00 2001 From: schwarze Date: Mon, 26 Jan 2015 00:54:09 +0000 Subject: [PATCH] Improve (or rather, rewrite) tbl(7) option parsing. * Allow the layout to start after the semicolon on the options line. * Ignore leading commas. * Option arguments cannot contain closing parentheses. * Avoid needless UNSUPP messages. * Better ERROR reporting. * Delete unused "linesize" field in struct tbl_opts. * No need for static buffers. * Garbage collect one almost empty wrapper function. Improved functionality, but minus 40 lines of code. --- regress/usr.bin/mandoc/tbl/Makefile | 6 +- regress/usr.bin/mandoc/tbl/opt.in | 19 ++ regress/usr.bin/mandoc/tbl/opt.out_ascii | 24 +++ regress/usr.bin/mandoc/tbl/opt.out_lint | 4 + usr.bin/mandoc/mandoc.h | 9 +- usr.bin/mandoc/read.c | 8 +- usr.bin/mandoc/tbl.c | 51 +++-- usr.bin/mandoc/tbl_opts.c | 231 ++++++++--------------- 8 files changed, 169 insertions(+), 183 deletions(-) create mode 100644 regress/usr.bin/mandoc/tbl/opt.in create mode 100644 regress/usr.bin/mandoc/tbl/opt.out_ascii create mode 100644 regress/usr.bin/mandoc/tbl/opt.out_lint diff --git a/regress/usr.bin/mandoc/tbl/Makefile b/regress/usr.bin/mandoc/tbl/Makefile index 270fcf9c0bb..83d48a226cb 100644 --- a/regress/usr.bin/mandoc/tbl/Makefile +++ b/regress/usr.bin/mandoc/tbl/Makefile @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile,v 1.7 2015/01/21 00:45:16 schwarze Exp $ +# $OpenBSD: Makefile,v 1.8 2015/01/26 00:54:09 schwarze Exp $ REGRESS_TARGETS = blankline center fonts macro misalign -REGRESS_TARGETS += nested numbers span vert -LINT_TARGETS = macro nested +REGRESS_TARGETS += nested numbers opt span vert +LINT_TARGETS = macro nested opt # groff-1.22.3 defect: # - When space is insufficient (on either side) for properly aligning diff --git a/regress/usr.bin/mandoc/tbl/opt.in b/regress/usr.bin/mandoc/tbl/opt.in new file mode 100644 index 00000000000..e21a9b687ef --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/opt.in @@ -0,0 +1,19 @@ +.TH TBL-OPT 1 "January 25, 2015" OpenBSD +.SH NAME +tbl-opt \- table options +.SH DESCRIPTION +normal text +.TS +tab decimalpoint (,x) %foo box; +n n . +10.0 0.01 +0.01 10.0 +.TE +.PP +normal text +.TS + , box,tab(:); l l . +a:b +.TE +.PP +normal text diff --git a/regress/usr.bin/mandoc/tbl/opt.out_ascii b/regress/usr.bin/mandoc/tbl/opt.out_ascii new file mode 100644 index 00000000000..320b8023c0e --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/opt.out_ascii @@ -0,0 +1,24 @@ +TBL-OPT(1) General Commands Manual TBL-OPT(1) + + + +NNAAMMEE + tbl-opt - table options + +DDEESSCCRRIIPPTTIIOONN + normal text + + +--------------+ + |10.0 0.01 | + | 0.01 10.0 | + +--------------+ + normal text + + +------+ + |a b | + +------+ + normal text + + + +OpenBSD January 25, 2015 TBL-OPT(1) diff --git a/regress/usr.bin/mandoc/tbl/opt.out_lint b/regress/usr.bin/mandoc/tbl/opt.out_lint new file mode 100644 index 00000000000..85ba35cf7c4 --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/opt.out_lint @@ -0,0 +1,4 @@ +mandoc: opt.in:7:5: ERROR: missing tbl option argument: tab +mandoc: opt.in:7:19: ERROR: wrong tbl option argument size: decimalpoint want 1 have 2 +mandoc: opt.in:7:23: ERROR: non-alphabetic character in tbl options: % +mandoc: opt.in:7:24: ERROR: skipping unknown tbl option: foo diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 91696fdd60a..23ccbfa1301 100644 --- a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc.h,v 1.128 2015/01/24 01:59:40 schwarze Exp $ */ +/* $OpenBSD: mandoc.h,v 1.129 2015/01/26 00:54:09 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons * Copyright (c) 2010-2015 Ingo Schwarze @@ -136,6 +136,10 @@ enum mandocerr { MANDOCERR_EQNEOF, /* unexpected end of equation */ /* related to tables */ + MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */ + MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */ + MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */ + MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */ MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */ MANDOCERR_TBLNODATA, /* no table data cells specified */ MANDOCERR_TBLIGNDATA, /* ignore data in cell */ @@ -175,8 +179,6 @@ enum mandocerr { MANDOCERR_TOOLARGE, /* input too large */ MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */ MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */ - MANDOCERR_TBL, /* unsupported table syntax */ - MANDOCERR_TBLOPT, /* unsupported table option */ MANDOCERR_TBLLAYOUT, /* unsupported table layout */ MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */ @@ -186,7 +188,6 @@ enum mandocerr { struct tbl_opts { char tab; /* cell-separator */ char decimal; /* decimal point */ - int linesize; int opts; #define TBL_OPT_CENTRE (1 << 0) #define TBL_OPT_EXPAND (1 << 1) diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c index 7f8c4c62b0a..f2b4d96d28d 100644 --- a/usr.bin/mandoc/read.c +++ b/usr.bin/mandoc/read.c @@ -1,4 +1,4 @@ -/* $OpenBSD: read.c,v 1.90 2015/01/24 01:59:40 schwarze Exp $ */ +/* $OpenBSD: read.c,v 1.91 2015/01/26 00:54:09 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010-2015 Ingo Schwarze @@ -175,6 +175,10 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "unexpected end of equation", /* related to tables */ + "non-alphabetic character in tbl options", + "skipping unknown tbl option", + "missing tbl option argument", + "wrong tbl option argument size", "no table layout cells specified", "no table data cells specified", "ignore data in cell", @@ -213,8 +217,6 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "input too large", "unsupported control character", "unsupported roff request", - "unsupported table syntax", - "unsupported table option", "unsupported table layout", "ignoring macro in table", }; diff --git a/usr.bin/mandoc/tbl.c b/usr.bin/mandoc/tbl.c index ab6667c0dff..16b4a401189 100644 --- a/usr.bin/mandoc/tbl.c +++ b/usr.bin/mandoc/tbl.c @@ -1,7 +1,7 @@ -/* $OpenBSD: tbl.c,v 1.13 2015/01/21 00:45:16 schwarze Exp $ */ +/* $OpenBSD: tbl.c,v 1.14 2015/01/26 00:54:09 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2011 Ingo Schwarze + * Copyright (c) 2011, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -32,29 +32,45 @@ enum rofferr tbl_read(struct tbl_node *tbl, int ln, const char *p, int offs) { - int len; const char *cp; - - cp = &p[offs]; - len = (int)strlen(cp); + int active; /* - * If we're in the options section and we don't have a - * terminating semicolon, assume we've moved directly into the - * layout section. No need to report a warning: this is, - * apparently, standard behaviour. + * In the options section, proceed to the layout section + * after a semicolon, or right away if there is no semicolon. + * Ignore semicolons in arguments. */ - if (TBL_PART_OPTS == tbl->part && len) - if (';' != cp[len - 1]) - tbl->part = TBL_PART_LAYOUT; + if (tbl->part == TBL_PART_OPTS) { + tbl->part = TBL_PART_LAYOUT; + active = 1; + for (cp = p; *cp != '\0'; cp++) { + switch (*cp) { + case '(': + active = 0; + continue; + case ')': + active = 1; + continue; + case ';': + if (active) + break; + continue; + default: + continue; + } + break; + } + if (*cp == ';') { + tbl_option(tbl, ln, p); + if (*(p = cp + 1) == '\0') + return(ROFF_IGN); + } + } - /* Now process each logical section of the table. */ + /* Process the other section types. */ switch (tbl->part) { - case TBL_PART_OPTS: - tbl_option(tbl, ln, p); - return(ROFF_IGN); case TBL_PART_LAYOUT: tbl_layout(tbl, ln, p); return(ROFF_IGN); @@ -79,7 +95,6 @@ tbl_alloc(int pos, int line, struct mparse *parse) tbl->parse = parse; tbl->part = TBL_PART_OPTS; tbl->opts.tab = '\t'; - tbl->opts.linesize = 12; tbl->opts.decimal = '.'; return(tbl); } diff --git a/usr.bin/mandoc/tbl_opts.c b/usr.bin/mandoc/tbl_opts.c index 04fb25aac76..1593e7add36 100644 --- a/usr.bin/mandoc/tbl_opts.c +++ b/usr.bin/mandoc/tbl_opts.c @@ -1,6 +1,7 @@ -/* $OpenBSD: tbl_opts.c,v 1.8 2015/01/14 22:44:51 schwarze Exp $ */ +/* $OpenBSD: tbl_opts.c,v 1.9 2015/01/26 00:54:09 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -51,12 +52,6 @@ struct tbl_phrase { /* Handle Commonwealth/American spellings. */ #define KEY_MAXKEYS 14 -/* Maximum length of key name string. */ -#define KEY_MAXNAME 13 - -/* Maximum length of key number size. */ -#define KEY_MAXNUMSZ 10 - static const struct tbl_phrase keys[KEY_MAXKEYS] = { { "center", TBL_OPT_CENTRE, KEY_CENTRE}, { "centre", TBL_OPT_CENTRE, KEY_CENTRE}, @@ -74,193 +69,119 @@ static const struct tbl_phrase keys[KEY_MAXKEYS] = { { "nospaces", TBL_OPT_NOSPACE, KEY_NOSPACE}, }; -static int arg(struct tbl_node *, int, +static void arg(struct tbl_node *, int, const char *, int *, enum tbl_ident); -static void opt(struct tbl_node *, int, - const char *, int *); -static int +static void arg(struct tbl_node *tbl, int ln, const char *p, int *pos, enum tbl_ident key) { - int i; - char buf[KEY_MAXNUMSZ]; + const char *optname; + int len, want; while (isspace((unsigned char)p[*pos])) (*pos)++; - /* Arguments always begin with a parenthesis. */ + /* Arguments are enclosed in parentheses. */ - if ('(' != p[*pos]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos, NULL); - return(0); + len = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + len] != ')') + len++; } - (*pos)++; - - /* - * The arguments can be ANY value, so we can't just stop at the - * next close parenthesis (the argument can be a closed - * parenthesis itself). - */ - switch (key) { case KEY_DELIM: - if ('\0' == p[(*pos)++]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); - } - - if ('\0' == p[(*pos)++]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); - } + optname = "delim"; + want = 2; break; case KEY_TAB: - if ('\0' != (tbl->opts.tab = p[(*pos)++])) - break; - - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); + optname = "tab"; + want = 1; + if (len == want) + tbl->opts.tab = p[*pos]; + break; case KEY_LINESIZE: - for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) { - buf[i] = p[*pos]; - if ( ! isdigit((unsigned char)buf[i])) - break; - } - - if (i < KEY_MAXNUMSZ) { - buf[i] = '\0'; - tbl->opts.linesize = atoi(buf); - break; - } - - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); - return(0); + optname = "linesize"; + want = 0; + break; case KEY_DPOINT: - if ('\0' != (tbl->opts.decimal = p[(*pos)++])) - break; - - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); + optname = "decimalpoint"; + want = 1; + if (len == want) + tbl->opts.decimal = p[*pos]; + break; default: abort(); /* NOTREACHED */ } - /* End with a close parenthesis. */ + if (len == 0) + mandoc_msg(MANDOCERR_TBLOPT_NOARG, + tbl->parse, ln, *pos, optname); + else if (want && len != want) + mandoc_vmsg(MANDOCERR_TBLOPT_ARGSZ, + tbl->parse, ln, *pos, + "%s want %d have %d", optname, want, len); - if (')' == p[(*pos)++]) - return(1); - - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos - 1, NULL); - return(0); + *pos += len; + if (p[*pos] == ')') + (*pos)++; } -static void -opt(struct tbl_node *tbl, int ln, const char *p, int *pos) +/* + * Parse one line of options up to the semicolon. + * Each option can be preceded by blanks and/or commas, + * and some options are followed by arguments. + */ +void +tbl_option(struct tbl_node *tbl, int ln, const char *p) { - int i, sv; - char buf[KEY_MAXNAME]; - - /* - * Parse individual options from the stream as surrounded by - * this goto. Each pass through the routine parses out a single - * option and registers it. Option arguments are processed in - * the arg() function. - */ - -again: /* - * EBNF describing this section: - * - * options ::= option_list [:space:]* [;][\n] - * option_list ::= option option_tail - * option_tail ::= [,:space:]+ option_list | - * ::= epsilon - * option ::= [:alpha:]+ args - * args ::= [:space:]* [(] [:alpha:]+ [)] - */ + int i, pos, len; - while (isspace((unsigned char)p[*pos])) - (*pos)++; - - /* Safe exit point. */ - - if (';' == p[*pos]) - return; - - /* Copy up to first non-alpha character. */ + pos = 0; + for (;;) { + while (isspace((unsigned char)p[pos]) || p[pos] == ',') + pos++; - for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) { - buf[i] = (char)tolower((unsigned char)p[*pos]); - if ( ! isalpha((unsigned char)buf[i])) - break; - } + if (p[pos] == ';') + return; - /* Exit if buffer is empty (or overrun). */ + /* Parse one option name. */ - if (KEY_MAXNAME == i || 0 == i) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); - return; - } + len = 0; + while (isalpha((unsigned char)p[pos + len])) + len++; - buf[i] = '\0'; + if (len == 0) { + mandoc_vmsg(MANDOCERR_TBLOPT_ALPHA, + tbl->parse, ln, pos, "%c", p[pos]); + pos++; + continue; + } - while (isspace((unsigned char)p[*pos]) || p[*pos] == ',') - (*pos)++; + /* Look up the option name. */ - /* - * Look through all of the available keys to find one that - * matches the input. FIXME: hashtable this. - */ + i = 0; + while (i < KEY_MAXKEYS && + (strncasecmp(p + pos, keys[i].name, len) || + keys[i].name[len] != '\0')) + i++; - for (i = 0; i < KEY_MAXKEYS; i++) { - if (strcmp(buf, keys[i].name)) + if (i == KEY_MAXKEYS) { + mandoc_vmsg(MANDOCERR_TBLOPT_BAD, tbl->parse, + ln, pos, "%.*s", len, p + pos); + pos += len; continue; + } - /* - * Note: this is more difficult to recover from, as we - * can be anywhere in the option sequence and it's - * harder to jump to the next. Meanwhile, just bail out - * of the sequence altogether. - */ + /* Handle the option. */ + pos += len; if (keys[i].key) tbl->opts.opts |= keys[i].key; - else if ( ! arg(tbl, ln, p, pos, keys[i].ident)) - return; - - break; + else + arg(tbl, ln, p, &pos, keys[i].ident); } - - /* - * Allow us to recover from bad options by continuing to another - * parse sequence. - */ - - if (KEY_MAXKEYS == i) - mandoc_msg(MANDOCERR_TBLOPT, tbl->parse, ln, sv, NULL); - - goto again; - /* NOTREACHED */ -} - -void -tbl_option(struct tbl_node *tbl, int ln, const char *p) -{ - int pos; - - /* - * Table options are always on just one line, so automatically - * switch into the next input mode here. - */ - tbl->part = TBL_PART_LAYOUT; - - pos = 0; - opt(tbl, ln, p, &pos); } -- 2.20.1