From bba1fa43bc398b49ffddf2d2df307b9dfb899a4c Mon Sep 17 00:00:00 2001 From: schwarze Date: Mon, 26 Jan 2015 18:41:45 +0000 Subject: [PATCH] Rework tbl(7) layout parsing: * Continue parsing even if part of the input is invalid. * Do not require whitespace between cell specifications. * Allow tabs as well as blanks between modifiers. * Mark the 'm' modifier as unsupported. * Parse and ignore the 'p' and 'v' modifiers. * Better warning and error messages. * Get rid of a static buffer. Improved functionality but minus 50 lines of code. --- regress/usr.bin/mandoc/tbl/Makefile | 6 +- regress/usr.bin/mandoc/tbl/layout.in | 22 ++ regress/usr.bin/mandoc/tbl/layout.out_ascii | 25 ++ regress/usr.bin/mandoc/tbl/layout.out_lint | 5 + share/man/man7/tbl.7 | 35 ++- usr.bin/mandoc/mandoc.h | 13 +- usr.bin/mandoc/read.c | 13 +- usr.bin/mandoc/tbl_layout.c | 238 +++++++------------- 8 files changed, 183 insertions(+), 174 deletions(-) create mode 100644 regress/usr.bin/mandoc/tbl/layout.in create mode 100644 regress/usr.bin/mandoc/tbl/layout.out_ascii create mode 100644 regress/usr.bin/mandoc/tbl/layout.out_lint diff --git a/regress/usr.bin/mandoc/tbl/Makefile b/regress/usr.bin/mandoc/tbl/Makefile index 83d48a226cb..02d0da2b96d 100644 --- a/regress/usr.bin/mandoc/tbl/Makefile +++ b/regress/usr.bin/mandoc/tbl/Makefile @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile,v 1.8 2015/01/26 00:54:09 schwarze Exp $ +# $OpenBSD: Makefile,v 1.9 2015/01/26 18:41:45 schwarze Exp $ -REGRESS_TARGETS = blankline center fonts macro misalign +REGRESS_TARGETS = blankline center fonts layout macro misalign REGRESS_TARGETS += nested numbers opt span vert -LINT_TARGETS = macro nested opt +LINT_TARGETS = layout macro nested opt # groff-1.22.3 defect: # - When space is insufficient (on either side) for properly aligning diff --git a/regress/usr.bin/mandoc/tbl/layout.in b/regress/usr.bin/mandoc/tbl/layout.in new file mode 100644 index 00000000000..3e505bc34d6 --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/layout.in @@ -0,0 +1,22 @@ +.TH TBL-LAYOUT 1 "January 26, 2015" OpenBSD +.SH NAME +tbl-layout \- table layout +.SH DESCRIPTION +normal text +.TS +box tab(:); +lp-1|l bsil|||l,l|l ilb^i|||l. +a:b:c:d +e:f:g:h:i +.TE +.PP +normal text +.TS +box tab(:); +l ^ +l l. +a:b +c:d +.TE +.PP +normal text diff --git a/regress/usr.bin/mandoc/tbl/layout.out_ascii b/regress/usr.bin/mandoc/tbl/layout.out_ascii new file mode 100644 index 00000000000..8aaaf49a894 --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/layout.out_ascii @@ -0,0 +1,25 @@ +TBL-LAYOUT(1) General Commands Manual TBL-LAYOUT(1) + + + +NNAAMMEE + tbl-layout - table layout + +DDEESSCCRRIIPPTTIIOONN + normal text + + +--+-----------++--+ + |a | bb c ||d | + |e | _f gg ||i | + +--+-----------++--+ + normal text + + +------+ + |a | + |c d | + +------+ + normal text + + + +OpenBSD January 26, 2015 TBL-LAYOUT(1) diff --git a/regress/usr.bin/mandoc/tbl/layout.out_lint b/regress/usr.bin/mandoc/tbl/layout.out_lint new file mode 100644 index 00000000000..855c2c26968 --- /dev/null +++ b/regress/usr.bin/mandoc/tbl/layout.out_lint @@ -0,0 +1,5 @@ +mandoc: layout.in:8:14: WARNING: skipping vertical bar in tbl layout +mandoc: layout.in:8:28: WARNING: skipping vertical bar in tbl layout +mandoc: layout.in:10:7: ERROR: ignore data in cell +mandoc: layout.in:16:3: WARNING: tbl column starts with span +mandoc: layout.in:18:3: ERROR: ignore data in cell diff --git a/share/man/man7/tbl.7 b/share/man/man7/tbl.7 index e911d69dccd..19a10015851 100644 --- a/share/man/man7/tbl.7 +++ b/share/man/man7/tbl.7 @@ -1,4 +1,4 @@ -.\" $OpenBSD: tbl.7,v 1.11 2015/01/26 13:02:53 schwarze Exp $ +.\" $OpenBSD: tbl.7,v 1.12 2015/01/26 18:41:45 schwarze Exp $ .\" .\" Copyright (c) 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2014, 2015 Ingo Schwarze @@ -251,6 +251,9 @@ The following case-insensitive modifier keys are available: .Bl -tag -width 2n .It Cm b Use a bold font for the contents of this column. +.It Cm d +Move cell content down to the last cell of a vertical span. +Currently ignored. .It Cm e Make this column wider to match the maximum width of any other column also having the @@ -263,6 +266,27 @@ See the manual for supported one-character font names. .It Cm i Use an italic font for the contents of this column. +.It Cm m +Specify a cell start macro. +This is a GNU extension and currently unsupported. +.It Cm p +Set the point size to the following unsigned argument, +or change it by the following signed argument. +Currently ignored. +.It Cm v +Set the vertical line spacing to the following unsigned argument, +or change it by the following signed argument. +Currently ignored. +.It Cm t +Do not vertically center cell content in the vertical span, +leave it at the top. +Currently ignored. +.It Cm u +Move cell content up by half a table line. +Currently ignored. +.It Cm w +Specify minimum column width. +Currently ignored. .It Cm x After determining the width of all other columns, distribute the rest of the line length among all columns having the @@ -272,15 +296,6 @@ modifier. Do not use this cell for determining the width of this column. .El .Pp -The modifiers -.Cm d , -.Cm t , -.Cm u , -and -.Cm w -are ignored by -.Xr mandoc 1 . -.Pp For example, the following layout specifies a center-justified column of minimum width 10, followed by vertical bar, followed by a left-justified column of minimum width 10, another vertical bar, then a column using diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 639dd6ac36e..53f93f202c4 100644 --- a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc.h,v 1.130 2015/01/26 13:02:53 schwarze Exp $ */ +/* $OpenBSD: mandoc.h,v 1.131 2015/01/26 18:41:45 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons * Copyright (c) 2010-2015 Ingo Schwarze @@ -127,6 +127,11 @@ enum mandocerr { MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */ MANDOCERR_STR_UNDEF, /* undefined string, using "": name */ + /* related to tables */ + MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */ + MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */ + MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */ + MANDOCERR_ERROR, /* ===== start of errors ===== */ /* related to equations */ @@ -140,7 +145,9 @@ enum mandocerr { MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */ MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */ MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */ - MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */ + MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */ + MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */ + MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */ MANDOCERR_TBLNODATA, /* no table data cells specified */ MANDOCERR_TBLIGNDATA, /* ignore data in cell */ MANDOCERR_TBLBLOCK, /* data block still open */ @@ -179,7 +186,7 @@ enum mandocerr { MANDOCERR_TOOLARGE, /* input too large */ MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */ MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */ - MANDOCERR_TBLLAYOUT, /* unsupported table layout */ + MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */ MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */ MANDOCERR_TBLEQN, /* eqn in tbl */ diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c index f38fbc62083..18e25e9e87c 100644 --- a/usr.bin/mandoc/read.c +++ b/usr.bin/mandoc/read.c @@ -1,4 +1,4 @@ -/* $OpenBSD: read.c,v 1.92 2015/01/26 13:02:53 schwarze Exp $ */ +/* $OpenBSD: read.c,v 1.93 2015/01/26 18:41:45 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010-2015 Ingo Schwarze @@ -166,6 +166,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "invalid escape sequence", "undefined string, using \"\"", + /* related to tables */ + "tbl line starts with span", + "tbl column starts with span", + "skipping vertical bar in tbl layout", + "generic error", /* related to equations */ @@ -179,7 +184,9 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "skipping unknown tbl option", "missing tbl option argument", "wrong tbl option argument size", - "no table layout cells specified", + "empty tbl layout", + "invalid character in tbl layout", + "unmatched parenthesis in tbl layout", "no table data cells specified", "ignore data in cell", "data block still open", @@ -217,7 +224,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "input too large", "unsupported control character", "unsupported roff request", - "unsupported table layout", + "unsupported tbl layout modifier", "ignoring macro in table", "eqn in tbl", }; diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c index 31506fb98cb..daa49a26f82 100644 --- a/usr.bin/mandoc/tbl_layout.c +++ b/usr.bin/mandoc/tbl_layout.c @@ -1,7 +1,7 @@ -/* $OpenBSD: tbl_layout.c,v 1.18 2015/01/14 22:44:51 schwarze Exp $ */ +/* $OpenBSD: tbl_layout.c,v 1.19 2015/01/26 18:41:45 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2012, 2014 Ingo Schwarze + * Copyright (c) 2012, 2014, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -32,15 +32,7 @@ struct tbl_phrase { enum tbl_cellt key; }; -/* - * FIXME: we can make this parse a lot nicer by, when an error is - * encountered in a layout key, bailing to the next key (i.e. to the - * next whitespace then continuing). - */ - -#define KEYS_MAX 11 - -static const struct tbl_phrase keys[KEYS_MAX] = { +static const struct tbl_phrase keys[] = { { 'c', TBL_CELL_CENTRE }, { 'r', TBL_CELL_RIGHT }, { 'l', TBL_CELL_LEFT }, @@ -53,57 +45,30 @@ static const struct tbl_phrase keys[KEYS_MAX] = { { '=', TBL_CELL_DHORIZ } }; -static int mods(struct tbl_node *, struct tbl_cell *, +#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void mods(struct tbl_node *, struct tbl_cell *, int, const char *, int *); -static int cell(struct tbl_node *, struct tbl_row *, +static void cell(struct tbl_node *, struct tbl_row *, int, const char *, int *); static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *, enum tbl_cellt, int vert); -static int +static void mods(struct tbl_node *tbl, struct tbl_cell *cp, int ln, const char *p, int *pos) { - char buf[5]; - int i; + char *endptr; - /* Not all types accept modifiers. */ +mod: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; - switch (cp->pos) { - case TBL_CELL_DOWN: - /* FALLTHROUGH */ - case TBL_CELL_HORIZ: - /* FALLTHROUGH */ - case TBL_CELL_DHORIZ: - return(1); - default: - break; - } + /* Row delimiters and cell specifiers end modifier lists. */ -mod: - /* - * XXX: since, at least for now, modifiers are non-conflicting - * (are separable by value, regardless of position), we let - * modifiers come in any order. The existing tbl doesn't let - * this happen. - */ - switch (p[*pos]) { - case '\0': - /* FALLTHROUGH */ - case ' ': - /* FALLTHROUGH */ - case '\t': - /* FALLTHROUGH */ - case ',': - /* FALLTHROUGH */ - case '.': - /* FALLTHROUGH */ - case '|': - return(1); - default: - break; - } + if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL) + return; /* Throw away parenthesised expression. */ @@ -115,72 +80,65 @@ mod: (*pos)++; goto mod; } - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse, ln, *pos, NULL); - return(0); + return; } /* Parse numerical spacing from modifier string. */ if (isdigit((unsigned char)p[*pos])) { - for (i = 0; i < 4; i++) { - if ( ! isdigit((unsigned char)p[*pos + i])) - break; - buf[i] = p[*pos + i]; - } - buf[i] = '\0'; - - /* No greater than 4 digits. */ - - if (4 == i) { - mandoc_msg(MANDOCERR_TBLLAYOUT, - tbl->parse, ln, *pos, NULL); - return(0); - } - - *pos += i; - cp->spacing = (size_t)atoi(buf); - + cp->spacing = strtoull(p + *pos, &endptr, 10); + *pos = endptr - p; goto mod; - /* NOTREACHED */ } - /* TODO: GNU has many more extensions. */ - switch (tolower((unsigned char)p[(*pos)++])) { - case 'z': - cp->flags |= TBL_CELL_WIGN; - goto mod; - case 'u': - cp->flags |= TBL_CELL_UP; + case 'b': + /* FALLTHROUGH */ + case 'i': + /* FALLTHROUGH */ + case 'r': + (*pos)--; + break; + case 'd': + cp->flags |= TBL_CELL_BALIGN; goto mod; case 'e': cp->flags |= TBL_CELL_EQUAL; goto mod; + case 'f': + break; + case 'm': + mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse, + ln, *pos, "m"); + goto mod; + case 'p': + /* FALLTHROUGH */ + case 'v': + if (p[*pos] == '-' || p[*pos] == '+') + (*pos)++; + while (isdigit((unsigned char)p[*pos])) + (*pos)++; + goto mod; case 't': cp->flags |= TBL_CELL_TALIGN; goto mod; - case 'd': - cp->flags |= TBL_CELL_BALIGN; + case 'u': + cp->flags |= TBL_CELL_UP; goto mod; case 'w': /* XXX for now, ignore minimal column width */ goto mod; case 'x': cp->flags |= TBL_CELL_WMAX; goto mod; - case 'f': - break; - case 'r': - /* FALLTHROUGH */ - case 'b': - /* FALLTHROUGH */ - case 'i': - (*pos)--; - break; + case 'z': + cp->flags |= TBL_CELL_WIGN; + goto mod; default: - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos - 1, NULL); - return(0); + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos - 1, "%c", p[*pos - 1]); + goto mod; } switch (tolower((unsigned char)p[(*pos)++])) { @@ -199,20 +157,13 @@ mod: case 'r': goto mod; default: - break; - } - if (isalnum((unsigned char)p[*pos - 1])) { mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse, ln, *pos - 1, "TS f%c", p[*pos - 1]); goto mod; } - - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos - 1, NULL); - return(0); } -static int +static void cell(struct tbl_node *tbl, struct tbl_row *rp, int ln, const char *p, int *pos) { @@ -221,16 +172,24 @@ cell(struct tbl_node *tbl, struct tbl_row *rp, /* Handle vertical lines. */ - for (vert = 0; '|' == p[*pos]; ++*pos) - vert++; - while (' ' == p[*pos]) + vert = 0; +again: + while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') { + if (p[*pos] == '|') { + if (vert < 2) + vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + tbl->parse, ln, *pos, NULL); + } (*pos)++; + } /* Handle trailing vertical lines */ if ('.' == p[*pos] || '\0' == p[*pos]) { rp->vert = vert; - return(1); + return; } /* Parse the column position (`c', `l', `r', ...). */ @@ -239,62 +198,32 @@ cell(struct tbl_node *tbl, struct tbl_row *rp, if (tolower((unsigned char)p[*pos]) == keys[i].name) break; - if (KEYS_MAX == i) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos, NULL); - return(0); + if (i == KEYS_MAX) { + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos, "%c", p[*pos]); + (*pos)++; + goto again; } - c = keys[i].key; - /* - * If a span cell is found first, raise a warning and abort the - * parse. If a span cell is found and the last layout element - * isn't a "normal" layout, bail. - * - * FIXME: recover from this somehow? - */ - - if (TBL_CELL_SPAN == c) { - if (NULL == rp->first) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, - ln, *pos, NULL); - return(0); - } else if (rp->last) - switch (rp->last->pos) { - case TBL_CELL_HORIZ: - /* FALLTHROUGH */ - case TBL_CELL_DHORIZ: - mandoc_msg(MANDOCERR_TBLLAYOUT, - tbl->parse, ln, *pos, NULL); - return(0); - default: - break; - } - } + /* Special cases of spanners. */ - /* - * If a vertical spanner is found, we may not be in the first - * row. - */ - - if (TBL_CELL_DOWN == c && rp == tbl->first_row) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL); - return(0); - } + if (c == TBL_CELL_SPAN) { + if (rp->last == NULL) + mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN, + tbl->parse, ln, *pos, NULL); + else if (rp->last->pos == TBL_CELL_HORIZ || + rp->last->pos == TBL_CELL_DHORIZ) + c = rp->last->pos; + } else if (c == TBL_CELL_DOWN && rp == tbl->first_row) + mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN, + tbl->parse, ln, *pos, NULL); (*pos)++; - /* Disallow adjacent spacers. */ - - if (vert > 2) { - mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL); - return(0); - } - /* Allocate cell then parse its modifiers. */ - return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos)); + mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos); } void @@ -309,7 +238,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p) for (;;) { /* Skip whitespace before and after each cell. */ - while (isspace((unsigned char)p[pos])) + while (p[pos] == ' ' || p[pos] == '\t') pos++; switch (p[pos]) { @@ -324,7 +253,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p) tbl->part = TBL_PART_DATA; if (tbl->first_row != NULL) return; - mandoc_msg(MANDOCERR_TBLNOLAYOUT, + mandoc_msg(MANDOCERR_TBLLAYOUT_NONE, tbl->parse, ln, pos, NULL); rp = mandoc_calloc(1, sizeof(*rp)); cell_alloc(tbl, rp, TBL_CELL_LEFT, 0); @@ -342,8 +271,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p) tbl->first_row = rp; tbl->last_row = rp; } - if ( ! cell(tbl, rp, ln, p, &pos)) - return; + cell(tbl, rp, ln, p, &pos); } } -- 2.20.1