From: schwarze Date: Sat, 17 Oct 2015 00:19:58 +0000 (+0000) Subject: Very tricky diff to fix macro interpretation and spacing around tabs X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=407d12bbb4d2c9a9763a78f95c7e141779859db3;p=openbsd Very tricky diff to fix macro interpretation and spacing around tabs in .Bl -column; it took me more than a day to get this right. Triggered by a loosely related bug report from tim@. The lesson for you is: Use .Ta macros in .Bl -column, avoid tabs, or you are in for surprises: The last word before a tab is not interpreted as a macro (unless there is a blank in between), the first word after a tab isn't either (unless there is a blank in between), and a blank after a tab causes a leading blank in the respective output cell. Yes, "blank", "tab", "blank tab" and "tab blank" all have different semantics; if you write code relying on that, good luck maintaining it afterwards... --- diff --git a/regress/usr.bin/mandoc/mdoc/Bl/column.in b/regress/usr.bin/mandoc/mdoc/Bl/column.in index d959612bdeb..be6ccdca9d7 100644 --- a/regress/usr.bin/mandoc/mdoc/Bl/column.in +++ b/regress/usr.bin/mandoc/mdoc/Bl/column.in @@ -85,6 +85,20 @@ .It a Ta b Ta c d .It a Ta b Ta c Ta d .El +.\" Macros before and after tabs +.Bl -column "aa" "OpenBSD OpenBSD OpenBSD" +.It aa Ta Ox Ox Ox Ta tab-tab +.It aa Ta Ox Ox Ox tab-ta +.It aa Ox Ox Ox Ta ta-tab +.It aa Ox Ox Ox ta-ta +.It aa Ox Ox Ox ta-bl/ta +.It aa Ox Ox Ox ta/bl-ta +.It aa Ox Ox Ox ta/bl-bl/ta +.It aa Ta bb Ta +ta at eol +.It aa bb +tab at eol +.El .\" Interrupted column list .Bl -column "aa" -width 6n -compact "bb" "cc" .It aa Ta bb Ta cc Ta dd diff --git a/regress/usr.bin/mandoc/mdoc/Bl/column.out_ascii b/regress/usr.bin/mandoc/mdoc/Bl/column.out_ascii index 7cbce2e95e3..a7c07d32b22 100644 --- a/regress/usr.bin/mandoc/mdoc/Bl/column.out_ascii +++ b/regress/usr.bin/mandoc/mdoc/Bl/column.out_ascii @@ -59,6 +59,16 @@ DDEESSCCRRIIPPTTIIOONN a b c d a b c d a b c d + + aa OpenBSD OpenBSD OpenBSD tab-tab + aa OpenBSD OpenBSD Ox tab-ta + aa Ox OpenBSD OpenBSD ta-tab + aa Ox OpenBSD Ox ta-ta + aa Ox OpenBSD OpenBSD ta-bl/ta + aa OpenBSD OpenBSD Ox ta/bl-ta + aa OpenBSD OpenBSD OpenBSD ta/bl-bl/ta + aa bb ta at eol + aa bb tab at eol aa bb cc dd OpenBSD October 14, 2015 OpenBSD diff --git a/regress/usr.bin/mandoc/mdoc/Bl/column.out_lint b/regress/usr.bin/mandoc/mdoc/Bl/column.out_lint index 1cadbff8cfe..eed080bdc7f 100644 --- a/regress/usr.bin/mandoc/mdoc/Bl/column.out_lint +++ b/regress/usr.bin/mandoc/mdoc/Bl/column.out_lint @@ -1,4 +1,4 @@ mandoc: column.in:71:2: WARNING: wrong number of cells: 2 columns, 1 cells mandoc: column.in:74:2: WARNING: wrong number of cells: 2 columns, 4 cells mandoc: column.in:75:2: WARNING: wrong number of cells: 2 columns, 5 cells -mandoc: column.in:89:18: WARNING: skipping -width argument: Bl -column +mandoc: column.in:103:18: WARNING: skipping -width argument: Bl -column diff --git a/usr.bin/mandoc/libmdoc.h b/usr.bin/mandoc/libmdoc.h index 0b6d41e1e97..ef3f17bce24 100644 --- a/usr.bin/mandoc/libmdoc.h +++ b/usr.bin/mandoc/libmdoc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: libmdoc.h,v 1.76 2015/04/23 15:35:39 schwarze Exp $ */ +/* $OpenBSD: libmdoc.h,v 1.77 2015/10/17 00:19:58 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2013, 2014, 2015 Ingo Schwarze @@ -40,9 +40,7 @@ enum margserr { ARGS_WORD, /* normal word */ ARGS_PUNCT, /* series of punctuation */ ARGS_QWORD, /* quoted word */ - ARGS_PHRASE, /* Ta'd phrase (-column) */ - ARGS_PPHRASE, /* tabbed phrase (-column) */ - ARGS_PEND /* last phrase (-column) */ + ARGS_PHRASE /* Bl -column phrase */ }; /* diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c index 27dc5084ef5..42124f82ddd 100644 --- a/usr.bin/mandoc/mdoc_argv.c +++ b/usr.bin/mandoc/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mdoc_argv.c,v 1.62 2015/10/15 22:45:07 schwarze Exp $ */ +/* $OpenBSD: mdoc_argv.c,v 1.63 2015/10/17 00:19:58 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2012, 2014, 2015 Ingo Schwarze @@ -447,11 +447,10 @@ args(struct roff_man *mdoc, int line, int *pos, { char *p; int pairs; - enum margserr rc; if (buf[*pos] == '\0') { if (mdoc->flags & MDOC_PHRASELIT && - ! (mdoc->flags & MDOC_PPHRASE)) { + ! (mdoc->flags & MDOC_PHRASE)) { mandoc_msg(MANDOCERR_ARG_QUOTE, mdoc->parse, line, *pos, NULL); mdoc->flags &= ~MDOC_PHRASELIT; @@ -471,18 +470,41 @@ args(struct roff_man *mdoc, int line, int *pos, if (fl == ARGSFL_TABSEP) { if ((p = strchr(*v, '\t')) != NULL) { - /* Skip any blank characters after the tab. */ + + /* + * Words right before and right after + * tab characters are not parsed, + * unless there is a blank in between. + */ + + if (p[-1] != ' ') + mdoc->flags |= MDOC_PHRASEQL; + if (p[1] != ' ') + mdoc->flags |= MDOC_PHRASEQN; + + /* + * One or more blanks after a tab cause + * one leading blank in the next column. + * So skip all but one of them. + */ + *pos += (int)(p - *v) + 1; - while (buf[*pos] == ' ') + while (buf[*pos] == ' ' && buf[*pos + 1] == ' ') (*pos)++; - rc = ARGS_PPHRASE; + + /* + * A tab at the end of an input line + * switches to the next column. + */ + + if (buf[*pos] == '\0' || buf[*pos + 1] == '\0') + mdoc->flags |= MDOC_PHRASEQN; } else { p = strchr(*v, '\0'); if (p[-1] == ' ') mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, line, *pos, NULL); *pos += (int)(p - *v); - rc = ARGS_PEND; } /* Skip any trailing blank characters. */ @@ -491,7 +513,7 @@ args(struct roff_man *mdoc, int line, int *pos, p--; *p = '\0'; - return rc; + return ARGS_PHRASE; } /* @@ -502,11 +524,11 @@ args(struct roff_man *mdoc, int line, int *pos, * Whitespace is NOT involved in literal termination. */ - if (MDOC_PHRASELIT & mdoc->flags || '\"' == buf[*pos]) { - if ( ! (MDOC_PHRASELIT & mdoc->flags)) + if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') { + if ( ! (mdoc->flags & MDOC_PHRASELIT)) *v = &buf[++(*pos)]; - if (MDOC_PPHRASE & mdoc->flags) + if (mdoc->flags & MDOC_PHRASE) mdoc->flags |= MDOC_PHRASELIT; pairs = 0; @@ -526,11 +548,10 @@ args(struct roff_man *mdoc, int line, int *pos, if (pairs) buf[*pos - pairs] = '\0'; - if ('\0' == buf[*pos]) { - if (MDOC_PPHRASE & mdoc->flags) - return ARGS_QWORD; - mandoc_msg(MANDOCERR_ARG_QUOTE, - mdoc->parse, line, *pos, NULL); + if (buf[*pos] == '\0') { + if ( ! (mdoc->flags & MDOC_PHRASE)) + mandoc_msg(MANDOCERR_ARG_QUOTE, + mdoc->parse, line, *pos, NULL); return ARGS_QWORD; } @@ -553,6 +574,15 @@ args(struct roff_man *mdoc, int line, int *pos, p = &buf[*pos]; *v = mandoc_getarg(mdoc->parse, &p, line, pos); + /* + * After parsing the last word in this phrase, + * tell lookup() whether or not to interpret it. + */ + + if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) { + mdoc->flags &= ~MDOC_PHRASEQL; + mdoc->flags |= MDOC_PHRASEQF; + } return ARGS_WORD; } diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c index 7f53a3a92c8..1de61e78d80 100644 --- a/usr.bin/mandoc/mdoc_macro.c +++ b/usr.bin/mandoc/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mdoc_macro.c,v 1.160 2015/10/15 22:27:09 schwarze Exp $ */ +/* $OpenBSD: mdoc_macro.c,v 1.161 2015/10/17 00:19:58 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010, 2012-2015 Ingo Schwarze @@ -237,6 +237,10 @@ lookup(struct roff_man *mdoc, int from, int line, int ppos, const char *p) { int res; + if (mdoc->flags & MDOC_PHRASEQF) { + mdoc->flags &= ~MDOC_PHRASEQF; + return TOKEN_NONE; + } if (from == TOKEN_NONE || mdoc_macros[from].flags & MDOC_PARSED) { res = mdoc_hash_find(p); if (res != TOKEN_NONE) { @@ -1028,26 +1032,39 @@ blk_full(MACRO_PROT_ARGS) if (tok == MDOC_Bk) mdoc->flags |= MDOC_KEEP; - ac = ARGS_PEND; + ac = ARGS_EOLN; for (;;) { + + /* + * If we are right after a tab character, + * do not parse the first word for macros. + */ + + if (mdoc->flags & MDOC_PHRASEQN) { + mdoc->flags &= ~MDOC_PHRASEQN; + mdoc->flags |= MDOC_PHRASEQF; + } + la = *pos; lac = ac; ac = mdoc_args(mdoc, line, pos, buf, tok, &p); if (ac == ARGS_EOLN) { - if (lac != ARGS_PPHRASE && lac != ARGS_PHRASE) + if (lac != ARGS_PHRASE || + ! (mdoc->flags & MDOC_PHRASEQF)) break; + /* - * This is necessary: if the last token on a - * line is a `Ta' or tab, then we'll get - * ARGS_EOLN, so we must be smart enough to - * reopen our scope if the last parse was a - * phrase or partial phrase. + * This line ends in a tab; start the next + * column now, with a leading blank. */ + if (body != NULL) rew_last(mdoc, body); body = roff_body_alloc(mdoc, line, ppos, tok); + roff_word_alloc(mdoc, line, ppos, "\\&"); break; } + if (tok == MDOC_Bd || tok == MDOC_Bk) { mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, line, la, "%s ... %s", @@ -1068,9 +1085,7 @@ blk_full(MACRO_PROT_ARGS) */ if (head == NULL && - ac != ARGS_PEND && ac != ARGS_PHRASE && - ac != ARGS_PPHRASE && ac != ARGS_QWORD && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); @@ -1082,9 +1097,7 @@ blk_full(MACRO_PROT_ARGS) if (head == NULL) head = roff_head_alloc(mdoc, line, ppos, tok); - if (ac == ARGS_PHRASE || - ac == ARGS_PEND || - ac == ARGS_PPHRASE) { + if (ac == ARGS_PHRASE) { /* * If we haven't opened a body yet, rewind the @@ -1094,18 +1107,11 @@ blk_full(MACRO_PROT_ARGS) rew_last(mdoc, body == NULL ? head : body); body = roff_body_alloc(mdoc, line, ppos, tok); - /* - * Process phrases: set whether we're in a - * partial-phrase (this effects line handling) - * then call down into the phrase parser. - */ + /* Process to the tab or to the end of the line. */ - if (ac == ARGS_PPHRASE) - mdoc->flags |= MDOC_PPHRASE; - if (ac == ARGS_PEND && lac == ARGS_PPHRASE) - mdoc->flags |= MDOC_PPHRASE; + mdoc->flags |= MDOC_PHRASE; parse_rest(mdoc, TOKEN_NONE, line, &la, buf); - mdoc->flags &= ~MDOC_PPHRASE; + mdoc->flags &= ~MDOC_PHRASE; /* There may have been `Ta' macros. */ diff --git a/usr.bin/mandoc/roff.h b/usr.bin/mandoc/roff.h index 478c835d5a7..7fd8d71f925 100644 --- a/usr.bin/mandoc/roff.h +++ b/usr.bin/mandoc/roff.h @@ -1,4 +1,4 @@ -/* $OpenBSD: roff.h,v 1.16 2015/04/23 16:17:04 schwarze Exp $ */ +/* $OpenBSD: roff.h,v 1.17 2015/10/17 00:19:58 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2013, 2014, 2015 Ingo Schwarze @@ -141,8 +141,8 @@ struct roff_man { #define MDOC_LITERAL (1 << 1) /* In a literal scope. */ #define MDOC_PBODY (1 << 2) /* In the document body. */ #define MDOC_NEWLINE (1 << 3) /* First macro/text in a line. */ -#define MDOC_PHRASELIT (1 << 4) /* Literal within a partial phrase. */ -#define MDOC_PPHRASE (1 << 5) /* Within a partial phrase. */ +#define MDOC_PHRASE (1 << 4) /* In a Bl -column phrase. */ +#define MDOC_PHRASELIT (1 << 5) /* Literal within a phrase. */ #define MDOC_FREECOL (1 << 6) /* `It' invocation should close. */ #define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting. */ #define MDOC_KEEP (1 << 8) /* In a word keep. */ @@ -150,6 +150,9 @@ struct roff_man { #define MDOC_NODELIMC (1 << 10) /* Disable closing delimiter handling. */ #define MAN_ELINE (1 << 11) /* Next-line element scope. */ #define MAN_BLINE (1 << 12) /* Next-line block scope. */ +#define MDOC_PHRASEQF (1 << 13) /* Quote first word encountered. */ +#define MDOC_PHRASEQL (1 << 14) /* Quote last word of this phrase. */ +#define MDOC_PHRASEQN (1 << 15) /* Quote first word of the next phrase. */ #define MAN_LITERAL MDOC_LITERAL #define MAN_NEWLINE MDOC_NEWLINE enum roff_macroset macroset; /* Kind of high-level macros used. */