From: lum Date: Sat, 20 Mar 2021 19:39:30 +0000 (+0000) Subject: Looking at loading all expressions initially, working towards multi X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=bbed9a29562e65f2d4b3ceb3968f51f8e8c42ace;p=openbsd Looking at loading all expressions initially, working towards multi line. Next to look at "values" (quotes around values). Current regress tests pass. --- diff --git a/usr.bin/mg/interpreter.c b/usr.bin/mg/interpreter.c index 1100237d836..3fb64508bea 100644 --- a/usr.bin/mg/interpreter.c +++ b/usr.bin/mg/interpreter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: interpreter.c,v 1.9 2021/03/08 20:01:43 lum Exp $ */ +/* $OpenBSD: interpreter.c,v 1.10 2021/03/20 19:39:30 lum Exp $ */ /* * This file is in the public domain. * @@ -35,9 +35,8 @@ * 1. multiline parsing - currently only single lines supported. * 2. parsing for '(' and ')' throughout whole string and evaluate correctly. * 3. conditional execution. - * 4. define single value variables (define i 0) - * 5. deal with quotes around a string: "x x" - * 6. oh so many things.... + * 4. deal with quotes around a string: "x x" + * 5. oh so many things.... * [...] * n. implement user definable functions. */ @@ -60,6 +59,18 @@ static int multiarg(char *); static int isvar(char **, char **, int); static int foundvar(char *); static int doregex(char *, char *); +static int parseexp(char *); +static void clearexp(void); + +struct expentry { + SLIST_ENTRY(expentry) eentry; + char *exp; /* The string found between paraenthesis. */ + int par1; /* Parenthesis at start of string (=1 */ + int par2; /* Parenthesis at end of string )=2 */ + int expctr; /* An incremental counter:+1 for each exp */ + int blkid; /* Which block are we in? */ +}; +SLIST_HEAD(elisthead, expentry) exphead = SLIST_HEAD_INITIALIZER(exphead); /* * Structure for variables during buffer evaluation. @@ -72,6 +83,163 @@ struct varentry { }; SLIST_HEAD(vlisthead, varentry) varhead = SLIST_HEAD_INITIALIZER(varhead); +/* + * Line has a '(' as the first non-white char. + * Do some very basic parsing of line. + * Multi-line not supported at the moment, To do. + */ +int +foundparen(char *funstr) +{ + struct expentry *e1 = NULL; + char *p, *valp, *endp = NULL, *regs; + char expbuf[BUFSIZE], tmpbuf[BUFSIZE]; + int ret, pctr, fndstart, expctr, blkid, fndchr, fndend; + + pctr = fndstart = expctr = fndchr = fndend = 0; + blkid = 1; + /* + * Check for blocks of code with opening and closing (). + * One block = (cmd p a r a m) + * Two blocks = (cmd p a r a m s)(hola) + * Two blocks = (cmd p a r (list a m s))(hola) + * Only single line at moment, but more for multiline. + */ + p = funstr; + + /* + * Currently can't do () or (( at the moment, + * just drop out - stops a segv. TODO. + */ + regs = "[(]+[\t ]*[)]+"; + if (doregex(regs, funstr)) + return(dobeep_msg("Empty lists not supported at moment")); + regs = "[(]+[\t ]*[(]+"; + if (doregex(regs, funstr)) + return(dobeep_msg("Multiple left parantheses found")); + /* + * load expressions into a list called 'expentry', to be processd + * when all are obtained. + * Not really live code at the moment. Just part of the process of + * working out what needs to be done. + */ + while (*p != '\0') { + if (*p == '(') { + if (fndstart == 1) { + if (endp == NULL) + *p = '\0'; + else + *endp = '\0'; + e1->par2 = 1; + if ((e1->exp = strndup(valp, BUFSIZE)) == NULL) + return(dobeep_msg("strndup error")); + } + if ((e1 = malloc(sizeof(struct expentry))) == NULL) + return (dobeep_msg("malloc Error")); + SLIST_INSERT_HEAD(&exphead, e1, eentry); + e1->exp = NULL; + e1->expctr = ++expctr; + e1->blkid = blkid; + e1->par1 = 1; + fndstart = 1; + fndend = 0; + fndchr = 0; + endp = NULL; + pctr++; + } else if (*p == ')') { + if (endp == NULL) + *p = '\0'; + else + *endp = '\0'; + if ((e1->exp = strndup(valp, BUFSIZE)) == NULL) + return(dobeep_msg("strndup error")); + fndstart = 0; + pctr--; + } else if (*p != ' ' && *p != '\t') { + if (fndchr == 0) { + valp = p; + fndchr = 1; + } + fndend = 0; + endp = NULL; + } else if (fndend == 0 && (*p == ' ' || *p == '\t')) { + *p = ' '; + fndend = 1; + endp = p; + } else if (*p == '\t') /* need to check not between "" */ + *p = ' '; + if (pctr == 0) + blkid++; + p++; + } + expbuf[0] = tmpbuf[0] = '\0'; + + /* + * Join expressions together for the moment, to progess. + * This needs to be totally redone and + * iterate in-to-out, evaluating as we go. Eventually. + */ + SLIST_FOREACH(e1, &exphead, eentry) { + if (strlcpy(tmpbuf, expbuf, sizeof(tmpbuf)) >= sizeof(tmpbuf)) + return (dobeep_msg("strlcpy error")); + expbuf[0] = '\0'; + if (strlcpy(expbuf, e1->exp, sizeof(expbuf)) >= sizeof(expbuf)) + return (dobeep_msg("strlcat error")); + if (*tmpbuf != '\0') + if (strlcat(expbuf, " ", sizeof(expbuf)) >= + sizeof(expbuf)) + return (dobeep_msg("strlcat error")); + if (strlcat(expbuf, tmpbuf, sizeof(expbuf)) >= sizeof(expbuf)) + return (dobeep_msg("strlcat error")); +#ifdef MGLOG + mglog_misc("exp|%s|\n", e1->exp); +#endif + } + if (pctr != 0) { + clearexp(); + return(dobeep_msg("Opening and closing parentheses error")); + } + + ret = parseexp(expbuf); + clearexp(); + + return (ret); +} + +/* + * At the moment, only paring list defines. Much more to do. + */ +static int +parseexp(char *funstr) +{ + char *regs; + + /* Does the line have a list 'define' like: */ + /* (define alist(list 1 2 3 4)) */ + regs = "^define[ ]+.*[ ]+list[ ]+.*[ ]*"; + if (doregex(regs, funstr)) + return(foundvar(funstr)); + + /* Does the line have a incorrect variable 'define' like: */ + /* (define i y z) */ + regs = "^define[ ]+.*[ ]+.*[ ]+.*$"; + if (doregex(regs, funstr)) + return(dobeep_msg("Invalid use of define.")); + + /* Does the line have a single variable 'define' like: */ + /* (define i 0) */ + regs = "^define[ ]+.*[ ]+.*$"; + if (doregex(regs, funstr)) + return(foundvar(funstr)); + + /* Does the line have an unrecognised 'define' */ + regs = "^define[\t ]+"; + if (doregex(regs, funstr)) + return(dobeep_msg("Invalid use of define")); + + return(multiarg(funstr)); +} + /* * Pass a list of arguments to a function. */ @@ -79,42 +247,17 @@ static int multiarg(char *funstr) { PF funcp; - char excbuf[BUFSIZE], argbuf[BUFSIZE], *contbuf, tmpbuf[BUFSIZE]; - char *cmdp, *argp, *fendp, *endp, *p, *t, *s = " "; - int singlecmd = 0, spc, numparams, numspc; - int inlist, foundlst = 0, eolst, rpar, sizof, fin; + char excbuf[BUFSIZE], argbuf[BUFSIZE]; + char contbuf[BUFSIZE], varbuf[BUFSIZE]; + char *cmdp = NULL, *argp, *fendp = NULL, *endp, *p, *v, *s = " "; + int spc, numparams, numspc; + int inlist, sizof, fin; - contbuf = NULL; - endp = strrchr(funstr, ')'); - if (endp == NULL) { - ewprintf("No closing parenthesis found"); - return(FALSE); - } - p = endp + 1; - if (*p != '\0') - *p = '\0'; - /* we now know that string starts with '(' and ends with ')' */ - if (doregex("^[(][\t ]*[)]$", funstr)) - return(dobeep_msg("No command found")); - - if (doregex("^[(][\t ]*[A-Za-z-]+[\t ]*[)]$", funstr)) - singlecmd = 1; - - p = funstr + 1; /* move past first '(' char. */ - cmdp = skipwhite(p); /* find first char of command. */ - - if (singlecmd) { - /* remove ')', then check for spaces at the end */ - cmdp[strlen(cmdp) - 1] = '\0'; - if ((fendp = strchr(cmdp, ' ')) != NULL) - *fendp = '\0'; - else if ((fendp = strchr(cmdp, '\t')) != NULL) - *fendp = '\0'; - return(excline(cmdp)); - } - if ((fendp = strchr(cmdp, ' ')) == NULL) - fendp = strchr(cmdp, '\t'); + if (doregex("^[A-Za-z-]+$", funstr)) + return(excline(funstr)); + cmdp = funstr; + fendp = strchr(cmdp, ' '); *fendp = '\0'; /* * If no extant mg command found, just return. @@ -127,101 +270,79 @@ multiarg(char *funstr) return (dobeep_msgs("Command takes no arguments: ", cmdp)); /* now find the first argument */ - if (fendp) - p = fendp + 1; - else - p = ""; + p = fendp + 1; p = skipwhite(p); + if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf)) return (dobeep_msg("strlcpy error")); argp = argbuf; numspc = spc = 1; /* initially fake a space so we find first argument */ - inlist = eolst = fin = rpar = 0; + inlist = fin = 0; - for (p = argp; fin == 0; p++) { -#ifdef MGLOG - mglog_execbuf("", excbuf, argbuf, argp, eolst, inlist, cmdp, - p, contbuf); -#endif - if (foundlst) { - foundlst = 0; - p--; /* otherwise 1st arg is missed from list. */ - } - if (*p == ')') { - rpar = 1; - *p = '\0'; + for (p = argbuf; *p != '\0'; p++) { + if (*(p + 1) == '\0') + fin = 1; + + if (*p != ' ') { + if (spc == 1) + argp = p; + spc = 0; } - if (*p == ' ' || *p == '\t' || *p == '\0') { + if (*p == ' ' || fin) { if (spc == 1) continue; - if (spc == 0 && (numspc % numparams == 0)) { - if (*p == '\0') - eolst = 1; - else - eolst = 0; - *p = '\0'; /* terminate arg string */ - endp = p + 1; - excbuf[0] = '\0'; - /* Is arg a var? */ - if (!inlist) { - sizof = sizeof(tmpbuf); - t = tmpbuf; - if (isvar(&argp, &t, sizof)) { - if ((contbuf = strndup(endp, - BUFSIZE)) == NULL) - return(FALSE); - *p = ' '; - (void)(strlcpy(argbuf, tmpbuf, - sizof) >= sizof); - p = argp = argbuf; - spc = 1; - foundlst = inlist = 1; - continue; - } - } - if (strlcpy(excbuf, cmdp, sizeof(excbuf)) - >= sizeof(excbuf)) - return (dobeep_msg("strlcpy error")); - if (strlcat(excbuf, s, sizeof(excbuf)) - >= sizeof(excbuf)) - return (dobeep_msg("strlcat error")); - if (strlcat(excbuf, argp, sizeof(excbuf)) - >= sizeof(excbuf)) - return (dobeep_msg("strlcat error")); - - excline(excbuf); -#ifdef MGLOG - mglog_execbuf(" ", excbuf, argbuf, argp, - eolst, inlist, cmdp, p, contbuf); -#endif - *p = ' '; /* so 'for' loop can continue */ - if (eolst) { - if (contbuf != NULL) { - (void)strlcpy(argbuf, contbuf, - sizeof(argbuf)); - free(contbuf); - contbuf = NULL; - p = argp = argbuf; - foundlst = 1; - inlist = 0; - if (rpar) - fin = 1; - continue; - } - spc = 1; - inlist = 0; + + if (*p == ' ') { + *p = '\0'; /* terminate arg string */ + } + endp = p + 1; + excbuf[0] = '\0'; + varbuf[0] = '\0'; + contbuf[0] = '\0'; + sizof = sizeof(varbuf); + v = varbuf; + if (isvar(&argp, &v, sizof)) { + (void)(strlcat(varbuf, " ", + sizof) >= sizof); + + *p = ' '; + + (void)(strlcpy(contbuf, endp, + sizeof(contbuf)) >= sizeof(contbuf)); + + (void)(strlcat(varbuf, contbuf, + sizof) >= sizof); + + (void)(strlcpy(argbuf, varbuf, + sizof) >= sizof); + + p = argp = argbuf; + while (*p != ' ') { + if (*p == '\0') + break; + p++; } - if (eolst && rpar) - fin = 1; + *p = '\0'; + spc = 1; + fin = 0; } - numspc++; + if (strlcpy(excbuf, cmdp, sizeof(excbuf)) + >= sizeof(excbuf)) + return (dobeep_msg("strlcpy error")); + if (strlcat(excbuf, s, sizeof(excbuf)) + >= sizeof(excbuf)) + return (dobeep_msg("strlcat error")); + if (strlcat(excbuf, argp, sizeof(excbuf)) + >= sizeof(excbuf)) + return (dobeep_msg("strlcat error")); + + excline(excbuf); + + if (fin) + break; + + *p = ' '; /* unterminate arg string */ spc = 1; - } else { - if (spc == 1) - if ((numparams == 1) || - ((numspc + 1) % numparams) == 0) - argp = p; - spc = 0; } } return (TRUE); @@ -232,18 +353,18 @@ multiarg(char *funstr) * Is an item a value or a variable? */ static int -isvar(char **argp, char **tmpbuf, int sizof) +isvar(char **argp, char **varbuf, int sizof) { struct varentry *v1 = NULL; if (SLIST_EMPTY(&varhead)) return (FALSE); #ifdef MGLOG - mglog_isvar(*tmpbuf, *argp, sizof); + mglog_isvar(*varbuf, *argp, sizof); #endif SLIST_FOREACH(v1, &varhead, entry) { if (strcmp(*argp, v1->name) == 0) { - (void)(strlcpy(*tmpbuf, v1->vals, sizof) >= sizof); + (void)(strlcpy(*varbuf, v1->vals, sizof) >= sizof); return (TRUE); } } @@ -252,7 +373,7 @@ isvar(char **argp, char **tmpbuf, int sizof) /* - * The (define string _must_ adhere to the regex in foundparen. + * The define string _must_ adhere to the regex in parsexp(). * This is not the correct way to do parsing but it does highlight * the issues. */ @@ -260,28 +381,22 @@ static int foundvar(char *defstr) { struct varentry *vt, *v1 = NULL; - const char e[2] = "e", t[2] = "t"; - char *p, *vnamep, *vendp = NULL, *valp, *o; - int spc, foundlist = 0; - - p = defstr + 1; /* move past first '(' char. */ - p = skipwhite(p); /* find first char of 'define'. */ - p = strstr(p, e); /* find first 'e' in 'define'. */ - p = strstr(++p, e); /* find second 'e' in 'define'. */ - p++; /* move past second 'e'. */ - vnamep = skipwhite(p); /* find first char of var name. */ + const char t[2] = "t"; + char *p, *vnamep, *vendp = NULL, *valp; + int spc; + + p = strstr(defstr, " "); /* move to first ' ' char. */ + vnamep = skipwhite(p); /* find first char of var name. */ vendp = vnamep; /* now find the end of the list name */ while (1) { ++vendp; - if (*vendp == '(') { - foundlist = 1; - break; - } else if (*vendp == ' ' || *vendp == '\t') + if (*vendp == ' ') break; } *vendp = '\0'; + /* * Check list name is not an existing function. * Although could this be allowed? Shouldn't context dictate? @@ -291,7 +406,8 @@ foundvar(char *defstr) p = ++vendp; p = skipwhite(p); - if (foundlist) { + + if ((*p == 'l') && (*(p + 1) == 'i') && (*(p + 2) == 's')) { p = strstr(p, t); /* find 't' in 'list'. */ valp = skipwhite(++p); /* find first value */ } else @@ -319,36 +435,25 @@ foundvar(char *defstr) spc = 1; /* now loop through values in list value string while counting them */ for (p = valp; *p != '\0'; p++) { - if (*p == ' ' || *p == '\t') { - if (spc == 0) - vendp = p; - spc = 1; - } else if (*p == ')') { - o = p - 1; - if (*o != ' ' && *o != '\t') - vendp = p; - break; - } else { + if (*p != ' ' && *p != '\t') { if (spc == 1) v1->count++; spc = 0; } } - if (vendp) - *vendp = '\0'; - if ((v1->vals = strndup(valp, BUFSIZE)) == NULL) return(dobeep_msg("strndup error")); #ifdef MGLOG - mglog_misc("var:%s\t#items:%d\tvals:%s\n", vnamep, v1->count, v1->vals); + mglog_misc("var:%s\t#items:%d\tvals:|%s|\n", vnamep, v1->count, v1->vals); #endif return (TRUE); } /* - * Finished with evaluation, so clean up any vars. + * Finished with buffer evaluation, so clean up any vars. + * Perhaps keeps them in mg even after use,... */ int clearvars(void) @@ -366,62 +471,26 @@ clearvars(void) } /* - * Line has a '(' as the first non-white char. - * Do some very basic parsing of line with '(' as the first character. - * Multi-line not supported at the moment, To do. + * Finished with block evaluation, so clean up any expressions. */ -int -foundparen(char *funstr) +static void +clearexp(void) { - char *regs, *p; - int pctr; + struct expentry *e1 = NULL; - pctr = 0; - - /* - * Check for blocks of code with opening and closing (). - * One block = (cmd p a r a m) - * Two blocks = (cmd p a r a m s)(hola) - * Two blocks = (cmd p a r (list a m s))(hola) - * Only single line at moment, but more for multiline. - */ - p = funstr; - while (*p != '\0') { - if (*p == '(') { - pctr++; - } else if (*p == ')') { - pctr--; - } - p++; + while (!SLIST_EMPTY(&exphead)) { + e1 = SLIST_FIRST(&exphead); + SLIST_REMOVE_HEAD(&exphead, eentry); + free(e1->exp); + free(e1); } - if (pctr != 0) - return(dobeep_msg("Opening and closing parentheses error")); - - /* Does the line have a list 'define' like: */ - /* (define alist(list 1 2 3 4)) */ - regs = "^[(][\t ]*define[\t ]+[^\t (]+[\t ]*[(][\t ]*list[\t ]+"\ - "[^\t ]+.*[)][\t ]*[)]"; - if (doregex(regs, funstr)) - return(foundvar(funstr)); - - /* Does the line have a single variable 'define' like: */ - /* (define i 0) */ - regs = "^[(][\t ]*define[\t ]+[^\t (]+[\t ]*[^\t (]+[\t ]*[)]"; - if (doregex(regs, funstr)) - return(foundvar(funstr)); - - /* Does the line have an unrecognised 'define' */ - regs = "^[(][\t ]*define[\t ]+"; - if (doregex(regs, funstr)) - return(dobeep_msg("Invalid use of define")); - - return(multiarg(funstr)); + return; } /* * Test a string against a regular expression. */ -int +static int doregex(char *r, char *e) { regex_t regex_buff;