From dff94300f9bd0f3e6e33a22d480ea11380ddd039 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Thu, 11 Jul 2019 22:32:53 -0500 Subject: Next sh checkpoint. Not remotely load bearing yet. --- toys/pending/sh.c | 496 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 323 insertions(+), 173 deletions(-) diff --git a/toys/pending/sh.c b/toys/pending/sh.c index e8a11ce7..19504383 100644 --- a/toys/pending/sh.c +++ b/toys/pending/sh.c @@ -212,22 +212,32 @@ void syntax_err(char *msg, ...) if (*toys.optargs) xexit(); } +// return length of match found at this point +static int anyof(char *s, char **try) +{ + while (*try) { + if (strstart(&s, *try)) return strlen(*try); + try++; + } -// Parse one word from the command line, appending one or more argv[] entries -// to struct command. Handles environment variable substitution and -// substrings. Returns pointer to next used byte, or NULL if it -// hit an ending token. - -// caller eats leading spaces + return 0; +} // parse next word from command line. Returns end, or 0 if need continuation +// caller eats leading spaces static char *parse_word(char *start) { int i, quote = 0; char *end = start, *s; - // find end of string + // Skip leading whitespace/comment + for (;;) { + if (isspace(*start)) ++start; + else if (*start=='#') while (*start && *start != '\n') ++start; + else break; + } + // find end of this word while (*end) { i = 0; @@ -243,11 +253,20 @@ static char *parse_word(char *start) // start quote if (strchr("\"'`", *end)) toybuf[quote++] = *end++; else if (strstart(&end, "<(") || strstart(&end,">(")) toybuf[quote++]=')'; - else if (*end==')') return end+(end==start); else { - // control chars - for (s = end; strchr(";|&<>(", *s); s++); - if (s != end) return (end == start) ? s : end; + // control chars. + // 123<&; becomes ">&" ";" because first loop, then second loop. + s = end; + if (*s == '{') s++; + for (s = end; isdigit(*s); s++); + if (*end == '{' && *s == '}') s++; + s += anyof(s, (char *[]){"<<<", "<<-", "<<", "<&", "<>", "<", ">>", + ">&", ">|", ">", 0}); + if (s == end || isdigit(s[-1])) + s += anyof(s, (char *[]){";;&", ";;", ";&", ";", "||", "|&", "|", + "&&", "&>>", "&>", "&", "(", ")", 0}); + if (s != end && !isdigit(*s)) return (end == start) ? s : end; i++; } } @@ -259,7 +278,7 @@ static char *parse_word(char *start) // backslash escapes if (*end == '\\') { - if (!end[1]) return 0; + if (!end[1] || (end[1]=='\n' && !end[2])) return 0; end += 2; } else if (*end == '$') { // barf if we're near overloading quote stack (nesting ridiculously deep) @@ -286,228 +305,359 @@ static char *parse_word(char *start) return quote ? 0 : end; } -// Consume a line of shell script and do what it says. Returns 0 if finished, -// pointer to start of unused part of line if it needs another line of input. -static char *parse_line(char *line, struct double_list **pipeline) +// Parse flow control statement(s), returns index of first statement to execute, +// pp->arg->c if none, -1 if we need to flush due to syntax error +int flow_control(int why, struct sh_arg *arg, struct double_list **expect, + char **end) { - char *start = line, *end, *s, *ex, *add; - struct sh_arg *arg = 0; - struct double_list *pl, *expect = 0; - unsigned i, paren = 0; - - // Resume appending to last pipeline's last argument list - if (*pipeline) arg = (void *)(*pipeline)->prev->data; - if (arg) for (i = 0; ic; i++) { - if (!strcmp(arg->v[i], "(")) paren++; - else if (!strcmp(arg->v[i], ")")) paren--; + char *add = 0; + int i, pend = 0; + + // Blank line shouldn't change end, but two ends in a row are an error + if (!arg->c) { + if (arg->v[0]) { + syntax_err("bad %s", arg->v[0]); + return -1; + } + return 0; } - // Loop handling each word - for (;;) { - // Skip leading whitespace/comment - while (isspace(*start)) ++start; - if (*start=='#') { - while (*start && *start != '\n') start++; - continue; + // parse flow control statements in this command line + for (i = 0; ; i++) { + char *ex = *expect ? (*expect)->prev->data : 0, *s = arg->v[i]; + + // push word to expect at end of block, and expect a command first + if (add) { + dlist_add(expect, add); // end of context + if (why) dlist_add(expect, arg->v[i-1]); // context for command + dlist_add(expect, add = 0); // expect a command } - // Parse next word and detect continuation/overflow. - if ((end = parse_word(start)) == (void *)1) return 0; - if (!end) return start; + // end of argument list? + if (i == arg->c) break; - // Extend pipeline and argv[], handle EOL - if (!arg) - dlist_add(pipeline, (void *)(arg = xzalloc(sizeof(struct sh_arg)))); - if (!(31&arg->c)) arg->v = xrealloc(arg->v, (32+arg->c)*sizeof(void *)); - if (end == start) { - arg->v[arg->c] = 0; - break; + // When waiting for { it must be next symbol, but can be on a new line. + if (ex && !strcmp(ex, "{")) { + if (strcmp(s, "{") || (!i && *end && strcmp(*end, ";"))) { + syntax_err("need {"); + return -1; + } } - // Save argument (strdup) and check if it's special - s = arg->v[arg->c] = xstrndup(start, end-start); - if (!strcmp(s, "(")) paren++; - else if (!strcmp(s, ")") && !paren--) syntax_err("bad %s", s); - if (paren || !strchr(";|&", *start)) arg->c++; - else { - if (!arg->c) { - syntax_err("bad %s", arg->v[arg->c]); - goto flush; + if (!strcmp(s, "if")) add = "then"; + else if (!strcmp(s, "for") || !strcmp(s, "select") + || !strcmp(s, "while") || !strcmp(s, "until")) add = "do"; + else if (!strcmp(s, "case")) add = "esac"; + else if (!strcmp(s, "{")) add = "}"; + else if (!strcmp(s, "[[")) add = "]]"; + else if (!strcmp(s, "(")) add = ")"; + + // function NAME () [nl] { [nl] body ; } + // Why can you to declare functions inside other functions? + else if (arg->c>i+1 && !strcmp(arg->v[i+1], "(")) goto funky; + else if (!strcmp(s, "function")) { + i++; +funky: + // At this point we can only have a function: barf if it's invalid + if (arg->cv[i+1], "(") || !strcmp(arg->v[i+2], ")")){ + syntax_err("bad function ()"); + return -1; } - arg = 0; - } - start = end; - } + // perform abnormal add (one extra piece of info) manually. + dlist_add(expect, "}"); + dlist_add(expect, "function"); + dlist_add(expect, 0); + dlist_add(expect, "{"); - // We parsed to the end of the line, which ended a pipeline. - // Now handle flow control commands, which can also need more lines. + continue; - // array of command lines separated by | and such - // Note: don't preparse past ; because environment variables differ + // Expecting NULL means a statement: any otherwise unrecognized word + } else if (expect && !ex) { + free(dlist_pop(expect)); - // Check for flow control continuations - end = 0; - for (pl = *pipeline; pl ; pl = (pl->next == *pipeline) ? 0 : pl->next) { - arg = (void *)pl->data; - if (!arg->c) continue; - add = 0; - - // parse flow control statements in this command line - for (i = 0; ; i++) { - ex = expect ? expect->prev->data : 0; - s = arg->v[i]; - - // push word to expect to end this block, and expect a command first - if (add) { - dlist_add(&expect, add); - dlist_add(&expect, add = 0); + // if (why) context in which statement executes now at top of expect stack + + // Does this statement end with a close parentheses? + if (!strcmp(")", arg->v[arg->c-1])) { + + // Did we expect one? + if (!*expect || !strcmp(")", (*expect)->prev->data)) { + syntax_err("bad %s", ")"); + return -1; + } + + free(dlist_pop(expect)); + // only need one statement in ( ( ( echo ) ) ) + if (*expect && !(*expect)->prev->data) free(dlist_pop(expect)); + + pend++; + goto gotparen; } + break; - // end of statement? - if (i == arg->c) break; + // If we aren't expecting and didn't just start a new flow control block, + // rest of statement is a command and arguments, so stop now + } else if (!ex) break; - // When waiting for { it must be next symbol, but can be on a new line. - if (ex && !strcmp(ex, "{") && (strcmp(s, "{") || (!i && end))) { - syntax_err("need {"); - goto flush; + if (add) continue; + + // If we got here we expect a specific word to end this block: is this it? + if (!strcmp(arg->v[i], ex) + || (!strcmp(ex, ")") && !strcmp(ex, arg->v[arg->c-1]))) + { + // can't "if | then" or "while && do", only ; & or newline works + if (*end && strcmp(*end, ";") && strcmp(*end, "&")) { + syntax_err("bad %s", *end); + return -1; } - if (!strcmp(s, "if")) add = "then"; - else if (!strcmp(s, "for") || !strcmp(s, "select") - || !strcmp(s, "while") || !strcmp(s, "until")) add = "do"; - else if (!strcmp(s, "case")) add = "esac"; - else if (!strcmp(s, "{")) add = "}"; - else if (!strcmp(s, "[[")) add = "]]"; - - // function NAME () [nl] { [nl] body ; } - // Why can you to declare functions inside other functions? - else if (arg->c>i+1 && !strcmp(arg->v[i+1], "(")) goto funky; - else if (!strcmp(s, "function")) { - i++; -funky: - // At this point we can only have a function: barf if it's invalid - if (arg->cv[i+1], "(") - || !strcmp(arg->v[i+2], ")")) - { - syntax_err("bad function ()"); - goto flush; +gotparen: + free(dlist_pop(expect)); + // Only innermost statement needed in { { { echo ;} ;} ;} and such + if (*expect && !(*expect)->prev->data) free(dlist_pop(expect)); + + // If this was a command ending in parentheses + if (pend) break; + + // if it's a multipart block, what comes next? + if (!strcmp(s, "do")) ex = "done"; + else if (!strcmp(s, "then")) add = "fi\0A"; + // fi could have elif, which queues a then. + } else if (!strcmp(ex, "fi")) { + if (!strcmp(s, "elif")) { + free(dlist_pop(expect)); + add = "then"; + // catch duplicate else while we're here + } else if (!strcmp(s, "else")) { + if (ex[3] != 'A') { + syntax_err("2 else"); + return -1; } - dlist_add(&expect, "}"); - dlist_add(&expect, 0); - dlist_add(&expect, "{"); + free(dlist_pop(expect)); + add = "fi\0B"; + } + } + } - // Expecting NULL will take any otherwise unrecognized word - } else if (expect && !ex) { - free(dlist_pop(&expect)); - continue; + // Record how the previous stanza ended: ; | & ;; || && ;& ;;& |& NULL + *end = arg->v[arg->c]; + + return i; +} - // If we expect nothing and didn't just start a new flow control block, - // rest of statement is a command and arguments, so stop now - } else if (!ex) break; +// Consume a line of shell script and do what it says. Returns 0 if finished, +// 1 to request another line of input. - if (add) continue; +struct sh_parse { + struct double_list *pipeline, *plstart, *expect, *here; + char *end; +}; - // If we got here we expect a word to end this block: is this it? - if (!strcmp(arg->v[i], ex)) { - free(dlist_pop(&expect)); +// pipeline and expect are scratch space, state held between calls which +// I don't want to make global yet because this could be reentrant. +// returns 1 to request another line (> prompt), 0 if line consumed. +static int parse_line(char *line, struct sh_parse *sp) +{ + char *start = line, *delete = 0, *end, *s; + struct sh_arg *arg = 0; + struct double_list *pl; + long i; - // can't "if | then" or "while && do", only ; or newline works - if (end && !strcmp(end, ";")) { - syntax_err("bad %s", end); - goto flush; - } + // Resume appending to last statement? + if (sp->pipeline) { + arg = (void *)sp->pipeline->prev->data; - // if it's a multipart block, what comes next? - if (!strcmp(s, "do")) ex = "done"; - else if (!strcmp(s, "then")) add = "fi\0A"; - // fi could have elif, which queues a then. - } else if (!strcmp(ex, "fi")) { - if (!strcmp(s, "elif")) { - free(dlist_pop(&expect)); - add = "then"; - // catch duplicate else while we're here - } else if (!strcmp(s, "else")) { - if (ex[3] != 'A') { - syntax_err("2 else"); - goto flush; - } - free(dlist_pop(&expect)); - add = "fi\0B"; - } + // Extend/resume quoted block + if (arg->c<0) { + start = delete = xmprintf("%s%s", arg->v[arg->c = (-arg->c)-1], start); + free(arg->v[arg->c]); + arg->v[arg->c] = 0; + + // is a HERE document in progress? + } else if (sp->here && ((struct sh_arg *)sp->here->data)->c<0) { + unsigned long c; + + arg = (void *)sp->here->data; + c = -arg->c - 1; + + // HERE's arg->c < 0 means still adding to it, EOF string is last entry + if (!(31&c)) arg->v = xrealloc(arg->v, (32+c)*sizeof(void *)); + if (strcmp(line, arg->v[c])) { + // Add this line + arg->v[c+1] = arg->v[c]; + arg->v[c] = xstrdup(line); + arg->c--; + } else { + // EOF hit, end HERE document + arg->v[arg->c = c] = 0; + sp->here = sp->here->next; } + start = 0; } - // Record how the previous stanza ended: ; | & ;; || && ;& ;;& |& NULL - end = arg->v[arg->c]; } - // Do we need more lines to finish a flow control statement? - if (expect || paren) { - llist_traverse(expect, free); - return start; + // Parse words, assemble argv[] pipelines, check flow control and HERE docs + if (start) for (;;) { + s = 0; + + // Parse next word and detect overflow (too many nested quotes). + if ((end = parse_word(start)) == (void *)1) goto flush; + + // Extend pipeline and argv[] to store result + if (!arg) + dlist_add(&sp->pipeline, (void *)(arg = xzalloc(sizeof(struct sh_arg)))); + if (!(31&arg->c)) arg->v = xrealloc(arg->v, (32+arg->c)*sizeof(void *)); + + // Do we need to request another line to finish word (find ending quote)? + if (!end) { + // Save unparsed bit of this line, we'll need to re-parse it. + arg->v[arg->c] = xstrndup(start, strlen(start)); + arg->c = -(arg->c+1); + free(delete); + + return 1; + } + + // Did we hit the end of this line of input? + if (end == start) { + arg->v[arg->c] = 0; + + // Parse flow control data from last statement + if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush; + + // Grab HERE document(s) + for (pl = sp->plstart ? sp->plstart : sp->pipeline; pl; + pl = (pl->next == sp->pipeline) ? 0 : pl->next) + { + struct sh_arg *here; + + arg = (void *)pl->data; + + for (i = 0; ic; i++) { + // find [n]<<[-] with an argument after it + s = arg->v[i]; + if (*s == '{') s++; + while (isdigit(*s)) s++; + if (*arg->v[i] == '{' && *s == '}') s++; + if (strcmp(s, "<<") && strcmp(s, "<<-")) continue; + if (i+1 == arg->c) goto flush; + + here = xzalloc(sizeof(struct sh_arg)); + here->v = xzalloc(32*sizeof(void *)); + *here->v = arg->v[++i]; + here->c = -1; + } + } + + // Stop reading. + break; + } + + // ) only saved at start of a statement, else ends statement with NULL + if (arg->c && *start == ')') { + arg->v[arg->c] = 0; + end--; + if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush; + arg = 0; + } else { + // Save argument (strdup) and check if it's special + s = arg->v[arg->c] = xstrndup(start, end-start); + if (!strchr(");|&", *start)) arg->c++; + else { + // end of statement due to flow control character. + s = 0; + if (!arg->c) goto flush; + if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush; + arg = 0; + } + } + start = end; } + free(delete); + + // return if HERE document or more flow control + if (sp->expect || (sp->pipeline && sp->pipeline->prev->data==(void *)1)) + return 1; + + // At this point, we've don't need more input and can start executing. + + // **************************** do the thing ******************************* + + // Now we have a complete thought and can start running stuff. // iterate through the commands running each one - for (pl = *pipeline; pl ; pl = (pl->next == *pipeline) ? 0 : pl->next) { + + // run a pipeline of commands + + for (pl = sp->pipeline; pl ; pl = (pl->next == sp->pipeline) ? 0 : pl->next) { struct sh_process *pp = xzalloc(sizeof(struct sh_process)); - for (i = 0; i<((struct sh_arg *)pl->data)->c; i++) - expand_arg(&pp->arg, ((struct sh_arg *)pl->data)->v[i]); + for (i = 0; ic; i++) expand_arg(&pp->arg, arg->v[i]); run_command(pp); + llist_traverse(pp->delete, free); } + s = 0; flush: - while ((pl = dlist_pop(pipeline))) { + + if (s) syntax_err("bad %s", s); + while ((pl = dlist_pop(&sp->pipeline))) { + arg = (void *)pl->data; + free(pl); + for (i = 0; ic; i++) free(arg->v[i]); + free(arg->v); + free(arg); + } + + while ((pl = dlist_pop(&sp->here))) { arg = (void *)pl->data; free(pl); + if (arg->c<0) arg->c = -arg->c - 1; for (i = 0; ic; i++) free(arg->v[i]); free(arg->v); free(arg); } - *pipeline = 0; + + llist_traverse(sp->expect, free); return 0; } void sh_main(void) { - FILE *f = 0; - char *command = 0, *old = 0; - struct double_list *scratch = 0; + FILE *f; + struct sh_parse scratch; + int prompt = 0; // Set up signal handlers and grab control of this tty. - if (isatty(0)) toys.optflags |= FLAG_i; - if (*toys.optargs) f = xfopen(*toys.optargs, "r"); - if (TT.command) command = parse_line(TT.command, &scratch); - else for (;;) { + memset(&scratch, 0, sizeof(scratch)); + if (TT.command) f = fmemopen(TT.command, strlen(TT.command), "r"); + else if (*toys.optargs) f = xfopen(*toys.optargs, "r"); + else { + f = stdin; + if (isatty(0)) toys.optflags |= FLAG_i; + } + + for (;;) { char *new = 0; size_t linelen = 0; // Prompt and read line - if (!f) { - char *s = getenv(command ? "PS2" : "PS1"); + if (f == stdin) { + char *s = getenv(prompt ? "PS2" : "PS1"); - if (!s) s = command ? "> " : (getpid() ? "\\$ " : "# "); + if (!s) s = prompt ? "> " : (getpid() ? "\\$ " : "# "); do_prompt(s); - } - if (1 > getline(&new, &linelen, f ? f : stdin)) break; - if (f) TT.lineno++; - - // Append to unused portion of previous line if any - if (command) { - command = xmprintf("%s%s", command, new); - free(old); - free(new); - old = command; - } else { - free(old); - old = new; - } + } else TT.lineno++; + if (1>(linelen = getline(&new, &linelen, f ? f : stdin))) break; + if (new[linelen-1] == '\n') new[--linelen] = 0; // returns 0 if line consumed, command if it needs more data - command = parse_line(old, &scratch); + prompt = parse_line(new, &scratch); + free(new); } - if (command) error_exit("unfinished line"); + if (prompt) error_exit("%ld:unfinished line"+4*!TT.lineno, TT.lineno); toys.exitval = f && ferror(f); } -- cgit v1.2.3