about summary refs log tree commit diff
path: root/toys/pending/sh.c
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2019-07-11 22:32:53 -0500
committer Rob Landley <rob@landley.net> 2019-07-11 22:32:53 -0500
commit dff94300f9bd0f3e6e33a22d480ea11380ddd039 (patch)
tree 47b9eb8dc8b44f4de5815b84b3adb552e1045d4a /toys/pending/sh.c
parent a57721d14642291696be4ffd5d8bb0915fbe861b (diff)
download toybox-dff94300f9bd0f3e6e33a22d480ea11380ddd039.tar.gz
Next sh checkpoint. Not remotely load bearing yet.
Diffstat (limited to 'toys/pending/sh.c')
-rw-r--r-- toys/pending/sh.c 496
1 files changed, 323 insertions, 173 deletions
diff --git a/toys/pending/sh.c b/toys/pending/sh.c
index e8a11ce7..19504383 100644
--- a/toys/pending/sh.c
+++ b/toys/pending/sh.c
@@ -212,22 +212,32 @@ void syntax_err(char *msg, ...)
if (*toys.optargs) xexit();
}
+// return length of match found at this point
+static int anyof(char *s, char **try)
+{
+ while (*try) {
+ if (strstart(&s, *try)) return strlen(*try);
+ try++;
+ }
-// Parse one word from the command line, appending one or more argv[] entries
-// to struct command. Handles environment variable substitution and
-// substrings. Returns pointer to next used byte, or NULL if it
-// hit an ending token.
-
-// caller eats leading spaces
+ return 0;
+}
// parse next word from command line. Returns end, or 0 if need continuation
+// caller eats leading spaces
static char *parse_word(char *start)
{
int i, quote = 0;
char *end = start, *s;
- // find end of string
+ // Skip leading whitespace/comment
+ for (;;) {
+ if (isspace(*start)) ++start;
+ else if (*start=='#') while (*start && *start != '\n') ++start;
+ else break;
+ }
+ // find end of this word
while (*end) {
i = 0;
@@ -243,11 +253,20 @@ static char *parse_word(char *start)
// start quote
if (strchr("\"'`", *end)) toybuf[quote++] = *end++;
else if (strstart(&end, "<(") || strstart(&end,">(")) toybuf[quote++]=')';
- else if (*end==')') return end+(end==start);
else {
- // control chars
- for (s = end; strchr(";|&<>(", *s); s++);
- if (s != end) return (end == start) ? s : end;
+ // control chars.
+ // 123<<file- parses as 2 args: "123<<" "file-".
+ // note: >&; becomes ">&" ";" because first loop, then second loop.
+ s = end;
+ if (*s == '{') s++;
+ for (s = end; isdigit(*s); s++);
+ if (*end == '{' && *s == '}') s++;
+ s += anyof(s, (char *[]){"<<<", "<<-", "<<", "<&", "<>", "<", ">>",
+ ">&", ">|", ">", 0});
+ if (s == end || isdigit(s[-1]))
+ s += anyof(s, (char *[]){";;&", ";;", ";&", ";", "||", "|&", "|",
+ "&&", "&>>", "&>", "&", "(", ")", 0});
+ if (s != end && !isdigit(*s)) return (end == start) ? s : end;
i++;
}
}
@@ -259,7 +278,7 @@ static char *parse_word(char *start)
// backslash escapes
if (*end == '\\') {
- if (!end[1]) return 0;
+ if (!end[1] || (end[1]=='\n' && !end[2])) return 0;
end += 2;
} else if (*end == '$') {
// barf if we're near overloading quote stack (nesting ridiculously deep)
@@ -286,228 +305,359 @@ static char *parse_word(char *start)
return quote ? 0 : end;
}
-// Consume a line of shell script and do what it says. Returns 0 if finished,
-// pointer to start of unused part of line if it needs another line of input.
-static char *parse_line(char *line, struct double_list **pipeline)
+// Parse flow control statement(s), returns index of first statement to execute,
+// pp->arg->c if none, -1 if we need to flush due to syntax error
+int flow_control(int why, struct sh_arg *arg, struct double_list **expect,
+ char **end)
{
- char *start = line, *end, *s, *ex, *add;
- struct sh_arg *arg = 0;
- struct double_list *pl, *expect = 0;
- unsigned i, paren = 0;
-
- // Resume appending to last pipeline's last argument list
- if (*pipeline) arg = (void *)(*pipeline)->prev->data;
- if (arg) for (i = 0; i<arg->c; i++) {
- if (!strcmp(arg->v[i], "(")) paren++;
- else if (!strcmp(arg->v[i], ")")) paren--;
+ char *add = 0;
+ int i, pend = 0;
+
+ // Blank line shouldn't change end, but two ends in a row are an error
+ if (!arg->c) {
+ if (arg->v[0]) {
+ syntax_err("bad %s", arg->v[0]);
+ return -1;
+ }
+ return 0;
}
- // Loop handling each word
- for (;;) {
- // Skip leading whitespace/comment
- while (isspace(*start)) ++start;
- if (*start=='#') {
- while (*start && *start != '\n') start++;
- continue;
+ // parse flow control statements in this command line
+ for (i = 0; ; i++) {
+ char *ex = *expect ? (*expect)->prev->data : 0, *s = arg->v[i];
+
+ // push word to expect at end of block, and expect a command first
+ if (add) {
+ dlist_add(expect, add); // end of context
+ if (why) dlist_add(expect, arg->v[i-1]); // context for command
+ dlist_add(expect, add = 0); // expect a command
}
- // Parse next word and detect continuation/overflow.
- if ((end = parse_word(start)) == (void *)1) return 0;
- if (!end) return start;
+ // end of argument list?
+ if (i == arg->c) break;
- // Extend pipeline and argv[], handle EOL
- if (!arg)
- dlist_add(pipeline, (void *)(arg = xzalloc(sizeof(struct sh_arg))));
- if (!(31&arg->c)) arg->v = xrealloc(arg->v, (32+arg->c)*sizeof(void *));
- if (end == start) {
- arg->v[arg->c] = 0;
- break;
+ // When waiting for { it must be next symbol, but can be on a new line.
+ if (ex && !strcmp(ex, "{")) {
+ if (strcmp(s, "{") || (!i && *end && strcmp(*end, ";"))) {
+ syntax_err("need {");
+ return -1;
+ }
}
- // Save argument (strdup) and check if it's special
- s = arg->v[arg->c] = xstrndup(start, end-start);
- if (!strcmp(s, "(")) paren++;
- else if (!strcmp(s, ")") && !paren--) syntax_err("bad %s", s);
- if (paren || !strchr(";|&", *start)) arg->c++;
- else {
- if (!arg->c) {
- syntax_err("bad %s", arg->v[arg->c]);
- goto flush;
+ if (!strcmp(s, "if")) add = "then";
+ else if (!strcmp(s, "for") || !strcmp(s, "select")
+ || !strcmp(s, "while") || !strcmp(s, "until")) add = "do";
+ else if (!strcmp(s, "case")) add = "esac";
+ else if (!strcmp(s, "{")) add = "}";
+ else if (!strcmp(s, "[[")) add = "]]";
+ else if (!strcmp(s, "(")) add = ")";
+
+ // function NAME () [nl] { [nl] body ; }
+ // Why can you declare functions inside other functions?
+ else if (arg->c>i+1 && !strcmp(arg->v[i+1], "(")) goto funky;
+ else if (!strcmp(s, "function")) {
+ i++;
+funky:
+ // At this point we can only have a function: barf if it's invalid
+ if (arg->c<i+3 || !strcmp(arg->v[i+1], "(") || !strcmp(arg->v[i+2], ")")){
+ syntax_err("bad function ()");
+ return -1;
}
- arg = 0;
- }
- start = end;
- }
+ // perform abnormal add (one extra piece of info) manually.
+ dlist_add(expect, "}");
+ dlist_add(expect, "function");
+ dlist_add(expect, 0);
+ dlist_add(expect, "{");
- // We parsed to the end of the line, which ended a pipeline.
- // Now handle flow control commands, which can also need more lines.
+ continue;
- // array of command lines separated by | and such
- // Note: don't preparse past ; because environment variables differ
+ // Expecting NULL means a statement: any otherwise unrecognized word
+ } else if (expect && !ex) {
+ free(dlist_pop(expect));
- // Check for flow control continuations
- end = 0;
- for (pl = *pipeline; pl ; pl = (pl->next == *pipeline) ? 0 : pl->next) {
- arg = (void *)pl->data;
- if (!arg->c) continue;
- add = 0;
-
- // parse flow control statements in this command line
- for (i = 0; ; i++) {
- ex = expect ? expect->prev->data : 0;
- s = arg->v[i];
-
- // push word to expect to end this block, and expect a command first
- if (add) {
- dlist_add(&expect, add);
- dlist_add(&expect, add = 0);
+ // if (why) context in which statement executes now at top of expect stack
+
+ // Does this statement end with a close parentheses?
+ if (!strcmp(")", arg->v[arg->c-1])) {
+
+ // Did we expect one?
+ if (!*expect || !strcmp(")", (*expect)->prev->data)) {
+ syntax_err("bad %s", ")");
+ return -1;
+ }
+
+ free(dlist_pop(expect));
+ // only need one statement in ( ( ( echo ) ) )
+ if (*expect && !(*expect)->prev->data) free(dlist_pop(expect));
+
+ pend++;
+ goto gotparen;
}
+ break;
- // end of statement?
- if (i == arg->c) break;
+ // If we aren't expecting and didn't just start a new flow control block,
+ // rest of statement is a command and arguments, so stop now
+ } else if (!ex) break;
- // When waiting for { it must be next symbol, but can be on a new line.
- if (ex && !strcmp(ex, "{") && (strcmp(s, "{") || (!i && end))) {
- syntax_err("need {");
- goto flush;
+ if (add) continue;
+
+ // If we got here we expect a specific word to end this block: is this it?
+ if (!strcmp(arg->v[i], ex)
+ || (!strcmp(ex, ")") && !strcmp(ex, arg->v[arg->c-1])))
+ {
+ // can't "if | then" or "while && do", only ; & or newline works
+ if (*end && strcmp(*end, ";") && strcmp(*end, "&")) {
+ syntax_err("bad %s", *end);
+ return -1;
}
- if (!strcmp(s, "if")) add = "then";
- else if (!strcmp(s, "for") || !strcmp(s, "select")
- || !strcmp(s, "while") || !strcmp(s, "until")) add = "do";
- else if (!strcmp(s, "case")) add = "esac";
- else if (!strcmp(s, "{")) add = "}";
- else if (!strcmp(s, "[[")) add = "]]";
-
- // function NAME () [nl] { [nl] body ; }
- // Why can you to declare functions inside other functions?
- else if (arg->c>i+1 && !strcmp(arg->v[i+1], "(")) goto funky;
- else if (!strcmp(s, "function")) {
- i++;
-funky:
- // At this point we can only have a function: barf if it's invalid
- if (arg->c<i+3 || !strcmp(arg->v[i+1], "(")
- || !strcmp(arg->v[i+2], ")"))
- {
- syntax_err("bad function ()");
- goto flush;
+gotparen:
+ free(dlist_pop(expect));
+ // Only innermost statement needed in { { { echo ;} ;} ;} and such
+ if (*expect && !(*expect)->prev->data) free(dlist_pop(expect));
+
+ // If this was a command ending in parentheses
+ if (pend) break;
+
+ // if it's a multipart block, what comes next?
+ if (!strcmp(s, "do")) ex = "done";
+ else if (!strcmp(s, "then")) add = "fi\0A";
+ // fi could have elif, which queues a then.
+ } else if (!strcmp(ex, "fi")) {
+ if (!strcmp(s, "elif")) {
+ free(dlist_pop(expect));
+ add = "then";
+ // catch duplicate else while we're here
+ } else if (!strcmp(s, "else")) {
+ if (ex[3] != 'A') {
+ syntax_err("2 else");
+ return -1;
}
- dlist_add(&expect, "}");
- dlist_add(&expect, 0);
- dlist_add(&expect, "{");
+ free(dlist_pop(expect));
+ add = "fi\0B";
+ }
+ }
+ }
- // Expecting NULL will take any otherwise unrecognized word
- } else if (expect && !ex) {
- free(dlist_pop(&expect));
- continue;
+ // Record how the previous stanza ended: ; | & ;; || && ;& ;;& |& NULL
+ *end = arg->v[arg->c];
+
+ return i;
+}
- // If we expect nothing and didn't just start a new flow control block,
- // rest of statement is a command and arguments, so stop now
- } else if (!ex) break;
+// Consume a line of shell script and do what it says. Returns 0 if finished,
+// 1 to request another line of input.
- if (add) continue;
+struct sh_parse {
+ struct double_list *pipeline, *plstart, *expect, *here;
+ char *end;
+};
- // If we got here we expect a word to end this block: is this it?
- if (!strcmp(arg->v[i], ex)) {
- free(dlist_pop(&expect));
+// pipeline and expect are scratch space, state held between calls which
+// I don't want to make global yet because this could be reentrant.
+// returns 1 to request another line (> prompt), 0 if line consumed.
+static int parse_line(char *line, struct sh_parse *sp)
+{
+ char *start = line, *delete = 0, *end, *s;
+ struct sh_arg *arg = 0;
+ struct double_list *pl;
+ long i;
- // can't "if | then" or "while && do", only ; or newline works
- if (end && !strcmp(end, ";")) {
- syntax_err("bad %s", end);
- goto flush;
- }
+ // Resume appending to last statement?
+ if (sp->pipeline) {
+ arg = (void *)sp->pipeline->prev->data;
- // if it's a multipart block, what comes next?
- if (!strcmp(s, "do")) ex = "done";
- else if (!strcmp(s, "then")) add = "fi\0A";
- // fi could have elif, which queues a then.
- } else if (!strcmp(ex, "fi")) {
- if (!strcmp(s, "elif")) {
- free(dlist_pop(&expect));
- add = "then";
- // catch duplicate else while we're here
- } else if (!strcmp(s, "else")) {
- if (ex[3] != 'A') {
- syntax_err("2 else");
- goto flush;
- }
- free(dlist_pop(&expect));
- add = "fi\0B";
- }
+ // Extend/resume quoted block
+ if (arg->c<0) {
+ start = delete = xmprintf("%s%s", arg->v[arg->c = (-arg->c)-1], start);
+ free(arg->v[arg->c]);
+ arg->v[arg->c] = 0;
+
+ // is a HERE document in progress?
+ } else if (sp->here && ((struct sh_arg *)sp->here->data)->c<0) {
+ unsigned long c;
+
+ arg = (void *)sp->here->data;
+ c = -arg->c - 1;
+
+ // HERE's arg->c < 0 means still adding to it, EOF string is last entry
+ if (!(31&c)) arg->v = xrealloc(arg->v, (32+c)*sizeof(void *));
+ if (strcmp(line, arg->v[c])) {
+ // Add this line
+ arg->v[c+1] = arg->v[c];
+ arg->v[c] = xstrdup(line);
+ arg->c--;
+ } else {
+ // EOF hit, end HERE document
+ arg->v[arg->c = c] = 0;
+ sp->here = sp->here->next;
}
+ start = 0;
}
- // Record how the previous stanza ended: ; | & ;; || && ;& ;;& |& NULL
- end = arg->v[arg->c];
}
- // Do we need more lines to finish a flow control statement?
- if (expect || paren) {
- llist_traverse(expect, free);
- return start;
+ // Parse words, assemble argv[] pipelines, check flow control and HERE docs
+ if (start) for (;;) {
+ s = 0;
+
+ // Parse next word and detect overflow (too many nested quotes).
+ if ((end = parse_word(start)) == (void *)1) goto flush;
+
+ // Extend pipeline and argv[] to store result
+ if (!arg)
+ dlist_add(&sp->pipeline, (void *)(arg = xzalloc(sizeof(struct sh_arg))));
+ if (!(31&arg->c)) arg->v = xrealloc(arg->v, (32+arg->c)*sizeof(void *));
+
+ // Do we need to request another line to finish word (find ending quote)?
+ if (!end) {
+ // Save unparsed bit of this line, we'll need to re-parse it.
+ arg->v[arg->c] = xstrndup(start, strlen(start));
+ arg->c = -(arg->c+1);
+ free(delete);
+
+ return 1;
+ }
+
+ // Did we hit the end of this line of input?
+ if (end == start) {
+ arg->v[arg->c] = 0;
+
+ // Parse flow control data from last statement
+ if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush;
+
+ // Grab HERE document(s)
+ for (pl = sp->plstart ? sp->plstart : sp->pipeline; pl;
+ pl = (pl->next == sp->pipeline) ? 0 : pl->next)
+ {
+ struct sh_arg *here;
+
+ arg = (void *)pl->data;
+
+ for (i = 0; i<arg->c; i++) {
+ // find [n]<<[-] with an argument after it
+ s = arg->v[i];
+ if (*s == '{') s++;
+ while (isdigit(*s)) s++;
+ if (*arg->v[i] == '{' && *s == '}') s++;
+ if (strcmp(s, "<<") && strcmp(s, "<<-")) continue;
+ if (i+1 == arg->c) goto flush;
+
+ here = xzalloc(sizeof(struct sh_arg));
+ here->v = xzalloc(32*sizeof(void *));
+ *here->v = arg->v[++i];
+ here->c = -1;
+ }
+ }
+
+ // Stop reading.
+ break;
+ }
+
+ // ) only saved at start of a statement, else ends statement with NULL
+ if (arg->c && *start == ')') {
+ arg->v[arg->c] = 0;
+ end--;
+ if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush;
+ arg = 0;
+ } else {
+ // Save argument (strdup) and check if it's special
+ s = arg->v[arg->c] = xstrndup(start, end-start);
+ if (!strchr(");|&", *start)) arg->c++;
+ else {
+ // end of statement due to flow control character.
+ s = 0;
+ if (!arg->c) goto flush;
+ if (-1 == flow_control(0, arg, &sp->expect, &sp->end)) goto flush;
+ arg = 0;
+ }
+ }
+ start = end;
}
+ free(delete);
+
+ // return if HERE document or more flow control
+ if (sp->expect || (sp->pipeline && sp->pipeline->prev->data==(void *)1))
+ return 1;
+
+ // At this point, we don't need more input and can start executing.
+
+ // **************************** do the thing *******************************
+
+ // Now we have a complete thought and can start running stuff.
// iterate through the commands running each one
- for (pl = *pipeline; pl ; pl = (pl->next == *pipeline) ? 0 : pl->next) {
+
+ // run a pipeline of commands
+
+ for (pl = sp->pipeline; pl ; pl = (pl->next == sp->pipeline) ? 0 : pl->next) {
struct sh_process *pp = xzalloc(sizeof(struct sh_process));
- for (i = 0; i<((struct sh_arg *)pl->data)->c; i++)
- expand_arg(&pp->arg, ((struct sh_arg *)pl->data)->v[i]);
+ for (i = 0; i<arg->c; i++) expand_arg(&pp->arg, arg->v[i]);
run_command(pp);
+ llist_traverse(pp->delete, free);
}
+ s = 0;
flush:
- while ((pl = dlist_pop(pipeline))) {
+
+ if (s) syntax_err("bad %s", s);
+ while ((pl = dlist_pop(&sp->pipeline))) {
+ arg = (void *)pl->data;
+ free(pl);
+ for (i = 0; i<arg->c; i++) free(arg->v[i]);
+ free(arg->v);
+ free(arg);
+ }
+
+ while ((pl = dlist_pop(&sp->here))) {
arg = (void *)pl->data;
free(pl);
+ if (arg->c<0) arg->c = -arg->c - 1;
for (i = 0; i<arg->c; i++) free(arg->v[i]);
free(arg->v);
free(arg);
}
- *pipeline = 0;
+
+ llist_traverse(sp->expect, free);
return 0;
}
void sh_main(void)
{
- FILE *f = 0;
- char *command = 0, *old = 0;
- struct double_list *scratch = 0;
+ FILE *f;
+ struct sh_parse scratch;
+ int prompt = 0;
// Set up signal handlers and grab control of this tty.
- if (isatty(0)) toys.optflags |= FLAG_i;
- if (*toys.optargs) f = xfopen(*toys.optargs, "r");
- if (TT.command) command = parse_line(TT.command, &scratch);
- else for (;;) {
+ memset(&scratch, 0, sizeof(scratch));
+ if (TT.command) f = fmemopen(TT.command, strlen(TT.command), "r");
+ else if (*toys.optargs) f = xfopen(*toys.optargs, "r");
+ else {
+ f = stdin;
+ if (isatty(0)) toys.optflags |= FLAG_i;
+ }
+
+ for (;;) {
char *new = 0;
size_t linelen = 0;
// Prompt and read line
- if (!f) {
- char *s = getenv(command ? "PS2" : "PS1");
+ if (f == stdin) {
+ char *s = getenv(prompt ? "PS2" : "PS1");
- if (!s) s = command ? "> " : (getpid() ? "\\$ " : "# ");
+ if (!s) s = prompt ? "> " : (getpid() ? "\\$ " : "# ");
do_prompt(s);
- }
- if (1 > getline(&new, &linelen, f ? f : stdin)) break;
- if (f) TT.lineno++;
-
- // Append to unused portion of previous line if any
- if (command) {
- command = xmprintf("%s%s", command, new);
- free(old);
- free(new);
- old = command;
- } else {
- free(old);
- old = new;
- }
+ } else TT.lineno++;
+ if (1>(linelen = getline(&new, &linelen, f ? f : stdin))) break;
+ if (new[linelen-1] == '\n') new[--linelen] = 0;
// returns 0 if line consumed, 1 if it needs another line of input
- command = parse_line(old, &scratch);
+ prompt = parse_line(new, &scratch);
+ free(new);
}
- if (command) error_exit("unfinished line");
+ if (prompt) error_exit("%ld:unfinished line"+4*!TT.lineno, TT.lineno);
toys.exitval = f && ferror(f);
}