From 67bd0be1a4ed817954c9dcededf9bd9cb8c2f431 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sun, 11 Oct 2020 02:59:54 -0500 Subject: toysh: more variable/wildcard plumbing and tests. --- lib/dirtree.c | 13 +++-- lib/lib.c | 19 ++++++ lib/lib.h | 5 +- tests/sh.test | 9 +++ toys/pending/sh.c | 168 ++++++++++++++++++++++++++++++++++++++---------------- 5 files changed, 159 insertions(+), 55 deletions(-) diff --git a/lib/dirtree.c b/lib/dirtree.c index 2bd7c404..70b567d2 100644 --- a/lib/dirtree.c +++ b/lib/dirtree.c @@ -53,12 +53,13 @@ struct dirtree *dirtree_add_node(struct dirtree *parent, char *name, int flags) // Allocate/populate return structure dt = xmalloc((len = sizeof(struct dirtree)+len+1)+linklen); - memset(dt, 0, statless ? offsetof(struct dirtree, again) + memset(dt, 0, statless ? sizeof(struct dirtree)+1 : offsetof(struct dirtree, st)); dt->parent = parent; dt->again = statless ? 2 : 0; if (!statless) memcpy(&dt->st, &st, sizeof(struct stat)); - strcpy(dt->name, name ? name : ""); + if (name) strcpy(dt->name, name); + else dt->st.st_mode = S_IFDIR; if (linklen) dt->symlink = memcpy(len+(char *)dt, libbuf, linklen); return dt; @@ -142,10 +143,12 @@ int dirtree_recurse(struct dirtree *node, { struct dirtree *new, **ddt = &(node->child); struct dirent *entry; - DIR *dir; + DIR *dir = 0; - node->dirfd = dirfd; - if (node->dirfd == -1 || !(dir = fdopendir(node->dirfd))) { + // Why doesn't fdopendir() support AT_FDCWD? + if (AT_FDCWD == (node->dirfd = dirfd)) dir = opendir("."); + else if (node->dirfd != -1) dir = fdopendir(node->dirfd); + if (!dir) { if (!(flags & DIRTREE_SHUTUP)) { char *path = dirtree_path(node, 0); perror_msg_raw(path); diff --git a/lib/lib.c b/lib/lib.c index 752fd0a1..c4e70dfe 100644 --- a/lib/lib.c +++ b/lib/lib.c @@ -346,6 +346,25 @@ int stridx(char *haystack, char needle) return off-haystack; }
+// Convert wc to utf8, returning bytes written (1-4). Does not null terminate.
+int wctoutf8(char *s, unsigned wc)
+{
+  int len = (wc>0x7ff)+(wc>0xffff), i = len; // len = continuation bytes-1
+
+  if (wc<128) {
+    *s = wc;
+    return 1;
+  } else {
+    do {
+      s[1+i] = 0x80+(wc&0x3f); // each continuation byte carries 6 bits
+      wc >>= 6;
+    } while (i--); // count down a copy so len survives for the return
+    *s = wc|((0xf0<<(2-len))&0xff); // lead byte marker: 0xc0, 0xe0 or 0xf0
+  }
+
+  return 2+len;
+}
+
// Convert utf8 sequence to a unicode wide character // returns bytes consumed, or -1 if err, or -2 if need more data. int utf8towc(wchar_t *wc, char *str, unsigned len) diff --git a/lib/lib.h b/lib/lib.h index db851631..6851c4aa 100644 --- a/lib/lib.h +++ b/lib/lib.h @@ -74,11 +74,11 @@ void get_optflags(void); // Don't warn about failure to stat #define DIRTREE_SHUTUP 16 // Breadth first traversal, conserves filehandles at the expense of memory -#define DIRTREE_BREADTH 32 +#define DIRTREE_BREADTH 32 // TODO not implemented yet // skip non-numeric entries #define DIRTREE_PROC 64 // Return files we can't stat -#define DIRTREE_STATLESS 128 +#define DIRTREE_STATLESS 128 // Don't look at any more files in this directory. #define DIRTREE_ABORT 256 @@ -229,6 +229,7 @@ long long xstrtol(char *str, char **end, int base); long long atolx(char *c); long long atolx_range(char *numstr, long long low, long long high); int stridx(char *haystack, char needle); +int wctoutf8(char *s, unsigned wc); int utf8towc(wchar_t *wc, char *str, unsigned len); char *strlower(char *s); char *strafter(char *haystack, char *needle); diff --git a/tests/sh.test b/tests/sh.test index f0764858..b095bb95 100644 --- a/tests/sh.test +++ b/tests/sh.test @@ -1,5 +1,9 @@ #!/bin/echo no +# TODO https://mywiki.wooledge.org/BashFAQ +# http://tiswww.case.edu/php/chet/bash/FAQ +# https://mywiki.wooledge.org/BashPitfalls#set_-euo_pipefail + # // ${#} ${#x} ${#@} ${#x[@]} ${#!} ${!#} # // ${!} ${!@} ${!@Q} ${!x} ${!x@} ${!x@Q} ${!x#} ${!x[} ${!x[*]} @@ -169,10 +173,15 @@ testing '${x#prefix}' 'x=abcde; echo ${x#abc}' 'de\n' '' '' testing '${x#short} ${x##long}' 'x=banana; echo ${x#b*n} ${x##b*n}' \ 'ana a\n' '' '' toyonly testing '${x#utf8}' 'x=aそcde; echo
${x##a?c}' 'de\n' '' '' +mkdir -p abc/def/ghi +touch www +testing 'wildcards' 'echo w[v-x]w w[x-v]w abc/*/ghi' \ + 'www w[x-v]w abc/def/ghi\n' '' '' #testing "backtick1" 'X=fred; echo `echo $x`' 'fred\n' "" "" #testing "backtick2" 'X=fred; echo `x=y; echo $x`' 'y\n' "" "" testing '$(( ) )' 'echo ab$((echo hello) | tr e x)cd' "abhxllocd\n" "" "" +testing '$((x=y)) lifetime' 'a=boing; echo $a $a$((a=4))$a $a' 'boing boing44 4\n' '' '' # Loops and flow control testing "case" 'for i in A C J B; do case "$i" in A) echo got A ;; B) echo and B ;; C) echo then C ;; *) echo default ;; esac; done' \ diff --git a/toys/pending/sh.c b/toys/pending/sh.c index 47086363..d6aaaff2 100644 --- a/toys/pending/sh.c +++ b/toys/pending/sh.c @@ -27,6 +27,7 @@ * TODO: getuid() vs geteuid() * TODO: test that $PS1 color changes work without stupid \[ \] hack * TODO: Handle embedded NUL bytes in the command line? (When/how?) + * TODO: set -e -u -o pipefail, shopt -s nullglob * * bash man page: * control operators || & && ; ;; ;& ;;& ( ) | |& @@ -249,21 +250,23 @@ static void arg_add(struct sh_arg *arg, char *data) } // add argument to an arg_list -static void push_arg(struct arg_list **list, char *arg) +static char *push_arg(struct arg_list **list, char *arg) { struct arg_list *al; - if (!list) return; - al = xmalloc(sizeof(struct arg_list)); - al->next = *list; - al->arg = arg; - *list = al; + if (list) { + al = xmalloc(sizeof(struct arg_list)); + al->next = *list; + al->arg = arg; + *list = al; + } + + return arg; } static void arg_add_del(struct sh_arg *arg, char *data,struct arg_list **delete) { - push_arg(delete, data); - arg_add(arg, data); + arg_add(arg, push_arg(delete, data)); } // return length of valid variable name @@ -782,6 +785,9 @@ int getutf8(char *s, int len, int *cc) #define WILD_SHORT 1 // else longest match #define WILD_CASE 2 // case insensitive +#define WILD_ANY 4 // advance through pattern instead of str +#define WILD_SCAN 8 // search from beginning for start/end 
+#define WILD_BACK 16 // search from end // Returns length of str matched by pattern, or -1 if not all pattern consumed static int wildcard_match(char *str, int len, char *pattern, int plen, struct sh_arg *deck, int flags) @@ -792,6 +798,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen, // Loop through wildcards in pattern. for (ss = pp = dd = 0; ;) { + if ((flags&WILD_ANY) && best!=-1) break; // did we consume pattern? if (pp==plen) { @@ -813,7 +820,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen, } else { c = pattern[pp++]; dd++; - if (c=='?') { + if (c=='?' || ((flags&WILD_ANY) && c=='*')) { ss += (i = getutf8(str+ss, len-ss, 0)); if (i) continue; } else if (c=='*') { @@ -837,7 +844,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen, } else if (not^(i==c)) break; } if (i) { - pp = (long)deck->v[dd++]; + pp = 1+(long)deck->v[dd++]; continue; } @@ -850,8 +857,14 @@ static int wildcard_match(char *str, int len, char *pattern, int plen, } } - // match failure, pop retry stack or return failure - // TODO: seek to next | in paren + // match failure + if (flags&WILD_ANY) { + ss = 0; + if (plen==pp) break; + continue; + } + + // pop retry stack or return failure (TODO: seek to next | in paren) while (ant.c) { if ((c = pattern[(long)deck->v[--dd]])=='*') { if (len<(ss = (long)ant.v[ant.c-2]+(long)++ant.v[ant.c-1])) ant.c -= 2; @@ -869,6 +882,28 @@ static int wildcard_match(char *str, int len, char *pattern, int plen, return best; } +static int wildcard_scan(char *s, char *pattern, struct sh_arg *deck, int flags) +{ + int ll = strlen(s), bb = flags&WILD_BACK, ii = bb ? 
ll-1 : 0, + pp = strlen(pattern), rc, best = -1; + + for (;;) { + rc = wildcard_match(s+ii, ll-ii, pattern, pp, deck, flags); + if (!(flags&(WILD_BACK|WILD_SCAN))) return rc; + if (rc>0 && !s[rc]) { + if ((flags&(WILD_SHORT|WILD_BACK))!=WILD_BACK) return rc; + best = ii; + } + if (bb) { + if (!ii--) return best; + } else { + if (!--ll) return -1; + s++; + } + } +} +// TODO: test that * matches "" + // skip to next slash in wildcard path, passing count active ranges. // start at pattern[off] and deck[*idx], return pattern pos and update *idx char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx, @@ -879,10 +914,10 @@ char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx, // Skip [] and nested () ranges within deck until / or NUL for (p = old = pattern+off;; p++) { - if (!*p) return p; while (*p=='/') { old = p++; + if (j && !count) return old; j = 0; } @@ -890,7 +925,7 @@ char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx, if (*idxc && p-pattern == (long)deck->v[*idx]) { if (!j++ && !count--) return old; ++*idx; - if (*p=='[') p = deck->v[(*idx)++]; + if (*p=='[') p = pattern+(long)deck->v[(*idx)++]; else if (*p=='(') while (*++p) if (p-pattern == (long)deck->v[*idx]) { ++*idx; if (*p == ')') { @@ -914,11 +949,13 @@ int do_wildcard_files(struct dirtree *node) int lvl, ll = 0, ii = 0, rc; struct sh_arg ant; + // Top level entry has no pattern in it if (!node->parent) return DIRTREE_RECURSE; // Find active pattern range - for (nn = node->parent->parent; nn; nn = nn->parent) ii++; - pattern = wildcard_path(TT.wcpat, 0, TT.wcdeck, &ll, ii)+1; + for (nn = node->parent; nn; nn = nn->parent) if (nn->parent) ii++; + pattern = wildcard_path(TT.wcpat, 0, TT.wcdeck, &ll, ii); + while (*pattern=='/') pattern++; lvl = ll; patend = wildcard_path(TT.wcpat, pattern-TT.wcpat, TT.wcdeck, &ll, 1); @@ -935,19 +972,27 @@ int do_wildcard_files(struct dirtree *node) rc = wildcard_match(node->name, strlen(node->name), pattern, 
patend-pattern, &ant, 0); for (ii = 0; iiv[lvl+ii] += pattern-TT.wcpat; - if (rc<0 || node->name[rc]) return 0; - // We matched: recurse or save + // Return failure or save exact match. + if (rc<0 || node->name[rc]) return 0; if (!*patend) return DIRTREE_SAVE; - if (!*wildcard_path(TT.wcpat, patend-TT.wcpat, TT.wcdeck, &ll, 0)) { - pattern = xmprintf("%s%s", node->name, patend); - rc = faccessat(dirtree_parentfd(node), pattern, F_OK, AT_SYMLINK_NOFOLLOW); - free(pattern); - return DIRTREE_SAVE*!rc; - } + // Are there more wildcards to test children against? + if (TT.wcdeck->c!=ll) return DIRTREE_RECURSE; + + // No more wildcards: check for child and return failure if it isn't there. + pattern = xmprintf("%s%s", node->name, patend); + rc = faccessat(dirtree_parentfd(node), pattern, F_OK, AT_SYMLINK_NOFOLLOW); + free(pattern); + if (rc) return 0; + + // Save child and self. (Child could be trailing / but only one saved.) + while (*patend=='/' && patend[1]) patend++; + node->child = xzalloc(sizeof(struct dirtree)+1+strlen(patend)); + node->child->parent = node; + strcpy(node->child->name, patend); - return DIRTREE_RECURSE; + return DIRTREE_SAVE; } // Record active wildcard chars in output string @@ -997,11 +1042,12 @@ static void collect_wildcards(char *new, long oo, struct sh_arg *deck) // complete [range], discard wildcards within, add [, fall through to add ] else if (cc == ']' && (bracket = *vv>>16)) { + // don't end range yet for [] or [^] if (bracket+1 == oo || (bracket+2 == oo && strchr("!^", new[oo-1]))) return; while (deck->c>1 && vv[deck->c-1]>=bracket) deck->c--; *vv &= 65535; - arg_add(deck, (void *)--bracket); + arg_add(deck, (void *)bracket); // Not a wildcard } else { @@ -1020,25 +1066,23 @@ static void wildcard_add_files(struct sh_arg *arg, char *pattern, struct sh_arg *deck, struct arg_list **delete) { struct dirtree *dt; - char *p, *pp; + char *pp; int ll = 0; // fast path: when no wildcards, add pattern verbatim collect_wildcards("", 0, deck); if 
(!deck->c) return arg_add(arg, pattern); - // Find leading patternless path (if any) - p = wildcard_path(TT.wcpat = pattern, 0, TT.wcdeck = deck, &ll, 0); - if ((pp = (p==pattern) ? 0 : xstrndup(pattern, p-pattern))) p++; - - // Traverse. If no match, save pattern verbatim. + // Traverse starting with leading patternless path. + pp = wildcard_path(TT.wcpat = pattern, 0, TT.wcdeck = deck, &ll, 0); + pp = (pp==pattern) ? 0 : xstrndup(pattern, pp-pattern); dt = dirtree_flagread(pp, DIRTREE_STATLESS|DIRTREE_SYMFOLLOW, do_wildcard_files); free(pp); deck->c = 0; - if (!dt) return arg_add(arg, pattern); - // traverse dirtree via child and parent pointers, consuming/freeing nodes + // If no match save pattern, else free tree saving each path found. + if (!dt) return arg_add(arg, pattern); while (dt) { while (dt->child) dt = dt->child; arg_add(arg, dirtree_path(dt, 0)); @@ -1226,12 +1270,9 @@ dprintf(2, "TODO: do math for %.*s\n", kk, s); // special case: normal varname followed by @} or *} = prefix list if (ss[jj] == '*' || (ss[jj] == '@' && !isalpha(ss[jj+1]))) { - for (slice++, kk = 0; kkarg!=ifs) + push_arg(delete, ifs = xstrdup(ifs)); + if (dd != (ll = wctoutf8(buf, ll))) { + if (ddarg = xrealloc(ifs, strlen(ifs)+1+dd-ll); + memmove(ifs+yy+dd-ll, ifs+yy+ll, strlen(ifs+yy+ll)+1); + } + memcpy(ss = ifs+yy, buf, dd = ll); + } + } + if (!xx) break; + ss += dd; + yy -= dd; + } + } else if (0<(dd = wildcard_scan(ifs, s, &wild, + WILD_SHORT*!xx+WILD_BACK*(*slice=='%')))) + { + if (*slice == '#') ifs += dd; + else if (ifs[dd]) { + if (*delete && (*delete)->arg==ifs) ifs[dd] = 0; + else push_arg(delete, ifs = xstrndup(ifs, dd)); + } + } free(s); free(wild.v); - if (dd>0) ifs += dd; +// } else if (*slice=='/') { +//murgle // TODO test x can be @ or * } else { @@ -1382,7 +1457,6 @@ barf: // ${x/pat/sub} substitute ${x//pat/sub} global ${x/#pat/sub} begin // ${x/%pat/sub} end ${x/pat} delete pat // x can be @ or * -// ${x^pat} ${x^^pat} uppercase/g ${x,} ${x,,} lowercase/g (no pat 
= ?) // ${x@QEPAa} Q=$'blah' E=blah without the $'' wrap, P=expand as $PS1 // A=declare that recreates var a=attribute flags // x can be @* @@ -1624,8 +1698,7 @@ static int expand_arg(struct sh_arg *arg, char *old, unsigned flags, } // Save result, aborting on expand error - push_arg(delete, ss); - if (expand_arg_nobrace(arg, ss, flags, delete, 0)) { + if (expand_arg_nobrace(arg, push_arg(delete, ss), flags, delete, 0)) { llist_traverse(blist, free); return 1; @@ -2793,8 +2866,7 @@ dprintf(2, "TODO skipped init for((;;)), need math parser\n"); if ((err = expand_arg_nobrace(&arg, *vv++, NO_SPLIT, &blk->fdelete, &arg2))) break; s = arg.c ? *arg.v : ""; - match = wildcard_match(blk->fvar, strlen(blk->fvar), s, strlen(s), - &arg2, 0); + match = wildcard_scan(blk->fvar, s, &arg2, 0); if (match>=0 && !s[match]) break; else if (**vv++ == ')') { vv = 0; -- cgit v1.2.3