aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2020-10-11 02:59:54 -0500
committer Rob Landley <rob@landley.net> 2020-10-11 02:59:54 -0500
commit 67bd0be1a4ed817954c9dcededf9bd9cb8c2f431 (patch)
tree 4df0aa1044af467469e77c757bbf58dffe2f6178
parent 0f2658c806586190be3aca21826e77fff9e50f1b (diff)
download toybox-67bd0be1a4ed817954c9dcededf9bd9cb8c2f431.tar.gz
toysh: more variable/wildcard plumbing and tests.
-rw-r--r-- lib/dirtree.c 13
-rw-r--r-- lib/lib.c 19
-rw-r--r-- lib/lib.h 5
-rw-r--r-- tests/sh.test 9
-rw-r--r-- toys/pending/sh.c 168
5 files changed, 159 insertions, 55 deletions
diff --git a/lib/dirtree.c b/lib/dirtree.c
index 2bd7c404..70b567d2 100644
--- a/lib/dirtree.c
+++ b/lib/dirtree.c
@@ -53,12 +53,13 @@ struct dirtree *dirtree_add_node(struct dirtree *parent, char *name, int flags)
// Allocate/populate return structure
dt = xmalloc((len = sizeof(struct dirtree)+len+1)+linklen);
- memset(dt, 0, statless ? offsetof(struct dirtree, again)
+ memset(dt, 0, statless ? sizeof(struct dirtree)+1
: offsetof(struct dirtree, st));
dt->parent = parent;
dt->again = statless ? 2 : 0;
if (!statless) memcpy(&dt->st, &st, sizeof(struct stat));
- strcpy(dt->name, name ? name : "");
+ if (name) strcpy(dt->name, name);
+ else dt->st.st_mode = S_IFDIR;
if (linklen) dt->symlink = memcpy(len+(char *)dt, libbuf, linklen);
return dt;
@@ -142,10 +143,12 @@ int dirtree_recurse(struct dirtree *node,
{
struct dirtree *new, **ddt = &(node->child);
struct dirent *entry;
- DIR *dir;
+ DIR *dir = 0;
- node->dirfd = dirfd;
- if (node->dirfd == -1 || !(dir = fdopendir(node->dirfd))) {
+ // Why doesn't fdopendir() support AT_FDCWD?
+ if (AT_FDCWD == (node->dirfd = dirfd)) dir = opendir(".");
+ else if (node->dirfd != -1) dir = fdopendir(node->dirfd);
+ if (!dir) {
if (!(flags & DIRTREE_SHUTUP)) {
char *path = dirtree_path(node, 0);
perror_msg_raw(path);
diff --git a/lib/lib.c b/lib/lib.c
index 752fd0a1..c4e70dfe 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -346,6 +346,25 @@ int stridx(char *haystack, char needle)
return off-haystack;
}
+// Convert wc to utf8, returning bytes written (1-4). Does not null terminate.
+int wctoutf8(char *s, unsigned wc)
+{
+  int len = (wc>0x7ff)+(wc>0xffff), ii = len, mask = (12+len+!!len)<<4;
+
+  if (wc<128) {
+    *s = wc;
+    return 1;
+  }
+  // Fill continuation bytes last to first (6 bits each), keeping len for return
+  do {
+    s[1+ii] = 0x80+(wc&0x3f);
+    wc >>= 6;
+  } while (ii--);
+  *s = wc|mask; // lead byte: mask is 0xc0/0xe0/0xf0 for 2/3/4 byte sequence
+
+  return 2+len;
+}
+
// Convert utf8 sequence to a unicode wide character
// returns bytes consumed, or -1 if err, or -2 if need more data.
int utf8towc(wchar_t *wc, char *str, unsigned len)
diff --git a/lib/lib.h b/lib/lib.h
index db851631..6851c4aa 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -74,11 +74,11 @@ void get_optflags(void);
// Don't warn about failure to stat
#define DIRTREE_SHUTUP 16
// Breadth first traversal, conserves filehandles at the expense of memory
-#define DIRTREE_BREADTH 32
+#define DIRTREE_BREADTH 32 // TODO not implemented yet
// skip non-numeric entries
#define DIRTREE_PROC 64
// Return files we can't stat
-#define DIRTREE_STATLESS 128
+#define DIRTREE_STATLESS 128
// Don't look at any more files in this directory.
#define DIRTREE_ABORT 256
@@ -229,6 +229,7 @@ long long xstrtol(char *str, char **end, int base);
long long atolx(char *c);
long long atolx_range(char *numstr, long long low, long long high);
int stridx(char *haystack, char needle);
+int wctoutf8(char *s, unsigned wc);
int utf8towc(wchar_t *wc, char *str, unsigned len);
char *strlower(char *s);
char *strafter(char *haystack, char *needle);
diff --git a/tests/sh.test b/tests/sh.test
index f0764858..b095bb95 100644
--- a/tests/sh.test
+++ b/tests/sh.test
@@ -1,5 +1,9 @@
#!/bin/echo no
+# TODO https://mywiki.wooledge.org/BashFAQ
+# http://tiswww.case.edu/php/chet/bash/FAQ
+# https://mywiki.wooledge.org/BashPitfalls#set_-euo_pipefail
+
# // ${#} ${#x} ${#@} ${#x[@]} ${#!} ${!#}
# // ${!} ${!@} ${!@Q} ${!x} ${!x@} ${!x@Q} ${!x#} ${!x[} ${!x[*]}
@@ -169,10 +173,15 @@ testing '${x#prefix}' 'x=abcde; echo ${x#abc}' 'de\n' '' ''
testing '${x#short} ${x##long}' 'x=banana; echo ${x#b*n} ${x##b*n}' \
'ana a\n' '' ''
toyonly testing '${x#utf8}' 'x=aそcde; echo ${x##a?c}' 'de\n' '' ''
+mkdir -p abc/def/ghi
+touch www
+testing 'wildcards' 'echo w[v-x]w w[x-v]w abc/*/ghi' \
+ 'www w[x-v]w abc/def/ghi\n' '' ''
#testing "backtick1" 'X=fred; echo `echo $x`' 'fred\n' "" ""
#testing "backtick2" 'X=fred; echo `x=y; echo $x`' 'y\n' "" ""
testing '$(( ) )' 'echo ab$((echo hello) | tr e x)cd' "abhxllocd\n" "" ""
+testing '$((x=y)) lifetime' 'a=boing; echo $a $a$((a=4))$a $a' 'boing boing44 4\n' '' ''
# Loops and flow control
testing "case" 'for i in A C J B; do case "$i" in A) echo got A ;; B) echo and B ;; C) echo then C ;; *) echo default ;; esac; done' \
diff --git a/toys/pending/sh.c b/toys/pending/sh.c
index 47086363..d6aaaff2 100644
--- a/toys/pending/sh.c
+++ b/toys/pending/sh.c
@@ -27,6 +27,7 @@
* TODO: getuid() vs geteuid()
* TODO: test that $PS1 color changes work without stupid \[ \] hack
* TODO: Handle embedded NUL bytes in the command line? (When/how?)
+ * TODO: set -e -u -o pipefail, shopt -s nullglob
*
* bash man page:
* control operators || & && ; ;; ;& ;;& ( ) | |& <newline>
@@ -249,21 +250,23 @@ static void arg_add(struct sh_arg *arg, char *data)
}
// add argument to an arg_list
-static void push_arg(struct arg_list **list, char *arg)
+static char *push_arg(struct arg_list **list, char *arg) // returns arg so calls can nest
{
struct arg_list *al;
- if (!list) return;
- al = xmalloc(sizeof(struct arg_list));
- al->next = *list;
- al->arg = arg;
- *list = al;
+ if (list) { // NULL list: record nothing, but still return arg below
+ al = xmalloc(sizeof(struct arg_list));
+ al->next = *list; // new entry is linked in front of the current head
+ al->arg = arg;
+ *list = al;
+ }
+
+ return arg;
}
static void arg_add_del(struct sh_arg *arg, char *data,struct arg_list **delete)
{
- push_arg(delete, data);
- arg_add(arg, data);
+ arg_add(arg, push_arg(delete, data)); // push_arg() hands data back, so same two calls as before
}
// return length of valid variable name
@@ -782,6 +785,9 @@ int getutf8(char *s, int len, int *cc)
#define WILD_SHORT 1 // else longest match
#define WILD_CASE 2 // case insensitive
+#define WILD_ANY 4 // advance through pattern instead of str
+#define WILD_SCAN 8 // search from beginning for start/end
+#define WILD_BACK 16 // search from end
// Returns length of str matched by pattern, or -1 if not all pattern consumed
static int wildcard_match(char *str, int len, char *pattern, int plen,
struct sh_arg *deck, int flags)
@@ -792,6 +798,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen,
// Loop through wildcards in pattern.
for (ss = pp = dd = 0; ;) {
+ if ((flags&WILD_ANY) && best!=-1) break;
// did we consume pattern?
if (pp==plen) {
@@ -813,7 +820,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen,
} else {
c = pattern[pp++];
dd++;
- if (c=='?') {
+ if (c=='?' || ((flags&WILD_ANY) && c=='*')) {
ss += (i = getutf8(str+ss, len-ss, 0));
if (i) continue;
} else if (c=='*') {
@@ -837,7 +844,7 @@ static int wildcard_match(char *str, int len, char *pattern, int plen,
} else if (not^(i==c)) break;
}
if (i) {
- pp = (long)deck->v[dd++];
+ pp = 1+(long)deck->v[dd++];
continue;
}
@@ -850,8 +857,14 @@ static int wildcard_match(char *str, int len, char *pattern, int plen,
}
}
- // match failure, pop retry stack or return failure
- // TODO: seek to next | in paren
+ // match failure
+ if (flags&WILD_ANY) {
+ ss = 0;
+ if (plen==pp) break;
+ continue;
+ }
+
+ // pop retry stack or return failure (TODO: seek to next | in paren)
while (ant.c) {
if ((c = pattern[(long)deck->v[--dd]])=='*') {
if (len<(ss = (long)ant.v[ant.c-2]+(long)++ant.v[ant.c-1])) ant.c -= 2;
@@ -869,6 +882,28 @@ static int wildcard_match(char *str, int len, char *pattern, int plen,
return best;
}
+static int wildcard_scan(char *s, char *pattern, struct sh_arg *deck, int flags)
+{
+ int ll = strlen(s), bb = flags&WILD_BACK, ii = bb ? ll-1 : 0, // ii: start offset, last byte for WILD_BACK (-1 if s is "")
+ pp = strlen(pattern), rc, best = -1;
+
+ for (;;) {
+ rc = wildcard_match(s+ii, ll-ii, pattern, pp, deck, flags);
+ if (!(flags&(WILD_BACK|WILD_SCAN))) return rc; // neither scan flag: single match at start of s
+ if (rc>0 && !s[rc]) { // NOTE(review): rc counts from s+ii but is tested at s[rc]; confirm for WILD_BACK
+ if ((flags&(WILD_SHORT|WILD_BACK))!=WILD_BACK) return rc;
+ best = ii; // WILD_BACK without WILD_SHORT: remember this start and keep moving toward the front
+ }
+ if (bb) {
+ if (!ii--) return best; // NOTE(review): an offset (or -1) here, a match length above; confirm callers
+ } else {
+ if (!--ll) return -1; // NOTE(review): if s is "" then ll goes 0 to -1 and this never fires
+ s++;
+ }
+ }
+}
+// TODO: test that * matches ""
+
// skip to next slash in wildcard path, passing count active ranges.
// start at pattern[off] and deck[*idx], return pattern pos and update *idx
char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx,
@@ -879,10 +914,10 @@ char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx,
// Skip [] and nested () ranges within deck until / or NUL
for (p = old = pattern+off;; p++) {
-
if (!*p) return p;
while (*p=='/') {
old = p++;
+ if (j && !count) return old;
j = 0;
}
@@ -890,7 +925,7 @@ char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx,
if (*idx<deck->c && p-pattern == (long)deck->v[*idx]) {
if (!j++ && !count--) return old;
++*idx;
- if (*p=='[') p = deck->v[(*idx)++];
+ if (*p=='[') p = pattern+(long)deck->v[(*idx)++];
else if (*p=='(') while (*++p) if (p-pattern == (long)deck->v[*idx]) {
++*idx;
if (*p == ')') {
@@ -914,11 +949,13 @@ int do_wildcard_files(struct dirtree *node)
int lvl, ll = 0, ii = 0, rc;
struct sh_arg ant;
+ // Top level entry has no pattern in it
if (!node->parent) return DIRTREE_RECURSE;
// Find active pattern range
- for (nn = node->parent->parent; nn; nn = nn->parent) ii++;
- pattern = wildcard_path(TT.wcpat, 0, TT.wcdeck, &ll, ii)+1;
+ for (nn = node->parent; nn; nn = nn->parent) if (nn->parent) ii++;
+ pattern = wildcard_path(TT.wcpat, 0, TT.wcdeck, &ll, ii);
+ while (*pattern=='/') pattern++;
lvl = ll;
patend = wildcard_path(TT.wcpat, pattern-TT.wcpat, TT.wcdeck, &ll, 1);
@@ -935,19 +972,27 @@ int do_wildcard_files(struct dirtree *node)
rc = wildcard_match(node->name, strlen(node->name), pattern, patend-pattern,
&ant, 0);
for (ii = 0; ii<ant.c; ii++) TT.wcdeck->v[lvl+ii] += pattern-TT.wcpat;
- if (rc<0 || node->name[rc]) return 0;
- // We matched: recurse or save
+ // Return failure or save exact match.
+ if (rc<0 || node->name[rc]) return 0;
if (!*patend) return DIRTREE_SAVE;
- if (!*wildcard_path(TT.wcpat, patend-TT.wcpat, TT.wcdeck, &ll, 0)) {
- pattern = xmprintf("%s%s", node->name, patend);
- rc = faccessat(dirtree_parentfd(node), pattern, F_OK, AT_SYMLINK_NOFOLLOW);
- free(pattern);
- return DIRTREE_SAVE*!rc;
- }
+ // Are there more wildcards to test children against?
+ if (TT.wcdeck->c!=ll) return DIRTREE_RECURSE;
+
+ // No more wildcards: check for child and return failure if it isn't there.
+ pattern = xmprintf("%s%s", node->name, patend);
+ rc = faccessat(dirtree_parentfd(node), pattern, F_OK, AT_SYMLINK_NOFOLLOW);
+ free(pattern);
+ if (rc) return 0;
+
+ // Save child and self. (Child could be trailing / but only one saved.)
+ while (*patend=='/' && patend[1]) patend++;
+ node->child = xzalloc(sizeof(struct dirtree)+1+strlen(patend));
+ node->child->parent = node;
+ strcpy(node->child->name, patend);
- return DIRTREE_RECURSE;
+ return DIRTREE_SAVE;
}
// Record active wildcard chars in output string
@@ -997,11 +1042,12 @@ static void collect_wildcards(char *new, long oo, struct sh_arg *deck)
// complete [range], discard wildcards within, add [, fall through to add ]
else if (cc == ']' && (bracket = *vv>>16)) {
+
// don't end range yet for [] or [^]
if (bracket+1 == oo || (bracket+2 == oo && strchr("!^", new[oo-1]))) return;
while (deck->c>1 && vv[deck->c-1]>=bracket) deck->c--;
*vv &= 65535;
- arg_add(deck, (void *)--bracket);
+ arg_add(deck, (void *)bracket);
// Not a wildcard
} else {
@@ -1020,25 +1066,23 @@ static void wildcard_add_files(struct sh_arg *arg, char *pattern,
struct sh_arg *deck, struct arg_list **delete)
{
struct dirtree *dt;
- char *p, *pp;
+ char *pp;
int ll = 0;
// fast path: when no wildcards, add pattern verbatim
collect_wildcards("", 0, deck);
if (!deck->c) return arg_add(arg, pattern);
- // Find leading patternless path (if any)
- p = wildcard_path(TT.wcpat = pattern, 0, TT.wcdeck = deck, &ll, 0);
- if ((pp = (p==pattern) ? 0 : xstrndup(pattern, p-pattern))) p++;
-
- // Traverse. If no match, save pattern verbatim.
+ // Traverse starting with leading patternless path.
+ pp = wildcard_path(TT.wcpat = pattern, 0, TT.wcdeck = deck, &ll, 0);
+ pp = (pp==pattern) ? 0 : xstrndup(pattern, pp-pattern);
dt = dirtree_flagread(pp, DIRTREE_STATLESS|DIRTREE_SYMFOLLOW,
do_wildcard_files);
free(pp);
deck->c = 0;
- if (!dt) return arg_add(arg, pattern);
- // traverse dirtree via child and parent pointers, consuming/freeing nodes
+ // If no match save pattern, else free tree saving each path found.
+ if (!dt) return arg_add(arg, pattern);
while (dt) {
while (dt->child) dt = dt->child;
arg_add(arg, dirtree_path(dt, 0));
@@ -1226,12 +1270,9 @@ dprintf(2, "TODO: do math for %.*s\n", kk, s);
// special case: normal varname followed by @} or *} = prefix list
if (ss[jj] == '*' || (ss[jj] == '@' && !isalpha(ss[jj+1]))) {
- for (slice++, kk = 0; kk<TT.varslen; kk++) {
- if (!strncmp(s = TT.vars[kk].str, ss, jj)) {
- arg_add(&aa, s = xstrndup(s, stridx(s, '=')));
- push_arg(delete, s);
- }
- }
+ for (slice++, kk = 0; kk<TT.varslen; kk++)
+ if (!strncmp(s = TT.vars[kk].str, ss, jj))
+ arg_add(&aa, push_arg(delete, s = xstrndup(s, stridx(s, '='))));
if (aa.c) push_arg(delete, (void *)aa.v);
// else dereference to get new varname, discarding if none, check err
@@ -1363,13 +1404,47 @@ barf:
// ${x#y} remove shortest prefix ${x##y} remove longest prefix
} else if (strchr("#%^,", *slice)) {
struct sh_arg wild = {0};
+ char buf[8];
s = slashcopy(slice+(xx = slice[1]==*slice)+1, '}', &wild);
- dd = wildcard_match(ifs, strlen(ifs), s, strlen(s), &wild,
- WILD_SHORT*!xx);
+
+ // ${x^pat} ${x^^pat} uppercase ${x,} ${x,,} lowercase (no pat = ?)
+ if (strchr("^,", *slice)) {
+ for (ss = ifs; *ss; ss += dd) {
+ dd = getutf8(ss, 4, &jj);
+ if (0<wildcard_scan(ss, s, &wild, WILD_ANY)) {
+ ll = ((*slice=='^') ? towupper : towlower)(jj);
+
+ // Of COURSE unicode case switch can change utf8 encoding length
+ if (ll != jj) {
+ yy = ss-ifs;
+ if (!*delete || (*delete)->arg!=ifs)
+ push_arg(delete, ifs = xstrdup(ifs));
+ if (dd != (ll = wctoutf8(buf, ll))) {
+ if (dd<ll)
+ ifs = (*delete)->arg = xrealloc(ifs, strlen(ifs)+1+dd-ll);
+ memmove(ifs+yy+dd-ll, ifs+yy+ll, strlen(ifs+yy+ll)+1);
+ }
+ memcpy(ss = ifs+yy, buf, dd = ll);
+ }
+ }
+ if (!xx) break;
+ ss += dd;
+ yy -= dd;
+ }
+ } else if (0<(dd = wildcard_scan(ifs, s, &wild,
+ WILD_SHORT*!xx+WILD_BACK*(*slice=='%'))))
+ {
+ if (*slice == '#') ifs += dd;
+ else if (ifs[dd]) {
+ if (*delete && (*delete)->arg==ifs) ifs[dd] = 0;
+ else push_arg(delete, ifs = xstrndup(ifs, dd));
+ }
+ }
free(s);
free(wild.v);
- if (dd>0) ifs += dd;
+// } else if (*slice=='/') {
+//murgle
// TODO test x can be @ or *
} else {
@@ -1382,7 +1457,6 @@ barf:
// ${x/pat/sub} substitute ${x//pat/sub} global ${x/#pat/sub} begin
// ${x/%pat/sub} end ${x/pat} delete pat
// x can be @ or *
-// ${x^pat} ${x^^pat} uppercase/g ${x,} ${x,,} lowercase/g (no pat = ?)
// ${x@QEPAa} Q=$'blah' E=blah without the $'' wrap, P=expand as $PS1
// A=declare that recreates var a=attribute flags
// x can be @*
@@ -1624,8 +1698,7 @@ static int expand_arg(struct sh_arg *arg, char *old, unsigned flags,
}
// Save result, aborting on expand error
- push_arg(delete, ss);
- if (expand_arg_nobrace(arg, ss, flags, delete, 0)) {
+ if (expand_arg_nobrace(arg, push_arg(delete, ss), flags, delete, 0)) {
llist_traverse(blist, free);
return 1;
@@ -2793,8 +2866,7 @@ dprintf(2, "TODO skipped init for((;;)), need math parser\n");
if ((err = expand_arg_nobrace(&arg, *vv++, NO_SPLIT, &blk->fdelete,
&arg2))) break;
s = arg.c ? *arg.v : "";
- match = wildcard_match(blk->fvar, strlen(blk->fvar), s, strlen(s),
- &arg2, 0);
+ match = wildcard_scan(blk->fvar, s, &arg2, 0);
if (match>=0 && !s[match]) break;
else if (**vv++ == ')') {
vv = 0;