aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Chu <andychu@google.com>2016-03-15 13:52:25 -0700
committerRob Landley <rob@landley.net>2016-03-16 11:59:59 -0500
commite6912f90d663120b32b894d1f10a0d8cd530e2e7 (patch)
tree08dda2bf28fd14768ddfa4f4e490539dcb631af3
parent14c91c1ebd68daacce9794cf8894dcfea68efd7b (diff)
downloadtoybox-e6912f90d663120b32b894d1f10a0d8cd530e2e7.tar.gz
Fix type coercion bugs in expr.
All tests pass now; this fixes the 2 remaining failures, including a segfault. The structure of the code has changed a lot -- instead of having a tiny function per operator, we have eval_op() which does common type coercion and then evaluates the operator. I tried writing it a couple different ways, and this was the cleanest. The OPS table now contains the operator string, precedence level, signature for type coercion, and operator ID.
-rwxr-xr-xtests/expr.test7
-rw-r--r--toys/pending/expr.c290
2 files changed, 144 insertions, 153 deletions
diff --git a/tests/expr.test b/tests/expr.test
index 69e4bc4d..19db19eb 100755
--- a/tests/expr.test
+++ b/tests/expr.test
@@ -5,9 +5,10 @@
testing "integer" "expr 5" "5\n" "" ""
testing "integer negative" "expr -5" "-5\n" "" ""
testing "string" "expr astring" "astring\n" "" ""
-testing "1 + 3" "expr 1 + 3" "4\n" "" ""
+testing "addition" "expr 1 + 3" "4\n" "" ""
testing "5 + 6 * 3" "expr 5 + 6 \* 3" "23\n" "" ""
testing "( 5 + 6 ) * 3" "expr \( 5 + 6 \) \* 3" "33\n" "" ""
+testing ">" "expr 3 \> 2" "1\n" "" ""
testing "* / same priority" "expr 4 \* 3 / 2" "6\n" "" ""
testing "/ * same priority" "expr 3 / 2 \* 4" "4\n" "" ""
testing "& before |" "expr 0 \| 1 \& 0" "0\n" "" ""
@@ -24,6 +25,7 @@ testing "% by zero" "expr 1 % 0; echo \$?" "2\n" "" ""
testing "regex position" "expr ab21xx : '[^0-9]*[0-9]*'" "4\n" "" ""
testing "regex extraction" "expr ab21xx : '[^0-9]*\([0-9]*\).*'" "21\n" "" ""
testing "regex no match" "expr ab21xx : x" "0\n" "" ""
+testing "long str" "expr abcdefghijklmnopqrstuvwxyz : '\(.*\)' = a" "0\n" "" ""
# result of ':' regex match can subsequently be used for arithmetic
testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \
@@ -31,7 +33,7 @@ testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \
testing "integer comparison" "expr -3 \< -2" "1\n" "" ""
testing "string comparison" "expr -3 \< -2s" "0\n" "" ""
-testing "integer expr comparison" "expr 2 - 5 \< -2" "1\n" "" ""
+testing "integer expression comparison" "expr 2 - 5 \< -2" "1\n" "" ""
# result of arithmetic can subsequently be compared as a string
testing "string expr comparison" "expr 2 - 5 \< -2s" "0\n" "" ""
@@ -52,3 +54,4 @@ testing "missing )" "expr \( 1; echo \$?" "2\n" "" ""
testing "missing outer )" "expr \( 1 + \( 2 \* 3 \); echo \$?" "2\n" "" ""
testing "bad operator" "expr 1 @ 2; echo \$?" "2\n" "" ""
testing "adjacent literals" "expr 1 2; echo \$?" "2\n" "" ""
+testing "non-integer argument" "expr 1 + a; echo \$?" "2\n" "" ""
diff --git a/toys/pending/expr.c b/toys/pending/expr.c
index ac7edace..71d5d192 100644
--- a/toys/pending/expr.c
+++ b/toys/pending/expr.c
@@ -1,5 +1,6 @@
/* expr.c - evaluate expression
*
+ * Copyright 2016 Google Inc.
* Copyright 2013 Daniel Verkamp <daniel@drv.nu>
*
* http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
@@ -49,178 +50,166 @@ GLOBALS(
char* tok; // current token, not on the stack since recursive calls mutate it
)
-// Scalar value.
-// If s is NULL, the value is an integer (i).
-// If s is not NULL, the value is a string (s).
+// Scalar value. If s != NULL, it's a string, otherwise it's an int.
struct value {
char *s;
long long i;
};
-// check if v is the integer 0 or the empty string
-static int is_zero(struct value *v)
-{
- return v->s ? !*v->s : !v->i;
+void syntax_error(char *msg, ...) {
+ if (0) { // detailed message for debugging. TODO: add CFG_ var to enable
+ va_list va;
+ va_start(va, msg);
+ verror_msg(msg, 0, va);
+ va_end(va);
+ xexit();
+ } else
+ error_exit("syntax error");
}
-static char *num_to_str(long long num)
-{
- static char num_buf[21];
- snprintf(num_buf, sizeof(num_buf), "%lld", num);
- return num_buf;
-}
+#define LONG_LONG_MAX_LEN 21
-static int cmp(struct value *lhs, struct value *rhs)
+// Get the value as a string.
+void get_str(struct value *v, char** ret)
{
- if (lhs->s || rhs->s) {
- // at least one operand is a string
- char *ls = lhs->s ? lhs->s : num_to_str(lhs->i);
- char *rs = rhs->s ? rhs->s : num_to_str(rhs->i);
- return strcmp(ls, rs);
- } else return lhs->i - rhs->i;
+ if (v->s)
+ *ret = v->s;
+ else {
+ *ret = xmalloc(LONG_LONG_MAX_LEN);
+ snprintf(*ret, LONG_LONG_MAX_LEN, "%lld", v->i);
+ }
}
-static void re(struct value *lhs, struct value *rhs)
+// Get the value as an integer and return 1, or return 0 on error.
+int get_int(struct value *v, long long *ret)
{
- regex_t rp;
- regmatch_t rm[2];
-
- xregcomp(&rp, rhs->s, 0);
- // BUG: lhs->s is NULL when it looks like an integer, causing a segfault.
- if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) {
- if (rp.re_nsub > 0 && rm[1].rm_so >= 0)
- lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so);
- else {
- lhs->i = rm[0].rm_eo;
- lhs->s = 0;
- }
+ if (v->s) {
+ char *endp;
+ *ret = strtoll(v->s, &endp, 10);
+ return *endp ? 0 : 1; // If endp points to NUL, all chars were converted
} else {
- if (!rp.re_nsub) {
- lhs->i = 0;
- lhs->s = 0;
- } else lhs->s = "";
+ *ret = v->i;
+ return 1;
}
}
-static void mod(struct value *lhs, struct value *rhs)
-{
- if (lhs->s || rhs->s) error_exit("non-integer argument");
- if (is_zero(rhs)) error_exit("division by zero");
- lhs->i %= rhs->i;
-}
-
-static void divi(struct value *lhs, struct value *rhs)
-{
- if (lhs->s || rhs->s) error_exit("non-integer argument");
- if (is_zero(rhs)) error_exit("division by zero");
- lhs->i /= rhs->i;
-}
-
-static void mul(struct value *lhs, struct value *rhs)
-{
- if (lhs->s || rhs->s) error_exit("non-integer argument");
- lhs->i *= rhs->i;
-}
-
-static void sub(struct value *lhs, struct value *rhs)
-{
- if (lhs->s || rhs->s) error_exit("non-integer argument");
- lhs->i -= rhs->i;
-}
-
-static void add(struct value *lhs, struct value *rhs)
-{
- if (lhs->s || rhs->s) error_exit("non-integer argument");
- lhs->i += rhs->i;
-}
-
-static void ne(struct value *lhs, struct value *rhs)
-{
- lhs->i = cmp(lhs, rhs) != 0;
- lhs->s = NULL;
-}
-
-static void lte(struct value *lhs, struct value *rhs)
-{
- lhs->i = cmp(lhs, rhs) <= 0;
- lhs->s = NULL;
-}
-
-static void lt(struct value *lhs, struct value *rhs)
-{
- lhs->i = cmp(lhs, rhs) < 0;
- lhs->s = NULL;
-}
-
-static void gte(struct value *lhs, struct value *rhs)
+// Preserve the invariant that v.s is NULL when the value is an integer.
+void assign_int(struct value *v, long long i)
{
- lhs->i = cmp(lhs, rhs) >= 0;
- lhs->s = NULL;
+ v->i = i;
+ v->s = NULL;
}
-static void gt(struct value *lhs, struct value *rhs)
+// Check if v is 0 or the empty string.
+static int is_false(struct value *v)
{
- lhs->i = cmp(lhs, rhs) > 0;
- lhs->s = NULL;
+ if (v->s)
+ return !*v->s || !strcmp(v->s, "0"); // get_int("0") -> 0
+ else
+ return !v->i;
}
-static void eq(struct value *lhs, struct value *rhs)
+// 'ret' is filled with a string capture or int match position.
+static void re(char *target, char *pattern, struct value *ret)
{
- lhs->i = !cmp(lhs, rhs);
- lhs->s = NULL;
-}
-
-static void and(struct value *lhs, struct value *rhs)
-{
- if (is_zero(lhs) || is_zero(rhs)) {
- lhs->i = 0;
- lhs->s = NULL;
+ regex_t pat;
+ regmatch_t m[2];
+
+ xregcomp(&pat, pattern, 0);
+ if (!regexec(&pat, target, 2, m, 0) && m[0].rm_so == 0) { // match at pos 0
+ regmatch_t* g1 = &m[1]; // group capture 1
+ if (pat.re_nsub > 0 && g1->rm_so >= 0) // has capture
+ ret->s = xmprintf("%.*s", g1->rm_eo - g1->rm_so, target + g1->rm_so);
+ else
+ assign_int(ret, m[0].rm_eo);
+ } else { // no match
+ if (pat.re_nsub > 0) // has capture
+ ret->s = "";
+ else
+ assign_int(ret, 0);
}
}
-static void or(struct value *lhs, struct value *rhs)
-{
- if (is_zero(lhs)) *lhs = *rhs;
-}
+// 4 different signatures of operators. S = string, I = int, SI = string or
+// int.
+enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
-// Converts an arg string to a value struct. Assumes arg != NULL.
-static void parse_value(char* arg, struct value *v)
-{
- char *endp;
- v->i = strtoll(arg, &endp, 10);
- // if non-NULL, there's still stuff left, and it's a string. Otherwise no
- // string.
- v->s = *endp ? arg : NULL;
-}
-
-void syntax_error(char *msg, ...) {
- if (0) { // detailed message for debugging. TODO: add CFG_ var to enable
- va_list va;
- va_start(va, msg);
- verror_msg(msg, 0, va);
- va_end(va);
- xexit();
- } else
- error_exit("syntax error");
-}
+enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
// operators grouped by precedence
-static struct op {
+static struct op_def {
char *tok;
- char prec;
- // calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs
- void (*calc)(struct value *lhs, struct value *rhs);
+ char prec, sig, op; // precedence, signature for type coercion, operator ID
} OPS[] = {
- {"|", 1, or },
- {"&", 2, and },
- {"=", 3, eq }, {"==", 3, eq }, {">", 3, gt }, {">=", 3, gte },
- {"<", 3, lt }, {"<=", 3, lte }, {"!=", 3, ne },
- {"+", 4, add }, {"-", 4, sub },
- {"*", 5, mul }, {"/", 5, divi }, {"%", 5, mod },
- {":", 6, re },
- {"", 0, NULL}, // sentinel
+ // logical ops, precedence 1 and 2, signature SI_TO_SI
+ {"|", 1, SI_TO_SI, OR },
+ {"&", 2, SI_TO_SI, AND },
+ // comparison ops, precedence 3, signature SI_TO_I
+ {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
+ {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
+ {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
+ // arithmetic ops, precedence 4 and 5, signature I_TO_I
+ {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
+ {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
+ // regex match, precedence 6, signature S_TO_SI
+ {":", 6, S_TO_SI, RE },
+ {NULL, 0, 0, 0}, // sentinel
};
+void eval_op(struct op_def *o, struct value *ret, struct value *rhs) {
+ long long a, b, x = 0; // x = a OP b for ints.
+ char *s, *t; // string operands
+ int cmp;
+
+ switch (o->sig) {
+
+ case SI_TO_SI:
+ switch (o->op) {
+ case OR: if (is_false(ret)) *ret = *rhs; break;
+ case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
+ }
+ break;
+
+ case SI_TO_I:
+ if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
+ cmp = a - b;
+ } else { // otherwise compare both as strings
+ get_str(ret, &s);
+ get_str(rhs, &t);
+ cmp = strcmp(s, t);
+ }
+ switch (o->op) {
+ case EQ: x = cmp == 0; break;
+ case NE: x = cmp != 0; break;
+ case GT: x = cmp > 0; break;
+ case GTE: x = cmp >= 0; break;
+ case LT: x = cmp < 0; break;
+ case LTE: x = cmp <= 0; break;
+ }
+ assign_int(ret, x);
+ break;
+
+ case I_TO_I:
+ if (!get_int(ret, &a) || !get_int(rhs, &b))
+ error_exit("non-integer argument");
+ switch (o->op) {
+ case ADD: x = a + b; break;
+ case SUB: x = a - b; break;
+ case MUL: x = a * b; break;
+ case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
+ case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
+ }
+ assign_int(ret, x);
+ break;
+
+ case S_TO_SI: // op == RE
+ get_str(ret, &s);
+ get_str(rhs, &t);
+ re(s, t, ret);
+ break;
+ }
+}
+
// Point TT.tok at the next token. It's NULL when there are no more tokens.
void advance() {
TT.tok = *toys.optargs++;
@@ -243,45 +232,44 @@ static void eval_expr(struct value *ret, int min_prec)
// Evaluate LHS atom, setting 'ret'.
if (!strcmp(TT.tok, "(")) { // parenthesized expression
- advance(); // consume (
- eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1
+ advance(); // consume (
+ eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
if (!TT.tok) syntax_error("Expected )");
if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok);
- advance(); // consume )
- } else { // simple literal
- parse_value(TT.tok, ret);
+ advance(); // consume )
+ } else { // simple literal
+ ret->s = TT.tok; // all values start as strings
advance();
}
// Evaluate RHS and apply operator until precedence is too low.
struct value rhs;
while (TT.tok) {
- struct op *o = OPS;
- while (o->calc) { // Look up the precedence of operator TT.tok
+ struct op_def *o = OPS;
+ while (o->tok) { // Look up operator
if (!strcmp(TT.tok, o->tok)) break;
o++;
}
- if (!o->calc) break; // Not an operator (extra input will fail later)
+ if (!o->tok) break; // Not an operator (extra input will fail later)
if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
advance();
eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
- o->calc(ret, &rhs); // Apply operator, setting 'ret'.
+ eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
}
}
void expr_main(void)
{
struct value ret = {0};
- toys.exitval = 2; // if exiting early, indicate invalid expression
+ toys.exitval = 2; // if exiting early, indicate error
advance(); // initialize global token
eval_expr(&ret, 1);
-
if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok);
if (ret.s) printf("%s\n", ret.s);
else printf("%lld\n", ret.i);
- exit(is_zero(&ret));
+ exit(is_false(&ret));
}