From e6912f90d663120b32b894d1f10a0d8cd530e2e7 Mon Sep 17 00:00:00 2001 From: Andy Chu Date: Tue, 15 Mar 2016 13:52:25 -0700 Subject: Fix type coercion bugs in expr. All tests pass now; this fixes the 2 remaining failures, including a segfault. The structure of the code has changed a lot -- instead of having a tiny function per operator, we have eval_op() which does common type coercion and then evaluates the operator. I tried writing it a couple different ways, and this was the cleanest. The OPS table now contains the operator string, precedence level, signature for type coercion, and operator ID. --- toys/pending/expr.c | 290 +++++++++++++++++++++++++--------------------------- 1 file changed, 139 insertions(+), 151 deletions(-) (limited to 'toys') diff --git a/toys/pending/expr.c b/toys/pending/expr.c index ac7edace..71d5d192 100644 --- a/toys/pending/expr.c +++ b/toys/pending/expr.c @@ -1,5 +1,6 @@ /* expr.c - evaluate expression * + * Copyright 2016 Google Inc. * Copyright 2013 Daniel Verkamp * * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html @@ -49,178 +50,166 @@ GLOBALS( char* tok; // current token, not on the stack since recursive calls mutate it ) -// Scalar value. -// If s is NULL, the value is an integer (i). -// If s is not NULL, the value is a string (s). +// Scalar value. If s != NULL, it's a string, otherwise it's an int. struct value { char *s; long long i; }; -// check if v is the integer 0 or the empty string -static int is_zero(struct value *v) -{ - return v->s ? !*v->s : !v->i; +void syntax_error(char *msg, ...) { + if (0) { // detailed message for debugging. TODO: add CFG_ var to enable + va_list va; + va_start(va, msg); + verror_msg(msg, 0, va); + va_end(va); + xexit(); + } else + error_exit("syntax error"); } -static char *num_to_str(long long num) -{ - static char num_buf[21]; - snprintf(num_buf, sizeof(num_buf), "%lld", num); - return num_buf; -} +#define LONG_LONG_MAX_LEN 21 -static int cmp(struct value *lhs, struct value *rhs) +// Get the value as a string. +void get_str(struct value *v, char** ret) { - if (lhs->s || rhs->s) { - // at least one operand is a string - char *ls = lhs->s ? lhs->s : num_to_str(lhs->i); - char *rs = rhs->s ? rhs->s : num_to_str(rhs->i); - return strcmp(ls, rs); - } else return lhs->i - rhs->i; + if (v->s) + *ret = v->s; + else { + *ret = xmalloc(LONG_LONG_MAX_LEN); + snprintf(*ret, LONG_LONG_MAX_LEN, "%lld", v->i); + } } -static void re(struct value *lhs, struct value *rhs) +// Get the value as an integer and return 1, or return 0 on error. +int get_int(struct value *v, long long *ret) { - regex_t rp; - regmatch_t rm[2]; - - xregcomp(&rp, rhs->s, 0); - // BUG: lhs->s is NULL when it looks like an integer, causing a segfault. - if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) { - if (rp.re_nsub > 0 && rm[1].rm_so >= 0) - lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so); - else { - lhs->i = rm[0].rm_eo; - lhs->s = 0; - } + if (v->s) { + char *endp; + *ret = strtoll(v->s, &endp, 10); + return *endp ? 0 : 1; // If endp points to NUL, all chars were converted } else { - if (!rp.re_nsub) { - lhs->i = 0; - lhs->s = 0; - } else lhs->s = ""; + *ret = v->i; + return 1; } } -static void mod(struct value *lhs, struct value *rhs) -{ - if (lhs->s || rhs->s) error_exit("non-integer argument"); - if (is_zero(rhs)) error_exit("division by zero"); - lhs->i %= rhs->i; -} - -static void divi(struct value *lhs, struct value *rhs) -{ - if (lhs->s || rhs->s) error_exit("non-integer argument"); - if (is_zero(rhs)) error_exit("division by zero"); - lhs->i /= rhs->i; -} - -static void mul(struct value *lhs, struct value *rhs) -{ - if (lhs->s || rhs->s) error_exit("non-integer argument"); - lhs->i *= rhs->i; -} - -static void sub(struct value *lhs, struct value *rhs) -{ - if (lhs->s || rhs->s) error_exit("non-integer argument"); - lhs->i -= rhs->i; -} - -static void add(struct value *lhs, struct value *rhs) -{ - if (lhs->s || rhs->s) error_exit("non-integer argument"); - lhs->i += rhs->i; -} - -static void ne(struct value *lhs, struct value *rhs) -{ - lhs->i = cmp(lhs, rhs) != 0; - lhs->s = NULL; -} - -static void lte(struct value *lhs, struct value *rhs) -{ - lhs->i = cmp(lhs, rhs) <= 0; - lhs->s = NULL; -} - -static void lt(struct value *lhs, struct value *rhs) -{ - lhs->i = cmp(lhs, rhs) < 0; - lhs->s = NULL; -} - -static void gte(struct value *lhs, struct value *rhs) +// Preserve the invariant that v.s is NULL when the value is an integer. +void assign_int(struct value *v, long long i) { - lhs->i = cmp(lhs, rhs) >= 0; - lhs->s = NULL; + v->i = i; + v->s = NULL; } -static void gt(struct value *lhs, struct value *rhs) +// Check if v is 0 or the empty string. +static int is_false(struct value *v) { - lhs->i = cmp(lhs, rhs) > 0; - lhs->s = NULL; + if (v->s) + return !*v->s || !strcmp(v->s, "0"); // get_int("0") -> 0 + else + return !v->i; } -static void eq(struct value *lhs, struct value *rhs) +// 'ret' is filled with a string capture or int match position. +static void re(char *target, char *pattern, struct value *ret) { - lhs->i = !cmp(lhs, rhs); - lhs->s = NULL; -} - -static void and(struct value *lhs, struct value *rhs) -{ - if (is_zero(lhs) || is_zero(rhs)) { - lhs->i = 0; - lhs->s = NULL; + regex_t pat; + regmatch_t m[2]; + + xregcomp(&pat, pattern, 0); + if (!regexec(&pat, target, 2, m, 0) && m[0].rm_so == 0) { // match at pos 0 + regmatch_t* g1 = &m[1]; // group capture 1 + if (pat.re_nsub > 0 && g1->rm_so >= 0) // has capture + ret->s = xmprintf("%.*s", g1->rm_eo - g1->rm_so, target + g1->rm_so); + else + assign_int(ret, m[0].rm_eo); + } else { // no match + if (pat.re_nsub > 0) // has capture + ret->s = ""; + else + assign_int(ret, 0); } } -static void or(struct value *lhs, struct value *rhs) -{ - if (is_zero(lhs)) *lhs = *rhs; -} +// 4 different signatures of operators. S = string, I = int, SI = string or +// int. +enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI }; -// Converts an arg string to a value struct. Assumes arg != NULL. -static void parse_value(char* arg, struct value *v) -{ - char *endp; - v->i = strtoll(arg, &endp, 10); - // if non-NULL, there's still stuff left, and it's a string. Otherwise no - // string. - v->s = *endp ? arg : NULL; -} - -void syntax_error(char *msg, ...) { - if (0) { // detailed message for debugging. TODO: add CFG_ var to enable - va_list va; - va_start(va, msg); - verror_msg(msg, 0, va); - va_end(va); - xexit(); - } else - error_exit("syntax error"); -} +enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE }; // operators grouped by precedence -static struct op { +static struct op_def { char *tok; - char prec; - // calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs - void (*calc)(struct value *lhs, struct value *rhs); + char prec, sig, op; // precedence, signature for type coercion, operator ID } OPS[] = { - {"|", 1, or }, - {"&", 2, and }, - {"=", 3, eq }, {"==", 3, eq }, {">", 3, gt }, {">=", 3, gte }, - {"<", 3, lt }, {"<=", 3, lte }, {"!=", 3, ne }, - {"+", 4, add }, {"-", 4, sub }, - {"*", 5, mul }, {"/", 5, divi }, {"%", 5, mod }, - {":", 6, re }, - {"", 0, NULL}, // sentinel + // logical ops, precedence 1 and 2, signature SI_TO_SI + {"|", 1, SI_TO_SI, OR }, + {"&", 2, SI_TO_SI, AND }, + // comparison ops, precedence 3, signature SI_TO_I + {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE }, + {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE }, + {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE }, + // arithmetic ops, precedence 4 and 5, signature I_TO_I + {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB }, + {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD }, + // regex match, precedence 6, signature S_TO_SI + {":", 6, S_TO_SI, RE }, + {NULL, 0, 0, 0}, // sentinel }; +void eval_op(struct op_def *o, struct value *ret, struct value *rhs) { + long long a, b, x = 0; // x = a OP b for ints. + char *s, *t; // string operands + int cmp; + + switch (o->sig) { + + case SI_TO_SI: + switch (o->op) { + case OR: if (is_false(ret)) *ret = *rhs; break; + case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break; + } + break; + + case SI_TO_I: + if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints + cmp = a - b; + } else { // otherwise compare both as strings + get_str(ret, &s); + get_str(rhs, &t); + cmp = strcmp(s, t); + } + switch (o->op) { + case EQ: x = cmp == 0; break; + case NE: x = cmp != 0; break; + case GT: x = cmp > 0; break; + case GTE: x = cmp >= 0; break; + case LT: x = cmp < 0; break; + case LTE: x = cmp <= 0; break; + } + assign_int(ret, x); + break; + + case I_TO_I: + if (!get_int(ret, &a) || !get_int(rhs, &b)) + error_exit("non-integer argument"); + switch (o->op) { + case ADD: x = a + b; break; + case SUB: x = a - b; break; + case MUL: x = a * b; break; + case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break; + case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break; + } + assign_int(ret, x); + break; + + case S_TO_SI: // op == RE + get_str(ret, &s); + get_str(rhs, &t); + re(s, t, ret); + break; + } +} + // Point TT.tok at the next token. It's NULL when there are no more tokens. void advance() { TT.tok = *toys.optargs++; @@ -243,45 +232,44 @@ static void eval_expr(struct value *ret, int min_prec) // Evaluate LHS atom, setting 'ret'. if (!strcmp(TT.tok, "(")) { // parenthesized expression - advance(); // consume ( - eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1 + advance(); // consume ( + eval_expr(ret, 1); // We're inside ( ), so min_prec = 1 if (!TT.tok) syntax_error("Expected )"); if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok); - advance(); // consume ) - } else { // simple literal - parse_value(TT.tok, ret); + advance(); // consume ) + } else { // simple literal + ret->s = TT.tok; // all values start as strings advance(); } // Evaluate RHS and apply operator until precedence is too low. struct value rhs; while (TT.tok) { - struct op *o = OPS; - while (o->calc) { // Look up the precedence of operator TT.tok + struct op_def *o = OPS; + while (o->tok) { // Look up operator if (!strcmp(TT.tok, o->tok)) break; o++; } - if (!o->calc) break; // Not an operator (extra input will fail later) + if (!o->tok) break; // Not an operator (extra input will fail later) if (o->prec < min_prec) break; // Precedence too low, pop a stack frame advance(); eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence - o->calc(ret, &rhs); // Apply operator, setting 'ret'. + eval_op(o, ret, &rhs); // Apply operator, setting 'ret' } } void expr_main(void) { struct value ret = {0}; - toys.exitval = 2; // if exiting early, indicate invalid expression + toys.exitval = 2; // if exiting early, indicate error advance(); // initialize global token eval_expr(&ret, 1); - if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok); if (ret.s) printf("%s\n", ret.s); else printf("%lld\n", ret.i); - exit(is_zero(&ret)); + exit(is_false(&ret)); } -- cgit v1.2.3