aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Chu <andychu@google.com>2016-03-15 13:42:30 -0700
committerRob Landley <rob@landley.net>2016-03-16 11:59:56 -0500
commit14c91c1ebd68daacce9794cf8894dcfea68efd7b (patch)
tree052b6e72b9e3fa926e70a5f01690863836a8d2c6
parent2665cd0cf0d1116fae397d5b598a5ae1bd055afa (diff)
downloadtoybox-14c91c1ebd68daacce9794cf8894dcfea68efd7b.tar.gz
Fix the operator precedence in expr.
expr now uses the precedence table specified by POSIX, implemented using the "precedence climbing" algorithm. See the references at the top of eval_expr(). This fixes 3 of 4 failing tests. I also added more tests for correct behavior and for syntax errors. This includes a new test exposing a segfault, related to type coercion.
-rwxr-xr-xtests/expr.test33
-rw-r--r--toys/pending/expr.c135
2 files changed, 106 insertions, 62 deletions
diff --git a/tests/expr.test b/tests/expr.test
index c95d0301..69e4bc4d 100755
--- a/tests/expr.test
+++ b/tests/expr.test
@@ -17,5 +17,38 @@ testing "% * same priority" "expr 3 % 2 \* 4" "4\n" "" ""
testing "* % same priority" "expr 3 \* 2 % 4" "2\n" "" ""
testing "= > same priority" "expr 0 = 2 \> 3" "0\n" "" ""
testing "> = same priority" "expr 3 \> 2 = 1" "1\n" "" ""
+
+testing "/ by zero" "expr 1 / 0; echo \$?" "2\n" "" ""
+testing "% by zero" "expr 1 % 0; echo \$?" "2\n" "" ""
+
+testing "regex position" "expr ab21xx : '[^0-9]*[0-9]*'" "4\n" "" ""
+testing "regex extraction" "expr ab21xx : '[^0-9]*\([0-9]*\).*'" "21\n" "" ""
+testing "regex no match" "expr ab21xx : x" "0\n" "" ""
+
+# result of ':' regex match can subsequently be used for arithmetic
testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \
"24\n" "" ""
+
+testing "integer comparison" "expr -3 \< -2" "1\n" "" ""
+testing "string comparison" "expr -3 \< -2s" "0\n" "" ""
+testing "integer expr comparison" "expr 2 - 5 \< -2" "1\n" "" ""
+# result of arithmetic can subsequently be compared as a string
+testing "string expr comparison" "expr 2 - 5 \< -2s" "0\n" "" ""
+
+testing "parens around literal" "expr \( a \)" "a\n" "" ""
+
+testing "exit code when true" "expr a; echo \$?" "a\n0\n" "" ""
+testing "exit code when false" "expr 0; echo \$?" "0\n1\n" "" ""
+testing "exit code with syntax error" "expr \(; echo \$?" "2\n" "" ""
+testing "exit code when evaluating to 0" "expr -1 + 1; echo \$?" "0\n1\n" "" ""
+
+# BUG: segfaults because '3' is coerced to integer and regexc gets NULL
+testing "regex segfault" "expr 3 : '\(.\)'" "3\n" "" ""
+
+# syntax errors
+testing "no expression" "expr; echo \$?" "2\n" "" ""
+testing "empty ()" "expr \( \); echo \$?" "2\n" "" ""
+testing "missing )" "expr \( 1; echo \$?" "2\n" "" ""
+testing "missing outer )" "expr \( 1 + \( 2 \* 3 \); echo \$?" "2\n" "" ""
+testing "bad operator" "expr 1 @ 2; echo \$?" "2\n" "" ""
+testing "adjacent literals" "expr 1 2; echo \$?" "2\n" "" ""
diff --git a/toys/pending/expr.c b/toys/pending/expr.c
index 763ad02d..ac7edace 100644
--- a/toys/pending/expr.c
+++ b/toys/pending/expr.c
@@ -45,9 +45,8 @@ config EXPR
#define FOR_expr
#include "toys.h"
-
GLOBALS(
- int argidx;
+ char* tok; // current token, not on the stack since recursive calls mutate it
)
// Scalar value.
@@ -87,6 +86,7 @@ static void re(struct value *lhs, struct value *rhs)
regmatch_t rm[2];
xregcomp(&rp, rhs->s, 0);
+ // BUG: lhs->s is NULL when it looks like an integer, causing a segfault.
if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) {
if (rp.re_nsub > 0 && rm[1].rm_so >= 0)
lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so);
@@ -183,91 +183,102 @@ static void or(struct value *lhs, struct value *rhs)
if (is_zero(lhs)) *lhs = *rhs;
}
-static void get_value(struct value *v)
+// Converts an arg string to a value struct. Assumes arg != NULL.
+static void parse_value(char* arg, struct value *v)
{
- char *endp, *arg;
-
- if (TT.argidx == toys.optc) {
- v->i = 0;
- v->s = ""; // signal end of expression
- return;
- }
-
-// can't happen, the increment is after the == test
-// if (TT.argidx >= toys.optc) error_exit("syntax error");
-
- arg = toys.optargs[TT.argidx++];
-
+ char *endp;
v->i = strtoll(arg, &endp, 10);
+ // if non-NULL, there's still stuff left, and it's a string. Otherwise no
+ // string.
v->s = *endp ? arg : NULL;
}
-// check if v matches a token, and consume it if so
-static int match(struct value *v, char *tok)
-{
- if (v->s && !strcmp(v->s, tok)) {
- get_value(v);
- return 1;
- }
-
- return 0;
+void syntax_error(char *msg, ...) {
+ if (0) { // detailed message for debugging. TODO: add CFG_ var to enable
+ va_list va;
+ va_start(va, msg);
+ verror_msg(msg, 0, va);
+ va_end(va);
+ xexit();
+ } else
+ error_exit("syntax error");
}
-// operators in order of increasing precedence
+// operators grouped by precedence
static struct op {
char *tok;
-
+ char prec;
// calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs
void (*calc)(struct value *lhs, struct value *rhs);
-} ops[] = {
- {"|", or }, {"&", and }, {"=", eq }, {"==", eq }, {">", gt },
- {">=", gte }, {"<", lt }, {"<=", lte }, {"!=", ne }, {"+", add },
- {"-", sub }, {"*", mul }, {"/", divi}, {"%", mod }, {":", re },
- {"(", NULL}, // special case - must be last
+} OPS[] = {
+ {"|", 1, or },
+ {"&", 2, and },
+ {"=", 3, eq }, {"==", 3, eq }, {">", 3, gt }, {">=", 3, gte },
+ {"<", 3, lt }, {"<=", 3, lte }, {"!=", 3, ne },
+ {"+", 4, add }, {"-", 4, sub },
+ {"*", 5, mul }, {"/", 5, divi }, {"%", 5, mod },
+ {":", 6, re },
+ {"", 0, NULL}, // sentinel
};
-// "|,&,= ==> >=< <= !=,+-,*/%,:"
+// Point TT.tok at the next token. It's NULL when there are no more tokens.
+void advance() {
+ TT.tok = *toys.optargs++;
+}
-static void parse_op(struct value *lhs, struct value *tok, struct op *op)
+// Evalute a compound expression, setting 'ret'.
+//
+// This function uses the recursive "Precedence Climbing" algorithm:
+//
+// Clarke, Keith. "The top-down parsing of expressions." University of London.
+// Queen Mary College. Department of Computer Science and Statistics, 1986.
+//
+// http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
+//
+// Nice explanation and Python implementation:
+// http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
+static void eval_expr(struct value *ret, int min_prec)
{
- if (!op) op = ops;
-
- // special case parsing for parentheses
- if (*op->tok == '(') {
- if (match(tok, "(")) {
- parse_op(lhs, tok, 0);
- if (!match(tok, ")")) error_exit("syntax error"); // missing closing paren
- } else {
- // tok is a string or integer - return it and get the next token
- *lhs = *tok;
- get_value(tok);
- }
-
- return;
+ if (!TT.tok) syntax_error("Unexpected end of input");
+
+ // Evaluate LHS atom, setting 'ret'.
+ if (!strcmp(TT.tok, "(")) { // parenthesized expression
+ advance(); // consume (
+ eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1
+ if (!TT.tok) syntax_error("Expected )");
+ if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok);
+ advance(); // consume )
+ } else { // simple literal
+ parse_value(TT.tok, ret);
+ advance();
}
- parse_op(lhs, tok, op + 1);
- while (match(tok, op->tok)) {
- struct value rhs;
- parse_op(&rhs, tok, op + 1);
- if (rhs.s && !*rhs.s) error_exit("syntax error"); // premature end of expression
- op->calc(lhs, &rhs);
+ // Evaluate RHS and apply operator until precedence is too low.
+ struct value rhs;
+ while (TT.tok) {
+ struct op *o = OPS;
+ while (o->calc) { // Look up the precedence of operator TT.tok
+ if (!strcmp(TT.tok, o->tok)) break;
+ o++;
+ }
+ if (!o->calc) break; // Not an operator (extra input will fail later)
+ if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
+ advance();
+
+ eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
+ o->calc(ret, &rhs); // Apply operator, setting 'ret'.
}
}
void expr_main(void)
{
- struct value tok, ret = {0};
-
+ struct value ret = {0};
toys.exitval = 2; // if exiting early, indicate invalid expression
- TT.argidx = 0;
-
- get_value(&tok); // warm up the parser with the initial value
- parse_op(&ret, &tok, 0);
+ advance(); // initialize global token
+ eval_expr(&ret, 1);
- // final token should be end of expression
- if (!tok.s || *tok.s) error_exit("syntax error");
+ if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok);
if (ret.s) printf("%s\n", ret.s);
else printf("%lld\n", ret.i);