aboutsummaryrefslogtreecommitdiff
path: root/toys/pending/expr.c
blob: 25aa3cd6d8f2914868737604b1e9845ab3c79323 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/* expr.c - evaluate expression
 *
 * Copyright 2016 Google Inc.
 * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
 *
 * The web standard is incomplete (precedence grouping missing), see:
 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
 *
 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
 *
 * Clarke, Keith. "The top-down parsing of expressions." University of London.
 * Queen Mary College. Department of Computer Science and Statistics, 1986.
 *
 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
 *
 * Nice explanation and Python implementation:
 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing

USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))

config EXPR
  bool "expr"
  default n
  help
    usage: expr ARG1 OPERATOR ARG2...

    Evaluate expression and print result. For example, "expr 1 + 2".

    The supported operators are (grouped from highest to lowest priority):

      ( )    :    * / %    + -    != <= < >= > =    &    |

    Each constant and operator must be a separate command line argument.
    All operators are infix, meaning they expect a constant (or expression
    that resolves to a constant) on each side of the operator. Operators of
    the same priority (within each group above) are evaluated left to right.
    Parentheses may be used (as separate arguments) to elevate the priority
    of expressions.

    Calling expr from a command shell requires a lot of \( or '*' escaping
    to avoid interpreting shell control characters.

    The & and | operators are logical (not bitwise) and may operate on
    strings (a blank string is "false"). Comparison operators may also
    operate on strings (alphabetical sort).

    Constants may be strings or integers. Comparison, logical, and regex
    operators may operate on strings (a blank string is "false"), other
    operators require integers.
*/

// TODO: int overflow checking

#define FOR_expr
#include "toys.h"

GLOBALS(
  char **tok; // current token, not on the stack since recursive calls mutate it
)

// Scalar value.  If s != NULL, it's a string, otherwise it's an int.
struct value {
  char *s;
  long long i;
};

// Get the value as a string.
char *get_str(struct value *v)
{
  if (v->s) return v->s;
  else return xmprintf("%lld", v->i);
}

// Get the value as an integer and return 1, or return 0 on error.
int get_int(struct value *v, long long *ret)
{
  if (v->s) {
    char *endp;

    *ret = strtoll(v->s, &endp, 10);

    if (*endp) return 0; // If endp points to NUL, all chars were converted
  } else *ret = v->i;

  return 1;
}

// Preserve the invariant that v.s is NULL when the value is an integer.
void assign_int(struct value *v, long long i)
{
  v->i = i;
  v->s = NULL;
}

// Check if v is 0 or the empty string.
static int is_false(struct value *v)
{
  if (v->s) return !*v->s || !strcmp(v->s, "0");  // get_int("0") -> 0
  return !v->i;
}

// 'ret' is filled with a string capture or int match position.
static void re(char *target, char *pattern, struct value *ret)
{
  regex_t pat;
  regmatch_t m[2];

  xregcomp(&pat, pattern, 0);
  if (!regexec(&pat, target, 2, m, 0) && m[0].rm_so == 0) { // match at pos 0
    regmatch_t *g1 = &m[1]; // group capture 1
    if (pat.re_nsub > 0 && g1->rm_so >= 0) // has capture
      ret->s = xmprintf("%.*s", g1->rm_eo - g1->rm_so, target + g1->rm_so);
    else assign_int(ret, m[0].rm_eo);
  } else { // no match
    // has capture
    if (pat.re_nsub > 0) ret->s = "";
    else assign_int(ret, 0);
  }
}

// 4 different signatures of operators.  S = string, I = int, SI = string or
// int.
enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };

enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };

// operators grouped by precedence
static struct op_def {
  char *tok;
  char prec, sig, op; // precedence, signature for type coercion, operator ID
} OPS[] = {
  // logical ops, precedence 1 and 2, signature SI_TO_SI
  {"|", 1, SI_TO_SI, OR  },
  {"&", 2, SI_TO_SI, AND },
  // comparison ops, precedence 3, signature SI_TO_I
  {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
  {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
  {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE }, 
  // arithmetic ops, precedence 4 and 5, signature I_TO_I
  {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
  {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
  // regex match, precedence 6, signature S_TO_SI
  {":", 6, S_TO_SI, RE },
  {NULL, 0, 0, 0}, // sentinel
};

void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
{
  long long a, b, x = 0; // x = a OP b for ints.
  char *s, *t; // string operands
  int cmp;

  switch (o->sig) {

  case SI_TO_SI:
    switch (o->op) {
    case OR:  if (is_false(ret)) *ret = *rhs; break;
    case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
    }
    break;  

  case SI_TO_I:
    if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
      cmp = a - b;
    } else { // otherwise compare both as strings
      cmp = strcmp(s = get_str(ret), t = get_str(rhs));
      if (ret->s != s) free(s);
      if (rhs->s != t) free(t);
    }
    switch (o->op) {
    case EQ:  x = cmp == 0; break;
    case NE:  x = cmp != 0; break;
    case GT:  x = cmp >  0; break;
    case GTE: x = cmp >= 0; break;
    case LT:  x = cmp <  0; break;
    case LTE: x = cmp <= 0; break;
    }
    assign_int(ret, x);
    break;

  case I_TO_I:
    if (!get_int(ret, &a) || !get_int(rhs, &b))
      error_exit("non-integer argument");
    switch (o->op) {
    case ADD: x = a + b; break;
    case SUB: x = a - b; break;
    case MUL: x = a * b; break;
    case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
    case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
    }
    assign_int(ret, x);
    break;

  case S_TO_SI: // op == RE
    s = get_str(ret);
    cmp = ret->s!=s; // ret overwritten by re so check now
    re(s, t = get_str(rhs), ret);
    if (cmp) free(s);
    if (rhs->s!=t) free(t);
    break;
  }
}

// Evalute a compound expression using recursive "Precedence Climbing"
// algorithm, setting 'ret'.
static void eval_expr(struct value *ret, int min_prec)
{
  if (!*TT.tok) error_exit("Unexpected end of input");

  // Evaluate LHS atom, setting 'ret'.
  if (!strcmp(*TT.tok, "(")) { // parenthesized expression
    TT.tok++;  // consume (

    eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
    if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
    if (!*TT.tok) error_exit("Expected )");
    if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
  } else ret->s = *TT.tok;  // simple literal, all values start as strings
  TT.tok++;

  // Evaluate RHS and apply operator until precedence is too low.
  struct value rhs;
  while (*TT.tok) {
    struct op_def *o = OPS;

    while (o->tok) { // Look up operator
      if (!strcmp(*TT.tok, o->tok)) break;
      o++;
    }
    if (!o->tok) break; // Not an operator (extra input will fail later)
    if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
    TT.tok++;

    eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
    eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
  }
}

void expr_main(void)
{
  struct value ret = {0};

  toys.exitval = 2; // if exiting early, indicate error
  TT.tok = toys.optargs; // initialize global token
  eval_expr(&ret, 1);
  if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);

  if (ret.s) printf("%s\n", ret.s);
  else printf("%lld\n", ret.i);

  exit(is_false(&ret));
}