aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2014-11-12 14:47:57 -0600
committerRob Landley <rob@landley.net>2014-11-12 14:47:57 -0600
commitcb652fa275c1806277ff07a98c864dc8c0335e0f (patch)
treeccd2d8fba2c503c32f5468f5633ec5a9dcf94a81
parent6e087a3a6fb420efc5cb77e13818668212034225 (diff)
downloadtoybox-cb652fa275c1806277ff07a98c864dc8c0335e0f.tar.gz
Implement more sed plumbing, including s/// (without \1 or & yet).
-rw-r--r--toys/pending/sed.c304
1 files changed, 210 insertions, 94 deletions
diff --git a/toys/pending/sed.c b/toys/pending/sed.c
index 551aff9d..3d3dcbd3 100644
--- a/toys/pending/sed.c
+++ b/toys/pending/sed.c
@@ -179,10 +179,8 @@ struct step {
// Begin and end of each match
long lmatch[2];
- int rmatch[2]; // offset to regex_t, because realloc() would confuse pointer
+ int rmatch[2], arg1, arg2, w; // offsets because remalloc()
unsigned not, hit, sflags;
-
- int arg1, arg2, arg3; // offset start of to argument (string or regex_t)
char c; // action
};
@@ -208,6 +206,12 @@ static int ghostwheel(regex_t *preg, char *string, long len, int nmatch,
regmatch_t pmatch[], int eflags)
{
/*
+ while (len && !*string) {
+ string++;
+ len--;
+ int l = strlen(string);
+ if (len != strlen(string))
+
// todo: this
long start = 0, rc = 0, matches = 0;
@@ -221,8 +225,8 @@ static int ghostwheel(regex_t *preg, char *string, long len, int nmatch,
return rc;
*/
- return regexec(preg, string, nmatch, pmatch, eflags);
+ return regexec(preg, string, nmatch, pmatch, eflags);
}
// Extend allocation to include new string, with newline between if newlen<0
@@ -248,7 +252,7 @@ static void walk_pattern(char **pline, long plen)
struct append *next, *prev;
int file;
char *str;
- } *append;
+ } *append = 0;
char *line = TT.nextline;
long len = TT.nextlen;
struct step *logrus;
@@ -269,6 +273,7 @@ static void walk_pattern(char **pline, long plen)
logrus = TT.restart ? TT.restart : (void *)TT.pattern;
TT.restart = 0;
+
while (logrus) {
char *str, c = logrus->c;
@@ -311,6 +316,7 @@ static void walk_pattern(char **pline, long plen)
if (logrus->c == '}') curly--;
}
}
+ logrus = logrus->next;
continue;
}
// Deferred disable from regex end match
@@ -324,10 +330,14 @@ static void walk_pattern(char **pline, long plen)
a->str = logrus->arg1+(char *)logrus;
a->file = c== 'r';
dlist_add_nomalloc((void *)&append, (void *)a);
- } else if (c=='b' || c=='t' || c=='T') {
- if (c=='b' || tea^(c=='T')) {
+ } else if (c=='b' || c=='t' || c=='T') {
+ int t = tea;
+
+ if (c != 'b') tea = 0;
+ if (c=='b' || t^(c=='T')) {
str = logrus->arg1+(char *)logrus;
+
if (!*str) break;
for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next)
if (logrus->c == ':' && !strcmp(logrus->arg1+(char *)logrus, str))
@@ -389,18 +399,76 @@ static void walk_pattern(char **pline, long plen)
} else if (c=='p') {
if (emit(line, len, eol)) break;
} else if (c=='q') break;
-// else if (c=='s') {
-// tea = 1;
-// }
- else if (c=='w') {
- int fd = TT.fdout, noeol = TT.noeol;
-
- TT.fdout = logrus->arg2;
- TT.noeol = logrus->arg3;
-
+ else if (c=='s') {
+ char *rline = line, *new = logrus->arg2 + (char *)logrus;;
+ regmatch_t *match = (void *)toybuf;
+ regex_t *reg = (void *)(logrus->arg1+(char *)logrus);
+ unsigned mflags = 0, i = 0, rlen = len, mlen, off, newlen;
+
+ // Find match
+ while (!ghostwheel(reg, rline, rlen, 10, match, mflags)) {
+ mflags = REG_NOTBOL;
+
+ // Zero length matches don't count immediately after last match
+ mlen = match[0].rm_eo-match[0].rm_so;
+ if (!mlen && !match[0].rm_so) {
+ if (!rlen--) break;
+ rline++;
+ continue;
+ }
+
+ // Replace only a specific match?
+ off = logrus->sflags>>3;
+ if (off && off != ++i) {
+ rline += match[0].rm_eo;
+ rlen -= match[0].rm_eo;
+
+ continue;
+ }
+
+ // The strlen() and memcpy don't support \1 or & yet.
+ newlen = strlen(new);
+
+ len = len-mlen+newlen;
+ off = rline-line;
+ line = xrealloc(line, len+1);
+ rline = line+off;
+
+ rlen -= match[0].rm_eo;
+ memmove(rline+match[0].rm_so+newlen, rline+match[0].rm_eo, rlen+1);
+ memcpy(rline+match[0].rm_so, new, newlen);
+ rline += newlen+match[0].rm_so;
+
+ // flag g
+ if (!(logrus->sflags & 2)) break;
+ }
+
+ if (mflags) {
+ // flag p
+ if (logrus->sflags & 4) emit(line, len, eol);
+
+ tea = 1;
+ if (logrus->w) goto writenow;
+ }
+ } else if (c=='w') {
+ int fd, noeol;
+ char *name;
+
+writenow:
+ // Swap out emit() context
+ fd = TT.fdout;
+ noeol = TT.noeol;
+
+ // We save filehandle and newline status before filename
+ name = logrus->w + (char *)logrus;
+ memcpy(&TT.fdout, name, 4);
+ name += 4;
+ TT.noeol = *(name++);
+
+ // write, then save/restore context
if (emit(line, len, eol))
perror_exit("w '%s'", logrus->arg1+(char *)logrus);
- logrus->arg3 = TT.noeol;
+ *(--name) = TT.noeol;
TT.noeol = noeol;
TT.fdout = fd;
} else if (c=='x') {
@@ -411,9 +479,18 @@ static void walk_pattern(char **pline, long plen)
line = str;
TT.rememberlen = len;
len = swap;
-// } else if (c=='y') {
+ } else if (c=='y') {
+ char *from, *to = (char *)logrus;
+ int i, j;
+
+ from = to+logrus->arg1;
+ to += logrus->arg2;
+
+ for (i = 0; i < len; i++) {
+ j = stridx(from, line[i]);
+ if (j != -1) line[i] = to[j];
+ }
} else if (c=='=') xprintf("%ld\n", TT.count);
- // labcirstTwy
else if (c!=':') error_exit("todo: %c", c);
logrus = logrus->next;
@@ -499,47 +576,50 @@ static void do_sed(int fd, char *name)
}
}
-// Note: removing backslash escapes edits the source string, which could
-// be from the environment space via -e, which could screw up what
-// "ps" sees, and I'm ok with that.
+// Note: removing backslash escapes and null terminating edits the source
+// string, which could be from the environment space via -e, which could
+// screw up what "ps" sees, and I'm ok with that. (Modifying the environment
+// space like that means sed is very, very not reentrant.)
+
+// Ok, what happens if we xexec() sed with constant arguments then?
+// TODO: ^^^ that
-// extract regex up to delimeter, converting \escapes
-// You can't use \ as delimiter because how would you escape anything?
-static int parse_regex(regex_t *reg, char **pstr, char delim)
+// returns length of processed string, *pstr advances to next unused char,
+// if delim (or *delim) is 0 uses starting char as delimiter, otherwise
+// parses and saves delimiter from first character(s)
+static int unescape_delimited_string(char **pstr, char *delim)
{
- char *to, *from = *pstr;
+ char *to, *from = *pstr, d;
int rc;
- if (delim == '\\') rc = 0;
- else {
- for (to = from; *from != delim; *(to++) = *(from++)) {
- if (!*from) break;
- if (*from == '\\') {
- if (!from[1]) break;
-
- // Check escaped end delimiter before printf style escapes.
- if (from[1] == delim) from++;
- else {
- char c = unescape(from[1]);
-
- if (c) {
- *to = c;
- from++;
- }
+ if (!delim || !*delim) {
+ if (!(d = *(from++))) return -1;
+ if (d == '\\') d = *(from++);
+ if (!d || d == '\\') return -1;
+ if (delim) *delim = d;
+ } else d = *delim;
+
+ for (to = *pstr; *from != d; *(to++) = *(from++)) {
+ if (!*from) return -1;
+ if (*from == '\\') {
+ if (!from[1]) return -1;
+
+ // Check escaped end delimiter before printf style escapes.
+ if (from[1] == d) from++;
+ else {
+ char c = unescape(from[1]);
+
+ if (c) {
+ *to = c;
+ from++;
}
}
}
- rc = (*from == delim);
}
+ rc = to-*pstr;
+ *to = 0;
+ *pstr = from+1;
- if (rc) {
- delim = *to;
- *to = 0;
- xregcomp(reg, *pstr, ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
- *to = delim;
- }
- *pstr = from + rc;
-
return rc;
}
@@ -587,14 +667,11 @@ static void jewel_of_judgement(char **pline, long len)
corwin->lmatch[i] = -1;
line++;
} else if (*line == '/' || *line == '\\') {
- char delim = *(line++);
-
- if (delim == '\\') {
- if (!*line) goto brand;
- delim = *(line++);
- }
+ char *s = line;
- if (!parse_regex((void *)reg, &line, delim)) goto brand;
+ if (-1 == unescape_delimited_string(&line, 0)) goto brand;
+ xregcomp((void *)reg, s,
+ ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
corwin->rmatch[i] = reg-toybuf;
reg += sizeof(regex_t);
} else break;
@@ -603,7 +680,10 @@ static void jewel_of_judgement(char **pline, long len)
while (isspace(*line)) line++;
if (!*line) break;
- while (*line == '!') corwin->not = 1;
+ while (*line == '!') {
+ corwin->not = 1;
+ line++;
+ }
while (isspace(*line)) line++;
c = corwin->c = *(line++);
@@ -620,55 +700,89 @@ static void jewel_of_judgement(char **pline, long len)
else if (c == '}') {
if (!TT.nextlen--) break;
} else if (c == 's') {
- char delim = *line;
+ void *merlin, *fiona;
int end;
+ char delim = 0;
// s/pattern/replacement/flags
- if (delim) line++;
- else break;
-
- // get pattern
- end = reg - (char *)corwin;
- corwin = xrealloc(corwin, end+sizeof(regex_t));
- reg = end + (char *)corwin;
- if (!parse_regex((void *)reg, &line, delim)) break;
- corwin->arg1 = reg-toybuf;
- reg += sizeof(regex_t);
+ // get pattern (just record, we parse it later)
+ corwin->arg1 = reg - (char *)corwin;
+ merlin = line;
+ if (-1 == unescape_delimited_string(&line, &delim)) goto brand;
// get replacement
+ fiona = line;
+ if (-1 == (end = unescape_delimited_string(&line, &delim))) goto brand;
+ corwin->arg2 = corwin->arg1 + sizeof(regex_t);
+ reg = extend_string((void *)&corwin, fiona, corwin->arg2, end);
- for (end = 0; line[end] != delim; end++) {
- if (line[end] == '\\') end++;
- if (!line[end]) goto brand;
- }
-
- corwin->arg2 = reg - (char*)corwin;
- reg = extend_string((void *)&corwin, line, corwin->arg2, end);
- line += end+1;
-
- // parse s///flags
- for (;;) {
+ // get flags
+ for (; *line && *line != ';'; line++) {
long l;
- line++;
- if (!*line || *line == ';') break;
if (isspace(*line)) continue;
- if (0 <= (l = stridx("gpi", *line))) corwin->sflags |= 1<<l;
- else if (!corwin->sflags >> 4 && 0<(l = strtol(line+end, &line, 10))) {
- corwin->sflags |= l << 4;
+ if (0 <= (l = stridx("igp", *line))) corwin->sflags |= 1<<l;
+ else if (!corwin->sflags >> 3 && 0<(l = strtol(line+end, &line, 10))) {
+ corwin->sflags |= l << 3;
line--;
- } else if (*line == 'w') {
- while (isspace(*++line));
- if (!*line) goto brand;
- corwin->arg3 = reg - (char *)corwin;
- reg = extend_string((void *)&corwin, line, corwin->arg3,
- strlen(line));
- } else goto brand;
+ } else if (*line == 'w') break;
+ else goto brand;
+ }
+
+ // We deferred actually parsing the rexex until we had the s///i flag
+ // allocating the space was done by extend_string() above
+ xregcomp((void *)(corwin->arg1 + (char *)corwin), merlin,
+ ((toys.optflags & FLAG_r)*REG_EXTENDED)|((corwin->sflags&1)*REG_ICASE));
+ if (*line == 'w') {
+ line++;
+ goto writenow;
}
+ } else if (c == 'w') {
+ int fd, delim;
+ char *cc;
+
+ // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
+ // eol status, and to retain the filename for error messages, we'd need
+ // to go up to arg5 just for this. Compromise: dynamically allocate the
+ // filehandle and eol status.
+
+writenow:
+ while (isspace(*line)) line++;
+ if (!*line) goto brand;
+ for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
+ delim = *cc;
+ *cc = 0;
+ fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0544);
+ *cc = delim;
+
+ delim = cc-line;
+ corwin->w = reg - (char *)corwin;
+ corwin = xrealloc(corwin, corwin->w + delim + 6);
+ reg = corwin->w + (char *)corwin;
+
+ memcpy(reg, &fd, 4);
+ reg += 4;
+ *(reg++) = 0;
+ memcpy(reg, line, delim);
+ reg += delim;
+ *(reg++) = 0;
+
+ line = cc;
+ if (delim) line += 2;
} else if (c == 'y') {
- // y/old/new/
+ char *s = line, delim = 0;
+ int len1, len2;
+
+ if (-1 == (len1 = unescape_delimited_string(&line, &delim))) goto brand;
+ corwin->arg1 = reg-(char *)corwin;
+ reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len1);
+ s = line;
+ corwin->arg2 = reg-(char *)corwin;
+ if (-1 == (len2 = unescape_delimited_string(&line, &delim))) goto brand;
+ if (len1 != len2) goto brand;
+ reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len2);
} else if (strchr("abcirtTw:", c)) {
int end, class;
@@ -742,4 +856,6 @@ void sed_main(void)
loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
if (!(toys.optflags & FLAG_i)) walk_pattern(0, 0);
+
+ // todo: need to close fd when done for TOYBOX_FREE?
}