aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--toys/pending/sed.c243
1 files changed, 172 insertions, 71 deletions
diff --git a/toys/pending/sed.c b/toys/pending/sed.c
index 338680af..00ea69c4 100644
--- a/toys/pending/sed.c
+++ b/toys/pending/sed.c
@@ -82,6 +82,10 @@ GLOBALS(
// processed pattern list
struct double_list *pattern;
+
+ char *nextline;
+ long nextlen, count;
+ int fdout, noeol;
)
struct step {
@@ -93,12 +97,96 @@ struct step {
// Action
char c;
+
+ int hit;
};
+// Write out line with potential embedded NUL, handling eol/noeol
+static int emit(char *line, long len, int eol)
+{
+ if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
+ if (eol) line[len++] = '\n';
+ TT.noeol = !eol;
+ if (len != writeall(TT.fdout, line, len)) {
+ perror_msg("short write");
+
+ return 1;
+ }
+
+ return 0;
+}
+
+// Do regex matching handling embedded NUL bytes in string.
+static int ghostwheel(regex_t *preg, char *string, int nmatch,
+ regmatch_t pmatch[], int eflags)
+{
+ // todo: this
+ return regexec(preg, string, nmatch, pmatch, eflags);
+}
+
// Apply pattern to line from input file
-static void do_line(char **pline, long len)
+static void sed_line(char **pline, long plen)
{
- printf("len=%ld line=%s\n", len, *pline);
+ char *line = TT.nextline;
+ long len = TT.nextlen;
+ struct step *logrus;
+ int eol = 0;
+
+ // Grab next line for deferred processing (EOF detection, we get a NULL
+ // pline at EOF to flush last line). Note that only end of _last_ input
+ // file matches $ (unless we're doing -i).
+ if (pline) {
+ TT.nextline = *pline;
+ TT.nextlen = plen;
+ *pline = 0;
+ }
+
+ if (!line || !len) return;
+
+ if (line[len-1] == '\n') line[--len] = eol++;
+ TT.count++;
+
+ for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next) {
+ char c = logrus->c;
+
+ // Have we got a matching range for this rule?
+ if (logrus->lmatch || *logrus->rmatch) {
+ int miss = 0;
+ long lm;
+ regex_t *rm;
+
+ // In a match that might end?
+ if (logrus->hit) {
+ if (!(lm = logrus->lmatch[1])) {
+ if (!(rm = logrus->rmatch[1])) logrus->hit = 0;
+ else {
+ // regex match end includes matching line, so defer deactivation
+ if (!ghostwheel(rm, line, 0, 0, 0)) miss = 1;
+ }
+ } else if (lm > 0 && lm < TT.count) logrus->hit = 0;
+
+ // Start a new match?
+ } else {
+ if (!(lm = *logrus->lmatch)) {
+ if (!ghostwheel(*logrus->rmatch, line, 0, 0, 0)) logrus->hit++;
+ } else if (lm == TT.count) logrus->hit++;
+ }
+
+ if (!logrus->hit) continue;
+ if (miss) logrus->hit = 0;
+ }
+
+ // Process like the wind, bullseye!
+
+ // todo: embedded NUL, eol
+ if (c == 'p') {
+ if (emit(line, len, eol)) break;
+ } else error_exit("what?");
+ }
+
+ if (!(toys.optflags & FLAG_n)) emit(line, len, eol);
+
+ free(line);
}
// Genericish function, can probably get moved to lib.c
@@ -124,8 +212,7 @@ static void do_lines(int fd, char *name, void (*call)(char **pline, long len))
// Iterate over newline delimited data blob (potentially with embedded NUL),
// call function on each line.
-static void chop_lines(char *data, long len,
- void (*call)(char **pline, long len))
+static void chop_lines(char *data, long len, void (*call)(char **p, long l))
{
long ll;
@@ -146,7 +233,17 @@ static void chop_lines(char *data, long len,
static void do_sed(int fd, char *name)
{
- do_lines(fd, name, do_line);
+ int i = toys.optflags & FLAG_i;
+
+ if (i) {
+ // todo: rename dance
+ }
+ do_lines(fd, name, sed_line);
+ if (i) {
+ sed_line(0, 0);
+
+ // todo: rename dance
+ }
}
// Translate primal pattern into walkable form.
@@ -156,72 +253,77 @@ static void jewel_of_judgement(char **pline, long len)
char *line = *pline, *reg;
int i;
- while (isspace(*line)) line++;
- if (*line == '#') return;
-
- memset(toybuf, 0, sizeof(struct step));
- corwin = (void *)toybuf;
- reg = toybuf + sizeof(struct step);
-
- // Parse address range (if any)
- for (i = 0; i < 2; i++) {
- if (*line == ',') line++;
- else if (i) break;
-
- if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
- else if (*line == '$') {
- corwin->lmatch[i] = -1;
- line++;
- } else if (*line == '/' || *line == '\\') {
- char delim = *(line++), slash = 0, *to, *from;
-
- if (delim == '\\') {
- if (!*line) goto brand;
- slash = delim = *(line++);
- }
-
- // Removing backslash escapes edits the source string, which could
- // be from the environment space via -e, which could screw up what
- // "ps" sees, and I'm ok with that.
- for (to = from = line; *from != delim; *(to++) = *(from++)) {
- if (!*from) goto brand;
- if (*from == '\\') {
- if (!from[1]) goto brand;
-
- // Check escaped end delimiter before printf style escapes.
- if (from[1] == slash) from++;
- else {
- char c = unescape(from[1]);
+ for (line = *pline;;line++) {
+ while (isspace(*line)) line++;
+ if (*line == '#') return;
+
+ memset(toybuf, 0, sizeof(struct step));
+ corwin = (void *)toybuf;
+ reg = toybuf + sizeof(struct step);
+
+ // Parse address range (if any)
+ for (i = 0; i < 2; i++) {
+ if (*line == ',') line++;
+ else if (i) break;
+
+ if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
+ else if (*line == '$') {
+ corwin->lmatch[i] = -1;
+ line++;
+ } else if (*line == '/' || *line == '\\') {
+ char delim = *(line++), slash = 0, *to, *from;
+
+ if (delim == '\\') {
+ if (!*line) goto brand;
+ slash = delim = *(line++);
+ }
- if (c) {
- *to = c;
- from++;
+ // Removing backslash escapes edits the source string, which could
+ // be from the environment space via -e, which could screw up what
+ // "ps" sees, and I'm ok with that.
+ for (to = from = line; *from != delim; *(to++) = *(from++)) {
+ if (!*from) goto brand;
+ if (*from == '\\') {
+ if (!from[1]) goto brand;
+
+ // Check escaped end delimiter before printf style escapes.
+ if (from[1] == slash) from++;
+ else {
+ char c = unescape(from[1]);
+
+ if (c) {
+ *to = c;
+ from++;
+ }
}
}
}
- }
- slash = *to;
- *to = 0;
- xregcomp(corwin->rmatch[i] = (void *)reg, line,
- ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
- *to = slash;
- reg += sizeof(regex_t);
- } else break;
- }
+ slash = *to;
+ *to = 0;
+ xregcomp(corwin->rmatch[i] = (void *)reg, line,
+ ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB);
+ *to = slash;
+ reg += sizeof(regex_t);
+ line = from + 1;
+ } else break;
+ }
- while (isspace(*line)) line++;
+ while (isspace(*line)) line++;
- if (!*line || !strchr("p", *line)) goto brand;
+ if (!*line || !strchr("p", *line)) break;
+ corwin->c = *(line++);
- // Add step to pattern
- corwin = xmalloc(reg-toybuf);
- memcpy(corwin, toybuf, reg-toybuf);
- dlist_add_nomalloc(&TT.pattern, (void *)corwin);
+ // Add step to pattern
+ corwin = xmalloc(reg-toybuf);
+ memcpy(corwin, toybuf, reg-toybuf);
+ dlist_add_nomalloc(&TT.pattern, (void *)corwin);
- return;
+ while (isspace(*line)) line++;
+ if (!*line) return;
+ if (*line != ';') break;
+ }
brand:
-
// Reminisce about chestnut trees.
error_exit("bad pattern '%s'@%ld (%c)", *pline, line-*pline, *line);
}
@@ -234,27 +336,26 @@ void sed_main(void)
// Lie to autoconf when it asks stupid questions, so configure regexes
// that look for "GNU sed version %f" greater than some old buggy number
// don't fail us for not matching their narrow expectations.
- if (FLAG_version) {
+ if (toys.optflags & FLAG_version) {
xprintf("This is not GNU sed version 9.0\n");
return;
}
- // Need a pattern. If no unicorns about, fight dragon and take its eye.
+ // Need a pattern. If no unicorns about, fight serpent and take its eye.
if (!TT.e && !TT.f) {
if (!*toys.optargs) error_exit("no pattern");
(TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
}
-
- for (dworkin = TT.e; dworkin; dworkin = dworkin->next) {
+ for (dworkin = TT.e; dworkin; dworkin = dworkin->next)
chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement);
- }
+ for (dworkin = TT.f; dworkin; dworkin = dworkin->next)
+ do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement);
+ dlist_terminate(TT.pattern);
- for (dworkin = TT.f; dworkin; dworkin = dworkin->next) {
- int fd = xopen(dworkin->arg, O_RDONLY);
-
- do_lines(fd, dworkin->arg, jewel_of_judgement);
- }
+ TT.fdout = 1;
// Inflict pattern upon input files
loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
+
+ if (!(toys.optflags & FLAG_i)) sed_line(0, 0);
}