diff options
-rw-r--r-- | toys/pending/sed.c | 243 |
1 files changed, 172 insertions, 71 deletions
diff --git a/toys/pending/sed.c b/toys/pending/sed.c index 338680af..00ea69c4 100644 --- a/toys/pending/sed.c +++ b/toys/pending/sed.c @@ -82,6 +82,10 @@ GLOBALS( // processed pattern list struct double_list *pattern; + + char *nextline; + long nextlen, count; + int fdout, noeol; ) struct step { @@ -93,12 +97,96 @@ struct step { // Action char c; + + int hit; }; +// Write out line with potential embedded NUL, handling eol/noeol +static int emit(char *line, long len, int eol) +{ + if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1; + if (eol) line[len++] = '\n'; + TT.noeol = !eol; + if (len != writeall(TT.fdout, line, len)) { + perror_msg("short write"); + + return 1; + } + + return 0; +} + +// Do regex matching handling embedded NUL bytes in string. +static int ghostwheel(regex_t *preg, char *string, int nmatch, + regmatch_t pmatch[], int eflags) +{ + // todo: this + return regexec(preg, string, nmatch, pmatch, eflags); +} + // Apply pattern to line from input file -static void do_line(char **pline, long len) +static void sed_line(char **pline, long plen) { - printf("len=%ld line=%s\n", len, *pline); + char *line = TT.nextline; + long len = TT.nextlen; + struct step *logrus; + int eol = 0; + + // Grab next line for deferred processing (EOF detection, we get a NULL + // pline at EOF to flush last line). Note that only end of _last_ input + // file matches $ (unless we're doing -i). + if (pline) { + TT.nextline = *pline; + TT.nextlen = plen; + *pline = 0; + } + + if (!line || !len) return; + + if (line[len-1] == '\n') line[--len] = eol++; + TT.count++; + + for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next) { + char c = logrus->c; + + // Have we got a matching range for this rule? + if (logrus->lmatch || *logrus->rmatch) { + int miss = 0; + long lm; + regex_t *rm; + + // In a match that might end? + if (logrus->hit) { + if (!(lm = logrus->lmatch[1])) { + if (!(rm = logrus->rmatch[1])) logrus->hit = 0; + else { + // regex match end includes matching line, so defer deactivation + if (!ghostwheel(rm, line, 0, 0, 0)) miss = 1; + } + } else if (lm > 0 && lm < TT.count) logrus->hit = 0; + + // Start a new match? + } else { + if (!(lm = *logrus->lmatch)) { + if (!ghostwheel(*logrus->rmatch, line, 0, 0, 0)) logrus->hit++; + } else if (lm == TT.count) logrus->hit++; + } + + if (!logrus->hit) continue; + if (miss) logrus->hit = 0; + } + + // Process like the wind, bullseye! + + // todo: embedded NUL, eol + if (c == 'p') { + if (emit(line, len, eol)) break; + } else error_exit("what?"); + } + + if (!(toys.optflags & FLAG_n)) emit(line, len, eol); + + free(line); } // Genericish function, can probably get moved to lib.c @@ -124,8 +212,7 @@ static void do_lines(int fd, char *name, void (*call)(char **pline, long len)) // Iterate over newline delimited data blob (potentially with embedded NUL), // call function on each line. -static void chop_lines(char *data, long len, - void (*call)(char **pline, long len)) +static void chop_lines(char *data, long len, void (*call)(char **p, long l)) { long ll; @@ -146,7 +233,17 @@ static void chop_lines(char *data, long len, static void do_sed(int fd, char *name) { - do_lines(fd, name, do_line); + int i = toys.optflags & FLAG_i; + + if (i) { + // todo: rename dance + } + do_lines(fd, name, sed_line); + if (i) { + sed_line(0, 0); + + // todo: rename dance + } } // Translate primal pattern into walkable form. @@ -156,72 +253,77 @@ static void jewel_of_judgement(char **pline, long len) char *line = *pline, *reg; int i; - while (isspace(*line)) line++; - if (*line == '#') return; - - memset(toybuf, 0, sizeof(struct step)); - corwin = (void *)toybuf; - reg = toybuf + sizeof(struct step); - - // Parse address range (if any) - for (i = 0; i < 2; i++) { - if (*line == ',') line++; - else if (i) break; - - if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0); - else if (*line == '$') { - corwin->lmatch[i] = -1; - line++; - } else if (*line == '/' || *line == '\\') { - char delim = *(line++), slash = 0, *to, *from; - - if (delim == '\\') { - if (!*line) goto brand; - slash = delim = *(line++); - } - - // Removing backslash escapes edits the source string, which could - // be from the environment space via -e, which could screw up what - // "ps" sees, and I'm ok with that. - for (to = from = line; *from != delim; *(to++) = *(from++)) { - if (!*from) goto brand; - if (*from == '\\') { - if (!from[1]) goto brand; - - // Check escaped end delimiter before printf style escapes. - if (from[1] == slash) from++; - else { - char c = unescape(from[1]); + for (line = *pline;;line++) { + while (isspace(*line)) line++; + if (*line == '#') return; + + memset(toybuf, 0, sizeof(struct step)); + corwin = (void *)toybuf; + reg = toybuf + sizeof(struct step); + + // Parse address range (if any) + for (i = 0; i < 2; i++) { + if (*line == ',') line++; + else if (i) break; + + if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0); + else if (*line == '$') { + corwin->lmatch[i] = -1; + line++; + } else if (*line == '/' || *line == '\\') { + char delim = *(line++), slash = 0, *to, *from; + + if (delim == '\\') { + if (!*line) goto brand; + slash = delim = *(line++); + } - if (c) { - *to = c; - from++; + // Removing backslash escapes edits the source string, which could + // be from the environment space via -e, which could screw up what + // "ps" sees, and I'm ok with that. + for (to = from = line; *from != delim; *(to++) = *(from++)) { + if (!*from) goto brand; + if (*from == '\\') { + if (!from[1]) goto brand; + + // Check escaped end delimiter before printf style escapes. + if (from[1] == slash) from++; + else { + char c = unescape(from[1]); + + if (c) { + *to = c; + from++; + } } } } - } - slash = *to; - *to = 0; - xregcomp(corwin->rmatch[i] = (void *)reg, line, - ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB); - *to = slash; - reg += sizeof(regex_t); - } else break; - } + slash = *to; + *to = 0; + xregcomp(corwin->rmatch[i] = (void *)reg, line, + ((toys.optflags & FLAG_r)*REG_EXTENDED)|REG_NOSUB); + *to = slash; + reg += sizeof(regex_t); + line = from + 1; + } else break; + } - while (isspace(*line)) line++; + while (isspace(*line)) line++; - if (!*line || !strchr("p", *line)) goto brand; + if (!*line || !strchr("p", *line)) break; + corwin->c = *(line++); - // Add step to pattern - corwin = xmalloc(reg-toybuf); - memcpy(corwin, toybuf, reg-toybuf); - dlist_add_nomalloc(&TT.pattern, (void *)corwin); + // Add step to pattern + corwin = xmalloc(reg-toybuf); + memcpy(corwin, toybuf, reg-toybuf); + dlist_add_nomalloc(&TT.pattern, (void *)corwin); - return; + while (isspace(*line)) line++; + if (!*line) return; + if (*line != ';') break; + } brand: - // Reminisce about chestnut trees. error_exit("bad pattern '%s'@%ld (%c)", *pline, line-*pline, *line); } @@ -234,27 +336,26 @@ void sed_main(void) // Lie to autoconf when it asks stupid questions, so configure regexes // that look for "GNU sed version %f" greater than some old buggy number // don't fail us for not matching their narrow expectations. - if (FLAG_version) { + if (toys.optflags & FLAG_version) { xprintf("This is not GNU sed version 9.0\n"); return; } - // Need a pattern. If no unicorns about, fight dragon and take its eye. + // Need a pattern. If no unicorns about, fight serpent and take its eye. if (!TT.e && !TT.f) { if (!*toys.optargs) error_exit("no pattern"); (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); } - - for (dworkin = TT.e; dworkin; dworkin = dworkin->next) { + for (dworkin = TT.e; dworkin; dworkin = dworkin->next) chop_lines(dworkin->arg, strlen(dworkin->arg), jewel_of_judgement); - } + for (dworkin = TT.f; dworkin; dworkin = dworkin->next) + do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement); + dlist_terminate(TT.pattern); - for (dworkin = TT.f; dworkin; dworkin = dworkin->next) { - int fd = xopen(dworkin->arg, O_RDONLY); - - do_lines(fd, dworkin->arg, jewel_of_judgement); - } + TT.fdout = 1; // Inflict pattern upon input files loopfiles_rw(args, O_RDONLY, 0, 0, do_sed); + + if (!(toys.optflags & FLAG_i)) sed_line(0, 0); } |