aboutsummaryrefslogtreecommitdiff
path: root/toys/pending/sed.c
blob: 0ce25acad9eb4c2485ab58022e150fa8854ee0d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/* sed.c - Stream editor.
 *
 * Copyright 2012 Rob Landley <rob@landley.net>
 *
 * See http://opengroup.org/onlinepubs/9699919799/utilities/sed.html

USE_SED(NEWTOY(sed, "irne*f*", TOYFLAG_BIN))

config SED
  bool "sed"
  default n
  help
    usage: sed [-irn] {command | [-e command]...} [FILE...]

    Stream EDitor, transforms text by applying a script of commands to each
    line of input.

    -e  Add expression to the command script (if no -e, use first argument)
    -i  Modify file in place
    -n  No default output (p commands only)
    -r  Use extended regular expression syntax
*/

#define FOR_sed
#include "toys.h"
#include "lib/xregcomp.h"

GLOBALS(
  // NOTE(review): presumably filled in by option parsing for the "f*" and
  // "e*" entries of the NEWTOY() option string -- confirm the mapping/order.
  // Nothing in this file reads TT.files yet.
  struct arg_list *files;
  // Script strings handed to parse_scripts(). When empty, sed_main() seeds
  // it with the first non-option argument.
  struct arg_list *scripts;

  // Head of the list of struct sed_command entries built by parse_scripts().
  void *commands;
)

// Digested version of what sed commands can actually tell us to do.


// One parsed sed command. parse_scripts() allocates these zeroed and appends
// them to the TT.commands list.
struct sed_command {
  // double_list compatibility (easier to create in-order)
  // parse_scripts() casts this struct to struct double_list when queuing it,
  // so these links presumably have to mirror that struct's leading members --
  // keep them first.
  struct sed_command *next, *prev;

  // data string for (saicytb)
  // NOTE(review): c looks like the command character; nothing visible in this
  // file assigns it yet -- confirm.
  char c, *data;
  // Regexes for s/match/data/ and /begin/,/end/command
  regex_t *rmatch, *rbegin, *rend;
  // For numeric ranges ala 10,20command
  // parse_scripts() stores -1 in lstart for a '^' address and LONG_MAX in
  // lstop for a '$' address; 0 means "not set" (struct is zero-allocated).
  long lstart, lstop;
  // Which match to replace, 0 for all. s and w commands can write to a file
  int which, outfd;
};

// Step *s forward over any leading run of blanks (space or tab).
// Newlines are deliberately not treated as blanks and stop the scan.
static void spaceorb(char **s)
{
  char *cursor = *s;

  for (;;) {
    if (*cursor != ' ' && *cursor != '\t') break;
    cursor++;
  }
  *s = cursor;
}

// Parse the script strings in TT.scripts into struct sed_command entries
// appended to the TT.commands list.
//
// Each command may begin with an address prefix: '^', '$', or a decimal line
// number, optionally followed by ',' and a second address. Addresses that
// start with '/' or '\' (regexes) are not implemented yet and terminate the
// program. Malformed input is reported through error_exit() with the 1-based
// script number, the byte offset within that script where parsing stopped,
// and the unparsed remainder.
//
// NOTE(review): only the address prefix is digested so far. The unconditional
// exit(1) after the first command is queued looks like a work-in-progress
// placeholder; it is kept because nothing advances str past the command
// character yet, so removing it would loop forever.
static void parse_scripts(void)
{
  struct arg_list *script;
  // Parse position. Lives at function scope so parse_fail can report where
  // in the current script the parser gave up.
  char *str = NULL;
  int which = 0;

  // Loop through list of scripts collated from command line and/or files
  for (script = TT.scripts; script; script = script->next) {
    struct sed_command *cmd;

    str = script->arg;
    which++;

    // we can get multiple commands from a string (semicolons and such)
    while (*str) {
      cmd = xzalloc(sizeof(struct sed_command));

      // Identify prefix
      for (;;) {
        spaceorb(&str);
        if (*str == '^') {
          if (cmd->lstart) goto parse_fail;
          cmd->lstart = -1;
          str++;
          continue;
        } else if (*str == '$') {
          cmd->lstop = LONG_MAX;
          str++;
          break;
        } else if (isdigit((unsigned char)*str)) {
          // Cast: passing a negative plain char to isdigit() is undefined.
          long ll = strtol(str, &str, 10);

          if (ll<0) goto parse_fail;
          if (cmd->lstart) {
            cmd->lstop = ll;
            break;
          } else cmd->lstart = ll;
        } else if (*str == '/' || *str == '\\') {
          // set begin/end
          // TODO: regex addresses are not implemented yet.
          printf("regex\n");
          exit(1);
        } else if (!cmd->lstart && !cmd->rbegin) break;
        else goto parse_fail;  // , with no range after it

        spaceorb(&str);
        if (*str != ',') break;
        str++;
      }

      // The character after the address prefix must be a known command.
      if (stridx("{bcdDgGhHlnNpPstwxyrqia= \t#:}", *str) == -1)
        goto parse_fail;

      dlist_add_nomalloc((struct double_list **)&TT.commands,
                         (struct double_list *)cmd);
      exit(1);
    }
  }

  return;

parse_fail:
  // Report the real failure offset. Only reachable from inside the loop, so
  // script and str both refer to the script being parsed.
  error_exit("bad expression %d@%d: %s", which, (int)(str - script->arg), str);
}

void sed_main(void)
{
  char **files=toys.optargs;

  // If no -e, use first argument
  if (!TT.scripts) {
    if (!*files) error_exit("Need script");
    (TT.scripts = xzalloc(sizeof(struct arg_list)))->arg = *(files++);
  }

  parse_scripts();

  while (*files) dprintf(2,"file=%s\n", *(files++));
}