aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Whitley <markw@lineo.com> 2000-07-11 16:53:56 +0000
committer Mark Whitley <markw@lineo.com> 2000-07-11 16:53:56 +0000
commit df5f6ba1159f8c1af500fa1b8eb9c9646e4de2a6 (patch)
tree 90005f171e95b4fc0eefca5171593ba0b105b3f7
parent da9841efc1e924ab3f645d931ae17c6d4aaf766f (diff)
download busybox-df5f6ba1159f8c1af500fa1b8eb9c9646e4de2a6.tar.gz
Applied patch from Matt Kraai which does the following:
- adds case-insensitive matching in sed s/// expressions
- consolidates common regcomp code in grep & sed into bb_regcomp and puts it in utility.c
- cleans up a bunch of cruft
-rw-r--r-- editors/sed.c 133
-rw-r--r-- findutils/grep.c 15
-rw-r--r-- grep.c 15
-rw-r--r-- internal.h 3
-rw-r--r-- sed.c 133
-rw-r--r-- utility.c 16
6 files changed, 120 insertions, 195 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 329f5ae8d..2fb243fb9 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -97,50 +97,6 @@ static const char sed_usage[] =
#endif
;
-#if 0
-/* Nuke from here { */
-
-
-/* get_line_from_file() - This function reads an entire line from a text file
- * * up to a newline. It returns a malloc'ed char * which must be stored and
- * * free'ed by the caller. */
-extern char *get_line_from_file(FILE *file)
-{
- static const int GROWBY = 80; /* how large we will grow strings by */
-
- int ch;
- int idx = 0;
- char *linebuf = NULL;
- int linebufsz = 0;
-
- while (1) {
- ch = fgetc(file);
- if (ch == EOF)
- break;
- /* grow the line buffer as necessary */
- if (idx > linebufsz-2)
- linebuf = realloc(linebuf, linebufsz += GROWBY);
- linebuf[idx++] = (char)ch;
- if ((char)ch == '\n')
- break;
- }
-
- if (idx == 0)
- return NULL;
-
- linebuf[idx] = 0;
- return linebuf;
-}
-
-static void usage(const char *string)
-{
- printf("usage: %s\n", string);
- exit(0);
-}
-
-/* } to here when we integrate this into busybox */
-#endif
-
static void destroy_cmd_strs()
{
if (sed_cmds == NULL)
@@ -246,29 +202,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
idx++;
}
else if (my_str[idx] == '/') {
- int ret;
idx = index_of_next_unescaped_slash(idx, my_str);
- if (idx == -1) {
- free(my_str);
+ if (idx == -1)
exit_sed(1, "sed: unterminated match expression\n");
- }
- my_str[idx] = 0; /* shave off the trailing '/' */
- my_str++; /* shave off the leading '/' */
- *regex = (regex_t *)malloc(sizeof(regex_t));
- if ((ret = regcomp(*regex, my_str, 0)) != 0) {
- /* error handling if regular expression couldn't be compiled */
- int errmsgsz = regerror(ret, *regex, NULL, 0);
- char *errmsg = malloc(errmsgsz);
- if (errmsg == NULL) {
- exit_sed(1, "sed: memory error\n");
- }
- regerror(ret, *regex, errmsg, errmsgsz);
- fprintf(stderr, "sed: %s\n", errmsg);
- free(errmsg);
+ my_str[idx] = '\0';
+ *regex = (regex_t *)xmalloc(sizeof(regex_t));
+ if (bb_regcomp(*regex, my_str+1, REG_NEWLINE) != 0) {
+ free(my_str);
exit_sed(1, NULL);
}
- my_str--; /* move my_str back so free() (below) won't barf */
- idx++; /* advance idx one past the end of the /match/ */
}
else {
fprintf(stderr, "sed.c:get_address: no address found in string\n");
@@ -280,6 +222,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
return idx;
}
+static char *strdup_substr(const char *str, int start, int end)
+{
+ int size = end - start + 1;
+ char *newstr = xmalloc(size);
+ memcpy(newstr, str+start, size-1);
+ newstr[size-1] = '\0';
+ return newstr;
+}
+
static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
{
int idx = 0;
@@ -306,10 +257,11 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
sed_cmd->cmd = cmdstr[idx];
/* special-case handling for 's' */
if (sed_cmd->cmd == 's') {
- int oldidx;
+ int oldidx, cflags = REG_NEWLINE;
+ char *match;
/* format for substitution is:
- * s/match/replace/g
- * | |
+ * s/match/replace/gI
+ * | ||
* mandatory optional
*/
@@ -317,19 +269,41 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
if (cmdstr[++idx] != '/')
exit_sed(1, "sed: bad format in substitution expression\n");
- /* get the substitution part */
- idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
+ /* save the match string */
+ oldidx = idx+1;
+ idx = index_of_next_unescaped_slash(idx, cmdstr);
+ if (idx == -1)
+ exit_sed(1, "sed: bad format in substitution expression\n");
+ match = strdup_substr(cmdstr, oldidx, idx);
- /* get the replacement part */
- oldidx = idx;
+ /* save the replacement string */
+ oldidx = idx+1;
idx = index_of_next_unescaped_slash(idx, cmdstr);
- sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
- strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
- sed_cmd->replace[idx - oldidx] = 0;
+ if (idx == -1)
+ exit_sed(1, "sed: bad format in substitution expression\n");
+ sed_cmd->replace = strdup_substr(cmdstr, oldidx, idx);
- /* store the 'g' if present */
- if (cmdstr[++idx] == 'g')
- sed_cmd->sub_g = 1;
+ /* process the flags */
+ while (cmdstr[++idx]) {
+ switch (cmdstr[idx]) {
+ case 'g':
+ sed_cmd->sub_g = 1;
+ break;
+ case 'I':
+ cflags |= REG_ICASE;
+ break;
+ default:
+ exit_sed(1, "sed: bad option in substitution expression\n");
+ }
+ }
+
+ /* compile the regex */
+ sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
+ if (bb_regcomp(sed_cmd->sub_match, match, cflags) != 0) {
+ free(match);
+ exit_sed(1, NULL);
+ }
+ free(match);
}
}
@@ -553,10 +527,3 @@ extern int sed_main(int argc, char **argv)
/* not reached */
return 0;
}
-
-#ifdef TEST_SED
-int main(int argc, char **argv)
-{
- return sed_main(argc, argv);
-}
-#endif
diff --git a/findutils/grep.c b/findutils/grep.c
index 8d2c915be..dec365f05 100644
--- a/findutils/grep.c
+++ b/findutils/grep.c
@@ -104,7 +104,6 @@ extern int grep_main(int argc, char **argv)
{
int opt;
int reflags;
- int ret;
/* do special-case option parsing */
if (argv[1] && (strcmp(argv[1], "--help") == 0))
@@ -147,20 +146,8 @@ extern int grep_main(int argc, char **argv)
reflags = REG_NOSUB | REG_NEWLINE;
if (ignore_case)
reflags |= REG_ICASE;
- if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
- int errmsgsz = regerror(ret, &regex, NULL, 0);
- char *errmsg = malloc(errmsgsz);
- if (errmsg == NULL) {
- fprintf(stderr, "grep: memory error\n");
- regfree(&regex);
- exit(1);
- }
- regerror(ret, &regex, errmsg, errmsgsz);
- fprintf(stderr, "grep: %s\n", errmsg);
- free(errmsg);
- regfree(&regex);
+ if (bb_regcomp(&regex, argv[optind], reflags) != 0)
exit(1);
- }
/* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
* there is more than one file to grep, we will print the filenames */
diff --git a/grep.c b/grep.c
index 8d2c915be..dec365f05 100644
--- a/grep.c
+++ b/grep.c
@@ -104,7 +104,6 @@ extern int grep_main(int argc, char **argv)
{
int opt;
int reflags;
- int ret;
/* do special-case option parsing */
if (argv[1] && (strcmp(argv[1], "--help") == 0))
@@ -147,20 +146,8 @@ extern int grep_main(int argc, char **argv)
reflags = REG_NOSUB | REG_NEWLINE;
if (ignore_case)
reflags |= REG_ICASE;
- if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
- int errmsgsz = regerror(ret, &regex, NULL, 0);
- char *errmsg = malloc(errmsgsz);
- if (errmsg == NULL) {
- fprintf(stderr, "grep: memory error\n");
- regfree(&regex);
- exit(1);
- }
- regerror(ret, &regex, errmsg, errmsgsz);
- fprintf(stderr, "grep: %s\n", errmsg);
- free(errmsg);
- regfree(&regex);
+ if (bb_regcomp(&regex, argv[optind], reflags) != 0)
exit(1);
- }
/* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
* there is more than one file to grep, we will print the filenames */
diff --git a/internal.h b/internal.h
index 5864c47ac..4ef15325e 100644
--- a/internal.h
+++ b/internal.h
@@ -34,7 +34,7 @@
#include <sys/stat.h>
#include <sys/param.h>
#include <mntent.h>
-
+#include <regex.h>
/* Some useful definitions */
#define FALSE ((int) 1)
@@ -259,6 +259,7 @@ extern int find_real_root_device_name(char* name);
extern char *get_line_from_file(FILE *file);
extern char process_escape_sequence(char **ptr);
extern char *get_last_path_component(char *path);
+extern int bb_regcomp(regex_t *preg, const char *regex, int cflags);
extern void *xmalloc (size_t size);
extern char *xstrdup (const char *s);
diff --git a/sed.c b/sed.c
index 329f5ae8d..2fb243fb9 100644
--- a/sed.c
+++ b/sed.c
@@ -97,50 +97,6 @@ static const char sed_usage[] =
#endif
;
-#if 0
-/* Nuke from here { */
-
-
-/* get_line_from_file() - This function reads an entire line from a text file
- * * up to a newline. It returns a malloc'ed char * which must be stored and
- * * free'ed by the caller. */
-extern char *get_line_from_file(FILE *file)
-{
- static const int GROWBY = 80; /* how large we will grow strings by */
-
- int ch;
- int idx = 0;
- char *linebuf = NULL;
- int linebufsz = 0;
-
- while (1) {
- ch = fgetc(file);
- if (ch == EOF)
- break;
- /* grow the line buffer as necessary */
- if (idx > linebufsz-2)
- linebuf = realloc(linebuf, linebufsz += GROWBY);
- linebuf[idx++] = (char)ch;
- if ((char)ch == '\n')
- break;
- }
-
- if (idx == 0)
- return NULL;
-
- linebuf[idx] = 0;
- return linebuf;
-}
-
-static void usage(const char *string)
-{
- printf("usage: %s\n", string);
- exit(0);
-}
-
-/* } to here when we integrate this into busybox */
-#endif
-
static void destroy_cmd_strs()
{
if (sed_cmds == NULL)
@@ -246,29 +202,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
idx++;
}
else if (my_str[idx] == '/') {
- int ret;
idx = index_of_next_unescaped_slash(idx, my_str);
- if (idx == -1) {
- free(my_str);
+ if (idx == -1)
exit_sed(1, "sed: unterminated match expression\n");
- }
- my_str[idx] = 0; /* shave off the trailing '/' */
- my_str++; /* shave off the leading '/' */
- *regex = (regex_t *)malloc(sizeof(regex_t));
- if ((ret = regcomp(*regex, my_str, 0)) != 0) {
- /* error handling if regular expression couldn't be compiled */
- int errmsgsz = regerror(ret, *regex, NULL, 0);
- char *errmsg = malloc(errmsgsz);
- if (errmsg == NULL) {
- exit_sed(1, "sed: memory error\n");
- }
- regerror(ret, *regex, errmsg, errmsgsz);
- fprintf(stderr, "sed: %s\n", errmsg);
- free(errmsg);
+ my_str[idx] = '\0';
+ *regex = (regex_t *)xmalloc(sizeof(regex_t));
+ if (bb_regcomp(*regex, my_str+1, REG_NEWLINE) != 0) {
+ free(my_str);
exit_sed(1, NULL);
}
- my_str--; /* move my_str back so free() (below) won't barf */
- idx++; /* advance idx one past the end of the /match/ */
}
else {
fprintf(stderr, "sed.c:get_address: no address found in string\n");
@@ -280,6 +222,15 @@ static int get_address(const char *str, int *line, regex_t **regex)
return idx;
}
+static char *strdup_substr(const char *str, int start, int end)
+{
+ int size = end - start + 1;
+ char *newstr = xmalloc(size);
+ memcpy(newstr, str+start, size-1);
+ newstr[size-1] = '\0';
+ return newstr;
+}
+
static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
{
int idx = 0;
@@ -306,10 +257,11 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
sed_cmd->cmd = cmdstr[idx];
/* special-case handling for 's' */
if (sed_cmd->cmd == 's') {
- int oldidx;
+ int oldidx, cflags = REG_NEWLINE;
+ char *match;
/* format for substitution is:
- * s/match/replace/g
- * | |
+ * s/match/replace/gI
+ * | ||
* mandatory optional
*/
@@ -317,19 +269,41 @@ static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
if (cmdstr[++idx] != '/')
exit_sed(1, "sed: bad format in substitution expression\n");
- /* get the substitution part */
- idx += get_address(&cmdstr[idx], NULL, &sed_cmd->sub_match);
+ /* save the match string */
+ oldidx = idx+1;
+ idx = index_of_next_unescaped_slash(idx, cmdstr);
+ if (idx == -1)
+ exit_sed(1, "sed: bad format in substitution expression\n");
+ match = strdup_substr(cmdstr, oldidx, idx);
- /* get the replacement part */
- oldidx = idx;
+ /* save the replacement string */
+ oldidx = idx+1;
idx = index_of_next_unescaped_slash(idx, cmdstr);
- sed_cmd->replace = (char *)malloc(idx - oldidx + 1);
- strncpy(sed_cmd->replace, &cmdstr[oldidx], idx - oldidx);
- sed_cmd->replace[idx - oldidx] = 0;
+ if (idx == -1)
+ exit_sed(1, "sed: bad format in substitution expression\n");
+ sed_cmd->replace = strdup_substr(cmdstr, oldidx, idx);
- /* store the 'g' if present */
- if (cmdstr[++idx] == 'g')
- sed_cmd->sub_g = 1;
+ /* process the flags */
+ while (cmdstr[++idx]) {
+ switch (cmdstr[idx]) {
+ case 'g':
+ sed_cmd->sub_g = 1;
+ break;
+ case 'I':
+ cflags |= REG_ICASE;
+ break;
+ default:
+ exit_sed(1, "sed: bad option in substitution expression\n");
+ }
+ }
+
+ /* compile the regex */
+ sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
+ if (bb_regcomp(sed_cmd->sub_match, match, cflags) != 0) {
+ free(match);
+ exit_sed(1, NULL);
+ }
+ free(match);
}
}
@@ -553,10 +527,3 @@ extern int sed_main(int argc, char **argv)
/* not reached */
return 0;
}
-
-#ifdef TEST_SED
-int main(int argc, char **argv)
-{
- return sed_main(argc, argv);
-}
-#endif
diff --git a/utility.c b/utility.c
index 46907e46a..cbbc02f98 100644
--- a/utility.c
+++ b/utility.c
@@ -1721,6 +1721,22 @@ char *get_last_path_component(char *path)
}
#endif
+#if defined BB_GREP || defined BB_SED
+int bb_regcomp(regex_t *preg, const char *regex, int cflags)
+{
+ int ret;
+ if ((ret = regcomp(preg, regex, cflags)) != 0) {
+ int errmsgsz = regerror(ret, preg, NULL, 0);
+ char *errmsg = xmalloc(errmsgsz);
+ regerror(ret, preg, errmsg, errmsgsz);
+ errorMsg("bb_regcomp: %s\n", errmsg);
+ free(errmsg);
+ regfree(preg);
+ }
+ return ret;
+}
+#endif
+
/* END CODE */
/*
Local Variables: