aboutsummaryrefslogtreecommitdiff
path: root/toys/posix/grep.c
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2018-08-26 22:53:37 -0500
committerRob Landley <rob@landley.net>2018-08-26 22:53:37 -0500
commitd74148279c8749c995e8381860aef59c1f524350 (patch)
tree06730e51cd344ebdecec9ceca8b853ec3c8cb4e1 /toys/posix/grep.c
parent009b55edc4bad5b46ace88f3c255bbff8671d137 (diff)
downloadtoybox-d74148279c8749c995e8381860aef59c1f524350.tar.gz
Add binary file detection to grep.
Diffstat (limited to 'toys/posix/grep.c')
-rw-r--r--toys/posix/grep.c59
1 files changed, 40 insertions, 19 deletions
diff --git a/toys/posix/grep.c b/toys/posix/grep.c
index d1452cec..c63c1f87 100644
--- a/toys/posix/grep.c
+++ b/toys/posix/grep.c
@@ -4,11 +4,11 @@
*
* See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
*
- * TODO: --color, "Binary file %s matches"
+ * TODO: --color
*
* Posix doesn't even specify -r, documenting deviations from it is silly.
-USE_GREP(NEWTOY(grep, "S(exclude)*M(include)*C#B#A#ZzEFHabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN))
+USE_GREP(NEWTOY(grep, "S(exclude)*M(include)*C#B#A#ZzEFHIabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN))
USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN))
USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN))
@@ -29,14 +29,15 @@ config GREP
-r Recurse into subdirectories (defaults FILE to ".")
-M Match filename pattern (--include)
-S Skip filename pattern (--exclude)
+ -I Ignore binary files
match type:
-A Show NUM lines after -B Show NUM lines before match
-C NUM lines context (A+B) -E extended regex syntax
- -F fixed (literal match) -i case insensitive
- -m match MAX many lines -v invert match
- -w whole word (implies -E) -x whole line
- -z input NUL terminated
+ -F fixed (literal match) -a always text (not binary)
+ -i case insensitive -m match MAX many lines
+ -v invert match -w whole word (implies -E)
+ -x whole line -z input NUL terminated
display modes: (default: matched line)
-c count of matching lines -l show matching filenames
@@ -90,41 +91,58 @@ static void outline(char *line, char dash, char *name, long lcount, long bcount,
// Show matches in one file
static void do_grep(int fd, char *name)
{
- struct double_list *dlb = 0;
- FILE *file = fdopen(fd, "r");
long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
+ struct double_list *dlb = 0;
char *bars = 0;
+ FILE *file;
+ int bin = 0;
if (!fd) name = "(standard input)";
- if (!file) {
- perror_msg("%s", name);
-
- return;
+ // Only run binary file check on lseekable files.
+ if (!(toys.optflags&FLAG_a) && !lseek(fd, 0, SEEK_CUR)) {
+ char buf[256];
+ int len, i = 0;
+ wchar_t wc;
+
+ // If the first 256 bytes don't parse as utf8, call it binary.
+ if (0<(len = read(fd, buf, 256))) {
+ lseek(fd, -len, SEEK_CUR);
+ while (i<len) {
+ bin = utf8towc(&wc, buf+i, len-i);
+ if (bin == -2) i = len;
+ if (bin<1) break;
+ i += bin;
+ }
+ bin = i!=len;
+ }
+ if (bin && (toys.optflags&FLAG_I)) return;
}
+ if (!(file = fdopen(fd, "r"))) return perror_msg("%s", name);
+
// Loop through lines of input
for (;;) {
char *line = 0, *start;
regmatch_t matches;
- size_t unused;
+ size_t ulen;
long len;
int mmatch = 0;
lcount++;
errno = 0;
- len = getdelim(&line, &unused, TT.indelim, file);
+ ulen = len = getdelim(&line, &ulen, TT.indelim, file);
if (errno) perror_msg("%s", name);
if (len<1) break;
- if (line[len-1] == TT.indelim) line[len-1] = 0;
+ if (line[ulen-1] == TT.indelim) line[--ulen] = 0;
start = line;
- // Loop through matches in this line
+ // Loop to handle multiple matches in same line
do {
int rc = 0, skip = 0;
- // Handle non-regex matches
+ // Handle "fixed" (literal) matches
if (toys.optflags & FLAG_F) {
struct arg_list *seek, fseek;
char *s = 0;
@@ -148,8 +166,10 @@ static void do_grep(int fd, char *name)
matches.rm_so = (s-line);
skip = matches.rm_eo = (s-line)+strlen(seek->arg);
} else rc = 1;
+
+ // Handle regex matches
} else {
- rc = regexec((regex_t *)toybuf, start, 1, &matches,
+ rc = regexec0((void *)toybuf, start, ulen-(start-line), 1, &matches,
start==line ? 0 : REG_NOTBOL);
skip = matches.rm_eo;
}
@@ -214,7 +234,8 @@ static void do_grep(int fd, char *name)
long bcount = 1 + offset + (start-line) +
((toys.optflags & FLAG_o) ? matches.rm_so : 0);
- if (!(toys.optflags & FLAG_o)) {
+ if (bin) printf("Binary file %s matches\n", name);
+ else if (!(toys.optflags & FLAG_o)) {
while (dlb) {
struct double_list *dl = dlist_pop(&dlb);