diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 27 | ||||
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | usr.bin/grep/CVS/Entries | 10 | ||||
-rw-r--r-- | usr.bin/grep/CVS/Repository | 1 | ||||
-rw-r--r-- | usr.bin/grep/CVS/Root | 1 | ||||
-rw-r--r-- | usr.bin/grep/Makefile | 16 | ||||
-rw-r--r-- | usr.bin/grep/binary.c | 93 | ||||
-rw-r--r-- | usr.bin/grep/file.c | 225 | ||||
-rw-r--r-- | usr.bin/grep/grep.1 | 385 | ||||
-rw-r--r-- | usr.bin/grep/grep.c | 530 | ||||
-rw-r--r-- | usr.bin/grep/grep.h | 129 | ||||
-rw-r--r-- | usr.bin/grep/mmfile.c | 90 | ||||
-rw-r--r-- | usr.bin/grep/queue.c | 120 | ||||
-rw-r--r-- | usr.bin/grep/util.c | 682 |
15 files changed, 2307 insertions, 4 deletions
@@ -3,6 +3,7 @@ /diff /doas /ed +/grep /m4 /mandoc /md5 @@ -3,6 +3,7 @@ BIN = \ diff \ doas \ ed \ + grep \ m4 \ mandoc \ md5 \ @@ -61,6 +62,7 @@ MAN = \ usr.bin/diff/diff.1 \ usr.bin/doas/doas.1 \ usr.bin/doas/doas.conf.5 \ + usr.bin/grep/grep.1 \ usr.bin/mandoc/apropos.1 \ usr.bin/mandoc/makewhatis.8 \ usr.bin/mandoc/man.1 \ @@ -87,6 +89,13 @@ MAN = \ all: ${BIN} ${BINOBJ}: ${LIB} +MANDOCLIBS = ${LIB} +GREPLIBS = ${LIB} +ifeq (${ZLIB}, lib/libz/libz.a) + MANDOCLIBS += ${ZLIB} + GREPLIBS += ${ZLIB} +endif + # ------------------------------------------------------------------------------ # diff DIFFOBJ = \ @@ -133,6 +142,20 @@ ed: ${EDOBJ} ${LIB} ${CC} ${LDFLAGS} -o $@ ${EDOBJ} ${LIB} # ------------------------------------------------------------------------------ +# grep +GREPOBJ = \ + usr.bin/grep/binary.o \ + usr.bin/grep/file.o \ + usr.bin/grep/grep.o \ + usr.bin/grep/mmfile.o \ + usr.bin/grep/queue.o \ + usr.bin/grep/util.o +BINOBJ += ${GREPOBJ} + +grep: ${GREPOBJ} ${LIB} + ${CC} ${LDFLAGS} -o $@ ${GREPOBJ} ${LIB} -lz + +# ------------------------------------------------------------------------------ # m4 M4OBJ = \ usr.bin/m4/eval.o \ @@ -218,10 +241,6 @@ MANDOCOBJ = \ usr.bin/mandoc/term_tag.o \ usr.bin/mandoc/tree.o BINOBJ += ${MANDOCOBJ} -MANDOCLIBS = ${LIB} -ifeq (${ZLIB}, lib/libz/libz.a) - MANDOCLIBS += ${ZLIB} -endif mandoc: ${MANDOCOBJ} ${MANDOCLIBS} ${CC} ${LDFLAGS} -o $@ ${MANDOCOBJ} ${LIB} ${ZLIB} ${LIBFTS} @@ -7,6 +7,7 @@ Currently includes the following software: - diff - doas - ed +- grep - m4 - mandoc - md5 diff --git a/usr.bin/grep/CVS/Entries b/usr.bin/grep/CVS/Entries new file mode 100644 index 0000000..d00d3d7 --- /dev/null +++ b/usr.bin/grep/CVS/Entries @@ -0,0 +1,10 @@ +/Makefile/1.7/Wed Jul 14 14:15:09 2021// +/binary.c/1.19/Wed Jul 14 14:15:19 2021// +/file.c/1.16/Wed Jul 14 14:15:19 2021// +/grep.1/1.50/Wed Jul 14 14:15:09 2021// +/grep.c/1.65/Wed Jul 14 14:15:09 2021// +/grep.h/1.28/Wed Jul 14 14:15:19 2021// +/mmfile.c/1.19/Wed Jul 14 14:15:09 2021// +/queue.c/1.7/Wed Jul 14 14:15:09 2021// +/util.c/1.63/Wed Jul 14 14:15:09 2021// +D diff --git a/usr.bin/grep/CVS/Repository b/usr.bin/grep/CVS/Repository new file mode 100644 index 0000000..de0fe9d --- /dev/null +++ b/usr.bin/grep/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/grep diff --git a/usr.bin/grep/CVS/Root b/usr.bin/grep/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/grep/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile new file mode 100644 index 0000000..2799c34 --- /dev/null +++ b/usr.bin/grep/Makefile @@ -0,0 +1,16 @@ +# $OpenBSD: Makefile,v 1.7 2016/03/30 06:38:46 jmc Exp $ + +PROG= grep +SRCS= binary.c file.c grep.c mmfile.c queue.c util.c +LINKS= ${BINDIR}/grep ${BINDIR}/egrep \ + ${BINDIR}/grep ${BINDIR}/fgrep \ + ${BINDIR}/grep ${BINDIR}/zgrep \ + ${BINDIR}/grep ${BINDIR}/zegrep \ + ${BINDIR}/grep ${BINDIR}/zfgrep \ + +CFLAGS+= -Wall + +LDADD= -lz +DPADD= ${LIBZ} + +.include <bsd.prog.mk> diff --git a/usr.bin/grep/binary.c b/usr.bin/grep/binary.c new file mode 100644 index 0000000..119c946 --- /dev/null +++ b/usr.bin/grep/binary.c @@ -0,0 +1,93 @@ +/* $OpenBSD: binary.c,v 1.19 2021/03/10 21:55:22 millert Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ctype.h> +#include <err.h> +#include <stdio.h> +#include <string.h> +#include <zlib.h> + +#include "grep.h" + +static int +isbinary(const char *buf, size_t n) +{ + return (memchr(buf, '\0', n) != NULL); +} + +int +bin_file(FILE *f) +{ + char buf[BUFSIZ]; + size_t m; + int ret = 0; + + if (fseek(f, 0L, SEEK_SET) == -1) + return 0; + + if ((m = fread(buf, 1, BUFSIZ, f)) == 0) + return 0; + + if (isbinary(buf, m)) + ret = 1; + + rewind(f); + return ret; +} + +#ifndef NOZ +int +gzbin_file(gzFile f) +{ + char buf[BUFSIZ]; + int m; + int ret = 0; + + if (gzseek(f, (z_off_t)0, SEEK_SET) == -1) + return 0; + + if ((m = gzread(f, buf, BUFSIZ)) <= 0) + return 0; + + if (isbinary(buf, m)) + ret = 1; + + if (gzrewind(f) != 0) + err(1, "gzbin_file"); + return ret; +} +#endif + +#ifndef SMALL +int +mmbin_file(mmf_t *f) +{ + /* XXX knows too much about mmf internals */ + return isbinary(f->base, f->len < BUFSIZ ? f->len : BUFSIZ); +} +#endif diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c new file mode 100644 index 0000000..27b7f76 --- /dev/null +++ b/usr.bin/grep/file.c @@ -0,0 +1,225 @@ +/* $OpenBSD: file.c,v 1.16 2021/03/10 21:55:22 millert Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <zlib.h> + +#include "grep.h" + +static char fname[PATH_MAX]; +static char *lnbuf; +static size_t lnbufsize; + +#define FILE_STDIO 0 +#define FILE_MMAP 1 +#define FILE_GZIP 2 + +struct file { + int type; + int noseek; + FILE *f; + mmf_t *mmf; + gzFile gzf; +}; + +#ifndef NOZ +static char * +gzfgetln(gzFile f, size_t *len) +{ + size_t n; + int c; + + for (n = 0; ; ++n) { + c = gzgetc(f); + if (c == -1) { + const char *gzerrstr; + int gzerr; + + if (gzeof(f)) + break; + + gzerrstr = gzerror(f, &gzerr); + if (gzerr == Z_ERRNO) + err(2, "%s", fname); + else + errx(2, "%s: %s", fname, gzerrstr); + } + if (n >= lnbufsize) { + lnbufsize *= 2; + lnbuf = grep_realloc(lnbuf, ++lnbufsize); + } + if (c == '\n') + break; + lnbuf[n] = c; + } + + if (gzeof(f) && n == 0) + return NULL; + *len = n; + return lnbuf; +} +#endif + +file_t * +grep_fdopen(int fd) +{ + file_t *f; + struct stat sb; + + if (fd == STDIN_FILENO) + snprintf(fname, sizeof fname, "(standard input)"); + else if (fname[0] == '\0') + snprintf(fname, sizeof fname, "(fd %d)", fd); + + if (fstat(fd, &sb) == -1) + return NULL; + if (S_ISDIR(sb.st_mode)) { + errno = EISDIR; + return NULL; + } + + f = grep_malloc(sizeof *f); + +#ifndef NOZ + if (Zflag) { + f->type = FILE_GZIP; + f->noseek = lseek(fd, 0L, SEEK_SET) == -1; + if ((f->gzf = gzdopen(fd, "r")) != NULL) + return f; + } +#endif + f->noseek = isatty(fd); +#ifndef SMALL + /* try mmap first; if it fails, try stdio */ + if (!f->noseek && (f->mmf = mmopen(fd, &sb)) != NULL) { + f->type = FILE_MMAP; + return f; + } +#endif + f->type = FILE_STDIO; + if ((f->f = fdopen(fd, "r")) != NULL) + return f; + + free(f); + return NULL; +} + +file_t * +grep_open(char *path) +{ + file_t *f; + int fd; + + snprintf(fname, sizeof fname, "%s", path); + + if ((fd = open(fname, O_RDONLY)) == -1) + return NULL; + + f = grep_fdopen(fd); + if (f == NULL) + close(fd); + return f; +} + +int +grep_bin_file(file_t *f) +{ + if (f->noseek) + return 0; + + switch (f->type) { + case FILE_STDIO: + return bin_file(f->f); +#ifndef SMALL + case FILE_MMAP: + return mmbin_file(f->mmf); +#endif +#ifndef NOZ + case FILE_GZIP: + return gzbin_file(f->gzf); +#endif + default: + /* can't happen */ + errx(2, "invalid file type"); + } +} + +char * +grep_fgetln(file_t *f, size_t *l) +{ + switch (f->type) { + case FILE_STDIO: + if ((*l = getline(&lnbuf, &lnbufsize, f->f)) == -1) { + if (ferror(f->f)) + err(2, "%s: getline", fname); + else + return NULL; + } + return lnbuf; +#ifndef SMALL + case FILE_MMAP: + return mmfgetln(f->mmf, l); +#endif +#ifndef NOZ + case FILE_GZIP: + return gzfgetln(f->gzf, l); +#endif + default: + /* can't happen */ + errx(2, "invalid file type"); + } +} + +void +grep_close(file_t *f) +{ + switch (f->type) { + case FILE_STDIO: + fclose(f->f); + break; +#ifndef SMALL + case FILE_MMAP: + mmclose(f->mmf); + break; +#endif +#ifndef NOZ + case FILE_GZIP: + gzclose(f->gzf); + break; +#endif + default: + /* can't happen */ + errx(2, "invalid file type"); + } + free(f); +} diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1 new file mode 100644 index 0000000..5cc228d --- /dev/null +++ b/usr.bin/grep/grep.1 @@ -0,0 +1,385 @@ +.\" $OpenBSD: grep.1,v 1.50 2019/12/03 08:48:49 kn Exp $ +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)grep.1 8.3 (Berkeley) 4/18/94 +.\" +.Dd $Mdocdate: December 3 2019 $ +.Dt GREP 1 +.Os +.Sh NAME +.Nm grep , egrep , fgrep , +.Nm zgrep , zegrep , zfgrep +.Nd file pattern searcher +.Sh SYNOPSIS +.Nm grep +.Bk -words +.Op Fl abcEFGHhIiLlnoqRsUVvwxZ +.Op Fl A Ar num +.Op Fl B Ar num +.Op Fl C Ns Op Ar num +.Op Fl e Ar pattern +.Op Fl f Ar file +.Op Fl m Ar num +.Op Fl -binary-files Ns = Ns Ar value +.Op Fl -context Ns Op = Ns Ar num +.Op Fl -label Ns = Ns Ar name +.Op Fl -line-buffered +.Op Ar pattern +.Op Ar +.Ek +.Sh DESCRIPTION +The +.Nm grep +utility searches any given input files, +selecting lines that match one or more patterns. +By default, a pattern matches an input line if the regular expression +(RE) in the pattern matches the input line +without its trailing newline. +An empty expression matches every line. +Each input line that matches at least one of the patterns is written +to the standard output. +If no file arguments are specified, the standard input is used. +.Pp +.Nm grep +is used for simple patterns and +basic regular expressions +.Pq BREs ; +.Nm egrep +can handle extended regular expressions +.Pq EREs . +See +.Xr re_format 7 +for more information on regular expressions. +.Nm fgrep +is quicker than both +.Nm grep +and +.Nm egrep , +but can only handle fixed patterns +(i.e. it does not interpret regular expressions). +Patterns may consist of one or more lines, +allowing any of the pattern lines to match a portion of the input. +.Pp +.Nm zgrep , +.Nm zegrep , +and +.Nm zfgrep +act like +.Nm grep , +.Nm egrep , +and +.Nm fgrep , +respectively, but accept input files compressed with the +.Xr compress 1 +or +.Xr gzip 1 +compression utilities. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl A Ar num +Print +.Ar num +lines of trailing context after each match. +See also the +.Fl B +and +.Fl C +options. +.It Fl a +Treat all files as ASCII text. +Normally +.Nm +will simply print +.Dq Binary file ... matches +if files contain binary characters. +Use of this option forces +.Nm +to output lines matching the specified pattern. +.It Fl B Ar num +Print +.Ar num +lines of leading context before each match. +See also the +.Fl A +and +.Fl C +options. +.It Fl b +Each output line is preceded by its position (in bytes) in the file. +If option +.Fl o +is also specified, the position of the matched pattern is displayed. +.It Fl C Ns Oo Ar num Oc , Fl -context Ns Op = Ns Ar num +Print +.Ar num +lines of leading and trailing context surrounding each match. +The default is 2 and is equivalent to +.Fl A +.Ar 2 +.Fl B +.Ar 2 . +Note: +no whitespace may be given between the option and its argument. +.It Fl c +Only a count of selected lines is written to standard output. +.It Fl E +Interpret +.Ar pattern +as an extended regular expression +(i.e. force +.Nm grep +to behave as +.Nm egrep ) . +.It Fl e Ar pattern +Specify a pattern used during the search of the input: +an input line is selected if it matches any of the specified patterns. +This option is most useful when multiple +.Fl e +options are used to specify multiple patterns, +or when a pattern begins with a dash +.Pq Sq - . +.It Fl F +Interpret +.Ar pattern +as a set of fixed strings +(i.e. force +.Nm grep +to behave as +.Nm fgrep ) . +.It Fl f Ar file +Read one or more newline separated patterns from +.Ar file . +Empty pattern lines match every input line. +Newlines are not considered part of a pattern. +If +.Ar file +is empty, nothing is matched. +.It Fl G +Interpret +.Ar pattern +as a basic regular expression +(i.e. force +.Nm grep +to behave as traditional +.Nm grep ) . +.It Fl H +Always print filename headers +.Pq i.e. filenames +with output lines. +.It Fl h +Never print filename headers +.Pq i.e. filenames +with output lines. +.It Fl I +Ignore binary files. +.It Fl i +Perform case insensitive matching. +By default, +.Nm grep +is case sensitive. +.It Fl L +Only the names of files not containing selected lines are written to +standard output. +Pathnames are listed once per file searched. +If the standard input is searched, the string +.Dq (standard input) +is written. +.It Fl l +Only the names of files containing selected lines are written to +standard output. +.Nm grep +will only search a file until a match has been found, +making searches potentially less expensive. +Pathnames are listed once per file searched. +If the standard input is searched, the string +.Dq (standard input) +is written. +.It Fl m Ar num +Stop after +.Ar num +matches. +.It Fl n +Each output line is preceded by its relative line number in the file, +starting at line 1. +The line number counter is reset for each file processed. +This option is ignored if +.Fl c , +.Fl L , +.Fl l , +or +.Fl q +is +specified. +.It Fl o +Print each match, but only the match, not the entire line. +.It Fl q +Quiet mode: +suppress normal output. +.Nm grep +will only search a file until a match has been found, +making searches potentially less expensive. +.It Fl R +Recursively search subdirectories listed. +If no +.Ar file +is given, +.Nm +searches the current working directory. +.It Fl s +Silent mode. +Nonexistent and unreadable files are ignored +(i.e. their error messages are suppressed). +.It Fl U +Search binary files, but do not attempt to print them. +.It Fl V +Display version information. +All other options are ignored. +.It Fl v +Selected lines are those +.Em not +matching any of the specified patterns. +.It Fl w +The expression is searched for as a word (as if surrounded by +.Sq [[:<:]] +and +.Sq [[:>:]] ; +see +.Xr re_format 7 ) . +.It Fl x +Only input lines selected against an entire fixed string or regular +expression are considered to be matching lines. +.It Fl Z +Force +.Nm grep +to behave as +.Nm zgrep . +.It Fl -binary-files Ns = Ns Ar value +Controls searching and printing of binary files. +Options are +.Ar binary , +the default: search binary files but do not print them; +.Ar without-match : +do not search binary files; +and +.Ar text : +treat all files as text. +.It Fl -label Ns = Ns Ar name +Print +.Ar name +instead of the filename before lines. +.It Fl -line-buffered +Force output to be line buffered. +By default, output is line buffered when standard output is a terminal +and block buffered otherwise. +.El +.Sh EXIT STATUS +The +.Nm grep +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Li 0 +One or more lines were selected. +.It Li 1 +No lines were selected. +.It Li >1 +An error occurred. +.El +.Sh EXAMPLES +To find all occurrences of the word +.Sq patricia +in a file: +.Pp +.Dl $ grep 'patricia' myfile +.Pp +To find all occurrences of the pattern +.Ql .Pp +at the beginning of a line: +.Pp +.Dl $ grep '^\e.Pp' myfile +.Pp +The apostrophes ensure the entire expression is evaluated by +.Nm grep +instead of by the user's shell. +The caret +.Ql ^ +matches the null string at the beginning of a line, +and the +.Ql \e +escapes the +.Ql \&. , +which would otherwise match any character. +.Pp +To find all lines in a file which do not contain the words +.Sq foo +or +.Sq bar : +.Pp +.Dl $ grep -v -e 'foo' -e 'bar' myfile +.Pp +A simple example of an extended regular expression: +.Pp +.Dl $ egrep '19|20|25' calendar +.Pp +Peruses the file +.Sq calendar +looking for either 19, 20, or 25. +.Sh SEE ALSO +.Xr ed 1 , +.Xr ex 1 , +.Xr gzip 1 , +.Xr sed 1 , +.Xr re_format 7 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl AaBbCGHhILmoRUVwZ +are extensions to that specification, and the behaviour of the +.Fl f +flag when used with an empty pattern file is left undefined. +.Pp +All long options are provided for compatibility with +GNU versions of this utility. +.Pp +Historic versions of the +.Nm grep +utility also supported the flags +.Op Fl ruy . +This implementation supports those options; +however, their use is strongly discouraged. +.Sh HISTORY +The +.Nm grep +command first appeared in +.At v4 . diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c new file mode 100644 index 0000000..1beeeb6 --- /dev/null +++ b/usr.bin/grep/grep.c @@ -0,0 +1,530 @@ +/* $OpenBSD: grep.c,v 1.65 2020/07/23 20:19:27 martijn Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/queue.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "grep.h" + +#ifndef REG_STARTEND +#define REG_STARTEND 0004 +#endif +#ifndef REG_NOSPEC +#define REG_NOSPEC 0020 +#endif + +/* Flags passed to regcomp() and regexec() */ +int cflags; +int eflags = REG_STARTEND; + +int matchall; /* shortcut */ +int patterns, pattern_sz; +char **pattern; +regex_t *r_pattern; +fastgrep_t *fg_pattern; + +/* For regex errors */ +char re_error[RE_ERROR_BUF + 1]; + +/* Command-line flags */ +int Aflag; /* -A x: print x lines trailing each match */ +int Bflag; /* -B x: print x lines leading each match */ +int Eflag; /* -E: interpret pattern as extended regexp */ +int Fflag; /* -F: interpret pattern as list of fixed strings */ +int Hflag; /* -H: always print filename header */ +int Lflag; /* -L: only show names of files with no matches */ +int Rflag; /* -R: recursively search directory trees */ +int Zflag; /* -Z: decompress input before processing */ +int bflag; /* -b: show block numbers for each match */ +int cflag; /* -c: only show a count of matching lines */ +int hflag; /* -h: don't print filename headers */ +int iflag; /* -i: ignore case */ +int lflag; /* -l: only show names of files with matches */ +int mflag; /* -m x: stop reading the files after x matches */ +long long mcount; /* count for -m */ +long long mlimit; /* requested value for -m */ +int nflag; /* -n: show line numbers in front of matching lines */ +int oflag; /* -o: print each match */ +int qflag; /* -q: quiet mode (don't output anything) */ +int sflag; /* -s: silent mode (ignore errors) */ +int vflag; /* -v: only show non-matching lines */ +int wflag; /* -w: pattern must start and end on word boundaries */ +int xflag; /* -x: pattern must match entire line */ +int lbflag; /* --line-buffered */ +const char *labelname; /* --label=name */ + +int binbehave = BIN_FILE_BIN; + +enum { + BIN_OPT = CHAR_MAX + 1, + HELP_OPT, + MMAP_OPT, + LINEBUF_OPT, + LABEL_OPT, +}; + +/* Housekeeping */ +int first; /* flag whether or not this is our first match */ +int tail; /* lines left to print */ +int file_err; /* file reading error */ + +struct patfile { + const char *pf_file; + SLIST_ENTRY(patfile) pf_next; +}; +SLIST_HEAD(, patfile) patfilelh; + +extern char *__progname; + +static void +usage(void) +{ + fprintf(stderr, +#ifdef NOZ + "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]" +#else + "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]" +#endif + " [-e pattern]\n" + "\t[-f file] [-m num] [--binary-files=value] [--context[=num]]\n" + "\t[--label=name] [--line-buffered] [pattern] [file ...]\n", + __progname); + exit(2); +} + +#ifdef NOZ +static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy"; +#else +static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy"; +#endif + +static const struct option long_options[] = +{ + {"binary-files", required_argument, NULL, BIN_OPT}, + {"help", no_argument, NULL, HELP_OPT}, + {"mmap", no_argument, NULL, MMAP_OPT}, + {"label", required_argument, NULL, LABEL_OPT}, + {"line-buffered", no_argument, NULL, LINEBUF_OPT}, + {"after-context", required_argument, NULL, 'A'}, + {"before-context", required_argument, NULL, 'B'}, + {"context", optional_argument, NULL, 'C'}, + {"devices", required_argument, NULL, 'D'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"basic-regexp", no_argument, NULL, 'G'}, + {"with-filename", no_argument, NULL, 'H'}, + {"binary", no_argument, NULL, 'U'}, + {"version", no_argument, NULL, 'V'}, + {"text", no_argument, NULL, 'a'}, + {"byte-offset", no_argument, NULL, 'b'}, + {"count", no_argument, NULL, 'c'}, + {"regexp", required_argument, NULL, 'e'}, + {"file", required_argument, NULL, 'f'}, + {"no-filename", no_argument, NULL, 'h'}, + {"ignore-case", no_argument, NULL, 'i'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"max-count", required_argument, NULL, 'm'}, + {"line-number", no_argument, NULL, 'n'}, + {"quiet", no_argument, NULL, 'q'}, + {"silent", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"no-messages", no_argument, NULL, 's'}, + {"revert-match", no_argument, NULL, 'v'}, + {"word-regexp", no_argument, NULL, 'w'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, +#ifndef NOZ + {"decompress", no_argument, NULL, 'Z'}, +#endif + {NULL, no_argument, NULL, 0} +}; + + +static void +add_pattern(char *pat, size_t len) +{ + if (!xflag && (len == 0 || matchall)) { + matchall = 1; + return; + } + if (patterns == pattern_sz) { + pattern_sz *= 2; + pattern = grep_reallocarray(pattern, ++pattern_sz, sizeof(*pattern)); + } + if (len > 0 && pat[len - 1] == '\n') + --len; + /* pat may not be NUL-terminated */ + if (wflag && !Fflag) { + int bol = 0, eol = 0, extra; + if (pat[0] == '^') + bol = 1; + if (len > 0 && pat[len - 1] == '$') + eol = 1; + extra = Eflag ? 2 : 4; + pattern[patterns] = grep_malloc(len + 15 + extra); + snprintf(pattern[patterns], len + 15 + extra, + "%s[[:<:]]%s%.*s%s[[:>:]]%s", + bol ? "^" : "", + Eflag ? "(" : "\\(", + (int)len - bol - eol, pat + bol, + Eflag ? ")" : "\\)", + eol ? "$" : ""); + len += 14 + extra; + } else { + pattern[patterns] = grep_malloc(len + 1); + memcpy(pattern[patterns], pat, len); + pattern[patterns][len] = '\0'; + } + ++patterns; +} + +static void +add_patterns(char *pats) +{ + char *nl; + + while ((nl = strchr(pats, '\n')) != NULL) { + add_pattern(pats, nl - pats); + pats = nl + 1; + } + add_pattern(pats, strlen(pats)); +} + +static void +read_patterns(const char *fn) +{ + FILE *f; + char *line; + ssize_t len; + size_t linesize; + + if ((f = fopen(fn, "r")) == NULL) + err(2, "%s", fn); + line = NULL; + linesize = 0; + while ((len = getline(&line, &linesize, f)) != -1) + add_pattern(line, *line == '\n' ? 0 : len); + if (ferror(f)) + err(2, "%s", fn); + fclose(f); + free(line); +} + +int +main(int argc, char *argv[]) +{ + int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz; + struct patfile *patfile, *pf_next; + long l; + char **expr; + const char *errstr; + + if (pledge("stdio rpath", NULL) == -1) + err(2, "pledge"); + + SLIST_INIT(&patfilelh); + switch (__progname[0]) { + case 'e': + Eflag = 1; + break; + case 'f': + Fflag = 1; + break; +#ifndef NOZ + case 'z': + Zflag = 1; + switch(__progname[1]) { + case 'e': + Eflag = 1; + break; + case 'f': + Fflag = 1; + break; + } + break; +#endif + } + + lastc = '\0'; + newarg = 1; + prevoptind = 1; + needpattern = 1; + expr_sz = exprs = 0; + expr = NULL; + while ((c = getopt_long(argc, argv, optstr, + long_options, NULL)) != -1) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg || !isdigit(lastc)) + Aflag = 0; + else if (Aflag > INT_MAX / 10) + errx(2, "context out of range"); + Aflag = Bflag = (Aflag * 10) + (c - '0'); + break; + case 'A': + case 'B': + l = strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr != NULL) + errx(2, "context %s", errstr); + if (c == 'A') + Aflag = (int)l; + else + Bflag = (int)l; + break; + case 'C': + if (optarg == NULL) + Aflag = Bflag = 2; + else { + l = strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr != NULL) + errx(2, "context %s", errstr); + Aflag = Bflag = (int)l; + } + break; + case 'E': + Fflag = 0; + Eflag = 1; + break; + case 'F': + Eflag = 0; + Fflag = 1; + break; + case 'G': + Eflag = Fflag = 0; + break; + case 'H': + Hflag = 1; + break; + case 'I': + binbehave = BIN_FILE_SKIP; + break; + case 'L': + lflag = 0; + Lflag = qflag = 1; + break; + case 'R': + case 'r': + Rflag = 1; + break; + case 'U': + binbehave = BIN_FILE_BIN; + break; + case 'V': + fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); + exit(0); + break; +#ifndef NOZ + case 'Z': + Zflag = 1; + break; +#endif + case 'a': + binbehave = BIN_FILE_TEXT; + break; + case 'b': + bflag = 1; + break; + case 'c': + cflag = 1; + break; + case 'e': + /* defer adding of expressions until all arguments are parsed */ + if (exprs == expr_sz) { + expr_sz *= 2; + expr = grep_reallocarray(expr, ++expr_sz, + sizeof(*expr)); + } + needpattern = 0; + expr[exprs] = optarg; + ++exprs; + break; + case 'f': + patfile = grep_malloc(sizeof(*patfile)); + patfile->pf_file = optarg; + SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); + needpattern = 0; + break; + case 'h': + hflag = 1; + break; + case 'i': + case 'y': + iflag = 1; + cflags |= REG_ICASE; + break; + case 'l': + Lflag = 0; + lflag = qflag = 1; + break; + case 'm': + mflag = 1; + mlimit = mcount = strtonum(optarg, 0, LLONG_MAX, + &errstr); + if (errstr != NULL) + errx(2, "invalid max-count %s: %s", + optarg, errstr); + break; + case 'n': + nflag = 1; + break; + case 'o': + oflag = 1; + break; + case 'q': + qflag = 1; + break; + case 's': + sflag = 1; + break; + case 'v': + vflag = 1; + break; + case 'w': + wflag = 1; + break; + case 'x': + xflag = 1; + break; + case BIN_OPT: + if (strcmp("binary", optarg) == 0) + binbehave = BIN_FILE_BIN; + else if (strcmp("without-match", optarg) == 0) + binbehave = BIN_FILE_SKIP; + else if (strcmp("text", optarg) == 0) + binbehave = BIN_FILE_TEXT; + else + errx(2, "Unknown binary-files option"); + break; + case 'u': + case MMAP_OPT: + /* default, compatibility */ + break; + case LABEL_OPT: + labelname = optarg; + break; + case LINEBUF_OPT: + lbflag = 1; + break; + case HELP_OPT: + default: + usage(); + } + lastc = c; + newarg = optind != prevoptind; + prevoptind = optind; + } + argc -= optind; + argv += optind; + + for (i = 0; i < exprs; i++) + add_patterns(expr[i]); + free(expr); + expr = NULL; + + for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; + patfile = pf_next) { + pf_next = SLIST_NEXT(patfile, pf_next); + read_patterns(patfile->pf_file); + free(patfile); + } + + if (argc == 0 && needpattern) + usage(); + + if (argc != 0 && needpattern) { + add_patterns(*argv); + --argc; + ++argv; + } + if (argc == 1 && strcmp(*argv, "-") == 0) { + /* stdin */ + --argc; + ++argv; + } + + if (Eflag) + cflags |= REG_EXTENDED; + if (Fflag) + cflags |= REG_NOSPEC; +#ifdef SMALL + /* Sorry, this won't work */ + if (Fflag && wflag) + errx(1, "Can't use small fgrep with -w"); +#endif + fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); + r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); + for (i = 0; i < patterns; ++i) { + /* Check if cheating is allowed (always is for fgrep). */ +#ifndef SMALL + if (Fflag) { + fgrepcomp(&fg_pattern[i], pattern[i]); + } else +#endif + { + if (fastcomp(&fg_pattern[i], pattern[i])) { + /* Fall back to full regex library */ + c = regcomp(&r_pattern[i], pattern[i], cflags); + if (c != 0) { + regerror(c, &r_pattern[i], re_error, + RE_ERROR_BUF); + errx(2, "%s", re_error); + } + } + } + } + + if (lbflag) + setvbuf(stdout, NULL, _IOLBF, 0); + + if ((argc == 0 || argc == 1) && !Rflag && !Hflag) + hflag = 1; + + if (argc == 0 && !Rflag) + exit(!procfile(NULL)); + + if (Rflag) + c = grep_tree(argv); + else + for (c = 0; argc--; ++argv) + c |= procfile(*argv); + + exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); +} diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h new file mode 100644 index 0000000..731bbcc --- /dev/null +++ b/usr.bin/grep/grep.h @@ -0,0 +1,129 @@ +/* $OpenBSD: grep.h,v 1.28 2021/03/10 21:55:22 millert Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <limits.h> +#include <regex.h> +#include <stdint.h> +#include <stdio.h> +#include <zlib.h> + +#define VER_MAJ 0 +#define VER_MIN 9 + +#define BIN_FILE_BIN 0 +#define BIN_FILE_SKIP 1 +#define BIN_FILE_TEXT 2 + +typedef struct { + size_t len; + long long line_no; + off_t off; + char *file; + char *dat; +} str_t; + +typedef struct { + unsigned char *pattern; + int patternLen; + int qsBc[UCHAR_MAX + 1]; + /* flags */ + int bol; + int eol; + int wmatch; + int reversedSearch; +} fastgrep_t; + +/* Flags passed to regcomp() and regexec() */ +extern int cflags, eflags; + +/* Command line flags */ +extern int Aflag, Bflag, Eflag, Fflag, Hflag, Lflag, + Rflag, Zflag, + bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag, + sflag, vflag, wflag, xflag; +extern int binbehave; +extern const char *labelname; + +extern int first, matchall, patterns, tail, file_err; +extern char **pattern; +extern fastgrep_t *fg_pattern; +extern regex_t *r_pattern; + +/* For -m max-count */ +extern long long mcount, mlimit; + +/* For regex errors */ +#define RE_ERROR_BUF 512 +extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */ + +/* util.c */ +int procfile(char *fn); +int grep_tree(char **argv); +void *grep_malloc(size_t size); +void *grep_calloc(size_t nmemb, size_t size); +void *grep_realloc(void *ptr, size_t size); +void *grep_reallocarray(void *ptr, size_t nmemb, size_t size); +void printline(str_t *line, int sep, regmatch_t *pmatch); +int fastcomp(fastgrep_t *, const char *); +void fgrepcomp(fastgrep_t *, const unsigned char *); + +/* queue.c */ +void initqueue(void); +void enqueue(str_t *x); +void printqueue(void); +void clearqueue(void); + +/* mmfile.c */ +typedef struct mmfile { + int fd; + size_t len; + char *base, *end, *ptr; +} mmf_t; + +mmf_t *mmopen(int fd, struct stat *sb); +void mmclose(mmf_t *mmf); +char *mmfgetln(mmf_t *mmf, size_t *l); + +/* file.c */ +struct file; +typedef struct file file_t; + +file_t *grep_fdopen(int fd); +file_t *grep_open(char *path); +int grep_bin_file(file_t *f); +char *grep_fgetln(file_t *f, size_t *l); +void grep_close(file_t *f); + +/* binary.c */ +int bin_file(FILE * f); +int gzbin_file(gzFile f); +int mmbin_file(mmf_t *f); + diff --git a/usr.bin/grep/mmfile.c b/usr.bin/grep/mmfile.c new file mode 100644 index 0000000..62d089a --- /dev/null +++ b/usr.bin/grep/mmfile.c @@ -0,0 +1,90 @@ +/* $OpenBSD: mmfile.c,v 1.19 2019/01/27 14:43:09 deraadt Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/mman.h> +#include <sys/stat.h> + +#include <err.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "grep.h" + +#ifndef SMALL + +#define MAX_MAP_LEN 1048576 + +mmf_t * +mmopen(int fd, struct stat *st) +{ + mmf_t *mmf; + + mmf = grep_malloc(sizeof *mmf); + if (st->st_size > SIZE_MAX) /* too big to mmap */ + goto ouch; + mmf->len = (size_t)st->st_size; + mmf->fd = fd; + mmf->base = mmap(NULL, mmf->len, PROT_READ, MAP_PRIVATE, mmf->fd, (off_t)0); + if (mmf->base == MAP_FAILED) + goto ouch; + mmf->ptr = mmf->base; + mmf->end = mmf->base + mmf->len; + madvise(mmf->base, mmf->len, MADV_SEQUENTIAL); + return mmf; + +ouch: + free(mmf); + return NULL; +} + +void +mmclose(mmf_t *mmf) +{ + munmap(mmf->base, mmf->len); + close(mmf->fd); + free(mmf); +} + +char * +mmfgetln(mmf_t *mmf, size_t *l) +{ + static char *p; + + if (mmf->ptr >= mmf->end) + return NULL; + for (p = mmf->ptr; mmf->ptr < mmf->end; ++mmf->ptr) + if (*mmf->ptr == '\n') + break; + + *l = mmf->ptr - p; + ++mmf->ptr; + return p; +} + +#endif diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c new file mode 100644 index 0000000..96e50ab --- /dev/null +++ b/usr.bin/grep/queue.c @@ -0,0 +1,120 @@ +/* $OpenBSD: queue.c,v 1.7 2015/01/16 06:40:08 deraadt Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * A really poor man's queue. It does only what it has to and gets out of + * Dodge. + */ + +#include <stdlib.h> +#include <string.h> + +#include "grep.h" + +typedef struct queue { + struct queue *next; + str_t data; +} queue_t; + +static queue_t *q_head, *q_tail; +static int count; + +static queue_t *dequeue(void); + +void +initqueue(void) +{ + q_head = q_tail = NULL; +} + +static void +free_item(queue_t *item) +{ + free(item); +} + +void +enqueue(str_t *x) +{ + queue_t *item; + + item = grep_malloc(sizeof *item + x->len); + item->data.len = x->len; + item->data.line_no = x->line_no; + item->data.off = x->off; + item->data.dat = (char *)item + sizeof *item; + memcpy(item->data.dat, x->dat, x->len); + item->data.file = x->file; + item->next = NULL; + + if (!q_head) { + q_head = q_tail = item; + } else { + q_tail->next = item; + q_tail = item; + } + + if (++count > Bflag) + free_item(dequeue()); +} + +static queue_t * +dequeue(void) +{ + queue_t *item; + + if (q_head == NULL) + return NULL; + + --count; + item = q_head; + q_head = item->next; + if (q_head == NULL) + q_tail = NULL; + return item; +} + +void +printqueue(void) +{ + queue_t *item; + + while ((item = dequeue()) != NULL) { + printline(&item->data, '-', NULL); + free_item(item); + } +} + +void +clearqueue(void) +{ + queue_t *item; + + while ((item = dequeue()) != NULL) + free_item(item); +} diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c new file mode 100644 index 0000000..e16d08e --- /dev/null +++ b/usr.bin/grep/util.c @@ -0,0 +1,682 @@ +/* $OpenBSD: util.c,v 1.63 2020/07/23 20:19:27 martijn Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fts.h> +#include <regex.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zlib.h> + +#include "grep.h" + +/* + * Process a file line by line... + */ + +static int linesqueued; +static int procline(str_t *l, int); +static int grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch, int); +#ifndef SMALL +static bool grep_cmp(const char *, const char *, size_t); +static void grep_revstr(unsigned char *, int); +#endif + +int +grep_tree(char **argv) +{ + FTS *fts; + FTSENT *p; + int c, fts_flags; + char *dot_argv[] = { ".", NULL }; + char *path; + + /* default to . if no path given */ + if (argv[0] == NULL) + argv = dot_argv; + + c = 0; + + fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR; + + if (!(fts = fts_open(argv, fts_flags, NULL))) + err(2, NULL); + while ((p = fts_read(fts)) != NULL) { + switch (p->fts_info) { + case FTS_DNR: + break; + case FTS_ERR: + file_err = 1; + if(!sflag) + warnc(p->fts_errno, "%s", p->fts_path); + break; + case FTS_D: + case FTS_DP: + break; + default: + path = p->fts_path; + /* skip "./" if implied */ + if (argv == dot_argv && p->fts_pathlen >= 2) + path += 2; + c |= procfile(path); + break; + } + } + if (errno) + err(2, "fts_read"); + fts_close(fts); + return c; +} + +int +procfile(char *fn) +{ + str_t ln; + file_t *f; + int t, z, nottext, overflow = 0; + unsigned long long c; + + mcount = mlimit; + + if (fn == NULL) { + fn = "(standard input)"; + f = grep_fdopen(STDIN_FILENO); + } else { + f = grep_open(fn); + } + if (f == NULL) { + if (errno == EISDIR) + return 0; + file_err = 1; + if (!sflag) + warn("%s", fn); + return 0; + } + + nottext = grep_bin_file(f); + if (nottext && binbehave == BIN_FILE_SKIP) { + grep_close(f); + return 0; + } + + ln.file = fn; + if (labelname) + ln.file = (char *)labelname; + ln.line_no = 0; + ln.len = 0; + linesqueued = 0; + tail = 0; + ln.off = -1; + + if (Bflag > 0) + initqueue(); + for (c = 0; c == 0 || !(lflag || qflag); ) { + if (mflag && mlimit == 0) + break; + ln.off += ln.len + 1; + if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) + break; + if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') + --ln.len; + ln.line_no++; + + z = tail; + + if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) { + enqueue(&ln); + linesqueued++; + } + if (ULLONG_MAX - c < (unsigned long long)t) + overflow = 1; + else + c += t; + if (mflag && mcount <= 0) + break; + } + if (Bflag > 0) + clearqueue(); + grep_close(f); + + if (cflag) { + if (!hflag) + printf("%s:", ln.file); + printf("%llu%s\n", c, overflow ? "+" : ""); + } + if (lflag && c != 0) + printf("%s\n", fn); + if (Lflag && c == 0) + printf("%s\n", fn); + if (c && !cflag && !lflag && !Lflag && + binbehave == BIN_FILE_BIN && nottext && !qflag) + printf("Binary file %s matches\n", fn); + + return overflow || c != 0; +} + + +/* + * Process an individual line in a file. Return non-zero if it matches. + */ + +#define isword(x) (isalnum((unsigned char)x) || (x) == '_') + +static int +procline(str_t *l, int nottext) +{ + regmatch_t pmatch = { 0 }; + int c, i, r; + regoff_t offset; + + /* size_t will be converted to regoff_t. ssize_t is guaranteed to fit + * into regoff_t */ + if (l->len > SSIZE_MAX) { + errx(2, "Line is too big to process"); + } + + c = 0; + i = 0; + if (matchall) { + c = 1; + goto print; + } + + for (i = 0; i < patterns; i++) { + offset = 0; +redo: + if (fg_pattern[i].pattern) { + int flags = 0; + if (offset) + flags |= REG_NOTBOL; + r = grep_search(&fg_pattern[i], l->dat + offset, + l->len - offset, &pmatch, flags); + pmatch.rm_so += offset; + pmatch.rm_eo += offset; + } else { + int flags = eflags; + if (offset) + flags |= REG_NOTBOL; + pmatch.rm_so = offset; + pmatch.rm_eo = l->len; + r = regexec(&r_pattern[i], l->dat, 1, &pmatch, flags); + } + if (r == 0 && xflag) { + if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len) + r = REG_NOMATCH; + } + if (r == 0) { + c = 1; + if (oflag && pmatch.rm_so != pmatch.rm_eo) + goto print; + break; + } + } + if (oflag) + return c; +print: + if (vflag) + c = !c; + + /* Count the matches if we have a match limit */ + if (mflag) + mcount -= c; + + if (c && binbehave == BIN_FILE_BIN && nottext) + return c; /* Binary file */ + + if ((tail > 0 || c) && !cflag && !qflag) { + if (c) { + if (first > 0 && tail == 0 && (Bflag < linesqueued) && + (Aflag || Bflag)) + printf("--\n"); + first = 1; + tail = Aflag; + if (Bflag > 0) + printqueue(); + linesqueued = 0; + printline(l, ':', oflag ? &pmatch : NULL); + } else { + printline(l, '-', oflag ? &pmatch : NULL); + tail--; + } + } + if (oflag && !matchall) { + offset = pmatch.rm_eo; + goto redo; + } + return c; +} + +#ifndef SMALL +void +fgrepcomp(fastgrep_t *fg, const unsigned char *pattern) +{ + int i; + + /* Initialize. */ + fg->patternLen = strlen(pattern); + fg->bol = 0; + fg->eol = 0; + fg->wmatch = wflag; + fg->reversedSearch = 0; + + /* + * Make a copy and upper case it for later if in -i mode, + * else just copy the pointer. + */ + if (iflag) { + fg->pattern = grep_malloc(fg->patternLen + 1); + for (i = 0; i < fg->patternLen; i++) + fg->pattern[i] = toupper(pattern[i]); + fg->pattern[fg->patternLen] = '\0'; + } else + fg->pattern = (unsigned char *)pattern; /* really const */ + + /* Preprocess pattern. */ + for (i = 0; i <= UCHAR_MAX; i++) + fg->qsBc[i] = fg->patternLen; + for (i = 1; i < fg->patternLen; i++) { + fg->qsBc[fg->pattern[i]] = fg->patternLen - i; + /* + * If case is ignored, make the jump apply to both upper and + * lower cased characters. As the pattern is stored in upper + * case, apply the same to the lower case equivalents. + */ + if (iflag) + fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; + } +} +#endif + +/* + * Returns: -1 on failure, 0 on success + */ +int +fastcomp(fastgrep_t *fg, const char *pattern) +{ +#ifdef SMALL + return -1; +#else + int i; + int bol = 0; + int eol = 0; + int shiftPatternLen; + int hasDot = 0; + int firstHalfDot = -1; + int firstLastHalfDot = -1; + int lastHalfDot = 0; + + /* Initialize. */ + fg->patternLen = strlen(pattern); + fg->bol = 0; + fg->eol = 0; + fg->wmatch = 0; + fg->reversedSearch = 0; + + /* Remove end-of-line character ('$'). */ + if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') { + eol++; + fg->eol = 1; + fg->patternLen--; + } + + /* Remove beginning-of-line character ('^'). */ + if (pattern[0] == '^') { + bol++; + fg->bol = 1; + fg->patternLen--; + } + + /* Remove enclosing [[:<:]] and [[:>:]] (word match). */ + if (wflag) { + /* basic re's use \( \), extended re's ( ) */ + int extra = Eflag ? 1 : 2; + fg->patternLen -= 14 + 2 * extra; + fg->wmatch = 7 + extra; + } else if (fg->patternLen >= 14 && + strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 && + strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) { + fg->patternLen -= 14; + fg->wmatch = 7; + } + + /* + * Copy pattern minus '^' and '$' characters as well as word + * match character classes at the beginning and ending of the + * string respectively. + */ + fg->pattern = grep_malloc(fg->patternLen + 1); + memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen); + fg->pattern[fg->patternLen] = '\0'; + + /* Look for ways to cheat...er...avoid the full regex engine. */ + for (i = 0; i < fg->patternLen; i++) + { + switch (fg->pattern[i]) { + case '.': + hasDot = i; + if (i < fg->patternLen / 2) { + if (firstHalfDot < 0) + /* Closest dot to the beginning */ + firstHalfDot = i; + } else { + /* Closest dot to the end of the pattern. */ + lastHalfDot = i; + if (firstLastHalfDot < 0) + firstLastHalfDot = i; + } + break; + case '(': case ')': + case '{': case '}': + /* Special in BRE if preceded by '\\' */ + case '?': + case '+': + case '|': + /* Not special in BRE. */ + if (!Eflag) + goto nonspecial; + case '\\': + case '*': + case '[': case ']': + /* Free memory and let others know this is empty. */ + free(fg->pattern); + fg->pattern = NULL; + return (-1); + default: +nonspecial: + if (iflag) + fg->pattern[i] = toupper(fg->pattern[i]); + break; + } + } + + /* + * Determine if a reverse search would be faster based on the placement + * of the dots. + */ + if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) && + ((lastHalfDot) && ((firstHalfDot < 0) || + ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) { + fg->reversedSearch = 1; + hasDot = fg->patternLen - (firstHalfDot < 0 ? + firstLastHalfDot : firstHalfDot) - 1; + grep_revstr(fg->pattern, fg->patternLen); + } + + /* + * Normal Quick Search would require a shift based on the position the + * next character after the comparison is within the pattern. With + * wildcards, the position of the last dot effects the maximum shift + * distance. + * The closer to the end the wild card is the slower the search. A + * reverse version of this algorithm would be useful for wildcards near + * the end of the string. + * + * Examples: + * Pattern Max shift + * ------- --------- + * this 5 + * .his 4 + * t.is 3 + * th.s 2 + * thi. 1 + */ + + /* Adjust the shift based on location of the last dot ('.'). */ + shiftPatternLen = fg->patternLen - hasDot; + + /* Preprocess pattern. */ + for (i = 0; i <= UCHAR_MAX; i++) + fg->qsBc[i] = shiftPatternLen; + for (i = hasDot + 1; i < fg->patternLen; i++) { + fg->qsBc[fg->pattern[i]] = fg->patternLen - i; + /* + * If case is ignored, make the jump apply to both upper and + * lower cased characters. As the pattern is stored in upper + * case, apply the same to the lower case equivalents. + */ + if (iflag) + fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; + } + + /* + * Put pattern back to normal after pre-processing to allow for easy + * comparisons later. + */ + if (fg->reversedSearch) + grep_revstr(fg->pattern, fg->patternLen); + + return (0); +#endif +} + +/* + * Word boundaries using regular expressions are defined as the point + * of transition from a non-word char to a word char, or vice versa. + * This means that grep -w +a and grep -w a+ never match anything, + * because they lack a starting or ending transition, but grep -w a+b + * does match a line containing a+b. + */ +#define wmatch(d, l, s, e) \ + ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \ + e > s && isword(d[s]) && isword(d[e-1])) + +static int +grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch, + int flags) +{ +#ifdef SMALL + return 0; +#else + regoff_t j; + int rtrnVal = REG_NOMATCH; + + pmatch->rm_so = -1; + pmatch->rm_eo = -1; + + /* No point in going farther if we do not have enough data. */ + if (dataLen < fg->patternLen) + return (rtrnVal); + + /* Only try once at the beginning or ending of the line. */ + if (fg->bol || fg->eol) { + if (fg->bol && (flags & REG_NOTBOL)) + return 0; + /* Simple text comparison. */ + /* Verify data is >= pattern length before searching on it. */ + if (dataLen >= fg->patternLen) { + /* Determine where in data to start search at. */ + if (fg->eol) + j = dataLen - fg->patternLen; + else + j = 0; + if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen))) + if (grep_cmp(fg->pattern, data + j, + fg->patternLen)) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->patternLen; + if (!fg->wmatch || wmatch(data, dataLen, + pmatch->rm_so, pmatch->rm_eo)) + rtrnVal = 0; + } + } + } else if (fg->reversedSearch) { + /* Quick Search algorithm. */ + j = dataLen; + do { + if (grep_cmp(fg->pattern, data + j - fg->patternLen, + fg->patternLen)) { + pmatch->rm_so = j - fg->patternLen; + pmatch->rm_eo = j; + if (!fg->wmatch || wmatch(data, dataLen, + pmatch->rm_so, pmatch->rm_eo)) { + rtrnVal = 0; + break; + } + } + /* Shift if within bounds, otherwise, we are done. */ + if (j == fg->patternLen) + break; + j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]]; + } while (j >= fg->patternLen); + } else { + /* Quick Search algorithm. */ + j = 0; + do { + if (grep_cmp(fg->pattern, data + j, fg->patternLen)) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->patternLen; + if (fg->patternLen == 0 || !fg->wmatch || + wmatch(data, dataLen, pmatch->rm_so, + pmatch->rm_eo)) { + rtrnVal = 0; + break; + } + } + + /* Shift if within bounds, otherwise, we are done. */ + if (j + fg->patternLen == dataLen) + break; + else + j += fg->qsBc[(unsigned char)data[j + fg->patternLen]]; + } while (j <= (dataLen - fg->patternLen)); + } + + return (rtrnVal); +#endif +} + + +void * +grep_malloc(size_t size) +{ + void *ptr; + + if ((ptr = malloc(size)) == NULL) + err(2, "malloc"); + return ptr; +} + +void * +grep_calloc(size_t nmemb, size_t size) +{ + void *ptr; + + if ((ptr = calloc(nmemb, size)) == NULL) + err(2, "calloc"); + return ptr; +} + +void * +grep_realloc(void *ptr, size_t size) +{ + if ((ptr = realloc(ptr, size)) == NULL) + err(2, "realloc"); + return ptr; +} + +void * +grep_reallocarray(void *ptr, size_t nmemb, size_t size) +{ + if ((ptr = reallocarray(ptr, nmemb, size)) == NULL) + err(2, "reallocarray"); + return ptr; +} + +#ifndef SMALL +/* + * Returns: true on success, false on failure + */ +static bool +grep_cmp(const char *pattern, const char *data, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.')) + || (iflag && pattern[i] == toupper((unsigned char)data[i]))) + continue; + return false; + } + + return true; +} + +static void +grep_revstr(unsigned char *str, int len) +{ + int i; + char c; + + for (i = 0; i < len / 2; i++) { + c = str[i]; + str[i] = str[len - i - 1]; + str[len - i - 1] = c; + } +} +#endif + +void +printline(str_t *line, int sep, regmatch_t *pmatch) +{ + int n; + + n = 0; + if (!hflag) { + fputs(line->file, stdout); + ++n; + } + if (nflag) { + if (n) + putchar(sep); + printf("%lld", line->line_no); + ++n; + } + if (bflag) { + if (n) + putchar(sep); + printf("%lld", (long long)line->off + + (pmatch ? pmatch->rm_so : 0)); + ++n; + } + if (n) + putchar(sep); + if (pmatch) + fwrite(line->dat + pmatch->rm_so, + pmatch->rm_eo - pmatch->rm_so, 1, stdout); + else + fwrite(line->dat, line->len, 1, stdout); + putchar('\n'); +} |