diff options
author | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
---|---|---|
committer | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
commit | 5d69c6a2661bba0a22f3ecfd517e2e9767a38346 (patch) | |
tree | 1f479b2714e127835db7f33a3bfed4c38c52f883 /usr.bin/diff | |
parent | e2abcdca396661cbe0ae2ddb13d5c2b85682c13a (diff) | |
download | otools-5d69c6a2661bba0a22f3ecfd517e2e9767a38346.tar.gz |
add tools
Diffstat (limited to 'usr.bin/diff')
-rw-r--r-- | usr.bin/diff/CVS/Entries | 9 | ||||
-rw-r--r-- | usr.bin/diff/CVS/Repository | 1 | ||||
-rw-r--r-- | usr.bin/diff/CVS/Root | 1 | ||||
-rw-r--r-- | usr.bin/diff/Makefile | 7 | ||||
-rw-r--r-- | usr.bin/diff/diff.1 | 474 | ||||
-rw-r--r-- | usr.bin/diff/diff.c | 402 | ||||
-rw-r--r-- | usr.bin/diff/diff.h | 98 | ||||
-rw-r--r-- | usr.bin/diff/diffdir.c | 237 | ||||
-rw-r--r-- | usr.bin/diff/diffreg.c | 1485 | ||||
-rw-r--r-- | usr.bin/diff/xmalloc.c | 85 | ||||
-rw-r--r-- | usr.bin/diff/xmalloc.h | 30 |
11 files changed, 2829 insertions, 0 deletions
diff --git a/usr.bin/diff/CVS/Entries b/usr.bin/diff/CVS/Entries new file mode 100644 index 0000000..912da0e --- /dev/null +++ b/usr.bin/diff/CVS/Entries @@ -0,0 +1,9 @@ +/Makefile/1.3/Tue May 29 18:24:56 2007// +/diff.1/1.49/Sat Feb 8 01:09:58 2020// +/diff.c/1.67/Fri Jun 28 13:35:00 2019// +/diff.h/1.33/Mon Oct 5 20:15:00 2015// +/diffdir.c/1.47/Fri Jan 25 00:19:26 2019// +/diffreg.c/1.93/Fri Jun 28 13:35:00 2019// +/xmalloc.c/1.10/Fri Jun 28 05:44:09 2019// +/xmalloc.h/1.4/Thu Nov 12 16:30:30 2015// +D diff --git a/usr.bin/diff/CVS/Repository b/usr.bin/diff/CVS/Repository new file mode 100644 index 0000000..088ef75 --- /dev/null +++ b/usr.bin/diff/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/diff diff --git a/usr.bin/diff/CVS/Root b/usr.bin/diff/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/diff/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile new file mode 100644 index 0000000..4f1c9d5 --- /dev/null +++ b/usr.bin/diff/Makefile @@ -0,0 +1,7 @@ +# $OpenBSD: Makefile,v 1.3 2007/05/29 18:24:56 ray Exp $ + +PROG= diff +SRCS= diff.c diffdir.c diffreg.c xmalloc.c +COPTS+= -Wall + +.include <bsd.prog.mk> diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1 new file mode 100644 index 0000000..353c770 --- /dev/null +++ b/usr.bin/diff/diff.1 @@ -0,0 +1,474 @@ +.\" $OpenBSD: diff.1,v 1.49 2020/02/08 01:09:58 jsg Exp $ +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" @(#)diff.1 8.1 (Berkeley) 6/30/93 +.\" +.Dd $Mdocdate: February 8 2020 $ +.Dt DIFF 1 +.Os +.Sh NAME +.Nm diff +.Nd differential file and directory comparator +.Sh SYNOPSIS +.Nm diff +.Op Fl abdipTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl I Ar pattern +.Op Fl L Ar label +.Ar file1 file2 +.Nm diff +.Op Fl abdipTtw +.Op Fl I Ar pattern +.Op Fl L Ar label +.Fl C Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abditw +.Op Fl I Ar pattern +.Fl D Ar string +.Ar file1 file2 +.Nm diff +.Op Fl abdipTtw +.Op Fl I Ar pattern +.Op Fl L Ar label +.Fl U Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abdiNPprsTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl I Ar pattern +.Bk -words +.Op Fl L Ar label +.Op Fl S Ar name +.Op Fl X Ar file +.Op Fl x Ar pattern +.Ek +.Ar dir1 dir2 +.Sh DESCRIPTION +The +.Nm +utility compares the contents of +.Ar file1 +and +.Ar file2 +and writes to the standard output the list of changes necessary to +convert one file into the other. +No output is produced if the files are identical. +.Pp +Output options (mutually exclusive): +.Bl -tag -width Ds +.It Fl C Ar number +Like +.Fl c +but produces a diff with +.Ar number +lines of context. +.It Fl c +Produces a diff with 3 lines of context. +With +.Fl c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates and then each change is separated +by a line with fifteen +.Li * Ns 's . +The lines removed from +.Ar file1 +are marked with +.Sq \-\ \& ; +those added to +.Ar file2 +are marked +.Sq +\ \& . +Lines which are changed from one file to the other are marked in +both files with +.Sq !\ \& . +Changes which lie within 3 lines of each other are grouped together on +output. 
+.It Fl D Ar string +Creates a merged version of +.Ar file1 +and +.Ar file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining +.Ar string +is equivalent to compiling +.Ar file1 , +while defining +.Ar string +will yield +.Ar file2 . +.It Fl e +Produces output in a form suitable as input for the editor utility, +.Xr ed 1 , +which can then be used to convert file1 into file2. +.Pp +Extra commands are added to the output when comparing directories with +.Fl e , +so that the result is a +.Xr sh 1 +script for converting text files which are common to the two directories +from their state in +.Ar dir1 +to their state in +.Ar dir2 . +.It Fl f +Identical output to that of the +.Fl e +flag, but in reverse order. +It cannot be digested by +.Xr ed 1 . +.It Fl n +Produces a script similar to that of +.Fl e , +but in the opposite order and with a count of changed lines on each +insert or delete command. +This is the form used by +.Xr rcsdiff 1 . +.It Fl q +Just print a line when the files differ. +Does not output a list of changes. +.It Fl U Ar number +Like +.Fl u +but produces a diff with +.Ar number +lines of context. +.It Fl u +Produces a +.Em unified +diff with 3 lines of context. +A unified diff is similar to the context diff produced by the +.Fl c +option. +However, unlike with +.Fl c , +all lines to be changed (added and/or removed) are present in +a single section. +.El +.Pp +Comparison options: +.Bl -tag -width Ds +.It Fl a +Treat all files as ASCII text. +Normally +.Nm +will simply print +.Dq Binary files ... differ +if files contain binary characters. +Use of this option forces +.Nm +to produce a diff. +.It Fl b +Causes trailing blanks (spaces and tabs) to be ignored, and other +strings of blanks to compare equal. +.It Fl d +Try very hard to produce a diff as small as possible. +This may consume a lot of processing power and memory when processing +large files with many changes. 
+.It Fl I Ar pattern +Ignores changes, insertions, and deletions whose lines match the +extended regular expression +.Ar pattern . +Multiple +.Fl I +patterns may be specified. +All lines in the change must match some pattern for the change to be +ignored. +See +.Xr re_format 7 +for more information on regular expression patterns. +.It Fl i +Ignores the case of letters. +E.g., +.Dq A +will compare equal to +.Dq a . +.It Fl L Ar label +Print +.Ar label +instead of the first (and second, if this option is specified twice) +file name and time in the context or unified diff header. +.It Fl p +With unified and context diffs, show with each change +the first 40 characters of the last line before the context beginning +with a letter, an underscore or a dollar sign. +For C source code following standard layout conventions, this will +show the prototype of the function the change applies to. +.It Fl T +Print a tab rather than a space before the rest of the line for the +normal, context or unified output formats. +This makes the alignment of tabs in the line consistent. +.It Fl t +Will expand tabs in output lines. +Normal or +.Fl c +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. +This option will preserve the original source's indentation. +.It Fl w +Is similar to +.Fl b +but causes whitespace (blanks and tabs) to be totally ignored. +E.g., +.Dq if (\ \&a == b \&) +will compare equal to +.Dq if(a==b) . +.El +.Pp +Directory comparison options: +.Bl -tag -width Ds +.It Fl N +If a file is found in only one directory, act as if it was found in the +other directory too but was of zero size. +.It Fl P +If a file is found only in +.Ar dir2 , +act as if it was found in +.Ar dir1 +too but was of zero size. +.It Fl r +Causes application of +.Nm +recursively to common subdirectories encountered. 
+.It Fl S Ar name +Re-starts a directory +.Nm +in the middle, beginning with file +.Ar name . +.It Fl s +Causes +.Nm +to report files which are the same, which are otherwise not mentioned. +.It Fl X Ar file +Exclude files and subdirectories from comparison whose basenames match +lines in +.Ar file . +Multiple +.Fl X +options may be specified. +.It Fl x Ar pattern +Exclude files and subdirectories from comparison whose basenames match +.Ar pattern . +Patterns are matched using shell-style globbing via +.Xr fnmatch 3 . +Multiple +.Fl x +options may be specified. +.El +.Pp +If both arguments are directories, +.Nm +sorts the contents of the directories by name, and then runs the +regular file +.Nm +algorithm, producing a change list, +on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are described as such. +In directory mode only regular files and directories are compared. +If a non-regular file such as a device special file or FIFO +is encountered, a diagnostic message is printed. +.Pp +If only one of +.Ar file1 +and +.Ar file2 +is a directory, +.Nm +is applied to the non-directory file and the file contained in +the directory file with a filename that is the same as the +last component of the non-directory file. +.Pp +If either +.Ar file1 +or +.Ar file2 +is +.Sq - , +the standard input is +used in its place. +.Ss Output Style +The default (without +.Fl e , +.Fl c , +or +.Fl n +.\" -C +options) +output contains lines of these forms, where +.Va XX , YY , ZZ , QQ +are line numbers respective of file order. +.Pp +.Bl -tag -width "XX,YYcZZ,QQ" -compact +.It Li XX Ns Ic a Ns Li YY +At (the end of) line +.Va XX +of +.Ar file1 , +append the contents +of line +.Va YY +of +.Ar file2 +to make them equal. +.It Li XX Ns Ic a Ns Li YY,ZZ +Same as above, but append the range of lines, +.Va YY +through +.Va ZZ +of +.Ar file2 +to line +.Va XX +of file1. 
+.It Li XX Ns Ic d Ns Li YY +At line +.Va XX +delete +the line. +The value +.Va YY +tells to which line the change would bring +.Ar file1 +in line with +.Ar file2 . +.It Li XX,YY Ns Ic d Ns Li ZZ +Delete the range of lines +.Va XX +through +.Va YY +in +.Ar file1 . +.It Li XX Ns Ic c Ns Li YY +Change the line +.Va XX +in +.Ar file1 +to the line +.Va YY +in +.Ar file2 . +.It Li XX,YY Ns Ic c Ns Li ZZ +Replace the range of specified lines with the line +.Va ZZ . +.It Li XX,YY Ns Ic c Ns Li ZZ,QQ +Replace the range +.Va XX , Ns Va YY +from +.Ar file1 +with the range +.Va ZZ , Ns Va QQ +from +.Ar file2 . +.El +.Pp +These lines resemble +.Xr ed 1 +subcommands to convert +.Ar file1 +into +.Ar file2 . +The line numbers before the action letters pertain to +.Ar file1 ; +those after pertain to +.Ar file2 . +Thus, by exchanging +.Ic a +for +.Ic d +and reading the line in reverse order, one can also +determine how to convert +.Ar file2 +into +.Ar file1 . +As in +.Xr ed 1 , +identical +pairs (where num1 = num2) are abbreviated as a single +number. +.Sh FILES +.Bl -tag -width /tmp/diff.XXXXXXXX -compact +.It Pa /tmp/diff. Ns Ar XXXXXXXX +Temporary file used when comparing a device or the standard input. +Note that the temporary file is unlinked as soon as it is created +so it will not show up in a directory listing. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +No differences were found. +.It 1 +Differences were found. +.It >1 +An error occurred. +.El +.Sh SEE ALSO +.Xr cmp 1 , +.Xr comm 1 , +.Xr diff3 1 , +.Xr ed 1 , +.Xr patch 1 , +.Xr sdiff 1 +.Rs +.%A James W. Hunt +.%A M. Douglas McIlroy +.%T "An Algorithm for Differential File Comparison" +.%J Computing Science Technical Report +.%Q Bell Laboratories 41 +.%D June 1976 +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. 
+.Pp +The flags +.Op Fl aDdIiLNnPpqSsTtwXx +are extensions to that specification. +.Sh HISTORY +A +.Nm +command appeared in +.At v5 . diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c new file mode 100644 index 0000000..64cdd45 --- /dev/null +++ b/usr.bin/diff/diff.c @@ -0,0 +1,402 @@ +/* $OpenBSD: diff.c,v 1.67 2019/06/28 13:35:00 deraadt Exp $ */ + +/* + * Copyright (c) 2003 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. 
+ */ + +#include <sys/cdefs.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +int Nflag, Pflag, rflag, sflag, Tflag; +int diff_format, diff_context, status; +char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +struct stat stb1, stb2; +struct excludes *excludes_list; +regex_t ignore_re; + +#define OPTIONS "0123456789abC:cdD:efhI:iL:lnNPpqrS:sTtU:uwX:x:" +static struct option longopts[] = { + { "text", no_argument, 0, 'a' }, + { "ignore-space-change", no_argument, 0, 'b' }, + { "context", optional_argument, 0, 'C' }, + { "ifdef", required_argument, 0, 'D' }, + { "minimal", no_argument, 0, 'd' }, + { "ed", no_argument, 0, 'e' }, + { "forward-ed", no_argument, 0, 'f' }, + { "ignore-matching-lines", required_argument, 0, 'I' }, + { "ignore-case", no_argument, 0, 'i' }, + { "label", required_argument, 0, 'L' }, + { "new-file", no_argument, 0, 'N' }, + { "rcs", no_argument, 0, 'n' }, + { "unidirectional-new-file", no_argument, 0, 'P' }, + { "show-c-function", no_argument, 0, 'p' }, + { "brief", no_argument, 0, 'q' }, + { "recursive", no_argument, 0, 'r' }, + { "report-identical-files", no_argument, 0, 's' }, + { "starting-file", required_argument, 0, 'S' }, + { "expand-tabs", no_argument, 0, 't' }, + { "initial-tab", no_argument, 0, 'T' }, + { "unified", optional_argument, 0, 'U' }, + { "ignore-all-space", no_argument, 0, 'w' }, + { "exclude", required_argument, 0, 'x' }, + { "exclude-from", required_argument, 0, 'X' }, + { NULL, 0, 0, '\0'} +}; + +__dead void usage(void); +void push_excludes(char *); +void push_ignore_pats(char *); +void read_excludes_file(char *file); +void set_argstr(char **, char **); + +int +main(int argc, char **argv) +{ + char *ep, **oargv; + long l; + int ch, dflags, lastch, gotstdin, prevoptind, newarg; + + oargv 
= argv; + gotstdin = 0; + dflags = 0; + lastch = '\0'; + prevoptind = 1; + newarg = 1; + while ((ch = getopt_long(argc, argv, OPTIONS, longopts, NULL)) != -1) { + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg) + usage(); /* disallow -[0-9]+ */ + else if (lastch == 'c' || lastch == 'u') + diff_context = 0; + else if (!isdigit(lastch) || diff_context > INT_MAX / 10) + usage(); + diff_context = (diff_context * 10) + (ch - '0'); + break; + case 'a': + dflags |= D_FORCEASCII; + break; + case 'b': + dflags |= D_FOLDBLANKS; + break; + case 'C': + case 'c': + diff_format = D_CONTEXT; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } else + diff_context = 3; + break; + case 'd': + dflags |= D_MINIMAL; + break; + case 'D': + diff_format = D_IFDEF; + ifdefname = optarg; + break; + case 'e': + diff_format = D_EDIT; + break; + case 'f': + diff_format = D_REVERSE; + break; + case 'h': + /* silently ignore for backwards compatibility */ + break; + case 'I': + push_ignore_pats(optarg); + break; + case 'i': + dflags |= D_IGNORECASE; + break; + case 'L': + if (label[0] == NULL) + label[0] = optarg; + else if (label[1] == NULL) + label[1] = optarg; + else + usage(); + break; + case 'N': + Nflag = 1; + break; + case 'n': + diff_format = D_NREVERSE; + break; + case 'p': + dflags |= D_PROTOTYPE; + break; + case 'P': + Pflag = 1; + break; + case 'r': + rflag = 1; + break; + case 'q': + diff_format = D_BRIEF; + break; + case 'S': + start = optarg; + break; + case 's': + sflag = 1; + break; + case 'T': + Tflag = 1; + break; + case 't': + dflags |= D_EXPANDTABS; + break; + case 'U': + case 'u': + diff_format = D_UNIFIED; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } else + diff_context = 3; + break; + case 'w': + dflags |= 
D_IGNOREBLANKS; + break; + case 'X': + read_excludes_file(optarg); + break; + case 'x': + push_excludes(optarg); + break; + default: + usage(); + break; + } + lastch = ch; + newarg = optind != prevoptind; + prevoptind = optind; + } + argc -= optind; + argv += optind; + + if (pledge("stdio rpath tmppath", NULL) == -1) + err(2, "pledge"); + + /* + * Do sanity checks, fill in stb1 and stb2 and call the appropriate + * driver routine. Both drivers use the contents of stb1 and stb2. + */ + if (argc != 2) + usage(); + if (ignore_pats != NULL) { + char buf[BUFSIZ]; + int error; + + if ((error = regcomp(&ignore_re, ignore_pats, + REG_NEWLINE | REG_EXTENDED)) != 0) { + regerror(error, &ignore_re, buf, sizeof(buf)); + if (*ignore_pats != '\0') + errx(2, "%s: %s", ignore_pats, buf); + else + errx(2, "%s", buf); + } + } + if (strcmp(argv[0], "-") == 0) { + fstat(STDIN_FILENO, &stb1); + gotstdin = 1; + } else if (stat(argv[0], &stb1) != 0) + err(2, "%s", argv[0]); + if (strcmp(argv[1], "-") == 0) { + fstat(STDIN_FILENO, &stb2); + gotstdin = 1; + } else if (stat(argv[1], &stb2) != 0) + err(2, "%s", argv[1]); + if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) + errx(2, "can't compare - to a directory"); + set_argstr(oargv, argv); + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (diff_format == D_IFDEF) + errx(2, "-D option not supported with directories"); + diffdir(argv[0], argv[1], dflags); + } else { + if (S_ISDIR(stb1.st_mode)) { + argv[0] = splice(argv[0], argv[1]); + if (stat(argv[0], &stb1) == -1) + err(2, "%s", argv[0]); + } + if (S_ISDIR(stb2.st_mode)) { + argv[1] = splice(argv[1], argv[0]); + if (stat(argv[1], &stb2) == -1) + err(2, "%s", argv[1]); + } + print_status(diffreg(argv[0], argv[1], dflags), argv[0], argv[1], + ""); + } + exit(status); +} + +void +set_argstr(char **av, char **ave) +{ + size_t argsize; + char **ap; + + argsize = 4 + *ave - *av + 1; + diffargs = xmalloc(argsize); + strlcpy(diffargs, "diff", argsize); + for (ap = av + 
1; ap < ave; ap++) { + if (strcmp(*ap, "--") != 0) { + strlcat(diffargs, " ", argsize); + strlcat(diffargs, *ap, argsize); + } + } +} + +/* + * Read in an excludes file and push each line. + */ +void +read_excludes_file(char *file) +{ + FILE *fp; + char *buf, *pattern; + size_t len; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) + err(2, "%s", file); + while ((buf = fgetln(fp, &len)) != NULL) { + if (buf[len - 1] == '\n') + len--; + pattern = xmalloc(len + 1); + memcpy(pattern, buf, len); + pattern[len] = '\0'; + push_excludes(pattern); + } + if (strcmp(file, "-") != 0) + fclose(fp); +} + +/* + * Push a pattern onto the excludes list. + */ +void +push_excludes(char *pattern) +{ + struct excludes *entry; + + entry = xmalloc(sizeof(*entry)); + entry->pattern = pattern; + entry->next = excludes_list; + excludes_list = entry; +} + +void +push_ignore_pats(char *pattern) +{ + size_t len; + + if (ignore_pats == NULL) + ignore_pats = xstrdup(pattern); + else { + /* old + "|" + new + NUL */ + len = strlen(ignore_pats) + strlen(pattern) + 2; + ignore_pats = xreallocarray(ignore_pats, 1, len); + strlcat(ignore_pats, "|", len); + strlcat(ignore_pats, pattern, len); + } +} + +void +print_only(const char *path, size_t dirlen, const char *entry) +{ + if (dirlen > 1) + dirlen--; + printf("Only in %.*s: %s\n", (int)dirlen, path, entry); +} + +void +print_status(int val, char *path1, char *path2, char *entry) +{ + switch (val) { + case D_BINARY: + printf("Binary files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_DIFFER: + if (diff_format == D_BRIEF) + printf("Files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_SAME: + if (sflag) + printf("Files %s%s and %s%s are identical\n", + path1, entry, path2, entry); + break; + case D_MISMATCH1: + printf("File %s%s is a directory while file %s%s is a regular file\n", + path1, entry, path2, entry); + break; + case D_MISMATCH2: + printf("File %s%s 
is a regular file while file %s%s is a directory\n", + path1, entry, path2, entry); + break; + case D_SKIPPED1: + printf("File %s%s is not a regular file or directory and was skipped\n", + path1, entry); + break; + case D_SKIPPED2: + printf("File %s%s is not a regular file or directory and was skipped\n", + path2, entry); + break; + } +} + +__dead void +usage(void) +{ + (void)fprintf(stderr, + "usage: diff [-abdipTtw] [-c | -e | -f | -n | -q | -u] [-I pattern] [-L label]\n" + " file1 file2\n" + " diff [-abdipTtw] [-I pattern] [-L label] -C number file1 file2\n" + " diff [-abditw] [-I pattern] -D string file1 file2\n" + " diff [-abdipTtw] [-I pattern] [-L label] -U number file1 file2\n" + " diff [-abdiNPprsTtw] [-c | -e | -f | -n | -q | -u] [-I pattern]\n" + " [-L label] [-S name] [-X file] [-x pattern] dir1 dir2\n"); + + exit(2); +} diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h new file mode 100644 index 0000000..3a36222 --- /dev/null +++ b/usr.bin/diff/diff.h @@ -0,0 +1,98 @@ + + +/*ROR + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diff.h 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/types.h> +#include <regex.h> + +/* + * Output format options + */ +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_UNIFIED 3 /* Unified context diff */ +#define D_IFDEF 4 /* Diff with merged #ifdef's */ +#define D_NREVERSE 5 /* Reverse ed script with numbered + lines and no trailing . 
*/ +#define D_BRIEF 6 /* Say if the files differ */ + +/* + * Output flags + */ +#define D_HEADER 0x001 /* Print a header/footer between files */ +#define D_EMPTY1 0x002 /* Treat first file as empty (/dev/null) */ +#define D_EMPTY2 0x004 /* Treat second file as empty (/dev/null) */ + +/* + * Command line flags + */ +#define D_FORCEASCII 0x008 /* Treat file as ascii regardless of content */ +#define D_FOLDBLANKS 0x010 /* Treat all white space as equal */ +#define D_MINIMAL 0x020 /* Make diff as small as possible */ +#define D_IGNORECASE 0x040 /* Case-insensitive matching */ +#define D_PROTOTYPE 0x080 /* Display C function prototype */ +#define D_EXPANDTABS 0x100 /* Expand tabs to spaces */ +#define D_IGNOREBLANKS 0x200 /* Ignore white space changes */ + +/* + * Status values for print_status() and diffreg() return values + */ +#define D_SAME 0 /* Files are the same */ +#define D_DIFFER 1 /* Files are different */ +#define D_BINARY 2 /* Binary files are different */ +#define D_MISMATCH1 3 /* path1 was a dir, path2 a file */ +#define D_MISMATCH2 4 /* path1 was a file, path2 a dir */ +#define D_SKIPPED1 5 /* path1 was a special file */ +#define D_SKIPPED2 6 /* path2 was a special file */ + +struct excludes { + char *pattern; + struct excludes *next; +}; + +extern int Nflag, Pflag, rflag, sflag, Tflag; +extern int diff_format, diff_context, status; +extern char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +extern struct stat stb1, stb2; +extern struct excludes *excludes_list; +extern regex_t ignore_re; + +char *splice(char *, char *); +int diffreg(char *, char *, int); +int easprintf(char **, const char *, ...); +void *emalloc(size_t); +void *erealloc(void *, size_t); +void diffdir(char *, char *, int); +void print_only(const char *, size_t, const char *); +void print_status(int, char *, char *, char *); diff --git a/usr.bin/diff/diffdir.c b/usr.bin/diff/diffdir.c new file mode 100644 index 0000000..ad1acdb --- /dev/null +++ b/usr.bin/diff/diffdir.c @@ -0,0 
+1,237 @@ +/* $OpenBSD: diffdir.c,v 1.47 2019/01/25 00:19:26 millert Exp $ */ + +/* + * Copyright (c) 2003, 2010 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include <sys/stat.h> + +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +static int selectfile(const struct dirent *); +static void diffit(struct dirent *, char *, size_t, char *, size_t, int); + +#define d_status d_type /* we need to store status for -l */ + +/* + * Diff directory traversal. Will be called recursively if -r was specified. + */ +void +diffdir(char *p1, char *p2, int flags) +{ + struct dirent *dent1, **dp1, **edp1, **dirp1 = NULL; + struct dirent *dent2, **dp2, **edp2, **dirp2 = NULL; + size_t dirlen1, dirlen2; + char path1[PATH_MAX], path2[PATH_MAX]; + int pos; + + dirlen1 = strlcpy(path1, *p1 ? 
p1 : ".", sizeof(path1)); + if (dirlen1 >= sizeof(path1) - 1) { + warnc(ENAMETOOLONG, "%s", p1); + status |= 2; + return; + } + if (path1[dirlen1 - 1] != '/') { + path1[dirlen1++] = '/'; + path1[dirlen1] = '\0'; + } + dirlen2 = strlcpy(path2, *p2 ? p2 : ".", sizeof(path2)); + if (dirlen2 >= sizeof(path2) - 1) { + warnc(ENAMETOOLONG, "%s", p2); + status |= 2; + return; + } + if (path2[dirlen2 - 1] != '/') { + path2[dirlen2++] = '/'; + path2[dirlen2] = '\0'; + } + + /* + * Get a list of entries in each directory, skipping "excluded" files + * and sorting alphabetically. + */ + pos = scandir(path1, &dirp1, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && (Nflag || Pflag)) { + pos = 0; + } else { + warn("%s", path1); + goto closem; + } + } + dp1 = dirp1; + edp1 = dirp1 + pos; + + pos = scandir(path2, &dirp2, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && Nflag) { + pos = 0; + } else { + warn("%s", path2); + goto closem; + } + } + dp2 = dirp2; + edp2 = dirp2 + pos; + + /* + * If we were given a starting point, find it. + */ + if (start != NULL) { + while (dp1 != edp1 && strcmp((*dp1)->d_name, start) < 0) + dp1++; + while (dp2 != edp2 && strcmp((*dp2)->d_name, start) < 0) + dp2++; + } + + /* + * Iterate through the two directory lists, diffing as we go. + */ + while (dp1 != edp1 || dp2 != edp2) { + dent1 = dp1 != edp1 ? *dp1 : NULL; + dent2 = dp2 != edp2 ? *dp2 : NULL; + + pos = dent1 == NULL ? 1 : dent2 == NULL ? 
-1 : + strcmp(dent1->d_name, dent2->d_name); + if (pos == 0) { + /* file exists in both dirs, diff it */ + diffit(dent1, path1, dirlen1, path2, dirlen2, flags); + dp1++; + dp2++; + } else if (pos < 0) { + /* file only in first dir, only diff if -N */ + if (Nflag) { + diffit(dent1, path1, dirlen1, path2, dirlen2, + flags); + } else { + print_only(path1, dirlen1, dent1->d_name); + status |= 1; + } + dp1++; + } else { + /* file only in second dir, only diff if -N or -P */ + if (Nflag || Pflag) { + diffit(dent2, path1, dirlen1, path2, dirlen2, + flags); + } else { + print_only(path2, dirlen2, dent2->d_name); + status |= 1; + } + dp2++; + } + } + +closem: + if (dirp1 != NULL) { + for (dp1 = dirp1; dp1 < edp1; dp1++) + free(*dp1); + free(dirp1); + } + if (dirp2 != NULL) { + for (dp2 = dirp2; dp2 < edp2; dp2++) + free(*dp2); + free(dirp2); + } +} + +/* + * Do the actual diff by calling either diffreg() or diffdir(). + */ +static void +diffit(struct dirent *dp, char *path1, size_t plen1, char *path2, size_t plen2, + int flags) +{ + flags |= D_HEADER; + strlcpy(path1 + plen1, dp->d_name, PATH_MAX - plen1); + if (stat(path1, &stb1) != 0) { + if (!(Nflag || Pflag) || errno != ENOENT) { + warn("%s", path1); + return; + } + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + } + + strlcpy(path2 + plen2, dp->d_name, PATH_MAX - plen2); + if (stat(path2, &stb2) != 0) { + if (!Nflag || errno != ENOENT) { + warn("%s", path2); + return; + } + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (rflag) + diffdir(path1, path2, flags); + else + printf("Common subdirectories: %s and %s\n", + path1, path2); + return; + } + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + dp->d_status = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + dp->d_status = D_SKIPPED2; + else + dp->d_status = diffreg(path1, 
path2, flags); + print_status(dp->d_status, path1, path2, ""); +} + +/* + * Returns 1 if the directory entry should be included in the + * diff, else 0. Checks the excludes list. + */ +static int +selectfile(const struct dirent *dp) +{ + struct excludes *excl; + + if (dp->d_fileno == 0) + return (0); + + /* always skip "." and ".." */ + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) + return (0); + + /* check excludes list */ + for (excl = excludes_list; excl != NULL; excl = excl->next) + if (fnmatch(excl->pattern, dp->d_name, FNM_PATHNAME) == 0) + return (0); + + return (1); +} diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c new file mode 100644 index 0000000..fc0029d --- /dev/null +++ b/usr.bin/diff/diffreg.c @@ -0,0 +1,1485 @@ +/* $OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $ */ + +/* + * Copyright (C) Caldera International Inc. 2001-2002. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code and documentation must retain the above + * copyright notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * 4. Neither the name of Caldera International, Inc. nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, + * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/stat.h> +#include <sys/wait.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <paths.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) +#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) + +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds + * a pair of longest identical subsequences in the two + * files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. 
+ * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0, y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * than the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel. + * + * With J in hand, the matches there recorded are + * check'ed against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. 
+ */ + +struct cand { + int x; + int y; + int pred; +}; + +struct line { + int serial; + int value; +} *file[2]; + +/* + * The following struct is used to record change information when + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +#define diff_output printf +static FILE *opentemp(const char *); +static void output(char *, FILE *, char *, FILE *, int); +static void check(FILE *, FILE *, int); +static void range(int, int, char *); +static void uni_range(int, int); +static void dump_context_vec(FILE *, FILE *, int); +static void dump_unified_vec(FILE *, FILE *, int); +static void prepare(int, FILE *, off_t, int); +static void prune(void); +static void equiv(struct line *, int, struct line *, int, int *); +static void unravel(int); +static void unsort(struct line *, int, int *); +static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); +static void sort(struct line *, int); +static void print_header(const char *, const char *); +static int ignoreline(char *); +static int asciifile(FILE *); +static int fetch(long *, int, int, FILE *, int, int, int); +static int newcand(int, int, int); +static int search(int *, int, int); +static int skipline(FILE *); +static int isqrt(int); +static int stone(int *, int, int *, int *, int); +static int readhash(FILE *, int); +static int files_differ(FILE *, FILE *, int); +static char *match_function(const long *, int, FILE *); +static char *preadline(int, size_t, off_t); + +static int *J; /* will be overlaid on class */ +static int *class; /* will be overlaid on file[0] */ +static int *klist; /* will be overlaid on file[0] after class */ +static int *member; /* will be overlaid on file[1] */ +static int clen; +static int inifdef; /* whether or not we are in a #ifdef 
block */ +static int len[2]; +static int pref, suff; /* length of prefix and suffix */ +static int slen[2]; +static int anychange; +static long *ixnew; /* will be overlaid on file[1] */ +static long *ixold; /* will be overlaid on klist */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static int clistlen; /* the length of clist */ +static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ +static u_char *chrtran; /* translation table for case-folding */ +static struct context_vec *context_vec_start; +static struct context_vec *context_vec_end; +static struct context_vec *context_vec_ptr; + +#define FUNCTION_CONTEXT_SIZE 55 +static char lastbuf[FUNCTION_CONTEXT_SIZE]; +static int lastline; +static int lastmatchline; + + +/* + * chrtran points to one of 2 translation tables: cup2low if folding upper to + * lower case clow2low if not folding case + */ +u_char clow2low[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, + 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, + 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, + 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, + 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 
0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, + 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, + 0xfd, 0xfe, 0xff +}; + +u_char cup2low[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, + 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, + 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 
0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, + 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, + 0xfd, 0xfe, 0xff +}; + +int +diffreg(char *file1, char *file2, int flags) +{ + FILE *f1, *f2; + int i, rval; + + f1 = f2 = NULL; + rval = D_SAME; + anychange = 0; + lastline = 0; + lastmatchline = 0; + context_vec_ptr = context_vec_start - 1; + if (flags & D_IGNORECASE) + chrtran = cup2low; + else + chrtran = clow2low; + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); + if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) + goto closem; + + if (flags & D_EMPTY1) + f1 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb1.st_mode)) { + if ((f1 = opentemp(file1)) == NULL || + fstat(fileno(f1), &stb1) == -1) { + warn("%s", file1); + status |= 2; + goto closem; + } + } else if (strcmp(file1, "-") == 0) + f1 = stdin; + else + f1 = fopen(file1, "r"); + } + if (f1 == NULL) { + warn("%s", file1); + status |= 2; + goto closem; + } + + if (flags & D_EMPTY2) + f2 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb2.st_mode)) { + if ((f2 = opentemp(file2)) == NULL || + fstat(fileno(f2), &stb2) == -1) { + warn("%s", file2); + status |= 2; + goto closem; + } + } else if (strcmp(file2, "-") == 0) + f2 = stdin; + else + f2 = fopen(file2, "r"); + } + if (f2 == NULL) { + warn("%s", file2); + status |= 2; + goto closem; + } + + switch (files_differ(f1, f2, flags)) { + case 0: + goto closem; + case 1: + break; + default: + /* error */ + status |= 2; + goto closem; + } + + if ((flags & D_FORCEASCII) == 0 && + (!asciifile(f1) || !asciifile(f2))) { + rval = D_BINARY; + status |= 1; + goto closem; + } + prepare(0, f1, stb1.st_size, flags); + prepare(1, f2, stb2.st_size, flags); + + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xreallocarray(member, slen[1] + 2, sizeof(*member)); + + 
class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = xreallocarray(class, slen[0] + 2, sizeof(*class)); + + klist = xcalloc(slen[0] + 2, sizeof(*klist)); + clen = 0; + clistlen = 100; + clist = xcalloc(clistlen, sizeof(*clist)); + i = stone(class, slen[0], member, klist, flags); + free(member); + free(class); + + J = xreallocarray(J, len[0] + 2, sizeof(*J)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); + ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); + check(f1, f2, flags); + output(file1, f1, file2, f2, flags); +closem: + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + if (f1 != NULL) + fclose(f1); + if (f2 != NULL) + fclose(f2); + + return (rval); +} + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. + * XXX - could use code from cmp(1) [faster] + */ +static int +files_differ(FILE *f1, FILE *f2, int flags) +{ + char buf1[BUFSIZ], buf2[BUFSIZ]; + size_t i, j; + + if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || + (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) + return (1); + for (;;) { + i = fread(buf1, 1, sizeof(buf1), f1); + j = fread(buf2, 1, sizeof(buf2), f2); + if ((!i && ferror(f1)) || (!j && ferror(f2))) + return (-1); + if (i != j) + return (1); + if (i == 0) + return (0); + if (memcmp(buf1, buf2, i) != 0) + return (1); + } +} + +static FILE * +opentemp(const char *file) +{ + char buf[BUFSIZ], tempfile[PATH_MAX]; + ssize_t nread; + int ifd, ofd; + + if (strcmp(file, "-") == 0) + ifd = STDIN_FILENO; + else if ((ifd = open(file, O_RDONLY, 0644)) == -1) + return (NULL); + + (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); + + if ((ofd = mkstemp(tempfile)) == -1) { + close(ifd); + return (NULL); + } + unlink(tempfile); + while ((nread = read(ifd, buf, BUFSIZ)) > 0) { + if (write(ofd, buf, nread) != nread) { + close(ifd); + 
close(ofd); + return (NULL); + } + } + close(ifd); + lseek(ofd, (off_t)0, SEEK_SET); + return (fdopen(ofd, "r")); +} + +char * +splice(char *dir, char *file) +{ + char *tail, *buf; + size_t dirlen; + + dirlen = strlen(dir); + while (dirlen != 0 && dir[dirlen - 1] == '/') + dirlen--; + if ((tail = strrchr(file, '/')) == NULL) + tail = file; + else + tail++; + xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); + return (buf); +} + +static void +prepare(int i, FILE *fd, off_t filesize, int flags) +{ + struct line *p; + int j, h; + size_t sz; + + rewind(fd); + + sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; + if (sz < 100) + sz = 100; + + p = xcalloc(sz + 3, sizeof(*p)); + for (j = 0; (h = readhash(fd, flags));) { + if (j == sz) { + sz = sz * 3 / 2; + p = xreallocarray(p, sz + 3, sizeof(*p)); + } + p[++j].value = h; + } + len[i] = j; + file[i] = p; +} + +static void +prune(void) +{ + int i, j; + + for (pref = 0; pref < len[0] && pref < len[1] && + file[0][pref + 1].value == file[1][pref + 1].value; + pref++) + ; + for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && + file[0][len[0] - suff].value == file[1][len[1] - suff].value; + suff++) + ; + for (j = 0; j < 2; j++) { + sfile[j] = file[j] + pref; + slen[j] = len[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +/* Code taken from ping.c */ +static int +isqrt(int n) +{ + int y, x = 1; + + if (n == 0) + return (0); + + do { /* newton was a stinker */ + y = x; + x = n / x; + x += y; + x /= 2; + } while 
((x - y) > 1 || (x - y) < -1); + + return (x); +} + +static int +stone(int *a, int n, int *b, int *c, int flags) +{ + int i, k, y, j, l; + int oldc, tc, oldl, sq; + u_int numtries, bound; + + if (flags & D_MINIMAL) + bound = UINT_MAX; + else { + sq = isqrt(n); + bound = MAXIMUM(256, sq); + } + + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return (k); +} + +static int +newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xreallocarray(clist, clistlen, sizeof(*clist)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return (clen++); +} + +static int +search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return (k + 1); + i = 0; + j = k + 1; + for (;;) { + l = (i + j) / 2; + if (l <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return (l); + } + return (l + 1); +} + +static void +unravel(int p) +{ + struct cand *q; + int i; + + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : + i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due + * to confounding by hashing (which result in "jackpot") + * 2. 
collect random access indexes to the two files + * + * On return ixold[i] and ixnew[j] hold the cumulative character count + * through line i of f1 and line j of f2 (index 0 is 0), and J[i] has + * been reset to 0 for every pair whose text does not really match. + */ +static void +check(FILE *f1, FILE *f2, int flags) +{ + int i, j, jackpot, c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for (i = 1; i <= len[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) { + for (;;) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file for -b or -w. + */ + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { + if (c == EOF && d == '\n') { + ctnew++; + break; + } else if (c == '\n' && d == EOF) { + ctold++; + break; + } + } + ctold++; + ctnew++; + if ((flags & D_FOLDBLANKS) && isspace(c) && + isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + } while (isspace(c = getc(f1))); + do { + if (d == '\n') + break; + ctnew++; + } while (isspace(d = getc(f2))); + } else if ((flags & D_IGNOREBLANKS)) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + /* + * EOF (-1) is not a valid index into chrtran[]; + * compare it directly instead of translating it. + */ + if ((c == EOF || d == EOF) ? c != d : + chrtran[c] != chrtran[d]) { + jackpot++; + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + /* finish f2's line based on f2's own last char */ + if (d != '\n' && d != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + for (;;) { + ctold++; + ctnew++; + if ((c = getc(f1)) != (d = getc(f2))) { + /* jackpot++; */ + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + /* finish f2's line based on f2's own last char */ + if (d != '\n' && d != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= len[1]; j++) + ixnew[j] = ctnew += skipline(f2); + /* + * if (jackpot) + * fprintf(stderr, "jackpot\n"); + */ +} + +/* shellsort CACM #201 */ +static void +sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 
0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value || + (aim->value == ai[0].value && + aim->serial > ai[0].serial)) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xcalloc(l + 1, sizeof(*a)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + +static int +skipline(FILE *f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return (i); +} + +static void +output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) +{ + int m, i0, i1, j0, j1; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + if (diff_format != D_EDIT) { + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j0 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j1 = J[i1 + 1] - 1; + J[i1] = j1; + change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); + } + } else { + for (i0 = m; i0 >= 1; i0 = i1 - 1) { + while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) + i0--; + j0 = J[i0 + 1] - 1; + i1 = i0 + 1; + while (i1 > 1 && J[i1 - 1] == 0) + i1--; + j1 = J[i1 - 1] + 1; + J[i1] = j1; + change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); + } + } + if (m == 0) + change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); + if (diff_format == D_IFDEF) { + for (;;) { +#define c i0 + if ((c = getc(f1)) == EOF) + return; + diff_output("%c", c); + } +#undef c + } + if (anychange != 0) { + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, flags); + else if (diff_format == 
D_UNIFIED) + dump_unified_vec(f1, f2, flags); + } +} + +/* + * Print the smaller of a and b; when a < b also print the separator + * followed by b. + */ +static void +range(int a, int b, char *separator) +{ + diff_output("%d", a > b ? b : a); + if (a < b) + diff_output("%s%d", separator, b); +} + +/* + * Print a range as "start,count": "a,b-a+1" when a < b, just "b" when + * a == b, and "b,0" when a > b. + */ +static void +uni_range(int a, int b) +{ + if (a < b) + diff_output("%d,%d", a, b - a + 1); + else if (a == b) + diff_output("%d", b); + else + diff_output("%d,0", b); +} + +/* + * Read up to rlen bytes from fd at offset off into a freshly allocated, + * NUL-terminated buffer, dropping one trailing newline if present. + * Exits via err(2, ...) if pread() fails. The caller owns the buffer. + */ +static char * +preadline(int fd, size_t rlen, off_t off) +{ + char *line; + ssize_t nr; + + line = xmalloc(rlen + 1); + if ((nr = pread(fd, line, rlen, off)) == -1) + err(2, "preadline"); + if (nr > 0 && line[nr-1] == '\n') + nr--; + line[nr] = '\0'; + return (line); +} + +/* + * Match line against ignore_re and free it. Returns 1 if the line + * matched, else 0. + */ +static int +ignoreline(char *line) +{ + int ret; + + ret = regexec(&ignore_re, line, 0, NULL, 0); + free(line); + return (ret == 0); /* if it matched, it should be ignored. */ +} + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater than b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void +change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, + int *pflags) +{ + static size_t max_context = 64; + int i; + +restart: + if (diff_format != D_IFDEF && a > b && c > d) + return; + if (ignore_pats != NULL) { + char *line; + /* + * All lines in the change, insert, or delete must + * match an ignore pattern for the change to be + * ignored. + */ + if (a <= b) { /* Changes and deletes. */ + for (i = a; i <= b; i++) { + line = preadline(fileno(f1), + ixold[i] - ixold[i - 1], ixold[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + if (a > b || c <= d) { /* Changes and inserts. 
*/ + for (i = c; i <= d; i++) { + line = preadline(fileno(f2), + ixnew[i] - ixnew[i - 1], ixnew[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + return; + } +proceed: + if (*pflags & D_HEADER) { + diff_output("%s %s %s\n", diffargs, file1, file2); + *pflags &= ~D_HEADER; + } + if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { + /* + * Allocate change records as needed. + */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + max_context <<= 1; + context_vec_start = xreallocarray(context_vec_start, + max_context, sizeof(*context_vec_start)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + anychange = 1; + } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && + c > context_vec_ptr->d + (2 * diff_context) + 1) { + /* + * If this change is more than 'diff_context' lines from the + * previous change, dump the record and reset it. + */ + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, *pflags); + else + dump_unified_vec(f1, f2, *pflags); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + if (anychange == 0) + anychange = 1; + switch (diff_format) { + case D_BRIEF: + return; + case D_NORMAL: + case D_EDIT: + range(a, b, ","); + diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + if (diff_format == D_NORMAL) + range(c, d, ","); + diff_output("\n"); + break; + case D_REVERSE: + diff_output("%c", a > b ? 'a' : c > d ? 
'd' : 'c'); + range(a, b, " "); + diff_output("\n"); + break; + case D_NREVERSE: + if (a > b) + diff_output("a%d %d\n", b, d - c + 1); + else { + diff_output("d%d %d\n", a, b - a + 1); + if (!(c > d)) + /* add changed lines */ + diff_output("a%d %d\n", b, d - c + 1); + } + break; + } + if (diff_format == D_NORMAL || diff_format == D_IFDEF) { + fetch(ixold, a, b, f1, '<', 1, *pflags); + if (a <= b && c <= d && diff_format == D_NORMAL) + diff_output("---\n"); + } + i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); + if (i != 0 && diff_format == D_EDIT) { + /* + * A non-zero return value for D_EDIT indicates that the + * last line printed was a bare dot (".") that has been + * escaped as ".." to prevent ed(1) from misinterpreting + * it. We have to add a substitute command to change this + * back and restart where we left off. + */ + diff_output(".\n"); + diff_output("%ds/.//\n", a + i - 1); + b = a + i - 1; + a = b + 1; + c += i; + goto restart; + } + if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) + diff_output(".\n"); + if (inifdef) { + diff_output("#endif /* %s */\n", ifdefname); + inifdef = 0; + } +} + +static int +fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) +{ + int i, j, c, lastc, col, nc; + + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if (diff_format == D_IFDEF && oldfile) { + long curpos = ftell(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a > b ? b : a - 1] - curpos; + for (i = 0; i < nc; i++) + diff_output("%c", getc(lb)); + } + if (a > b) + return (0); + if (diff_format == D_IFDEF) { + if (inifdef) { + diff_output("#else /* %s%s */\n", + oldfile == 1 ? "!" 
: "", ifdefname); + } else { + if (oldfile) + diff_output("#ifndef %s\n", ifdefname); + else + diff_output("#ifdef %s\n", ifdefname); + } + inifdef = 1 + oldfile; + } + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if (diff_format != D_IFDEF && ch != '\0') { + diff_output("%c", ch); + if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT + || diff_format == D_UNIFIED)) + diff_output("\t"); + else if (diff_format != D_UNIFIED) + diff_output(" "); + } + col = 0; + for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { + if ((c = getc(lb)) == EOF) { + if (diff_format == D_EDIT || diff_format == D_REVERSE || + diff_format == D_NREVERSE) + warnx("No newline at end of file"); + else + diff_output("\n\\ No newline at end of " + "file\n"); + return (0); + } + if (c == '\t' && (flags & D_EXPANDTABS)) { + do { + diff_output(" "); + } while (++col & 7); + } else { + if (diff_format == D_EDIT && j == 1 && c == '\n' + && lastc == '.') { + /* + * Don't print a bare "." line + * since that will confuse ed(1). + * Print ".." instead and return, + * giving the caller an offset + * from which to restart. + */ + diff_output(".\n"); + return (i - a + 1); + } + diff_output("%c", c); + col++; + } + } + } + return (0); +} + +/* + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 
+ */ +static int +readhash(FILE *f, int flags) +{ + int i, t, space; + int sum; + + sum = 1; + space = 0; + if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { + if (flags & D_IGNORECASE) + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + chrtran[t]; + } + else + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + t; + } + } else { + for (i = 0;;) { + switch (t = getc(f)) { + case '\t': + case '\r': + case '\v': + case '\f': + case ' ': + space++; + continue; + default: + if (space && (flags & D_IGNOREBLANKS) == 0) { + i++; + space = 0; + } + sum = sum * 127 + chrtran[t]; + i++; + continue; + case EOF: + if (i == 0) + return (0); + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} + +static int +asciifile(FILE *f) +{ + unsigned char buf[BUFSIZ]; + size_t cnt; + + if (f == NULL) + return (1); + + rewind(f); + cnt = fread(buf, 1, sizeof(buf), f); + return (memchr(buf, '\0', cnt) == NULL); +} + +#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) + +static char * +match_function(const long *f, int pos, FILE *fp) +{ + unsigned char buf[FUNCTION_CONTEXT_SIZE]; + size_t nc; + int last = lastline; + char *state = NULL; + + lastline = pos; + while (pos > last) { + fseek(fp, f[pos - 1], SEEK_SET); + nc = f[pos] - f[pos - 1]; + if (nc >= sizeof(buf)) + nc = sizeof(buf) - 1; + nc = fread(buf, 1, nc, fp); + if (nc > 0) { + buf[nc] = '\0'; + buf[strcspn(buf, "\n")] = '\0'; + if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if (!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + 
state = " (public)"; + } else { + strlcpy(lastbuf, buf, sizeof lastbuf); + if (state) + strlcat(lastbuf, state, + sizeof lastbuf); + lastmatchline = pos; + return lastbuf; + } + } + } + pos--; + } + return lastmatchline > 0 ? lastbuf : NULL; +} + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd, do_output; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAXIMUM(1, cvp->a - diff_context); + upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); + lowc = MAXIMUM(1, cvp->c - diff_context); + upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); + + diff_output("***************"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n*** "); + range(lowa, upb, ","); + diff_output(" ****\n"); + + /* + * Output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for (; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold, lowa, b, f1, ' ', 0, flags); + else { + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, + ch == 'c' ? '!' 
: '-', 0, flags); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b + 1, upb, f1, ' ', 0, flags); + } + /* output changes to the "new" file */ + diff_output("--- "); + range(lowc, upd, ","); + diff_output(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew, lowc, d, f2, ' ', 0, flags); + else { + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, + ch == 'c' ? '!' : '+', 0, flags); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + } + context_vec_ptr = context_vec_start - 1; +} + +/* dump accumulated "unified" diff changes */ +static void +dump_unified_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAXIMUM(1, cvp->a - diff_context); + upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); + lowc = MAXIMUM(1, cvp->c - diff_context); + upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); + + diff_output("@@ -"); + uni_range(lowa, upb); + diff_output(" +"); + uni_range(lowc, upd); + diff_output(" @@"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. 
+ */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + switch (ch) { + case 'c': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + } + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + + context_vec_ptr = context_vec_start - 1; +} + +static void +print_header(const char *file1, const char *file2) +{ + if (label[0] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", + label[0]); + else + diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---", + file1, ctime(&stb1.st_mtime)); + if (label[1] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", + label[1]); + else + diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++", + file2, ctime(&stb2.st_mtime)); +} diff --git a/usr.bin/diff/xmalloc.c b/usr.bin/diff/xmalloc.c new file mode 100644 index 0000000..ce0f454 --- /dev/null +++ b/usr.bin/diff/xmalloc.c @@ -0,0 +1,85 @@ +/* $OpenBSD: xmalloc.c,v 1.10 2019/06/28 05:44:09 deraadt Exp $ */ +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. 
Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#include <err.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" + +void * +xmalloc(size_t size) +{ + void *ptr; + + if (size == 0) + errx(2, "xmalloc: zero size"); + ptr = malloc(size); + if (ptr == NULL) + err(2, "xmalloc: allocating %zu bytes", size); + return ptr; +} + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *ptr; + + ptr = calloc(nmemb, size); + if (ptr == NULL) + err(2, "xcalloc: allocating %zu * %zu bytes", nmemb, size); + return ptr; +} + +void * +xreallocarray(void *ptr, size_t nmemb, size_t size) +{ + void *new_ptr; + + new_ptr = reallocarray(ptr, nmemb, size); + if (new_ptr == NULL) + err(2, "xreallocarray: allocating %zu * %zu bytes", + nmemb, size); + return new_ptr; +} + +char * +xstrdup(const char *str) +{ + char *cp; + + if ((cp = strdup(str)) == NULL) + err(2, "xstrdup"); + return cp; +} + +int +xasprintf(char **ret, const char *fmt, ...) +{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vasprintf(ret, fmt, ap); + va_end(ap); + + if (i == -1) + err(2, "xasprintf"); + + return i; +} diff --git a/usr.bin/diff/xmalloc.h b/usr.bin/diff/xmalloc.h new file mode 100644 index 0000000..2139671 --- /dev/null +++ b/usr.bin/diff/xmalloc.h @@ -0,0 +1,30 @@ +/* $OpenBSD: xmalloc.h,v 1.4 2015/11/12 16:30:30 mmcc Exp $ */ + +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Created: Mon Mar 20 22:09:17 1995 ylo + * + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). 
+ * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#ifndef XMALLOC_H +#define XMALLOC_H + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +char *xstrdup(const char *); +int xasprintf(char **, const char *, ...) + __attribute__((__format__ (printf, 2, 3))) + __attribute__((__nonnull__ (2))); + +#endif /* XMALLOC_H */ |