diff options
Diffstat (limited to 'coreutils')
-rw-r--r-- | coreutils/diff.c | 1144 |
1 files changed, 557 insertions, 587 deletions
diff --git a/coreutils/diff.c b/coreutils/diff.c index b2945dd87..17975ad20 100644 --- a/coreutils/diff.c +++ b/coreutils/diff.c @@ -3,14 +3,14 @@ * Mini diff implementation for busybox, adapted from OpenBSD diff. * * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> - * + * * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. */ /* * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> * - * Permission to + * Permission to * use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. @@ -71,8 +71,8 @@ * D_SKIPPED2 - skipped path2 as it is a special file */ -#define D_SAME 0 -#define D_DIFFER 1 +#define D_SAME 0 +#define D_DIFFER (1<<0) #define D_BINARY (1<<1) #define D_COMMON (1<<2) #define D_ONLY (1<<3) @@ -84,7 +84,7 @@ /* Command line options */ static unsigned long cmd_flags; -#define FLAG_a 1 +#define FLAG_a (1<<0) #define FLAG_b (1<<1) #define FLAG_d (1<<2) #define FLAG_i (1<<3) @@ -145,207 +145,14 @@ static struct context_vec *context_vec_start; static struct context_vec *context_vec_end; static struct context_vec *context_vec_ptr; -static void output(char *, FILE *, char *, FILE *); -static void check(char *, FILE *, char *, FILE *); -static void uni_range(int, int); -static void dump_unified_vec(FILE *, FILE *); -static void prepare(int, FILE *, off_t); -static void prune(void); -static void equiv(struct line *, int, struct line *, int, int *); -static void unravel(int); -static void unsort(struct line *, int, int *); -static void change(char *, FILE *, char *, FILE *, int, int, int, int); -static void sort(struct line *, int); -static void print_header(const char *, const char *); -static int asciifile(FILE *); -static int fetch(long *, int, int, FILE *, int, int); -static int newcand(int, int, int); -static int search(int *, int, int); -static int skipline(FILE *); -static int stone(int *, int, int *, int *); -static int readhash(FILE *); -static int files_differ(FILE *, FILE *, int); - -static int diffreg(char *, char *, int); -static void print_only(const char *, size_t, const char *); -static void print_status(int, char *, char *, char *); - -#ifdef CONFIG_FEATURE_DIFF_DIR -static int dir_strcmp(const void *p1, const void *p2) { - return strcmp(*(char * const *)p1, *(char * const *)p2); -} - -/* This function adds a filename to dl, the directory listing. */ - -static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) { - dl_count++; - dl = xrealloc(dl, dl_count * sizeof(char *)); - dl[dl_count - 1] = bb_xstrdup(filename); - if (cmd_flags & FLAG_r) { - int *pp = (int *) userdata; - int path_len = *pp + 1; - dl[dl_count - 1] = &(dl[dl_count - 1])[path_len]; - } - return TRUE; -} - -/* This returns a sorted directory listing. */ -static char **get_dir(char *path) { - - int i; - - /* Reset dl_count - there's no need to free dl as bb_xrealloc does - * the job nicely. */ - dl_count = 0; - - /* If -r has been set, then the recursive_action function will be - * used. Unfortunately, this outputs the root directory along with - * the recursed paths, so use void *userdata to specify the string - * length of the root directory. It can then be removed in - * add_to_dirlist. */ - - int path_len = strlen(path); - void *userdata = &path_len; - - /* Now fill dl with a listing. */ - if (cmd_flags & FLAG_r) - recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata); - else { - DIR *dp; - struct dirent *ep; - if ((dp = opendir(path)) == NULL) - bb_error_msg("Error reading directory"); - while ((ep = readdir(dp))) { - if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, "."))) - continue; - add_to_dirlist(ep->d_name, NULL, NULL); - } - closedir(dp); - } - - /* Sort dl alphabetically. */ - qsort(dl, dl_count, sizeof(char *), dir_strcmp); - - /* Copy dl so that we can return it. */ - char **retval = xmalloc(dl_count * sizeof(char *)); - for (i = 0; i < dl_count; i++) - retval[i] = bb_xstrdup(dl[i]); - - return retval; -} - -static void do_diff (char *dir1, char *path1, char *dir2, char *path2) { - - int flags = D_HEADER; - int val; - - char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1); - char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2); - - if (stat(fullpath1, &stb1) != 0) { - flags |= D_EMPTY1; - memset(&stb1, 0, sizeof(stb1)); - fullpath1 = bb_xasprintf("%s/%s", dir1, path2); - } - if (stat(fullpath2, &stb2) != 0) { - flags |= D_EMPTY2; - memset(&stb2, 0, sizeof(stb2)); - stb2.st_mode = stb1.st_mode; - fullpath2 = bb_xasprintf("%s/%s", dir2, path1); - } - - if (stb1.st_mode == 0) - stb1.st_mode = stb2.st_mode; - - if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { - printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); - return; - } - - if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) - val = D_SKIPPED1; - else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) - val = D_SKIPPED2; - else - val = diffreg(fullpath1, fullpath2, flags); - - print_status(val, fullpath1, fullpath2, NULL); -} - -static void diffdir (char *p1, char *p2) { - - char **dirlist1, **dirlist2; - char *dp1, *dp2; - int dirlist1_count, dirlist2_count; - int pos; - - /* Check for trailing slashes. */ - - if (p1[strlen(p1) - 1] == '/') - p1[strlen(p1) - 1] = '\0'; - if (p2[strlen(p2) - 1] == '/') - p2[strlen(p2) - 1] = '\0'; - - /* Get directory listings for p1 and p2. */ - - dirlist1 = get_dir(p1); - dirlist1_count = dl_count; - dirlist1[dirlist1_count] = NULL; - dirlist2 = get_dir(p2); - dirlist2_count = dl_count; - dirlist2[dirlist2_count] = NULL; - - /* If -S was set, find the starting point. */ - if (start) { - while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) - dirlist1++; - while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) - dirlist2++; - if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) - bb_error_msg("Invalid argument to -S"); - } - - /* Now that both dirlist1 and dirlist2 contain sorted directory - * listings, we can start to go through dirlist1. If both listings - * contain the same file, then do a normal diff. Otherwise, behaviour - * is determined by whether the -N flag is set. */ - while (*dirlist1 != NULL || *dirlist2 != NULL) { - dp1 = *dirlist1; - dp2 = *dirlist2; - pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); - if (pos == 0) { - do_diff(p1, dp1, p2, dp2); - dirlist1++; - dirlist2++; - } - else if (pos < 0) { - if (cmd_flags & FLAG_N) - do_diff(p1, dp1, p2, NULL); - else - print_only(p1, strlen(p1) + 1, dp1); - dirlist1++; - } - else { - if (cmd_flags & FLAG_N) - do_diff(p1, NULL, p2, dp2); - else - print_only(p2, strlen(p2) + 1, dp2); - dirlist2++; - } - } -} -#endif - -static void -print_only(const char *path, size_t dirlen, const char *entry) +static void print_only(const char *path, size_t dirlen, const char *entry) { if (dirlen > 1) dirlen--; printf("Only in %.*s: %s\n", (int)dirlen, path, entry); } -static void -print_status(int val, char *path1, char *path2, char *entry) +static void print_status(int val, char *path1, char *path2, char *entry) { switch (val) { case D_ONLY: @@ -391,175 +198,76 @@ print_status(int val, char *path1, char *path2, char *entry) } /* - * The following code uses an algorithm due to Harold Stone, - * which finds a pair of longest identical subsequences in - * the two files. - * - * The major goal is to generate the match vector J. - * J[i] is the index of the line in file1 corresponding - * to line i file0. J[i] = 0 if there is no - * such line in file1. - * - * Lines are hashed so as to work in core. All potential - * matches are located by sorting the lines of each file - * on the hash (called ``value''). In particular, this - * collects the equivalence classes in file1 together. - * Subroutine equiv replaces the value of each line in - * file0 by the index of the first element of its - * matching equivalence in (the reordered) file1. - * To save space equiv squeezes file1 into a single - * array member in which the equivalence classes - * are simply concatenated, except that their first - * members are flagged by changing sign. - * - * Next the indices that point into member are unsorted into - * array class according to the original order of file0. - * - * The cleverness lies in routine stone. This marches - * through the lines of file0, developing a vector klist - * of "k-candidates". At step i a k-candidate is a matched - * pair of lines x,y (x in file0 y in file1) such that - * there is a common subsequence of length k - * between the first i lines of file0 and the first y - * lines of file1, but there is no such subsequence for - * any smaller y. x is the earliest possible mate to y - * that occurs in such a subsequence. - * - * Whenever any of the members of the equivalence class of - * lines in file1 matable to a line in file0 has serial number - * less than the y of some k-candidate, that k-candidate - * with the smallest such y is replaced. The new - * k-candidate is chained (via pred) to the current - * k-1 candidate so that the actual subsequence can - * be recovered. When a member has serial number greater - * that the y of all k-candidates, the klist is extended. - * At the end, the longest subsequence is pulled out - * and placed in the array J by unravel - * - * With J in hand, the matches there recorded are - * checked against reality to assure that no spurious - * matches have crept in due to hashing. If they have, - * they are broken, and "jackpot" is recorded--a harmless - * matter except that a true match for a spuriously - * mated line may now be unnecessarily reported as a change. - * - * Much of the complexity of the program comes simply - * from trying to minimize core utilization and - * maximize the range of doable problems by dynamically - * allocating what is needed and reusing what is not. - * The core requirements for problems larger than somewhat - * are (in words) 2*length(file0) + length(file1) + - * 3*(number of k-candidates installed), typically about - * 6n words for files of length n. + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */ - -static int diffreg(char *ofile1, char *ofile2, int flags) +static int readhash(FILE *f) { - char *file1 = ofile1; - char *file2 = ofile2; - FILE *f1 = NULL; - FILE *f2 = NULL; - int rval = D_SAME; - int i; - - anychange = 0; - context_vec_ptr = context_vec_start - 1; - - if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) - return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); - if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) - goto closem; - - if (flags & D_EMPTY1) - f1 = bb_xfopen(_PATH_DEVNULL, "r"); - else { - if (strcmp(file1, "-") == 0) - f1 = stdin; - else - f1 = bb_xfopen(file1, "r"); - } + int i, t, space; + int sum; - if (flags & D_EMPTY2) - f2 = bb_xfopen(_PATH_DEVNULL, "r"); - else { - if (strcmp(file2, "-") == 0) - f2 = stdin; + sum = 1; + space = 0; + if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) { + if (FLAG_i) + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + t; + } else - f2 = bb_xfopen(file2, "r"); - } - - switch (files_differ(f1, f2, flags)) { - case 0: - goto closem; - case 1: - break; - default: - /* error */ - status |= 2; - goto closem; - } - - if (!asciifile(f1) || !asciifile(f2)) { - rval = D_BINARY; - status |= 1; - goto closem; + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + t; + } + } else { + for (i = 0;;) { + switch (t = getc(f)) { + case '\t': + case '\r': + case '\v': + case '\f': + case ' ': + space++; + continue; + default: + if (space && !(cmd_flags & FLAG_w)) { + i++; + space = 0; + } + sum = sum * 127 + t; + i++; + continue; + case EOF: + if (i == 0) + return (0); + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} - prepare(0, f1, stb1.st_size); - prepare(1, f2, stb2.st_size); - prune(); - sort(sfile[0], slen[0]); - sort(sfile[1], slen[1]); - - member = (int *)file[1]; - equiv(sfile[0], slen[0], sfile[1], slen[1], member); - member = xrealloc(member, (slen[1] + 2) * sizeof(int)); - - class = (int *)file[0]; - unsort(sfile[0], slen[0], class); - class = xrealloc(class, (slen[0] + 2) * sizeof(int)); - - klist = xmalloc((slen[0] + 2) * sizeof(int)); - clen = 0; - clistlen = 100; - clist = xmalloc(clistlen * sizeof(struct cand)); - i = stone(class, slen[0], member, klist); - free(member); - free(class); - J = xrealloc(J, (len[0] + 2) * sizeof(int)); - unravel(klist[i]); - free(clist); - free(klist); - - ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); - ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); - check(file1, f1, file2, f2); - output(file1, f1, file2, f2); - -closem: - if (anychange) { - status |= 1; - if (rval == D_SAME) - rval = D_DIFFER; - } - if (f1 != NULL) - fclose(f1); - if (f2 != NULL) - fclose(f2); - if (file1 != ofile1) - free(file1); - if (file2 != ofile2) - free(file2); - return (rval); -} /* * Check to see if the given files differ. * Returns 0 if they are the same, 1 if different, and -1 on error. */ -static int -files_differ(FILE *f1, FILE *f2, int flags) +static int files_differ(FILE *f1, FILE *f2, int flags) { char buf1[BUFSIZ], buf2[BUFSIZ]; size_t i, j; @@ -582,8 +290,7 @@ files_differ(FILE *f1, FILE *f2, int flags) } } -static void -prepare(int i, FILE *fd, off_t filesize) +static void prepare(int i, FILE *fd, off_t filesize) { struct line *p; int j, h; @@ -607,8 +314,7 @@ prepare(int i, FILE *fd, off_t filesize) file[i] = p; } -static void -prune(void) +static void prune(void) { int i, j; @@ -628,8 +334,7 @@ prune(void) } } -static void -equiv(struct line *a, int n, struct line *b, int m, int *c) +static void equiv(struct line *a, int n, struct line *b, int m, int *c) { int i, j; @@ -658,8 +363,8 @@ equiv(struct line *a, int n, struct line *b, int m, int *c) static int isqrt(int n) { int y, x = 1; - if (n == 0) return(0); - + if (n == 0) return(0); + do { y = x; x = n / x; @@ -670,8 +375,48 @@ static int isqrt(int n) { return (x); } -static int -stone(int *a, int n, int *b, int *c) + +static int newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xrealloc(clist, clistlen * sizeof(struct cand)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return (clen++); +} + + +static int search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return (k + 1); + i = 0; + j = k + 1; + while (1) { + l = i + j; + if ((l >>= 1) <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return (l); + } + return (l + 1); +} + + +static int stone(int *a, int n, int *b, int *c) { int i, k, y, j, l; int oldc, tc, oldl; @@ -715,67 +460,48 @@ stone(int *a, int n, int *b, int *c) return (k); } -static int -newcand(int x, int y, int pred) +static void unravel(int p) { struct cand *q; + int i; - if (clen == clistlen) { - clistlen = clistlen * 11 / 10; - clist = xrealloc(clist, clistlen * sizeof(struct cand)); - } - q = clist + clen; - q->x = x; - q->y = y; - q->pred = pred; - return (clen++); + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : + i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; } -static int -search(int *c, int k, int y) + +static void unsort(struct line *f, int l, int *b) { - int i, j, l, t; + int *a, i; - if (clist[c[k]].y < y) /* quick look for typical case */ - return (k + 1); - i = 0; - j = k + 1; - while (1) { - l = i + j; - if ((l >>= 1) <= i) - break; - t = clist[c[l]].y; - if (t > y) - j = l; - else if (t < y) - i = l; - else - return (l); - } - return (l + 1); + a = xmalloc((l + 1) * sizeof(int)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); } -static void -unravel(int p) +static int skipline(FILE *f) { - struct cand *q; - int i; + int i, c; - for (i = 0; i <= len[0]; i++) - J[i] = i <= pref ? i : - i > len[0] - suff ? i + len[1] - len[0] : 0; - for (q = clist + p; q->y != 0; q = clist + q->pred) - J[q->x + pref] = q->y + pref; + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return (i); } + /* * Check does double duty: * 1. ferret out any fortuitous correspondences due * to confounding by hashing (which result in "jackpot") * 2. collect random access indexes to the two files */ -static void -check(char *file1, FILE *f1, char *file2, FILE *f2) +static void check(char *file1, FILE *f1, char *file2, FILE *f2) { int i, j, jackpot, c, d; long ctold, ctnew; @@ -868,8 +594,7 @@ check(char *file1, FILE *f1, char *file2, FILE *f2) } /* shellsort CACM #201 */ -static void -sort(struct line *a, int n) +static void sort(struct line *a, int n) { struct line *ai, *aim, w; int j, m = 0, k; @@ -900,57 +625,6 @@ sort(struct line *a, int n) } } -static void -unsort(struct line *f, int l, int *b) -{ - int *a, i; - - a = xmalloc((l + 1) * sizeof(int)); - for (i = 1; i <= l; i++) - a[f[i].serial] = f[i].value; - for (i = 1; i <= l; i++) - b[i] = a[i]; - free(a); -} - -static int -skipline(FILE *f) -{ - int i, c; - - for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) - continue; - return (i); -} - -static void -output(char *file1, FILE *f1, char *file2, FILE *f2) -{ - int m, i0, i1, j0, j1; - - rewind(f1); - rewind(f2); - m = len[0]; - J[0] = 0; - J[m + 1] = len[1] + 1; - for (i0 = 1; i0 <= m; i0 = i1 + 1) { - while (i0 <= m && J[i0] == J[i0 - 1] + 1) - i0++; - j0 = J[i0 - 1] + 1; - i1 = i0 - 1; - while (i1 < m && J[i1 + 1] == 0) - i1++; - j1 = J[i1 + 1] - 1; - J[i1] = j1; - change(file1, f1, file2, f2, i0, i1, j0, j1); - } - if (m == 0) { - change(file1, f1, file2, f2, 1, 0, 1, len[1]); - } - if (anychange != 0) { - dump_unified_vec(f1, f2); - } -} static void uni_range(int a, int b) { @@ -962,57 +636,7 @@ static void uni_range(int a, int b) printf("%d,0", b); } -/* - * Indicate that there is a difference between lines a and b of the from file - * to get to lines c to d of the to file. If a is greater then b then there - * are no lines in the from file involved and this means that there were - * lines appended (beginning at b). If c is greater than d then there are - * lines missing from the to file. - */ -static void -change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) -{ - static size_t max_context = 64; - - if (a > b && c > d) return; - if (cmd_flags & FLAG_q) return; - - /* - * Allocate change records as needed. - */ - if (context_vec_ptr == context_vec_end - 1) { - ptrdiff_t offset = context_vec_ptr - context_vec_start; - max_context <<= 1; - context_vec_start = xrealloc(context_vec_start, - max_context * sizeof(struct context_vec)); - context_vec_end = context_vec_start + max_context; - context_vec_ptr = context_vec_start + offset; - } - if (anychange == 0) { - /* - * Print the context/unidiff header first time through. - */ - print_header(file1, file2); - anychange = 1; - } else if (a > context_vec_ptr->b + (2 * context) + 1 && - c > context_vec_ptr->d + (2 * context) + 1) { - /* - * If this change is more than 'context' lines from the - * previous change, dump the record and reset it. - */ - dump_unified_vec(f1, f2); - } - context_vec_ptr++; - context_vec_ptr->a = a; - context_vec_ptr->b = b; - context_vec_ptr->c = c; - context_vec_ptr->d = d; - return; - -} - -static int -fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) +static int fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) { int i, j, c, lastc, col, nc; @@ -1045,81 +669,15 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) return (0); } -/* - * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. - */ -static int -readhash(FILE *f) +static int asciifile(FILE *f) { - int i, t, space; - int sum; - sum = 1; - space = 0; - if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) { - if (FLAG_i) - for (i = 0; (t = getc(f)) != '\n'; i++) { - if (t == EOF) { - if (i == 0) - return (0); - break; - } - sum = sum * 127 + t; - } - else - for (i = 0; (t = getc(f)) != '\n'; i++) { - if (t == EOF) { - if (i == 0) - return (0); - break; - } - sum = sum * 127 + t; - } - } else { - for (i = 0;;) { - switch (t = getc(f)) { - case '\t': - case '\r': - case '\v': - case '\f': - case ' ': - space++; - continue; - default: - if (space && !(cmd_flags & FLAG_w)) { - i++; - space = 0; - } - sum = sum * 127 + t; - i++; - continue; - case EOF: - if (i == 0) - return (0); - /* FALLTHROUGH */ - case '\n': - break; - } - break; - } - } - /* - * There is a remote possibility that we end up with a zero sum. - * Zero is used as an EOF marker, so return 1 instead. - */ - return (sum == 0 ? 1 : sum); -} - -static int -asciifile(FILE *f) -{ - - if ((cmd_flags & FLAG_a) || f == NULL) - return (1); + if ((cmd_flags & FLAG_a) || f == NULL) + return (1); #ifdef CONFIG_FEATURE_DIFF_BINARY unsigned char buf[BUFSIZ]; int i, cnt; - + rewind(f); cnt = fread(buf, 1, sizeof(buf), f); for (i = 0; i < cnt; i++) @@ -1130,8 +688,7 @@ asciifile(FILE *f) } /* dump accumulated "unified" diff changes */ -static void -dump_unified_vec(FILE *f1, FILE *f2) +static void dump_unified_vec(FILE *f1, FILE *f2) { struct context_vec *cvp = context_vec_start; int lowa, upb, lowc, upd; @@ -1197,8 +754,8 @@ dump_unified_vec(FILE *f1, FILE *f2) context_vec_ptr = context_vec_start - 1; } -static void -print_header(const char *file1, const char *file2) + +static void print_header(const char *file1, const char *file2) { if (label[0] != NULL) printf("%s %s\n", "---", @@ -1214,6 +771,419 @@ print_header(const char *file1, const char *file2) file2, ctime(&stb2.st_mtime)); } + + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater then b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) +{ + static size_t max_context = 64; + + if (a > b && c > d) return; + if (cmd_flags & FLAG_q) return; + + /* + * Allocate change records as needed. + */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + max_context <<= 1; + context_vec_start = xrealloc(context_vec_start, + max_context * sizeof(struct context_vec)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + anychange = 1; + } else if (a > context_vec_ptr->b + (2 * context) + 1 && + c > context_vec_ptr->d + (2 * context) + 1) { + /* + * If this change is more than 'context' lines from the + * previous change, dump the record and reset it. + */ + dump_unified_vec(f1, f2); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + +} + + +static void output(char *file1, FILE *f1, char *file2, FILE *f2) +{ + int m, i0, i1, j0, j1; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j0 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j1 = J[i1 + 1] - 1; + J[i1] = j1; + change(file1, f1, file2, f2, i0, i1, j0, j1); + } + if (m == 0) { + change(file1, f1, file2, f2, 1, 0, 1, len[1]); + } + if (anychange != 0) { + dump_unified_vec(f1, f2); + } +} + +/* + * The following code uses an algorithm due to Harold Stone, + * which finds a pair of longest identical subsequences in + * the two files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0 y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * that the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel + * + * With J in hand, the matches there recorded are + * checked against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. + */ + +static int diffreg(char *ofile1, char *ofile2, int flags) +{ + char *file1 = ofile1; + char *file2 = ofile2; + FILE *f1 = NULL; + FILE *f2 = NULL; + int rval = D_SAME; + int i; + + anychange = 0; + context_vec_ptr = context_vec_start - 1; + + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); + if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) + goto closem; + + if (flags & D_EMPTY1) + f1 = bb_xfopen(_PATH_DEVNULL, "r"); + else { + if (strcmp(file1, "-") == 0) + f1 = stdin; + else + f1 = bb_xfopen(file1, "r"); + } + + if (flags & D_EMPTY2) + f2 = bb_xfopen(_PATH_DEVNULL, "r"); + else { + if (strcmp(file2, "-") == 0) + f2 = stdin; + else + f2 = bb_xfopen(file2, "r"); + } + + switch (files_differ(f1, f2, flags)) { + case 0: + goto closem; + case 1: + break; + default: + /* error */ + status |= 2; + goto closem; + } + + if (!asciifile(f1) || !asciifile(f2)) { + rval = D_BINARY; + status |= 1; + goto closem; + } + + prepare(0, f1, stb1.st_size); + prepare(1, f2, stb2.st_size); + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xrealloc(member, (slen[1] + 2) * sizeof(int)); + + class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = xrealloc(class, (slen[0] + 2) * sizeof(int)); + + klist = xmalloc((slen[0] + 2) * sizeof(int)); + clen = 0; + clistlen = 100; + clist = xmalloc(clistlen * sizeof(struct cand)); + i = stone(class, slen[0], member, klist); + free(member); + free(class); + + J = xrealloc(J, (len[0] + 2) * sizeof(int)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); + ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); + check(file1, f1, file2, f2); + output(file1, f1, file2, f2); + +closem: + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + if (f1 != NULL) + fclose(f1); + if (f2 != NULL) + fclose(f2); + if (file1 != ofile1) + free(file1); + if (file2 != ofile2) + free(file2); + return (rval); +} + +#if ENABLE_FEATURE_DIFF_DIR +static void do_diff (char *dir1, char *path1, char *dir2, char *path2) { + + int flags = D_HEADER; + int val; + + char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1); + char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2); + + if (stat(fullpath1, &stb1) != 0) { + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + fullpath1 = bb_xasprintf("%s/%s", dir1, path2); + } + if (stat(fullpath2, &stb2) != 0) { + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + fullpath2 = bb_xasprintf("%s/%s", dir2, path1); + } + + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); + return; + } + + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + val = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + val = D_SKIPPED2; + else + val = diffreg(fullpath1, fullpath2, flags); + + print_status(val, fullpath1, fullpath2, NULL); +} +#endif + +#ifdef CONFIG_FEATURE_DIFF_DIR +static int dir_strcmp(const void *p1, const void *p2) { + return strcmp(*(char * const *)p1, *(char * const *)p2); +} + +/* This function adds a filename to dl, the directory listing. */ + +static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) { + dl_count++; + dl = xrealloc(dl, dl_count * sizeof(char *)); + dl[dl_count - 1] = bb_xstrdup(filename); + if (cmd_flags & FLAG_r) { + int *pp = (int *) userdata; + int path_len = *pp + 1; + dl[dl_count - 1] = &(dl[dl_count - 1])[path_len]; + } + return TRUE; +} + +/* This returns a sorted directory listing. */ +static char **get_dir(char *path) { + + int i; + + /* Reset dl_count - there's no need to free dl as bb_xrealloc does + * the job nicely. */ + dl_count = 0; + + /* If -r has been set, then the recursive_action function will be + * used. Unfortunately, this outputs the root directory along with + * the recursed paths, so use void *userdata to specify the string + * length of the root directory. It can then be removed in + * add_to_dirlist. */ + + int path_len = strlen(path); + void *userdata = &path_len; + + /* Now fill dl with a listing. */ + if (cmd_flags & FLAG_r) + recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata); + else { + DIR *dp; + struct dirent *ep; + if ((dp = opendir(path)) == NULL) + bb_error_msg("Error reading directory"); + while ((ep = readdir(dp))) { + if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, "."))) + continue; + add_to_dirlist(ep->d_name, NULL, NULL); + } + closedir(dp); + } + + /* Sort dl alphabetically. */ + qsort(dl, dl_count, sizeof(char *), dir_strcmp); + + /* Copy dl so that we can return it. */ + char **retval = xmalloc(dl_count * sizeof(char *)); + for (i = 0; i < dl_count; i++) + retval[i] = bb_xstrdup(dl[i]); + + return retval; +} + +static void diffdir (char *p1, char *p2) { + + char **dirlist1, **dirlist2; + char *dp1, *dp2; + int dirlist1_count, dirlist2_count; + int pos; + + /* Check for trailing slashes. */ + + if (p1[strlen(p1) - 1] == '/') + p1[strlen(p1) - 1] = '\0'; + if (p2[strlen(p2) - 1] == '/') + p2[strlen(p2) - 1] = '\0'; + + /* Get directory listings for p1 and p2. */ + + dirlist1 = get_dir(p1); + dirlist1_count = dl_count; + dirlist1[dirlist1_count] = NULL; + dirlist2 = get_dir(p2); + dirlist2_count = dl_count; + dirlist2[dirlist2_count] = NULL; + + /* If -S was set, find the starting point. */ + if (start) { + while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) + dirlist1++; + while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) + dirlist2++; + if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) + bb_error_msg("Invalid argument to -S"); + } + + /* Now that both dirlist1 and dirlist2 contain sorted directory + * listings, we can start to go through dirlist1. If both listings + * contain the same file, then do a normal diff. Otherwise, behaviour + * is determined by whether the -N flag is set. */ + while (*dirlist1 != NULL || *dirlist2 != NULL) { + dp1 = *dirlist1; + dp2 = *dirlist2; + pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); + if (pos == 0) { + do_diff(p1, dp1, p2, dp2); + dirlist1++; + dirlist2++; + } + else if (pos < 0) { + if (cmd_flags & FLAG_N) + do_diff(p1, dp1, p2, NULL); + else + print_only(p1, strlen(p1) + 1, dp1); + dirlist1++; + } + else { + if (cmd_flags & FLAG_N) + do_diff(p1, NULL, p2, dp2); + else + print_only(p2, strlen(p2) + 1, dp2); + dirlist2++; + } + } +} +#endif + + + extern int diff_main(int argc, char **argv) { char *ep; int gotstdin = 0; |