diff options
author | William Haddon <william@haddonthethird.net> | 2019-08-29 14:04:19 -0400 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2019-08-29 19:55:46 -0500 |
commit | 0222d5957329eb575ef20ae7a00170bcab70b662 (patch) | |
tree | dc7b472b41082c6f5ac54ec1e6515139f1636338 | |
parent | 3841cf8a685b4c95264dae7f42db65ca24dbf1b1 (diff) | |
download | toybox-0222d5957329eb575ef20ae7a00170bcab70b662.tar.gz |
diff and patch: support special characters in filenames with quoting as well as unusual timestamp formats
After further research and testing, I've produced a patch which handles
all filenames with special characters by copying the GNU quoting
behavior, and also treats anything following a tab as a timestamp. This
improves both the ability to handle arbitrary filenames and the ability to
apply patches found in the field.
In diff, quote and escape filenames according to the following rules.
* Surround the filename with quotes if it contains a byte less than 0x20,
a byte greater than or equal to 0x80, space, backslash, or quote.
* Replace alert, backspace, form feed, newline, carriage return, tab, vertical
tab, backslash, and quote with \a, \b, \f, \n, \r, \t, \v, \\, and \",
respectively.
* Replace other bytes less than 0x20 or greater than or equal to 0x80 with a
backslash followed by the three octal digits representing the value of the
byte.
* Give valid UTF-8 multibyte characters no special treatment: each of their
bytes (all greater than or equal to 0x80) is escaped like any other such byte.
In patch, process quoted and escaped filenames according to the following
rules.
* If the filename does not begin with a quote, do not modify the filename.
* Remove quotes surrounding the filename.
* In quoted filenames, replace \a, \b, \f, \n, \r, \t, \v, \\, and \" with
alert, backspace, form feed, newline, carriage return, tab, vertical tab,
backslash, and quote, respectively.
* In quoted filenames, replace a backslash followed by octal digits with the
byte whose value those digits represent.
Also, in patch, treat anything on a +++ or --- line following a tab character
after the beginning of the filename as a timestamp, rather than part of the
filename.
-rw-r--r-- | toys/pending/diff.c | 54 | ||||
-rw-r--r-- | toys/posix/patch.c | 34 |
2 files changed, 84 insertions, 4 deletions
diff --git a/toys/pending/diff.c b/toys/pending/diff.c index 2d13d977..d7bb43c4 100644 --- a/toys/pending/diff.c +++ b/toys/pending/diff.c @@ -524,12 +524,64 @@ static int cmp(const void *p1, const void *p2) return strcmp(* (char * const *)p1, * (char * const *)p2); } +// quote and escape filenames that have awkward characters +char *quote_filename(char *filename) +{ + char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\"; + char *result, *s, *t; + size_t len = 0; + int quote = 0; + + // calculate memory usage and presence of quotes + for (s = filename; *s; s++) { + if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v' + || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\') + { + quote = 1; + len += 2; + } else if (*s == ' ') { + quote = 1; + len++; + } else if (*s < 0x20 || *s >= 0x80) { + quote = 1; + len += 4; + } else { + len++; + } + } + + // construct the new string + result = xmalloc(len + (quote ? 2 : 0) + 1); + t = result; + if (quote) *t++ = '"'; + for (s = filename; *s; s++) { + if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v' + || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\') + { + *t = '\\'; + t[1] = to[strchr(from, *s) - from]; + t += 2; + } else if (*s < 0x20 || *s >= 0x80) { + sprintf(t, "\\%.3o", *s); + t += 4; + } else { + *t++ = *s; + } + } + if (quote) *t++ = '"'; + *t = 0; + return result; +} + static void show_label(char *prefix, char *filename, struct stat *sb) { char date[36]; + char *quoted_file; - printf("%s %s\t%s\n", prefix, filename, + quoted_file = quote_filename(filename); + printf("%s %s\t%s\n", prefix, quoted_file, format_iso_time(date, sizeof(date), &sb->st_mtim)); + free(quoted_file); } static void do_diff(char **files) diff --git a/toys/posix/patch.c b/toys/posix/patch.c index efb15432..4850d5df 100644 --- a/toys/posix/patch.c +++ b/toys/posix/patch.c @@ -247,6 +247,35 @@ done: return TT.state; } +// read a filename that has been quoted or escaped +char *unquote_file(char 
*filename) { + char *s = filename, *result, *t, *u; + int quote = 0, ch; + + // quoted and escaped filenames are larger than the original + result = xmalloc(strlen(filename) + 1); + t = result; + if (*s == '"') { + s++; + quote = 1; + } + for (; *s && !(quote && *s == '"' && !s[1]); s++) { + // don't accept escape sequences unless the filename is quoted + if (quote && *s == '\\' && s[1]) { + if (s[1] >= '0' && s[1] < '8') { + *t++ = strtoul(s + 1, &u, 8); + s = u - 1; + } else { + ch = unescape(s[1]); + *t++ = ch ? ch : s[1]; + s++; + } + } else *t++ = *s; + } + *t = 0; + return result; +} + // Read a patch file and find hunks, opening/creating/deleting files. // Call apply_one_hunk() on each hunk. @@ -322,13 +351,12 @@ void patch_main(void) finish_oldfile(); // Trim date from end of filename (if any). We don't care. - for (s = patchline+4; *s && (*s!='\t' || !isdigit(s[1])); s++) - if (*s=='\\' && s[1]) s++; + for (s = patchline+4; *s && *s!='\t'; s++); i = atoi(s); if (i>1900 && i<=1970) *name = xstrdup("/dev/null"); else { *s = 0; - *name = xstrdup(patchline+4); + *name = unquote_file(patchline+4); } // We defer actually opening the file because svn produces broken |