aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Haddon <william@haddonthethird.net>2019-08-29 14:04:19 -0400
committerRob Landley <rob@landley.net>2019-08-29 19:55:46 -0500
commit0222d5957329eb575ef20ae7a00170bcab70b662 (patch)
treedc7b472b41082c6f5ac54ec1e6515139f1636338
parent3841cf8a685b4c95264dae7f42db65ca24dbf1b1 (diff)
downloadtoybox-0222d5957329eb575ef20ae7a00170bcab70b662.tar.gz
diff and patch: support special characters in filenames with quoting as well as unusual timestamp formats
After further research and testing, I've produced a patch which handles all filenames with special characters by copying the GNU quoting behavior, and also treats anything following a tab as a timestamp. This increases both ability to handle possible filenames and ability to apply patches found in the field. In diff, quote and escape filenames according to the following rules. * Surround the filename with quotes if it contains a byte less than 0x20, a byte greater than or equal to 0x80, space, backslash, or quote. * Replace alert, backspace, form feed, newline, carriage return, tab, vertical tab, backslash, and quote with \a, \b, \f, \n, \r, \t, \v, \\, and \", respectively. * Replace other bytes less than 0x20 or greater than or equal to 0x80 with a backslash followed by the three octal digits representing the value of the byte. * Treat valid UTF-8 characters involving sequences of bytes greater than or equal to 0x80 the same as other sequences of such bytes. In patch, process quoted and escaped filenames according to the following rules. * If the filename does not begin with a quote, do not modify the filename. * Remove quotes surrounding the filename. * In quoted filenames, replace \a, \b, \f, \n, \r, \t, \v, \\, and \" with alert, backspace, form feed, newline, carriage return, tab, vertical tab, backslash, and quote, respectively. * In quoted filenames, replace a backslash followed by octal digits with the byte with that value in octal. Also, in patch, treat anything on a +++ or --- line following a tab character after the beginning of the filename as a timestamp, rather than part of the filename.
-rw-r--r--toys/pending/diff.c54
-rw-r--r--toys/posix/patch.c34
2 files changed, 84 insertions, 4 deletions
diff --git a/toys/pending/diff.c b/toys/pending/diff.c
index 2d13d977..d7bb43c4 100644
--- a/toys/pending/diff.c
+++ b/toys/pending/diff.c
@@ -524,12 +524,64 @@ static int cmp(const void *p1, const void *p2)
return strcmp(* (char * const *)p1, * (char * const *)p2);
}
+// Quote and escape a filename with awkward characters, GNU diff style.
+//
+// The name is wrapped in double quotes when it contains a space, a quote,
+// a backslash, or any byte below 0x20 or at/above 0x80. Within the result,
+// \a \b \f \n \r \t \v " and \ are written as two character backslash
+// escapes, and any other byte below 0x20 or at/above 0x80 is written as a
+// backslash followed by exactly three octal digits. Multi-byte UTF-8
+// sequences get no special treatment: each byte is escaped separately.
+// A name needing none of this is copied unchanged, without quotes.
+//
+// Returns a string allocated with xmalloc(); the caller must free() it.
+char *quote_filename(char *filename)
+{
+  char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\";
+  char *result, *s, *t, *esc;
+  size_t len = 0;
+  int quote = 0;
+  unsigned char c;
+
+  // First pass: work out the output length and whether quotes are needed.
+  for (s = filename; *s; s++) {
+    // Plain char may be signed, in which case bytes >= 0x80 are negative.
+    // Going through unsigned char makes the range checks mean what they say.
+    c = *s;
+    if (strchr(from, c)) {
+      quote = 1;
+      len += 2;
+    } else if (c < 0x20 || c >= 0x80) {
+      quote = 1;
+      len += 4;
+    } else {
+      // A space is copied through as is, but still forces quoting.
+      if (c == ' ') quote = 1;
+      len++;
+    }
+  }
+
+  // Second pass: build the string. +2 for the quotes, +1 for the terminator.
+  result = xmalloc(len + (quote ? 2 : 0) + 1);
+  t = result;
+  if (quote) *t++ = '"';
+  for (s = filename; *s; s++) {
+    c = *s;
+    if ((esc = strchr(from, c))) {
+      // from[] and to[] are parallel tables: same index, escaped letter.
+      *t++ = '\\';
+      *t++ = to[esc - from];
+    } else if (c < 0x20 || c >= 0x80) {
+      // c is 0..255, so this is always a backslash plus three digits.
+      // Passing a negative char here would print eleven digits and run
+      // past the four bytes counted above.
+      t += sprintf(t, "\\%.3o", (unsigned)c);
+    } else *t++ = c;
+  }
+  if (quote) *t++ = '"';
+  *t = 0;
+
+  return result;
+}
+
+
+// Print one file header line: prefix, a space, the filename as returned by
+// quote_filename(), a tab, then the string format_iso_time() produces from
+// the file's st_mtim.
static void show_label(char *prefix, char *filename, struct stat *sb)
{
char date[36];
+ char *quoted_file;
+ // quote_filename() hands back an allocated copy, freed below once printed.
- printf("%s %s\t%s\n", prefix, filename,
+ quoted_file = quote_filename(filename);
+ printf("%s %s\t%s\n", prefix, quoted_file,
format_iso_time(date, sizeof(date), &sb->st_mtim));
+ free(quoted_file);
}
static void do_diff(char **files)
diff --git a/toys/posix/patch.c b/toys/posix/patch.c
index efb15432..4850d5df 100644
--- a/toys/posix/patch.c
+++ b/toys/posix/patch.c
@@ -247,6 +247,35 @@ done:
return TT.state;
}
+// Read a filename that may have been quoted and escaped by diff.
+//
+// A name that does not start with a double quote is copied unchanged and
+// backslashes in it are left alone. Otherwise the leading quote is dropped,
+// a quote that is the very last character is dropped, and inside the name:
+//   - a backslash followed by one to three octal digits becomes the byte
+//     with that value,
+//   - a backslash followed by anything else becomes whatever unescape()
+//     returns for that character, or the character itself when unescape()
+//     returns 0 (which covers \\ and \").
+//
+// Returns a string allocated with xmalloc(); the caller must free() it.
+char *unquote_file(char *filename) {
+  char *s = filename, *result, *t;
+  int quote = 0, ch, digits;
+
+  // Unquoting never makes the name longer, so the input length is enough.
+  result = xmalloc(strlen(filename) + 1);
+  t = result;
+  if (*s == '"') {
+    s++;
+    quote = 1;
+  }
+  // Only a quote at the very end closes the name; earlier ones are data.
+  for (; *s && !(quote && *s == '"' && !s[1]); s++) {
+    // don't accept escape sequences unless the filename is quoted
+    if (quote && *s == '\\' && s[1]) {
+      if (s[1] >= '0' && s[1] < '8') {
+        // Stop after three digits: diff always writes exactly three, so a
+        // fourth digit is an ordinary character of the name (\0011 is byte
+        // 001 followed by '1'), not part of the number.
+        for (ch = digits = 0; digits < 3 && s[1] >= '0' && s[1] < '8';
+             digits++)
+          ch = ch*8 + (*++s - '0');
+        *t++ = ch;
+      } else {
+        ch = unescape(s[1]);
+        *t++ = ch ? ch : s[1];
+        s++;
+      }
+    } else *t++ = *s;
+  }
+  *t = 0;
+
+  return result;
+}
+
+
// Read a patch file and find hunks, opening/creating/deleting files.
// Call apply_one_hunk() on each hunk.
@@ -322,13 +351,12 @@ void patch_main(void)
finish_oldfile();
// Trim date from end of filename (if any). We don't care.
- for (s = patchline+4; *s && (*s!='\t' || !isdigit(s[1])); s++)
- if (*s=='\\' && s[1]) s++;
+ for (s = patchline+4; *s && *s!='\t'; s++);
i = atoi(s);
if (i>1900 && i<=1970) *name = xstrdup("/dev/null");
else {
*s = 0;
- *name = xstrdup(patchline+4);
+ *name = unquote_file(patchline+4);
}
// We defer actually opening the file because svn produces broken