aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2019-03-20 21:25:14 -0500
committerRob Landley <rob@landley.net>2019-03-20 21:25:14 -0500
commit14eee98d9dbe81729437bf333ea413b43e37969a (patch)
treeee8043274933cf72339ea262474b6a8a4ef5d52d
parent95574e3f2a47291651f7c78c873318344565e80e (diff)
downloadtoybox-14eee98d9dbe81729437bf333ea413b43e37969a.tar.gz
Next round of tar cleanup.
-rw-r--r--toys/pending/tar.c302
1 files changed, 164 insertions, 138 deletions
diff --git a/toys/pending/tar.c b/toys/pending/tar.c
index 97e699b4..18a078e1 100644
--- a/toys/pending/tar.c
+++ b/toys/pending/tar.c
@@ -36,10 +36,8 @@ config TAR
c Create x Extract t Test
f Name of TARFILE C Change to DIR first v Verbose: show filenames
o Ignore owner h Follow symlinks m Ignore mtime
- j Force bzip2 format z Force gzip format
- O Extract to stdout
- X File of names to exclude
- T File of names to include
+ j bzip2 compression z gzip compression
+ O Extract to stdout X exclude names in FILE T include names in FILE
--exclude=FILE File pattern(s) to exclude
*/
@@ -50,10 +48,10 @@ GLOBALS(
char *f, *C;
struct arg_list *T, *X;
char *to_command;
- struct arg_list *exc;
+ struct arg_list *exclude;
// exc is an argument but inc isn't?
- struct arg_list *inc, *pass;
+ struct double_list *incl, *excl, *seen;
void *inodes;
char *cwd;
int fd;
@@ -76,6 +74,34 @@ struct tar_hdr {
prefix[155], padd[12];
};
+// convert int to octal (or base-256)
+static void itoo(char *str, int len, unsigned long long val)
+{
+ // Do we need binary encoding?
+ if (!(val>>(3*(len-1)))) sprintf(str, "%0*llo", len-1, val);
+ else {
+ *str = 128;
+ while (--len) *++str = val>>(3*len);
+ }
+}
+#define ITOO(x, y) itoo(x, sizeof(x), y)
+
+//convert octal (or base-256) to int
+static unsigned long long otoi(char *str, unsigned len)
+{
+ unsigned long long val = 0;
+
+ // When tar value too big for octal, use binary encoding with high bit set
+ if (128&*str) while (--len) val = (val<<8)+*++str;
+ else {
+ while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
+ if (len && *str && *str != ' ') error_exit("bad header");
+ }
+
+ return val;
+}
+
+
struct inode_list {
struct inode_list *next;
char *arg;
@@ -83,13 +109,7 @@ struct inode_list {
dev_t dev;
};
-//convert to octal
-static void itoo(char *str, int len, off_t val)
-{
- sprintf(str, "%0*llo", len-1, (unsigned long long)val);
-}
-
-// This really needs a hash table
+// TODO This really needs a hash table
static struct inode_list *seen_inode(void **list, struct stat *st, char *name)
{
if (!S_ISDIR(st->st_mode) && st->st_nlink > 1) {
@@ -121,31 +141,38 @@ static unsigned cksum(void *data)
static void write_longname(char *name, char type)
{
- struct tar_hdr tmp[2];
+ struct tar_hdr tmp;
int sz = strlen(name) +1;
- memset(tmp, 0, sizeof(tmp));
- strcpy(tmp->name, "././@LongLink");
- memset(tmp->mode, '0', sizeof(tmp->mode)-1);
- memset(tmp->uid, '0', sizeof(tmp->uid)-1);
- memset(tmp->gid, '0', sizeof(tmp->gid)-1);
- itoo(tmp->size, sizeof(tmp->size), sz);
- memset(tmp->mtime, '0', sizeof(tmp->mtime)-1);
- tmp->type = type;
- strcpy(tmp->magic, "ustar ");
-
- // Calculate checksum
- itoo(tmp->chksum, sizeof(tmp->chksum), cksum(&tmp));
+ memset(&tmp, 0, sizeof(tmp));
+ strcpy(tmp.name, "././@LongLink");
+ ITOO(tmp.mode, 0);
+ ITOO(tmp.uid, 0);
+ ITOO(tmp.gid, 0);
+ ITOO(tmp.size, sz);
+ ITOO(tmp.mtime, 0);
+ tmp.type = type;
+ strcpy(tmp.magic, "ustar ");
+
+ // Calculate checksum. Since 0777777 is twice 512*255 it can never use more
+ // than 6 digits, last byte is ' ' for historical reasons.
+ itoo(tmp.chksum, sizeof(tmp.chksum)-1, cksum(&tmp));
+ tmp.chksum[7] = ' ';
// write header and name, padded with NUL to block size
- xwrite(TT.fd, tmp, sizeof(*tmp));
+ xwrite(TT.fd, &tmp, 512);
xwrite(TT.fd, name, sz);
- xwrite(TT.fd, tmp+1, 512-(sz%512));
+ if (sz%512) xwrite(TT.fd, toybuf, 512-(sz%512));
}
-static int filter(struct arg_list *lst, char *name)
+static struct double_list *filter(struct double_list *lst, char *name)
{
- for (; lst; lst = lst->next) if (!fnmatch(lst->arg, name, 1<<3)) return 1;
+ struct double_list *end = lst;
+
+// TODO 1<<3 = FNM_LEADING_DIR ... Why?
+ if (lst)
+ do if (!fnmatch(lst->data, name, 1<<3)) return lst;
+ while (end != (lst = lst->next));
return 0;
}
@@ -166,6 +193,7 @@ static void alloread(void *buf, int len)
b[len] = 0;
}
+// TODO inline
static void add_file(char **nam, struct stat *st)
{
struct tar_hdr hdr;
@@ -173,11 +201,12 @@ static void add_file(char **nam, struct stat *st)
struct group *gr;
struct inode_list *node = node;
int i, fd =-1;
- char *c, *p, *name = *nam, *lnk, *hname, buf[512] = {0,};
+ char *c, *p, *name = *nam, *lnk, *hname;
static int warn = 1;
+// TODO what/why? fnmatch()?
for (p = name; *p; p++)
- if ((p == name || p[-1] == '/') && *p != '/' && filter(TT.exc, p)) return;
+ if ((p == name || p[-1] == '/') && *p != '/' && filter(TT.excl, p)) return;
if (S_ISDIR(st->st_mode) && name[strlen(name)-1] != '/') {
lnk = xmprintf("%s/",name);
@@ -197,11 +226,11 @@ static void add_file(char **nam, struct stat *st)
memset(&hdr, 0, sizeof(hdr));
strncpy(hdr.name, hname, sizeof(hdr.name));
- itoo(hdr.mode, sizeof(hdr.mode), st->st_mode &07777);
- itoo(hdr.uid, sizeof(hdr.uid), st->st_uid);
- itoo(hdr.gid, sizeof(hdr.gid), st->st_gid);
- itoo(hdr.size, sizeof(hdr.size), 0); //set size later
- itoo(hdr.mtime, sizeof(hdr.mtime), st->st_mtime);
+ ITOO(hdr.mode, st->st_mode &07777);
+ ITOO(hdr.uid, st->st_uid);
+ ITOO(hdr.gid, st->st_gid);
+ ITOO(hdr.size, 0); //set size later
+ ITOO(hdr.mtime, st->st_mtime);
// Hard link or symlink?
i = !!S_ISLNK(st->st_mode);
@@ -212,24 +241,18 @@ static void add_file(char **nam, struct stat *st)
// TODO: does this need NUL terminator?
if (strlen(lnk) > sizeof(hdr.link))
write_longname(lnk, 'K'); //write longname LINK
-// TODO: this will error_exit() if too long, not truncate.
- xstrncpy(hdr.link, lnk, sizeof(hdr.link));
+ strncpy(hdr.link, lnk, sizeof(hdr.link));
if (i) free(lnk);
} else if (S_ISREG(st->st_mode)) {
hdr.type = '0';
- if (st->st_size <= (off_t)077777777777LL)
- itoo(hdr.size, sizeof(hdr.size), st->st_size);
- else {
+ ITOO(hdr.size, st->st_size);
// TODO: test accept 12 7's but don't emit without terminator
- return error_msg("TODO: need base-256 encoding for '%s' '%lld'\n",
- hname, (unsigned long long)st->st_size);
- }
} else if (S_ISDIR(st->st_mode)) hdr.type = '5';
else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
- itoo(hdr.major, sizeof(hdr.major), dev_major(st->st_rdev));
- itoo(hdr.minor, sizeof(hdr.minor), dev_minor(st->st_rdev));
+ ITOO(hdr.major, dev_major(st->st_rdev));
+ ITOO(hdr.minor, dev_minor(st->st_rdev));
} else return error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
if (strlen(hname) > sizeof(hdr.name))
@@ -244,6 +267,8 @@ static void add_file(char **nam, struct stat *st)
else sprintf(hdr.gname, "%d", st->st_gid);
itoo(hdr.chksum, sizeof(hdr.chksum)-1, cksum(&hdr));
+ hdr.chksum[7] = ' ';
+
if (FLAG(v)) printf("%s\n",hname);
xwrite(TT.fd, (void*)&hdr, 512);
@@ -254,7 +279,7 @@ static void add_file(char **nam, struct stat *st)
return;
}
xsendfile_pad(fd, TT.fd, st->st_size);
- if (st->st_size%512) writeall(TT.fd, buf, (512-(st->st_size%512)));
+ if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
close(fd);
}
@@ -264,6 +289,7 @@ static int add_to_tar(struct dirtree *node)
char *path;
if (!dirtree_notdotdot(node)) return 0;
+// TODO repeated stat?
if (!fstat(TT.fd, &st) && st.st_dev == node->st.st_dev
&& st.st_ino == node->st.st_ino) {
error_msg("'%s' file is the archive; not dumped", TT.f);
@@ -273,11 +299,13 @@ static int add_to_tar(struct dirtree *node)
path = dirtree_path(node, 0);
add_file(&path, &(node->st)); //path may be modified
free(path);
- if (FLAG(no_recursion)) return 0;
- return ((DIRTREE_RECURSE | (FLAG(h)?DIRTREE_SYMFOLLOW:0)));
+
+ return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!FLAG(no_recursion);
}
// Does anybody actually use this?
+// TODO xpopen_both()
+// TODO one caller, inline
static void extract_to_command(void)
{
int pipefd[2], status = 0;
@@ -321,6 +349,7 @@ static void extract_to_command(void)
}
}
+// TODO one caller, inline
static void extract_to_disk(void)
{
int flags, dst_fd = -1;
@@ -328,6 +357,7 @@ static void extract_to_disk(void)
struct stat ex;
// while not if
+// TODO readlink -f prefix check
flags = strlen(TT.hdr.name);
if (flags>2)
if (strstr(TT.hdr.name, "/../") || !strcmp(TT.hdr.name, "../") ||
@@ -407,59 +437,30 @@ COPY:
}
}
-static void add_to_list(struct arg_list **llist, char *name)
-{
- struct arg_list **list = llist;
-
- while (*list) list=&((*list)->next);
- *list = xzalloc(sizeof(struct arg_list));
- (*list)->arg = name;
- if ((name[strlen(name)-1] == '/') && strlen(name) != 1)
- name[strlen(name)-1] = '\0';
-}
-
-static void file_to_list(char *file, struct arg_list **llist)
-{
- int fd = xopenro(file);
- char *line = 0;
-
- while ((line = get_line(fd))) add_to_list(llist, xstrdup(line));
- if (fd) close(fd);
- free(line);
-}
-
-//convert octal to int
-static unsigned long long otoi(char *str, int len)
-{
- unsigned long long val;
-
-// todo: base-256 encoding, just do it symmetrically for all fields
- str[len-1] = 0;
- val = strtoull(str, &str, 8);
- if (*str && *str != ' ') error_exit("bad header");
-
- return val;
-}
-
static void unpack_tar(void)
{
+ struct double_list *walk, *delete;
struct tar_hdr tar;
- int i;
+ int i, and = 0;
char *s;
for (;;) {
// align to next block and read it
if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
+ if (!(i = readall(TT.fd, &tar, 512))) return;
- i = readall(TT.fd, &tar, 512);
if (i != 512) error_exit("read error");
+
+ // Two consecutive empty headers ends tar even if there's more data
+ if (!*tar.name) {
+ if (and++) return;
+ TT.hdr.size = 0;
+ continue;
+ }
// ensure null temination even of pathological packets
- tar.padd[0] = 0;
- // End of tar
- if (!*tar.name) return;
+ tar.padd[0] = and = 0;
-// can you append a bzip to a gzip _within_ a tarball? Nested compress?
-// Or compressed data after uncompressed data?
+ // Is this a valid Unix Standard TAR header?
if (memcmp(tar.magic, "ustar", 5)) error_exit("bad header");
if (cksum(&tar) != otoi(tar.chksum, sizeof(tar.chksum)))
error_exit("bad cksum");
@@ -467,6 +468,8 @@ static void unpack_tar(void)
// If this header isn't writing something to the filesystem
if (tar.type<'0' || tar.type>'7') {
+
+ // Long name extension header?
if (tar.type == 'K') alloread(&TT.hdr.link_target, TT.hdr.size);
else if (tar.type == 'L') alloread(&TT.hdr.name, TT.hdr.size);
else if (tar.type == 'x') {
@@ -479,7 +482,7 @@ static void unpack_tar(void)
if ((i = sscanf(p, "%u path=%n", &len, &n))<1 || len<4 ||
len>TT.hdr.size)
{
- error_msg("corrupted extended header");
+ error_msg("bad header");
break;
}
p[len-1] = 0;
@@ -490,14 +493,13 @@ static void unpack_tar(void)
}
free(buf);
- // This could be if (strchr("DMNSVg", tar.type)) but an unknown header
- // type with trailing contents is unlikely to have a valid type & cksum
+ // Ignore everything else.
} else skippy(TT.hdr.size);
continue;
}
- // At this point, we're writing something to the filesystem. Parse fields.
+ // At this point, we're writing to the filesystem.
TT.hdr.mode = otoi(tar.mode, sizeof(tar.mode));
TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
TT.hdr.uid = otoi(tar.uid, sizeof(tar.uid));
@@ -511,54 +513,62 @@ static void unpack_tar(void)
if (!TT.hdr.link_target && *tar.link)
TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
if (!TT.hdr.name) {
+ // Glue prefix and name fields together with / if necessary
i = strnlen(tar.prefix, sizeof(tar.prefix));
TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
(i && tar.prefix[i-1] != '/') ? "/" : "",
(int)sizeof(tar.name), tar.name);
}
- // Directories sometimes recorded as "file with trailing slash"
+ // Old broken tar recorded dir as "file with trailing slash"
if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
*s = 0;
TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
}
- // Hardlinks, symlinks, and directories do not have contents in archive
- // (Neither do fifo, block or char devices, but not testing for that...?)
- if ((TT.hdr.link_target && *TT.hdr.link_target)
- || S_ISLNK(TT.hdr.mode) || S_ISDIR(TT.hdr.mode))
+ // Non-regular files don't have contents stored in archive.
+ if ((TT.hdr.link_target && *TT.hdr.link_target) || !S_ISREG(TT.hdr.mode))
TT.hdr.size = 0;
- // Skip excluded files
- if (filter(TT.exc, TT.hdr.name) || (TT.inc && !filter(TT.inc, TT.hdr.name)))
- skippy(TT.hdr.size);
- else {
-
-// TODO: wrong, shouldn't grow endlessly, mark seen TT.inc instead
- add_to_list(&TT.pass, xstrdup(TT.hdr.name));
-
- if (FLAG(t)) {
- if (FLAG(v)) {
- char perm[11];
- struct tm *lc = localtime(&TT.hdr.mtime);
-
- mode_to_string(TT.hdr.mode, perm);
- printf("%s %s/%s %9ld %d-%02d-%02d %02d:%02d:%02d ", perm,
- TT.hdr.uname, TT.hdr.gname, (long)TT.hdr.size, 1900+lc->tm_year,
- 1+lc->tm_mon, lc->tm_mday, lc->tm_hour, lc->tm_min, lc->tm_sec);
+ // Files are seen even if excluded, so check them here.
+ // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
+ if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
+ if (!TT.seen) TT.seen = delete;
+
+ // Move seen entry to end of list.
+ if (TT.incl == delete) TT.incl = TT.incl->next;
+ else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
+ if (walk == delete) {
+ dlist_pop(&walk);
+ dlist_add_nomalloc(&TT.incl, delete);
}
- printf("%s", TT.hdr.name);
- if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
- xputc('\n');
- skippy(TT.hdr.size);
- } else {
- if (FLAG(v)) printf("%s\n", TT.hdr.name);
- if (FLAG(O)) xsendfile_len(TT.fd, 0, TT.hdr.size);
- else if (FLAG(to_command)) extract_to_command();
- else extract_to_disk();
}
}
+ // Skip excluded files
+ if (filter(TT.excl, TT.hdr.name) || TT.incl && !delete) skippy(TT.hdr.size);
+ else if (FLAG(t)) {
+ if (FLAG(v)) {
+ struct tm *lc = localtime(&TT.hdr.mtime);
+ char perm[11];
+
+ mode_to_string(TT.hdr.mode, perm);
+ printf("%s %s/%s %9lld %d-%02d-%02d %02d:%02d:%02d ", perm,
+ TT.hdr.uname, TT.hdr.gname, (long long)TT.hdr.size,
+ 1900+lc->tm_year, 1+lc->tm_mon, lc->tm_mday, lc->tm_hour,
+ lc->tm_min, lc->tm_sec);
+ }
+ printf("%s", TT.hdr.name);
+ if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
+ xputc('\n');
+ skippy(TT.hdr.size);
+ } else {
+ if (FLAG(v)) printf("%s\n", TT.hdr.name);
+ if (FLAG(O)) xsendfile_len(TT.fd, 0, TT.hdr.size);
+ else if (FLAG(to_command)) extract_to_command();
+ else extract_to_disk();
+ }
+
free(TT.hdr.name);
free(TT.hdr.link_target);
free(TT.hdr.uname);
@@ -567,9 +577,20 @@ static void unpack_tar(void)
}
}
+// Add copy of filename to TT.incl or TT.excl, minus trailing \n and /
+static void trim_list(char **pline, long len)
+{
+ char *n = strdup(*pline);
+ int i = strlen(n);
+
+ dlist_add(TT.X ? &TT.excl : &TT.incl, n);
+ if (i && n[i-1]=='\n') i--;
+ while (i && n[i-1] == '/') i--;
+ n[i] = 0;
+}
+
void tar_main(void)
{
- struct arg_list *tmp;
char *s, **args = toys.optargs;
// When extracting to command
@@ -577,14 +598,14 @@ void tar_main(void)
if (!geteuid()) toys.optflags |= FLAG_p;
- // Collect file list
- while (*args) add_to_list(&TT.inc, *args++);
- for (;TT.T; TT.T = TT.T->next) file_to_list(TT.T->arg, &TT.inc);
- for (;TT.X; TT.X = TT.X->next) file_to_list(TT.X->arg, &TT.exc);
+ // Collect file list. Note: trim_list appends to TT.incl when !TT.X
+ for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', trim_list);
+ for (args = toys.optargs; *args; args++) trim_list(args, strlen(*args));
+ for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', trim_list);
// Open archive file
if (FLAG(c)) {
- if (!TT.inc) error_exit("empty archive");
+ if (!TT.incl) error_exit("empty archive");
TT.fd = 1;
}
if (TT.f && strcmp(TT.f, "-"))
@@ -616,12 +637,18 @@ void tar_main(void)
}
unpack_tar();
- for (tmp = TT.inc; tmp; tmp = tmp->next)
- if (!filter(TT.exc, tmp->arg) && !filter(TT.pass, tmp->arg))
- error_msg("'%s' not in archive", tmp->arg);
+ if (TT.seen != TT.incl) {
+ if (!TT.seen) TT.seen = TT.incl;
+ while (TT.incl != TT.seen) {
+ error_msg("'%s' not in archive", TT.incl->data);
+ TT.incl = TT.incl->next;
+ }
+ }
// are we writing? (Don't have to test flag here one of 3 must be set)
} else {
+ struct double_list *dl = TT.incl;
+
// TODO: autodetect
if (FLAG(j)||FLAG(z)) {
int pipefd[2] = {-1, TT.fd};
@@ -630,10 +657,9 @@ void tar_main(void)
close(TT.fd);
TT.fd = pipefd[0];
}
- for (tmp = TT.inc; tmp; tmp = tmp->next)
- dirtree_flagread(tmp->arg, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
+ do dirtree_flagread(dl->data, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
+ while (TT.incl != (dl = dl->next));
- memset(toybuf, 0, 1024);
writeall(TT.fd, toybuf, 1024);
}
}