diff options
author | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
---|---|---|
committer | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
commit | 5d69c6a2661bba0a22f3ecfd517e2e9767a38346 (patch) | |
tree | 1f479b2714e127835db7f33a3bfed4c38c52f883 /bin/pax | |
parent | e2abcdca396661cbe0ae2ddb13d5c2b85682c13a (diff) | |
download | otools-5d69c6a2661bba0a22f3ecfd517e2e9767a38346.tar.gz |
add tools
Diffstat (limited to 'bin/pax')
-rw-r--r-- | bin/pax/CVS/Entries | 24 | ||||
-rw-r--r-- | bin/pax/CVS/Repository | 1 | ||||
-rw-r--r-- | bin/pax/CVS/Root | 1 | ||||
-rw-r--r-- | bin/pax/Makefile | 11 | ||||
-rw-r--r-- | bin/pax/ar_io.c | 1288 | ||||
-rw-r--r-- | bin/pax/ar_subs.c | 1277 | ||||
-rw-r--r-- | bin/pax/buf_subs.c | 983 | ||||
-rw-r--r-- | bin/pax/cpio.1 | 309 | ||||
-rw-r--r-- | bin/pax/cpio.c | 1106 | ||||
-rw-r--r-- | bin/pax/cpio.h | 150 | ||||
-rw-r--r-- | bin/pax/extern.h | 310 | ||||
-rw-r--r-- | bin/pax/file_subs.c | 1106 | ||||
-rw-r--r-- | bin/pax/ftree.c | 566 | ||||
-rw-r--r-- | bin/pax/gen_subs.c | 401 | ||||
-rw-r--r-- | bin/pax/getoldopt.c | 69 | ||||
-rw-r--r-- | bin/pax/options.c | 1788 | ||||
-rw-r--r-- | bin/pax/pat_rep.c | 1108 | ||||
-rw-r--r-- | bin/pax/pax.1 | 1112 | ||||
-rw-r--r-- | bin/pax/pax.c | 446 | ||||
-rw-r--r-- | bin/pax/pax.h | 262 | ||||
-rw-r--r-- | bin/pax/sel_subs.c | 632 | ||||
-rw-r--r-- | bin/pax/tables.c | 1786 | ||||
-rw-r--r-- | bin/pax/tar.1 | 410 | ||||
-rw-r--r-- | bin/pax/tar.c | 1284 | ||||
-rw-r--r-- | bin/pax/tar.h | 159 | ||||
-rw-r--r-- | bin/pax/tty_subs.c | 187 |
26 files changed, 16776 insertions, 0 deletions
diff --git a/bin/pax/CVS/Entries b/bin/pax/CVS/Entries new file mode 100644 index 0000000..0ba2835 --- /dev/null +++ b/bin/pax/CVS/Entries @@ -0,0 +1,24 @@ +/Makefile/1.13/Thu Sep 13 12:33:43 2018// +/ar_io.c/1.63/Fri Jun 28 13:34:59 2019// +/ar_subs.c/1.49/Fri Jun 28 13:34:59 2019// +/buf_subs.c/1.31/Fri Jun 28 13:34:59 2019// +/cpio.1/1.36/Thu Jan 16 16:46:46 2020// +/cpio.c/1.33/Sat Sep 16 07:42:34 2017// +/cpio.h/1.4/Mon Jun 2 23:32:08 2003// +/extern.h/1.60/Mon Mar 23 20:04:19 2020// +/file_subs.c/1.55/Mon Mar 23 20:04:19 2020// +/ftree.c/1.42/Fri Jun 28 13:34:59 2019// +/gen_subs.c/1.32/Fri Aug 26 05:06:14 2016// +/getoldopt.c/1.9/Tue Oct 27 23:59:22 2009// +/options.c/1.103/Fri Nov 15 20:34:17 2019// +/pat_rep.c/1.43/Sat Sep 16 07:42:34 2017// +/pax.1/1.75/Thu Jan 16 16:46:46 2020// +/pax.c/1.53/Fri Jun 28 13:34:59 2019// +/pax.h/1.29/Tue Sep 12 17:11:11 2017// +/sel_subs.c/1.28/Mon Jun 24 03:33:09 2019// +/tables.c/1.54/Fri Jun 28 05:35:34 2019// +/tar.1/1.62/Thu Jan 16 16:46:46 2020// +/tar.c/1.68/Mon Jun 24 03:33:09 2019// +/tar.h/1.9/Wed Jan 8 06:43:34 2014// +/tty_subs.c/1.17/Fri Aug 26 04:22:13 2016// +D diff --git a/bin/pax/CVS/Repository b/bin/pax/CVS/Repository new file mode 100644 index 0000000..19b1a65 --- /dev/null +++ b/bin/pax/CVS/Repository @@ -0,0 +1 @@ +src/bin/pax diff --git a/bin/pax/CVS/Root b/bin/pax/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/bin/pax/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/bin/pax/Makefile b/bin/pax/Makefile new file mode 100644 index 0000000..5dd36e2 --- /dev/null +++ b/bin/pax/Makefile @@ -0,0 +1,11 @@ +# $OpenBSD: Makefile,v 1.13 2018/09/13 12:33:43 millert Exp $ + +WARNINGS=Yes +PROG= pax +SRCS= ar_io.c ar_subs.c buf_subs.c cpio.c file_subs.c ftree.c\ + gen_subs.c getoldopt.c options.c pat_rep.c pax.c sel_subs.c tables.c\ + tar.c tty_subs.c +MAN= pax.1 tar.1 cpio.1 +LINKS= ${BINDIR}/pax ${BINDIR}/tar ${BINDIR}/pax ${BINDIR}/cpio + +.include <bsd.prog.mk> diff --git a/bin/pax/ar_io.c 
b/bin/pax/ar_io.c new file mode 100644 index 0000000..ddbd36e --- /dev/null +++ b/bin/pax/ar_io.c @@ -0,0 +1,1288 @@ +/* $OpenBSD: ar_io.c,v 1.63 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ar_io.c,v 1.5 1996/03/26 23:54:13 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mtio.h> +#include <sys/wait.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Routines which deal directly with the archive I/O device/file. + */ + +#define DMOD 0666 /* default mode of created archives */ +#define EXT_MODE O_RDONLY /* open mode for list/extract */ +#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ +#define APP_MODE O_RDWR /* mode for append */ +#define STDO "<STDOUT>" /* pseudo name for stdout */ +#define STDN "<STDIN>" /* pseudo name for stdin */ +static int arfd = -1; /* archive file descriptor */ +static int artyp = ISREG; /* archive type: file/FIFO/tape */ +static int arvol = 1; /* archive volume number */ +static int lstrval = -1; /* return value from last i/o */ +static int io_ok; /* i/o worked on volume after resync */ +static int did_io; /* did i/o ever occur on volume? */ +static int done; /* set via tty termination */ +static struct stat arsb; /* stat of archive device at open */ +static int invld_rec; /* tape has out of spec record size */ +static int wr_trail = 1; /* trailer was rewritten in append */ +static int can_unlnk = 0; /* do we unlink null archives? 
*/ +const char *arcname; /* printable name of archive */ +const char *gzip_program; /* name of gzip program */ +static pid_t zpid = -1; /* pid of child process */ +int force_one_volume; /* 1 if we ignore volume changes */ + +static int get_phys(void); +extern sigset_t s_mask; +static void ar_start_gzip(int, const char *, int); + +/* + * ar_open() + * Opens the next archive volume. Determines the type of the device and + * sets up block sizes as required by the archive device and the format. + * Note: we may be called with name == NULL on the first open only. + * Return: + * -1 on failure, 0 otherwise + */ + +int +ar_open(const char *name) +{ + struct mtget mb; + + if (arfd != -1) + (void)close(arfd); + arfd = -1; + can_unlnk = did_io = io_ok = invld_rec = 0; + artyp = ISREG; + flcnt = 0; + + /* + * open based on overall operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + if (name == NULL) { + arfd = STDIN_FILENO; + arcname = STDN; + } else if ((arfd = open(name, EXT_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to read on %s", name); + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 0); + break; + case ARCHIVE: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, AR_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to write on %s", name); + else + can_unlnk = 1; + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 1); + break; + case APPND: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, APP_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to read/write on %s", + name); + break; + case COPY: + /* + * arfd not used in COPY mode + */ + arcname = "<NONE>"; + lstrval = 1; + return(0); + } + if (arfd < 0) + return(-1); + + if (chdname != NULL) + if (chdir(chdname) == -1) { + syswarn(1, errno, "Failed chdir to %s", chdname); + return(-1); + } + /* + * set up is based on device type + */ + if 
(fstat(arfd, &arsb) == -1) { + syswarn(1, errno, "Failed stat on %s", arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + if (S_ISDIR(arsb.st_mode)) { + paxwarn(0, "Cannot write an archive on top of a directory %s", + arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + + if (S_ISCHR(arsb.st_mode)) + artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE; + else if (S_ISBLK(arsb.st_mode)) + artyp = ISBLK; + else if ((lseek(arfd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)) + artyp = ISPIPE; + else + artyp = ISREG; + + /* + * make sure beyond any doubt that we can unlink only regular files + * we created + */ + if (artyp != ISREG) + can_unlnk = 0; + /* + * if we are writing, we are done + */ + if (act == ARCHIVE) { + blksz = rdblksz = wrblksz; + lstrval = 1; + return(0); + } + + /* + * set default blksz on read. APPNDs writes rdblksz on the last volume + * On all new archive volumes, we shift to wrblksz (if the user + * specified one, otherwise we will continue to use rdblksz). We + * must set blocksize based on what kind of device the archive is + * stored. + */ + switch (artyp) { + case ISTAPE: + /* + * Tape drives come in at least two flavors. Those that support + * variable sized records and those that have fixed sized + * records. They must be treated differently. For tape drives + * that support variable sized records, we must make large + * reads to make sure we get the entire record, otherwise we + * will just get the first part of the record (up to size we + * asked). Tapes with fixed sized records may or may not return + * multiple records in a single read. We really do not care + * what the physical record size is UNLESS we are going to + * append. (We will need the physical block size to rewrite + * the trailer). Only when we are appending do we go to the + * effort to figure out the true PHYSICAL record size. 
+ */ + blksz = rdblksz = MAXBLK; + break; + case ISPIPE: + case ISBLK: + case ISCHR: + /* + * Blocksize is not a major issue with these devices (but must + * be kept a multiple of 512). If the user specified a write + * block size, we use that to read. Under append, we must + * always keep blksz == rdblksz. Otherwise we go ahead and use + * the device optimal blocksize as (and if) returned by stat + * and if it is within pax specs. + */ + if ((act == APPND) && wrblksz) { + blksz = rdblksz = wrblksz; + break; + } + + if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && + ((arsb.st_blksize % BLKMULT) == 0)) + rdblksz = arsb.st_blksize; + else + rdblksz = DEVBLK; + /* + * For performance go for large reads when we can without harm + */ + if ((act == APPND) || (artyp == ISCHR)) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + case ISREG: + /* + * if the user specified wrblksz works, use it. Under appends + * we must always keep blksz == rdblksz + */ + if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ + blksz = rdblksz = wrblksz; + break; + } + /* + * See if we can find the blocking factor from the file size + */ + for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) + if ((arsb.st_size % rdblksz) == 0) + break; + /* + * When we cannot find a match, we may have a flawed archive. + */ + if (rdblksz <= 0) + rdblksz = FILEBLK; + /* + * for performance go for large reads when we can + */ + if (act == APPND) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + default: + /* + * should never happen, worst case, slow... + */ + blksz = rdblksz = BLKMULT; + break; + } + lstrval = 1; + return(0); +} + +/* + * ar_close(int int_sig) + * closes archive device, increments volume number, and prints i/o summary + * If in_sig is set we're in a signal handler and can't flush stdio. + */ +void +ar_close(int in_sig) +{ + int status; + + if (arfd < 0) { + did_io = io_ok = flcnt = 0; + return; + } + if (!in_sig) + fflush(listf); + + /* + * Close archive file. 
This may take a LONG while on tapes (we may be + * forced to wait for the rewind to complete) so tell the user what is + * going on (this avoids the user hitting control-c thinking pax is + * broken). + */ + if (vflag && (artyp == ISTAPE)) { + (void)dprintf(listfd, + "%s%s: Waiting for tape drive close to complete...", + vfpart ? "\n" : "", argv0); + } + + /* + * if nothing was written to the archive (and we created it), we remove + * it + */ + if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && + (arsb.st_size == 0)) { + (void)unlink(arcname); + can_unlnk = 0; + } + + /* + * for a quick extract/list, pax frequently exits before the child + * process is done + */ + if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) { + kill(zpid, SIGINT); + zpid = -1; + } + + (void)close(arfd); + + /* Do not exit before child to ensure data integrity */ + if (zpid > 0) { + waitpid(zpid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + exit_val = 1; + } + + + if (vflag && (artyp == ISTAPE)) { + (void)write(listfd, "done.\n", sizeof("done.\n")-1); + vfpart = 0; + } + arfd = -1; + + if (!io_ok && !did_io) { + flcnt = 0; + return; + } + did_io = io_ok = 0; + + /* + * The volume number is only increased when the last device has data + * and we have already determined the archive format. + */ + if (frmt != NULL) + ++arvol; + + if (!vflag) { + flcnt = 0; + return; + } + + /* + * Print out a summary of I/O for this archive volume. + */ + if (vfpart) { + (void)write(listfd, "\n", 1); + vfpart = 0; + } + + /* + * If we have not determined the format yet, we just say how many bytes + * we have skipped over looking for a header to id. there is no way we + * could have written anything yet. 
+ */ + if (frmt == NULL) { + (void)dprintf(listfd, + "%s: unknown format, %llu bytes skipped.\n", argv0, rdcnt); + flcnt = 0; + return; + } + + if (op_mode == OP_PAX) + (void)dprintf(listfd, "%s: %s vol %d, %lu files," + " %llu bytes read, %llu bytes written.\n", + argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt); +#ifndef NOCPIO + else if (op_mode == OP_CPIO) + (void)dprintf(listfd, "%llu blocks\n", + (rdcnt ? rdcnt : wrcnt) / 5120); +#endif /* !NOCPIO */ + flcnt = 0; +} + +/* + * ar_drain() + * drain any archive format independent padding from an archive read + * from a socket or a pipe. This is to prevent the process on the + * other side of the pipe from getting a SIGPIPE (pax will stop + * reading an archive once a format dependent trailer is detected). + */ +void +ar_drain(void) +{ + int res; + char drbuf[MAXBLK]; + + /* + * we only drain from a pipe/socket. Other devices can be closed + * without reading up to end of file. We sure hope that pipe is closed + * on the other side so we will get an EOF. + */ + if ((artyp != ISPIPE) || (lstrval <= 0)) + return; + + /* + * keep reading until pipe is drained + */ + while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0) + continue; + lstrval = res; +} + +/* + * ar_set_wr() + * Set up device right before switching from read to write in an append. + * device dependent code (if required) to do this should be added here. + * For all archive devices we are already positioned at the place we want + * to start writing when this routine is called. + * Return: + * 0 if all ready to write, -1 otherwise + */ + +int +ar_set_wr(void) +{ + off_t cpos; + + /* + * we must make sure the trailer is rewritten on append, ar_next() + * will stop us if the archive containing the trailer was not written + */ + wr_trail = 0; + + /* + * Add any device dependent code as required here + */ + if (artyp != ISREG) + return(0); + /* + * Ok we have an archive in a regular file. 
If we were rewriting a + * file, we must get rid of all the stuff after the current offset + * (it was not written by pax). + */ + if (((cpos = lseek(arfd, 0, SEEK_CUR)) < 0) || + (ftruncate(arfd, cpos) == -1)) { + syswarn(1, errno, "Unable to truncate archive file"); + return(-1); + } + return(0); +} + +/* + * ar_app_ok() + * check if the last volume in the archive allows appends. We cannot check + * this until we are ready to write since there is no spec that says all + * volumes in a single archive have to be of the same type... + * Return: + * 0 if we can append, -1 otherwise. + */ + +int +ar_app_ok(void) +{ + if (artyp == ISPIPE) { + paxwarn(1, "Cannot append to an archive obtained from a pipe."); + return(-1); + } + + if (!invld_rec) + return(0); + paxwarn(1,"Cannot append, device record size %d does not support %s spec", + rdblksz, argv0); + return(-1); +} + +/* + * ar_read() + * read up to a specified number of bytes from the archive into the + * supplied buffer. When dealing with tapes we may not always be able to + * read what we want. + * Return: + * Number of bytes in buffer. 0 for end of file, -1 for a read error. + */ + +int +ar_read(char *buf, int cnt) +{ + int res = 0; + + /* + * if last i/o was in error, no more reads until reset or new volume + */ + if (lstrval <= 0) + return(lstrval); + + /* + * how we read must be based on device type + */ + switch (artyp) { + case ISTAPE: + if ((res = read(arfd, buf, cnt)) > 0) { + /* + * CAUTION: tape systems may not always return the same + * sized records so we leave blksz == MAXBLK. The + * physical record size that a tape drive supports is + * very hard to determine in a uniform and portable + * manner. + */ + io_ok = 1; + if (res != rdblksz) { + /* + * Record size changed. If this happens on + * any record after the first, we probably have + * a tape drive which has a fixed record size + * (we are getting multiple records in a single + * read). 
Watch out for record blocking that + * violates pax spec (must be a multiple of + * BLKMULT). + */ + rdblksz = res; + if (rdblksz % BLKMULT) + invld_rec = 1; + } + return(res); + } + break; + case ISREG: + case ISBLK: + case ISCHR: + case ISPIPE: + default: + /* + * Files are so easy to deal with. These other things cannot + * be trusted at all. So when we are dealing with character + * devices and pipes we just take what they have ready for us + * and return. Trying to do anything else with them runs the + * risk of failure. + */ + if ((res = read(arfd, buf, cnt)) > 0) { + io_ok = 1; + return(res); + } + break; + } + + /* + * We are in trouble at this point, something is broken... + */ + lstrval = res; + if (res < 0) + syswarn(1, errno, "Failed read on archive volume %d", arvol); + else + paxwarn(0, "End of archive volume %d reached", arvol); + return(res); +} + +/* + * ar_write() + * Write a specified number of bytes in supplied buffer to the archive + * device so it appears as a single "block". Deals with errors and tries + * to recover when faced with short writes. + * Return: + * Number of bytes written. 0 indicates end of volume reached and with no + * flaws (as best that can be detected). A -1 indicates an unrecoverable + * error in the archive occurred. + */ + +int +ar_write(char *buf, int bsz) +{ + ssize_t res; + off_t cpos; + + /* + * do not allow pax to create a "bad" archive. Once a write fails on + * an archive volume prevent further writes to it. + */ + if (lstrval <= 0) + return(lstrval); + + if ((res = write(arfd, buf, bsz)) == bsz) { + wr_trail = 1; + io_ok = 1; + return(bsz); + } + /* + * write broke, see what we can do with it. We try to send any partial + * writes that may violate pax spec to the next archive volume. 
+ */ + if (res == -1) + lstrval = res; + else + lstrval = 0; + + switch (artyp) { + case ISREG: + if ((res > 0) && (res % BLKMULT)) { + /* + * try to fix up partial writes which are not BLKMULT + * in size by forcing the runt record to next archive + * volume + */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) + break; + cpos -= res; + if (ftruncate(arfd, cpos) == -1) + break; + res = lstrval = 0; + break; + } + if (res >= 0) + break; + /* + * if file is out of space, handle it like a return of 0 + */ + if ((errno == ENOSPC) || (errno == EFBIG) || (errno == EDQUOT)) + res = lstrval = 0; + break; + case ISTAPE: + case ISCHR: + case ISBLK: + if (res >= 0) + break; + if (errno == EACCES) { + paxwarn(0, "Write failed, archive is write protected."); + res = lstrval = 0; + return(0); + } + /* + * see if we reached the end of media, if so force a change to + * the next volume + */ + if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) + res = lstrval = 0; + break; + case ISPIPE: + default: + /* + * we cannot fix errors to these devices + */ + break; + } + + /* + * Better tell the user the bad news... + * if this is a block aligned archive format, we may have a bad archive + * if the format wants the header to start at a BLKMULT boundary. While + * we can deal with the mis-aligned data, it violates spec and other + * archive readers will likely fail. if the format is not block + * aligned, the user may be lucky (and the archive is ok). + */ + if (res >= 0) { + if (res > 0) + wr_trail = 1; + io_ok = 1; + } + + /* + * If we were trying to rewrite the trailer and it didn't work, we + * must quit right away. + */ + if (!wr_trail && (res <= 0)) { + paxwarn(1,"Unable to append, trailer re-write failed. 
Quitting."); + return(res); + } + + if (res == 0) + paxwarn(0, "End of archive volume %d reached", arvol); + else if (res < 0) + syswarn(1, errno, "Failed write to archive volume: %d", arvol); + else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) + paxwarn(0,"WARNING: partial archive write. Archive MAY BE FLAWED"); + else + paxwarn(1,"WARNING: partial archive write. Archive IS FLAWED"); + return(res); +} + +/* + * ar_rdsync() + * Try to move past a bad spot on a flawed archive as needed to continue + * I/O. Clears error flags to allow I/O to continue. + * Return: + * 0 when ok to try i/o again, -1 otherwise. + */ + +int +ar_rdsync(void) +{ + long fsbz; + off_t cpos; + off_t mpos; + struct mtop mb; + + /* + * Fail resync attempts at user request (done) or if this is going to be + * an update/append to a existing archive. if last i/o hit media end, + * we need to go to the next volume not try a resync + */ + if ((done > 0) || (lstrval == 0)) + return(-1); + + if ((act == APPND) || (act == ARCHIVE)) { + paxwarn(1, "Cannot allow updates to an archive with flaws."); + return(-1); + } + if (io_ok) + did_io = 1; + + switch (artyp) { + case ISTAPE: + /* + * if the last i/o was a successful data transfer, we assume + * the fault is just a bad record on the tape that we are now + * past. If we did not get any data since the last resync try + * to move the tape forward one PHYSICAL record past any + * damaged tape section. Some tape drives are stubborn and need + * to be pushed. + */ + if (io_ok) { + io_ok = 0; + lstrval = 1; + break; + } + mb.mt_op = MTFSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) + break; + lstrval = 1; + break; + case ISREG: + case ISCHR: + case ISBLK: + /* + * try to step over the bad part of the device. 
+ */ + io_ok = 0; + if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) + fsbz = BLKMULT; + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) + break; + mpos = fsbz - (cpos % fsbz); + if (lseek(arfd, mpos, SEEK_CUR) == -1) + break; + lstrval = 1; + break; + case ISPIPE: + default: + /* + * cannot recover on these archive device types + */ + io_ok = 0; + break; + } + if (lstrval <= 0) { + paxwarn(1, "Unable to recover from an archive read failure."); + return(-1); + } + paxwarn(0, "Attempting to recover from an archive read failure."); + return(0); +} + +/* + * ar_fow() + * Move the I/O position within the archive forward the specified number of + * bytes as supported by the device. If we cannot move the requested + * number of bytes, return the actual number of bytes moved in skipped. + * Return: + * 0 if moved the requested distance, -1 on complete failure, 1 on + * partial move (the amount moved is in skipped) + */ + +int +ar_fow(off_t sksz, off_t *skipped) +{ + off_t cpos; + off_t mpos; + + *skipped = 0; + if (sksz <= 0) + return(0); + + /* + * we cannot move forward at EOF or error + */ + if (lstrval <= 0) + return(lstrval); + + /* + * Safer to read forward on devices where it is hard to find the end of + * the media without reading to it. With tapes we cannot be sure of the + * number of physical blocks to skip (we do not know physical block + * size at this point), so we must only read forward on tapes! 
+ */ + if (artyp != ISREG) + return(0); + + /* + * figure out where we are in the archive + */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) >= 0) { + /* + * we can be asked to move farther than there are bytes in this + * volume, if so, just go to file end and let normal buf_fill() + * deal with the end of file (it will go to next volume by + * itself) + */ + if ((mpos = cpos + sksz) > arsb.st_size) { + *skipped = arsb.st_size - cpos; + mpos = arsb.st_size; + } else + *skipped = sksz; + if (lseek(arfd, mpos, SEEK_SET) >= 0) + return(0); + } + syswarn(1, errno, "Forward positioning operation on archive failed"); + lstrval = -1; + return(-1); +} + +/* + * ar_rev() + * move the i/o position within the archive backwards the specified byte + * count as supported by the device. With tapes drives we RESET rdblksz to + * the PHYSICAL blocksize. + * NOTE: We should only be called to move backwards so we can rewrite the + * last records (the trailer) of an archive (APPEND). + * Return: + * 0 if moved the requested distance, -1 on complete failure + */ + +int +ar_rev(off_t sksz) +{ + off_t cpos; + struct mtop mb; + int phyblk; + + /* + * make sure we do not have try to reverse on a flawed archive + */ + if (lstrval < 0) + return(lstrval); + + switch (artyp) { + case ISPIPE: + if (sksz <= 0) + break; + /* + * cannot go backwards on these critters + */ + paxwarn(1, "Reverse positioning on pipes is not supported."); + lstrval = -1; + return(-1); + case ISREG: + case ISBLK: + case ISCHR: + default: + if (sksz <= 0) + break; + + /* + * For things other than files, backwards movement has a very + * high probability of failure as we really do not know the + * true attributes of the device we are talking to (the device + * may not even have the ability to lseek() in any direction). + * First we figure out where we are in the archive. 
+ */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) { + syswarn(1, errno, + "Unable to obtain current archive byte offset"); + lstrval = -1; + return(-1); + } + + /* + * we may try to go backwards past the start when the archive + * is only a single record. If this happens and we are on a + * multi-volume archive, we need to go to the end of the + * previous volume and continue our movement backwards from + * there. + */ + if ((cpos -= sksz) < 0) { + if (arvol > 1) { + /* + * this should never happen + */ + paxwarn(1,"Reverse position on previous volume."); + lstrval = -1; + return(-1); + } + cpos = 0; + } + if (lseek(arfd, cpos, SEEK_SET) == -1) { + syswarn(1, errno, "Unable to seek archive backwards"); + lstrval = -1; + return(-1); + } + break; + case ISTAPE: + /* + * Calculate and move the proper number of PHYSICAL tape + * blocks. If the sksz is not an even multiple of the physical + * tape size, we cannot do the move (this should never happen). + * (We also cannot handle trailers spread over two vols.) + * get_phys() also makes sure we are in front of the filemark. + */ + if ((phyblk = get_phys()) <= 0) { + lstrval = -1; + return(-1); + } + + /* + * make sure future tape reads only go by physical tape block + * size (set rdblksz to the real size). + */ + rdblksz = phyblk; + + /* + * if no movement is required, just return (we must be after + * get_phys() so the physical blocksize is properly set) + */ + if (sksz <= 0) + break; + + /* + * ok we have to move. Make sure the tape drive can do it. 
+ */ + if (sksz % phyblk) { + paxwarn(1, + "Tape drive unable to backspace requested amount"); + lstrval = -1; + return(-1); + } + + /* + * move backwards the requested number of bytes + */ + mb.mt_op = MTBSR; + mb.mt_count = sksz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1,errno, "Unable to backspace tape %d blocks.", + mb.mt_count); + lstrval = -1; + return(-1); + } + break; + } + lstrval = 1; + return(0); +} + +/* + * get_phys() + * Determine the physical block size on a tape drive. We need the physical + * block size so we know how many bytes we skip over when we move with + * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when + * return. + * This is one really SLOW routine... + * Return: + * physical block size if ok (ok > 0), -1 otherwise + */ + +static int +get_phys(void) +{ + int padsz = 0; + int res; + int phyblk; + struct mtop mb; + char scbuf[MAXBLK]; + + /* + * move to the file mark, and then back up one record and read it. + * this should tell us the physical record size the tape is using. + */ + if (lstrval == 1) { + /* + * we know we are at file mark when we get back a 0 from + * read() + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + padsz += res; + if (res == -1) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + } + + /* + * move backwards over the file mark so we are at the end of the + * last record. + */ + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * move backwards so we are in front of the last record and read it to + * get physical tape blocksize. 
+ */ + mb.mt_op = MTBSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over last tape block."); + return(-1); + } + if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) { + syswarn(1, errno, "Cannot determine archive tape blocksize."); + return(-1); + } + + /* + * read forward to the file mark, then back up in front of the filemark + * (this is a bit paranoid, but should be safe to do). + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + continue; + if (res == -1) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * set lstrval so we know that the filemark has not been seen + */ + lstrval = 1; + + /* + * return if there was no padding + */ + if (padsz == 0) + return(phyblk); + + /* + * make sure we can move backwards over the padding. (this should + * never fail). + */ + if (padsz % phyblk) { + paxwarn(1, "Tape drive unable to backspace requested amount"); + return(-1); + } + + /* + * move backwards over the padding so the head is where it was when + * we were first called (if required). + */ + mb.mt_op = MTBSR; + mb.mt_count = padsz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1,errno,"Unable to backspace tape over %d pad blocks", + mb.mt_count); + return(-1); + } + return(phyblk); +} + +/* + * ar_next() + * prompts the user for the next volume in this archive. For some devices + * we may allow the media to be changed. Otherwise a new archive is + * prompted for. By pax spec, if there is no controlling tty or an eof is + * read on tty input, we must quit pax. + * Return: + * 0 when ready to continue, -1 when all done + */ + +int +ar_next(void) +{ + char buf[PAXPATHLEN+2]; + static int freeit = 0; + sigset_t o_mask; + + /* + * WE MUST CLOSE THE DEVICE. 
A lot of devices must see last close, (so + * things like writing EOF etc will be done) (Watch out ar_close() can + * also be called via a signal handler, so we must prevent a race. + */ + if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) == -1) + syswarn(0, errno, "Unable to set signal mask"); + ar_close(0); + if (sigprocmask(SIG_SETMASK, &o_mask, NULL) == -1) + syswarn(0, errno, "Unable to restore signal mask"); + + if (done || !wr_trail || force_one_volume || op_mode == OP_TAR) + return(-1); + + tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0); + + /* + * if i/o is on stdin or stdout, we cannot reopen it (we do not know + * the name), the user will be forced to type it in. + */ + if (strcmp(arcname, STDO) && strcmp(arcname, STDN) && (artyp != ISREG) + && (artyp != ISPIPE)) { + if (artyp == ISTAPE) { + tty_prnt("%s ready for archive tape volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT TAPE on the tape drive"); + } else { + tty_prnt("%s ready for archive volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); + } + + if ((act == ARCHIVE) || (act == APPND)) + tty_prnt(" and make sure it is WRITE ENABLED.\n"); + else + tty_prnt("\n"); + + for (;;) { + tty_prnt("Type \"y\" to continue, \".\" to quit %s,", + argv0); + tty_prnt(" or \"s\" to switch to new device.\nIf you"); + tty_prnt(" cannot change storage media, type \"s\"\n"); + tty_prnt("Is the device ready and online? 
> "); + + if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + + if ((buf[0] == '\0') || (buf[1] != '\0')) { + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + + switch (buf[0]) { + case 'y': + case 'Y': + /* + * we are to continue with the same device + */ + if (ar_open(arcname) >= 0) + return(0); + tty_prnt("Cannot re-open %s, try again\n", + arcname); + continue; + case 's': + case 'S': + /* + * user wants to open a different device + */ + tty_prnt("Switching to a different archive\n"); + break; + default: + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + break; + } + } else + tty_prnt("Ready for archive volume: %d\n", arvol); + + /* + * have to go to a different archive + */ + for (;;) { + tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); + tty_prnt("Archive name > "); + + if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + if (buf[0] == '\0') { + tty_prnt("Empty file name, try again\n"); + continue; + } + if (!strcmp(buf, "..")) { + tty_prnt("Illegal file name: .. try again\n"); + continue; + } + if (strlen(buf) > PAXPATHLEN) { + tty_prnt("File name too long, try again\n"); + continue; + } + + /* + * try to open new archive + */ + if (ar_open(buf) >= 0) { + if (freeit) { + free((char *)arcname); + freeit = 0; + } + if ((arcname = strdup(buf)) == NULL) { + done = 1; + lstrval = -1; + paxwarn(0, "Cannot save archive name."); + return(-1); + } + freeit = 1; + break; + } + tty_prnt("Cannot open %s, try again\n", buf); + continue; + } + return(0); +} + +/* + * ar_start_gzip() + * starts the gzip compression/decompression process as a child, using magic + * to keep the fd the same in the calling function (parent). 
+ */ +void +ar_start_gzip(int fd, const char *path, int wr) +{ + int fds[2]; + const char *gzip_flags; + + if (pipe(fds) == -1) + err(1, "could not pipe"); + zpid = fork(); + if (zpid == -1) + err(1, "could not fork"); + + /* parent */ + if (zpid) { + if (wr) + dup2(fds[1], fd); + else + dup2(fds[0], fd); + close(fds[0]); + close(fds[1]); + + if (pmode == 0 || (act != EXTRACT && act != COPY)) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw proc tape", + NULL) == -1) + err(1, "pledge"); + } + } else { + if (wr) { + dup2(fds[0], STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + gzip_flags = "-c"; + } else { + dup2(fds[1], STDOUT_FILENO); + dup2(fd, STDIN_FILENO); + gzip_flags = "-dc"; + } + close(fds[0]); + close(fds[1]); + + /* System compressors are more likely to use pledge(2) */ + putenv("PATH=/usr/bin:/usr/local/bin"); + + if (execlp(path, path, gzip_flags, (char *)NULL) == -1) + err(1, "could not exec %s", path); + /* NOTREACHED */ + } +} diff --git a/bin/pax/ar_subs.c b/bin/pax/ar_subs.c new file mode 100644 index 0000000..f0a55ab --- /dev/null +++ b/bin/pax/ar_subs.c @@ -0,0 +1,1277 @@ +/* $OpenBSD: ar_subs.c,v 1.49 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +static void wr_archive(ARCHD *, int is_app); +static int get_arc(void); +static int next_head(ARCHD *); +static int rd_gnu_string(ARCHD *); +extern sigset_t s_mask; + +/* + * Routines which control the overall operation modes of pax as specified by + * the user: list, append, read ... 
+ */ + +static char hdbuf[BLKMULT]; /* space for archive header on read */ +u_long flcnt; /* number of files processed */ + +/* + * list() + * list the contents of an archive which match user supplied pattern(s) + * (no pattern matches all). + */ + +void +list(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine. We + * also save current time for ls_list() so we do not make a system + * call for each file we need to print. If verbose (vflag) start up + * the name and group caches. + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0)) + return; + + now = time(NULL); + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + if (rd_gnu_string(arcn)) + continue; + + /* + * check for pattern, and user specified options match. + * When all patterns are matched we are done. 
+ */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res == 0) && (sel_chk(arcn) == 0)) { + /* + * pattern resulted in a selected file + */ + if (pat_sel(arcn) < 0) + break; + + /* + * modify the name as requested by the user if name + * survives modification, do a listing of the file + */ + if ((res = mod_name(arcn)) < 0) + break; + if (res == 0) + ls_list(arcn, now, stdout); + } + + /* + * skip to next archive format header using values calculated + * by the format header read routine + */ + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + } + + /* + * all done, let format have a chance to cleanup, and make sure that + * the patterns supplied by the user were all matched + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + pat_chk(); +} + +static int +cmp_file_times(int mtime_flag, int ctime_flag, ARCHD *arcn, struct stat *sbp) +{ + struct stat sb; + + if (sbp == NULL) { + if (lstat(arcn->name, &sb) != 0) + return (0); + sbp = &sb; + } + + if (ctime_flag && mtime_flag) + return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=) && + timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=)); + else if (ctime_flag) + return (timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=)); + else + return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=)); +} + +/* + * extract() + * extract the member(s) of an archive as specified by user supplied + * pattern(s) (no patterns extracts all members) + */ + +void +extract(void) +{ + ARCHD *arcn; + int res; + off_t cnt; + ARCHD archd; + int fd; + time_t now; + + sltab_start(); + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine; + * start up the directory modification time and access mode database + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of 
names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + now = time(NULL); + + /* + * step through each entry on the archive until the format read routine + * says it is done + */ + while (next_head(arcn) == 0) { + if (rd_gnu_string(arcn)) + continue; + + /* + * check for pattern, and user specified options match. When + * all the patterns are matched we are done + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res > 0) || (sel_chk(arcn) != 0)) { + /* + * file is not selected. skip past any file data and + * padding and go back for the next archive member + */ + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * with -u or -D only extract when the archive member is newer + * than the file with the same name in the file system (no + * test of being the same type is required). + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this operation can be confusing to the + * user who might expect the test to be done on an existing + * file AFTER the name mod. In honesty the pax spec is probably + * flawed in this respect. + */ + if ((uflag || Dflag) && + cmp_file_times(uflag, Dflag, arcn, NULL)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * this archive member is now been selected. modify the name. + */ + if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if (res > 0) { + /* + * a bad name mod, skip and purge name from link table + */ + purg_lnk(arcn); + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip + */ + if ((Yflag || Zflag) && + cmp_file_times(Yflag, Zflag, arcn, NULL)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + + /* + * if required, chdir around. 
+ */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (chdir(arcn->pat->chdname) != 0) + syswarn(1, errno, "Cannot chdir to %s", + arcn->pat->chdname); + + /* + * all ok, extract this member based on type + */ + if (!PAX_IS_REG(arcn->type)) { + /* + * process archive members that are not regular files. + * throw out padding and any data that might follow the + * header (as determined by the format). + */ + if (PAX_IS_HARDLINK(arcn->type)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + + (void)rd_skip(arcn->skip + arcn->pad); + if (res < 0) + purg_lnk(arcn); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + goto popd; + } + /* + * we have a file with data here. If we can not create it, skip + * over the data and purge the name from hard link table + */ + if ((fd = file_creat(arcn)) < 0) { + (void)rd_skip(arcn->skip + arcn->pad); + purg_lnk(arcn); + goto popd; + } + /* + * extract the file from the archive and skip over padding and + * any unprocessed data + */ + res = rd_wrfile(arcn, fd, &cnt); + file_close(arcn, fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (!res) + (void)rd_skip(cnt + arcn->pad); + +popd: + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (fchdir(cwdfd) != 0) + syswarn(1, errno, + "Can't fchdir to starting directory"); + } + + /* + * all done, restore directory modes and times as required; make sure + * all patterns supplied by the user were matched; block off signals + * to avoid chance for multiple entry into the cleanup code. + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + sltab_process(0); + proc_dir(0); + pat_chk(); +} + +/* + * wr_archive() + * Write an archive. used in both creating a new archive and appends on + * previously written archive. 
+ */ + +static void +wr_archive(ARCHD *arcn, int is_app) +{ + int res; + int hlk; + int wr_one; + off_t cnt; + int (*wrf)(ARCHD *); + int fd = -1; + time_t now; + + /* + * if this format supports hard link storage, start up the database + * that detects them. + */ + if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) + return; + + /* + * if this is not append, and there are no files, we do not write a + * trailer + */ + wr_one = is_app; + + /* + * start up the file traversal code and format specific write + */ + if (ftree_start() < 0) { + if (is_app) + goto trailer; + return; + } else if (((*frmt->st_wr)() < 0)) + return; + + wrf = frmt->wr; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + now = time(NULL); + + /* + * while there are files to archive, process them one at at time + */ + while (next_file(arcn) == 0) { + /* + * check if this file meets user specified options match. + */ + if (sel_chk(arcn) != 0) + continue; + fd = -1; + if (uflag) { + /* + * only archive if this file is newer than a file with + * the same name that is already stored on the archive + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + ftree_skipped_newer(arcn); + continue; + } + } + + /* + * this file is considered selected now. see if this is a hard + * link to a file already stored + */ + ftree_sel(arcn); + if (hlk && (chk_lnk(arcn) < 0)) + break; + + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) { + /* + * we will have to read this file. by opening it now we + * can avoid writing a header to the archive for a file + * we were later unable to read (we also purge it from + * the link table). 
+ */ + if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1,errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + } + + /* + * Now modify the name as requested by the user + */ + if ((res = mod_name(arcn)) < 0) { + /* + * name modification says to skip this file, close the + * file and purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + break; + } + + if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { + /* + * unable to obtain the crc we need, close the file, + * purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + ++flcnt; + + /* + * looks safe to store the file, have the format specific + * routine write routine store the file header on the archive + */ + if ((res = (*wrf)(arcn)) < 0) { + rdfile_close(arcn, &fd); + break; + } + wr_one = 1; + if (res > 0) { + /* + * format write says no file data needs to be stored + * so we are done messing with this file + */ + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + rdfile_close(arcn, &fd); + continue; + } + + /* + * Add file data to the archive, quit on write error. if we + * cannot write the entire file contents to the archive we + * must pad the archive to replace the missing file data + * (otherwise during an extract the file header for the file + * which FOLLOWS this one will not be where we expect it to + * be). 
+ */ + res = wr_rdfile(arcn, fd, &cnt); + rdfile_close(arcn, &fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (res < 0) + break; + + /* + * pad as required, cnt is number of bytes not written + */ + if (((cnt > 0) && (wr_skip(cnt) < 0)) || + ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) + break; + } + +trailer: + /* + * tell format to write trailer; pad to block boundary; reset directory + * mode/access times, and check if all patterns supplied by the user + * were matched. block off signals to avoid chance for multiple entry + * into the cleanup code + */ + if (wr_one) { + (*frmt->end_wr)(); + wr_fin(); + } + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + if (tflag) + proc_dir(0); + ftree_chk(); +} + +/* + * append() + * Add file to previously written archive. Archive format specified by the + * user must agree with archive. The archive is read first to collect + * modification times (if -u) and locate the archive trailer. The archive + * is positioned in front of the record with the trailer and wr_archive() + * is called to add the new members. + * PAX IMPLEMENTATION DETAIL NOTE: + * -u is implemented by adding the new members to the end of the archive. + * Care is taken so that these do not end up as links to the older + * version of the same file already stored in the archive. It is expected + * when extraction occurs these newer versions will over-write the older + * ones stored "earlier" in the archive (this may be a bad assumption as + * it depends on the implementation of the program doing the extraction). + * It is really difficult to splice in members without either re-writing + * the entire archive (from the point were the old version was), or having + * assistance of the format specification in terms of a special update + * header that invalidates a previous archive record. The posix spec left + * the method used to implement -u unspecified. 
This pax is able to + * over write existing files that it creates. + */ + +void +append(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + FSUB *orgfrmt; + int udev; + off_t tlen; + + arcn = &archd; + orgfrmt = frmt; + + /* + * Do not allow an append operation if the actual archive is of a + * different format than the user specified format. + */ + if (get_arc() < 0) + return; + if ((orgfrmt != NULL) && (orgfrmt != frmt)) { + paxwarn(1, "Cannot mix current archive format %s with %s", + frmt->name, orgfrmt->name); + return; + } + + /* + * pass the format any options and start up format + */ + if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) + return; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files we see. + */ + if (uflag && (ftime_start() < 0)) + return; + + /* + * some archive formats encode hard links by recording the device and + * file serial number (inode) but copy the file anyway (multiple times) + * to the archive. When we append, we run the risk that newly added + * files may have the same device and inode numbers as those recorded + * on the archive but during a previous run. If this happens, when the + * archive is extracted we get INCORRECT hard links. We avoid this by + * remapping the device numbers so that newly added files will never + * use the same device number as one found on the archive. remapping + * allows new members to safely have links among themselves. remapping + * also avoids problems with file inode (serial number) truncations + * when the inode number is larger than storage space in the archive + * header. See the remap routines for more details. + */ + if ((udev = frmt->udev) && (dev_start() < 0)) + return; + + /* + * reading the archive may take a long time. 
If verbose tell the user + */ + if (vflag) { + (void)fprintf(listf, + "%s: Reading archive to position at the end...", argv0); + vfpart = 1; + } + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check if this file meets user specified options. + */ + if (sel_chk(arcn) != 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + + if (uflag) { + /* + * see if this is the newest version of this file has + * already been seen, if so skip. + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + } + + /* + * Store this device number. Device numbers seen during the + * read phase of append will cause newly appended files with a + * device number seen in the old part of the archive to be + * remapped to an unused device number. + */ + if ((udev && (add_dev(arcn) < 0)) || + (rd_skip(arcn->skip + arcn->pad) == 1)) + break; + } + + /* + * done, finish up read and get the number of bytes to back up so we + * can add new members. The format might have used the hard link table, + * purge it. + */ + tlen = (*frmt->end_rd)(); + lnk_end(); + + /* + * try to position for write, if this fails quit. if any error occurs, + * we will refuse to write + */ + if (appnd_start(tlen) < 0) + return; + + /* + * tell the user we are done reading. 
+ */ + if (vflag && vfpart) { + (void)fputs("done.\n", listf); + vfpart = 0; + } + + /* + * go to the writing phase to add the new members + */ + wr_archive(arcn, 1); +} + +/* + * archive() + * write a new archive + */ + +void +archive(void) +{ + ARCHD archd; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files; set up for writing; pass the format any + * options write the archive + */ + if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) + return; + if ((*frmt->options)() < 0) + return; + + wr_archive(&archd, 0); +} + +/* + * copy() + * copy files from one part of the file system to another. this does not + * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an + * archive was written and then extracted in the destination directory + * (except the files are forced to be under the destination directory). + */ + +void +copy(void) +{ + ARCHD *arcn; + int res; + int fddest; + char *dest_pt; + size_t dlen; + size_t drem; + int fdsrc = -1; + struct stat sb; + ARCHD archd; + char dirbuf[PAXPATHLEN+1]; + + sltab_start(); + + arcn = &archd; + /* + * set up the destination dir path and make sure it is a directory. 
We + * make sure we have a trailing / on the destination + */ + dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf)); + if (dlen >= sizeof(dirbuf) || + (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) { + paxwarn(1, "directory name is too long %s", dirptr); + return; + } + dest_pt = dirbuf + dlen; + if (*(dest_pt-1) != '/') { + *dest_pt++ = '/'; + *dest_pt = '\0'; + ++dlen; + } + drem = PAXPATHLEN - dlen; + + if (stat(dirptr, &sb) == -1) { + syswarn(1, errno, "Cannot access destination directory %s", + dirptr); + return; + } + if (!S_ISDIR(sb.st_mode)) { + paxwarn(1, "Destination is not a directory %s", dirptr); + return; + } + + /* + * start up the hard link table; file traversal routines and the + * modification time and access mode database + */ + if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * set up to cp file trees + */ + cp_start(); + + /* + * while there are files to archive, process them + */ + while (next_file(arcn) == 0) { + fdsrc = -1; + + /* + * check if this file meets user specified options + */ + if (sel_chk(arcn) != 0) + continue; + + /* + * if there is already a file in the destination directory with + * the same name and it is newer, skip the one stored on the + * archive. + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this can be confusing to the user who + * might expect the test to be done on an existing file AFTER + * the name mod. 
In honesty the pax spec is probably flawed in + * this respect + */ + if (uflag || Dflag) { + /* + * create the destination name + */ + if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'), + drem + 1) > drem) { + paxwarn(1, "Destination pathname too long %s", + arcn->name); + continue; + } + + /* + * if existing file is same age or newer skip + */ + res = lstat(dirbuf, &sb); + *dest_pt = '\0'; + + if (res == 0) { + ftree_skipped_newer(arcn); + if (cmp_file_times(uflag, Dflag, arcn, &sb)) + continue; + } + } + + /* + * this file is considered selected. See if this is a hard link + * to a previous file; modify the name as requested by the + * user; set the final destination. + */ + ftree_sel(arcn); + if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { + /* + * skip file, purge from link table + */ + purg_lnk(arcn); + continue; + } + + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip + */ + if ((Yflag || Zflag) && + cmp_file_times(Yflag, Zflag, arcn, NULL)) + continue; + + if (vflag) { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + ++flcnt; + + /* + * try to create a hard link to the src file if requested + * but make sure we are not trying to overwrite ourselves. + */ + if (lflag) + res = cross_lnk(arcn); + else + res = chk_same(arcn); + if (res <= 0) { + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to create a new file + */ + if (!PAX_IS_REG(arcn->type)) { + /* + * create a link or special file + */ + if (PAX_IS_HARDLINK(arcn->type)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + if (res < 0) + purg_lnk(arcn); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to copy a regular file to the destination directory. 
+ * first open source file and then create the destination file + */ + if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + if ((fddest = file_creat(arcn)) < 0) { + rdfile_close(arcn, &fdsrc); + purg_lnk(arcn); + continue; + } + + /* + * copy source file data to the destination file + */ + cp_file(arcn, fdsrc, fddest); + file_close(arcn, fddest); + rdfile_close(arcn, &fdsrc); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + } + + /* + * restore directory modes and times as required; make sure all + * patterns were selected block off signals to avoid chance for + * multiple entry into the cleanup code. + */ + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + sltab_process(0); + proc_dir(0); + ftree_chk(); +} + +/* + * next_head() + * try to find a valid header in the archive. Uses format specific + * routines to extract the header and id the trailer. Trailers may be + * located within a valid header or in an invalid header (the location + * is format specific. The inhead field from the option table tells us + * where to look for the trailer). + * We keep reading (and resyncing) until we get enough contiguous data + * to check for a header. If we cannot find one, we shift by a byte + * add a new byte from the archive to the end of the buffer and try again. + * If we get a read error, we throw out what we have (as we must have + * contiguous data) and start over again. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if we got a header, -1 if we are unable to ever find another one + * (we reached the end of input, or we reached the limit on retries. 
see + * the specs for rd_wrbuf() for more details) + */ + +static int +next_head(ARCHD *arcn) +{ + int ret; + char *hdend; + int res; + int shftsz; + int hsz; + int in_resync = 0; /* set when we are in resync mode */ + int cnt = 0; /* counter for trailer function */ + int first = 1; /* on 1st read, EOF isn't premature. */ + + /* + * set up initial conditions, we want a whole frmt->hsz block as we + * have no data yet. + */ + res = hsz = frmt->hsz; + hdend = hdbuf; + shftsz = hsz - 1; + for (;;) { + /* + * keep looping until we get a contiguous FULL buffer + * (frmt->hsz is the proper size) + */ + for (;;) { + if ((ret = rd_wrbuf(hdend, res)) == res) + break; + + /* + * If we read 0 bytes (EOF) from an archive when we + * expect to find a header, we have stepped upon + * an archive without the customary block of zeroes + * end marker. It's just stupid to error out on + * them, so exit gracefully. + */ + if (first && ret == 0) + return(-1); + first = 0; + + /* + * some kind of archive read problem, try to resync the + * storage device, better give the user the bad news. + */ + if ((ret == 0) || (rd_sync() < 0)) { + paxwarn(1,"Premature end of file on archive read"); + return(-1); + } + if (!in_resync) { + if (act == APPND) { + paxwarn(1, + "Archive I/O error, cannot continue"); + return(-1); + } + paxwarn(1,"Archive I/O error. Trying to recover."); + ++in_resync; + } + + /* + * oh well, throw it all out and start over + */ + res = hsz; + hdend = hdbuf; + } + + /* + * ok we have a contiguous buffer of the right size. Call the + * format read routine. If this was not a valid header and this + * format stores trailers outside of the header, call the + * format specific trailer routine to check for a trailer. We + * have to watch out that we do not mis-identify file data or + * block padding as a header or trailer. Format specific + * trailer functions must NOT check for the trailer while we + * are running in resync mode. 
Some trailer functions may tell + * us that this block cannot contain a valid header either, so + * we then throw out the entire block and start over. + */ + if ((*frmt->rd)(arcn, hdbuf) == 0) + break; + + if (!frmt->inhead) { + /* + * this format has trailers outside of valid headers + */ + if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){ + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + if (ret == 1) { + /* + * we are in resync and we were told to throw + * the whole block out because none of the + * bytes in this block can be used to form a + * valid header + */ + res = hsz; + hdend = hdbuf; + continue; + } + } + + /* + * Brute force section. + * not a valid header. We may be able to find a header yet. So + * we shift over by one byte, and set up to read one byte at a + * time from the archive and place it at the end of the buffer. + * We will keep moving byte at a time until we find a header or + * get a read error and have to start over. + */ + if (!in_resync) { + if (act == APPND) { + paxwarn(1,"Unable to append, archive header flaw"); + return(-1); + } + paxwarn(1,"Invalid header, starting valid header search."); + ++in_resync; + } + memmove(hdbuf, hdbuf+1, shftsz); + res = 1; + hdend = hdbuf + shftsz; + } + + /* + * ok got a valid header, check for trailer if format encodes it in the + * the header. NOTE: the parameters are different than trailer routines + * which encode trailers outside of the header! + */ + if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) { + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + ++flcnt; + return(0); +} + +/* + * get_arc() + * Figure out what format an archive is. Handles archive with flaws by + * brute force searches for a legal header in any supported format. The + * format id routines have to be careful to NOT mis-identify a format. + * ASSUMED: headers fit within a BLKMULT header. 
+ * Return: + * 0 if archive found -1 otherwise + */ + +static int +get_arc(void) +{ + int i; + int hdsz = 0; + int res; + int minhd = BLKMULT; + char *hdend; + int notice = 0; + + /* + * find the smallest header size in all archive formats and then set up + * to read the archive. + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].name != NULL && fsub[ford[i]].hsz < minhd) + minhd = fsub[ford[i]].hsz; + } + if (rd_start() < 0) + return(-1); + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + for (;;) { + for (;;) { + /* + * fill the buffer with at least the smallest header + */ + i = rd_wrbuf(hdend, res); + if (i > 0) + hdsz += i; + if (hdsz >= minhd) + break; + + /* + * if we cannot recover from a read error quit + */ + if ((i == 0) || (rd_sync() < 0)) + goto out; + + /* + * when we get an error none of the data we already + * have can be used to create a legal header (we just + * got an error in the middle), so we throw it all out + * and refill the buffer with fresh data. + */ + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1,"Cannot identify format. Searching..."); + ++notice; + } + } + + /* + * we have at least the size of the smallest header in any + * archive format. Look to see if we have a match. The array + * ford[] is used to specify the header id order to reduce the + * chance of incorrectly id'ing a valid header (some formats + * may be subsets of each other and the order would then be + * important). + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].id == NULL || + (*fsub[ford[i]].id)(hdbuf, hdsz) < 0) + continue; + frmt = &(fsub[ford[i]]); + /* + * yuck, to avoid slow special case code in the extract + * routines, just push this header back as if it was + * not seen. We have left extra space at start of the + * buffer for this purpose. This is a bit ugly, but + * adding all the special case code is far worse. 
+ */ + pback(hdbuf, hdsz); + return(0); + } + + /* + * We have a flawed archive, no match. we start searching, but + * we never allow additions to flawed archives + */ + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1, "Cannot identify format. Searching..."); + ++notice; + } + + /* + * brute force search for a header that we can id. + * we shift through byte at a time. this is slow, but we cannot + * determine the nature of the flaw in the archive in a + * portable manner + */ + if (--hdsz > 0) { + memmove(hdbuf, hdbuf+1, hdsz); + res = BLKMULT - hdsz; + hdend = hdbuf + hdsz; + } else { + res = BLKMULT; + hdend = hdbuf; + hdsz = 0; + } + } + + out: + /* + * we cannot find a header, bow, apologize and quit + */ + paxwarn(1, "Sorry, unable to determine archive format."); + return(-1); +} + +/* + * rd_gnu_string() + * Read the file contents into an allocated string if it is a GNU tar + * long link/file. + * Return: + * 1 if gnu string read, 0 otherwise + */ + +static int +rd_gnu_string(ARCHD *arcn) +{ + char **strp; + + switch (arcn->type) { + case PAX_GLF: + strp = &gnu_name_string; + break; + case PAX_GLL: + strp = &gnu_link_string; + break; + default: + strp = NULL; + break; + } + if (!strp) + return 0; + /* + * we need to read, to get the real filename + */ + if (*strp) + err(1, "WARNING! Major Internal Error! 
GNU hack Failing!"); + *strp = malloc(arcn->sb.st_size + 1); + if (*strp == NULL) { + paxwarn(1, "Out of memory"); + (void)rd_skip(arcn->skip + arcn->pad); + } else if (rd_wrbuf(*strp, arcn->sb.st_size) < arcn->sb.st_size) { + free(*strp); + *strp = NULL; + } else { + (*strp)[arcn->sb.st_size] = '\0'; + (void)rd_skip(arcn->pad); + } + return 1; +} diff --git a/bin/pax/buf_subs.c b/bin/pax/buf_subs.c new file mode 100644 index 0000000..e84f9e0 --- /dev/null +++ b/bin/pax/buf_subs.c @@ -0,0 +1,983 @@ +/* $OpenBSD: buf_subs.c,v 1.31 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: buf_subs.c,v 1.5 1995/03/21 09:07:08 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include "pax.h" +#include "extern.h" + +/* + * routines which implement archive and file buffering + */ + +#define MINFBSZ 512 /* default block size for hole detect */ +#define MAXFLT 10 /* default media read error limit */ + +/* + * Need to change bufmem to dynamic allocation when the upper + * limit on blocking size is removed (though that will violate pax spec) + * MAXBLK define and tests will also need to be updated. 
+ */ +static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */ +static char *buf; /* normal start of i/o buffer */ +static char *bufend; /* end or last char in i/o buffer */ +static char *bufpt; /* read/write point in i/o buffer */ +int blksz = MAXBLK; /* block input/output size in bytes */ +int wrblksz; /* user spec output size in bytes */ +int maxflt = MAXFLT; /* MAX consecutive media errors */ +int rdblksz; /* first read blksize (tapes only) */ +off_t wrlimit; /* # of bytes written per archive vol */ +off_t wrcnt; /* # of bytes written on current vol */ +off_t rdcnt; /* # of bytes read on current vol */ + +/* + * wr_start() + * set up the buffering system to operate in a write mode + * Return: + * 0 if ok, -1 if the user specified write block size violates pax spec + */ + +int +wr_start(void) +{ + buf = &(bufmem[BLKMULT]); + /* + * Check to make sure the write block size meets pax specs. If the user + * does not specify a blocksize, we use the format default blocksize. + * We must be picky on writes, so we do not allow the user to create an + * archive that might be hard to read elsewhere. 
If all ok, we then + * open the first archive volume + */ + if (!wrblksz) + wrblksz = frmt->bsz; + if (wrblksz > MAXBLK) { + paxwarn(1, "Write block size of %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size of %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + if (wrblksz > MAXBLK_POSIX) { + paxwarn(0, "Write block size of %d larger than POSIX max %d, archive may not be portable", + wrblksz, MAXBLK_POSIX); + return(-1); + } + + /* + * we only allow wrblksz to be used with all archive operations + */ + blksz = rdblksz = wrblksz; + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + wrcnt = 0; + bufend = buf + wrblksz; + bufpt = buf; + return(0); +} + +/* + * rd_start() + * set up buffering system to read an archive + * Return: + * 0 if ok, -1 otherwise + */ + +int +rd_start(void) +{ + /* + * leave space for the header pushback (see get_arc()). If we are + * going to append and user specified a write block size, check it + * right away + */ + buf = &(bufmem[BLKMULT]); + if ((act == APPND) && wrblksz) { + if (wrblksz > MAXBLK) { + paxwarn(1,"Write block size %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + } + + /* + * open the archive + */ + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + bufend = buf + rdblksz; + bufpt = bufend; + rdcnt = 0; + return(0); +} + +/* + * cp_start() + * set up buffer system for copying within the file system + */ + +void +cp_start(void) +{ + buf = &(bufmem[BLKMULT]); + rdblksz = blksz = MAXBLK; +} + +/* + * appnd_start() + * Set up the buffering system to append new members to an archive that + * was just read. The last block(s) of an archive may contain a format + * specific trailer. To append a new member, this trailer has to be + * removed from the archive. 
The first byte of the trailer is replaced by + * the start of the header of the first file added to the archive. The + * format specific end read function tells us how many bytes to move + * backwards in the archive to be positioned BEFORE the trailer. Two + * different positions have to be adjusted, the O.S. file offset (e.g. the + * position of the tape head) and the write point within the data we have + * stored in the read (soon to become write) buffer. We may have to move + * back several records (the number depends on the size of the archive + * record and the size of the format trailer) to read up the record where + * the first byte of the trailer is recorded. Trailers may span (and + * overlap) record boundaries. + * We first calculate which record has the first byte of the trailer. We + * move the OS file offset back to the start of this record and read it + * up. We set the buffer write pointer to be at this byte (the byte where + * the trailer starts). We then move the OS file pointer back to the + * start of this record so a flush of this buffer will replace the record + * in the archive. + * A major problem is rewriting this last record. For archives stored + * on disk files, this is trivial. However, many devices are really picky + * about the conditions under which they will allow a write to occur. + * Often devices restrict the conditions where writes can be made, + * so it may not be feasible to append archives stored on all types of + * devices. + * Return: + * 0 for success, -1 for failure + */ + +int +appnd_start(off_t skcnt) +{ + int res; + off_t cnt; + + if (exit_val != 0) { + paxwarn(0, "Cannot append to an archive that may have flaws."); + return(-1); + } + /* + * if the user did not specify a write blocksize, inherit the size used + * in the last archive volume read. (If a is set we still use rdblksz + * until next volume, cannot shift sizes within a single volume).
+ */ + if (!wrblksz) + wrblksz = blksz = rdblksz; + else + blksz = rdblksz; + + /* + * make sure that this volume allows appends + */ + if (ar_app_ok() < 0) + return(-1); + + /* + * Calculate bytes to move back and move in front of record where we + * need to start writing from. Remember we have to add in any padding + * that might be in the buffer after the trailer in the last block. We + * travel skcnt + padding ROUNDED UP to blksize. + */ + skcnt += bufend - bufpt; + if ((cnt = (skcnt/blksz) * blksz) < skcnt) + cnt += blksz; + if (ar_rev(cnt) < 0) + goto out; + + /* + * We may have gone too far if there is valid data in the block we are + * now in front of, read up the block and position the pointer after + * the valid data. + */ + if ((cnt -= skcnt) > 0) { + /* + * watch out for stupid tape drives. ar_rev() will set rdblksz + * to be real physical blocksize so we must loop until we get + * the old rdblksz (now in blksz). If ar_rev() fouls up the + * determination of the physical block size, we will fail. + */ + bufpt = buf; + bufend = buf + blksz; + while (bufpt < bufend) { + if ((res = ar_read(bufpt, rdblksz)) <= 0) + goto out; + bufpt += res; + } + if (ar_rev(bufpt - buf) < 0) + goto out; + bufpt = buf + cnt; + bufend = buf + blksz; + } else { + /* + * buffer is empty + */ + bufend = buf + blksz; + bufpt = buf; + } + rdblksz = blksz; + rdcnt -= skcnt; + wrcnt = 0; + + /* + * At this point we are ready to write. If the device requires special + * handling to write at a point were previously recorded data resides, + * that is handled in ar_set_wr(). From now on we operate under normal + * ARCHIVE mode (write) conditions + */ + if (ar_set_wr() < 0) + return(-1); + act = ARCHIVE; + return(0); + + out: + paxwarn(1, "Unable to rewrite archive trailer, cannot append."); + return(-1); +} + +/* + * rd_sync() + * A read error occurred on this archive volume. Resync the buffer and + * try to reset the device (if possible) so we can continue to read. 
Keep + * trying to do this until we get a valid read, or we reach the limit on + * consecutive read faults (at which point we give up). The user can + * adjust the read error limit through a command line option. + * Returns: + * 0 on success, and -1 on failure + */ + +int +rd_sync(void) +{ + int errcnt = 0; + int res; + + /* + * if the user says bail out on first fault, we are out of here... + */ + if (maxflt == 0) + return(-1); + if (act == APPND) { + paxwarn(1, "Unable to append when there are archive read errors."); + return(-1); + } + + /* + * poke at device and try to get past media error + */ + if (ar_rdsync() < 0) { + if (ar_next() < 0) + return(-1); + else + rdcnt = 0; + } + + for (;;) { + if ((res = ar_read(buf, blksz)) > 0) { + /* + * All right! got some data, fill that buffer + */ + bufpt = buf; + bufend = buf + res; + rdcnt += res; + return(0); + } + + /* + * Oh well, yet another failed read... + * if error limit reached, ditch. o.w. poke device to move past + * bad media and try again. if media is badly damaged, we ask + * the poor (and upset user at this point) for the next archive + * volume. remember the goal on reads is to get the most we + * can extract out of the archive. + */ + if ((maxflt > 0) && (++errcnt > maxflt)) + paxwarn(0,"Archive read error limit (%d) reached",maxflt); + else if (ar_rdsync() == 0) + continue; + if (ar_next() < 0) + break; + rdcnt = 0; + errcnt = 0; + } + return(-1); +} + +/* + * pback() + * push the data used during the archive id phase back into the I/O + * buffer. This is required as we cannot be sure that the header does NOT + * overlap a block boundary (as in the case we are trying to recover a + * flawed archive). This was not designed to be used for any other + * purpose. (What software engineering, HA!) + * WARNING: do not even THINK of pback greater than BLKMULT, unless the + * pback space is increased.
+ */ + +void +pback(char *pt, int cnt) +{ + bufpt -= cnt; + memcpy(bufpt, pt, cnt); +} + +/* + * rd_skip() + * skip forward in the archive during an archive read. Used to get quickly + * past file data and padding for files the user did NOT select. + * Return: + * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected. + */ + +int +rd_skip(off_t skcnt) +{ + off_t res; + off_t cnt; + off_t skipped = 0; + + /* + * consume what data we have in the buffer. If we have to move forward + * whole records, we call the low level skip function to see if we can + * move within the archive without doing the expensive reads on data we + * do not want. + */ + if (skcnt == 0) + return(0); + res = MINIMUM((bufend - bufpt), skcnt); + bufpt += res; + skcnt -= res; + + /* + * if skcnt is now 0, then no additional i/o is needed + */ + if (skcnt == 0) + return(0); + + /* + * We have to read more, calculate complete and partial record reads + * based on rdblksz. we skip over "cnt" complete records + */ + res = skcnt%rdblksz; + cnt = (skcnt/rdblksz) * rdblksz; + + /* + * if the skip fails, we will have to resync. ar_fow will tell us + * how much it can skip over. We will have to read the rest. + */ + if (ar_fow(cnt, &skipped) < 0) + return(-1); + res += cnt - skipped; + rdcnt += skipped; + + /* + * what is left we have to read (which may be the whole thing if + * ar_fow() told us the device can only read to skip records); + */ + while (res > 0) { + cnt = bufend - bufpt; + /* + * if the read fails, we will have to resync + */ + if ((cnt <= 0) && ((cnt = buf_fill()) < 0)) + return(-1); + if (cnt == 0) + return(1); + cnt = MINIMUM(cnt, res); + bufpt += cnt; + res -= cnt; + } + return(0); +} + +/* + * wr_fin() + * flush out any data (and pad if required) in the last block. We always pad + * with zero (even though we do not have to). Padding with 0 makes it a + * lot easier to recover if the archive is damaged. zero padding SHOULD + * BE a requirement....
+ */ + +void +wr_fin(void) +{ + if (bufpt > buf) { + memset(bufpt, 0, bufend - bufpt); + bufpt = bufend; + (void)buf_flush(blksz); + } +} + +/* + * wr_rdbuf() + * fill the write buffer from data passed to it in a buffer (usually used + * by format specific write routines to pass a file header). On failure we + * punt. We do not allow the user to continue to write flawed archives. + * We assume these headers are not very large (the memory copy we use is + * a bit expensive). + * Return: + * 0 if buffer was filled ok, -1 o.w. (buffer flush failure) + */ + +int +wr_rdbuf(char *out, int outcnt) +{ + int cnt; + + /* + * while there is data to copy copy into the write buffer. when the + * write buffer fills, flush it to the archive and continue + */ + while (outcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + /* + * only move what we have space for + */ + cnt = MINIMUM(cnt, outcnt); + memcpy(bufpt, out, cnt); + bufpt += cnt; + out += cnt; + outcnt -= cnt; + } + return(0); +} + +/* + * rd_wrbuf() + * copy from the read buffer into a supplied buffer a specified number of + * bytes. If the read buffer is empty fill it and continue to copy. + * usually used to obtain a file header for processing by a format + * specific read routine. + * Return + * number of bytes copied to the buffer, 0 indicates EOF on archive volume, + * -1 is a read error + */ + +int +rd_wrbuf(char *in, int cpcnt) +{ + int res; + int cnt; + int incnt = cpcnt; + + /* + * loop until we fill the buffer with the requested number of bytes + */ + while (incnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) { + /* + * read error, return what we got (or the error if + * no data was copied). 
The caller must know that an + * error occurred and has the best knowledge what to + * do with it + */ + if ((res = cpcnt - incnt) > 0) + return(res); + return(cnt); + } + + /* + * calculate how much data to copy based on whats left and + * state of buffer + */ + cnt = MINIMUM(cnt, incnt); + memcpy(in, bufpt, cnt); + bufpt += cnt; + incnt -= cnt; + in += cnt; + } + return(cpcnt); +} + +/* + * wr_skip() + * skip forward during a write. In other words add padding to the file. + * we add zero filled padding as it makes flawed archives much easier to + * recover from. the caller tells us how many bytes of padding to add + * This routine was not designed to add HUGE amount of padding, just small + * amounts (a few 512 byte blocks at most) + * Return: + * 0 if ok, -1 if there was a buf_flush failure + */ + +int +wr_skip(off_t skcnt) +{ + int cnt; + + /* + * loop while there is more padding to add + */ + while (skcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + cnt = MINIMUM(cnt, skcnt); + memset(bufpt, 0, cnt); + bufpt += cnt; + skcnt -= cnt; + } + return(0); +} + +/* + * wr_rdfile() + * fill write buffer with the contents of a file. We are passed an open + * file descriptor to the file and the archive structure that describes the + * file we are storing. The variable "left" is modified to contain the + * number of bytes of the file we were NOT able to write to the archive. + * it is important that we always write EXACTLY the number of bytes that + * the format specific write routine told us to. The file can also get + * bigger, so reading to the end of file would create an improper archive, + * we just detect this case and warn the user. We never create a bad + * archive if we can avoid it. Of course trying to archive files that are + * active is asking for trouble. If we fail, we pass back how much we + * could NOT copy and let the caller deal with it. + * Return: + * 0 ok, -1 if archive write failure.
a short read of the file returns a + * 0, but "left" is set to be greater than zero. + */ + +int +wr_rdfile(ARCHD *arcn, int ifd, off_t *left) +{ + int cnt; + int res = 0; + off_t size = arcn->sb.st_size; + struct stat sb; + + /* + * while there are more bytes to write + */ + while (size > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) { + *left = size; + return(-1); + } + cnt = MINIMUM(cnt, size); + if ((res = read(ifd, bufpt, cnt)) <= 0) + break; + size -= res; + bufpt += res; + } + + /* + * better check the file did not change during this operation + * or the file read failed. + */ + if (res < 0) + syswarn(1, errno, "Read fault on %s", arcn->org_name); + else if (size != 0) + paxwarn(1, "File changed size during read %s", arcn->org_name); + else if (fstat(ifd, &sb) == -1) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during copy to archive", + arcn->org_name); + *left = size; + return(0); +} + +/* + * rd_wrfile() + * extract the contents of a file from the archive. If we are unable to + * extract the entire file (due to failure to write the file) we return + * the numbers of bytes we did NOT process. This way the caller knows how + * many bytes to skip past to find the next archive header. If the failure + * was due to an archive read, we will catch that when we try to skip. If + * the format supplies a file data crc value, we calculate the actual crc + * so that it can be compared to the value stored in the header + * NOTE: + * We call a special function to write the file. This function attempts to + * restore file holes (blocks of zeros) into the file. When files are + * sparse this saves space, and is a LOT faster. For non sparse files + * the performance hit is small. As of this writing, no archive supports + * information on where the file holes are. + * Return: + * 0 ok, -1 if archive read failure. 
if we cannot write the entire file, + * we return a 0 but "left" is set to be the amount unwritten + */ + +int +rd_wrfile(ARCHD *arcn, int ofd, off_t *left) +{ + int cnt = 0; + off_t size = arcn->sb.st_size; + int res = 0; + char *fnm = arcn->name; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + u_int32_t crc = 0; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(ofd, &sb) == 0) { + if (sb.st_blksize > 0) + sz = (int)sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + *left = 0; + + /* + * Copy the archive to the file the number of bytes specified. We have + * to assume that we want to recover file holes as none of the archive + * formats can record the location of file holes. + */ + while (size > 0) { + cnt = bufend - bufpt; + /* + * if we get a read error, we do not want to skip, as we may + * miss a header, so we do not set left, but if we get a write + * error, we do want to skip over the unprocessed data. + */ + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) + break; + cnt = MINIMUM(cnt, size); + if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) { + *left = size; + break; + } + + if (docrc) { + /* + * update the actual crc value + */ + cnt = res; + while (--cnt >= 0) + crc += *bufpt++ & 0xff; + } else + bufpt += res; + size -= res; + } + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (isem && (arcn->sb.st_size > 0)) + file_flush(ofd, fnm, isem); + + /* + * if we failed from archive read, we do not want to skip + */ + if ((size > 0) && (*left == 0)) + return(-1); + + /* + * some formats record a crc on file data. 
If so, then we compare the + * calculated crc to the crc stored in the archive + */ + if (docrc && (size == 0) && (arcn->crc != crc)) + paxwarn(1,"Actual crc does not match expected crc %s",arcn->name); + return(0); +} + +/* + * cp_file() + * copy the contents of one file to another. used during -rw phase of pax + * just as in rd_wrfile() we use a special write function to write the + * destination file so we can properly copy files with holes. + */ + +void +cp_file(ARCHD *arcn, int fd1, int fd2) +{ + int cnt; + off_t cpcnt = 0; + int res = 0; + char *fnm = arcn->name; + int no_hole = 0; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + + /* + * check for holes in the source file. If none, we will use regular + * write instead of file write. + */ + if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size) + ++no_hole; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(fd2, &sb) == 0) { + if (sb.st_blksize > 0) + sz = sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + + /* + * read the source file and copy to destination file until EOF + */ + for (;;) { + if ((cnt = read(fd1, buf, blksz)) <= 0) + break; + if (no_hole) + res = write(fd2, buf, cnt); + else + res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm); + if (res != cnt) + break; + cpcnt += cnt; + } + + /* + * check to make sure the copy is valid. 
+ */ + if (res < 0) + syswarn(1, errno, "Failed write during copy of %s to %s", + arcn->org_name, arcn->name); + else if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File %s changed size during copy to %s", + arcn->org_name, arcn->name); + else if (fstat(fd1, &sb) == -1) + syswarn(1, errno, "Failed stat of %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during copy to %s", + arcn->org_name, arcn->name); + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (!no_hole && isem && (arcn->sb.st_size > 0)) + file_flush(fd2, fnm, isem); +} + +/* + * buf_fill() + * fill the read buffer with the next record (or what we can get) from + * the archive volume. + * Return: + * Number of bytes of data in the read buffer, -1 for read error, and + * 0 when finished (user specified termination in ar_next()). + */ + +int +buf_fill(void) +{ + int cnt; + static int fini = 0; + + if (fini) + return(0); + + for (;;) { + /* + * try to fill the buffer. on error the next archive volume is + * opened and we try again. + */ + if ((cnt = ar_read(buf, blksz)) > 0) { + bufpt = buf; + bufend = buf + cnt; + rdcnt += cnt; + return(cnt); + } + + /* + * errors require resync, EOF goes to next archive + * but in case we have not determined yet the format, + * this means that we have a very short file, so we + * are done again. + */ + if (cnt < 0) + break; + if (frmt == NULL || ar_next() < 0) { + fini = 1; + return(0); + } + rdcnt = 0; + } + exit_val = 1; + return(-1); +} + +/* + * buf_flush() + * force the write buffer to the archive. We are passed the number of + * bytes in the buffer at the point of the flush. When we change archives + * the record size might change. (either larger or smaller). 
+ * Return: + * 0 if all is ok, -1 when a write error occurs. + */ + +int +buf_flush(int bufcnt) +{ + int cnt; + int push = 0; + int totcnt = 0; + + /* + * if we have reached the user specified byte count for each archive + * volume, prompt for the next volume. (The non-standard -R flag). + * NOTE: If the wrlimit is smaller than wrcnt, we will always write + * at least one record. We always round limit UP to next blocksize. + */ + if ((wrlimit > 0) && (wrcnt > wrlimit)) { + paxwarn(0, "User specified archive volume byte limit reached."); + if (ar_next() < 0) { + wrcnt = 0; + exit_val = 1; + return(-1); + } + wrcnt = 0; + + /* + * The new archive volume might have changed the size of the + * write blocksize. if so we figure out if we need to write + * (one or more times), or if there is now free space left in + * the buffer (it is no longer full). bufcnt has the number of + * bytes in the buffer, (the blocksize, at the point we were + * CALLED). Push has the amount of "extra" data in the buffer + * if the block size has shrunk from a volume change. + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * We have enough data to write at least one archive block + */ + for (;;) { + /* + * write a block and check if it all went out ok + */ + cnt = ar_write(buf, blksz); + if (cnt == blksz) { + /* + * the write went ok + */ + wrcnt += cnt; + totcnt += cnt; + if (push > 0) { + /* we have extra data to push to the front. + * check for more than 1 block of push, and if + * so we loop back to write again + */ + memcpy(buf, bufend, push); + bufpt = buf + push; + if (push >= blksz) { + push -= blksz; + continue; + } + } else + bufpt = buf; + return(totcnt); + } else if (cnt > 0) { + /* + * Oh drat we got a partial write! + * if format does not care about alignment let it go, + * we warned the user in ar_write().... but this means + * the last record on this volume violates pax spec.... 
+ */ + totcnt += cnt; + wrcnt += cnt; + bufpt = buf + cnt; + cnt = bufcnt - cnt; + memcpy(buf, bufpt, cnt); + bufpt = buf + cnt; + if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0)) + return(totcnt); + break; + } + + /* + * All done, go to next archive + */ + wrcnt = 0; + if (ar_next() < 0) + break; + + /* + * The new archive volume might also have changed the block + * size. if so, figure out if we have too much or too little + * data for using the new block size + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * write failed, stop pax. we must not create a bad archive! + */ + exit_val = 1; + return(-1); +} diff --git a/bin/pax/cpio.1 b/bin/pax/cpio.1 new file mode 100644 index 0000000..89a6e36 --- /dev/null +++ b/bin/pax/cpio.1 @@ -0,0 +1,309 @@ +.\" $OpenBSD: cpio.1,v 1.36 2020/01/16 16:46:46 schwarze Exp $ +.\" +.\" Copyright (c) 1997 SigmaSoft, Th. Lockert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt CPIO 1 +.Os +.Sh NAME +.Nm cpio +.Nd copy file archives in and out +.Sh SYNOPSIS +.Nm cpio +.Fl o +.Op Fl AaBcjLvZz +.Op Fl C Ar bytes +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl O Ar archive +.No \*(Lt Ar name-list +.Op \*(Gt Ar archive +.Nm cpio +.Fl i +.Op Fl 6BbcdfjmrSstuvZz +.Op Fl C Ar bytes +.Op Fl E Ar file +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl I Ar archive +.Op Ar pattern ... +.Op \*(Lt Ar archive +.Nm cpio +.Fl p +.Op Fl adLlmuv +.Ar destination-directory +.No \*(Lt Ar name-list +.Sh DESCRIPTION +The +.Nm +command copies files to and from a +.Nm +archive. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl o +Create an archive. +Reads the list of files to store in the +archive from standard input, and writes the archive on standard +output. +.Bl -tag -width Ds +.It Fl A +Append to the specified archive. +.It Fl a +Reset the access times on files that have been copied to the +archive. +.It Fl B +Set block size of output to 5120 bytes. +.It Fl C Ar bytes +Set the block size of output to +.Ar bytes . +.It Fl c +Use ASCII format for +.Nm +header for portability. +.It Fl F Ar archive +Use the specified file as the input for the archive. +.It Fl H Ar format +Write the archive in the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. 
+.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl j +Compress archive using the bzip2 format. +The bzip2 utility must be installed separately. +.It Fl L +Follow symbolic links. +.It Fl O Ar archive +Use the specified file name as the archive to write to. +.It Fl v +Be verbose about operations. +List filenames as they are written to the archive. +.It Fl Z +Compress archive using +.Xr compress 1 +format. +.It Fl z +Compress archive using +.Xr gzip 1 +format. +.El +.It Fl i +Restore files from an archive. +Reads the archive file from +standard input and extracts files matching the +.Ar patterns +that were specified on the command line. +.Bl -tag -width Ds +.It Fl 6 +Process old-style +.Nm +format archives. +.It Fl B +Set the block size of the archive being read to 5120 bytes. +.It Fl b +Do byte and word swapping after reading in data from the +archive, for restoring archives created on systems with +a different byte order. +.It Fl C Ar bytes +Read archive written with a block size of +.Ar bytes . +.It Fl c +Expect the archive headers to be in ASCII format. +.It Fl d +Create any intermediate directories as needed during +restore. +.It Fl E Ar file +Read list of file name patterns to extract or list from +.Ar file . +.It Fl F Ar archive , Fl I Ar archive +Use the specified file as the input for the archive. +.It Fl f +Restore all files except those matching the +.Ar patterns +given on the command line. +.It Fl H Ar format +Read an archive of the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. +.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl j +Uncompress archive using the bzip2 format. +The bzip2 utility must be installed separately. +.It Fl m +Restore modification times on files. 
+.It Fl r +Rename restored files interactively. +.It Fl S +Swap words after reading data from the archive. +.It Fl s +Swap bytes after reading data from the archive. +.It Fl t +Only list the contents of the archive, no files or +directories will be created. +.It Fl u +Overwrite files even when the file in the archive is +older than the one that will be overwritten. +.It Fl v +Be verbose about operations. +List filenames as they are copied in from the archive. +.It Fl Z +Uncompress archive using +.Xr compress 1 +format. +.It Fl z +Uncompress archive using +.Xr gzip 1 +format. +.El +.It Fl p +Copy files from one location to another in a single pass. +The list of files to copy are read from standard input and +written out to a directory relative to the specified +.Ar directory +argument. +.Bl -tag -width Ds +.It Fl a +Reset the access times on files that have been copied. +.It Fl d +Create any intermediate directories as needed to write +the files at the new location. +.It Fl L +Follow symbolic links. +.It Fl l +When possible, link files rather than creating an +extra copy. +.It Fl m +Restore modification times on files. +.It Fl u +Overwrite files even when the original file being copied is +older than the one that will be overwritten. +.It Fl v +Be verbose about operations. +List filenames as they are copied. +.El +.El +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +All files were processed successfully. +.It 1 +An error occurred. 
+.El +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive, which may violate the +specific archive format specification. +.Sh SEE ALSO +.Xr pax 1 , +.Xr tar 1 +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. +.Sh CAVEATS +Different file formats have different maximum file sizes. +It is recommended that a format such as cpio or ustar +be used for larger files. +.Bl -column "File format" "Maximum file size" -offset indent +.It Sy "File format" Ta Sy "Maximum file size" +.It bcpio Ta "4 Gigabytes" +.It sv4cpio Ta "4 Gigabytes" +.It cpio Ta "8 Gigabytes" +.It tar Ta "8 Gigabytes" +.It ustar Ta "8 Gigabytes" +.El +.Sh BUGS +The +.Fl s +and +.Fl S +options are currently not implemented. diff --git a/bin/pax/cpio.c b/bin/pax/cpio.c new file mode 100644 index 0000000..769a9df --- /dev/null +++ b/bin/pax/cpio.c @@ -0,0 +1,1106 @@ +/* $OpenBSD: cpio.c,v 1.33 2017/09/16 07:42:34 otto Exp $ */ +/* $NetBSD: cpio.c,v 1.5 1995/03/21 09:07:13 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "cpio.h" +#include "extern.h" + +static int rd_nm(ARCHD *, int); +static int rd_ln_nm(ARCHD *); +static int com_rd(ARCHD *); + +/* + * Routines which support the different cpio versions + */ + +static int swp_head; /* binary cpio header byte swap */ + +/* + * Routines common to all versions of cpio + */ + +/* + * cpio_strd() + * Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +cpio_strd(void) +{ + return(lnk_start()); +} + +/* + * cpio_trail() + * Called to determine if a header block is a valid trailer. We are + * passed the block, the in_sync flag (which tells us we are in resync + * mode; looking for a valid header), and cnt (which starts at zero) + * which is used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, + */ + +int +cpio_trail(ARCHD *arcn, char *notused, int notused2, int *notused3) +{ + /* + * look for trailer id in file we are about to process + */ + if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0)) + return(0); + return(-1); +} + +/* + * com_rd() + * operations common to all cpio read functions. 
+ * Return: + * 0 + */ + +static int +com_rd(ARCHD *arcn) +{ + arcn->skip = 0; + arcn->pat = NULL; + arcn->org_name = arcn->name; + switch (arcn->sb.st_mode & C_IFMT) { + case C_ISFIFO: + arcn->type = PAX_FIF; + break; + case C_ISDIR: + arcn->type = PAX_DIR; + break; + case C_ISBLK: + arcn->type = PAX_BLK; + break; + case C_ISCHR: + arcn->type = PAX_CHR; + break; + case C_ISLNK: + arcn->type = PAX_SLK; + break; + case C_ISOCK: + arcn->type = PAX_SCK; + break; + case C_ISCTG: + case C_ISREG: + default: + /* + * we have file data, set up skip (pad is set in the format + * specific sections) + */ + arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG; + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + } + if (chk_lnk(arcn) < 0) + return(-1); + return(0); +} + +/* + * cpio_endwr() + * write the special file with the name trailer in the proper format + * Return: + * result of the write of the trailer from the cpio specific write func + */ + +int +cpio_endwr(void) +{ + ARCHD last; + + /* + * create a trailer request and call the proper format write function + */ + memset(&last, 0, sizeof(last)); + last.nlen = sizeof(TRAILER) - 1; + last.type = PAX_REG; + last.sb.st_nlink = 1; + (void)strlcpy(last.name, TRAILER, sizeof(last.name)); + return((*frmt->wr)(&last)); +} + +/* + * rd_nm() + * read in the file name which follows the cpio header + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_nm(ARCHD *arcn, int nsz) +{ + /* + * do not even try bogus values + */ + if ((nsz == 0) || ((size_t)nsz > sizeof(arcn->name))) { + paxwarn(1, "Cpio file name length %d is out of range", nsz); + return(-1); + } + + /* + * read the name and make sure it is not empty and is \0 terminated + */ + if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') || + (arcn->name[0] == '\0')) { + paxwarn(1, "Cpio file name in header is corrupted"); + return(-1); + } + return(0); +} + +/* + * rd_ln_nm() + * read in the link name for a file with links. 
The link name is stored + * like file data (and is NOT \0 terminated!) + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_ln_nm(ARCHD *arcn) +{ + /* + * check the length specified for bogus values + */ + if ((arcn->sb.st_size <= 0) || + (arcn->sb.st_size >= (off_t)sizeof(arcn->ln_name))) { + paxwarn(1, "Cpio link name length is invalid: %zu", + arcn->sb.st_size); + return(-1); + } + + /* + * read in the link name and \0 terminate it + */ + if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) != + (int)arcn->sb.st_size) { + paxwarn(1, "Cpio link name read error"); + return(-1); + } + arcn->ln_nlen = arcn->sb.st_size; + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * watch out for those empty link names + */ + if (arcn->ln_name[0] == '\0') { + paxwarn(1, "Cpio link name is corrupt"); + return(-1); + } + return(0); +} + +/* + * Routines common to the extended byte oriented cpio format + */ + +/* + * cpio_id() + * determine if a block given to us is a valid extended byte oriented + * cpio header + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +cpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_CPIO)) || + (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * cpio_rd() + * determine if a buffer is a byte oriented extended cpio archive entry. + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +cpio_rd(ARCHD *arcn, char *buf) +{ + int nsz; + unsigned long long val; + HD_CPIO *hd; + + /* + * check that this is a valid header, if not return -1 + */ + if (cpio_id(buf, sizeof(HD_CPIO)) < 0) + return(-1); + hd = (HD_CPIO *)buf; + + /* + * byte oriented cpio (posix) does not have padding! 
extract the octal + * ascii fields from the header + */ + arcn->pad = 0; + arcn->sb.st_dev = (dev_t)asc_ul(hd->c_dev, sizeof(hd->c_dev), OCT); + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), OCT); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), OCT); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), OCT); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + OCT); + arcn->sb.st_rdev = (dev_t)asc_ul(hd->c_rdev, sizeof(hd->c_rdev), OCT); + val = asc_ull(hd->c_mtime, sizeof(hd->c_mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize,sizeof(hd->c_filesize), + OCT); + + /* + * check name size and if valid, read in the name of this entry (name + * follows header in the archive) + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * no link name to read for this file + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + return(com_rd(arcn)); + } + + /* + * check link name size and read in the link name. Link names are + * stored like file data. 
+ */ + if (rd_ln_nm(arcn) < 0) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * cpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +cpio_endrd(void) +{ + return sizeof(HD_CPIO) + sizeof(TRAILER); +} + +/* + * cpio_stwr() + * start up the device mapping table + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +cpio_stwr(void) +{ + return(dev_start()); +} + +/* + * cpio_wr() + * copy the data in the ARCHD to buffer in extended byte oriented cpio + * format. + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +cpio_wr(ARCHD *arcn) +{ + HD_CPIO *hd; + int nsz; + char hdblk[sizeof(HD_CPIO)]; + + /* + * check and repair truncated device and inode fields in the header + */ + if (map_dev(arcn, CPIO_MASK, CPIO_MASK) < 0) + return(-1); + + arcn->pad = 0; + nsz = arcn->nlen + 1; + hd = (HD_CPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * set data size for file data + */ + if (ull_asc(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { + paxwarn(1,"File is too large for cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * set data size to hold link name + */ + if (ul_asc(arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) + goto out; + break; + default: + /* + * all other file types have no file data + */ + if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), OCT)) + goto out; + break; + } + + /* + * copy the values to the header using octal ascii + */ + if (ul_asc(MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), OCT) || + ul_asc(arcn->sb.st_ino, 
hd->c_ino, sizeof(hd->c_ino), OCT) || + ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), OCT) || + ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), OCT) || + ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), OCT) || + ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), OCT) || + ul_asc(arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), OCT) || + ull_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime, + sizeof(hd->c_mtime), OCT) || + ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT)) + goto out; + + /* + * write the file name to the archive + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0)) { + paxwarn(1, "Unable to write cpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if this file has data, we are done. The caller will write the file + * data, if we are link tell caller we are done, go to next file + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name to the archive, tell the caller to go to the + * next file as we are done. + */ + if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) { + paxwarn(1,"Unable to write cpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Cpio header field is too small to store file %s", + arcn->org_name); + return(1); +} + +/* + * Routines common to the system VR4 version of cpio (with/without file CRC) + */ + +/* + * vcpio_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITHOUT crc. 
WATCH it the magic cookies are in OCTAL, the header + * uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +vcpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +crc_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVCMAGIC, sizeof(AVCMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_strd() + w set file data CRC calculations. Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +crc_strd(void) +{ + docrc = 1; + return(lnk_start()); +} + +/* + * vcpio_rd() + * determine if a buffer is a system VR4 archive entry. (with/without CRC) + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +vcpio_rd(ARCHD *arcn, char *buf) +{ + HD_VCPIO *hd; + dev_t devminor; + dev_t devmajor; + int nsz; + + /* + * during the id phase it was determined if we were using CRC, use the + * proper id routine. 
+ */ + if (docrc) { + if (crc_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } else { + if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } + + hd = (HD_VCPIO *)buf; + arcn->pad = 0; + + /* + * extract the hex ascii fields from the header + */ + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), HEX); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), HEX); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), HEX); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), HEX); + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime,sizeof(hd->c_mtime),HEX); + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize, + sizeof(hd->c_filesize), HEX); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + HEX); + devmajor = (dev_t)asc_ul(hd->c_maj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_min, sizeof(hd->c_min), HEX); + arcn->sb.st_dev = TODEV(devmajor, devminor); + devmajor = (dev_t)asc_ul(hd->c_rmaj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_rmin, sizeof(hd->c_min), HEX); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + arcn->crc = asc_ul(hd->c_chksum, sizeof(hd->c_chksum), HEX); + + /* + * check the length of the file name, if ok read it in, return -1 if + * bogus + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * skip padding. 
header + filename is aligned to 4 byte boundaries + */ + if (rd_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + /* + * read in the link name and skip over the padding + */ + if ((rd_ln_nm(arcn) < 0) || + (rd_skip(VCPIO_PAD(arcn->sb.st_size)) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * vcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +vcpio_endrd(void) +{ + return sizeof(HD_VCPIO) + sizeof(TRAILER) + + (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER))); +} + +/* + * crc_stwr() + * start up the device mapping table, enable crc file calculation + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +crc_stwr(void) +{ + docrc = 1; + return(dev_start()); +} + +/* + * vcpio_wr() + * copy the data in the ARCHD to buffer in system VR4 cpio + * (with/without crc) format. 
+ * Return + * 0 if file has data to be written after the header, 1 if file has + * NO data to write after the header, -1 if archive write failed + */ + +int +vcpio_wr(ARCHD *arcn) +{ + HD_VCPIO *hd; + unsigned int nsz; + char hdblk[sizeof(HD_VCPIO)]; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, VCPIO_MASK, VCPIO_MASK) < 0) + return(-1); + nsz = arcn->nlen + 1; + hd = (HD_VCPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + /* + * add the proper magic value depending whether we were asked for + * file data crc's, and the crc if needed. + */ + if (docrc) { + if (ul_asc(VCMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), HEX)) + goto out; + } else { + if (ul_asc(VMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(0, hd->c_chksum, sizeof(hd->c_chksum),HEX)) + goto out; + } + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. 
+ */ + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + if (ull_asc(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { + paxwarn(1,"File is too large for sv4cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0; + if (ul_asc(arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0; + if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), HEX)) + goto out; + break; + } + + /* + * set the other fields in the header + */ + if (ul_asc(arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), HEX) || + ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), HEX) || + ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), HEX) || + ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), HEX) || + ul_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime, + sizeof(hd->c_mtime), HEX) || + ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), HEX) || + ul_asc(MAJOR(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), HEX) || + ul_asc(MINOR(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), HEX) || + ul_asc(MAJOR(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), HEX) || + ul_asc(MINOR(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), HEX) || + ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX)) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) || + (wr_rdbuf(arcn->name, (int)nsz) < 0) || + (wr_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0)) { + paxwarn(1,"Could not write sv4cpio header for %s",arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done, copy the file + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip(VCPIO_PAD(arcn->ln_nlen)) < 0)) { + paxwarn(1,"Could not write sv4cpio link name for %s", + arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Sv4cpio header field is too small for file %s",arcn->org_name); + return(1); +} + +/* + * Routines common to the old binary header cpio + */ + +/* + * bcpio_id() + * determine if a block given to us is a old binary cpio header + * (with/without header byte swapping) + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +bcpio_id(char *blk, int size) +{ + if (size < (int)sizeof(HD_BCPIO)) + return(-1); + + /* + * check both normal and byte swapped magic cookies + */ + if (((u_short)SHRT_EXT(blk)) == MAGIC) + return(0); + if (((u_short)RSHRT_EXT(blk)) == MAGIC) { + if (!swp_head) + ++swp_head; + return(0); + } + return(-1); +} + +/* + * bcpio_rd() + * determine if a buffer is a old binary archive entry. (it may have byte + * swapped header) convert and store the values in the ARCHD parameter. + * This is a very old header format and should not really be used. + * Return: + * 0 if a valid header, -1 otherwise. 
+ */ + +int +bcpio_rd(ARCHD *arcn, char *buf) +{ + HD_BCPIO *hd; + int nsz; + + /* + * check the header + */ + if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0) + return(-1); + + arcn->pad = 0; + hd = (HD_BCPIO *)buf; + if (swp_head) { + /* + * header has swapped bytes on 16 bit boundaries + */ + arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(RSHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(RSHRT_EXT(hd->h_filesize_2))); + nsz = (int)(RSHRT_EXT(hd->h_namesize)); + } else { + arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(SHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(SHRT_EXT(hd->h_filesize_2))); + nsz = (int)(SHRT_EXT(hd->h_namesize)); + } + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + + /* + * check the file name size, if bogus give up. 
otherwise read the file + * name + */ + if (nsz < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * header + file name are aligned to 2 byte boundaries, skip if needed + */ + if (rd_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){ + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + if ((rd_ln_nm(arcn) < 0) || + (rd_skip(BCPIO_PAD(arcn->sb.st_size)) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * bcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +bcpio_endrd(void) +{ + return sizeof(HD_BCPIO) + sizeof(TRAILER) + + (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER))); +} + +/* + * bcpio_wr() + * copy the data in the ARCHD to buffer in old binary cpio format + * There is a real chance of field overflow with this critter. So we + * always check the conversion is ok. nobody in their right mind + * should write an archive in this format... 
+ * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +bcpio_wr(ARCHD *arcn) +{ + HD_BCPIO *hd; + int nsz; + char hdblk[sizeof(HD_BCPIO)]; + off_t t_offt; + int t_int; + time_t t_timet; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, BCPIO_MASK, BCPIO_MASK) < 0) + return(-1); + + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + hd = (HD_BCPIO *)hdblk; + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size); + hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size); + hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size); + hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size); + t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1)); + t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->sb.st_size != t_offt) { + paxwarn(1,"File is too large for bcpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0; + hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen); + hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen); + hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen); + hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen); + t_int = (int)(SHRT_EXT(hd->h_filesize_1)); + t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->ln_nlen != t_int) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0; + hd->h_filesize_1[0] = (char)0; + hd->h_filesize_1[1] = (char)0; + hd->h_filesize_2[0] = (char)0; + hd->h_filesize_2[1] = (char)0; + break; + } + + /* + * build up the rest of the fields + */ + hd->h_magic[0] = 
CHR_WR_2(MAGIC); + hd->h_magic[1] = CHR_WR_3(MAGIC); + hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev); + hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev); + if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev))) + goto out; + hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino); + hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino); + if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino))) + goto out; + hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode); + hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode); + if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode))) + goto out; + hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid); + hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid); + if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid))) + goto out; + hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid); + hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid); + if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid))) + goto out; + hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink); + hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink); + if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink))) + goto out; + hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev); + hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev); + if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev))) + goto out; + if (arcn->sb.st_mtime > 0) { + hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime); + hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime); + hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime); + hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime); + t_timet = (time_t)SHRT_EXT(hd->h_mtime_1); + t_timet = t_timet << 16 | (time_t)SHRT_EXT(hd->h_mtime_2); + if (arcn->sb.st_mtime != t_timet) + goto out; + } else { + hd->h_mtime_1[0] = hd->h_mtime_1[1] = 0; + hd->h_mtime_2[0] = hd->h_mtime_2[1] = 0; + } + nsz = arcn->nlen + 1; + hd->h_namesize[0] = CHR_WR_2(nsz); + hd->h_namesize[1] = CHR_WR_3(nsz); + if (nsz != (int)(SHRT_EXT(hd->h_namesize))) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0) || + (wr_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0)) { + paxwarn(1, "Could not write bcpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip(BCPIO_PAD(arcn->ln_nlen)) < 0)) { + paxwarn(1,"Could not write bcpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Bcpio header field is too small for file %s", arcn->org_name); + return(1); +} diff --git a/bin/pax/cpio.h b/bin/pax/cpio.h new file mode 100644 index 0000000..dfbd03f --- /dev/null +++ b/bin/pax/cpio.h @@ -0,0 +1,150 @@ +/* $OpenBSD: cpio.h,v 1.4 2003/06/02 23:32:08 millert Exp $ */ +/* $NetBSD: cpio.h,v 1.3 1995/03/21 09:07:15 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cpio.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Defines common to all versions of cpio + */ +#define TRAILER "TRAILER!!!" 
/* name in last archive record */ + +/* + * Header encoding of the different file types + */ +#define C_ISDIR 040000 /* Directory */ +#define C_ISFIFO 010000 /* FIFO */ +#define C_ISREG 0100000 /* Regular file */ +#define C_ISBLK 060000 /* Block special file */ +#define C_ISCHR 020000 /* Character special file */ +#define C_ISCTG 0110000 /* Reserved for contiguous files */ +#define C_ISLNK 0120000 /* Reserved for symbolic links */ +#define C_ISOCK 0140000 /* Reserved for sockets */ +#define C_IFMT 0170000 /* type of file */ + +/* + * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990 + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_dev[6]; /* device number */ + char c_ino[6]; /* inode number */ + char c_mode[6]; /* file type/access */ + char c_uid[6]; /* owners uid */ + char c_gid[6]; /* owners gid */ + char c_nlink[6]; /* # of links at archive creation */ + char c_rdev[6]; /* block/char major/minor # */ + char c_mtime[11]; /* modification time */ + char c_namesize[6]; /* length of pathname */ + char c_filesize[11]; /* length of file in bytes */ +} HD_CPIO; + +#define MAGIC 070707 /* transportable archive id */ + +#ifdef _PAX_ +#define AMAGIC "070707" /* ascii equivalent string of MAGIC */ +#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */ + /* used for dev/inode remaps */ +#endif /* _PAX_ */ + +/* + * Binary cpio header structure + * + * CAUTION! CAUTION! CAUTION! + * Each field really represents a 16 bit short (NOT ASCII). Described as + * an array of chars in an attempt to improve portability!! 
+ */ +typedef struct { + u_char h_magic[2]; + u_char h_dev[2]; + u_char h_ino[2]; + u_char h_mode[2]; + u_char h_uid[2]; + u_char h_gid[2]; + u_char h_nlink[2]; + u_char h_rdev[2]; + u_char h_mtime_1[2]; + u_char h_mtime_2[2]; + u_char h_namesize[2]; + u_char h_filesize_1[2]; + u_char h_filesize_2[2]; +} HD_BCPIO; + +#ifdef _PAX_ +/* + * extraction and creation macros for binary cpio + */ +#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff)) +#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff)) +#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff)) +#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff)) +#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff)) +#define CHR_WR_3(val) ((char)((val) & 0xff)) + +/* + * binary cpio masks and pads + */ +#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */ +#define BCPIO_MASK 0xffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ + +/* + * System VR4 cpio header structure (with/without file data crc) + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_ino[8]; /* inode number */ + char c_mode[8]; /* file type/access */ + char c_uid[8]; /* owners uid */ + char c_gid[8]; /* owners gid */ + char c_nlink[8]; /* # of links at archive creation */ + char c_mtime[8]; /* modification time */ + char c_filesize[8]; /* length of file in bytes */ + char c_maj[8]; /* block/char major # */ + char c_min[8]; /* block/char minor # */ + char c_rmaj[8]; /* special file major # */ + char c_rmin[8]; /* special file minor # */ + char c_namesize[8]; /* length of pathname */ + char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */ +} HD_VCPIO; + +#define VMAGIC 070701 /* sVr4 new portable archive id */ +#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */ +#ifdef _PAX_ +#define AVMAGIC "070701" /* ascii string of above */ +#define AVCMAGIC "070702" /* ascii string of above */ +#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */ +#define 
VCPIO_MASK 0xffffffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ diff --git a/bin/pax/extern.h b/bin/pax/extern.h new file mode 100644 index 0000000..67a21d8 --- /dev/null +++ b/bin/pax/extern.h @@ -0,0 +1,310 @@ +/* $OpenBSD: extern.h,v 1.60 2020/03/23 20:04:19 espie Exp $ */ +/* $NetBSD: extern.h,v 1.5 1996/03/26 23:54:16 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * External references from each source file + */ + +/* + * ar_io.c + */ +extern const char *arcname; +extern const char *gzip_program; +extern int force_one_volume; +int ar_open(const char *); +void ar_close(int _in_sig); +void ar_drain(void); +int ar_set_wr(void); +int ar_app_ok(void); +int ar_read(char *, int); +int ar_write(char *, int); +int ar_rdsync(void); +int ar_fow(off_t, off_t *); +int ar_rev(off_t ); +int ar_next(void); + +/* + * ar_subs.c + */ +extern u_long flcnt; +void list(void); +void extract(void); +void append(void); +void archive(void); +void copy(void); + +/* + * buf_subs.c + */ +extern int blksz; +extern int wrblksz; +extern int maxflt; +extern int rdblksz; +extern off_t wrlimit; +extern off_t rdcnt; +extern off_t wrcnt; +int wr_start(void); +int rd_start(void); +void cp_start(void); +int appnd_start(off_t); +int rd_sync(void); +void pback(char *, int); +int rd_skip(off_t); +void wr_fin(void); +int wr_rdbuf(char *, int); +int rd_wrbuf(char *, int); +int wr_skip(off_t); +int wr_rdfile(ARCHD *, int, off_t *); +int rd_wrfile(ARCHD *, int, off_t *); +void cp_file(ARCHD *, int, int); +int buf_fill(void); +int buf_flush(int); + +/* + * cpio.c + */ +int cpio_strd(void); +int cpio_trail(ARCHD *, char *, int, int *); +int cpio_endwr(void); +int cpio_id(char *, int); +int cpio_rd(ARCHD *, char *); +off_t cpio_endrd(void); +int cpio_stwr(void); +int cpio_wr(ARCHD *); +int 
vcpio_id(char *, int); +int crc_id(char *, int); +int crc_strd(void); +int vcpio_rd(ARCHD *, char *); +off_t vcpio_endrd(void); +int crc_stwr(void); +int vcpio_wr(ARCHD *); +int bcpio_id(char *, int); +int bcpio_rd(ARCHD *, char *); +off_t bcpio_endrd(void); +int bcpio_wr(ARCHD *); + +/* + * file_subs.c + */ +int file_creat(ARCHD *); +void file_close(ARCHD *, int); +int lnk_creat(ARCHD *); +int cross_lnk(ARCHD *); +int chk_same(ARCHD *); +int node_creat(ARCHD *); +int unlnk_exist(char *, int); +int chk_path(char *, uid_t, gid_t, int); +void set_ftime(const char *, const struct timespec *, + const struct timespec *, int); +void fset_ftime(const char *, int, const struct timespec *, + const struct timespec *, int); +int set_ids(char *, uid_t, gid_t); +int fset_ids(char *, int, uid_t, gid_t); +void set_pmode(char *, mode_t); +void fset_pmode(char *, int, mode_t); +int set_attr(const struct file_times *, int _force_times, mode_t, int _do_mode, + int _in_sig); +int file_write(int, char *, int, int *, int *, int, char *); +void file_flush(int, char *, int); +void rdfile_close(ARCHD *, int *); +int set_crc(ARCHD *, int); + +/* + * ftree.c + */ +int ftree_start(void); +int ftree_add(char *, int); +void ftree_sel(ARCHD *); +void ftree_skipped_newer(ARCHD *); +void ftree_chk(void); +int next_file(ARCHD *); + +/* + * gen_subs.c + */ +void ls_list(ARCHD *, time_t, FILE *); +void ls_tty(ARCHD *); +void safe_print(const char *, FILE *); +u_long asc_ul(char *, int, int); +int ul_asc(u_long, char *, int, int); +unsigned long long asc_ull(char *, int, int); +int ull_asc(unsigned long long, char *, int, int); +size_t fieldcpy(char *, size_t, const char *, size_t); + +/* + * getoldopt.c + */ +int getoldopt(int, char **, const char *); + +/* + * options.c + */ +extern FSUB fsub[]; +extern int ford[]; +void options(int, char **); +OPLIST * opt_next(void); +int opt_add(const char *); +int bad_opt(void); +extern char *chdname; + +/* + * pat_rep.c + */ +int rep_add(char *); +int 
pat_add(char *, char *); +void pat_chk(void); +int pat_sel(ARCHD *); +int pat_match(ARCHD *); +int mod_name(ARCHD *); +int set_dest(ARCHD *, char *, int); +int has_dotdot(const char *); + +/* + * pax.c + */ +extern int act; +extern FSUB *frmt; +extern int cflag; +extern int cwdfd; +extern int dflag; +extern int iflag; +extern int kflag; +extern int lflag; +extern int nflag; +extern int tflag; +extern int uflag; +extern int vflag; +extern int Dflag; +extern int Hflag; +extern int Lflag; +extern int Nflag; +extern int Xflag; +extern int Yflag; +extern int Zflag; +extern int zeroflag; +extern int vfpart; +extern int patime; +extern int pmtime; +extern int nodirs; +extern int pmode; +extern int pids; +extern int rmleadslash; +extern int exit_val; +extern int docrc; +extern char *dirptr; +extern char *argv0; +extern enum op_mode { OP_PAX, OP_TAR, OP_CPIO } op_mode; +extern FILE *listf; +extern int listfd; +extern char *tempfile; +extern char *tempbase; +extern int havechd; + +void sig_cleanup(int); + +/* + * sel_subs.c + */ +int sel_chk(ARCHD *); +int grp_add(char *); +int usr_add(char *); +int trng_add(char *); + +/* + * tables.c + */ +int lnk_start(void); +int chk_lnk(ARCHD *); +void purg_lnk(ARCHD *); +void lnk_end(void); +int ftime_start(void); +int chk_ftime(ARCHD *); +int sltab_start(void); +int sltab_add_sym(const char *_path, const char *_value, mode_t _mode); +int sltab_add_link(const char *, const struct stat *); +void sltab_process(int _in_sig); +int name_start(void); +int add_name(char *, int, char *); +void sub_name(char *, int *, int); +#ifndef NOCPIO +int dev_start(void); +int add_dev(ARCHD *); +int map_dev(ARCHD *, u_long, u_long); +#else +# define dev_start() 0 +# define add_dev(x) 0 +# define map_dev(x,y,z) 0 +#endif /* NOCPIO */ +int atdir_start(void); +void atdir_end(void); +void add_atdir(char *, dev_t, ino_t, const struct timespec *, + const struct timespec *); +int do_atdir(const char *, dev_t, ino_t); +int dir_start(void); +void add_dir(char *, 
struct stat *, int); +void delete_dir(dev_t, ino_t); +void proc_dir(int _in_sig); +u_int st_hash(const char *, int, int); + +/* + * tar.c + */ +extern int tar_nodir; +extern char *gnu_name_string, *gnu_link_string; +int tar_endwr(void); +off_t tar_endrd(void); +int tar_trail(ARCHD *, char *, int, int *); +int tar_id(char *, int); +int tar_opt(void); +int tar_rd(ARCHD *, char *); +int tar_wr(ARCHD *); +int ustar_id(char *, int); +int ustar_rd(ARCHD *, char *); +int ustar_wr(ARCHD *); + +/* + * tty_subs.c + */ +int tty_init(void); +void tty_prnt(const char *, ...) + __attribute__((nonnull(1), format(printf, 1, 2))); +int tty_read(char *, int); +void paxwarn(int, const char *, ...) + __attribute__((nonnull(2), format(printf, 2, 3))); +void syswarn(int, int, const char *, ...) + __attribute__((nonnull(3), format(printf, 3, 4))); diff --git a/bin/pax/file_subs.c b/bin/pax/file_subs.c new file mode 100644 index 0000000..2c0994f --- /dev/null +++ b/bin/pax/file_subs.c @@ -0,0 +1,1106 @@ +/* $OpenBSD: file_subs.c,v 1.55 2020/03/23 20:04:19 espie Exp $ */ +/* $NetBSD: file_subs.c,v 1.4 1995/03/21 09:07:18 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +static int +mk_link(char *, struct stat *, char *, int); + +/* + * routines that deal with file operations such as: creating, removing; + * and setting access modes, uid/gid and times of files + */ + +/* + * file_creat() + * Create and open a file. + * Return: + * file descriptor or -1 for failure + */ + +int +file_creat(ARCHD *arcn) +{ + int fd = -1; + mode_t file_mode; + int oerrno; + + /* + * Assume file doesn't exist, so just try to create it, most times this + * works. We have to take special handling when the file does exist. To + * detect this, we use O_EXCL. 
For example when trying to create a + * file and a character device or fifo exists with the same name, we + * can accidentally open the device by mistake (or block waiting to open). + * If we find that the open has failed, then spend the effort to + * figure out why. This strategy was found to have better average + * performance in common use than checking the file (and the path) + * first with lstat. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_EXCL, + file_mode)) >= 0) + return(fd); + + /* + * the file seems to exist. First we try to get rid of it (found to be + * the second most common failure when traced). If this fails, only + * then we go to the expense to check and create the path to the file + */ + if (unlnk_exist(arcn->name, arcn->type) != 0) + return(-1); + + for (;;) { + /* + * try to open it again, if this fails, check all the nodes in + * the path and give it a final try. if chk_path() finds that + * it cannot fix anything, we will skip the last attempt + */ + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC, + file_mode)) >= 0) + break; + oerrno = errno; + if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) { + syswarn(1, oerrno, "Unable to create %s", arcn->name); + return(-1); + } + } + return(fd); +} + +/* + * file_close() + * Close file descriptor to a file just created by pax. Sets modes, + * ownership and times as required. + * Return: + * nothing; a failure to close the descriptor is reported as a warning + */ + +void +file_close(ARCHD *arcn, int fd) +{ + int res = 0; + + if (fd < 0) + return; + + /* + * set owner/groups first as this may strip off mode bits we want + * then set file permission modes. Then set file access and + * modification times. 
+ */ + if (pids) + res = fset_ids(arcn->name, fd, arcn->sb.st_uid, + arcn->sb.st_gid); + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + fset_pmode(arcn->name, fd, arcn->sb.st_mode); + if (patime || pmtime) + fset_ftime(arcn->name, fd, &arcn->sb.st_mtim, + &arcn->sb.st_atim, 0); + if (close(fd) == -1) + syswarn(0, errno, "Unable to close file descriptor on %s", + arcn->name); +} + +/* + * lnk_creat() + * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name + * must exist; + * Return: + * 0 if ok, -1 otherwise + */ + +int +lnk_creat(ARCHD *arcn) +{ + struct stat sb; + int res; + + /* + * we may be running as root, so we have to be sure that link target + * is not a directory, so we lstat and check + */ + if (lstat(arcn->ln_name, &sb) == -1) { + syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name, + arcn->name); + return(-1); + } + + if (S_ISDIR(sb.st_mode)) { + paxwarn(1, "A hard link to the directory %s is not allowed", + arcn->ln_name); + return(-1); + } + + res = mk_link(arcn->ln_name, &sb, arcn->name, 0); + if (res == 0) { + /* check for a hardlink to a placeholder symlink */ + res = sltab_add_link(arcn->name, &sb); + + if (res < 0) { + /* arrgh, it failed, clean up */ + unlink(arcn->name); + } + } + + return (res); +} + +/* + * cross_lnk() + * Create a hard link to arcn->org_name from arcn->name. Only used in copy + * with the -l flag. No warning or error if this does not succeed (we will + * then just create the file) + * Return: + * 1 if copy() should try to create this file node + * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). + */ + +int +cross_lnk(ARCHD *arcn) +{ + /* + * try to make a link to original file (-l flag in copy mode). make + * sure we do not try to link to directories in case we are running as + * root (and it might succeed). 
+ */ + if (arcn->type == PAX_DIR) + return(1); + return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1)); +} + +/* + * chk_same() + * In copy mode if we are not trying to make hard links between the src + * and destinations, make sure we are not going to overwrite ourselves by + * accident. This slows things down a little, but we have to protect all + * those people who make typing errors. + * Return: + * 1 the target does not exist, go ahead and copy + * 0 skip it: the file exists (-k) or may be the same as the source file + */ + +int +chk_same(ARCHD *arcn) +{ + struct stat sb; + + /* + * if file does not exist, return. if file exists and -k, skip it + * quietly + */ + if (lstat(arcn->name, &sb) == -1) + return(1); + if (kflag) + return(0); + + /* + * better make sure the user does not have src == dest by mistake + */ + if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { + paxwarn(1, "Unable to copy %s, file would overwrite itself", + arcn->name); + return(0); + } + return(1); +} + +/* + * mk_link() + * try to make a hard link between two files. if ign set, we do not + * complain. + * Return: + * 0 if successful (or we are done with this file but no error, such as + * finding the from file exists and the user has set -k). + * 1 when ign was set, to indicate we could not make the link but we + * should try to copy/extract the file as that might work (and is an + * allowed option). -1 an error occurred. + */ + +static int +mk_link(char *to, struct stat *to_sb, char *from, int ign) +{ + struct stat sb; + int oerrno; + + /* + * if from file exists, it has to be unlinked to make the link. 
If the + * file exists and -k is set, skip it quietly + */ + if (lstat(from, &sb) == 0) { + if (kflag) + return(0); + + /* + * make sure it is not the same file, protect the user + */ + if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { + paxwarn(1, "Unable to link file %s to itself", to); + return(-1); + } + + /* + * try to get rid of the file, based on the type + */ + if (S_ISDIR(sb.st_mode)) { + if (rmdir(from) == -1) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + delete_dir(sb.st_dev, sb.st_ino); + } else if (unlink(from) == -1) { + if (!ign) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + return(1); + } + } + + /* + * from file is gone (or did not exist), try to make the hard link. + * if it fails, check the path and try it again (if chk_path() says to + * try again) + */ + for (;;) { + if (linkat(AT_FDCWD, to, AT_FDCWD, from, 0) == 0) + break; + oerrno = errno; + if (!nodirs && chk_path(from, to_sb->st_uid, to_sb->st_gid, ign) == 0) + continue; + if (!ign) { + syswarn(1, oerrno, "Could not link to %s from %s", to, + from); + return(-1); + } + return(1); + } + + /* + * all right the link was made + */ + return(0); +} + +/* + * node_creat() + * create an entry in the file system (other than a file or hard link). + * If successful, sets uid/gid modes and times as required. + * Return: + * 0 if ok, -1 otherwise + */ + +int +node_creat(ARCHD *arcn) +{ + int res; + int ign = 0; + int oerrno; + int pass = 0; + mode_t file_mode; + struct stat sb; + char target[PATH_MAX]; + char *nm = arcn->name; + int len, defer_pmode = 0; + + /* + * create node based on type, if that fails try to unlink the node and + * try again. finally check the path and try again. As noted in the + * file and link creation routines, this method seems to exhibit the + * best performance in general use workloads. 
+ */ + file_mode = arcn->sb.st_mode & FILEBITS; + + for (;;) { + switch (arcn->type) { + case PAX_DIR: + /* + * If -h (or -L) was given in tar-mode, follow the + * potential symlink chain before trying to create the + * directory. + */ + if (op_mode == OP_TAR && Lflag) { + while (lstat(nm, &sb) == 0 && + S_ISLNK(sb.st_mode)) { + len = readlink(nm, target, + sizeof target - 1); + if (len == -1) { + syswarn(0, errno, + "cannot follow symlink %s in chain for %s", + nm, arcn->name); + res = -1; + goto badlink; + } + target[len] = '\0'; + nm = target; + } + } + res = mkdir(nm, file_mode); + +badlink: + if (ign) + res = 0; + break; + case PAX_CHR: + file_mode |= S_IFCHR; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_BLK: + file_mode |= S_IFBLK; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_FIF: + res = mkfifo(nm, file_mode); + break; + case PAX_SCK: + /* + * Skip sockets, operation has no meaning under BSD + */ + paxwarn(0, + "%s skipped. Sockets cannot be copied or extracted", + nm); + return(-1); + case PAX_SLK: + if (arcn->ln_name[0] != '/' && + !has_dotdot(arcn->ln_name)) + res = symlink(arcn->ln_name, nm); + else { + /* + * absolute symlinks and symlinks with ".." + * have to be deferred to prevent the archive + * from bootstrapping itself to outside the + * working directory. + */ + res = sltab_add_sym(nm, arcn->ln_name, + arcn->sb.st_mode); + if (res == 0) + defer_pmode = 1; + } + break; + case PAX_CTG: + case PAX_HLK: + case PAX_HRG: + case PAX_REG: + default: + /* + * we should never get here + */ + paxwarn(0, "%s has an unknown file type, skipping", + nm); + return(-1); + } + + /* + * if we were able to create the node break out of the loop, + * otherwise try to unlink the node and try again. if that + * fails check the full path and try a final time. 
+ */ + if (res == 0) + break; + + /* + * we failed to make the node + */ + oerrno = errno; + if ((ign = unlnk_exist(nm, arcn->type)) < 0) + return(-1); + + if (++pass <= 1) + continue; + + if (nodirs || chk_path(nm,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) { + syswarn(1, oerrno, "Could not create: %s", nm); + return(-1); + } + } + + /* + * we were able to create the node. set uid/gid, modes and times + */ + if (pids) + res = set_ids(nm, arcn->sb.st_uid, arcn->sb.st_gid); + else + res = 0; + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT any + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode && !defer_pmode) + set_pmode(nm, arcn->sb.st_mode); + + if (arcn->type == PAX_DIR && op_mode != OP_CPIO) { + /* + * Dirs must be processed again at end of extract to set times + * and modes to agree with those stored in the archive. However + * to allow extract to continue, we may have to also set owner + * rights. This allows nodes in the archive that are children + * of this directory to be extracted without failure. Both time + * and modes will be fixed after the entire archive is read and + * before pax exits. To do that safely, we want the dev+ino + * of the directory we created. + */ + if (lstat(nm, &sb) == -1) { + syswarn(0, errno,"Could not access %s (stat)", nm); + } else if (access(nm, R_OK | W_OK | X_OK) == -1) { + /* + * We have to add rights to the dir, so we make + * sure to restore the mode. The mode must be + * restored AS CREATED and not as stored if + * pmode is not set. + */ + set_pmode(nm, + ((sb.st_mode & FILEBITS) | S_IRWXU)); + if (!pmode) + arcn->sb.st_mode = sb.st_mode; + + /* + * we have to force the mode to what was set + * here, since we changed it from the default + * as created. 
+ */ + arcn->sb.st_dev = sb.st_dev; + arcn->sb.st_ino = sb.st_ino; + add_dir(nm, &(arcn->sb), 1); + } else if (pmode || patime || pmtime) { + arcn->sb.st_dev = sb.st_dev; + arcn->sb.st_ino = sb.st_ino; + add_dir(nm, &(arcn->sb), 0); + } + } else if (patime || pmtime) + set_ftime(nm, &arcn->sb.st_mtim, &arcn->sb.st_atim, 0); + return(0); +} + +/* + * unlnk_exist() + * Remove node from file system with the specified name. We pass the type + * of the node that is going to replace it. When we try to create a + * directory and find that it already exists, we allow processing to + * continue as proper modes etc will always be set for it later on. + * Return: + * 0 is ok to proceed, no file with the specified name exists + * -1 we were unable to remove the node, or we should not remove it (-k) + * 1 we found a directory and we were going to create a directory. + */ + +int +unlnk_exist(char *name, int type) +{ + struct stat sb; + + /* + * the file does not exist, or -k we are done + */ + if (lstat(name, &sb) == -1) + return(0); + if (kflag) + return(-1); + + if (S_ISDIR(sb.st_mode)) { + /* + * try to remove a directory, if it fails and we were going to + * create a directory anyway, tell the caller (return a 1) + */ + if (rmdir(name) == -1) { + if (type == PAX_DIR) + return(1); + syswarn(1,errno,"Unable to remove directory %s", name); + return(-1); + } + delete_dir(sb.st_dev, sb.st_ino); + return(0); + } + + /* + * try to get rid of all non-directory type nodes + */ + if (unlink(name) == -1) { + syswarn(1, errno, "Could not unlink %s", name); + return(-1); + } + return(0); +} + +/* + * chk_path() + * We were trying to create some kind of node in the file system and it + * failed. chk_path() makes sure the path up to the node exists and is + * writeable. When we have to create a directory that is missing along the + * path somewhere, the directory we create will be set to the same + * uid/gid as the file has (when uid and gid are being preserved). 
+ * NOTE: this routine is a real performance loss. It is only used as a + * last resort when trying to create entries in the file system. + * Return: + * -1 when it could find nothing it is allowed to fix. + * 0 otherwise + */ + +int +chk_path(char *name, uid_t st_uid, gid_t st_gid, int ign) +{ + char *spt = name; + char *next; + struct stat sb; + int retval = -1; + + /* + * watch out for paths with nodes stored directly in / (e.g. /bozo) + */ + while (*spt == '/') + ++spt; + + for (;;) { + /* + * work forward from the first / and check each part of the path + */ + spt = strchr(spt, '/'); + if (spt == NULL) + break; + + /* + * skip over duplicate slashes; stop if there're only + * trailing slashes left + */ + next = spt + 1; + while (*next == '/') + next++; + if (*next == '\0') + break; + + *spt = '\0'; + + /* + * if it exists we assume it is a directory, it is not within + * the spec (at least it seems to read that way) to alter the + * file system for nodes NOT EXPLICITLY stored on the archive. + * If that assumption is changed, you would test the node here + * and figure out how to get rid of it (probably like some + * recursive unlink()) or fix up the directory permissions if + * required (do an access()). + */ + if (lstat(name, &sb) == 0) { + *spt = '/'; + spt = next; + continue; + } + + /* + * the path fails at this point, see if we can create the + * needed directory and continue on + */ + if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) == -1) { + if (!ign) + syswarn(1, errno, "Unable to mkdir %s", name); + *spt = '/'; + retval = -1; + break; + } + + /* + * we were able to create the directory. We will tell the + * caller that we found something to fix, and it is ok to try + * and create the node again. + */ + retval = 0; + if (pids) + (void)set_ids(name, st_uid, st_gid); + + /* + * make sure the user doesn't have some strange umask that + * causes this newly created directory to be unusable. 
We fix + * the modes and restore them back to the creation default at + * the end of pax + */ + if ((access(name, R_OK | W_OK | X_OK) == -1) && + (lstat(name, &sb) == 0)) { + set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU)); + add_dir(name, &sb, 1); + } + *spt = '/'; + spt = next; + continue; + } + return(retval); +} + +/* + * set_ftime() + * Set the access time and modification time for a named file. If frc + * is non-zero we force these times to be set even if the user did not + * request access and/or modification time preservation (this is also + * used by -t to reset access times). + * When ign is zero, only those times the user has asked for are set, the + * other ones are left alone. + */ + +void +set_ftime(const char *fnm, const struct timespec *mtimp, + const struct timespec *atimp, int frc) +{ + struct timespec tv[2]; + + tv[0] = *atimp; + tv[1] = *mtimp; + + if (!frc) { + /* + * if we are not forcing, only set those times the user wants + * set. + */ + if (!patime) + tv[0].tv_nsec = UTIME_OMIT; + if (!pmtime) + tv[1].tv_nsec = UTIME_OMIT; + } + + /* + * set the times + */ + if (utimensat(AT_FDCWD, fnm, tv, AT_SYMLINK_NOFOLLOW) < 0) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); +} + +void +fset_ftime(const char *fnm, int fd, const struct timespec *mtimp, + const struct timespec *atimp, int frc) +{ + struct timespec tv[2]; + + + tv[0] = *atimp; + tv[1] = *mtimp; + + if (!frc) { + /* + * if we are not forcing, only set those times the user wants + * set. 
+ */ + if (!patime) + tv[0].tv_nsec = UTIME_OMIT; + if (!pmtime) + tv[1].tv_nsec = UTIME_OMIT; + } + /* + * set the times + */ + if (futimens(fd, tv) == -1) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); +} + +/* + * set_ids() + * set the uid and gid of a file system node + * Return: + * 0 when set, -1 on failure + */ + +int +set_ids(char *fnm, uid_t uid, gid_t gid) +{ + if (fchownat(AT_FDCWD, fnm, uid, gid, AT_SYMLINK_NOFOLLOW) == -1) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (op_mode == OP_PAX || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +int +fset_ids(char *fnm, int fd, uid_t uid, gid_t gid) +{ + if (fchown(fd, uid, gid) == -1) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (op_mode == OP_PAX || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +/* + * set_pmode() + * Set file access mode + */ + +void +set_pmode(char *fnm, mode_t mode) +{ + mode &= ABITS; + if (fchmodat(AT_FDCWD, fnm, mode, AT_SYMLINK_NOFOLLOW) == -1 && errno != EOPNOTSUPP) + syswarn(1, errno, "Could not set permissions on %s", fnm); +} + +void +fset_pmode(char *fnm, int fd, mode_t mode) +{ + mode &= ABITS; + if (fchmod(fd, mode) == -1) + syswarn(1, errno, "Could not set permissions on %s", fnm); +} + +/* + * set_attr() + * Given a DIRDATA, restore the mode and times as indicated, but + * only after verifying that it's the directory that we wanted. 
+ */ +int +set_attr(const struct file_times *ft, int force_times, mode_t mode, + int do_mode, int in_sig) +{ + struct stat sb; + int fd, r; + + if (!do_mode && !force_times && !patime && !pmtime) + return (0); + + /* + * We could legitimately go through a symlink here, + * so do *not* use O_NOFOLLOW. The dev+ino check will + * protect us from evil. + */ + fd = open(ft->ft_name, O_RDONLY | O_DIRECTORY); + if (fd == -1) { + if (!in_sig) + syswarn(1, errno, "Unable to restore mode and times" + " for directory: %s", ft->ft_name); + return (-1); + } + + if (fstat(fd, &sb) == -1) { + if (!in_sig) + syswarn(1, errno, "Unable to stat directory: %s", + ft->ft_name); + r = -1; + } else if (ft->ft_ino != sb.st_ino || ft->ft_dev != sb.st_dev) { + if (!in_sig) + paxwarn(1, "Directory vanished before restoring" + " mode and times: %s", ft->ft_name); + r = -1; + } else { + /* Whew, it's a match! Is there anything to change? */ + if (do_mode && (mode & ABITS) != (sb.st_mode & ABITS)) + fset_pmode(ft->ft_name, fd, mode); + if (((force_times || patime) && + timespeccmp(&ft->ft_atim, &sb.st_atim, !=)) || + ((force_times || pmtime) && + timespeccmp(&ft->ft_mtim, &sb.st_mtim, !=))) + fset_ftime(ft->ft_name, fd, &ft->ft_mtim, + &ft->ft_atim, force_times); + r = 0; + } + close(fd); + + return (r); +} + + +/* + * file_write() + * Write/copy a file (during copy or archive extract). This routine knows + * how to copy files with lseek holes in it. (Which are read as file + * blocks containing all 0's but do not have any file blocks associated + * with the data). Typical examples of these are files created by dbm + * variants (.pag files). While the file size of these files are huge, the + * actual storage is quite small (the files are sparse). The problem is + * the holes read as all zeros so are probably stored on the archive that + * way (there is no way to determine if the file block is really a hole, + * we only know that a file block of all zero's can be a hole). 
+ * At this writing, no major archive format knows how to archive files + * with holes. However, on extraction (or during copy, -rw) we have to + * deal with these files. Without detecting the holes, the files can + * consume a lot of file space if just written to disk. This replacement + * for write when passed the basic allocation size of a file system block, + * uses lseek whenever it detects the input data is all 0 within that + * file block. In more detail, the strategy is as follows: + * While the input is all zero keep doing an lseek. Keep track of when we + * pass over file block boundaries. Only write when we hit a non zero + * input. Once we have written a file block, we continue to write it to + * the end (we stop looking at the input). When we reach the start of the + * next file block, start checking for zero blocks again. Working on file + * block boundaries significantly reduces the overhead when copying files + * that are NOT very sparse. This overhead (when compared to a write) is + * almost below the measurement resolution on many systems. Without it, + * files with holes cannot be safely copied. It does have a side effect as + * it can put holes into files that did not have them before, but that is + * not a problem since the file contents are unchanged (in fact it saves + * file space). (Except on paging files for diskless clients. But since we + * cannot determine one of those files from here, we ignore them). If this + * ever ends up on a system where CTG files are supported and the holes + * are not desired, just do a conditional test in those routines that + * call file_write() and have it call write() instead. BEFORE CLOSING THE + * FILE, make sure to call file_flush() when the last write finishes with + * an empty block. A lot of file systems will not create an lseek hole at + * the end. In this case we drop a single 0 at the end to force the + * trailing 0's in the file.
+ * ---Parameters--- + * rem: how many bytes left in this file system block + * isempt: have we written to the file block yet (is it empty) + * sz: basic file block allocation size + * cnt: number of bytes on this write + * str: buffer to write + * Return: + * number of bytes written, -1 on write (or lseek) error. + */ + +int +file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz, + char *name) +{ + char *pt; + char *end; + int wcnt; + char *st = str; + + /* + * while we have data to process + */ + while (cnt) { + if (!*rem) { + /* + * We are now at the start of file system block again + * (or what we think one is...). start looking for + * empty blocks again + */ + *isempt = 1; + *rem = sz; + } + + /* + * only examine up to the end of the current file block or + * remaining characters to write, whatever is smaller + */ + wcnt = MINIMUM(cnt, *rem); + cnt -= wcnt; + *rem -= wcnt; + if (*isempt) { + /* + * have not written to this block yet, so we keep + * looking for zero's + */ + pt = st; + end = st + wcnt; + + /* + * look for a zero filled buffer + */ + while ((pt < end) && (*pt == '\0')) + ++pt; + + if (pt == end) { + /* + * skip, buf is empty so far + */ + if (fd > -1 && + lseek(fd, wcnt, SEEK_CUR) < 0) { + syswarn(1,errno,"File seek on %s", + name); + return(-1); + } + st = pt; + continue; + } + /* + * drat, the buf is not zero filled + */ + *isempt = 0; + } + + /* + * have non-zero data in this file system block, have to write + */ + if (write(fd, st, wcnt) != wcnt) { + syswarn(1, errno, "Failed write to file %s", name); + return(-1); + } + st += wcnt; + } + return(st - str); +} + +/* + * file_flush() + * when the last file block in a file is zero, many file systems will not + * let us create a hole at the end. To get the last block with zeros, we + * write the last BYTE with a zero (back up one byte and write a zero). 
+ */ + +void +file_flush(int fd, char *fname, int isempt) +{ + static char blnk[] = "\0"; + + /* + * silly test, but make sure we are only called when the last block is + * filled with all zeros. + */ + if (!isempt) + return; + + /* + * move back one byte and write a zero + */ + if (lseek(fd, -1, SEEK_CUR) < 0) { + syswarn(1, errno, "Failed seek on file %s", fname); + return; + } + + if (write(fd, blnk, 1) == -1) + syswarn(1, errno, "Failed write to file %s", fname); +} + +/* + * rdfile_close() + * close a file we have been reading (to copy or archive). If we have to + * reset access time (tflag) do so (the times are stored in arcn). + */ + +void +rdfile_close(ARCHD *arcn, int *fd) +{ + /* + * make sure the file is open + */ + if (*fd < 0) + return; + + /* + * user wants last access time reset + */ + if (tflag) + fset_ftime(arcn->org_name, *fd, &arcn->sb.st_mtim, + &arcn->sb.st_atim, 1); + + (void)close(*fd); + *fd = -1; +} + +/* + * set_crc() + * read a file to calculate its crc. This is a real drag. Archive formats + * that have this, end up reading the file twice (we have to write the + * header WITH the crc before writing the file contents. Oh well... + * Return: + * 0 if was able to calculate the crc, -1 otherwise + */ + +int +set_crc(ARCHD *arcn, int fd) +{ + int i; + int res; + off_t cpcnt = 0; + size_t size; + u_int32_t crc = 0; + char tbuf[FILEBLK]; + struct stat sb; + + if (fd < 0) { + /* + * hmm, no fd, should never happen. well no crc then. + */ + arcn->crc = 0; + return(0); + } + + if ((size = arcn->sb.st_blksize) > sizeof(tbuf)) + size = sizeof(tbuf); + + /* + * read all the bytes we think that there are in the file. If the user + * is trying to archive an active file, forget this file. + */ + for (;;) { + if ((res = read(fd, tbuf, size)) <= 0) + break; + cpcnt += res; + for (i = 0; i < res; ++i) + crc += (tbuf[i] & 0xff); + } + + /* + * safety check. we want to avoid archiving files that are active as + * they can create inconsistent archive copies. 
+ */ + if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File changed size %s", arcn->org_name); + else if (fstat(fd, &sb) == -1) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during read", arcn->org_name); + else if (lseek(fd, 0, SEEK_SET) < 0) + syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); + else { + arcn->crc = crc; + return(0); + } + return(-1); +} diff --git a/bin/pax/ftree.c b/bin/pax/ftree.c new file mode 100644 index 0000000..b780dbb --- /dev/null +++ b/bin/pax/ftree.c @@ -0,0 +1,566 @@ +/* $OpenBSD: ftree.c,v 1.42 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ftree.c,v 1.4 1995/03/21 09:07:21 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fts.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Data structure used to store the file args to be handed to fts(). + * It keeps track of which args generated a "selected" member. + */ +typedef struct ftree { + char *fname; /* file tree name */ + int refcnt; /* has tree had a selected file? */ + int newercnt; /* skipped due to -u/-D */ + int chflg; /* change directory flag */ + struct ftree *fow; /* pointer to next entry on list */ +} FTREE; + + +/* + * routines to interface with the fts library function. + * + * file args supplied to pax are stored on a single linked list (of type FTREE) + * and given to fts to be processed one at a time. pax "selects" files from + * the expansion of each arg into the corresponding file tree (if the arg is a + * directory, otherwise the node itself is just passed to pax). The selection + * is modified by the -n and -u flags. The user is informed when a specific + * file arg does not generate any selected files. -n keeps expanding the file + * tree arg until one of its files is selected, then skips to the next file + * arg. 
when the user does not supply the file trees as command line args to + * pax, they are read from stdin + */ + +static FTS *ftsp = NULL; /* current FTS handle */ +static int ftsopts; /* options to be used on fts_open */ +static char *farray[2]; /* array for passing each arg to fts */ +static FTREE *fthead = NULL; /* head of linked list of file args */ +static FTREE *fttail = NULL; /* tail of linked list of file args */ +static FTREE *ftcur = NULL; /* current file arg being processed */ +static FTSENT *ftent = NULL; /* current file tree entry */ +static int ftree_skip; /* when set skip to next file arg */ + +static int ftree_arg(void); +static char *getpathname(char *, int); + +/* + * ftree_start() + * initialize the options passed to fts_open() during this run of pax + * options are based on the selection of pax options by the user + * fts_start() also calls fts_arg() to open the first valid file arg. We + * also attempt to reset directory access times when -t (tflag) is set. + * Return: + * 0 if there is at least one valid file arg to process, -1 otherwise + */ + +int +ftree_start(void) +{ + /* + * set up the operation mode of fts, open the first file arg. We must + * use FTS_NOCHDIR, as the user may have to open multiple archives and + * if fts did a chdir off into the boondocks, we may create an archive + * volume in an place where the user did not expect to. + */ + ftsopts = FTS_NOCHDIR; + + /* + * optional user flags that effect file traversal + * -H command line symlink follow only (half follow) + * -L follow sylinks (logical) + * -P do not follow sylinks (physical). This is the default. + * -X do not cross over mount points + * -t preserve access times on files read. + * -n select only the first member of a file tree when a match is found + * -d do not extract subtrees rooted at a directory arg. 
+ */ + if (Lflag) + ftsopts |= FTS_LOGICAL; + else + ftsopts |= FTS_PHYSICAL; + if (Hflag) + ftsopts |= FTS_COMFOLLOW; + if (Xflag) + ftsopts |= FTS_XDEV; + + if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) { + paxwarn(1, "Unable to allocate memory for file name buffer"); + return(-1); + } + + if (ftree_arg() < 0) + return(-1); + if (tflag && (atdir_start() < 0)) + return(-1); + return(0); +} + +/* + * ftree_add() + * add the arg to the linked list of files to process. Each will be + * processed by fts one at a time + * Return: + * 0 if added to the linked list, -1 if failed + */ + +int +ftree_add(char *str, int chflg) +{ + FTREE *ft; + int len; + + /* + * simple check for bad args + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(0, "Invalid file name argument"); + return(-1); + } + + /* + * allocate FTREE node and add to the end of the linked list (args are + * processed in the same order they were passed to pax). Get rid of any + * trailing / the user may pass us. (watch out for / by itself). + */ + if ((ft = malloc(sizeof(FTREE))) == NULL) { + paxwarn(0, "Unable to allocate memory for filename"); + return(-1); + } + + if (((len = strlen(str) - 1) > 0) && (str[len] == '/')) + str[len] = '\0'; + ft->fname = str; + ft->refcnt = 0; + ft->newercnt = 0; + ft->chflg = chflg; + ft->fow = NULL; + if (fthead == NULL) { + fttail = fthead = ft; + return(0); + } + fttail->fow = ft; + fttail = ft; + return(0); +} + +/* + * ftree_sel() + * this entry has been selected by pax. bump up reference count and handle + * -n and -d processing. + */ + +void +ftree_sel(ARCHD *arcn) +{ + /* + * set reference bit for this pattern. This linked list is only used + * when file trees are supplied pax as args. The list is not used when + * the trees are read from stdin. + */ + if (ftcur != NULL) + ftcur->refcnt = 1; + + /* + * if -n we are done with this arg, force a skip to the next arg when + * pax asks for the next file in next_file(). 
+ * if -d we tell fts only to match the directory (if the arg is a dir) + * and not the entire file tree rooted at that point. + */ + if (nflag) + ftree_skip = 1; + + if (!dflag || (arcn->type != PAX_DIR)) + return; + + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_skipped_newer() + * file has been skipped because a newer file exists and -u/-D given + */ + +void +ftree_skipped_newer(ARCHD *arcn) +{ + /* skipped due to -u/-D, mark accordingly */ + if (ftcur != NULL) + ftcur->newercnt = 1; +} + +/* + * ftree_chk() + * called at end on pax execution. Prints all those file args that did not + * have a selected member (reference count still 0) + */ + +void +ftree_chk(void) +{ + FTREE *ft; + int wban = 0; + + /* + * make sure all dir access times were reset. + */ + if (tflag) + atdir_end(); + + /* + * walk down list and check reference count. Print out those members + * that never had a match + */ + for (ft = fthead; ft != NULL; ft = ft->fow) { + if ((ft->refcnt > 0) || ft->newercnt > 0 || ft->chflg) + continue; + if (wban == 0) { + paxwarn(1,"WARNING! These file names were not selected:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", ft->fname); + } +} + +/* + * ftree_arg() + * Get the next file arg for fts to process. Can be from either the linked + * list or read from stdin when the user did not them as args to pax. Each + * arg is processed until the first successful fts_open(). + * Return: + * 0 when the next arg is ready to go, -1 if out of file args (or EOF on + * stdin). + */ + +static int +ftree_arg(void) +{ + + /* + * close off the current file tree + */ + if (ftsp != NULL) { + (void)fts_close(ftsp); + ftsp = NULL; + } + + /* + * keep looping until we get a valid file tree to process. 
Stop when we + * reach the end of the list (or get an eof on stdin) + */ + for (;;) { + if (fthead == NULL) { + /* + * the user didn't supply any args, get the file trees + * to process from stdin; + */ + if (getpathname(farray[0], PAXPATHLEN+1) == NULL) + return(-1); + } else { + /* + * the user supplied the file args as arguments to pax + */ + if (ftcur == NULL) + ftcur = fthead; + else if ((ftcur = ftcur->fow) == NULL) + return(-1); + if (ftcur->chflg) { + /* First fchdir() back... */ + if (fchdir(cwdfd) == -1) { + syswarn(1, errno, + "Can't fchdir to starting directory"); + return(-1); + } + if (chdir(ftcur->fname) == -1) { + syswarn(1, errno, "Can't chdir to %s", + ftcur->fname); + return(-1); + } + continue; + } else + farray[0] = ftcur->fname; + } + + /* + * watch it, fts wants the file arg stored in a array of char + * ptrs, with the last one a null. we use a two element array + * and set farray[0] to point at the buffer with the file name + * in it. We cannot pass all the file args to fts at one shot + * as we need to keep a handle on which file arg generates what + * files (the -n and -d flags need this). If the open is + * successful, return a 0. + */ + if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL) + break; + } + return(0); +} + +/* + * next_file() + * supplies the next file to process in the supplied archd structure. + * Return: + * 0 when contents of arcn have been set with the next file, -1 when done. + */ + +int +next_file(ARCHD *arcn) +{ + int cnt; + + /* + * ftree_sel() might have set the ftree_skip flag if the user has the + * -n option and a file was selected from this file arg tree. (-n says + * only one member is matched for each pattern) ftree_skip being 1 + * forces us to go to the next arg now. 
+ */ + if (ftree_skip) { + /* + * clear and go to next arg + */ + ftree_skip = 0; + if (ftree_arg() < 0) + return(-1); + } + + /* + * loop until we get a valid file to process + */ + for (;;) { + if ((ftent = fts_read(ftsp)) == NULL) { + if (errno) + syswarn(1, errno, "next_file"); + /* + * out of files in this tree, go to next arg, if none + * we are done + */ + if (ftree_arg() < 0) + return(-1); + continue; + } + + /* + * handle each type of fts_read() flag + */ + switch (ftent->fts_info) { + case FTS_D: + case FTS_DEFAULT: + case FTS_F: + case FTS_SL: + case FTS_SLNONE: + /* + * these are all ok + */ + break; + case FTS_DP: + /* + * already saw this directory. If the user wants file + * access times reset, we use this to restore the + * access time for this directory since this is the + * last time we will see it in this file subtree + * remember to force the time (this is -t on a read + * directory, not a created directory). + */ + if (!tflag) + continue; + do_atdir(ftent->fts_path, ftent->fts_statp->st_dev, + ftent->fts_statp->st_ino); + continue; + case FTS_DC: + /* + * fts claims a file system cycle + */ + paxwarn(1,"File system cycle found at %s",ftent->fts_path); + continue; + case FTS_DNR: + syswarn(1, ftent->fts_errno, + "Unable to read directory %s", ftent->fts_path); + continue; + case FTS_ERR: + syswarn(1, ftent->fts_errno, + "File system traversal error"); + continue; + case FTS_NS: + case FTS_NSOK: + syswarn(1, ftent->fts_errno, + "Unable to access %s", ftent->fts_path); + continue; + } + + /* + * ok got a file tree node to process. copy info into arcn + * structure (initialize as required) + */ + arcn->skip = 0; + arcn->pad = 0; + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + memcpy(&arcn->sb, ftent->fts_statp, sizeof(arcn->sb)); + + /* + * file type based set up and copy into the arcn struct + * SIDE NOTE: + * we try to reset the access time on all files and directories + * we may read when the -t flag is specified. 
files are reset + * when we close them after copying. we reset the directories + * when we are done with their file tree (we also clean up at + * end in case we cut short a file tree traversal). However + * there is no way to reset access times on symlinks. + */ + switch (S_IFMT & arcn->sb.st_mode) { + case S_IFDIR: + arcn->type = PAX_DIR; + if (!tflag) + break; + add_atdir(ftent->fts_path, arcn->sb.st_dev, + arcn->sb.st_ino, &arcn->sb.st_mtim, + &arcn->sb.st_atim); + break; + case S_IFCHR: + arcn->type = PAX_CHR; + break; + case S_IFBLK: + arcn->type = PAX_BLK; + break; + case S_IFREG: + /* + * only regular files with have data to store on the + * archive. all others will store a zero length skip. + * the skip field is used by pax for actual data it has + * to read (or skip over). + */ + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + case S_IFLNK: + arcn->type = PAX_SLK; + /* + * have to read the symlink path from the file + */ + if ((cnt = readlink(ftent->fts_path, arcn->ln_name, + PAXPATHLEN)) == -1) { + syswarn(1, errno, "Unable to read symlink %s", + ftent->fts_path); + continue; + } + /* + * set link name length, watch out readlink does not + * NUL terminate the link path + */ + arcn->ln_name[cnt] = '\0'; + arcn->ln_nlen = cnt; + break; + case S_IFSOCK: + /* + * under BSD storing a socket is senseless but we will + * let the format specific write function make the + * decision of what to do with it. + */ + arcn->type = PAX_SCK; + break; + case S_IFIFO: + arcn->type = PAX_FIF; + break; + } + break; + } + + /* + * copy file name, set file name length + */ + arcn->nlen = strlcpy(arcn->name, ftent->fts_path, sizeof(arcn->name)); + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ + arcn->org_name = ftent->fts_path; + return(0); +} + +/* + * getpathname() + * Reads a pathname from stdin, handling NUL- or newline-termination. 
+ * Return: + * NULL at end of file, otherwise the NUL-terminated buffer. + */ + +static char * +getpathname(char *buf, int buflen) +{ + char *bp, *ep; + int ch, term; + + if (zeroflag) { + /* + * Read a NUL-terminated pathname, being especially + * paranoid about proper termination and pathname length. + */ + for (bp = buf, ep = buf + buflen; bp < ep; bp++) { + if ((ch = getchar()) == EOF) { + if (bp != buf) + paxwarn(1, "Ignoring unterminated " + "pathname at EOF"); + return(NULL); + } + if ((*bp = ch) == '\0') + return(buf); + } + /* Too long - skip this path */ + *--bp = '\0'; + term = '\0'; + } else { + if (fgets(buf, buflen, stdin) == NULL) + return(NULL); + if ((bp = strchr(buf, '\n')) != NULL || feof(stdin)) { + if (bp != NULL) + *bp = '\0'; + return(buf); + } + /* Too long - skip this path */ + term = '\n'; + } + while ((ch = getchar()) != term && ch != EOF) + continue; + paxwarn(1, "Ignoring too-long pathname: %s", buf); + return(NULL); +} diff --git a/bin/pax/gen_subs.c b/bin/pax/gen_subs.c new file mode 100644 index 0000000..7eb8200 --- /dev/null +++ b/bin/pax/gen_subs.c @@ -0,0 +1,401 @@ +/* $OpenBSD: gen_subs.c,v 1.32 2016/08/26 05:06:14 guenther Exp $ */ +/* $NetBSD: gen_subs.c,v 1.5 1995/03/21 09:07:26 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <utmp.h> +#include <vis.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "extern.h" + +/* + * a collection of general purpose subroutines used by pax + */ + +/* + * constants used by ls_list() when printing out archive members + */ +#define MODELEN 20 +#define DATELEN 64 +#define SECSPERDAY (24 * 60 * 60) +#define SIXMONTHS (SECSPERDAY * 365 / 2) +#define CURFRMT "%b %e %H:%M" +#define OLDFRMT "%b %e %Y" +#define NAME_WIDTH 8 +#define TIMEFMT(t, now) \ + (((t) + SIXMONTHS <= (now) || (t) > (now)) ? 
OLDFRMT : CURFRMT) + +/* + * ls_list() + * list the members of an archive in ls format + */ + +void +ls_list(ARCHD *arcn, time_t now, FILE *fp) +{ + struct stat *sbp; + char f_mode[MODELEN]; + char f_date[DATELEN]; + int term; + + term = zeroflag ? '\0' : '\n'; /* path termination character */ + + /* + * if not verbose, just print the file name + */ + if (!vflag) { + if (zeroflag) + (void)fputs(arcn->name, fp); + else + safe_print(arcn->name, fp); + (void)putc(term, fp); + (void)fflush(fp); + return; + } + + /* + * user wants long mode + */ + sbp = &(arcn->sb); + strmode(sbp->st_mode, f_mode); + + /* + * print file mode, link count, uid, gid and time + */ + if (strftime(f_date, sizeof(f_date), TIMEFMT(sbp->st_mtime, now), + localtime(&(sbp->st_mtime))) == 0) + f_date[0] = '\0'; + (void)fprintf(fp, "%s%2u %-*.*s %-*.*s ", f_mode, (unsigned)sbp->st_nlink, + NAME_WIDTH, UT_NAMESIZE, user_from_uid(sbp->st_uid, 0), + NAME_WIDTH, UT_NAMESIZE, group_from_gid(sbp->st_gid, 0)); + + /* + * print device id's for devices, or sizes for other nodes + */ + if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK)) + (void)fprintf(fp, "%4lu, %4lu ", + (unsigned long)MAJOR(sbp->st_rdev), + (unsigned long)MINOR(sbp->st_rdev)); + else { + (void)fprintf(fp, "%9zu ", sbp->st_size); + } + + /* + * print name and link info for hard and soft links + */ + (void)fputs(f_date, fp); + (void)putc(' ', fp); + safe_print(arcn->name, fp); + if (PAX_IS_HARDLINK(arcn->type)) { + fputs(" == ", fp); + safe_print(arcn->ln_name, fp); + } else if (arcn->type == PAX_SLK) { + fputs(" -> ", fp); + safe_print(arcn->ln_name, fp); + } + (void)putc(term, fp); + (void)fflush(fp); +} + +/* + * tty_ls() + * print a short summary of file to tty. 
+ */ + +void +ls_tty(ARCHD *arcn) +{ + char f_date[DATELEN]; + char f_mode[MODELEN]; + time_t now = time(NULL); + + /* + * convert time to string, and print + */ + if (strftime(f_date, DATELEN, TIMEFMT(arcn->sb.st_mtime, now), + localtime(&(arcn->sb.st_mtime))) == 0) + f_date[0] = '\0'; + strmode(arcn->sb.st_mode, f_mode); + tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name); +} + +void +safe_print(const char *str, FILE *fp) +{ + char visbuf[5]; + const char *cp; + + /* + * if printing to a tty, use vis(3) to print special characters. + */ + if (isatty(fileno(fp))) { + for (cp = str; *cp; cp++) { + (void)vis(visbuf, cp[0], VIS_CSTYLE, cp[1]); + (void)fputs(visbuf, fp); + } + } else { + (void)fputs(str, fp); + } +} + +/* + * asc_ul() + * convert hex/octal character string into a u_long. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * unsigned long value + */ + +u_long +asc_ul(char *str, int len, int base) +{ + char *stop; + u_long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ul_asc() + * convert an unsigned long into an hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. 
+ */ + +int +ul_asc(u_long val, char *str, int len, int base) +{ + char *pt; + u_long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + val >>= 4; + if (val == 0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * asc_ull() + * Convert hex/octal character string into a unsigned long long. + * We do not have to check for overflow! (The headers in all + * supported formats are not large enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. 
+ * Return: + * unsigned long long value + */ + +unsigned long long +asc_ull(char *str, int len, int base) +{ + char *stop; + unsigned long long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ull_asc() + * Convert an unsigned long long into a hex/oct ascii string. + * Pads with LEADING ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +ull_asc(unsigned long long val, char *str, int len, int base) +{ + char *pt; + unsigned long long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + val >>= 4; + if (val == 0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. 
+ */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * Copy at max min(bufz, fieldsz) chars from field to buf, stopping + * at the first NUL char. NUL terminate buf if there is room left. + */ +size_t +fieldcpy(char *buf, size_t bufsz, const char *field, size_t fieldsz) +{ + char *p = buf; + const char *q = field; + size_t i = 0; + + if (fieldsz > bufsz) + fieldsz = bufsz; + while (i < fieldsz && *q != '\0') { + *p++ = *q++; + i++; + } + if (i < bufsz) + *p = '\0'; + return(i); +} diff --git a/bin/pax/getoldopt.c b/bin/pax/getoldopt.c new file mode 100644 index 0000000..8ceb189 --- /dev/null +++ b/bin/pax/getoldopt.c @@ -0,0 +1,69 @@ +/* $OpenBSD: getoldopt.c,v 1.9 2009/10/27 23:59:22 deraadt Exp $ */ +/* $NetBSD: getoldopt.c,v 1.3 1995/03/21 09:07:28 cgd Exp $ */ + +/* + * Plug-compatible replacement for getopt() for parsing tar-like + * arguments. If the first argument begins with "-", it uses getopt; + * otherwise, it uses the old rules used by tar, dump, and ps. + * + * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) and placed + * in the Public Domain for your edification and enjoyment. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +int +getoldopt(int argc, char **argv, const char *optstring) +{ + static char *key; /* Points to next keyletter */ + static char use_getopt; /* !=0 if argv[1][0] was '-' */ + char c; + char *place; + + optarg = NULL; + + if (key == NULL) { /* First time */ + if (argc < 2) + return (-1); + key = argv[1]; + if (*key == '-') + use_getopt++; + else + optind = 2; + } + + if (use_getopt) + return (getopt(argc, argv, optstring)); + + c = *key++; + if (c == '\0') { + key--; + return (-1); + } + place = strchr(optstring, c); + + if (place == NULL || c == ':') { + fprintf(stderr, "%s: unknown option %c\n", argv[0], c); + return ('?'); + } + + place++; + if (*place == ':') { + if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + fprintf(stderr, "%s: %c argument missing\n", + argv[0], c); + return ('?'); + } + } + + return (c); +} diff --git a/bin/pax/options.c b/bin/pax/options.c new file mode 100644 index 0000000..917414c --- /dev/null +++ b/bin/pax/options.c @@ -0,0 +1,1788 @@ +/* $OpenBSD: options.c,v 1.103 2019/11/15 20:34:17 naddy Exp $ */ +/* $NetBSD: options.c,v 1.6 1996/03/26 23:54:18 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <limits.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "cpio.h" +#include "tar.h" +#include "extern.h" + +/* + * argv[0] names. Used for tar and cpio emulation + */ + +#define NM_TAR "tar" +#define NM_CPIO "cpio" +#define NM_PAX "pax" + +/* + * Constants used to specify the legal sets of flags in pax. For each major + * operation mode of pax, a set of illegal flags is defined. If any one of + * those illegal flags are found set, we scream and exit + */ + +/* + * flags (one for each option). 
+ */ +#define AF 0x00000001 +#define BF 0x00000002 +#define CF 0x00000004 +#define DF 0x00000008 +#define FF 0x00000010 +#define IF 0x00000020 +#define KF 0x00000040 +#define LF 0x00000080 +#define NF 0x00000100 +#define OF 0x00000200 +#define PF 0x00000400 +#define RF 0x00000800 +#define SF 0x00001000 +#define TF 0x00002000 +#define UF 0x00004000 +#define VF 0x00008000 +#define WF 0x00010000 +#define XF 0x00020000 +#define CBF 0x00040000 /* nonstandard extension */ +#define CDF 0x00080000 /* nonstandard extension */ +#define CEF 0x00100000 /* nonstandard extension */ +#define CGF 0x00200000 /* nonstandard extension */ +#define CHF 0x00400000 /* nonstandard extension */ +#define CLF 0x00800000 /* nonstandard extension */ +#define CPF 0x01000000 /* nonstandard extension */ +#define CTF 0x02000000 /* nonstandard extension */ +#define CUF 0x04000000 /* nonstandard extension */ +#define CXF 0x08000000 +#define CYF 0x10000000 /* nonstandard extension */ +#define CZF 0x20000000 /* nonstandard extension */ +#define C0F 0x40000000 /* nonstandard extension */ + +/* + * ascii string indexed by bit position above (alter the above and you must + * alter this string) used to tell the user what flags caused us to complain + */ +#define FLGCH "abcdfiklnoprstuvwxBDEGHLPTUXYZ0" + +/* + * legal pax operation bit patterns + */ + +#define ISLIST(x) (((x) & (RF|WF)) == 0) +#define ISEXTRACT(x) (((x) & (RF|WF)) == RF) +#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF) +#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF)) +#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF)) +#define ISWRITE(x) (((x) & (RF|WF)) == WF) + +/* + * Illegal option flag subsets based on pax operation + */ + +#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CPF|CXF) +#define BDARCH (CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF) +#define BDCOPY (AF|BF|FF|OF|XF|CBF|CEF) +#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CPF|CXF|CYF|CZF) + + +/* + * Routines which handle command line options + */ + +static char flgch[] 
= FLGCH; /* list of all possible flags */ +static OPLIST *ophead = NULL; /* head for format specific options -x */ +static OPLIST *optail = NULL; /* option tail */ + +static int no_op(void); +static void printflg(unsigned int); +static off_t str_offt(char *); +static char *get_line(FILE *fp); +static void pax_options(int, char **); +static void pax_usage(void); +static void tar_options(int, char **); +static void tar_usage(void); +#ifndef NOCPIO +static void cpio_options(int, char **); +static void cpio_usage(void); +#endif + +static int compress_id(char *_blk, int _size); +static int gzip_id(char *_blk, int _size); +static int bzip2_id(char *_blk, int _size); +static int xz_id(char *_blk, int _size); + +#define GZIP_CMD "gzip" /* command to run as gzip */ +#define COMPRESS_CMD "compress" /* command to run as compress */ +#define BZIP2_CMD "bzip2" /* command to run as bzip2 */ +#define XZ_CMD "xz" /* command to run as xz */ + +/* + * Format specific routine table + * (see pax.h for description of each function) + * + * name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read, + * read, end_read, st_write, write, end_write, trail, + * rd_data, wr_data, options + */ + +FSUB fsub[] = { +#ifdef NOCPIO +/* 0: OLD BINARY CPIO */ + { }, +/* 1: OLD OCTAL CHARACTER CPIO */ + { }, +/* 2: SVR4 HEX CPIO */ + { }, +/* 3: SVR4 HEX CPIO WITH CRC */ + { }, +#else +/* 0: OLD BINARY CPIO */ + {"bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd, + bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 1: OLD OCTAL CHARACTER CPIO */ + {"cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd, + cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 2: SVR4 HEX CPIO */ + {"sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd, + vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 3: SVR4 HEX CPIO WITH CRC */ + {"sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, 
crc_id, crc_strd, + vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, +#endif +/* 4: OLD TAR */ + {"tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op, + tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, tar_trail, + tar_opt}, + +/* 5: POSIX USTAR */ + {"ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, no_op, + ustar_rd, tar_endrd, no_op, ustar_wr, tar_endwr, tar_trail, + tar_opt}, + +#ifdef SMALL +/* 6: compress, to detect failure to use -Z */ + { }, +/* 7: xz, to detect failure to decompress it */ + { }, +/* 8: bzip2, to detect failure to use -j */ + { }, +/* 9: gzip, to detect failure to use -z */ + { }, +#else +/* 6: compress, to detect failure to use -Z */ + {NULL, 0, 4, 0, 0, 0, 0, compress_id}, +/* 7: xz, to detect failure to decompress it */ + {NULL, 0, 4, 0, 0, 0, 0, xz_id}, +/* 8: bzip2, to detect failure to use -j */ + {NULL, 0, 4, 0, 0, 0, 0, bzip2_id}, +/* 9: gzip, to detect failure to use -z */ + {NULL, 0, 4, 0, 0, 0, 0, gzip_id}, +#endif +}; +#define F_OCPIO 0 /* format when called as cpio -6 */ +#define F_ACPIO 1 /* format when called as cpio -c */ +#define F_CPIO 3 /* format when called as cpio */ +#define F_OTAR 4 /* format when called as tar -o */ +#define F_TAR 5 /* format when called as tar */ +#define DEFLT 5 /* default write format from list above */ + +/* + * ford is the archive search order used by get_arc() to determine what kind + * of archive we are dealing with. This helps to properly id archive formats + * some formats may be subsets of others.... + */ +int ford[] = {5, 4, 9, 8, 7, 6, 3, 2, 1, 0, -1}; + +/* + * Do we have -C anywhere and what is it? + */ +int havechd = 0; +char *chdname = NULL; + +/* + * options() + * figure out if we are pax, tar or cpio. 
Call the appropriate options + * parser + */ + +void +options(int argc, char **argv) +{ + extern char *__progname; + + /* + * Are we acting like pax, tar or cpio (based on argv[0]) + */ + argv0 = __progname; + + if (strcmp(NM_TAR, argv0) == 0) { + op_mode = OP_TAR; + tar_options(argc, argv); + return; + } +#ifndef NOCPIO + else if (strcmp(NM_CPIO, argv0) == 0) { + op_mode = OP_CPIO; + cpio_options(argc, argv); + return; + } +#endif /* !NOCPIO */ + /* + * assume pax as the default + */ + argv0 = NM_PAX; + op_mode = OP_PAX; + pax_options(argc, argv); +} + +/* + * pax_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +pax_options(int argc, char **argv) +{ + int c; + unsigned i; + unsigned int flg = 0; + unsigned int bflg = 0; + const char *errstr; + char *pt; + + /* + * process option flags + */ + while ((c=getopt(argc,argv,"ab:cdf:ijklno:p:rs:tuvwx:zB:DE:G:HJLOPT:U:XYZ0")) + != -1) { + switch (c) { + case 'a': + /* + * append + */ + flg |= AF; + break; + case 'b': + /* + * specify blocksize + */ + flg |= BF; + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + pax_usage(); + } + break; + case 'c': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'd': + /* + * match only dir on extract, not the subtree at dir + */ + dflag = 1; + flg |= DF; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + flg |= FF; + break; + case 'i': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; + case 'j': + /* + * use bzip2. Non standard option. 
+ */ + gzip_program = BZIP2_CMD; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + flg |= KF; + break; + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'n': + /* + * select first match for a pattern only + */ + nflag = 1; + flg |= NF; + break; + case 'o': + /* + * pass format specific options + */ + flg |= OF; + if (opt_add(optarg) < 0) + pax_usage(); + break; + case 'p': + /* + * specify file characteristic options + */ + for (pt = optarg; *pt != '\0'; ++pt) { + switch (*pt) { + case 'a': + /* + * do not preserve access time + */ + patime = 0; + break; + case 'e': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * preserve uid/gid + */ + pids = 1; + break; + case 'p': + /* + * preserve file mode bits + */ + pmode = 1; + break; + default: + paxwarn(1, "Invalid -p string: %c", *pt); + pax_usage(); + break; + } + } + flg |= PF; + break; + case 'r': + /* + * read the archive + */ + flg |= RF; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= SF; + break; + case 't': + /* + * preserve access time on filesystem nodes we read + */ + tflag = 1; + flg |= TF; + break; + case 'u': + /* + * ignore those older files + */ + uflag = 1; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'w': + /* + * write an archive + */ + flg |= WF; + break; + case 'x': + /* + * specify an archive format on write + */ + for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i) + if (fsub[i].name != NULL && + strcmp(fsub[i].name, optarg) == 0) + break; + if (i < sizeof(fsub)/sizeof(FSUB)) { + frmt = &fsub[i]; + flg |= XF; + break; + } + paxwarn(1, "Unknown -x format: %s", 
optarg); + (void)fputs("pax: Known -x formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + if (fsub[i].name != NULL) + (void)fprintf(stderr, " %s", + fsub[i].name); + (void)fputs("\n\n", stderr); + pax_usage(); + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * non-standard option on number of bytes written on a + * single archive volume. + */ + if ((wrlimit = str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid write limit %s", optarg); + pax_usage(); + } + if (wrlimit % BLKMULT) { + paxwarn(1, "Write limit is not a %d byte multiple", + BLKMULT); + pax_usage(); + } + flg |= CBF; + break; + case 'D': + /* + * On extraction check file inode change time before the + * modification of the file name. Non standard option. + */ + Dflag = 1; + flg |= CDF; + break; + case 'E': + /* + * non-standard limit on read faults + * 0 indicates stop after first error, values + * indicate a limit + */ + flg |= CEF; + maxflt = strtonum(optarg, 0, INT_MAX, &errstr); + if (errstr) { + paxwarn(1, "Error count value: %s", errstr); + pax_usage(); + } + break; + case 'G': + /* + * non-standard option for selecting files within an + * archive by group (gid or name) + */ + if (grp_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CGF; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + flg |= CHF; + break; + case 'J': + /* + * use xz. Non standard option. + */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; + case 'O': + /* + * Force one volume. Non standard option. 
+ */ + force_one_volume = 1; + break; + case 'P': + /* + * do NOT follow symlinks (default) + */ + Lflag = 0; + flg |= CPF; + break; + case 'T': + /* + * non-standard option for selecting files within an + * archive by modification time range (lower,upper) + */ + if (trng_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CTF; + break; + case 'U': + /* + * non-standard option for selecting files within an + * archive by user (uid or name) + */ + if (usr_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CUF; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + flg |= CXF; + break; + case 'Y': + /* + * On extraction check file inode change time after the + * modification of the file name. Non standard option. + */ + Yflag = 1; + flg |= CYF; + break; + case 'Z': + /* + * On extraction check modification time after the + * modification of the file name. Non standard option. + */ + Zflag = 1; + flg |= CZF; + break; + case '0': + /* + * Use \0 as pathname terminator. + * (For use with the -print0 option of find(1).) + */ + zeroflag = 1; + flg |= C0F; + break; + default: + pax_usage(); + break; + } + } + + /* + * figure out the operation mode of pax read,write,extract,copy,append + * or list. check that we have not been given a bogus set of flags + * for the operation mode. + */ + if (ISLIST(flg)) { + act = LIST; + listf = stdout; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + pax_usage(); + if (bflg) { + printflg(flg); + pax_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. 
+ */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[DEFLT]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind], NULL) < 0) + pax_usage(); + break; + case COPY: + if (optind >= argc) { + paxwarn(0, "Destination directory was not supplied"); + pax_usage(); + } + --argc; + dirptr = argv[argc]; + /* FALL THROUGH */ + case ARCHIVE: + case APPND: + for (; optind < argc; optind++) + if (ftree_add(argv[optind], 0) < 0) + pax_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + + +/* + * tar_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +tar_options(int argc, char **argv) +{ + int c; + int Oflag = 0; + int nincfiles = 0; + int incfiles_max = 0; + struct incfile { + char *file; + char *dir; + }; + struct incfile *incfiles = NULL; + + /* + * Set default values. + */ + rmleadslash = 1; + + /* + * process option flags + */ + while ((c = getoldopt(argc, argv, + "b:cef:hjmopqruts:vwxzBC:HI:JLNOPXZ014578")) != -1) { + switch (c) { + case 'b': + /* + * specify blocksize in 512-byte blocks + */ + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + tar_usage(); + } + wrblksz *= 512; /* XXX - check for int oflow */ + break; + case 'c': + /* + * create an archive + */ + act = ARCHIVE; + break; + case 'e': + /* + * stop after first error + */ + maxflt = 0; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + break; + case 'h': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'j': + /* + * use bzip2. Non standard option. 
+ */ + gzip_program = BZIP2_CMD; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'O': + Oflag = 1; + break; + case 'o': + Oflag = 2; + tar_nodir = 1; + break; + case 'p': + /* + * preserve uid/gid and file mode, regardless of umask + */ + pmode = 1; + pids = 1; + break; + case 'q': + /* + * select first match for a pattern only + */ + nflag = 1; + break; + case 'r': + case 'u': + /* + * append to the archive + */ + act = APPND; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + tar_usage(); + break; + } + break; + case 't': + /* + * list contents of the tape + */ + act = LIST; + break; + case 'v': + /* + * verbose operation mode + */ + vflag++; + break; + case 'w': + /* + * interactive file rename + */ + iflag = 1; + break; + case 'x': + /* + * extract an archive, preserving mode, + * and mtime if possible. + */ + act = EXTRACT; + pmtime = 1; + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * Nothing to do here, this is pax default + */ + break; + case 'C': + havechd++; + chdname = optarg; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + break; + case 'I': + if (++nincfiles > incfiles_max) { + size_t n = nincfiles + 3; + struct incfile *p; + + p = reallocarray(incfiles, n, + sizeof(*incfiles)); + if (p == NULL) { + paxwarn(0, "Unable to allocate space " + "for option list"); + exit(1); + } + incfiles = p; + incfiles_max = n; + } + incfiles[nincfiles - 1].file = optarg; + incfiles[nincfiles - 1].dir = chdname; + break; + case 'J': + /* + * use xz. Non standard option. 
+ */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'N': + /* numeric uid and gid only */ + Nflag = 1; + break; + case 'P': + /* + * do not remove leading '/' from pathnames + */ + rmleadslash = 0; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + break; + case 'Z': + /* + * use compress. + */ + gzip_program = COMPRESS_CMD; + break; + case '0': + arcname = DEV_0; + break; + case '1': + arcname = DEV_1; + break; + case '4': + arcname = DEV_4; + break; + case '5': + arcname = DEV_5; + break; + case '7': + arcname = DEV_7; + break; + case '8': + arcname = DEV_8; + break; + default: + tar_usage(); + break; + } + } + argc -= optind; + argv += optind; + + if ((arcname == NULL) || (*arcname == '\0')) { + arcname = getenv("TAPE"); + if ((arcname == NULL) || (*arcname == '\0')) + arcname = "-"; + } + if ((arcname[0] == '-') && (arcname[1]== '\0')) + arcname = NULL; + + /* + * Traditional tar behaviour: list-like output goes to stdout unless + * writing the archive there. (pax uses stderr unless in list mode) + */ + if (act == LIST || act == EXTRACT || arcname != NULL) + listf = stdout; + + /* Traditional tar behaviour (pax wants to read file list from stdin) */ + if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0) + exit(0); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + default: + { + int sawpat = 0; + char *file, *dir; + + while (nincfiles || *argv != NULL) { + /* + * If we queued up any include files, + * pull them in now. Otherwise, check + * for -I and -C positional flags. + * Anything else must be a file to + * extract. 
+ */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = chdname; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + syswarn(1, errno, + "Unable to open %s", file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (pat_add(str, dir) < 0) + tar_usage(); + sawpat = 1; + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", + strcmp(file, "-") ? file : + "stdin"); + tar_usage(); + } + if (strcmp(file, "-") != 0) + fclose(fp); + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + chdname = *argv++; + havechd++; + } else if (pat_add(*argv++, chdname) < 0) + tar_usage(); + else + sawpat = 1; + } + /* + * if patterns were added, we are doing chdir() + * on a file-by-file basis, else, just one + * global chdir (if any) after opening input. + */ + if (sawpat > 0) + chdname = NULL; + } + break; + case ARCHIVE: + case APPND: + frmt = &(fsub[Oflag ? F_OTAR : F_TAR]); + + if (chdname != NULL) { /* initial chdir() */ + if (ftree_add(chdname, 1) < 0) + tar_usage(); + } + + while (nincfiles || *argv != NULL) { + char *file, *dir; + + /* + * If we queued up any include files, pull them in + * now. Otherwise, check for -I and -C positional + * flags. Anything else must be a file to include + * in the archive. 
+ */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = NULL; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + /* Set directory if needed */ + if (dir) { + if (ftree_add(dir, 1) < 0) + tar_usage(); + } + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + syswarn(1, errno, "Unable to open %s", + file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (ftree_add(str, 0) < 0) + tar_usage(); + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", + strcmp(file, "-") ? file : "stdin"); + tar_usage(); + } + if (strcmp(file, "-") != 0) + fclose(fp); + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + if (ftree_add(*argv++, 1) < 0) + tar_usage(); + havechd++; + } else if (ftree_add(*argv++, 0) < 0) + tar_usage(); + } + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + +int mkpath(char *); + +int +mkpath(path) + char *path; +{ + struct stat sb; + char *slash; + int done = 0; + + slash = path; + + while (!done) { + slash += strspn(slash, "/"); + slash += strcspn(slash, "/"); + + done = (*slash == '\0'); + *slash = '\0'; + + if (stat(path, &sb)) { + if (errno != ENOENT || mkdir(path, 0777)) { + paxwarn(1, "%s", path); + return (-1); + } + } else if (!S_ISDIR(sb.st_mode)) { + syswarn(1, ENOTDIR, "%s", path); + return (-1); + } + + if (!done) + *slash = '/'; + } + + return (0); +} + +#ifndef NOCPIO +/* + * cpio_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. 
If not, complain and exit + */ + +static void +cpio_options(int argc, char **argv) +{ + const char *errstr; + int c, list_only = 0; + unsigned i; + char *str; + FILE *fp; + + kflag = 1; + pids = 1; + pmode = 1; + pmtime = 0; + arcname = NULL; + dflag = 1; + act = -1; + nodirs = 1; + while ((c=getopt(argc,argv,"abcdfijklmoprstuvzABC:E:F:H:I:JLO:SZ6")) != -1) + switch (c) { + case 'a': + /* + * preserve access time on files read + */ + tflag = 1; + break; + case 'b': + /* + * swap bytes and half-words when reading data + */ + break; + case 'c': + /* + * ASCII cpio header + */ + frmt = &(fsub[F_ACPIO]); + break; + case 'd': + /* + * create directories as needed + */ + nodirs = 0; + break; + case 'f': + /* + * invert meaning of pattern list + */ + cflag = 1; + break; + case 'i': + /* + * restore an archive + */ + act = EXTRACT; + break; + case 'j': + /* + * use bzip2. Non standard option. + */ + gzip_program = BZIP2_CMD; + break; + case 'k': + break; + case 'l': + /* + * use links instead of copies when possible + */ + lflag = 1; + break; + case 'm': + /* + * preserve modification time + */ + pmtime = 1; + break; + case 'o': + /* + * create an archive + */ + act = ARCHIVE; + if (frmt == NULL) + frmt = &(fsub[F_CPIO]); + break; + case 'p': + /* + * copy-pass mode + */ + act = COPY; + break; + case 'r': + /* + * interactively rename files + */ + iflag = 1; + break; + case 's': + /* + * swap bytes after reading data + */ + break; + case 't': + /* + * list contents of archive + */ + list_only = 1; + break; + case 'u': + /* + * replace newer files + */ + kflag = 0; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + break; + case 'z': + /* + * use gzip. Non standard option. 
+ */ + gzip_program = GZIP_CMD; + break; + case 'A': + /* + * append mode + */ + act = APPND; + break; + case 'B': + /* + * Use 5120 byte block size + */ + wrblksz = 5120; + break; + case 'C': + /* + * set block size in bytes + */ + wrblksz = strtonum(optarg, 0, INT_MAX, &errstr); + if (errstr) { + paxwarn(1, "Invalid block size %s: %s", + optarg, errstr); + pax_usage(); + } + break; + case 'E': + /* + * file with patterns to extract or list + */ + if ((fp = fopen(optarg, "r")) == NULL) { + syswarn(1, errno, "Unable to open %s", + optarg); + cpio_usage(); + } + while ((str = get_line(fp)) != NULL) { + pat_add(str, NULL); + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", optarg); + cpio_usage(); + } + fclose(fp); + break; + case 'F': + case 'I': + case 'O': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + arcname = NULL; + break; + } + arcname = optarg; + break; + case 'H': + /* + * specify an archive format on write + */ + for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i) + if (fsub[i].name != NULL && + strcmp(fsub[i].name, optarg) == 0) + break; + if (i < sizeof(fsub)/sizeof(FSUB)) { + frmt = &fsub[i]; + break; + } + paxwarn(1, "Unknown -H format: %s", optarg); + (void)fputs("cpio: Known -H formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + if (fsub[i].name != NULL) + (void)fprintf(stderr, " %s", + fsub[i].name); + (void)fputs("\n\n", stderr); + cpio_usage(); + break; + case 'J': + /* + * use xz. Non standard option. + */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symbolic links + */ + Lflag = 1; + break; + case 'S': + /* + * swap halfwords after reading data + */ + break; + case 'Z': + /* + * use compress. Non standard option. 
+ */ + gzip_program = COMPRESS_CMD; + break; + case '6': + /* + * process Version 6 cpio format + */ + frmt = &(fsub[F_OCPIO]); + break; + case '?': + default: + cpio_usage(); + break; + } + argc -= optind; + argv += optind; + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case EXTRACT: + if (list_only) { + act = LIST; + + /* + * cpio is like pax: list to stderr + * unless in list mode + */ + listf = stdout; + } + while (*argv != NULL) + if (pat_add(*argv++, NULL) < 0) + cpio_usage(); + break; + case COPY: + if (*argv == NULL) { + paxwarn(0, "Destination directory was not supplied"); + cpio_usage(); + } + dirptr = *argv; + if (mkpath(dirptr) < 0) + cpio_usage(); + --argc; + ++argv; + /* FALL THROUGH */ + case ARCHIVE: + case APPND: + if (*argv != NULL) + cpio_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + while ((str = get_line(stdin)) != NULL) { + ftree_add(str, 0); + } + if (ferror(stdin)) { + syswarn(1, errno, "Unable to read from %s", + "stdin"); + cpio_usage(); + } + break; + default: + cpio_usage(); + break; + } +} +#endif /* !NOCPIO */ + +/* + * printflg() + * print out those invalid flag sets found to the user + */ + +static void +printflg(unsigned int flg) +{ + int nxt; + int pos = 0; + + (void)fprintf(stderr,"%s: Invalid combination of options:", argv0); + while ((nxt = ffs(flg)) != 0) { + flg >>= nxt; + pos += nxt; + (void)fprintf(stderr, " -%c", flgch[pos-1]); + } + (void)putc('\n', stderr); +} + +/* + * opt_next() + * called by format specific options routines to get each format specific + * flag and value specified with -o + * Return: + * pointer to next OPLIST entry or NULL (end of list). + */ + +OPLIST * +opt_next(void) +{ + OPLIST *opt; + + if ((opt = ophead) != NULL) + ophead = ophead->fow; + return(opt); +} + +/* + * bad_opt() + * generic routine used to complain about a format specific options + * when the format does not support options. 
/*
 * bad_opt()
 *	generic routine used to complain about a format specific options
 *	when the format does not support options.
 * Return:
 *	0 when no -o options were given.  Otherwise every pending option is
 *	printed and pax_usage() is called.
 */

int
bad_opt(void)
{
	OPLIST *opt;

	if (ophead == NULL)
		return(0);
	/*
	 * print all we were given
	 */
	paxwarn(1,"These format options are not supported");
	while ((opt = opt_next()) != NULL)
		(void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value);
	pax_usage();
	return(0);
}

/*
 * opt_add()
 *	breaks the value supplied to -o into a option name and value. options
 *	are given to -o in the form -o name=value,name=value
 *	multiple -o may be specified.
 * Return:
 *	0 if format in name=value format, -1 if -o is passed junk
 */

int
opt_add(const char *str)
{
	OPLIST *opt;
	char *frpt;
	char *pt;
	char *endpt;
	char *dstr;

	if ((str == NULL) || (*str == '\0')) {
		paxwarn(0, "Invalid option name");
		return(-1);
	}
	/* work on a private copy: it is cut up in place below */
	if ((dstr = strdup(str)) == NULL) {
		paxwarn(0, "Unable to allocate space for option list");
		return(-1);
	}
	frpt = endpt = dstr;

	/*
	 * break into name and values pieces and stuff each one into a
	 * OPLIST structure. When we know the format, the format specific
	 * option function will go through this list
	 */
	while ((frpt != NULL) && (*frpt != '\0')) {
		if ((endpt = strchr(frpt, ',')) != NULL)
			*endpt = '\0';
		if ((pt = strchr(frpt, '=')) == NULL) {
			paxwarn(0, "Invalid options format");
			/* dstr is NULL here once an entry owns the copy */
			free(dstr);
			return(-1);
		}
		if ((opt = malloc(sizeof(OPLIST))) == NULL) {
			paxwarn(0, "Unable to allocate space for option list");
			free(dstr);
			return(-1);
		}
		dstr = NULL;	/* parts of string going onto the OPLIST */
		*pt++ = '\0';
		opt->name = frpt;
		opt->value = pt;
		opt->fow = NULL;
		if (endpt != NULL)
			frpt = endpt + 1;
		else
			frpt = NULL;
		/* append to the global list (ophead/optail) */
		if (ophead == NULL) {
			optail = ophead = opt;
			continue;
		}
		optail->fow = opt;
		optail = opt;
	}
	/* non-NULL only when the loop body never ran (input began with NUL) */
	free(dstr);
	return(0);
}
/*
 * str_offt()
 *	Convert an expression of the following forms to an off_t > 0.
 *	1) A positive decimal number.
 *	2) A positive decimal number followed by a b (mult by 512).
 *	3) A positive decimal number followed by a k (mult by 1024).
 *	4) A positive decimal number followed by a m (mult by 1048576).
 *	5) A positive decimal number followed by a w (mult by sizeof int)
 *	6) Two or more positive decimal numbers (with/without b, k, m or w).
 *	   separated by x (also * for backwards compatibility), specifying
 *	   the product of the indicated values.
 *	The number is read with strtoll(3) base 0, so the 0x and 0 prefixes
 *	are honoured as well.
 * Return:
 *	0 for an error (including a result that does not fit), a positive
 *	value o.w.
 */

static off_t
str_offt(char *val)
{
	char *expr;
	long long num, mult, rest;

	num = strtoll(val, &expr, 0);
	if ((num == LLONG_MAX) || (num <= 0) || (expr == val))
		return(0);

	/*
	 * optional unit suffix
	 */
	mult = 1;
	switch (*expr) {
	case 'b':
		mult = 512;
		++expr;
		break;
	case 'k':
		mult = 1024;
		++expr;
		break;
	case 'm':
		mult = 1048576;
		++expr;
		break;
	case 'w':
		mult = sizeof(int);
		++expr;
		break;
	}

	/*
	 * test before multiplying: signed overflow is undefined, and a
	 * wrapped product can end up larger than the original value
	 */
	if (num > LLONG_MAX / mult)
		return(0);
	num *= mult;

	switch (*expr) {
	case '\0':
		break;
	case '*':
	case 'x':
		/*
		 * product with the rest of the expression; an invalid
		 * right hand side (0) makes the whole expression invalid
		 */
		rest = str_offt(expr + 1);
		if ((rest == 0) || (num > LLONG_MAX / rest))
			return(0);
		num *= rest;
		break;
	default:
		return(0);
	}

	/*
	 * reject values that do not survive the conversion to off_t
	 */
	if ((long long)(off_t)num != num)
		return(0);
	return(num);
}

/*
 * get_line()
 *	read the next non-empty line from f with getline(3) and strip the
 *	trailing newline.  Empty lines are skipped.
 * Return:
 *	a malloc'ed string the caller must free, or NULL at end of file or
 *	on a read error (callers tell the two apart with ferror()).
 */

char *
get_line(FILE *f)
{
	char *str = NULL;
	size_t size = 0;
	ssize_t len;

	do {
		/* the same buffer is reused while empty lines are skipped */
		len = getline(&str, &size, f);
		if (len == -1) {
			free(str);
			return NULL;
		}
		if (str[len - 1] == '\n')
			str[len - 1] = '\0';
	} while (str[0] == '\0');
	return str;
}
+ * Return: + * 0 + */ + +static int +no_op(void) +{ + return(0); +} + +/* + * pax_usage() + * print the usage summary to the user + */ + +void +pax_usage(void) +{ + (void)fputs( + "usage: pax [-0cdjnOvz] [-E limit] [-f archive] [-G group] [-s replstr]\n" + " [-T range] [-U user] [pattern ...]\n" + " pax -r [-0cDdijknOuvYZz] [-E limit] [-f archive] [-G group] [-o options]\n" + " [-p string] [-s replstr] [-T range] [-U user] [pattern ...]\n" + " pax -w [-0adHijLOPtuvXz] [-B bytes] [-b blocksize] [-f archive]\n" + " [-G group] [-o options] [-s replstr] [-T range] [-U user]\n" + " [-x format] [file ...]\n" + " pax -rw [-0DdHikLlnOPtuvXYZ] [-G group] [-p string] [-s replstr]\n" + " [-T range] [-U user] [file ...] directory\n", + stderr); + exit(1); +} + +/* + * tar_usage() + * print the usage summary to the user + */ + +void +tar_usage(void) +{ + (void)fputs( + "usage: tar {crtux}[014578befHhjLmNOoPpqsvwXZz]\n" + " [blocking-factor | archive | replstr] [-C directory] [-I file]\n" + " [file ...]\n" + " tar {-crtux} [-014578eHhjLmNOoPpqvwXZz] [-b blocking-factor]\n" + " [-C directory] [-f archive] [-I file] [-s replstr] [file ...]\n", + stderr); + exit(1); +} + +#ifndef NOCPIO +/* + * cpio_usage() + * print the usage summary to the user + */ + +void +cpio_usage(void) +{ + (void)fputs( + "usage: cpio -o [-AaBcjLvZz] [-C bytes] [-F archive] [-H format]\n" + " [-O archive] < name-list [> archive]\n" + " cpio -i [-6BbcdfjmrSstuvZz] [-C bytes] [-E file] [-F archive] [-H format]\n" + " [-I archive] [pattern ...] 
[< archive]\n" + " cpio -p [-adLlmuv] destination-directory < name-list\n", + stderr); + exit(1); +} +#endif /* !NOCPIO */ + +#ifndef SMALL +static int +compress_id(char *blk, int size) +{ + if (size >= 2 && blk[0] == '\037' && blk[1] == '\235') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "compress", 'Z'); + exit(1); + } + return (-1); +} + +static int +gzip_id(char *blk, int size) +{ + if (size >= 2 && blk[0] == '\037' && blk[1] == '\213') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "gzip", 'z'); + exit(1); + } + return (-1); +} + +static int +bzip2_id(char *blk, int size) +{ + if (size >= 3 && blk[0] == 'B' && blk[1] == 'Z' && blk[2] == 'h') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "bzip2", 'j'); + exit(1); + } + return (-1); +} + +static int +xz_id(char *blk, int size) +{ + if (size >= 6 && memcmp(blk, "\xFD\x37\x7A\x58\x5A", 6) == 0) { + paxwarn(0, "input compressed with xz"); + exit(1); + } + return (-1); +} +#endif /* !SMALL */ diff --git a/bin/pax/pat_rep.c b/bin/pax/pat_rep.c new file mode 100644 index 0000000..deddca0 --- /dev/null +++ b/bin/pax/pat_rep.c @@ -0,0 +1,1108 @@ +/* $OpenBSD: pat_rep.c,v 1.43 2017/09/16 07:42:34 otto Exp $ */ +/* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "pax.h" +#include "extern.h" + +/* + * data structure for storing user supplied replacement strings (-s) + */ +typedef struct replace { + char *nstr; /* the new string we will substitute with */ + regex_t rcmp; /* compiled regular expression used to match */ + int flgs; /* print conversions? global in operation? */ +#define PRNT 0x1 +#define GLOB 0x2 + struct replace *fow; /* pointer to next pattern */ +} REPLACE; + +/* + * routines to handle pattern matching, name modification (regular expression + * substitution and interactive renames), and destination name modification for + * copy (-rw). 
Both file name and link names are adjusted as required in these + * routines. + */ + +#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ +static PATTERN *pathead = NULL; /* file pattern match list head */ +static PATTERN *pattail = NULL; /* file pattern match list tail */ +static REPLACE *rephead = NULL; /* replacement string list head */ +static REPLACE *reptail = NULL; /* replacement string list tail */ + +static int rep_name(char *, size_t, int *, int); +static int tty_rename(ARCHD *); +static int fix_path(char *, int *, char *, int); +static int fn_match(char *, char *, char **); +static char * range_match(char *, int); +static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); + +/* + * rep_add() + * parses the -s replacement string; compiles the regular expression + * and stores the compiled value and it's replacement string together in + * replacement string list. Input to this function is of the form: + * /old/new/pg + * The first char in the string specifies the delimiter used by this + * replacement string. "Old" is a regular expression in "ed" format which + * is compiled by regcomp() and is applied to filenames. "new" is the + * substitution string; p and g are options flags for printing and global + * replacement (over the single filename) + * Return: + * 0 if a proper replacement string and regular expression was added to + * the list of replacement patterns; -1 otherwise. 
+ */ + +int +rep_add(char *str) +{ + char *pt1; + char *pt2; + REPLACE *rep; + int res; + char rebuf[BUFSIZ]; + + /* + * throw out the bad parameters + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty replacement string"); + return(-1); + } + + /* + * first character in the string specifies what the delimiter is for + * this expression + */ + for (pt1 = str+1; *pt1; pt1++) { + if (*pt1 == '\\') { + pt1++; + continue; + } + if (*pt1 == *str) + break; + } + if (*pt1 == '\0') { + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + /* + * allocate space for the node that handles this replacement pattern + * and split out the regular expression and try to compile it + */ + if ((rep = malloc(sizeof(REPLACE))) == NULL) { + paxwarn(1, "Unable to allocate memory for replacement string"); + return(-1); + } + + *pt1 = '\0'; + if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { + regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); + paxwarn(1, "%s while compiling regular expression %s", rebuf, str); + free(rep); + return(-1); + } + + /* + * put the delimiter back in case we need an error message and + * locate the delimiter at the end of the replacement string + * we then point the node at the new substitution string + */ + *pt1++ = *str; + for (pt2 = pt1; *pt2; pt2++) { + if (*pt2 == '\\') { + pt2++; + continue; + } + if (*pt2 == *str) + break; + } + if (*pt2 == '\0') { + regfree(&(rep->rcmp)); + free(rep); + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + *pt2 = '\0'; + rep->nstr = pt1; + pt1 = pt2++; + rep->flgs = 0; + + /* + * set the options if any + */ + while (*pt2 != '\0') { + switch (*pt2) { + case 'g': + case 'G': + rep->flgs |= GLOB; + break; + case 'p': + case 'P': + rep->flgs |= PRNT; + break; + default: + regfree(&(rep->rcmp)); + free(rep); + *pt1 = *str; + paxwarn(1, "Invalid replacement string option %s", str); + return(-1); + } + ++pt2; + } + + /* + * all done, link it in at the end + */ + rep->fow = 
NULL; + if (rephead == NULL) { + reptail = rephead = rep; + return(0); + } + reptail->fow = rep; + reptail = rep; + return(0); +} + +/* + * pat_add() + * add a pattern match to the pattern match list. Pattern matches are used + * to select which archive members are extracted. (They appear as + * arguments to pax in the list and read modes). If no patterns are + * supplied to pax, all members in the archive will be selected (and the + * pattern match list is empty). + * Return: + * 0 if the pattern was added to the list, -1 otherwise + */ + +int +pat_add(char *str, char *chdirname) +{ + PATTERN *pt; + + /* + * throw out the junk + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty pattern string"); + return(-1); + } + + /* + * allocate space for the pattern and store the pattern. the pattern is + * part of argv so do not bother to copy it, just point at it. Add the + * node to the end of the pattern list + */ + if ((pt = malloc(sizeof(PATTERN))) == NULL) { + paxwarn(1, "Unable to allocate memory for pattern string"); + return(-1); + } + + pt->pstr = str; + pt->pend = NULL; + pt->plen = strlen(str); + pt->fow = NULL; + pt->flgs = 0; + pt->chdname = chdirname; + + if (pathead == NULL) { + pattail = pathead = pt; + return(0); + } + pattail->fow = pt; + pattail = pt; + return(0); +} + +/* + * pat_chk() + * complain if any the user supplied pattern did not result in a match to + * a selected archive member. + */ + +void +pat_chk(void) +{ + PATTERN *pt; + int wban = 0; + + /* + * walk down the list checking the flags to make sure MTCH was set, + * if not complain + */ + for (pt = pathead; pt != NULL; pt = pt->fow) { + if (pt->flgs & MTCH) + continue; + if (!wban) { + paxwarn(1, "WARNING! These patterns were not matched:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", pt->pstr); + } +} + +/* + * pat_sel() + * the archive member which matches a pattern was selected. Mark the + * pattern as having selected an archive member. 
/*
 * pat_sel()
 *	the archive member which matches a pattern was selected. Mark the
 *	pattern as having selected an archive member. arcn->pat points at the
 *	pattern that was matched. arcn->pat is set in pat_match()
 *
 *	NOTE: When the -c option is used, we are called when there was no match
 *	by pat_match() (that means we did match before the inverted sense of
 *	the logic). Now this seems really strange at first, but with -c we
 *	need to keep track of those patterns that cause an archive member to NOT
 *	be selected (it found an archive member with a specified pattern)
 * Return:
 *	0 if the pattern pointed at by arcn->pat was tagged as creating a
 *	match, -1 otherwise.
 */

int
pat_sel(ARCHD *arcn)
{
	PATTERN *pt;
	PATTERN **ppt;
	size_t len;

	/*
	 * if no patterns just return
	 */
	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
		return(0);

	/*
	 * when we are NOT limited to a single match per pattern mark the
	 * pattern and return
	 */
	if (!nflag) {
		pt->flgs |= MTCH;
		return(0);
	}

	/*
	 * we reach this point only when we allow a single selected match per
	 * pattern, if the pattern matches a directory and we do not have -d
	 * (dflag) we are done with this pattern. We may also be handed a file
	 * in the subtree of a directory. in that case when we are operating
	 * with -d, this pattern was already selected and we are done
	 */
	if (pt->flgs & DIR_MTCH)
		return(0);

	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
		/*
		 * ok we matched a directory and we are allowing
		 * subtree matches but because of the -n only its children will
		 * match. This is tagged as a DIR_MTCH type.
		 * WATCH IT, the code assumes that pt->pend points
		 * into arcn->name and arcn->name has not been modified.
		 * If not we will have a big mess. Yup this is another kludge
		 */

		/*
		 * if this was a prefix match, remove trailing part of path
		 * so we can copy it. Future matches will be exact prefix match
		 */
		if (pt->pend != NULL)
			*pt->pend = '\0';

		/*
		 * the pattern text is replaced by a private copy of the
		 * (truncated) member name; the old pstr is not freed here
		 */
		if ((pt->pstr = strdup(arcn->name)) == NULL) {
			paxwarn(1, "Pattern select out of memory");
			if (pt->pend != NULL)
				*pt->pend = '/';
			pt->pend = NULL;
			return(-1);
		}

		/*
		 * put the trailing / back in the source string
		 */
		if (pt->pend != NULL) {
			*pt->pend = '/';
			pt->pend = NULL;
		}
		pt->plen = strlen(pt->pstr);

		/*
		 * strip off any trailing /, this should really never happen
		 * NOTE(review): assumes plen > 0; an empty name would make
		 * len wrap around -- confirm callers never pass one.
		 */
		len = pt->plen - 1;
		if (*(pt->pstr + len) == '/') {
			*(pt->pstr + len) = '\0';
			pt->plen = len;
		}
		pt->flgs = DIR_MTCH | MTCH;
		arcn->pat = pt;
		return(0);
	}

	/*
	 * we are then done with this pattern, so we delete it from the list
	 * because it can never be used for another match.
	 * Seems kind of strange to do for a -c, but the pax spec is really
	 * vague on the interaction of -c, -n and -d. We assume that when -c
	 * and the pattern rejects a member (i.e. it matched it) it is done.
	 * In effect we place the order of the flags as having -c last.
	 */
	pt = pathead;
	ppt = &pathead;
	/* ppt trails pt so the entry can be unlinked without a back pointer */
	while ((pt != NULL) && (pt != arcn->pat)) {
		ppt = &(pt->fow);
		pt = pt->fow;
	}

	if (pt == NULL) {
		/*
		 * should never happen....
		 */
		paxwarn(1, "Pattern list inconsistent");
		return(-1);
	}
	*ppt = pt->fow;
	free(pt);
	arcn->pat = NULL;
	return(0);
}
/*
 * pat_match()
 *	see if this archive member matches any supplied pattern, if a match
 *	is found, arcn->pat is set to point at the potential pattern. Later if
 *	this archive member is "selected" we process and mark the pattern as
 *	one which matched a selected archive member (see pat_sel())
 * Return:
 *	0 if this archive member should be processed, 1 if it should be
 *	skipped and -1 if we are done with all patterns (and pax should quit
 *	looking for more members)
 */

int
pat_match(ARCHD *arcn)
{
	PATTERN *pt;

	arcn->pat = NULL;

	/*
	 * if there are no more patterns and we have -n (and not -c) we are
	 * done. otherwise with no patterns to match, matches all
	 */
	if (pathead == NULL) {
		if (nflag && !cflag)
			return(-1);
		return(0);
	}

	/*
	 * have to search down the list one at a time looking for a match.
	 */
	pt = pathead;
	while (pt != NULL) {
		/*
		 * check for a file name match unless we have DIR_MTCH set in
		 * this pattern then we want a prefix match
		 */
		if (pt->flgs & DIR_MTCH) {
			/*
			 * this pattern was matched before to a directory
			 * as we must have -n set for this (but not -d). We can
			 * only match CHILDREN of that directory so we must use
			 * an exact prefix match (no wildcards).
			 */
			if ((arcn->name[pt->plen] == '/') &&
			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
				break;
		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
			break;
		pt = pt->fow;
	}

	/*
	 * return the result, remember that cflag (-c) inverts the sense of a
	 * match
	 */
	if (pt == NULL)
		return(cflag ? 0 : 1);

	/*
	 * we had a match, now when we invert the sense (-c) we reject this
	 * member. However we have to tag the pattern as being successful, (in
	 * a match, not in selecting an archive member) so we call pat_sel()
	 * here.
	 */
	arcn->pat = pt;
	if (!cflag)
		return(0);

	if (pat_sel(arcn) < 0)
		return(-1);
	arcn->pat = NULL;
	return(1);
}

/*
 * fn_match()
 *	match string against a shell style pattern: ? matches one character,
 *	* any run of characters, [...] a set or range ([!...] negates it) and
 *	\ quotes the next pattern character.
 * Return:
 *	0 if string matches pattern, -1 if it does not.  When the pattern is
 *	used up and the rest of string starts with a '/', this also counts as
 *	a (directory prefix) match unless dflag is 1.
 * Note: *pend may be changed to show where the prefix ends.  It is reset
 *	to NULL on entry and set to the '/' only for a prefix match.
 */

static int
fn_match(char *pattern, char *string, char **pend)
{
	char c;
	char test;

	*pend = NULL;
	for (;;) {
		switch (c = *pattern++) {
		case '\0':
			/*
			 * Ok we found an exact match
			 */
			if (*string == '\0')
				return(0);

			/*
			 * Check if it is a prefix match
			 */
			if ((dflag == 1) || (*string != '/'))
				return(-1);

			/*
			 * It is a prefix match, remember where the trailing
			 * / is located
			 */
			*pend = string;
			return(0);
		case '?':
			if ((test = *string++) == '\0')
				return (-1);
			break;
		case '*':
			c = *pattern;
			/*
			 * Collapse multiple *'s.
			 */
			while (c == '*')
				c = *++pattern;

			/*
			 * Optimized hack for pattern with a * at the end
			 */
			if (c == '\0')
				return (0);

			/*
			 * General case, use recursion: try the rest of the
			 * pattern at every remaining position of string.
			 */
			while ((test = *string) != '\0') {
				if (!fn_match(pattern, string, pend))
					return (0);
				++string;
			}
			return (-1);
		case '[':
			/*
			 * range match
			 */
			if (((test = *string++) == '\0') ||
			    ((pattern = range_match(pattern, test)) == NULL))
				return (-1);
			break;
		case '\\':
			/* a backslash at the very end can match nothing */
			if ((c = *pattern++) == '\0')
				return (-1);
			/* FALLTHROUGH */
		default:
			if (c != *string++)
				return (-1);
			break;
		}
	}
	/* NOTREACHED */
}

/*
 * range_match()
 *	match the character test against the bracket expression that starts
 *	at pattern (just past the '[').  A leading '!' negates the set; a-b
 *	denotes an inclusive range.
 * Return:
 *	pointer just past the closing ']' if test is accepted, NULL if it is
 *	rejected or the expression has no closing ']'.
 */

static char *
range_match(char *pattern, int test)
{
	char c;
	char c2;
	int negate;
	int ok = 0;

	if ((negate = (*pattern == '!')) != 0)
		++pattern;

	while ((c = *pattern++) != ']') {
		/*
		 * Illegal pattern
		 */
		if (c == '\0')
			return (NULL);

		/* a '-' right before ']' or the end is an ordinary character */
		if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') &&
		    (c2 != ']')) {
			if ((c <= test) && (test <= c2))
				ok = 1;
			pattern += 2;
		} else if (c == test)
			ok = 1;
	}
	/* accepted when exactly one of "in the set" and "negated" holds */
	return (ok == negate ? NULL : pattern);
}

/*
 * has_dotdot()
 *	Returns true iff the supplied path contains a ".." component, i.e. a
 *	".." that is delimited on both sides by a '/' or by the start or end
 *	of the path.
 */

int
has_dotdot(const char *path)
{
	const char *p = path;

	while ((p = strstr(p, "..")) != NULL) {
		if ((p == path || p[-1] == '/') &&
		    (p[2] == '/' || p[2] == '\0'))
			return (1);
		p += 2;
	}
	return (0);
}
+ * Return: + * 0 continue to process file, 1 skip this file, -1 pax is finished + */ + +int +mod_name(ARCHD *arcn) +{ + int res = 0; + + /* + * Strip off leading '/' if appropriate. + * Currently, this option is only set for the tar format. + */ + while (rmleadslash && arcn->name[0] == '/') { + if (arcn->name[1] == '\0') { + arcn->name[0] = '.'; + } else { + (void)memmove(arcn->name, &arcn->name[1], + strlen(arcn->name)); + arcn->nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + while (rmleadslash && arcn->ln_name[0] == '/' && + PAX_IS_HARDLINK(arcn->type)) { + if (arcn->ln_name[1] == '\0') { + arcn->ln_name[0] = '.'; + } else { + (void)memmove(arcn->ln_name, &arcn->ln_name[1], + strlen(arcn->ln_name)); + arcn->ln_nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + if (rmleadslash) { + const char *last = NULL; + const char *p = arcn->name; + + while ((p = strstr(p, "..")) != NULL) { + if ((p == arcn->name || p[-1] == '/') && + (p[2] == '/' || p[2] == '\0')) + last = p + 2; + p += 2; + } + if (last != NULL) { + last++; + paxwarn(1, "Removing leading \"%.*s\"", + (int)(last - arcn->name), arcn->name); + arcn->nlen = strlen(last); + if (arcn->nlen > 0) + memmove(arcn->name, last, arcn->nlen + 1); + else { + arcn->name[0] = '.'; + arcn->name[1] = '\0'; + arcn->nlen = 1; + } + } + } + + /* + * IMPORTANT: We have a problem. what do we do with symlinks? + * Modifying a hard link name makes sense, as we know the file it + * points at should have been seen already in the archive (and if it + * wasn't seen because of a read error or a bad archive, we lose + * anyway). But there are no such requirements for symlinks. On one + * hand the symlink that refers to a file in the archive will have to + * be modified to so it will still work at its new location in the + * file system. 
On the other hand a symlink that points elsewhere (and + * should continue to do so) should not be modified. There is clearly + * no perfect solution here. So we handle them like hardlinks. Clearly + * a replacement made by the interactive rename mapping is very likely + * to be correct since it applies to a single file and is an exact + * match. The regular expression replacements are a little harder to + * justify though. We claim that the symlink name is only likely + * to be replaced when it points within the file tree being moved and + * in that case it should be modified. what we really need to do is to + * call an oracle here. :) + */ + if (rephead != NULL) { + /* + * we have replacement strings, modify the name and the link + * name if any. + */ + if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0) + return(res); + + if (PAX_IS_LINK(arcn->type)) { + if ((res = rep_name(arcn->ln_name, + sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0) + return(res); + } + } + + if (iflag) { + /* + * perform interactive file rename, then map the link if any + */ + if ((res = tty_rename(arcn)) != 0) + return(res); + if (PAX_IS_LINK(arcn->type)) + sub_name(arcn->ln_name, &(arcn->ln_nlen), + sizeof(arcn->ln_name)); + } + return(res); +} + +/* + * tty_rename() + * Prompt the user for a replacement file name. A "." keeps the old name, + * a empty line skips the file, and an EOF on reading the tty, will cause + * pax to stop processing and exit. Otherwise the file name input, replaces + * the old one. + * Return: + * 0 process this file, 1 skip this file, -1 we need to exit pax + */ + +static int +tty_rename(ARCHD *arcn) +{ + char tmpname[PAXPATHLEN+2]; + int res; + + /* + * prompt user for the replacement name for a file, keep trying until + * we get some reasonable input. Archives may have more than one file + * on them with the same name (from updates etc). We print verbose info + * on the file so the user knows what is up. 
+ */ + tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); + + for (;;) { + ls_tty(arcn); + tty_prnt("Input new name, or a \".\" to keep the old name, "); + tty_prnt("or a \"return\" to skip this file.\n"); + tty_prnt("Input > "); + if (tty_read(tmpname, sizeof(tmpname)) < 0) + return(-1); + if (strcmp(tmpname, "..") == 0) { + tty_prnt("Try again, illegal file name: ..\n"); + continue; + } + if (strlen(tmpname) > PAXPATHLEN) { + tty_prnt("Try again, file name too long\n"); + continue; + } + break; + } + + /* + * empty file name, skips this file. a "." leaves it alone + */ + if (tmpname[0] == '\0') { + tty_prnt("Skipping file.\n"); + return(1); + } + if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { + tty_prnt("Processing continues, name unchanged.\n"); + return(0); + } + + /* + * ok the name changed. We may run into links that point at this + * file later. we have to remember where the user sent the file + * in order to repair any links. + */ + tty_prnt("Processing continues, name changed to: %s\n", tmpname); + res = add_name(arcn->name, arcn->nlen, tmpname); + arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ + if (res < 0) + return(-1); + return(0); +} + +/* + * set_dest() + * fix up the file name and the link name (if any) so this file will land + * in the destination directory (used during copy() -rw). The link name + * is only rewritten for hard links; for every other type ln_name is left + * untouched. + * Return: + * 0 if ok, -1 if failure (name too long) + */ + +int +set_dest(ARCHD *arcn, char *dest_dir, int dir_len) +{ + if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) + return(-1); + + /* + * It is really hard to deal with symlinks here, we cannot be sure + * if the name they point to was moved (or will be moved). It is best + * to leave them alone.
+ */ + if (!PAX_IS_HARDLINK(arcn->type)) + return(0); + + if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) + return(-1); + return(0); +} + +/* + * fix_path + * concatenate dir_name and or_name and store the result in or_name (if + * it fits). This is one ugly function. + * --Parameters-- + * or_name is the name to prefix; it is modified in place. + * or_len is the current length of or_name and is updated to the length + * of the combined name on success. + * dir_name and dir_len are the prefix and its length. + * Return: + * 0 if ok, -1 if the final name is too long (longer than PAXPATHLEN; + * or_name and or_len are then left unchanged) + */ + +static int +fix_path(char *or_name, int *or_len, char *dir_name, int dir_len) +{ + char *src; + char *dest; + char *start; + int len; + + /* + * we shift the or_name to the right enough to tack in the dir_name + * at the front. We make sure we have enough space for it all before + * we start. the directory name is expected to end in a slash, so we + * skip the leading slash of or_name if it starts with one. + * (assumes the or_name buffer holds PAXPATHLEN + 1 bytes - confirm + * against the callers) + */ + start = or_name; + src = start + *or_len; + dest = src + dir_len; + if (*start == '/') { + ++start; + --dest; + } + if ((len = dest - or_name) > PAXPATHLEN) { + paxwarn(1, "File name %s/%s, too long", dir_name, start); + return(-1); + } + *or_len = len; + + /* + * enough space, shift. copy back to front because the source and + * destination ranges overlap inside or_name + */ + while (src >= start) + *dest-- = *src--; + src = dir_name + dir_len - 1; + + /* + * splice in the destination directory name + */ + while (src >= dir_name) + *dest-- = *src--; + + *(or_name + len) = '\0'; + return(0); +} + +/* + * rep_name() + * walk down the list of replacement strings applying each one in order. + * when we find one with a successful substitution, we modify the name + * as specified. if required, we print the results. if the resulting name + * is empty, we will skip this archive member. Matching is done with + * regexec() from regex(3) (the older regexp() ought to win a prize as + * having the most cryptic library function manual page). + * --Parameters-- + * name is the file name we are going to apply the regular expressions to + * (and may be modified) + * nsize is the size of the name buffer. + * nlen is the length of this name (and is modified to hold the length of + * the final string). + * prnt is a flag that says whether to print the final result.
+ * Return: + * 0 if the name is to be used (a substitution was applied, or no pattern + * matched and the name is unchanged), 1 if we are to skip the file (the + * name ended up empty, the replacement failed, or the result was too long) + */ + +static int +rep_name(char *name, size_t nsize, int *nlen, int prnt) +{ + REPLACE *pt; + char *inpt; + char *outpt; + char *endpt; + char *rpt; + int found = 0; + int res; + regmatch_t pm[MAXSUBEXP]; + char nname[PAXPATHLEN+1]; /* final result of all replacements */ + char buf1[PAXPATHLEN+1]; /* where we work on the name */ + + /* + * copy the name into buf1, where we will work on it. We need to keep + * the orig string around so we can print out the result of the final + * replacement. We build up the final result in nname. inpt points at + * the string we apply the regular expression to. prnt is used to + * suppress printing when we handle replacements on the link field + * (the user already saw that substitution go by) + */ + pt = rephead; + (void)strlcpy(buf1, name, sizeof(buf1)); + inpt = buf1; + outpt = nname; + endpt = outpt + PAXPATHLEN; + + /* + * try each replacement string in order + */ + while (pt != NULL) { + do { + char *oinpt = inpt; + /* + * check for a successful substitution, if not go to + * the next pattern, or cleanup if we were global. + * oinpt remembers where this search started since the + * offsets in pm are relative to it. + * NOTE(review): eflags is always 0, so after a global + * restart a leading ^ can presumably match at the new + * inpt as well; confirm whether REG_NOTBOL is wanted + */ + if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) + break; + + /* + * ok we found one. We have three parts, the prefix + * which did not match, the section that did and the + * tail (that also did not match). Copy the prefix to + * the final output buffer (watching to make sure we + * do not create a string too long). + */ + found = 1; + rpt = inpt + pm[0].rm_so; + + while ((inpt < rpt) && (outpt < endpt)) + *outpt++ = *inpt++; + if (outpt == endpt) + break; + + /* + * for the second part (which matched the regular + * expression) apply the substitution using the + * replacement string and place it after the prefix in + * the final output. If we have problems, skip it.
+ */ + if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt)) + < 0) { + if (prnt) + paxwarn(1, "Replacement name error %s", + name); + return(1); + } + outpt += res; + + /* + * we set up to look again starting at the first + * character in the tail of the input string, right + * after the last character matched by the regular + * expression (inpt always points at the first char in + * the string to process). If we are not doing a global + * substitution, we will use inpt to copy the tail to + * the final result. Make sure we do not overrun the + * output buffer + */ + inpt += pm[0].rm_eo - pm[0].rm_so; + + if ((outpt == endpt) || (*inpt == '\0')) + break; + + /* + * if the user wants global we keep trying to + * substitute until it fails, then we are done. + * NOTE(review): a zero-length match whose replacement + * expands to nothing advances neither inpt nor outpt, + * so this loop looks like it can spin forever; confirm + */ + } while (pt->flgs & GLOB); + + if (found) + break; + + /* + * a successful substitution did NOT occur, try the next one + */ + pt = pt->fow; + } + + if (found) { + /* + * we had a substitution, copy the last tail piece (if there is + * room) to the final result + */ + while ((outpt < endpt) && (*inpt != '\0')) + *outpt++ = *inpt++; + + *outpt = '\0'; + if ((outpt == endpt) && (*inpt != '\0')) { + if (prnt) + paxwarn(1,"Replacement name too long %s >> %s", + name, nname); + return(1); + } + + /* + * inform the user of the result if wanted + */ + if (prnt && (pt->flgs & PRNT)) { + if (*nname == '\0') + (void)fprintf(stderr,"%s >> <empty string>\n", + name); + else + (void)fprintf(stderr,"%s >> %s\n", name, nname); + } + + /* + * if empty inform the caller this file is to be skipped + * otherwise copy the new name over the orig name and return + */ + if (*nname == '\0') + return(1); + *nlen = strlcpy(name, nname, nsize); + } + return(0); +} + +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * --Parameters-- + * rp is the compiled expression (only its subexpression count is read). + * pm holds the match offsets, which are relative to inpt. + * src is the replacement template: & stands for the whole match, a + * backslash followed by a digit for that subexpression, and a backslash + * followed by any other character for that character itself. + * dest and destend bound the output; nothing is written at or beyond + * destend and the output is not NUL terminated here. + * Return: + * -1 if error, or the number of characters added to the destination.
+ */ + +static int +resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest, + char *destend) +{ + char *spt; + char *dpt; + char c; + regmatch_t *pmpt; + int len; + int subexcnt; + + spt = src; + dpt = dest; + subexcnt = rp->re_nsub; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + /* + * see if we just have an ordinary replacement character + * or we refer to a subexpression. + */ + if (c == '&') { + pmpt = pm; + } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { + /* + * make sure there is a subexpression as specified + */ + if ((len = *spt++ - '0') > subexcnt) + return(-1); + pmpt = pm + len; + } else { + /* + * Ordinary character, just copy it (a backslash + * escapes the character that follows it) + */ + if ((c == '\\') && (*spt != '\0')) + c = *spt++; + *dpt++ = c; + continue; + } + + /* + * copy nothing and continue if the subexpression has no + * valid offsets or is empty + */ + if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || + ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + return (-1); + strncpy(dpt, inpt + pmpt->rm_so, len); + dpt += len; + } + return(dpt - dest); +} diff --git a/bin/pax/pax.1 b/bin/pax/pax.1 new file mode 100644 index 0000000..d146a96 --- /dev/null +++ b/bin/pax/pax.1 @@ -0,0 +1,1112 @@ +.\" $OpenBSD: pax.1,v 1.75 2020/01/16 16:46:46 schwarze Exp $ +.\" $NetBSD: pax.1,v 1.3 1995/03/21 09:07:37 cgd Exp $ +.\" +.\" Copyright (c) 1992 Keith Muller. +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Keith Muller of the University of California, San Diego. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1.
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)pax.1 8.4 (Berkeley) 4/18/94 +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt PAX 1 +.Os +.Sh NAME +.Nm pax +.Nd read and write file archives and copy directory hierarchies +.Sh SYNOPSIS +.Nm pax +.Op Fl 0cdjnOvz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar pattern ... +.Nm pax +.Fl r +.Op Fl 0cDdijknOuvYZz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl o Ar options +.Op Fl p Ar string +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar pattern ... 
+.Nm pax +.Fl w +.Op Fl 0adHijLOPtuvXz +.Op Fl B Ar bytes +.Op Fl b Ar blocksize +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl o Ar options +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Fl x Ar format +.Op Ar +.Nm pax +.Fl rw +.Op Fl 0DdHijkLlnOPtuvXYZ +.Op Fl G Ar group +.Op Fl p Ar string +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar +.Ar directory +.Sh DESCRIPTION +.Nm +will read, write, and list the members of an archive file +and will copy directory hierarchies. +.Nm +operation is independent of the specific archive format +and supports a wide variety of different archive formats. +A list of supported archive formats can be found under the description of the +.Fl x +option. +.Pp +The presence of the +.Fl r +and the +.Fl w +options specifies which of the following functional modes +.Nm +will operate under: +.Em list , read , write , +and +.Em copy . +.Bl -tag -width 6n +.It Aq none +.Em List . +.Nm +will write to standard output +a table of contents of the members of the archive file read from +standard input, whose pathnames match the specified +.Ar pattern +arguments. +The table of contents contains one filename per line +and is written using single line buffering. +.It Fl r +.Em Read . +.Nm +extracts the members of the archive file read from the standard input, +with pathnames matching the specified +.Ar pattern +arguments. +The archive format and blocking is automatically determined on input. +When an extracted file is a directory, the entire file hierarchy +rooted at that directory is extracted. +All extracted files are created relative to the current file hierarchy. +The setting of ownership, access and modification times, and file mode of +the extracted files are discussed in more detail under the +.Fl p +option. +.It Fl w +.Em Write . +.Nm +writes an archive containing the +.Ar file +operands to standard output +using the specified archive format. 
+When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +standard input. +When a +.Ar file +operand is also a directory, the entire file hierarchy rooted +at that directory will be included. +.It Fl rw +.Em Copy . +.Nm +copies the +.Ar file +operands to the destination +.Ar directory . +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +the standard input. +When a +.Ar file +operand is also a directory the entire file +hierarchy rooted at that directory will be included. +The effect of the +.Em copy +is as if the copied files were written to an archive file and then +subsequently extracted, except that there may be hard links between +the original and the copied files (see the +.Fl l +option below). +.Pp +.Sy Warning : +The destination +.Ar directory +must not be one of the +.Ar file +operands or a member of a file hierarchy rooted at one of the +.Ar file +operands. +The result of a +.Em copy +under these conditions is unpredictable. +.El +.Pp +While processing a damaged archive during a read or list operation, +.Nm +will attempt to recover from media defects and will search through the archive +to locate and process the largest number of archive members possible (see the +.Fl E +option for more details on error handling). +.Pp +The +.Ar directory +operand specifies a destination directory pathname. +If the +.Ar directory +operand does not exist, or it is not writable by the user, +or it is not of type directory, +.Nm +will exit with a non-zero exit status. +.Pp +The +.Ar pattern +operand is used to select one or more pathnames of archive members. +Archive members are selected using the pattern matching notation described +by +.Xr glob 7 . +When the +.Ar pattern +operand is not supplied, all members of the archive will be selected. +When a +.Ar pattern +matches a directory, the entire file hierarchy rooted at that directory will +be selected. 
+When a +.Ar pattern +operand does not select at least one archive member, +.Nm +will write these +.Ar pattern +operands in a diagnostic message to standard error +and then exit with a non-zero exit status. +.Pp +The +.Ar file +operand specifies the pathname of a file to be copied or archived. +When a +.Ar file +operand does not select at least one archive member, +.Nm +will write these +.Ar file +operand pathnames in a diagnostic message to standard error +and then exit with a non-zero exit status. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl 0 +Use the NUL +.Pq Ql \e0 +character as a pathname terminator, instead of newline +.Pq Ql \en . +This applies only to the pathnames read from standard input in +the write and copy modes, +and to the pathnames written to standard output in list mode. +This option is expected to be used in concert with the +.Fl print0 +function in +.Xr find 1 +or the +.Fl 0 +flag in +.Xr xargs 1 . +.It Fl a +Append the given +.Ar file +operands +to the end of an archive that was previously written. +If an archive format is not specified with a +.Fl x +option, the format currently being used in the archive will be selected. +Any attempt to append to an archive in a format different from the +format already used in the archive will cause +.Nm +to exit immediately +with a non-zero exit status. +The blocking size used in the archive volume where writing starts +will continue to be used for the remainder of that archive volume. +.Pp +.Sy Warning : +Many storage devices are not able to support the operations necessary +to perform an append operation. +Any attempt to append to an archive stored on such a device may damage the +archive or have other unpredictable results. +Tape drives in particular are more likely to not support an append operation. +An archive stored in a regular file system file or on a disk device will +usually support an append operation. 
+.It Fl B Ar bytes +Limit the number of bytes written to a single archive volume to +.Ar bytes . +The +.Ar bytes +limit can end with +.Sq Li m , +.Sq Li k , +or +.Sq Li b +to specify multiplication by 1048576 (1M), 1024 (1K) or 512, respectively. +A pair of +.Ar bytes +limits can be separated by +.Sq Li x +to indicate a product. +.Pp +.Sy Warning : +Only use this option when writing an archive to a device which supports +an end of file read condition based on last (or largest) write offset +(such as a regular file or a tape drive). +The use of this option with a floppy or hard disk is not recommended. +.It Fl b Ar blocksize +When writing an archive, +block the output at a positive decimal integer number of +bytes per write to the archive file. +The +.Ar blocksize +must be a multiple of 512 bytes with a maximum of 64512 bytes. +Archive block sizes larger than 32256 bytes violate the POSIX +standard and will not be portable to all systems. +A +.Ar blocksize +can end with +.Sq Li k +or +.Sq Li b +to specify multiplication by 1024 (1K) or 512, respectively. +A pair of blocksizes can be separated by +.Sq Li x +to indicate a product. +A specific archive device may impose additional restrictions on the size +of blocking it will support. +When blocking is not specified, the default +.Ar blocksize +is dependent on the specific archive format being used (see the +.Fl x +option). +.It Fl c +Match all file or archive members +.Em except +those specified by the +.Ar pattern +and +.Ar file +operands. +.It Fl D +This option is the same as the +.Fl u +option, except that the file inode change time is checked instead of the +file modification time. +The file inode change time can be used to select files whose inode information +(e.g., UID, GID, etc.) is newer than a copy of the file in the destination +.Ar directory . 
+.It Fl d +Cause files of type directory being copied or archived, or archive members of +type directory being extracted, to match only the directory file or archive +member and not the file hierarchy rooted at the directory. +.It Fl E Ar limit +Limit the number of consecutive read faults while trying to read a flawed +archive to +.Ar limit . +With a positive +.Ar limit , +.Nm +will attempt to recover from an archive read error and will +continue processing starting with the next file stored in the archive. +A +.Ar limit +of 0 will cause +.Nm +to stop operation after the first read error is detected on an archive volume. +The default +.Ar limit +is a small positive number of retries. +.It Fl f Ar archive +Specify +.Ar archive +as the pathname of the input or output archive, overriding the default +standard input (for list and read) +or standard output +(for write). +A single archive may span multiple files and different archive devices. +When required, +.Nm +will prompt for the pathname of the file or device of the next volume in the +archive. +.It Fl G Ar group +Select a file based on its +.Ar group +name, or when starting with a +.Cm # , +a numeric GID. +A +.Ql \e +can be used to escape the +.Cm # . +Multiple +.Fl G +options may be supplied and checking stops with the first match. +.It Fl H +Follow only command-line symbolic links while performing a physical file +system traversal. +.It Fl i +Interactively rename files or archive members. +For each archive member matching a +.Ar pattern +operand or each file matching a +.Ar file +operand, +.Nm +will prompt to +.Pa /dev/tty +giving the name of the file, its file mode, and its modification time. +.Nm +will then read a line from +.Pa /dev/tty . +If this line is blank, the file or archive member is skipped. +If this line consists of a single period, the +file or archive member is processed with no modification to its name. +Otherwise, its name is replaced with the contents of the line. 
+.Nm +will immediately exit with a non-zero exit status if +.Dv EOF +is encountered when reading a response or if +.Pa /dev/tty +cannot be opened for reading and writing. +.It Fl j +Use bzip2 to compress (decompress) the archive while writing (reading). +The bzip2 utility must be installed separately. +Incompatible with +.Fl a . +.It Fl k +Do not overwrite existing files. +.It Fl L +Follow all symbolic links to perform a logical file system traversal. +.It Fl l +(The lowercase letter +.Dq ell . ) +Link files. +In copy mode +.Pq Fl r Fl w , +hard links are made between the source and destination file hierarchies +whenever possible. +.It Fl n +Select the first archive member that matches each +.Ar pattern +operand. +No more than one archive member is matched for each +.Ar pattern . +When members of type directory are matched, the file hierarchy rooted at that +directory is also matched (unless +.Fl d +is also specified). +.It Fl O +Force the archive to be one volume. +If a volume ends prematurely, +.Nm +will not prompt for a new volume. +This option can be useful for +automated tasks where error recovery cannot be performed by a human. +.It Fl o Ar options +Information to modify the algorithm for extracting or writing archive files +which is specific to the archive format specified by +.Fl x . +In general, +.Ar options +take the form: +.Ar name Ns = Ns Ar value . +.Pp +The following options are available for the +.Cm ustar +and old +.Bx +.Cm tar +formats: +.Pp +.Bl -tag -width Ds -compact +.It Cm write_opt=nodir +When writing archives, omit the storage of directories. +.El +.It Fl P +Do not follow symbolic links, perform a physical file system traversal. +This is the default mode. +.It Fl p Ar string +Specify one or more file characteristic options (privileges). +The +.Ar string +option-argument is a string specifying file characteristics to be retained or +discarded on extraction. 
+The string consists of the specification characters +.Cm a , e , m , o , +and +.Cm p . +Multiple characteristics can be concatenated within the same string +and multiple +.Fl p +options can be specified. +The meanings of the specification characters are as follows: +.Bl -tag -width 2n +.It Cm a +Do not preserve file access times. +By default, file access times are preserved whenever possible. +.It Cm e +.Dq Preserve everything , +the user ID, group ID, file mode bits, +file access time, and file modification time. +This is intended to be used by root, +someone with all the appropriate privileges, in order to preserve all +aspects of the files as they are recorded in the archive. +The +.Cm e +flag is the sum of the +.Cm o +and +.Cm p +flags. +.It Cm m +Do not preserve file modification times. +By default, file modification times are preserved whenever possible. +.It Cm o +Preserve the user ID and group ID. +.It Cm p +.Dq Preserve +the file mode bits. +This is intended to be used by a user with regular privileges +who wants to preserve all aspects of the file other than the ownership. +The file times are preserved by default, but two other flags are offered to +disable this and use the time of extraction instead. +.El +.Pp +In the preceding list, +.Sq preserve +indicates that an attribute stored in the archive is given to the +extracted file, subject to the permissions of the invoking +process. +Otherwise the attribute of the extracted file is determined as +part of the normal file creation action. +If neither the +.Cm e +nor the +.Cm o +specification character is specified, or the user ID and group ID are not +preserved for any reason, +.Nm +will not set the +.Dv S_ISUID +(setuid) and +.Dv S_ISGID +(setgid) bits of the file mode. +If the preservation of any of these items fails for any reason, +.Nm +will write a diagnostic message to standard error. 
+Failure to preserve these items will affect the final exit status, +but will not cause the extracted file to be deleted. +If the file characteristic letters in any of the string option-arguments are +duplicated or conflict with each other, the one(s) given last will take +precedence. +For example, if +.Fl p Ar eme +is specified, file modification times are still preserved. +.It Fl r +Read an archive file from standard input +and extract the specified +.Ar file +operands. +If any intermediate directories are needed in order to extract an archive +member, these directories will be created as if +.Xr mkdir 2 +was called with the bitwise OR of +.Dv S_IRWXU , S_IRWXG , +and +.Dv S_IRWXO +as the mode argument. +When the selected archive format supports the specification of linked +files and these files cannot be linked while the archive is being extracted, +.Nm +will write a diagnostic message to standard error +and exit with a non-zero exit status at the completion of operation. +.It Fl s Ar replstr +Modify the archive member names according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +.Ar file +or +.Ar pattern +arguments may be given to restrict the list of archive members to those +specified. +.Pp +The format of these regular expressions is: +.Pp +.Dl /old/new/[gp] +.Pp +As in +.Xr ed 1 , +.Ar old +is a basic regular expression (see +.Xr re_format 7 ) +and +.Ar new +can contain an ampersand +.Pq Ql & , +.Ql \e Ns Em n +(where +.Em n +is a digit) back-references, +or subexpression matching. +The +.Ar old +string may also contain newline characters. +Any non-null character can be used as a delimiter +.Po +.Ql / +is shown here +.Pc . +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. 
+.Pp +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring, +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +standard error in the following format: +.Pp +.D1 Em original-pathname No >> Em new-pathname +.Pp +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl T Ar range +Allow files to be selected based on a file modification or inode change +time falling within the specified time range. +The range has the format: +.Sm off +.Bd -filled -offset indent +.Op Ar from_date +.Op \&, Ar to_date +.Op / Oo Cm c Oc Op Cm m +.Ed +.Sm on +.Pp +The dates specified by +.Ar from_date +to +.Ar to_date +are inclusive. +If only a +.Ar from_date +is supplied, all files with a modification or inode change time +equal to or younger are selected. +If only a +.Ar to_date +is supplied, all files with a modification or inode change time +equal to or older will be selected. +When the +.Ar from_date +is equal to the +.Ar to_date , +only files with a modification or inode change time of exactly that +time will be selected. +.Pp +When +.Nm +is in write or copy mode, the optional trailing field +.Oo Cm c Oc Ns Op Cm m +can be used to determine which file time (inode change, file modification or +both) are used in the comparison. +If neither is specified, the default is to use file modification time only. +The +.Cm m +specifies the comparison of file modification time (the time when +the file was last written). +The +.Cm c +specifies the comparison of inode change time (the time when the file +inode was last changed; e.g., a change of owner, group, mode, etc). 
+When +.Cm c +and +.Cm m +are both specified, then the modification and inode change times are +both compared. +.Pp +The inode change time comparison is useful in selecting files whose +attributes were recently changed or selecting files which were recently +created and had their modification time reset to an older time (as +happens when a file is extracted from an archive and the modification time +is preserved). +Time comparisons using both file times are useful when +.Nm +is used to create a time-based incremental archive (only files that were +changed during a specified time range will be archived). +.Pp +A time range is made up of up to seven fields and each field must contain two +digits. +The format is: +.Pp +.Dl [[[[[cc]yy]mm]dd]HH]MM[.SS] +.Pp +Where +.Ar cc +is the first two digits of the year (the century), +.Ar yy +is the last two digits of the year, +the first +.Ar mm +is the month (from 01 to 12), +.Ar dd +is the day of the month (from 01 to 31), +.Ar HH +is the hour of the day (from 00 to 23), +.Ar MM +is the minute (from 00 to 59), +and +.Ar SS +is the seconds (from 00 to 59). +The minute field +.Ar MM +is required, while the other fields are optional and must be added in the +following order: +.Ar HH , dd , mm , +.Ar yy , cc . +.Pp +The +.Ar SS +field may be added independently of the other fields. +Time ranges are relative to the current time, so +.Ic -T 1234/cm +would select all files with a modification or inode change time +of 12:34 PM today or later. +Multiple +.Fl T +time ranges can be supplied and checking stops with the first match. +.It Fl t +Reset the access times of any file or directory read or accessed by +.Nm +to be the same as they were before being read or accessed by +.Nm pax . +.It Fl U Ar user +Select a file based on its +.Ar user +name, or when starting with a +.Cm # , +a numeric UID. +A +.Ql \e +can be used to escape the +.Cm # . +Multiple +.Fl U +options may be supplied and checking stops with the first match.
+.It Fl u +Ignore files that are older (having a less recent file modification time) +than a pre-existing file or archive member with the same name. +During read, +an archive member with the same name as a file in the file system will be +extracted if the archive member is newer than the file. +During write, +a file system member with the same name as an archive member will be +written to the archive if it is newer than the archive member. +During copy, +the file in the destination hierarchy is replaced by the file in the source +hierarchy or by a link to the file in the source hierarchy if the file in +the source hierarchy is newer. +.It Fl v +During a list operation, produce a verbose table of contents using the format of the +.Xr ls 1 +utility with the +.Fl l +option. +For pathnames representing a hard link to a previous member of the archive, +the output has the format: +.Pp +.Dl Em ls -l listing No == Em link-name +.Pp +For pathnames representing a symbolic link, the output has the format: +.Pp +.Dl Em ls -l listing No -> Em link-name +.Pp +Where +.Em ls -l listing +is the output format specified by the +.Xr ls 1 +utility when used with the +.Fl l +option. +Otherwise for all the other operational modes +(read, write, and copy), +pathnames are written and flushed to standard error +without a trailing newline +as soon as processing begins on that file or +archive member. +The trailing newline +is not buffered and is written only after the file has been read or written. +.It Fl w +Write files to the standard output +in the specified archive format. +When no +.Ar file +operands are specified, standard input +is read for a list of pathnames with one per line without any leading or +trailing +.Aq blanks . +.It Fl X +When traversing the file hierarchy specified by a pathname, +do not descend into directories that have a different device ID. +See the +.Li st_dev +field as described in +.Xr stat 2 +for more information about device IDs. 
+.It Fl x Ar format +Specify the output archive format, with the default format being +.Cm ustar . +.Nm +currently supports the following formats: +.Bl -tag -width "sv4cpio" +.It Cm bcpio +The old binary cpio format. +The default blocksize for this format is 5120 bytes. +This format is not very portable and should not be used when other formats +are available. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm cpio +The extended cpio interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm sv4cpio +The System V release 4 cpio. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm sv4crc +The System V release 4 cpio with file CRC checksums. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm tar +The old +.Bx +tar format as found in +.Bx 4.3 . +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 100 characters or less in length. +Only regular files, hard links, soft links, and directories +will be archived (other file system types are not supported). +For backwards compatibility with even older tar formats, a +.Fl o +option can be used when writing an archive to omit the storage of directories. 
+This option takes the form: +.Pp +.Dl Fl o Cm write_opt=nodir +.It Cm ustar +The extended tar interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 10240 bytes. +Filenames stored by this format must be 100 characters or less in length; +the total pathname must be 256 characters or less. +.El +.Pp +.Nm +will detect and report any file that it is unable to store or extract +as the result of any specific archive format restrictions. +The individual archive formats may impose additional restrictions on use. +Typical archive format restrictions include (but are not limited to): +file pathname length, file size, link pathname length, and the type of the +file. +.It Fl Y +This option is the same as the +.Fl D +option, except that the inode change time is checked using the +pathname created after all the file name modifications have completed. +.It Fl Z +This option is the same as the +.Fl u +option, except that the modification time is checked using the +pathname created after all the file name modifications have completed. +.It Fl z +Use +.Xr gzip 1 +to compress (decompress) the archive while writing (reading). +Incompatible with +.Fl a . +.El +.Pp +The options that operate on the names of files or archive members +.Po Fl c , +.Fl i , +.Fl j , +.Fl n , +.Fl s , +.Fl u , +.Fl v , +.Fl D , +.Fl G , +.Fl T , +.Fl U , +.Fl Y , +and +.Fl Z +.Pc +interact as follows. +.Pp +When extracting files during a read operation, archive members are +.Sq selected , +based only on the user specified pattern operands as modified by the +.Fl c , +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +.Fl U +options. +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then the +.Fl Y +and +.Fl Z +options will be applied based on the final pathname. +Finally, the +.Fl v +option will write the names resulting from these modifications. 
+.Pp +When archiving files during a write operation, +or copying files during a copy operation, +archive members are +.Sq selected , +based only on the user specified pathnames as modified by the +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +and +.Fl U +options (the +.Fl D +option only applies during a copy operation). +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then during a copy operation the +.Fl Y +and the +.Fl Z +options will be applied based on the final pathname. +Finally, the +.Fl v +option will write the names resulting from these modifications. +.Pp +When one or both of the +.Fl u +or +.Fl D +options are specified along with the +.Fl n +option, a file is not considered selected unless it is newer +than the file to which it is compared. +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.El +.Sh EXIT STATUS +.Ex -std pax +.Sh EXAMPLES +Copy the contents of the current directory to the device +.Pa /dev/rst0 : +.Pp +.Dl $ pax -w -f /dev/rst0 \&. +.Pp +Give the verbose table of contents for an archive stored in +.Pa filename : +.Pp +.Dl $ pax -v -f filename +.Pp +This sequence of commands will copy the entire +.Pa olddir +directory hierarchy to +.Pa newdir : +.Bd -literal -offset indent +$ mkdir newdir +$ cd olddir +$ pax -rw . ../newdir +.Ed +.Pp +Extract files from the archive +.Pa a.pax . +Files rooted in +.Pa /usr +are extracted relative to the current working directory; +all other files are extracted to their unmodified path. +.Pp +.Dl $ pax -r -s ',^/usr/,,' -f a.pax +.Pp +This can be used to interactively select the files to copy from the +current directory to +.Pa dest_dir : +.Pp +.Dl $ pax -rw -i \&. 
dest_dir +.Pp +Extract all files from the archive +.Pa a.pax +which are owned by +.Em root +with group +.Em bin +and preserve all file permissions: +.Pp +.Dl $ pax -r -pe -U root -G bin -f a.pax +.Pp +Update (and list) only those files in the destination directory +.Pa /backup +which are older (less recent inode change or file modification times) than +files with the same name found in the source file tree +.Pa home : +.Pp +.Dl $ pax -r -w -v -Y -Z home /backup +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when reading an archive or cannot +find a file when writing an archive, or cannot preserve the user ID, +group ID, or file mode when the +.Fl p +option is specified, a diagnostic message is written to standard error +and a non-zero exit status will be returned, but processing will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated by +a signal or error, +.Nm +may have only partially extracted a file the user wanted. +Additionally, the file modes of extracted files and directories +may have incorrect file bits, and the modification and access times may be +wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal or error, +.Nm +may have only partially created the archive, which may violate the specific +archive format specification. +.Pp +If while doing a copy, +.Nm +detects a file is about to overwrite itself, the file is not copied, +a diagnostic message is written to standard error +and when +.Nm +completes it will exit with a non-zero exit status. +.Sh SEE ALSO +.Xr cpio 1 , +.Xr tar 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification, +except that the +.Cm pax +archive format and the +.Cm listopt +keyword are unsupported. 
+.Pp +The flags +.Op Fl 0BDEGjOPTUYZz , +the archive formats +.Cm bcpio , +.Cm sv4cpio , +.Cm sv4crc , +and +.Cm tar , +the +.Cm b , k , +and +.Cm x +additions to the +.Fl b +flag, +and the flawed archive handling during list and read operations +are extensions to that specification. +.Sh HISTORY +A +.Nm +utility appeared in +.Bx 4.4 . +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. diff --git a/bin/pax/pax.c b/bin/pax/pax.c new file mode 100644 index 0000000..4d0fc68 --- /dev/null +++ b/bin/pax/pax.c @@ -0,0 +1,446 @@ +/* $OpenBSD: pax.c,v 1.53 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: pax.c,v 1.5 1996/03/26 23:54:20 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <signal.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <err.h> +#include <fcntl.h> +#include <grp.h> +#include <paths.h> +#include <pwd.h> +#include <stdio.h> + +#include "pax.h" +#include "extern.h" +static int gen_init(void); + +/* + * PAX main routines, general globals and some simple start up routines + */ + +/* + * Variables that can be accessed by any routine within pax + */ +int act = DEFOP; /* read/write/append/copy */ +FSUB *frmt = NULL; /* archive format type */ +int cflag; /* match all EXCEPT pattern/file */ +int cwdfd; /* starting cwd */ +int dflag; /* directory member match only */ +int iflag; /* interactive file/archive rename */ +int kflag; /* do not overwrite existing files */ +int lflag; /* use hard links when possible */ +int nflag; /* select first archive member match */ +int tflag; /* restore access time after read */ +int uflag; /* ignore older modification time files */ +int vflag; /* produce verbose output */ +int Dflag; /* same as uflag except inode change time */ +int Hflag; /* follow command line symlinks (write only) */ +int Lflag; /* follow symlinks when writing */ +int Nflag; /* only use numeric uid and gid */ +int Xflag; /* archive files with same device id only */ +int Yflag; /* same as Dflag except after name mode */ +int Zflag; /* same as uflag except after name mode */ 
+int zeroflag; /* use \0 as pathname terminator */ +int vfpart; /* is partial verbose output in progress */ +int patime = 1; /* preserve file access time */ +int pmtime = 1; /* preserve file modification times */ +int nodirs; /* do not create directories as needed */ +int pmode; /* preserve file mode bits */ +int pids; /* preserve file uid/gid */ +int rmleadslash = 0; /* remove leading '/' from pathnames */ +int exit_val; /* exit value */ +int docrc; /* check/create file crc */ +char *dirptr; /* destination dir in a copy */ +char *argv0; /* root of argv[0] */ +enum op_mode op_mode; /* what program are we acting as? */ +sigset_t s_mask; /* signal mask for cleanup critical sect */ +FILE *listf; /* file pointer to print file list to */ +int listfd = STDERR_FILENO; /* fd matching listf, for sighandler output */ +char *tempfile; /* tempfile to use for mkstemp(3) */ +char *tempbase; /* basename of tempfile to use for mkstemp(3) */ + +/* + * PAX - Portable Archive Interchange + * + * A utility to read, write, and write lists of the members of archive + * files and copy directory hierarchies. A variety of archive formats + * are supported (some are described in POSIX 1003.1 10.1): + * + * ustar - 10.1.1 extended tar interchange format + * cpio - 10.1.2 extended cpio interchange format + * tar - old BSD 4.3 tar format + * binary cpio - old cpio with binary header format + * sysVR4 cpio - with and without CRC + * + * This version is a superset of IEEE Std 1003.2b-d3 + * + * Summary of Extensions to the IEEE Standard: + * + * 1 READ ENHANCEMENTS + * 1.1 Operations which read archives will continue to operate even when + * processing archives which may be damaged, truncated, or fail to meet + * format specs in several different ways. Damaged sections of archives + * are detected and avoided if possible. Attempts will be made to resync + * archive read operations even with badly damaged media. + * 1.2 Blocksize requirements are not strictly enforced on archive read. 
+ * Tapes which have variable sized records can be read without errors. + * 1.3 The user can specify via the non-standard option flag -E if error + * resync operation should stop on a media error, try a specified number + * of times to correct, or try to correct forever. + * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks + * of all zeros) will be restored with holes appropriate for the target + * filesystem + * 1.5 The user is notified whenever something is found during archive + * read operations which violates spec (but the read will continue). + * 1.6 Multiple archive volumes can be read and may span over different + * archive devices + * 1.7 Rigidly restores all file attributes exactly as they are stored on the + * archive. + * 1.8 Modification time ranges can be specified via multiple -T + * options. These allow a user to select files whose modification time + * lies within a specific time range. + * 1.9 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 1.10 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 1.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 2 WRITE ENHANCEMENTS + * 2.1 Write operation will stop instead of allowing a user to create a + * flawed archive (due to any problem). + * 2.2 Archives written by pax are forced to strictly conform to both the + * pax and the specific format specifications. + * 2.3 Blocking size and format is rigidly enforced on writes. + * 2.4 Formats which may exhibit header overflow problems (they have fields + * too small for large file systems, such as inode number storage), use + * routines designed to repair this problem. These techniques still + * conform to both pax and format specifications, but no longer truncate + * these fields. This removes any restrictions on using these archive + * formats on large file systems. 
+ * 2.5 Multiple archive volumes can be written and may span over different + * archive devices + * 2.6 An archive volume record limit allows the user to specify the number + * of bytes stored on an archive volume. When reached the user is + * prompted for the next archive volume. This is specified with the + * non-standard -B flag. The limit is rounded up to the next blocksize. + * 2.7 All archive padding during write uses zero filled sections. This makes + * it much easier to pull data out of a flawed archive during read + * operations. + * 2.8 Access time reset with the -t applies to all file nodes (including + * directories). + * 2.9 Symbolic links can be followed with -L (optional in the spec). + * 2.10 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 2.11 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 2.12 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 2.13 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * + * 3 COPY ENHANCEMENTS + * 3.1 Sparse files (lseek holes) can be copied without expanding the holes + * into zero filled blocks. The file copy is created with holes which are + * appropriate for the target filesystem + * 3.2 Access time as well as modification time on copied file trees can be + * preserved with the appropriate -p options. + * 3.3 Access time reset with the -t applies to all file nodes (including + * directories). + * 3.4 Symbolic links can be followed with -L (optional in the spec). + * 3.5 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. 
+ * 3.6 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 3.7 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 3.8 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * 3.9 File inode change time can be checked against existing file before + * name modification (-D) + * 3.10 File inode change time can be checked against existing file after + * name modification (-Y) + * 3.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 4 GENERAL ENHANCEMENTS + * 4.1 Internal structure is designed to isolate format dependent and + * independent functions. Formats are selected via a format driver table. + * This encourages the addition of new archive formats by only having to + * write those routines which id, read and write the archive header. + */ + +/* + * main() + * parse options, set up and operate as specified by the user. + * any operational flaw will set exit_val to non-zero + * Return: 0 if ok, 1 otherwise + */ + +int +main(int argc, char **argv) +{ + char *tmpdir; + size_t tdlen; + + listf = stderr; + + /* + * Keep a reference to cwd, so we can always come back home. + */ + cwdfd = open(".", O_RDONLY | O_CLOEXEC); + if (cwdfd == -1) { + syswarn(1, errno, "Can't open current working directory."); + return(exit_val); + } + + /* + * Where should we put temporary files? + */ + if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') + tmpdir = _PATH_TMP; + tdlen = strlen(tmpdir); + while (tdlen > 0 && tmpdir[tdlen - 1] == '/') + tdlen--; + tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); + if (tempfile == NULL) { + paxwarn(1, "Cannot allocate memory for temp file name."); + return(exit_val); + } + if (tdlen) + memcpy(tempfile, tmpdir, tdlen); + tempbase = tempfile + tdlen; + *tempbase++ = '/'; + + /* + * keep passwd and group files open for faster lookups. 
+ */ + setpassent(1); + setgroupent(1); + + /* + * parse options, determine operational mode, general init + */ + options(argc, argv); + if ((gen_init() < 0) || (tty_init() < 0)) + return(exit_val); + + /* + * pmode needs to restore setugid bits when extracting or copying, + * so can't pledge at all then. + */ + if (pmode == 0 || (act != EXTRACT && act != COPY)) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw proc exec tape", + NULL) == -1) + err(1, "pledge"); + + /* Copy mode, or no gzip -- don't need to fork/exec. */ + if (gzip_program == NULL || act == COPY) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw tape", + NULL) == -1) + err(1, "pledge"); + } + } + + /* + * select a primary operation mode + */ + switch (act) { + case EXTRACT: + extract(); + break; + case ARCHIVE: + archive(); + break; + case APPND: + if (gzip_program != NULL) + errx(1, "can not gzip while appending"); + append(); + break; + case COPY: + copy(); + break; + default: + case LIST: + list(); + break; + } + return(exit_val); +} + +/* + * sig_cleanup() + * when interrupted we try to do whatever delayed processing we can. + * This is not critical, but we really ought to limit our damage when we + * are aborted by the user. + * Return: + * never.... + */ + +void +sig_cleanup(int which_sig) +{ + /* + * restore modes and times for any dirs we may have created + * or any dirs we may have read. 
+ */ + + /* paxwarn() uses stdio; fake it as well as we can */ + if (which_sig == SIGXCPU) + dprintf(STDERR_FILENO, "\nCPU time limit reached, cleaning up.\n"); + else + dprintf(STDERR_FILENO, "\nSignal caught, cleaning up.\n"); + + ar_close(1); + sltab_process(1); + proc_dir(1); + if (tflag) + atdir_end(); + _exit(1); +} + +/* + * setup_sig() + * set a signal to be caught, but only if it isn't being ignored already + */ + +static int +setup_sig(int sig, const struct sigaction *n_hand) +{ + struct sigaction o_hand; + + if (sigaction(sig, NULL, &o_hand) == -1) + return (-1); + + if (o_hand.sa_handler == SIG_IGN) + return (0); + + return (sigaction(sig, n_hand, NULL)); +} + +/* + * gen_init() + * general setup routines. Not all are required, but they really help + * when dealing with a medium to large sized archives. + */ + +static int +gen_init(void) +{ + struct rlimit reslimit; + struct sigaction n_hand; + + /* + * Really needed to handle large archives. We can run out of memory for + * internal tables really fast when we have a whole lot of files... + */ + if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_DATA , &reslimit); + } + + /* + * should file size limits be waived? if the os limits us, this is + * needed if we want to write a large archive + */ + if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_FSIZE , &reslimit); + } + + /* + * increase the size the stack can grow to + */ + if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_STACK , &reslimit); + } + + /* + * not really needed, but doesn't hurt + */ + if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_RSS , &reslimit); + } + + /* + * signal handling to reset stored directory times and modes. Since + * we deal with broken pipes via failed writes we ignore it. 
We also + * deal with any file size limit through failed writes. Cpu time + * limits are caught and a cleanup is forced. + */ + if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || + (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || + (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || + (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { + paxwarn(1, "Unable to set up signal mask"); + return(-1); + } + + /* snag the fd to be used from the signal handler */ + listfd = fileno(listf); + + memset(&n_hand, 0, sizeof n_hand); + n_hand.sa_mask = s_mask; + n_hand.sa_flags = 0; + n_hand.sa_handler = sig_cleanup; + + if (setup_sig(SIGHUP, &n_hand) || + setup_sig(SIGTERM, &n_hand) || + setup_sig(SIGINT, &n_hand) || + setup_sig(SIGQUIT, &n_hand) || + setup_sig(SIGXCPU, &n_hand)) + goto out; + + n_hand.sa_handler = SIG_IGN; + if ((sigaction(SIGPIPE, &n_hand, NULL) == -1) || + (sigaction(SIGXFSZ, &n_hand, NULL) == -1)) + goto out; + return(0); + + out: + syswarn(1, errno, "Unable to set up signal handler"); + return(-1); +} diff --git a/bin/pax/pax.h b/bin/pax/pax.h new file mode 100644 index 0000000..65d445a --- /dev/null +++ b/bin/pax/pax.h @@ -0,0 +1,262 @@ +/* $OpenBSD: pax.h,v 1.29 2017/09/12 17:11:11 otto Exp $ */ +/* $NetBSD: pax.h,v 1.3 1995/03/21 09:07:41 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pax.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * BSD PAX global data structures and constants. + */ + +#define MAXBLK 64512 /* MAX blocksize supported (posix SPEC) */ + /* WARNING: increasing MAXBLK past 32256 */ + /* will violate posix spec. */ +#define MAXBLK_POSIX 32256 /* MAX blocksize supported as per POSIX */ +#define BLKMULT 512 /* blocksize must be even mult of 512 bytes */ + /* Don't even think of changing this */ +#define DEVBLK 8192 /* default read blksize for devices */ +#define FILEBLK 10240 /* default read blksize for files */ +#define PAXPATHLEN 3072 /* maximum path length for pax. 
MUST be */ + /* longer than the system PATH_MAX */ + +/* + * Pax modes of operation + */ +#define LIST 0 /* List the file in an archive */ +#define EXTRACT 1 /* extract the files in an archive */ +#define ARCHIVE 2 /* write a new archive */ +#define APPND 3 /* append to the end of an archive */ +#define COPY 4 /* copy files to destination dir */ +#define DEFOP LIST /* if no flags default is to LIST */ + +/* + * Device type of the current archive volume + */ +#define ISREG 0 /* regular file */ +#define ISCHR 1 /* character device */ +#define ISBLK 2 /* block device */ +#define ISTAPE 3 /* tape drive */ +#define ISPIPE 4 /* pipe/socket */ + +/* + * Pattern matching structure + * + * Used to store command line patterns + */ +typedef struct pattern { + char *pstr; /* pattern to match, user supplied */ + char *pend; /* end of a prefix match */ + char *chdname; /* the dir to change to if not NULL. */ + size_t plen; /* length of pstr */ + int flgs; /* processing/state flags */ +#define MTCH 0x1 /* pattern has been matched */ +#define DIR_MTCH 0x2 /* pattern matched a directory */ + struct pattern *fow; /* next pattern */ +} PATTERN; + +/* + * General Archive Structure (used internal to pax) + * + * This structure is used to pass information about archive members between + * the format independent routines and the format specific routines. When + * new archive formats are added, they must accept requests and supply info + * encoded in a structure of this type. The name fields are declared statically + * here, as there is only ONE of these floating around, size is not a major + * consideration. Eventually converting the name fields to a dynamic length + * may be required if and when the supporting operating system removes all + * restrictions on the length of pathnames it will resolve. 
+ */ +typedef struct { + int nlen; /* file name length */ + char name[PAXPATHLEN+1]; /* file name */ + int ln_nlen; /* link name length */ + char ln_name[PAXPATHLEN+1]; /* name to link to (if any) */ + char *org_name; /* orig name in file system */ + PATTERN *pat; /* ptr to pattern match (if any) */ + struct stat sb; /* stat buffer see stat(2) */ + off_t pad; /* bytes of padding after file xfer */ + off_t skip; /* bytes of real data after header */ + /* IMPORTANT. The st_size field does */ + /* not always indicate the amount of */ + /* data following the header. */ + u_int32_t crc; /* file crc */ + int type; /* type of file node */ +#define PAX_DIR 1 /* directory */ +#define PAX_CHR 2 /* character device */ +#define PAX_BLK 3 /* block device */ +#define PAX_REG 4 /* regular file */ +#define PAX_SLK 5 /* symbolic link */ +#define PAX_SCK 6 /* socket */ +#define PAX_FIF 7 /* fifo */ +#define PAX_HLK 8 /* hard link */ +#define PAX_HRG 9 /* hard link to a regular file */ +#define PAX_CTG 10 /* high performance file */ +#define PAX_GLL 11 /* GNU long symlink */ +#define PAX_GLF 12 /* GNU long file */ +} ARCHD; + +#define PAX_IS_REG(type) ((type) == PAX_REG || (type) == PAX_CTG) +#define PAX_IS_HARDLINK(type) ((type) == PAX_HLK || (type) == PAX_HRG) +#define PAX_IS_LINK(type) ((type) == PAX_SLK || PAX_IS_HARDLINK(type)) + +/* + * Format Specific Routine Table + * + * The format specific routine table allows new archive formats to be quickly + * added. Overall pax operation is independent of the actual format used to + * form the archive. Only those routines which deal directly with the archive + * are tailored to the oddities of the specific format. All other routines are + * independent of the archive format. Data flow in and out of the format + * dependent routines pass pointers to ARCHD structure (described below). + */ +typedef struct { + char *name; /* name of format, this is the name the user */ + /* gives to -x option to select it. 
*/ + int bsz; /* default block size. used when the user */ + /* does not specify a blocksize for writing */ + /* Appends continue to with the blocksize */ + /* the archive is currently using. */ + int hsz; /* Header size in bytes. this is the size of */ + /* the smallest header this format supports. */ + /* Headers are assumed to fit in a BLKMULT. */ + /* If they are bigger, get_head() and */ + /* get_arc() must be adjusted */ + int udev; /* does append require unique dev/ino? some */ + /* formats use the device and inode fields */ + /* to specify hard links. when members in */ + /* the archive have the same inode/dev they */ + /* are assumed to be hard links. During */ + /* append we may have to generate unique ids */ + /* to avoid creating incorrect hard links */ + int hlk; /* does archive store hard links info? if */ + /* not, we do not bother to look for them */ + /* during archive write operations */ + int blkalgn; /* writes must be aligned to blkalgn boundary */ + int inhead; /* is the trailer encoded in a valid header? */ + /* if not, trailers are assumed to be found */ + /* in invalid headers (i.e like tar) */ + int (*id)(char *, /* checks if a buffer is a valid header */ + int); /* returns 1 if it is, o.w. returns a 0 */ + int (*st_rd)(void); /* initialize routine for read. so format */ + /* can set up tables etc before it starts */ + /* reading an archive */ + int (*rd)(ARCHD *, /* read header routine. passed a pointer to */ + char *); /* ARCHD. It must extract the info from the */ + /* format and store it in the ARCHD struct. */ + /* This routine is expected to fill all the */ + /* fields in the ARCHD (including stat buf) */ + /* 0 is returned when a valid header is */ + /* found. -1 when not valid. This routine */ + /* set the skip and pad fields so the format */ + /* independent routines know the amount of */ + /* padding and the number of bytes of data */ + /* which follow the header. 
This info is */ + /* used skip to the next file header */ + off_t (*end_rd)(void); /* read cleanup. Allows format to clean up */ + /* and MUST RETURN THE LENGTH OF THE TRAILER */ + /* RECORD (so append knows how many bytes */ + /* to move back to rewrite the trailer) */ + int (*st_wr)(void); /* initialize routine for write operations */ + int (*wr)(ARCHD *); /* write archive header. Passed an ARCHD */ + /* filled with the specs on the next file to */ + /* archived. Returns a 1 if no file data is */ + /* is to be stored; 0 if file data is to be */ + /* added. A -1 is returned if a write */ + /* operation to the archive failed. this */ + /* function sets the skip and pad fields so */ + /* the proper padding can be added after */ + /* file data. This routine must NEVER write */ + /* a flawed archive header. */ + int (*end_wr)(void); /* end write. write the trailer and do any */ + /* other format specific functions needed */ + /* at the end of an archive write */ + int (*trail)(ARCHD *, /* returns 0 if a valid trailer, -1 if not */ + char *, int, /* For formats which encode the trailer */ + int *); /* outside of a valid header, a return value */ + /* of 1 indicates that the block passed to */ + /* it can never contain a valid header (skip */ + /* this block, no point in looking at it) */ + /* CAUTION: parameters to this function are */ + /* different for trailers inside or outside */ + /* of headers. See get_head() for details */ + int (*options)(void); /* process format specific options (-o) */ +} FSUB; + +/* + * Time data for a given file. This is usually embedded in a structure + * indexed by dev+ino, by name, by order in the archive, etc. set_attr() + * takes one of these and will only change the times or mode if the file + * at the given name has the indicated dev+ino. 
+ */ +struct file_times { + ino_t ft_ino; /* inode number to verify */ + struct timespec ft_mtim; /* times to set */ + struct timespec ft_atim; + char *ft_name; /* name of file to set the times on */ + dev_t ft_dev; /* device number to verify */ +}; + +/* + * Format Specific Options List + * + * Used to pass format options to the format options handler + */ +typedef struct oplist { + char *name; /* option variable name e.g. name= */ + char *value; /* value for option variable */ + struct oplist *fow; /* next option */ +} OPLIST; + +/* + * General Macros + */ +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) +#define MAJOR(x) major(x) +#define MINOR(x) minor(x) +#define TODEV(x, y) makedev((x), (y)) + +#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) +#define SETBITS (S_ISUID | S_ISGID) +#define ABITS (FILEBITS | SETBITS) + +/* + * General Defines + */ +#define HEX 16 +#define OCT 8 +#define _PAX_ 1 +#define _TFILE_BASE "paxXXXXXXXXXX" +#define MAX_TIME_T (sizeof(time_t) == sizeof(long long) ? \ + LLONG_MAX : INT_MAX) diff --git a/bin/pax/sel_subs.c b/bin/pax/sel_subs.c new file mode 100644 index 0000000..578c445 --- /dev/null +++ b/bin/pax/sel_subs.c @@ -0,0 +1,632 @@ +/* $OpenBSD: sel_subs.c,v 1.28 2019/06/24 03:33:09 deraadt Exp $ */ +/* $NetBSD: sel_subs.c,v 1.5 1995/03/21 09:07:42 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * data structure for storing uid/grp selects (-U, -G non standard options) + */ + +#define USR_TB_SZ 317 /* user selection table size */ +#define GRP_TB_SZ 317 /* user selection table size */ + +typedef struct usrt { + uid_t uid; + struct usrt *fow; /* next uid */ +} USRT; + +typedef struct grpt { + gid_t gid; + struct grpt *fow; /* next gid */ +} GRPT; + +/* + * data structure for storing user supplied time ranges (-T option) + */ + +#define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; + +typedef struct time_rng { + time_t low_time; /* lower inclusive time limit */ + time_t high_time; /* higher inclusive time limit */ + int flgs; /* option flags */ +#define HASLOW 0x01 /* has lower time limit */ +#define HASHIGH 0x02 /* has higher time limit */ +#define CMPMTME 0x04 /* compare file modification time */ +#define CMPCTME 0x08 /* compare inode change time */ +#define CMPBOTH (CMPMTME|CMPCTME) /* compare inode and mod time */ + struct time_rng *fow; /* next pattern */ +} TIME_RNG; + +static int str_sec(const char *, time_t *); +static int usr_match(ARCHD *); +static int grp_match(ARCHD *); +static int trng_match(ARCHD *); + +static TIME_RNG *trhead = NULL; /* time range list head */ +static TIME_RNG *trtail = NULL; /* time range list tail */ +static USRT **usrtb = NULL; /* user selection table */ +static GRPT **grptb = NULL; /* group selection table */ + +/* + * Routines for selection of archive members + */ + +/* + * sel_chk() + * check if this file matches a specified uid, gid or time range + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +int +sel_chk(ARCHD *arcn) +{ + if (((usrtb != NULL) && usr_match(arcn)) || + ((grptb != NULL) && 
grp_match(arcn)) || + ((trhead != NULL) && trng_match(arcn))) + return(1); + return(0); +} + +/* + * User/group selection routines + * + * Routines to handle user selection of files based on the file uid/gid. To + * add an entry, the user supplies either the name or the uid/gid starting with + * a # on the command line. A \# will escape the #. + */ + +/* + * usr_add() + * add a user match to the user match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +usr_add(char *str) +{ + u_int indx; + USRT *pt; + uid_t uid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((usrtb == NULL) && + ((usrtb = calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory for user selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a user name, \# escapes # as first char in user name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if (uid_from_user(str, &uid) == -1) { + paxwarn(1, "Unable to find uid for user: %s", str); + return(-1); + } + } else + uid = (uid_t)strtoul(str+1, NULL, 10); + endpwent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)uid) % USR_TB_SZ; + if ((pt = usrtb[indx]) != NULL) { + while (pt != NULL) { + if (pt->uid == uid) + return(0); + pt = pt->fow; + } + } + + /* + * uid is not yet in the table, add it to the front of the chain + */ + if ((pt = malloc(sizeof(USRT))) != NULL) { + pt->uid = uid; + pt->fow = usrtb[indx]; + usrtb[indx] = pt; + return(0); + } + paxwarn(1, "User selection table out of memory"); + return(-1); +} + +/* + * usr_match() + * check if this files uid matches a selected uid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +usr_match(ARCHD *arcn) +{ + USRT *pt; + + /* + * hash and look for it in the table + */ + pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; + while (pt != NULL) { + if (pt->uid == arcn->sb.st_uid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * grp_add() + * add a group match to the group match hash table + * Return: + * 0 if added ok, -1 otherwise; + */ + +int +grp_add(char *str) +{ + u_int indx; + GRPT *pt; + gid_t gid; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((grptb == NULL) && + ((grptb = calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory fo group selection table"); + return(-1); + } + + /* + * figure out group spec + */ + if (str[0] != '#') { + /* + * it is a group name, \# escapes # as first char in group name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if (gid_from_group(str, &gid) == -1) { + paxwarn(1,"Cannot determine gid for group name: %s", str); + return(-1); + } + } else + gid = (gid_t)strtoul(str+1, NULL, 10); + endgrent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)gid) % GRP_TB_SZ; + if ((pt = grptb[indx]) != NULL) { + while (pt != NULL) { + if (pt->gid == gid) + return(0); + pt = pt->fow; + } + } + + /* + * gid not in the table, add it to the front of the chain + */ + if ((pt = malloc(sizeof(GRPT))) != NULL) { + pt->gid = gid; + pt->fow = grptb[indx]; + grptb[indx] = pt; + return(0); + } + paxwarn(1, "Group selection table out of memory"); + return(-1); +} + +/* + * grp_match() + * check if this files gid matches a selected gid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +grp_match(ARCHD *arcn) +{ + GRPT *pt; + + /* + * hash and look for it in the table + */ + pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; + while (pt != NULL) { + if (pt->gid == arcn->sb.st_gid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * Time range selection routines + * + * Routines to handle user selection of files based on the modification and/or + * inode change time falling within a specified time range (the non-standard + * -T flag). The user may specify any number of different file time ranges. + * Time ranges are checked one at a time until a match is found (if at all). + * If the file has a mtime (and/or ctime) which lies within one of the time + * ranges, the file is selected. Time ranges may have a lower and/or a upper + * value. These ranges are inclusive. When no time ranges are supplied to pax + * with the -T option, all members in the archive will be selected by the time + * range routines. When only a lower range is supplied, only files with a + * mtime (and/or ctime) equal to or younger are selected. When only a upper + * range is supplied, only files with a mtime (and/or ctime) equal to or older + * are selected. When the lower time range is equal to the upper time range, + * only files with a mtime (or ctime) of exactly that time are selected. + */ + +/* + * trng_add() + * add a time range match to the time range list. + * This is a non-standard pax option. Lower and upper ranges are in the + * format: [[[[[cc]yy]mm]dd]HH]MM[.SS] and are comma separated. + * Time ranges are based on current time, so 1234 would specify a time of + * 12:34 today. 
+ * Return: + * 0 if the time range was added to the list, -1 otherwise + */ + +int +trng_add(char *str) +{ + TIME_RNG *pt; + char *up_pt = NULL; + char *stpt; + char *flgpt; + int dot = 0; + + /* + * throw out the badly formed time ranges + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty time range string"); + return(-1); + } + + /* + * locate optional flags suffix /{cm}. + */ + if ((flgpt = strrchr(str, '/')) != NULL) + *flgpt++ = '\0'; + + for (stpt = str; *stpt != '\0'; ++stpt) { + if ((*stpt >= '0') && (*stpt <= '9')) + continue; + if ((*stpt == ',') && (up_pt == NULL)) { + *stpt = '\0'; + up_pt = stpt + 1; + dot = 0; + continue; + } + + /* + * allow only one dot per range (secs) + */ + if ((*stpt == '.') && (!dot)) { + ++dot; + continue; + } + paxwarn(1, "Improperly specified time range: %s", str); + goto out; + } + + /* + * allocate space for the time range and store the limits + */ + if ((pt = malloc(sizeof(TIME_RNG))) == NULL) { + paxwarn(1, "Unable to allocate memory for time range"); + return(-1); + } + + /* + * by default we only will check file mtime, but user can specify + * mtime, ctime (inode change time) or both. 
+ */ + if ((flgpt == NULL) || (*flgpt == '\0')) + pt->flgs = CMPMTME; + else { + pt->flgs = 0; + while (*flgpt != '\0') { + switch (*flgpt) { + case 'M': + case 'm': + pt->flgs |= CMPMTME; + break; + case 'C': + case 'c': + pt->flgs |= CMPCTME; + break; + default: + paxwarn(1, "Bad option %c with time range %s", + *flgpt, str); + free(pt); + goto out; + } + ++flgpt; + } + } + + /* + * start off with the current time + */ + pt->low_time = pt->high_time = time(NULL); + if (*str != '\0') { + /* + * add lower limit + */ + if (str_sec(str, &(pt->low_time)) < 0) { + paxwarn(1, "Illegal lower time range %s", str); + free(pt); + goto out; + } + pt->flgs |= HASLOW; + } + + if ((up_pt != NULL) && (*up_pt != '\0')) { + /* + * add upper limit + */ + if (str_sec(up_pt, &(pt->high_time)) < 0) { + paxwarn(1, "Illegal upper time range %s", up_pt); + free(pt); + goto out; + } + pt->flgs |= HASHIGH; + + /* + * check that the upper and lower do not overlap + */ + if (pt->flgs & HASLOW) { + if (pt->low_time > pt->high_time) { + paxwarn(1, "Upper %s and lower %s time overlap", + up_pt, str); + free(pt); + return(-1); + } + } + } + + pt->fow = NULL; + if (trhead == NULL) { + trtail = trhead = pt; + return(0); + } + trtail->fow = pt; + trtail = pt; + return(0); + + out: + paxwarn(1, "Time range format is: [[[[[cc]yy]mm]dd]HH]MM[.SS][/[c][m]]"); + return(-1); +} + +/* + * trng_match() + * check if this files mtime/ctime falls within any supplied time range. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +trng_match(ARCHD *arcn) +{ + TIME_RNG *pt; + + /* + * have to search down the list one at a time looking for a match. + * remember time range limits are inclusive. 
+ */ + pt = trhead; + while (pt != NULL) { + switch (pt->flgs & CMPBOTH) { + case CMPBOTH: + /* + * user wants both mtime and ctime checked for this + * time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPCTME: + /* + * user wants only ctime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPMTME: + default: + /* + * user wants only mtime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + } + break; + } + + if (pt == NULL) + return(1); + return(0); +} + +/* + * str_sec() + * Convert a time string in the format of [[[[[cc]yy]mm]dd]HH]MM[.SS] to + * seconds UTC. Tval already has current time loaded into it at entry. + * Return: + * 0 if converted ok, -1 otherwise + */ + +static int +str_sec(const char *p, time_t *tval) +{ + struct tm *lt; + const char *dot, *t; + size_t len; + int bigyear; + int yearset; + + yearset = 0; + len = strlen(p); + + for (t = p, dot = NULL; *t; ++t) { + if (isdigit((unsigned char)*t)) + continue; + if (*t == '.' 
&& dot == NULL) { + dot = t; + continue; + } + return(-1); + } + + lt = localtime(tval); + + if (dot != NULL) { /* .SS */ + if (strlen(++dot) != 2) + return(-1); + lt->tm_sec = ATOI2(dot); + if (lt->tm_sec > 61) + return(-1); + len -= 3; + } else + lt->tm_sec = 0; + + switch (len) { + case 12: /* cc */ + bigyear = ATOI2(p); + lt->tm_year = (bigyear * 100) - 1900; + yearset = 1; + /* FALLTHROUGH */ + case 10: /* yy */ + if (yearset) { + lt->tm_year += ATOI2(p); + } else { + lt->tm_year = ATOI2(p); + if (lt->tm_year < 69) /* hack for 2000 ;-} */ + lt->tm_year += (2000 - 1900); + } + /* FALLTHROUGH */ + case 8: /* mm */ + lt->tm_mon = ATOI2(p); + if ((lt->tm_mon > 12) || !lt->tm_mon) + return(-1); + --lt->tm_mon; /* time struct is 0 - 11 */ + /* FALLTHROUGH */ + case 6: /* dd */ + lt->tm_mday = ATOI2(p); + if ((lt->tm_mday > 31) || !lt->tm_mday) + return(-1); + /* FALLTHROUGH */ + case 4: /* HH */ + lt->tm_hour = ATOI2(p); + if (lt->tm_hour > 23) + return(-1); + /* FALLTHROUGH */ + case 2: /* MM */ + lt->tm_min = ATOI2(p); + if (lt->tm_min > 59) + return(-1); + break; + default: + return(-1); + } + + /* convert broken-down time to UTC clock time seconds */ + if ((*tval = mktime(lt)) == -1) + return(-1); + return(0); +} diff --git a/bin/pax/tables.c b/bin/pax/tables.c new file mode 100644 index 0000000..0a7b71f --- /dev/null +++ b/bin/pax/tables.c @@ -0,0 +1,1786 @@ +/* $OpenBSD: tables.c,v 1.54 2019/06/28 05:35:34 deraadt Exp $ */ +/* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Routines for controlling the contents of all the different databases pax + * keeps. Tables are dynamically created only when they are needed. The + * goal was speed and the ability to work with HUGE archives. The databases + * were kept simple, but do have complex rules for when the contents change. 
+ * As of this writing, the posix library functions were more complex than + * needed for this application (pax databases have very short lifetimes and + * do not survive after pax is finished). Pax is required to handle very + * large archives. These database routines carefully combine memory usage and + * temporary file storage in ways which will not significantly impact runtime + * performance while allowing the largest possible archives to be handled. + * Trying to force the fit to the posix database routines was not considered + * time well spent. + */ + +/* + * data structures and constants used by the different databases kept by pax + */ + +/* + * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. + * Probably safe to expect 500000 inodes per tape. Assuming good key + * distribution (inodes) chains of under 50 long (worst case) is ok. + */ +#define L_TAB_SZ 2503 /* hard link hash table size */ +#define F_TAB_SZ 50503 /* file time hash table size */ +#define N_TAB_SZ 541 /* interactive rename hash table */ +#define D_TAB_SZ 317 /* unique device mapping table */ +#define A_TAB_SZ 317 /* ftree dir access time reset table */ +#define SL_TAB_SZ 317 /* escape symlink tables */ +#define MAXKEYLEN 64 /* max number of chars for hash */ +#define DIRP_SIZE 64 /* initial size of created dir table */ + +/* + * file hard link structure (hashed by dev/ino and chained) used to find the + * hard links in a file system or with some archive formats (cpio) + */ +typedef struct hrdlnk { + ino_t ino; /* files inode number */ + char *name; /* name of first file seen with this ino/dev */ + dev_t dev; /* files device number */ + u_long nlink; /* expected link count */ + struct hrdlnk *fow; +} HRDLNK; + +/* + * Archive write update file time table (the -u, -C flag), hashed by filename. + * Filenames are stored in a scratch file at seek offset into the file. 
The + * file time (mod time) and the file name length (for a quick check) are + * stored in a hash table node. We were forced to use a scratch file because + * with -u, the mtime for every node in the archive must always be available + * to compare against (and this data can get REALLY large with big archives). + * By being careful to read only when we have a good chance of a match, the + * performance loss is not measurable (and the size of the archive we can + * handle is greatly increased). + */ +typedef struct ftm { + off_t seek; /* location in scratch file */ + struct timespec mtim; /* files last modification time */ + struct ftm *fow; + int namelen; /* file name length */ +} FTM; + +/* + * Interactive rename table (-i flag), hashed by orig filename. + * We assume this will not be a large table as this mapping data can only be + * obtained through interactive input by the user. Nobody is going to type in + * changes for 500000 files? We use chaining to resolve collisions. + */ + +typedef struct namt { + char *oname; /* old name */ + char *nname; /* new name typed in by the user */ + struct namt *fow; +} NAMT; + +/* + * Unique device mapping tables. Some protocols (e.g. cpio) require that the + * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they + * are links to the same file. Appending to archives can break this. For those + * protocols that have this requirement we map c_dev to a unique value not seen + * in the archive when we append. We also try to handle inode truncation with + * this table. (When the inode field in the archive header are too small, we + * remap the dev on writes to remove accidental collisions). + * + * The list is hashed by device number using chain collision resolution. Off of + * each DEVT are linked the various remaps for this device based on those bits + * in the inode which were truncated. 
For example if we are just remapping to + * avoid a device number during an update append, off the DEVT we would have + * only a single DLIST that has a truncation id of 0 (no inode bits were + * stripped for this device so far). When we spot inode truncation we create + * a new mapping based on the set of bits in the inode which were stripped off. + * so if the top four bits of the inode are stripped and they have a pattern of + * 0110...... (where . are those bits not truncated) we would have a mapping + * assigned for all inodes that has the same 0110.... pattern (with this dev + * number of course). This keeps the mapping sparse and should be able to store + * close to the limit of files which can be represented by the optimal + * combination of dev and inode bits, and without creating a fouled up archive. + * Note we also remap truncated devs in the same way (an exercise for the + * dedicated reader; always wanted to say that...:) + */ + +typedef struct devt { + dev_t dev; /* the orig device number we now have to map */ + struct devt *fow; /* new device map list */ + struct dlist *list; /* map list based on inode truncation bits */ +} DEVT; + +typedef struct dlist { + ino_t trunc_bits; /* truncation pattern for a specific map */ + dev_t dev; /* the new device id we use */ + struct dlist *fow; +} DLIST; + +/* + * ftree directory access time reset table. When we are done with a + * subtree we reset the access and mod time of the directory when the tflag is + * set. Not really explicitly specified in the pax spec, but easy and fast to + * do (and this may have even been intended in the spec, it is not clear). + * table is hashed by inode with chaining. + */ + +typedef struct atdir { + struct file_times ft; + struct atdir *fow; +} ATDIR; + +/* + * created directory time and mode storage entry. 
After pax is finished during + * extraction or copy, we must reset directory access modes and times that + * may have been modified after creation (they no longer have the specified + * times and/or modes). We must reset time in the reverse order of creation, + * because entries are added from the top of the file tree to the bottom. + * We MUST reset times from leaf to root (it will not work the other + * direction). + */ + +typedef struct dirdata { + struct file_times ft; + u_int16_t mode; /* file mode to restore */ + u_int16_t frc_mode; /* do we force mode settings? */ +} DIRDATA; + +static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ +static FTM **ftab = NULL; /* file time table for updating arch */ +static NAMT **ntab = NULL; /* interactive rename storage table */ +#ifndef NOCPIO +static DEVT **dtab = NULL; /* device/inode mapping tables */ +#endif +static ATDIR **atab = NULL; /* file tree directory time reset table */ +static DIRDATA *dirp = NULL; /* storage for setting created dir time/mode */ +static size_t dirsize; /* size of dirp table */ +static size_t dircnt = 0; /* entries in dir time/mode storage */ +static int ffd = -1; /* tmp file for file time table name storage */ + +/* + * hard link table routines + * + * The hard link table tries to detect hard links to files using the device and + * inode values. We do this when writing an archive, so we can tell the format + * write routine that this file is a hard link to another file. The format + * write routine then can store this file in whatever way it wants (as a hard + * link if the format supports that like tar, or ignore this info like cpio). + * (Actually a field in the format driver table tells us if the format wants + * hard link info. if not, we do not waste time looking for them). We also use + * the same table when reading an archive. 
In that situation, this table is + * used by the format read routine to detect hard links from stored dev and + * inode numbers (like cpio). This will allow pax to create a link when one + * can be detected by the archive format. + */ + +/* + * lnk_start + * Creates the hard link table. + * Return: + * 0 if created, -1 if failure + */ + +int +lnk_start(void) +{ + if (ltab != NULL) + return(0); + if ((ltab = calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { + paxwarn(1, "Cannot allocate memory for hard link table"); + return(-1); + } + return(0); +} + +/* + * chk_lnk() + * Looks up entry in hard link hash table. If found, it copies the name + * of the file it is linked to (we already saw that file) into ln_name. + * lnkcnt is decremented and if goes to 1 the node is deleted from the + * database. (We have seen all the links to this file). If not found, + * we add the file to the database if it has the potential for having + * hard links to other files we may process (it has a link count > 1) + * Return: + * if found returns 1; if not found returns 0; -1 on error + */ + +int +chk_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return(-1); + /* + * ignore those nodes that cannot have hard links + */ + if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) + return(0); + + /* + * hash inode number and look for this file + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) != NULL) { + /* + * its hash chain in not empty, walk down looking for it + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found a link. set the node type and copy in the + * name of the file it is to link to. we need to + * handle hardlinks to regular files differently than + * other links. 
+ */ + arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name, + sizeof(arcn->ln_name)); + /* XXX truncate? */ + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; + if (arcn->type == PAX_REG) + arcn->type = PAX_HRG; + else + arcn->type = PAX_HLK; + + /* + * if we have found all the links to this file, remove + * it from the database + */ + if (--pt->nlink <= 1) { + *ppt = pt->fow; + free(pt->name); + free(pt); + } + return(1); + } + } + + /* + * we never saw this file before. It has links so we add it to the + * front of this hash chain + */ + if ((pt = malloc(sizeof(HRDLNK))) != NULL) { + if ((pt->name = strdup(arcn->name)) != NULL) { + pt->dev = arcn->sb.st_dev; + pt->ino = arcn->sb.st_ino; + pt->nlink = arcn->sb.st_nlink; + pt->fow = ltab[indx]; + ltab[indx] = pt; + return(0); + } + free(pt); + } + + paxwarn(1, "Hard link table out of memory"); + return(-1); +} + +/* + * purg_lnk + * remove reference for a file that we may have added to the data base as + * a potential source for hard links. We ended up not using the file, so + * we do not want to accidently point another file at it later on. 
+ */ + +void +purg_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return; + /* + * do not bother to look if it could not be in the database + */ + if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || + PAX_IS_HARDLINK(arcn->type)) + return; + + /* + * find the hash chain for this inode value, if empty return + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) == NULL) + return; + + /* + * walk down the list looking for the inode/dev pair, unlink and + * free if found + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + if (pt == NULL) + return; + + /* + * remove and free it + */ + *ppt = pt->fow; + free(pt->name); + free(pt); +} + +/* + * lnk_end() + * pull apart a existing link table so we can reuse it. We do this between + * read and write phases of append with update. (The format may have + * used the link table, and we need to start with a fresh table for the + * write phase + */ + +void +lnk_end(void) +{ + int i; + HRDLNK *pt; + HRDLNK *ppt; + + if (ltab == NULL) + return; + + for (i = 0; i < L_TAB_SZ; ++i) { + if (ltab[i] == NULL) + continue; + pt = ltab[i]; + ltab[i] = NULL; + + /* + * free up each entry on this chain + */ + while (pt != NULL) { + ppt = pt; + pt = ppt->fow; + free(ppt->name); + free(ppt); + } + } +} + +/* + * modification time table routines + * + * The modification time table keeps track of last modification times for all + * files stored in an archive during a write phase when -u is set. We only + * add a file to the archive if it is newer than a file with the same name + * already stored on the archive (if there is no other file with the same + * name on the archive it is added). This applies to writes and appends. 
+ * An append with an -u must read the archive and store the modification time + * for every file on that archive before starting the write phase. It is clear + * that this is one HUGE database. To save memory space, the actual file names + * are stored in a scratch file and indexed by an in-memory hash table. The + * hash table is indexed by hashing the file path. The nodes in the table store + * the length of the filename and the lseek offset within the scratch file + * where the actual name is stored. Since there are never any deletions from + * this table, fragmentation of the scratch file is never a issue. Lookups + * seem to not exhibit any locality at all (files in the database are rarely + * looked up more than once...), so caching is just a waste of memory. The + * only limitation is the amount of scratch file space available to store the + * path names. + */ + +/* + * ftime_start() + * create the file time hash table and open for read/write the scratch + * file. (after created it is unlinked, so when we exit we leave + * no witnesses). + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +ftime_start(void) +{ + + if (ftab != NULL) + return(0); + if ((ftab = calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { + paxwarn(1, "Cannot allocate memory for file time table"); + return(-1); + } + + /* + * get random name and create temporary scratch file, unlink name + * so it will get removed on exit + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((ffd = mkstemp(tempfile)) == -1) { + syswarn(1, errno, "Unable to create temporary file: %s", + tempfile); + return(-1); + } + (void)unlink(tempfile); + + return(0); +} + +/* + * chk_ftime() + * looks up entry in file time hash table. If not found, the file is + * added to the hash table and the file named stored in the scratch file. + * If a file with the same name is found, the file times are compared and + * the most recent file time is retained. 
If the new file was younger (or
 *	was not in the database) the new file is selected for storage.
 * Return:
 *	0 if file should be added to the archive, 1 if it should be skipped,
 *	-1 on error
 */

int
chk_ftime(ARCHD *arcn)
{
	FTM *ent;
	int len;
	u_int slot;
	char stored[PAXPATHLEN+1];

	/*
	 * without a table there is nothing to compare against, so the
	 * file always goes into the archive
	 */
	if (ftab == NULL)
		return(0);

	len = arcn->nlen;
	slot = st_hash(arcn->name, len, F_TAB_SZ);

	/*
	 * walk the chain for this hash value. Names live in the scratch
	 * file, so only pay for the seek and read when the stored length
	 * equals ours.
	 */
	for (ent = ftab[slot]; ent != NULL; ent = ent->fow) {
		if (ent->namelen != len)
			continue;
		if (lseek(ffd,ent->seek,SEEK_SET) != ent->seek) {
			syswarn(1, errno,
			    "Failed ftime table seek");
			return(-1);
		}
		if (read(ffd, stored, len) != len) {
			syswarn(1, errno,
			    "Failed ftime table read");
			return(-1);
		}
		if (strncmp(stored, arcn->name, len) == 0)
			break;
	}

	if (ent != NULL) {
		/*
		 * same name already recorded: remember the newer mtime and
		 * select the file only when it is the newer one
		 */
		if (timespeccmp(&arcn->sb.st_mtim, &ent->mtim, >)) {
			ent->mtim = arcn->sb.st_mtim;
			return(0);
		}
		return(1);
	}

	/*
	 * first time we see this name: append it to the scratch file and
	 * put a node recording its offset at the head of the hash chain
	 */
	if ((ent = malloc(sizeof(FTM))) == NULL) {
		paxwarn(1, "File time table ran out of memory");
		return(-1);
	}
	if ((ent->seek = lseek(ffd, 0, SEEK_END)) < 0) {
		syswarn(1, errno, "Failed seek on file time table");
		free(ent);
		return(-1);
	}
	if (write(ffd, arcn->name, len) != len) {
		syswarn(1, errno, "Failed write to file time table");
		free(ent);
		return(-1);
	}
	ent->mtim = arcn->sb.st_mtim;
	ent->namelen = len;
	ent->fow = ftab[slot];
	ftab[slot] = ent;
	return(0);
}

/*
 * escaping (absolute or w/"..") symlink table routines
 *
 * By default, an archive shouldn't be able extract to outside of the
 * current directory.  What should we do if the archive contains a symlink
 * whose value is either absolute or contains ".." components?  What we'll
 * do is initially create the path as an empty file (to block attempts to
 * reference _through_ it) and instead record its path and desired
 * final value and mode.  Then once all the other archive
 * members are created (but before the pass to set timestamps on
 * directories) we'll process those records, replacing the placeholder with
 * the correct symlink and setting them to the correct mode, owner, group,
 * and timestamps.
 *
 * Note: we also need to handle hardlinks to symlinks (barf) as well as
 * hardlinks whose target is replaced by a later entry in the archive (barf^2).
 *
 * So we track things by dev+ino of the placeholder file, associating with
 * that the value and mode of the final symlink and a list of paths that
 * should all be hardlinks of that.  We'll 'store' the symlink's desired
 * timestamps, owner, and group by setting them on the placeholder file.
 *
 * The operations are:
 * a) create an escaping symlink: create the placeholder file and add an entry
 *    for the new link
 * b) create a hardlink: do the link.
If the target turns out to be a + * zero-length file whose dev+ino are in the symlink table, then add this + * path to the list of names for that link + * c) perform deferred processing: for each entry, check each associated path: + * if it's a zero-length file with the correct dev+ino then recreate it as + * the specified symlink or hardlink to the first such + */ + +struct slpath { + char *sp_path; + struct slpath *sp_next; +}; +struct slinode { + ino_t sli_ino; + char *sli_value; + struct slpath sli_paths; + struct slinode *sli_fow; /* hash table chain */ + dev_t sli_dev; + mode_t sli_mode; +}; + +static struct slinode **slitab = NULL; + +/* + * sltab_start() + * create the hash table + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +sltab_start(void) +{ + + if ((slitab = calloc(SL_TAB_SZ, sizeof *slitab)) == NULL) { + syswarn(1, errno, "symlink table"); + return(-1); + } + + return(0); +} + +/* + * sltab_add_sym() + * Create the placeholder and tracking info for an escaping symlink. 
+ * Return: + * 0 on success, -1 otherwise + */ + +int +sltab_add_sym(const char *path0, const char *value0, mode_t mode) +{ + struct stat sb; + struct slinode *s; + struct slpath *p; + char *path, *value; + u_int indx; + int fd; + + /* create the placeholder */ + fd = open(path0, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0600); + if (fd == -1) + return (-1); + if (fstat(fd, &sb) == -1) { + unlink(path0); + close(fd); + return (-1); + } + close(fd); + + if (havechd && *path0 != '/') { + if ((path = realpath(path0, NULL)) == NULL) { + syswarn(1, errno, "Cannot canonicalize %s", path0); + unlink(path0); + return (-1); + } + } else if ((path = strdup(path0)) == NULL) { + syswarn(1, errno, "defered symlink path"); + unlink(path0); + return (-1); + } + if ((value = strdup(value0)) == NULL) { + syswarn(1, errno, "defered symlink value"); + unlink(path); + free(path); + return (-1); + } + + /* now check the hash table for conflicting entry */ + indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ; + for (s = slitab[indx]; s != NULL; s = s->sli_fow) { + if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev) + continue; + + /* + * One of our placeholders got removed behind our back and + * we've reused the inode. Weird, but clean up the mess. + */ + free(s->sli_value); + free(s->sli_paths.sp_path); + p = s->sli_paths.sp_next; + while (p != NULL) { + struct slpath *next_p = p->sp_next; + + free(p->sp_path); + free(p); + p = next_p; + } + goto set_value; + } + + /* Normal case: create a new node */ + if ((s = malloc(sizeof *s)) == NULL) { + syswarn(1, errno, "defered symlink"); + unlink(path); + free(path); + free(value); + return (-1); + } + s->sli_ino = sb.st_ino; + s->sli_dev = sb.st_dev; + s->sli_fow = slitab[indx]; + slitab[indx] = s; + +set_value: + s->sli_paths.sp_path = path; + s->sli_paths.sp_next = NULL; + s->sli_value = value; + s->sli_mode = mode; + return (0); +} + +/* + * sltab_add_link() + * A hardlink was created; if it looks like a placeholder, handle the + * tracking. 
+ * Return: + * 0 if the path was recorded against a tracked placeholder, + * 1 if there was nothing to do (sb is not an empty regular file, or + * its dev+ino is not in the table), -1 if something went wrong + */ + +int +sltab_add_link(const char *path, const struct stat *sb) +{ + struct slinode *s; + struct slpath *p; + u_int indx; + + if (!S_ISREG(sb->st_mode) || sb->st_size != 0) + return (1); + + /* find the hash table entry for this hardlink */ + indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ; + for (s = slitab[indx]; s != NULL; s = s->sli_fow) { + if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev) + continue; + + if ((p = malloc(sizeof *p)) == NULL) { + syswarn(1, errno, "deferred symlink hardlink"); + return (-1); + } + if (havechd && *path != '/') { + if ((p->sp_path = realpath(path, NULL)) == NULL) { + syswarn(1, errno, "Cannot canonicalize %s", + path); + free(p); + return (-1); + } + } else if ((p->sp_path = strdup(path)) == NULL) { + syswarn(1, errno, "defered symlink hardlink path"); + free(p); + return (-1); + } + + /* link it in, right behind the entry's first path */ + p->sp_next = s->sli_paths.sp_next; + s->sli_paths.sp_next = p; + return (0); + } + + /* not found */ + return (1); +} + +/* + * sltab_process_one() + * Turn one recorded path back into the real thing: if it is still the + * placeholder for s, remove it and either hardlink it to first (when + * first is not NULL) or create the symlink, then set ids, mode and + * times on it. Warnings are suppressed when in_sig is set. + * Return: + * 1 if the caller should use this path as first for the paths that + * follow, 0 otherwise + */ +static int +sltab_process_one(struct slinode *s, struct slpath *p, const char *first, + int in_sig) +{ + struct stat sb; + char *path = p->sp_path; + mode_t mode; + int err; + + /* + * is it the expected placeholder? This can fail legitimately + * if the archive overwrote the link with another, later entry, + * so don't warn. + */ + if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode) || sb.st_size != 0 || + sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev) + return (0); + + if (unlink(path) && errno != ENOENT) { + if (!in_sig) + syswarn(1, errno, "deferred symlink removal"); + return (0); + } + + err = 0; + if (first != NULL) { + /* add another hardlink to the existing symlink */ + if (linkat(AT_FDCWD, first, AT_FDCWD, path, 0) == 0) + return (0); + + /* + * Couldn't hardlink the symlink for some reason, so we'll + * try creating it as its own symlink, but save the error + * for reporting if that fails.
+ */ + err = errno; + } + + if (symlink(s->sli_value, path)) { + if (!in_sig) { + const char *qualifier = ""; + if (err) + qualifier = " hardlink"; + else + err = errno; + + syswarn(1, err, "deferred symlink%s: %s", + qualifier, path); + } + return (0); + } + + /* success, so set the id, mode, and times (taken from the placeholder's stat) */ + mode = s->sli_mode; + if (pids) { + /* if can't set the ids, force the set[ug]id bits off */ + if (set_ids(path, sb.st_uid, sb.st_gid)) + mode &= ~(SETBITS); + } + + if (pmode) + set_pmode(path, mode); + + if (patime || pmtime) + set_ftime(path, &sb.st_mtim, &sb.st_atim, 0); + + /* + * If we tried to link to first but failed, then this new symlink + * might be a better one to try in the future. Guess from the errno. + */ + if (err == 0 || err == ENOENT || err == EMLINK || err == EOPNOTSUPP) + return (1); + return (0); +} + +/* + * sltab_process() + * Do all the delayed processing for escaping symlinks, emptying the + * table as it goes. When in_sig is set nothing is freed (the memory + * is simply abandoned). + */ + +void +sltab_process(int in_sig) +{ + struct slinode *s; + struct slpath *p; + char *first; + u_int indx; + + if (slitab == NULL) + return; + + /* walk across the entire hash table */ + for (indx = 0; indx < SL_TAB_SZ; indx++) { + while ((s = slitab[indx]) != NULL) { + /* pop this entry */ + slitab[indx] = s->sli_fow; + + first = NULL; + p = &s->sli_paths; + while (1) { + struct slpath *next_p; + + /* the path that becomes first stays allocated until it is replaced or the entry is done */ + if (sltab_process_one(s, p, first, in_sig)) { + if (!in_sig) + free(first); + first = p->sp_path; + } else if (!in_sig) + free(p->sp_path); + + if ((next_p = p->sp_next) == NULL) + break; + /* copy the next node into the embedded head so p never has to move */ + *p = *next_p; + if (!in_sig) + free(next_p); + } + if (!in_sig) { + free(first); + free(s->sli_value); + free(s); + } + } + } + if (!in_sig) + free(slitab); + slitab = NULL; +} + + +/* + * Interactive rename table routines + * + * The interactive rename table keeps track of the new names that the user + * assigns to files from tty input. Since this map is unique for each file + * we must store it in case there is a reference to the file later in archive + * (a link).
Otherwise we will be unable to find the file we know was + * extracted. The remapping of these files is stored in a memory based hash + * table (it is assumed since input must come from /dev/tty, it is unlikely to + * be a very large table). + */ + +/* + * name_start() + * create the interactive rename table + * Return: + * 0 if successful, -1 otherwise + */ + +int +name_start(void) +{ + if (ntab != NULL) + return(0); + if ((ntab = calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for interactive rename table"); + return(-1); + } + return(0); +} + +/* + * add_name() + * add the new name to old name mapping just created by the user. + * If an old name mapping is found (there may be duplicate names on an + * archive) only the most recent is kept. + * Return: + * 0 if added, -1 otherwise + */ + +int +add_name(char *oname, int onamelen, char *nname) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) { + /* + * should never happen + */ + paxwarn(0, "No interactive rename table, links may fail"); + return(0); + } + + /* + * look to see if we have already mapped this file, if so we + * will update it + */ + indx = st_hash(oname, onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) != NULL) { + /* + * look down the has chain for the file + */ + while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) + pt = pt->fow; + + if (pt != NULL) { + /* + * found an old mapping, replace it with the new one + * the user just input (if it is different) + */ + if (strcmp(nname, pt->nname) == 0) + return(0); + + free(pt->nname); + if ((pt->nname = strdup(nname)) == NULL) { + paxwarn(1, "Cannot update rename table"); + return(-1); + } + return(0); + } + } + + /* + * this is a new mapping, add it to the table + */ + if ((pt = malloc(sizeof(NAMT))) != NULL) { + if ((pt->oname = strdup(oname)) != NULL) { + if ((pt->nname = strdup(nname)) != NULL) { + pt->fow = ntab[indx]; + ntab[indx] = pt; + return(0); + } + free(pt->oname); + } + free(pt); + } + paxwarn(1, 
"Interactive rename table out of memory"); + return(-1); +} + +/* + * sub_name() + * look up a link name to see if it points at a file that has been + * remapped by the user. If found, the link is adjusted to contain the + * new name (oname is the link to name) + */ + +void +sub_name(char *oname, int *onamelen, int onamesize) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) + return; + /* + * look the name up in the hash table + */ + indx = st_hash(oname, *onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) == NULL) + return; + + while (pt != NULL) { + /* + * walk down the hash chain looking for a match + */ + if (strcmp(oname, pt->oname) == 0) { + /* + * found it, replace it with the new name + * and return (we know that oname has enough space) + */ + *onamelen = strlcpy(oname, pt->nname, onamesize); + if (*onamelen >= onamesize) + *onamelen = onamesize - 1; /* XXX truncate? */ + return; + } + pt = pt->fow; + } + + /* + * no match, just return + */ +} + +#ifndef NOCPIO +/* + * device/inode mapping table routines + * (used with formats that store device and inodes fields) + * + * device/inode mapping tables remap the device field in a archive header. The + * device/inode fields are used to determine when files are hard links to each + * other. However these values have very little meaning outside of that. This + * database is used to solve one of two different problems. + * + * 1) when files are appended to an archive, while the new files may have hard + * links to each other, you cannot determine if they have hard links to any + * file already stored on the archive from a prior run of pax. We must assume + * that these inode/device pairs are unique only within a SINGLE run of pax + * (which adds a set of files to an archive). So we have to make sure the + * inode/dev pairs we add each time are always unique. We do this by observing + * while the inode field is very dense, the use of the dev field is fairly + * sparse. 
Within each run of pax, we remap any device number of a new archive + * member that has a device number used in a prior run and already stored in a + * file on the archive. During the read phase of the append, we store the + * device numbers used and mark them to not be used by any file during the + * write phase. If during write we go to use one of those old device numbers, + * we remap it to a new value. + * + * 2) Often the fields in the archive header used to store these values are + * too small to store the entire value. The result is an inode or device value + * which can be truncated. This really can foul up an archive. With truncation + * we end up creating links between files that are really not links (after + * truncation the inodes are the same value). We address that by detecting + * truncation and forcing a remap of the device field to split truncated + * inodes away from each other. Each truncation creates a pattern of bits that + * are removed. We use this pattern of truncated bits to partition the inodes + * on a single device to many different devices (each one represented by the + * truncated bit pattern). All inodes on the same device that have the same + * truncation pattern are mapped to the same new device. Two inodes that + * truncate to the same value clearly will always have different truncation + * bit patterns, so they will be split from away each other. When we spot + * device truncation we remap the device number to a non truncated value. + * (for more info see table.h for the data structures involved). + */ + +static DEVT *chk_dev(dev_t, int); + +/* + * dev_start() + * create the device mapping table + * Return: + * 0 if successful, -1 otherwise + */ + +int +dev_start(void) +{ + if (dtab != NULL) + return(0); + if ((dtab = calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for device mapping table"); + return(-1); + } + return(0); +} + +/* + * add_dev() + * add a device number to the table. 
this will force the device to be + * remapped to a new value if it is used during a write phase. This + * function is called during the read phase of an append to prohibit the + * use of any device number already in the archive. + * Return: + * 0 if added ok, -1 otherwise + */ + +int +add_dev(ARCHD *arcn) +{ + if (chk_dev(arcn->sb.st_dev, 1) == NULL) + return(-1); + return(0); +} + +/* + * chk_dev() + * check for a device value in the device table. If not found and the add + * flag is set, it is added. This does NOT assign any mapping values, just + * adds the device number as one that needs to be remapped. If this device + * is already mapped, just return with a pointer to that entry. + * Return: + * pointer to the entry for this device in the device map table. Null + * if the add flag is not set and the device is not in the table (it has + * not been seen yet). If add is set and the device cannot be added, null + * is returned (indicates an error). Null is also returned when the + * table itself has not been created. + */ + +static DEVT * +chk_dev(dev_t dev, int add) +{ + DEVT *pt; + u_int indx; + + if (dtab == NULL) + return(NULL); + /* + * look to see if this device is already in the table + */ + indx = ((unsigned)dev) % D_TAB_SZ; + if ((pt = dtab[indx]) != NULL) { + while ((pt != NULL) && (pt->dev != dev)) + pt = pt->fow; + + /* + * found it, return a pointer to it + */ + if (pt != NULL) + return(pt); + } + + /* + * not in table, we add it only if told to as this may just be a check + * to see if a device number is being used. + */ + if (add == 0) + return(NULL); + + /* + * allocate a node for this device and add it to the front of the hash + * chain. Note we do not assign remap values here, so the pt->list + * list must be NULL.
+ */ + if ((pt = malloc(sizeof(DEVT))) == NULL) { + paxwarn(1, "Device map table out of memory"); + return(NULL); + } + pt->dev = dev; + pt->list = NULL; + pt->fow = dtab[indx]; + dtab[indx] = pt; + return(pt); +} +/* + * map_dev() + * given an inode and device storage mask (the mask has a 1 for each bit + * the archive format is able to store in a header), we check for inode + * and device truncation and remap the device as required. Device mapping + * can also occur when during the read phase of append a device number was + * seen (and was marked as do not use during the write phase). WE ASSUME + * that unsigned longs are the same size or bigger than the fields used + * for ino_t and dev_t. If not the types will have to be changed. + * Return: + * 0 in every case as the code stands: when a remap cannot be set up + * the problem is reported with two warnings and 0 is still returned. + */ + +int +map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) +{ + DEVT *pt; + DLIST *dpt; + static dev_t lastdev = 0; /* next device number to try */ + int trc_ino = 0; + int trc_dev = 0; + ino_t trunc_bits = 0; + ino_t nino; + + if (dtab == NULL) + return(0); + /* + * check for device and inode truncation, and extract the truncated + * bit pattern. + */ + if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) + ++trc_dev; + if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { + ++trc_ino; + trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); + } + + /* + * see if this device is already being mapped, look up the device + * then find the truncation bit pattern which applies + */ + if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { + /* + * this device is already marked to be remapped + */ + for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) + if (dpt->trunc_bits == trunc_bits) + break; + + if (dpt != NULL) { + /* + * we are being remapped for this device and pattern + * change the device number to be stored and return + */ + arcn->sb.st_dev = dpt->dev; + arcn->sb.st_ino = nino; + return(0); + } + } else { + /* + * this device is not being remapped YET.
if we do not have any + * form of truncation, we do not need a remap + */ + if (!trc_ino && !trc_dev) + return(0); + + /* + * we have truncation, have to add this as a device to remap + */ + if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) + goto bad; + + /* + * if we just have a truncated inode, we have to make sure that + * all future inodes that do not truncate (they have the + * truncation pattern of all 0's) continue to map to the same + * device number. We probably have already written inodes with + * this device number to the archive with the truncation + * pattern of all 0's. So we add the mapping for all 0's to the + * same device number. + */ + if (!trc_dev && (trunc_bits != 0)) { + if ((dpt = malloc(sizeof(DLIST))) == NULL) + goto bad; + dpt->trunc_bits = 0; + dpt->dev = arcn->sb.st_dev; + dpt->fow = pt->list; + pt->list = dpt; + } + } + + /* + * look for a device number not being used. We must watch for wrap + * around on lastdev (so we do not get stuck looking forever!) + * NOTE(review): if dev_t is a signed type the wrap relies on signed + * overflow, which C leaves undefined -- confirm dev_t's type here. + */ + while (++lastdev > 0) { + if (chk_dev(lastdev, 0) != NULL) + continue; + /* + * found an unused value. If we have reached truncation point + * for this format we are hosed, so we give up. Otherwise we + * mark it as being used. + */ + if (((lastdev & ((dev_t)dev_mask)) != lastdev) || + (chk_dev(lastdev, 1) == NULL)) + goto bad; + break; + } + + if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL)) + goto bad; + + /* + * got a new device number, store it under this truncation pattern. + * change the device number this file is being stored with.
+ */ + dpt->trunc_bits = trunc_bits; + dpt->dev = lastdev; + dpt->fow = pt->list; + pt->list = dpt; + arcn->sb.st_dev = lastdev; + arcn->sb.st_ino = nino; + return(0); + + bad: + /* the member is stored with its original device/inode values */ + paxwarn(1, "Unable to fix truncated inode/device field when storing %s", + arcn->name); + paxwarn(0, "Archive may create improper hard links when extracted"); + return(0); +} +#endif /* NOCPIO */ + +/* + * directory access/mod time reset table routines (for directories READ by pax) + * + * The pax -t flag requires that access times of archive files be the same + * before being read by pax. For regular files, access time is restored after + * the file has been copied. This database provides the same functionality for + * directories read during file tree traversal. Restoring directory access time + * is more complex than files since directories may be read several times until + * all the descendants in their subtree are visited by fts. Directory access + * and modification times are stored during the fts pre-order visit (done + * before any descendants in the subtree are visited) and restored after the + * fts post-order visit (after all the descendants have been visited). In the + * case of premature exit from a subtree (like from the effects of -n), any + * directory entries left in this database are reset during final cleanup + * operations of pax. Entries are hashed by inode number for fast lookup. + */ + +/* + * atdir_start() + * create the directory access time database for directories READ by pax. + * Return: + * 0 if created ok (or already present), -1 otherwise. + */ + +int +atdir_start(void) +{ + if (atab != NULL) + return(0); + if ((atab = calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { + paxwarn(1,"Cannot allocate space for directory access time table"); + return(-1); + } + return(0); +} + + +/* + * atdir_end() + * walk through the directory access time table and reset the access time + * of any directory who still has an entry left in the database.
These + * entries are for directories READ by pax + */ + +void +atdir_end(void) +{ + ATDIR *pt; + int i; + + if (atab == NULL) + return; + /* + * for each non-empty hash table entry reset all the directories + * chained there. + */ + for (i = 0; i < A_TAB_SZ; ++i) { + if ((pt = atab[i]) == NULL) + continue; + /* + * remember to force the times, set_ftime() looks at pmtime + * and patime, which only applies to things CREATED by pax, + * not read by pax. Read time reset is controlled by -t. + */ + for (; pt != NULL; pt = pt->fow) + set_attr(&pt->ft, 1, 0, 0, 0); + } +} + +/* + * add_atdir() + * add a directory to the directory access time table. Table is hashed + * and chained by inode number. This is for directories READ by pax + */ + +void +add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp, + const struct timespec *atimp) +{ + ATDIR *pt; + sigset_t allsigs, savedsigs; + u_int indx; + + if (atab == NULL) + return; + + /* + * make sure this directory is not already in the table, if so just + * return (the older entry always has the correct time). The only + * way this will happen is when the same subtree can be traversed by + * different args to pax and the -n option is aborting fts out of a + * subtree before all the post-order visits have been made. + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) != NULL) { + while (pt != NULL) { + if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) + break; + pt = pt->fow; + } + + /* + * oops, already there. Leave it alone. 
+ */ + if (pt != NULL) + return; + } + + /* + * add it to the front of the hash chain + */ + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + if ((pt = malloc(sizeof *pt)) != NULL) { + if ((pt->ft.ft_name = strdup(fname)) != NULL) { + pt->ft.ft_dev = dev; + pt->ft.ft_ino = ino; + pt->ft.ft_mtim = *mtimp; + pt->ft.ft_atim = *atimp; + pt->fow = atab[indx]; + atab[indx] = pt; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + return; + } + free(pt); + } + + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + paxwarn(1, "Directory access time reset table ran out of memory"); +} + +/* + * get_atdir() + * look up a directory by inode and device number to obtain the access + * and modification time you want to set to. If found, the modification + * and access time parameters are set and the entry is removed from the + * table (as it is no longer needed). These are for directories READ by + * pax + * Return: + * 0 if found, -1 if not found. + */ + +int +do_atdir(const char *name, dev_t dev, ino_t ino) +{ + ATDIR *pt; + ATDIR **ppt; + sigset_t allsigs, savedsigs; + u_int indx; + + if (atab == NULL) + return(-1); + /* + * hash by inode and search the chain for an inode and device match + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) == NULL) + return(-1); + + ppt = &(atab[indx]); + while (pt != NULL) { + if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) + break; + /* + * no match, go to next one + */ + ppt = &(pt->fow); + pt = pt->fow; + } + + /* + * return if we did not find it. + */ + if (pt == NULL || pt->ft.ft_name == NULL || + strcmp(name, pt->ft.ft_name) == 0) + return(-1); + + /* + * found it. set the times and remove the entry from the table. 
+ */ + set_attr(&pt->ft, 1, 0, 0, 0); + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + *ppt = pt->fow; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + free(pt->ft.ft_name); + free(pt); + return(0); +} + +/* + * directory access mode and time storage routines (for directories CREATED + * by pax). + * + * Pax requires that extracted directories, by default, have their access/mod + * times and permissions set to the values specified in the archive. During the + * actions of extracting (and creating the destination subtree during -rw copy) + * directories extracted may be modified after being created. Even worse is + * that these directories may have been created with file permissions which + * prohibits any descendants of these directories from being extracted. When + * directories are created by pax, access rights may be added to permit the + * creation of files in their subtree. Every time pax creates a directory, the + * times and file permissions specified by the archive are stored. After all + * files have been extracted (or copied), these directories have their times + * and file modes reset to the stored values. The directory info is restored in + * reverse order as entries were added from root to leaf: to restore atime + * properly, we must go backwards. + */ + +/* + * dir_start() + * set up the directory time and file mode storage for directories CREATED + * by pax. 
+ * Return: + * 0 if ok, -1 otherwise + */ + +int +dir_start(void) +{ + if (dirp != NULL) + return(0); + + dirsize = DIRP_SIZE; + if ((dirp = reallocarray(NULL, dirsize, sizeof(DIRDATA))) == NULL) { + paxwarn(1, "Unable to allocate memory for directory times"); + return(-1); + } + return(0); +} + +/* + * add_dir() + * add the mode and times for a newly CREATED directory + * name is name of the directory, psb the stat buffer with the data in it, + * frc_mode is a flag that says whether to force the setting of the mode + * (ignoring the user set values for preserving file mode). Frc_mode is + * for the case where we created a file and found that the resulting + * directory was not writeable and the user asked for file modes to NOT + * be preserved. (we have to preserve what was created by default, so we + * have to force the setting at the end. this is stated explicitly in the + * pax spec) + */ + +void +add_dir(char *name, struct stat *psb, int frc_mode) +{ + DIRDATA *dblk; + sigset_t allsigs, savedsigs; + char realname[PATH_MAX], *rp; + + if (dirp == NULL) + return; + + if (havechd && *name != '/') { + if ((rp = realpath(name, realname)) == NULL) { + paxwarn(1, "Cannot canonicalize %s", name); + return; + } + name = rp; + } + if (dircnt == dirsize) { + dblk = reallocarray(dirp, dirsize * 2, sizeof(DIRDATA)); + if (dblk == NULL) { + paxwarn(1, "Unable to store mode and times for created" + " directory: %s", name); + return; + } + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + dirp = dblk; + dirsize *= 2; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + } + dblk = &dirp[dircnt]; + if ((dblk->ft.ft_name = strdup(name)) == NULL) { + paxwarn(1, "Unable to store mode and times for created" + " directory: %s", name); + return; + } + dblk->ft.ft_mtim = psb->st_mtim; + dblk->ft.ft_atim = psb->st_atim; + dblk->ft.ft_ino = psb->st_ino; + dblk->ft.ft_dev = psb->st_dev; + dblk->mode = psb->st_mode & ABITS; + dblk->frc_mode = frc_mode; + sigprocmask(SIG_BLOCK, &allsigs, 
&savedsigs); + ++dircnt; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); +} + +/* + * delete_dir() + * When we rmdir a directory, we may want to make sure we don't + * later warn about being unable to set its mode and times. + */ + +void +delete_dir(dev_t dev, ino_t ino) +{ + DIRDATA *dblk; + char *name; + size_t i; + + if (dirp == NULL) + return; + for (i = 0; i < dircnt; i++) { + dblk = &dirp[i]; + + if (dblk->ft.ft_name == NULL) + continue; + if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) { + name = dblk->ft.ft_name; + dblk->ft.ft_name = NULL; + free(name); + break; + } + } +} + +/* + * proc_dir(int in_sig) + * process all file modes and times stored for directories CREATED + * by pax. If in_sig is set, we're in a signal handler and can't + * free stuff. + */ + +void +proc_dir(int in_sig) +{ + DIRDATA *dblk; + size_t cnt; + + if (dirp == NULL) + return; + /* + * read backwards through the file and process each directory + */ + cnt = dircnt; + while (cnt-- > 0) { + dblk = &dirp[cnt]; + /* + * If we remove a directory we created, we replace the + * ft_name with NULL. Ignore those. + */ + if (dblk->ft.ft_name == NULL) + continue; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode, + in_sig); + if (!in_sig) + free(dblk->ft.ft_name); + } + + if (!in_sig) + free(dirp); + dirp = NULL; + dircnt = 0; +} + +/* + * database independent routines + */ + +/* + * st_hash() + * hashes filenames to a u_int for hashing into a table. Looks at the tail + * end of file, as this provides far better distribution than any other + * part of the name. For performance reasons we only care about the last + * MAXKEYLEN chars (should be at LEAST large enough to pick off the file + * name). 
Was tested on 500,000 name file tree traversal from the root + * and gave almost a perfectly uniform distribution of keys when used with + * prime sized tables (MAXKEYLEN was 128 in test). Hashes sizeof(u_int) + * chars at a time and pads the last, partial group with zero bytes. + * Return: + * the hash value of the string MOD (%) the table size. + */ + +u_int +st_hash(const char *name, int len, int tabsz) +{ + const char *pt; + char *dest; + const char *end; + int i; + u_int key = 0; + int steps; + int res; + u_int val; + + /* + * only look at the tail up to MAXKEYLEN, we do not need to waste + * time here (remember these are pathnames, the tail is what will + * spread out the keys) + */ + if (len > MAXKEYLEN) { + pt = &(name[len - MAXKEYLEN]); + len = MAXKEYLEN; + } else + pt = name; + + /* + * calculate the number of u_int size steps in the string and if + * there is a runt to deal with + */ + steps = len/sizeof(u_int); + res = len % sizeof(u_int); + + /* + * add up the value of the string in unsigned integer sized pieces + * too bad we cannot have unsigned int aligned strings, then we + * could avoid the expensive copy. Every byte of val is overwritten + * on each pass, so it needs no initial value here. + */ + for (i = 0; i < steps; ++i) { + end = pt + sizeof(u_int); + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * add in the runt: val is cleared first, so the bytes after the + * copied ones stay zero + */ + if (res) { + val = 0; + end = pt + res; + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * return the result mod the table size + */ + return(key % tabsz); +} diff --git a/bin/pax/tar.1 b/bin/pax/tar.1 new file mode 100644 index 0000000..bbdef11 --- /dev/null +++ b/bin/pax/tar.1 @@ -0,0 +1,410 @@ +.\" $OpenBSD: tar.1,v 1.62 2020/01/16 16:46:46 schwarze Exp $ +.\" +.\" Copyright (c) 1996 SigmaSoft, Th. Lockert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1.
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt TAR 1 +.Os +.Sh NAME +.Nm tar +.Nd tape archiver +.Sh SYNOPSIS +.Nm tar +.Sm off +.No { Cm crtux No } Op Cm 014578befHhjLmNOoPpqsvwXZz +.Sm on +.Bk -words +.Op Ar blocking-factor | archive | replstr +.Op Fl C Ar directory +.Op Fl I Ar file +.Op Ar +.Ek +.Nm tar +.No { Ns Fl crtux Ns } +.Bk -words +.Op Fl 014578eHhjLmNOoPpqvwXZz +.Op Fl b Ar blocking-factor +.Op Fl C Ar directory +.Op Fl f Ar archive +.Op Fl I Ar file +.Op Fl s Ar replstr +.Op Ar +.Ek +.Sh DESCRIPTION +The +.Nm +command creates, adds files to, or extracts files from an +archive file in +.Dq tar +format. +A tar archive is often stored on a magnetic tape, but can be +stored equally well on a floppy, CD-ROM, or in a regular disk file. 
+.Pp +In the first (legacy) form, all option flags except for +.Fl C +and +.Fl I +must be contained within the first argument to +.Nm +and must not be prefixed by a hyphen +.Pq Sq - . +Option arguments, if any, are processed as subsequent arguments to +.Nm +and are processed in the order in which their corresponding option +flags have been presented on the command line. +.Pp +In the second and preferred form, option flags may be given in any order +and are immediately followed by their corresponding option argument +values. +.Pp +One of the following flags must be present: +.Bl -tag -width Ds +.It Fl c +Create new archive, or overwrite an existing archive, +adding the specified files to it. +.It Fl r +Append the named new files to existing archive. +Note that this will only work on media on which an end-of-file mark +can be overwritten. +.It Fl t +List contents of archive. +If any files are named on the +command line, only those files will be listed. +The +.Ar file +arguments may be specified as glob patterns (see +.Xr glob 7 +for more information), in which case +.Nm +will list all archive members that match each pattern. +.It Fl u +Alias for +.Fl r . +.It Fl x +Extract files from archive. +If any files are named on the +command line, only those files will be extracted from the +archive. +The +.Ar file +arguments may be specified as glob patterns (see +.Xr glob 7 +for more information), in which case +.Nm +will extract all archive members that match each pattern. +.Pp +If more than one copy of a file exists in the +archive, later copies will overwrite earlier copies during +extraction. +The file mode and modification time are preserved +if possible. +The file mode is subject to modification by the +.Xr umask 2 . +.El +.Pp +In addition to the flags mentioned above, any of the following +flags may be used: +.Bl -tag -width Ds +.It Fl b Ar blocking-factor +Set blocking factor to use for the archive. +.Nm +uses 512-byte blocks. +The default is 20, the maximum is 126. 
+Archives with a blocking factor larger than 63 +violate the POSIX standard and will not be portable to all systems. +.It Fl C Ar directory +This is a positional argument which sets the working directory for the +following files. +When extracting, files will be extracted into +the specified directory; when creating, the specified files will be matched +from the directory. +.It Fl e +Stop after the first error. +.It Fl f Ar archive +Filename where the archive is stored. +Defaults to +.Pa /dev/rst0 . +If set to hyphen +.Pq Sq - +standard output is used. +See also the +.Ev TAPE +environment variable. +.It Fl H +Follow symlinks given on the command line only. +.It Fl h +Follow symbolic links as if they were normal files +or directories. +In extract mode this means that a directory entry in the archive +will not overwrite an existing symbolic link, but rather what the +link ultimately points to. +.It Fl I Ar file +This is a positional argument which reads the names of files to +archive or extract from the given file, one per line. +.It Fl j +Compress archive using bzip2. +The bzip2 utility must be installed separately. +.It Fl L +Synonym for the +.Fl h +option. +.It Fl m +Do not preserve modification time. +.It Fl N +Use only the numeric UID and GID values when creating or extracting an +archive. +.It Fl O +Write old-style (non-POSIX) archives. +.It Fl o +Don't write directory information that the older (V7) style +.Nm +is unable to decode. +This implies the +.Fl O +flag. +.It Fl P +Do not strip leading slashes +.Pq Sq / +from pathnames. +The default is to strip leading slashes. +.It Fl p +Preserve user and group ID as well as file mode regardless of +the current +.Xr umask 2 . +The setuid and setgid bits are only preserved if the user and group ID +could be preserved. +Only meaningful in conjunction with the +.Fl x +flag. +.It Fl q +Select the first archive member that matches each +.Ar file +operand. +No more than one archive member is matched for each +.Ar file . 
+When members of type directory are matched, the file hierarchy rooted at that +directory is also matched. +.It Fl s Ar replstr +Modify the archive member names according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +.Ar file +arguments may be given to restrict the list of archive members to those +specified. +.Pp +The format of these regular expressions is +.Pp +.Dl /old/new/[gp] +.Pp +As in +.Xr ed 1 , +.Va old +is a basic regular expression (see +.Xr re_format 7 ) +and +.Va new +can contain an ampersand +.Pq Ql & , +.Ql \e Ns Em n +(where +.Em n +is a digit) back-references, +or subexpression matching. +The +.Va old +string may also contain newline characters. +Any non-null character can be used as a delimiter +.Po +.Ql / +is shown here +.Pc . +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. +.Pp +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring, +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +standard error in the following format: +.Pp +.D1 Em original-pathname No >> Em new-pathname +.Pp +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl v +Verbose operation mode. +If +.Fl v +is specified multiple times or if the +.Fl t +option is also specified, +.Nm +will use a long format for listing files, similar to +.Xr ls 1 +.Fl l . +.It Fl w +Interactively rename files. +This option causes +.Nm +to prompt the user for the filename to use when storing or +extracting files in an archive. 
+.It Fl X +Do not cross mount points in the file system. +.It Fl Z +Compress archive using +.Xr compress 1 . +.It Fl z +Compress archive using +.Xr gzip 1 . +.El +.Pp +The options +.Op Fl 014578 +can be used to select one of the compiled-in backup devices, +.Pa /dev/rstN . +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.It Ev TAPE +Default tape device to use instead of +.Pa /dev/rst0 . +If set to hyphen +.Pq Sq - +standard output is used. +.El +.Sh FILES +.Bl -tag -width "/dev/rst0" +.It Pa /dev/rst0 +default archive name +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Sh EXAMPLES +Create an archive on the default tape drive, containing the files named +.Pa bonvole +and +.Pa sekve : +.Pp +.Dl $ tar c bonvole sekve +.Pp +Output a +.Xr gzip 1 +compressed archive containing the files +.Pa bonvole +and +.Pa sekve +to a file called +.Pa foriru.tar.gz : +.Pp +.Dl $ tar zcf foriru.tar.gz bonvole sekve +.Pp +Verbosely create an archive, called +.Pa backup.tar.gz , +of all files matching the shell +.Xr glob 7 +function +.Pa *.c : +.Pp +.Dl $ tar zcvf backup.tar.gz *.c +.Pp +Verbosely list, but do not extract, all files ending in +.Pa .jpeg +from a compressed archive named +.Pa backup.tar.gz . +Note that the glob pattern has been quoted to avoid expansion by the shell: +.Pp +.Dl $ tar tvzf backup.tar.gz '*.jpeg' +.Pp +For more detailed examples, see +.Xr pax 1 . +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. 
+In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive, which may violate the +specific archive format specification. +.Sh SEE ALSO +.Xr cpio 1 , +.Xr pax 1 +.Sh HISTORY +A +.Nm +command first appeared in +.At v7 . +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. +.Sh CAVEATS +The +.Fl j +and +.Fl L +flags are not portable to other versions of +.Nm +where they may have a different meaning. diff --git a/bin/pax/tar.c b/bin/pax/tar.c new file mode 100644 index 0000000..c62705b --- /dev/null +++ b/bin/pax/tar.c @@ -0,0 +1,1284 @@ +/* $OpenBSD: tar.c,v 1.68 2019/06/24 03:33:09 deraadt Exp $ */ +/* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <errno.h> +#include <grp.h> +#include <limits.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "extern.h" +#include "tar.h" + +/* + * Routines for reading, writing and header identify of various versions of tar + */ + +static size_t expandname(char *, size_t, char **, const char *, size_t); +static u_long tar_chksm(char *, int); +static char *name_split(char *, int); +static int ul_oct(u_long, char *, int, int); +static int ull_oct(unsigned long long, char *, int, int); +#ifndef SMALL +static int rd_xheader(ARCHD *arcn, int, off_t); +#endif + +static uid_t uid_nobody; +static uid_t uid_warn; +static gid_t gid_nobody; +static gid_t gid_warn; + +/* + * Routines common to all versions of tar + */ + +int tar_nodir; /* do not write dirs under old tar */ +char *gnu_name_string; /* GNU 
././@LongLink hackery name */ + char *gnu_link_string; /* GNU ././@LongLink hackery link */ + + /* + * tar_endwr() + * add the tar trailer: NULLCNT (two) null blocks of BLKMULT bytes each + * Return: + * 0 if ok, -1 otherwise (what wr_skip returns) + */ + + int + tar_endwr(void) + { + return wr_skip(NULLCNT * BLKMULT); + } + + /* + * tar_endrd() + * no cleanup needed here, just return size of trailer (for append) + * Return: + * size of trailer (NULLCNT * BLKMULT) + */ + + off_t + tar_endrd(void) + { + return NULLCNT * BLKMULT; + } + + /* + * tar_trail() + * Called to determine if a header block is a valid trailer. We are passed + * an ARCHD pointer (not used here), the block, the in_resync flag (which + * tells us we are in resync mode; looking for a valid header), and cnt + * (which starts at zero) which is used to count the number of empty + * blocks we have seen so far. + * Return: + * 0 if a valid trailer (this all zero block brings *cnt up to NULLCNT + * while not in resync mode), -1 if the block is not all zero (not a + * trailer, but it might be a header), or 1 if the block is all zero but + * is not yet a trailer (such a block could never contain a header). + */ + + int + tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt) + { + int i; + + /* + * scan for the first non-zero byte; i only reaches BLKMULT when the + * whole block is zero. The trailer is two consecutive blocks of zero. + */ + for (i = 0; i < BLKMULT; ++i) { + if (buf[i] != '\0') + break; + } + + /* + * if not all zero it is not a trailer, but MIGHT be a header. + */ + if (i != BLKMULT) + return(-1); + + /* + * When given a zero block, we must be careful! + * If we are not in resync mode, check for the trailer. Have to watch + * out that we do not mis-identify file data as the trailer, so we do + * NOT try to id a trailer during resync mode. During resync mode we + * might as well throw this block out since a valid header can NEVER be + * a block of all 0 (we must have a valid file name). + * Note that *cnt is only advanced when we are not in resync mode. + */ + if (!in_resync && (++*cnt >= NULLCNT)) + return(0); + return(1); + } + + /* + * ul_oct() + * convert an unsigned long to an octal string. many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is '0' padded + * at the front to len. 
we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + + static int + ul_oct(u_long val, char *str, int len, int term) + { + char *pt; + + /* + * term selects the terminator, which is written backwards from the + * last byte of the field: 3 is a single NUL, 2 is a NUL followed by a + * space, 1 is a single space, and 0 (or any other value) is a space + * followed by a NUL. pt is left on the last byte available for digits. + */ + pt = str + len - 1; + switch (term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * write the octal digits from least to most significant, moving + * towards the front of the field, until the value is used up or the + * field is full (at least one digit is written if there is room) + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + + /* + * pad what is left at the front of the field with the digit 0. If + * val is still non-zero the field was too small to hold the number. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); + } + + /* + * ull_oct() + * Convert an unsigned long long to an octal string. One of many oddball + * field termination characters are used by the various versions of tar + * in the different fields. term selects which kind to use. str is + * '0' padded at the front to len. We are unable to use only one format + * as many old tar readers are very cranky about this. 
+ * Return: + * 0 if the number fit into the string, -1 otherwise + */ + + static int + ull_oct(unsigned long long val, char *str, int len, int term) + { + char *pt; + + /* + * term selects the terminator, which is written backwards from the + * last byte of the field: 3 is a single NUL, 2 is a NUL followed by a + * space, 1 is a single space, and 0 (or any other value) is a space + * followed by a NUL. pt is left on the last byte available for digits. + */ + pt = str + len - 1; + switch (term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * write the octal digits from least to most significant, moving + * towards the front of the field, until the value is used up or the + * field is full (at least one digit is written if there is room) + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + + /* + * pad what is left at the front of the field with the digit 0. If + * val is still non-zero the field was too small to hold the number. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); + } + + /* + * tar_chksm() + * calculate the checksum for a tar block counting the checksum field as + * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). + * The bytes of blk from 0 up to len are summed, except for the CHK_LEN + * bytes of the checksum field which starts at CHK_OFFSET. + * NOTE: we use len to short circuit summing 0's on write since we ALWAYS + * pad headers with 0. + * Return: + * unsigned long checksum + */ + + static u_long + tar_chksm(char *blk, int len) + { + char *stop; + char *pt; + u_long chksm = BLNKSUM; /* initial value is checksum field sum */ + + /* + * add the part of the block before the checksum field. Each byte is + * masked with 0xff so that it is added as a value from 0 to 255 even + * where char is a signed type. + */ + pt = blk; + stop = blk + CHK_OFFSET; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + /* + * move past the checksum field and keep going, spec counts the + * checksum field as the sum of 8 blanks (which is pre-computed as + * BLNKSUM). + * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding + * starts, no point in summing zero's). If len ends before the end of + * the checksum field, this second loop adds nothing. + */ + pt += CHK_LEN; + stop = blk + len; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + return(chksm); + } + + /* + * Routines for old BSD style tar (also made portable to sysV tar) + */ + + /* + * tar_id() + * determine if a block given to us is a valid tar header (and not a USTAR + * header). We have to be on the lookout for those pesky blocks of all + * zero's. 
+ * Return: + * 0 if a tar header, -1 otherwise + */ + + int + tar_id(char *blk, int size) + { + HD_TAR *hd; + HD_USTAR *uhd; + + if (size < BLKMULT) + return(-1); + hd = (HD_TAR *)blk; + uhd = (HD_USTAR *)blk; + + /* + * A header must have a file name, so a block whose name starts with a + * NUL is rejected first; this simple and fast test also throws out + * blocks of all zeros. Then make sure this is not a ustar header by + * looking for the ustar magic cookie. We should use TMAGLEN, but some + * USTAR archive programs are wrong and create archives missing the \0. + * Last we check the checksum. If this is ok we have to assume it is a + * valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + /* + * side effect: accepting an old tar header sets the global + * force_one_volume flag + */ + force_one_volume = 1; + return(0); + } + + /* + * tar_opt() + * handle tar format specific -o options. Every option returned by + * opt_next() is checked: the only pair accepted is TAR_OPTION=TAR_NODIR, + * and only while writing (act is APPND or ARCHIVE). When it is accepted + * tar_nodir is set. + * Return: + * 0 if ok, -1 (after a warning) at the first option that is not + * supported + */ + + int + tar_opt(void) + { + OPLIST *opt; + + while ((opt = opt_next()) != NULL) { + if (strcmp(opt->name, TAR_OPTION) || + strcmp(opt->value, TAR_NODIR)) { + paxwarn(1, "Unknown tar format -o option/value pair %s=%s", + opt->name, opt->value); + paxwarn(1,"%s=%s is the only supported tar format option", + TAR_OPTION, TAR_NODIR); + return(-1); + } + + /* + * we only support one option, and only when writing + */ + if ((act != APPND) && (act != ARCHIVE)) { + paxwarn(1, "%s=%s is only supported when writing.", + opt->name, opt->value); + return(-1); + } + tar_nodir = 1; + } + return(0); + } + + + /* + * tar_rd() + * extract the values out of block already determined to be a tar header. + * store the values in the ARCHD parameter. 
+ * Return: + * 0 + */ + +int +tar_rd(ARCHD *arcn, char *buf) +{ + HD_TAR *hd; + unsigned long long val; + char *pt; + + /* + * we only get proper sized buffers passed to us + */ + if (tar_id(buf, BLKMULT) < 0) + return(-1); + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + + /* + * copy out the name and values in the stat buffer + */ + hd = (HD_TAR *)buf; + if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { + arcn->nlen = expandname(arcn->name, sizeof(arcn->name), + &gnu_name_string, hd->name, sizeof(hd->name)); + arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), + &gnu_link_string, hd->linkname, sizeof(hd->linkname)); + } + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & + 0xfff); + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); + val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * have to look at the last character, it may be a '/' and that is used + * to encode this as a directory + */ + pt = &(arcn->name[arcn->nlen - 1]); + arcn->pad = 0; + arcn->skip = 0; + switch (hd->linkflag) { + case SYMTYPE: + /* + * symbolic link, need to get the link name and set the type in + * the st_mode so -v printing will look correct. + */ + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + break; + case LNKTYPE: + /* + * hard link, need to get the link name, set the type in the + * st_mode and st_nlink so -v printing will look better. + */ + arcn->type = PAX_HLK; + arcn->sb.st_nlink = 2; + + /* + * no idea of what type this thing really points at, but + * we set something for printing only. 
+ */ + arcn->sb.st_mode |= S_IFREG; + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + arcn->type = + hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + break; + case DIRTYPE: + /* + * It is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + break; + case AREGTYPE: + case REGTYPE: + default: + /* + * If we have a trailing / this is a directory and NOT a file. + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + if (*pt == '/') { + /* + * it is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + } else { + /* + * have a file that will be followed by data. Set the + * skip value to the size field and calculate the size + * of the padding. + */ + arcn->type = PAX_REG; + arcn->sb.st_mode |= S_IFREG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } + break; + } + + /* + * strip off any trailing slash. + */ + if (*pt == '/') { + *pt = '\0'; + --arcn->nlen; + } + return(0); +} + +/* + * tar_wr() + * write a tar header for the file specified in the ARCHD to the archive. + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, each field + * of tar has it own spec for the termination character(s). 
+ * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + + int + tar_wr(ARCHD *arcn) + { + HD_TAR *hd; + int len; + char hdblk[sizeof(HD_TAR)]; + + /* + * check for those file system types which tar cannot store. Each of + * them is skipped with a warning and a return of 1, so nothing at all + * is written to the archive for it. + */ + switch (arcn->type) { + case PAX_DIR: + /* + * user asked that dirs not be written to the archive + */ + if (tar_nodir) + return(1); + break; + case PAX_CHR: + paxwarn(1, "Tar cannot archive a character device %s", + arcn->org_name); + return(1); + case PAX_BLK: + paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name); + return(1); + case PAX_SCK: + paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name); + return(1); + case PAX_FIF: + paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name); + return(1); + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if ((size_t)arcn->ln_nlen > sizeof(hd->linkname)) { + paxwarn(1, "Link name too long for tar %s", + arcn->ln_name); + return(1); + } + break; + case PAX_REG: + case PAX_CTG: + default: + break; + } + + /* + * check file name len, remember extra char for dirs (the / at the end). + * A name that does not fit in the header is skipped with a warning. + */ + len = arcn->nlen; + if (arcn->type == PAX_DIR) + ++len; + if ((size_t)len > sizeof(hd->name)) { + paxwarn(1, "File name too long for tar %s", arcn->name); + return(1); + } + + /* + * Copy the data out of the ARCHD into the tar header based on the type + * of the file. Remember, many tar readers want all fields to be + * padded with zero so we zero the header first. We then set the + * linkflag field (type), the linkname, the size, and set the padding + * (if any) to be added after the file data (0 for all other types, + * as they only have a header). 
+ */ + memset(hdblk, 0, sizeof(hdblk)); + hd = (HD_TAR *)hdblk; + fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name)); + arcn->pad = 0; + + if (arcn->type == PAX_DIR) { + /* + * directories are the same as files, except have a filename + * that ends with a /, we add the slash here. No data follows + * dirs, so no pad. len already counts the slash, so it goes + * in the last byte of the name. + */ + hd->linkflag = AREGTYPE; + hd->name[len-1] = '/'; + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (arcn->type == PAX_SLK) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = SYMTYPE; + fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, + sizeof(arcn->ln_name)); + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (PAX_IS_HARDLINK(arcn->type)) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = LNKTYPE; + fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, + sizeof(arcn->ln_name)); + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else { + /* + * data follows this file, so set the pad. A size that does + * not fit in the size field gets its own warning. + */ + hd->linkflag = AREGTYPE; + if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { + paxwarn(1, "File is too large for tar %s", + arcn->org_name); + return(1); + } + arcn->pad = TAR_PAD(arcn->sb.st_size); + } + + /* + * copy those fields that are independent of the type. A negative + * mtime is written as 0. + */ + if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || + ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, + sizeof(hd->mtime), 1) || + ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || + ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0)) + goto out; + + /* + * calculate and add the checksum, then write the header. 
A return of + * 0 tells the caller to now write the file data, 1 says no data needs + * to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) + return(-1); + if (wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) + return(-1); + if (PAX_IS_REG(arcn->type)) + return(0); + return(1); + + out: + /* + * header field is out of range: warn and return 1. No header has + * been written at this point, so the entry is left out of the + * archive. + */ + paxwarn(1, "Tar header field is too small for %s", arcn->org_name); + return(1); + } + + /* + * Routines for POSIX ustar + */ + + /* + * ustar_id() + * determine if a block given to us is a valid ustar header. We have to + * be on the lookout for those pesky blocks of all zero's + * Return: + * 0 if a ustar header, -1 otherwise (this includes a size smaller + * than BLKMULT) + */ + + int + ustar_id(char *blk, int size) + { + HD_USTAR *hd; + + if (size < BLKMULT) + return(-1); + hd = (HD_USTAR *)blk; + + /* + * A header must have a file name, so a block where both the prefix + * and the name start with a NUL is rejected first; this simple and + * fast test also throws out blocks of all zeros. Then check the + * ustar magic cookie. We should use TMAGLEN, but some USTAR archive + * programs are fouled up and create archives missing the \0. Last we + * check the checksum. If ok we have to assume it is a valid header. + */ + if (hd->prefix[0] == '\0' && hd->name[0] == '\0') + return(-1); + if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); + } + + /* + * ustar_rd() + * extract the values out of block already determined to be a ustar header. + * store the values in the ARCHD parameter. 
+ * Return: + * 0 + */ + +int +ustar_rd(ARCHD *arcn, char *buf) +{ + HD_USTAR *hd = (HD_USTAR *)buf; + char *dest; + int cnt = 0; + dev_t devmajor; + dev_t devminor; + unsigned long long val; + + /* + * we only get proper sized buffers + */ + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + +#ifndef SMALL +reset: +#endif + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + +#ifndef SMALL + /* Process Extended headers. */ + if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) { + if (rd_xheader(arcn, hd->typeflag == GHDRTYPE, + (off_t)asc_ul(hd->size, sizeof(hd->size), OCT)) < 0) + return (-1); + + /* Update and check the ustar header. */ + if (rd_wrbuf(buf, BLKMULT) != BLKMULT) + return (-1); + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + + /* if the next block is another extension, reset the values */ + if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) + goto reset; + } +#endif + + if (!arcn->nlen) { + /* + * See if the filename is split into two parts. if, so join + * the parts. We copy the prefix first and add a / between + * the prefix and name. + */ + dest = arcn->name; + if (*(hd->prefix) != '\0') { + cnt = fieldcpy(dest, sizeof(arcn->name) - 1, + hd->prefix, sizeof(hd->prefix)); + dest += cnt; + *dest++ = '/'; + cnt++; + } else + cnt = 0; + + if (hd->typeflag != LONGLINKTYPE && + hd->typeflag != LONGNAMETYPE) { + arcn->nlen = cnt + expandname(dest, + sizeof(arcn->name) - cnt, &gnu_name_string, + hd->name, sizeof(hd->name)); + } + } + + if (!arcn->ln_nlen && + hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { + arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), + &gnu_link_string, hd->linkname, sizeof(hd->linkname)); + } + + /* + * follow the spec to the letter. we should only have mode bits, strip + * off all other crud we may be passed. 
+ */ + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & + 0xfff); + arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); + val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * If we can find the ascii names for gname and uname in the password + * and group files we will use the uid's and gid they bind. Otherwise + * we use the uid and gid values stored in the header. (This is what + * the posix spec wants). + */ + hd->gname[sizeof(hd->gname) - 1] = '\0'; + if (Nflag || gid_from_group(hd->gname, &(arcn->sb.st_gid)) == -1) + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + hd->uname[sizeof(hd->uname) - 1] = '\0'; + if (Nflag || uid_from_user(hd->uname, &(arcn->sb.st_uid)) == -1) + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + + /* + * set the defaults, these may be changed depending on the file type + */ + arcn->pad = 0; + arcn->skip = 0; + arcn->sb.st_rdev = (dev_t)0; + + /* + * set the mode and PAX type according to the typeflag in the header + */ + switch (hd->typeflag) { + case FIFOTYPE: + arcn->type = PAX_FIF; + arcn->sb.st_mode |= S_IFIFO; + break; + case DIRTYPE: + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + + /* + * Some programs that create ustar archives append a '/' + * to the pathname for directories. This clearly violates + * ustar specs, but we will silently strip it off anyway. + */ + if (arcn->name[arcn->nlen - 1] == '/') + arcn->name[--arcn->nlen] = '\0'; + break; + case BLKTYPE: + case CHRTYPE: + /* + * this type requires the rdev field to be set. 
+ */ + if (hd->typeflag == BLKTYPE) { + arcn->type = PAX_BLK; + arcn->sb.st_mode |= S_IFBLK; + } else { + arcn->type = PAX_CHR; + arcn->sb.st_mode |= S_IFCHR; + } + devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); + devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + break; + case SYMTYPE: + case LNKTYPE: + if (hd->typeflag == SYMTYPE) { + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + } else { + arcn->type = PAX_HLK; + /* + * so printing looks better + */ + arcn->sb.st_mode |= S_IFREG; + arcn->sb.st_nlink = 2; + } + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + arcn->type = + hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + break; + case CONTTYPE: + case AREGTYPE: + case REGTYPE: + default: + /* + * these types have file data that follows. Set the skip and + * pad fields. + */ + arcn->type = PAX_REG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + arcn->sb.st_mode |= S_IFREG; + break; + } + return(0); +} + +/* + * ustar_wr() + * write a ustar header for the file specified in the ARCHD to the archive + * Have to check for file types that cannot be stored and file names that + * are too long. 
Be careful of the term (last arg) to ul_oct, we only use
 *	'\0' for the termination character (this is different than picky tar)
 *	ASSUMED: space after header in header block is zero filled
 *	The uname and gname fields are always written NUL terminated: at most
 *	sizeof(field) - 1 bytes are copied into the zeroed header.
 * Return:
 *	0 if file has data to be written after the header, 1 if file has NO
 *	data to write after the header, -1 if archive write failed
 */

int
ustar_wr(ARCHD *arcn)
{
	HD_USTAR *hd;
	const char *name;
	char *pt, hdblk[sizeof(HD_USTAR)];

	/*
	 * check for those file system types ustar cannot store
	 */
	if (arcn->type == PAX_SCK) {
		paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
		return(1);
	}

	/*
	 * user asked that dirs not be written to the archive
	 */
	if (arcn->type == PAX_DIR && tar_nodir)
		return (1);

	/*
	 * check the length of the linkname (sizeof does not evaluate hd, so
	 * using it before hd is assigned is fine)
	 */
	if (PAX_IS_LINK(arcn->type) &&
	    ((size_t)arcn->ln_nlen > sizeof(hd->linkname))) {
		paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
		return(1);
	}

	/*
	 * split the path name into prefix and name fields (if needed). if
	 * pt != arcn->name, the name has to be split
	 */
	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
		paxwarn(1, "File name too long for ustar %s", arcn->name);
		return(1);
	}

	/*
	 * zero out the header so we don't have to worry about zero fill below
	 */
	memset(hdblk, 0, sizeof(hdblk));
	hd = (HD_USTAR *)hdblk;
	arcn->pad = 0;

	/*
	 * split the name, or zero out the prefix
	 */
	if (pt != arcn->name) {
		/*
		 * name was split, pt points at the / where the split is to
		 * occur, we remove the / and copy the first part to the prefix.
		 * The / is put back afterwards so arcn->name is left intact.
		 */
		*pt = '\0';
		fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name,
		    sizeof(arcn->name));
		*pt++ = '/';
	}

	/*
	 * copy the name part. this may be the whole path or the part after
	 * the prefix
	 */
	fieldcpy(hd->name, sizeof(hd->name), pt,
	    sizeof(arcn->name) - (pt - arcn->name));

	/*
	 * set the fields in the header that are type dependent
	 */
	switch (arcn->type) {
	case PAX_DIR:
		hd->typeflag = DIRTYPE;
		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
			goto out;
		break;
	case PAX_CHR:
	case PAX_BLK:
		if (arcn->type == PAX_CHR)
			hd->typeflag = CHRTYPE;
		else
			hd->typeflag = BLKTYPE;
		if (ul_oct(MAJOR(arcn->sb.st_rdev), hd->devmajor,
		    sizeof(hd->devmajor), 3) ||
		    ul_oct(MINOR(arcn->sb.st_rdev), hd->devminor,
		    sizeof(hd->devminor), 3) ||
		    ul_oct(0, hd->size, sizeof(hd->size), 3))
			goto out;
		break;
	case PAX_FIF:
		hd->typeflag = FIFOTYPE;
		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
			goto out;
		break;
	case PAX_SLK:
	case PAX_HLK:
	case PAX_HRG:
		if (arcn->type == PAX_SLK)
			hd->typeflag = SYMTYPE;
		else
			hd->typeflag = LNKTYPE;
		fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
		    sizeof(arcn->ln_name));
		if (ul_oct(0, hd->size, sizeof(hd->size), 3))
			goto out;
		break;
	case PAX_REG:
	case PAX_CTG:
	default:
		/*
		 * file data with this type, set the padding
		 */
		if (arcn->type == PAX_CTG)
			hd->typeflag = CONTTYPE;
		else
			hd->typeflag = REGTYPE;
		arcn->pad = TAR_PAD(arcn->sb.st_size);
		if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
			paxwarn(1, "File is too long for ustar %s",
			    arcn->org_name);
			return(1);
		}
		break;
	}

	memcpy(hd->magic, TMAGIC, TMAGLEN);
	memcpy(hd->version, TVERSION, TVERSLEN);

	/*
	 * set the remaining fields. Some versions want all 16 bits of mode
	 * we better humor them (they really do not meet spec though)....
	 *
	 * A uid or gid that does not fit in its field is replaced by the id
	 * of "nobody"; the warning is only repeated when the offending id
	 * changes.
	 */
	if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) {
		if (uid_nobody == 0) {
			if (uid_from_user("nobody", &uid_nobody) == -1)
				goto out;
		}
		if (uid_warn != arcn->sb.st_uid) {
			uid_warn = arcn->sb.st_uid;
			paxwarn(1,
			    "Ustar header field is too small for uid %lu, "
			    "using nobody", (u_long)arcn->sb.st_uid);
		}
		if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3))
			goto out;
	}
	if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) {
		if (gid_nobody == 0) {
			if (gid_from_group("nobody", &gid_nobody) == -1)
				goto out;
		}
		if (gid_warn != arcn->sb.st_gid) {
			gid_warn = arcn->sb.st_gid;
			paxwarn(1,
			    "Ustar header field is too small for gid %lu, "
			    "using nobody", (u_long)arcn->sb.st_gid);
		}
		if (ul_oct(gid_nobody, hd->gid, sizeof(hd->gid), 3))
			goto out;
	}
	/* a negative mtime is stored as 0 */
	if (ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
	    sizeof(hd->mtime), 3) ||
	    ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3))
		goto out;
	if (!Nflag) {
		/*
		 * uname and gname have to be NUL terminated. The header was
		 * zeroed above, so copying at most sizeof - 1 bytes always
		 * leaves the terminator in place, even for a name that is as
		 * long as (or longer than) the field.
		 */
		if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL)
			strncpy(hd->uname, name, sizeof(hd->uname) - 1);
		if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL)
			strncpy(hd->gname, name, sizeof(hd->gname) - 1);
	}

	/*
	 * calculate and store the checksum write the header to the archive
	 * return 0 tells the caller to now write the file data, 1 says no data
	 * needs to be written
	 */
	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
	    sizeof(hd->chksum), 3))
		goto out;
	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
		return(-1);
	if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
		return(-1);
	if (PAX_IS_REG(arcn->type))
		return(0);
	return(1);

 out:
	/*
	 * header field is out of range
	 */
	paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
	return(1);
}

/*
 * name_split()
 *	see if the name has to be split for storage in a ustar header. We try
 *	to fit the entire name in the name field without splitting if we can.
+ * The split point is always at a / + * Return + * character pointer to split point (always the / that is to be removed + * if the split is not needed, the points is set to the start of the file + * name (it would violate the spec to split there). A NULL is returned if + * the file name is too long + */ + +static char * +name_split(char *name, int len) +{ + char *start; + + /* + * check to see if the file name is small enough to fit in the name + * field. if so just return a pointer to the name. + * The strings can fill the complete name and prefix fields + * without a NUL terminator. + */ + if (len <= TNMSZ) + return(name); + if (len > (TPFSZ + TNMSZ + 1)) + return(NULL); + + /* + * we start looking at the biggest sized piece that fits in the name + * field. We walk forward looking for a slash to split at. The idea is + * to find the biggest piece to fit in the name field (or the smallest + * prefix we can find) (the -1 is correct the biggest piece would + * include the slash between the two parts that gets thrown away) + */ + start = name + len - TNMSZ - 1; + + /* + * the prefix may not be empty, so skip the first character when + * trying to split a path of exactly TNMSZ+1 characters. + * NOTE: This means the ustar format can't store /str if + * str contains no slashes and the length of str == TNMSZ + */ + if (start == name) + ++start; + + while ((*start != '\0') && (*start != '/')) + ++start; + + /* + * if we hit the end of the string, this name cannot be split, so we + * cannot store this file. 
+ */ + if (*start == '\0') + return(NULL); + + /* + * the split point isn't valid if it results in a prefix + * longer than TPFSZ + */ + if ((start - name) > TPFSZ) + return(NULL); + + /* + * ok have a split point, return it to the caller + */ + return(start); +} + +static size_t +expandname(char *buf, size_t len, char **gnu_name, const char *name, + size_t limit) +{ + size_t nlen; + + if (*gnu_name) { + /* *gnu_name is NUL terminated */ + if ((nlen = strlcpy(buf, *gnu_name, len)) >= len) + nlen = len - 1; + free(*gnu_name); + *gnu_name = NULL; + } else + nlen = fieldcpy(buf, len, name, limit); + return(nlen); +} + +#ifndef SMALL + +/* shortest possible extended record: "5 a=\n" */ +#define MINXHDRSZ 5 + +/* longest record we'll accept */ +#define MAXXHDRSZ BLKMULT + +static int +rd_xheader(ARCHD *arcn, int global, off_t size) +{ + char buf[MAXXHDRSZ]; + long len; + char *delim, *keyword; + char *nextp, *p, *end; + int pad, ret = 0; + + /* before we alter size, make note of how much we have to skip */ + pad = TAR_PAD((unsigned)size); + + p = end = buf; + while (size > 0 || p < end) { + if (size > 0) { + int rdlen; + + /* shift stuff down */ + if (p > buf) { + memmove(buf, p, end - p); + end -= p - buf; + p = buf; + } + + /* fill starting at end */ + rdlen = MINIMUM(size, (buf + sizeof buf) - end); + if (rd_wrbuf(end, rdlen) != rdlen) { + ret = -1; + break; + } + size -= rdlen; + end += rdlen; + } + + /* [p, end) is good */ + if (memchr(p, ' ', end - p) == NULL || + !isdigit((unsigned char)*p)) { + paxwarn(1, "Invalid extended header record"); + ret = -1; + break; + } + errno = 0; + len = strtol(p, &delim, 10); + if (*delim != ' ' || (errno == ERANGE && len == LONG_MAX) || + len < MINXHDRSZ) { + paxwarn(1, "Invalid extended header record length"); + ret = -1; + break; + } + if (len > end - p) { + paxwarn(1, "Extended header record length %lu is " + "out of range", len); + /* if we can just toss this record, do so */ + len -= end - p; + if (len <= size && 
rd_skip(len) == 0) { + size -= len; + p = end = buf; + continue; + } + ret = -1; + break; + } + nextp = p + len; + keyword = p = delim + 1; + p = memchr(p, '=', len); + if (!p || nextp[-1] != '\n') { + paxwarn(1, "Malformed extended header record"); + ret = -1; + break; + } + *p++ = nextp[-1] = '\0'; + if (!global) { + if (!strcmp(keyword, "path")) { + arcn->nlen = strlcpy(arcn->name, p, + sizeof(arcn->name)); + } else if (!strcmp(keyword, "linkpath")) { + arcn->ln_nlen = strlcpy(arcn->ln_name, p, + sizeof(arcn->ln_name)); + } + } + p = nextp; + } + + if (rd_skip(size + pad) < 0) + return (-1); + return (ret); +} +#endif diff --git a/bin/pax/tar.h b/bin/pax/tar.h new file mode 100644 index 0000000..318d099 --- /dev/null +++ b/bin/pax/tar.h @@ -0,0 +1,159 @@ +/* $OpenBSD: tar.h,v 1.9 2014/01/08 06:43:34 deraadt Exp $ */ +/* $NetBSD: tar.h,v 1.3 1995/03/21 09:07:51 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tar.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * defines and data structures common to all tar formats + */ +#define CHK_LEN 8 /* length of checksum field */ +#define TNMSZ 100 /* size of name field */ +#ifdef _PAX_ +#define NULLCNT 2 /* number of null blocks in trailer */ +#define CHK_OFFSET 148 /* start of chksum field */ +#define BLNKSUM 256L /* sum of checksum field using ' ' */ +#endif /* _PAX_ */ + +/* + * Values used in typeflag field in all tar formats + * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers) + */ +#define REGTYPE '0' /* Regular File */ +#define AREGTYPE '\0' /* Regular File */ +#define LNKTYPE '1' /* Link */ +#define SYMTYPE '2' /* Symlink */ +#define CHRTYPE '3' /* Character Special File */ +#define BLKTYPE '4' /* Block Special File */ +#define DIRTYPE '5' /* Directory */ +#define FIFOTYPE '6' /* FIFO */ +#define CONTTYPE '7' /* high perf file */ + +/* + * Extended header - POSIX.1-2001 + */ +#define XHDRTYPE 'x' /* Extended header */ +#define GHDRTYPE 'g' /* Global header*/ + +/* + * GNU tar compatibility; + */ +#define LONGLINKTYPE 'K' /* Long Symlink */ +#define LONGNAMETYPE 'L' /* Long File */ + +/* + * Mode field encoding of the different 
file types - values in octal
 */
#define TSUID		04000		/* Set UID on execution */
#define TSGID		02000		/* Set GID on execution */
#define TSVTX		01000		/* Reserved */
#define TUREAD		00400		/* Read by owner */
#define TUWRITE		00200		/* Write by owner */
#define TUEXEC		00100		/* Execute/Search by owner */
#define TGREAD		00040		/* Read by group */
#define TGWRITE		00020		/* Write by group */
#define TGEXEC		00010		/* Execute/Search by group */
#define TOREAD		00004		/* Read by other */
#define TOWRITE		00002		/* Write by other */
#define TOEXEC		00001		/* Execute/Search by other */

#ifdef _PAX_
/*
 * Pad with a bit mask, much faster than doing a mod but only works on powers
 * of 2. Macro below is for block of 512 bytes.
 * Yields the number of bytes (0 - 511) needed to round x up to the next
 * multiple of 512; x is evaluated once.
 */
#define TAR_PAD(x)	((512 - ((x) & 511)) & 511)
#endif /* _PAX_ */

/*
 * structure of an old tar header as it appeared in BSD releases
 */
typedef struct {
	char name[TNMSZ];		/* name of entry */
	char mode[8];			/* mode */
	char uid[8];			/* uid */
	char gid[8];			/* gid */
	char size[12];			/* size */
	char mtime[12];			/* modification time */
	char chksum[CHK_LEN];		/* checksum */
	char linkflag;			/* norm, hard, or sym. */
	char linkname[TNMSZ];		/* linked to name */
} HD_TAR;

#ifdef _PAX_
/*
 * -o options for BSD tar to not write directories to the archive
 */
#define TAR_NODIR	"nodir"
#define TAR_OPTION	"write_opt"

/*
 * default device names
 */
#define	DEV_0		"/dev/rst0"
#define	DEV_1		"/dev/rst1"
#define	DEV_4		"/dev/rst4"
#define	DEV_5		"/dev/rst5"
#define	DEV_7		"/dev/rst7"
#define	DEV_8		"/dev/rst8"
#endif /* _PAX_ */

/*
 * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990
 */
#define TPFSZ		155		/* size of prefix field */
#define	TMAGIC		"ustar"		/* ustar and a null */
#define	TMAGLEN		6
#define	TVERSION	"00"		/* 00 and no null */
#define	TVERSLEN	2

/*
 * ustar header. The first fields match HD_TAR (linkflag is called typeflag
 * here). The numeric fields hold ASCII octal digits. A path that does not fit
 * in name is split at a '/': the leading part goes into prefix and the
 * trailing part into name, the '/' itself is not stored.
 */
typedef struct {
	char name[TNMSZ];		/* name of entry */
	char mode[8];			/* mode */
	char uid[8];			/* uid */
	char gid[8];			/* gid */
	char size[12];			/* size */
	char mtime[12];			/* modification time */
	char chksum[CHK_LEN];		/* checksum */
	char typeflag;			/* type of file. */
	char linkname[TNMSZ];		/* linked to name */
	char magic[TMAGLEN];		/* magic cookie */
	char version[TVERSLEN];		/* version */
	char uname[32];			/* ascii owner name */
	char gname[32];			/* ascii group name */
	char devmajor[8];		/* major device number */
	char devminor[8];		/* minor device number */
	char prefix[TPFSZ];		/* leading part of a split path name */
} HD_USTAR;
diff --git a/bin/pax/tty_subs.c b/bin/pax/tty_subs.c
new file mode 100644
index 0000000..a07264a
--- /dev/null
+++ b/bin/pax/tty_subs.c
@@ -0,0 +1,187 @@
/*	$OpenBSD: tty_subs.c,v 1.17 2016/08/26 04:22:13 guenther Exp $	*/
/*	$NetBSD: tty_subs.c,v 1.5 1995/03/21 09:07:52 cgd Exp $	*/

/*-
 * Copyright (c) 1992 Keith Muller.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Keith Muller of the University of California, San Diego.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * routines that deal with I/O to and from the user + */ + +#define DEVTTY "/dev/tty" /* device for interactive i/o */ +static FILE *ttyoutf = NULL; /* output pointing at control tty */ +static FILE *ttyinf = NULL; /* input pointing at control tty */ + +/* + * tty_init() + * try to open the controlling terminal (if any) for this process. if the + * open fails, future ops that require user input will get an EOF + */ + +int +tty_init(void) +{ + int ttyfd; + + if ((ttyfd = open(DEVTTY, O_RDWR | O_CLOEXEC)) >= 0) { + if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) { + if ((ttyinf = fdopen(ttyfd, "r")) != NULL) + return(0); + (void)fclose(ttyoutf); + } + (void)close(ttyfd); + } + + if (iflag) { + paxwarn(1, "Fatal error, cannot open %s", DEVTTY); + return(-1); + } + return(0); +} + +/* + * tty_prnt() + * print a message using the specified format to the controlling tty + * if there is no controlling terminal, just return. + */ + +void +tty_prnt(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (ttyoutf == NULL) { + va_end(ap); + return; + } + (void)vfprintf(ttyoutf, fmt, ap); + va_end(ap); + (void)fflush(ttyoutf); +} + +/* + * tty_read() + * read a string from the controlling terminal if it is open into the + * supplied buffer + * Return: + * 0 if data was read, -1 otherwise. + */ + +int +tty_read(char *str, int len) +{ + if (ttyinf == NULL || fgets(str, len, ttyinf) == NULL) + return(-1); + + /* + * strip off that trailing newline + */ + str[strcspn(str, "\n")] = '\0'; + return(0); +} + +/* + * paxwarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +paxwarn(int set, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fputc('\n', stderr); +} + +/* + * syswarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +syswarn(int set, int errnum, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + + /* + * format and print the errno + */ + if (errnum > 0) + (void)fprintf(stderr, ": %s", strerror(errnum)); + (void)fputc('\n', stderr); +} |