aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCem Keylan <cem@ckyln.com>2020-10-16 17:47:01 +0300
committerCem Keylan <cem@ckyln.com>2020-10-16 17:47:01 +0300
commit5d69c6a2661bba0a22f3ecfd517e2e9767a38346 (patch)
tree1f479b2714e127835db7f33a3bfed4c38c52f883
parente2abcdca396661cbe0ae2ddb13d5c2b85682c13a (diff)
downloadotools-5d69c6a2661bba0a22f3ecfd517e2e9767a38346.tar.gz
add tools
-rw-r--r--.gitignore18
-rw-r--r--bin/md5/CVS/Entries7
-rw-r--r--bin/md5/CVS/Repository1
-rw-r--r--bin/md5/CVS/Root1
-rw-r--r--bin/md5/Makefile14
-rw-r--r--bin/md5/cksum.1191
-rw-r--r--bin/md5/crc.c137
-rw-r--r--bin/md5/crc.h31
-rw-r--r--bin/md5/md5.1142
-rw-r--r--bin/md5/md5.c852
-rw-r--r--bin/pax/CVS/Entries24
-rw-r--r--bin/pax/CVS/Repository1
-rw-r--r--bin/pax/CVS/Root1
-rw-r--r--bin/pax/Makefile11
-rw-r--r--bin/pax/ar_io.c1288
-rw-r--r--bin/pax/ar_subs.c1277
-rw-r--r--bin/pax/buf_subs.c983
-rw-r--r--bin/pax/cpio.1309
-rw-r--r--bin/pax/cpio.c1106
-rw-r--r--bin/pax/cpio.h150
-rw-r--r--bin/pax/extern.h310
-rw-r--r--bin/pax/file_subs.c1106
-rw-r--r--bin/pax/ftree.c566
-rw-r--r--bin/pax/gen_subs.c401
-rw-r--r--bin/pax/getoldopt.c69
-rw-r--r--bin/pax/options.c1788
-rw-r--r--bin/pax/pat_rep.c1108
-rw-r--r--bin/pax/pax.11112
-rw-r--r--bin/pax/pax.c446
-rw-r--r--bin/pax/pax.h262
-rw-r--r--bin/pax/sel_subs.c632
-rw-r--r--bin/pax/tables.c1786
-rw-r--r--bin/pax/tar.1410
-rw-r--r--bin/pax/tar.c1284
-rw-r--r--bin/pax/tar.h159
-rw-r--r--bin/pax/tty_subs.c187
-rw-r--r--usr.bin/diff/CVS/Entries9
-rw-r--r--usr.bin/diff/CVS/Repository1
-rw-r--r--usr.bin/diff/CVS/Root1
-rw-r--r--usr.bin/diff/Makefile7
-rw-r--r--usr.bin/diff/diff.1474
-rw-r--r--usr.bin/diff/diff.c402
-rw-r--r--usr.bin/diff/diff.h98
-rw-r--r--usr.bin/diff/diffdir.c237
-rw-r--r--usr.bin/diff/diffreg.c1485
-rw-r--r--usr.bin/diff/xmalloc.c85
-rw-r--r--usr.bin/diff/xmalloc.h30
-rw-r--r--usr.bin/doas/CVS/Entries8
-rw-r--r--usr.bin/doas/CVS/Repository1
-rw-r--r--usr.bin/doas/CVS/Root1
-rw-r--r--usr.bin/doas/Makefile15
-rw-r--r--usr.bin/doas/doas.1130
-rw-r--r--usr.bin/doas/doas.c482
-rw-r--r--usr.bin/doas/doas.conf.5149
-rw-r--r--usr.bin/doas/doas.h48
-rw-r--r--usr.bin/doas/env.c235
-rw-r--r--usr.bin/doas/parse.c868
-rw-r--r--usr.bin/doas/parse.tab.h10
-rw-r--r--usr.bin/doas/parse.y343
-rw-r--r--usr.bin/doas/persist.c133
-rw-r--r--usr.bin/m4/CVS/Entries18
-rw-r--r--usr.bin/m4/CVS/Repository1
-rw-r--r--usr.bin/m4/CVS/Root1
-rw-r--r--usr.bin/m4/Makefile17
-rw-r--r--usr.bin/m4/NOTES64
-rw-r--r--usr.bin/m4/PSD.doc/CVS/Entries3
-rw-r--r--usr.bin/m4/PSD.doc/CVS/Repository1
-rw-r--r--usr.bin/m4/PSD.doc/CVS/Root1
-rw-r--r--usr.bin/m4/PSD.doc/Makefile11
-rw-r--r--usr.bin/m4/PSD.doc/m4.ms967
-rw-r--r--usr.bin/m4/TEST/CVS/Entries7
-rw-r--r--usr.bin/m4/TEST/CVS/Repository1
-rw-r--r--usr.bin/m4/TEST/CVS/Root1
-rw-r--r--usr.bin/m4/TEST/ack.m438
-rw-r--r--usr.bin/m4/TEST/hanoi.m443
-rw-r--r--usr.bin/m4/TEST/hash.m453
-rw-r--r--usr.bin/m4/TEST/sqroot.m443
-rw-r--r--usr.bin/m4/TEST/string.m443
-rw-r--r--usr.bin/m4/TEST/test.m4241
-rw-r--r--usr.bin/m4/eval.c1033
-rw-r--r--usr.bin/m4/expr.c43
-rw-r--r--usr.bin/m4/extern.h182
-rw-r--r--usr.bin/m4/gnum4.c692
-rw-r--r--usr.bin/m4/look.c337
-rw-r--r--usr.bin/m4/m4.1524
-rw-r--r--usr.bin/m4/main.c642
-rw-r--r--usr.bin/m4/mdef.h237
-rw-r--r--usr.bin/m4/misc.c467
-rw-r--r--usr.bin/m4/parser.c756
-rw-r--r--usr.bin/m4/parser.tab.h13
-rw-r--r--usr.bin/m4/parser.y84
-rw-r--r--usr.bin/m4/pathnames.h38
-rw-r--r--usr.bin/m4/stdd.h55
-rw-r--r--usr.bin/m4/tokenizer.c191
-rw-r--r--usr.bin/m4/trace.c196
-rw-r--r--usr.bin/mandoc/CVS/Entries101
-rw-r--r--usr.bin/mandoc/CVS/Repository1
-rw-r--r--usr.bin/mandoc/CVS/Root1
-rw-r--r--usr.bin/mandoc/Makefile78
-rw-r--r--usr.bin/mandoc/apropos.1510
-rw-r--r--usr.bin/mandoc/arch.c52
-rw-r--r--usr.bin/mandoc/att.c47
-rw-r--r--usr.bin/mandoc/cgi.c1255
-rw-r--r--usr.bin/mandoc/cgi.h.example7
-rw-r--r--usr.bin/mandoc/chars.c506
-rw-r--r--usr.bin/mandoc/dba.c501
-rw-r--r--usr.bin/mandoc/dba.h50
-rw-r--r--usr.bin/mandoc/dba_array.c188
-rw-r--r--usr.bin/mandoc/dba_array.h47
-rw-r--r--usr.bin/mandoc/dba_read.c72
-rw-r--r--usr.bin/mandoc/dba_write.c117
-rw-r--r--usr.bin/mandoc/dba_write.h30
-rw-r--r--usr.bin/mandoc/dbm.c474
-rw-r--r--usr.bin/mandoc/dbm.h68
-rw-r--r--usr.bin/mandoc/dbm_map.c188
-rw-r--r--usr.bin/mandoc/dbm_map.h29
-rw-r--r--usr.bin/mandoc/eqn.c1130
-rw-r--r--usr.bin/mandoc/eqn.h72
-rw-r--r--usr.bin/mandoc/eqn_html.c244
-rw-r--r--usr.bin/mandoc/eqn_parse.h48
-rw-r--r--usr.bin/mandoc/eqn_term.c172
-rw-r--r--usr.bin/mandoc/html.c1085
-rw-r--r--usr.bin/mandoc/html.h141
-rw-r--r--usr.bin/mandoc/libman.h42
-rw-r--r--usr.bin/mandoc/libmandoc.h85
-rw-r--r--usr.bin/mandoc/libmdoc.h86
-rw-r--r--usr.bin/mandoc/main.c1255
-rw-r--r--usr.bin/mandoc/main.h53
-rw-r--r--usr.bin/mandoc/makewhatis.8228
-rw-r--r--usr.bin/mandoc/man.1431
-rw-r--r--usr.bin/mandoc/man.c343
-rw-r--r--usr.bin/mandoc/man.cgi.8426
-rw-r--r--usr.bin/mandoc/man.conf.5133
-rw-r--r--usr.bin/mandoc/man.h21
-rw-r--r--usr.bin/mandoc/man_html.c640
-rw-r--r--usr.bin/mandoc/man_macro.c466
-rw-r--r--usr.bin/mandoc/man_term.c1149
-rw-r--r--usr.bin/mandoc/man_validate.c656
-rw-r--r--usr.bin/mandoc/manconf.h56
-rw-r--r--usr.bin/mandoc/mandoc.12336
-rw-r--r--usr.bin/mandoc/mandoc.c657
-rw-r--r--usr.bin/mandoc/mandoc.css360
-rw-r--r--usr.bin/mandoc/mandoc.h322
-rw-r--r--usr.bin/mandoc/mandoc_aux.c113
-rw-r--r--usr.bin/mandoc/mandoc_aux.h27
-rw-r--r--usr.bin/mandoc/mandoc_msg.c368
-rw-r--r--usr.bin/mandoc/mandoc_ohash.c64
-rw-r--r--usr.bin/mandoc/mandoc_ohash.h19
-rw-r--r--usr.bin/mandoc/mandoc_parse.h44
-rw-r--r--usr.bin/mandoc/mandoc_xr.c122
-rw-r--r--usr.bin/mandoc/mandoc_xr.h31
-rw-r--r--usr.bin/mandoc/mandocdb.c2386
-rw-r--r--usr.bin/mandoc/manpath.c342
-rw-r--r--usr.bin/mandoc/mansearch.c842
-rw-r--r--usr.bin/mandoc/mansearch.h118
-rw-r--r--usr.bin/mandoc/mdoc.c431
-rw-r--r--usr.bin/mandoc/mdoc.h158
-rw-r--r--usr.bin/mandoc/mdoc_argv.c680
-rw-r--r--usr.bin/mandoc/mdoc_html.c1758
-rw-r--r--usr.bin/mandoc/mdoc_macro.c1598
-rw-r--r--usr.bin/mandoc/mdoc_man.c1836
-rw-r--r--usr.bin/mandoc/mdoc_markdown.c1606
-rw-r--r--usr.bin/mandoc/mdoc_state.c254
-rw-r--r--usr.bin/mandoc/mdoc_term.c1962
-rw-r--r--usr.bin/mandoc/mdoc_validate.c3047
-rw-r--r--usr.bin/mandoc/msec.c35
-rw-r--r--usr.bin/mandoc/msec.in34
-rw-r--r--usr.bin/mandoc/out.c563
-rw-r--r--usr.bin/mandoc/out.h70
-rw-r--r--usr.bin/mandoc/preconv.c177
-rw-r--r--usr.bin/mandoc/predefs.in65
-rw-r--r--usr.bin/mandoc/read.c727
-rw-r--r--usr.bin/mandoc/roff.c4372
-rw-r--r--usr.bin/mandoc/roff.h561
-rw-r--r--usr.bin/mandoc/roff_html.c117
-rw-r--r--usr.bin/mandoc/roff_int.h94
-rw-r--r--usr.bin/mandoc/roff_term.c244
-rw-r--r--usr.bin/mandoc/roff_validate.c149
-rw-r--r--usr.bin/mandoc/st.c80
-rw-r--r--usr.bin/mandoc/tag.c326
-rw-r--r--usr.bin/mandoc/tag.h35
-rw-r--r--usr.bin/mandoc/tbl.c181
-rw-r--r--usr.bin/mandoc/tbl.h122
-rw-r--r--usr.bin/mandoc/tbl_data.c300
-rw-r--r--usr.bin/mandoc/tbl_html.c255
-rw-r--r--usr.bin/mandoc/tbl_int.h47
-rw-r--r--usr.bin/mandoc/tbl_layout.c371
-rw-r--r--usr.bin/mandoc/tbl_opts.c171
-rw-r--r--usr.bin/mandoc/tbl_parse.h30
-rw-r--r--usr.bin/mandoc/tbl_term.c943
-rw-r--r--usr.bin/mandoc/term.c1112
-rw-r--r--usr.bin/mandoc/term.h158
-rw-r--r--usr.bin/mandoc/term_ascii.c392
-rw-r--r--usr.bin/mandoc/term_ps.c1355
-rw-r--r--usr.bin/mandoc/term_tab.c128
-rw-r--r--usr.bin/mandoc/term_tag.c199
-rw-r--r--usr.bin/mandoc/term_tag.h34
-rw-r--r--usr.bin/mandoc/tree.c514
-rw-r--r--usr.bin/nc/CVS/Entries7
-rw-r--r--usr.bin/nc/CVS/Repository1
-rw-r--r--usr.bin/nc/CVS/Root1
-rw-r--r--usr.bin/nc/Makefile8
-rw-r--r--usr.bin/nc/atomicio.c67
-rw-r--r--usr.bin/nc/atomicio.h39
-rw-r--r--usr.bin/nc/nc.1585
-rw-r--r--usr.bin/nc/netcat.c1899
-rw-r--r--usr.bin/nc/socks.c400
-rw-r--r--usr.bin/patch/CVS/Entries16
-rw-r--r--usr.bin/patch/CVS/Repository1
-rw-r--r--usr.bin/patch/CVS/Root1
-rw-r--r--usr.bin/patch/Makefile6
-rw-r--r--usr.bin/patch/backupfile.c237
-rw-r--r--usr.bin/patch/backupfile.h38
-rw-r--r--usr.bin/patch/common.h111
-rw-r--r--usr.bin/patch/ed.c336
-rw-r--r--usr.bin/patch/ed.h19
-rw-r--r--usr.bin/patch/inp.c430
-rw-r--r--usr.bin/patch/inp.h31
-rw-r--r--usr.bin/patch/mkpath.c77
-rw-r--r--usr.bin/patch/patch.1680
-rw-r--r--usr.bin/patch/patch.c1064
-rw-r--r--usr.bin/patch/pch.c1522
-rw-r--r--usr.bin/patch/pch.h60
-rw-r--r--usr.bin/patch/util.c425
-rw-r--r--usr.bin/patch/util.h51
-rw-r--r--usr.bin/signify/CVS/Entries16
-rw-r--r--usr.bin/signify/CVS/Repository1
-rw-r--r--usr.bin/signify/CVS/Root1
-rw-r--r--usr.bin/signify/Makefile16
-rw-r--r--usr.bin/signify/crypto_api.c30
-rw-r--r--usr.bin/signify/crypto_api.h39
-rw-r--r--usr.bin/signify/fe25519.c335
-rw-r--r--usr.bin/signify/fe25519.h70
-rw-r--r--usr.bin/signify/ge25519.h43
-rw-r--r--usr.bin/signify/ge25519_base.data858
-rw-r--r--usr.bin/signify/mod_ed25519.c143
-rw-r--r--usr.bin/signify/mod_ge25519.c327
-rw-r--r--usr.bin/signify/sc25519.c306
-rw-r--r--usr.bin/signify/sc25519.h80
-rw-r--r--usr.bin/signify/signify.1206
-rw-r--r--usr.bin/signify/signify.c918
-rw-r--r--usr.bin/signify/signify.h33
-rw-r--r--usr.bin/signify/zsig.c317
243 files changed, 91034 insertions, 9 deletions
diff --git a/.gitignore b/.gitignore
index 5bacaf8..e94c0fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,11 @@
*.a
*.o
-diff
-doas
-m4
-mandoc
-md5
-nc
-patch
-pax
-signify
+/diff
+/doas
+/m4
+/mandoc
+/md5
+/nc
+/patch
+/pax
+/signify
diff --git a/bin/md5/CVS/Entries b/bin/md5/CVS/Entries
new file mode 100644
index 0000000..86ca07f
--- /dev/null
+++ b/bin/md5/CVS/Entries
@@ -0,0 +1,7 @@
+/Makefile/1.15/Wed Mar 30 06:38:40 2016//
+/cksum.1/1.39/Sat Sep 3 17:01:01 2016//
+/crc.c/1.5/Fri Jan 25 00:19:25 2019//
+/crc.h/1.4/Fri Jan 25 00:19:25 2019//
+/md5.1/1.48/Fri Jan 25 00:19:25 2019//
+/md5.c/1.95/Sat May 18 16:53:39 2019//
+D
diff --git a/bin/md5/CVS/Repository b/bin/md5/CVS/Repository
new file mode 100644
index 0000000..0c71c98
--- /dev/null
+++ b/bin/md5/CVS/Repository
@@ -0,0 +1 @@
+src/bin/md5
diff --git a/bin/md5/CVS/Root b/bin/md5/CVS/Root
new file mode 100644
index 0000000..3811072
--- /dev/null
+++ b/bin/md5/CVS/Root
@@ -0,0 +1 @@
+/cvs
diff --git a/bin/md5/Makefile b/bin/md5/Makefile
new file mode 100644
index 0000000..c094424
--- /dev/null
+++ b/bin/md5/Makefile
@@ -0,0 +1,14 @@
+# $OpenBSD: Makefile,v 1.15 2016/03/30 06:38:40 jmc Exp $
+
+PROG= md5
+SRCS= crc.c md5.c
+MAN= cksum.1 md5.1
+LINKS= ${BINDIR}/md5 ${BINDIR}/sha1 \
+ ${BINDIR}/md5 ${BINDIR}/sha256 \
+ ${BINDIR}/md5 ${BINDIR}/sha512 \
+ ${BINDIR}/md5 ${BINDIR}/cksum
+
+CPPFLAGS+= -I${.CURDIR}
+COPTS+= -Wall -Wconversion -Wmissing-prototypes
+
+.include <bsd.prog.mk>
diff --git a/bin/md5/cksum.1 b/bin/md5/cksum.1
new file mode 100644
index 0000000..af52a4a
--- /dev/null
+++ b/bin/md5/cksum.1
@@ -0,0 +1,191 @@
+.\" $OpenBSD: cksum.1,v 1.39 2016/09/03 17:01:01 tedu Exp $
+.\"
+.\" Copyright (c) 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)cksum.1 8.2 (Berkeley) 4/28/95
+.\"
+.Dd $Mdocdate: September 3 2016 $
+.Dt CKSUM 1
+.Os
+.Sh NAME
+.Nm cksum
+.Nd display file checksums and block counts
+.Sh SYNOPSIS
+.Nm cksum
+.Bk -words
+.Op Fl bcpqrtx
+.Op Fl a Ar algorithms
+.Op Fl C Ar checklist
+.Op Fl h Ar hashfile
+.Op Fl s Ar string
+.Op Ar
+.Ek
+.Sh DESCRIPTION
+The
+.Nm cksum
+utility writes to the standard output a single line for each input file.
+The format of this line varies with the algorithm being used as follows:
+.Bl -tag -width allxothers
+.It cksum
+The output line consists of three whitespace separated fields:
+a CRC checksum, the number of octets in the input,
+and name of the file or string.
+If no file name is specified, the standard input is used and no file name
+is written.
+.It all others
+The output line consists of four whitespace separated fields:
+the name of the algorithm used, the name of the file or string in
+parentheses, an equals sign, and the cryptographic hash of the input.
+If no file name is specified, the standard input is used and only
+the cryptographic hash is output.
+.El
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl a Ar algorithms
+Use the specified algorithm(s) instead of the default (cksum).
+Supported algorithms include cksum, md5, rmd160, sha1,
+sha224, sha256, sha384, sha512/256, and sha512.
+Multiple algorithms may be specified, separated by a comma or whitespace.
+Additionally, multiple
+.Fl a
+options may be specified on the command line.
+Case is ignored when matching algorithms.
+The output format may be specified on a per-algorithm basis
+by using a single-character suffix, e.g.\&
+.Dq sha256b .
+If the algorithm has a
+.Sq b
+suffix, the checksum will be output in base64 format.
+If the algorithm has an
+.Sq x
+suffix, the checksum will be output in hex format.
+If an algorithm with the same output format is repeated,
+only the first instance is used.
+Note that output format suffixes are not supported
+for the cksum algorithm.
+.It Fl b
+Output checksums in base64 notation, not hexadecimal by
+default.
+A
+.Sq b
+or
+.Sq x
+suffix on the algorithm will override this default.
+This option is ignored for the cksum algorithm.
+.It Fl C Ar checklist
+Compare the checksum of each
+.Ar file
+against the checksums in the
+.Ar checklist .
+Any specified
+.Ar file
+that is not listed in the
+.Ar checklist
+will generate an error.
+.It Fl c
+If this option is specified, the
+.Ar file
+options become checklists.
+Each checklist should contain hash results in the normal format,
+which will be verified against the specified paths.
+Output consists of the digest used, the file name,
+and an OK, FAILED, or MISSING for the result of the comparison.
+This will validate any of the supported checksums.
+If no file is given, stdin is used.
+The
+.Fl c
+option may not be used in conjunction with more than a single
+.Fl a
+option.
+.It Fl h Ar hashfile
+Place the checksum into
+.Ar hashfile
+instead of stdout.
+.It Fl p
+Echoes stdin to stdout and appends the
+checksum to stdout.
+.It Fl q
+Only print the checksum (quiet mode) or if used in conjunction with the
+.Fl c
+flag, only print the failed cases.
+.It Fl r
+Reverse the format of the hash algorithm output, making
+it match the checksum output format.
+.It Fl s Ar string
+Prints a checksum of the given
+.Ar string .
+.It Fl t
+Runs a built-in time trial.
+Specifying
+.Fl t
+multiple times results in the number of rounds being multiplied
+by 10 for each additional flag.
+.It Fl x
+Runs a built-in test script.
+.El
+.Pp
+The default CRC used is based on the polynomial used for CRC error checking
+in the networking standard
+ISO/IEC 8802-3:1996.
+The other available algorithms are described in their respective
+man pages in section 3 of the manual.
+.Sh EXIT STATUS
+.Ex -std cksum
+.Sh SEE ALSO
+.Xr md5 1
+.Pp
+The default calculation is identical to that given in pseudo-code
+in the following ACM article:
+.Rs
+.%T "Computation of Cyclic Redundancy Checks Via Table Lookup"
+.%A Dilip V. Sarwate
+.%J "Communications of the ACM"
+.%D "August 1988"
+.Re
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+All the flags are extensions to that specification.
+.Sh HISTORY
+The
+.Nm cksum
+utility appeared in
+.Bx 4.4 .
+.Sh CAVEATS
+Do not use the cksum or md5 algorithms to verify file integrity.
+An attacker can trivially produce modified payload that
+has the same checksum as the original version.
+Use a cryptographic checksum instead.
diff --git a/bin/md5/crc.c b/bin/md5/crc.c
new file mode 100644
index 0000000..19ea317
--- /dev/null
+++ b/bin/md5/crc.c
@@ -0,0 +1,137 @@
+/* $OpenBSD: crc.c,v 1.5 2019/01/25 00:19:25 millert Exp $ */
+
+/*
+ * Copyright (c) 2004 Todd C. Miller <millert@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "crc.h"
+
+/*
+ * Table-driven version of the following polynomial from POSIX 1003.2:
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 +
+ * x^7 + x^5 + x^4 + x^2 + x + 1
+ */
+static const u_int32_t crc32tab[] = {
+ 0x00000000U,
+ 0x04c11db7U, 0x09823b6eU, 0x0d4326d9U, 0x130476dcU, 0x17c56b6bU,
+ 0x1a864db2U, 0x1e475005U, 0x2608edb8U, 0x22c9f00fU, 0x2f8ad6d6U,
+ 0x2b4bcb61U, 0x350c9b64U, 0x31cd86d3U, 0x3c8ea00aU, 0x384fbdbdU,
+ 0x4c11db70U, 0x48d0c6c7U, 0x4593e01eU, 0x4152fda9U, 0x5f15adacU,
+ 0x5bd4b01bU, 0x569796c2U, 0x52568b75U, 0x6a1936c8U, 0x6ed82b7fU,
+ 0x639b0da6U, 0x675a1011U, 0x791d4014U, 0x7ddc5da3U, 0x709f7b7aU,
+ 0x745e66cdU, 0x9823b6e0U, 0x9ce2ab57U, 0x91a18d8eU, 0x95609039U,
+ 0x8b27c03cU, 0x8fe6dd8bU, 0x82a5fb52U, 0x8664e6e5U, 0xbe2b5b58U,
+ 0xbaea46efU, 0xb7a96036U, 0xb3687d81U, 0xad2f2d84U, 0xa9ee3033U,
+ 0xa4ad16eaU, 0xa06c0b5dU, 0xd4326d90U, 0xd0f37027U, 0xddb056feU,
+ 0xd9714b49U, 0xc7361b4cU, 0xc3f706fbU, 0xceb42022U, 0xca753d95U,
+ 0xf23a8028U, 0xf6fb9d9fU, 0xfbb8bb46U, 0xff79a6f1U, 0xe13ef6f4U,
+ 0xe5ffeb43U, 0xe8bccd9aU, 0xec7dd02dU, 0x34867077U, 0x30476dc0U,
+ 0x3d044b19U, 0x39c556aeU, 0x278206abU, 0x23431b1cU, 0x2e003dc5U,
+ 0x2ac12072U, 0x128e9dcfU, 0x164f8078U, 0x1b0ca6a1U, 0x1fcdbb16U,
+ 0x018aeb13U, 0x054bf6a4U, 0x0808d07dU, 0x0cc9cdcaU, 0x7897ab07U,
+ 0x7c56b6b0U, 0x71159069U, 0x75d48ddeU, 0x6b93dddbU, 0x6f52c06cU,
+ 0x6211e6b5U, 0x66d0fb02U, 0x5e9f46bfU, 0x5a5e5b08U, 0x571d7dd1U,
+ 0x53dc6066U, 0x4d9b3063U, 0x495a2dd4U, 0x44190b0dU, 0x40d816baU,
+ 0xaca5c697U, 0xa864db20U, 0xa527fdf9U, 0xa1e6e04eU, 0xbfa1b04bU,
+ 0xbb60adfcU, 0xb6238b25U, 0xb2e29692U, 0x8aad2b2fU, 0x8e6c3698U,
+ 0x832f1041U, 0x87ee0df6U, 0x99a95df3U, 0x9d684044U, 0x902b669dU,
+ 0x94ea7b2aU, 0xe0b41de7U, 0xe4750050U, 0xe9362689U, 0xedf73b3eU,
+ 0xf3b06b3bU, 0xf771768cU, 0xfa325055U, 0xfef34de2U, 0xc6bcf05fU,
+ 0xc27dede8U, 0xcf3ecb31U, 0xcbffd686U, 0xd5b88683U, 0xd1799b34U,
+ 0xdc3abdedU, 0xd8fba05aU, 0x690ce0eeU, 0x6dcdfd59U, 0x608edb80U,
+ 0x644fc637U, 0x7a089632U, 0x7ec98b85U, 0x738aad5cU, 0x774bb0ebU,
+ 0x4f040d56U, 0x4bc510e1U, 0x46863638U, 0x42472b8fU, 0x5c007b8aU,
+ 0x58c1663dU, 0x558240e4U, 0x51435d53U, 0x251d3b9eU, 0x21dc2629U,
+ 0x2c9f00f0U, 0x285e1d47U, 0x36194d42U, 0x32d850f5U, 0x3f9b762cU,
+ 0x3b5a6b9bU, 0x0315d626U, 0x07d4cb91U, 0x0a97ed48U, 0x0e56f0ffU,
+ 0x1011a0faU, 0x14d0bd4dU, 0x19939b94U, 0x1d528623U, 0xf12f560eU,
+ 0xf5ee4bb9U, 0xf8ad6d60U, 0xfc6c70d7U, 0xe22b20d2U, 0xe6ea3d65U,
+ 0xeba91bbcU, 0xef68060bU, 0xd727bbb6U, 0xd3e6a601U, 0xdea580d8U,
+ 0xda649d6fU, 0xc423cd6aU, 0xc0e2d0ddU, 0xcda1f604U, 0xc960ebb3U,
+ 0xbd3e8d7eU, 0xb9ff90c9U, 0xb4bcb610U, 0xb07daba7U, 0xae3afba2U,
+ 0xaafbe615U, 0xa7b8c0ccU, 0xa379dd7bU, 0x9b3660c6U, 0x9ff77d71U,
+ 0x92b45ba8U, 0x9675461fU, 0x8832161aU, 0x8cf30badU, 0x81b02d74U,
+ 0x857130c3U, 0x5d8a9099U, 0x594b8d2eU, 0x5408abf7U, 0x50c9b640U,
+ 0x4e8ee645U, 0x4a4ffbf2U, 0x470cdd2bU, 0x43cdc09cU, 0x7b827d21U,
+ 0x7f436096U, 0x7200464fU, 0x76c15bf8U, 0x68860bfdU, 0x6c47164aU,
+ 0x61043093U, 0x65c52d24U, 0x119b4be9U, 0x155a565eU, 0x18197087U,
+ 0x1cd86d30U, 0x029f3d35U, 0x065e2082U, 0x0b1d065bU, 0x0fdc1becU,
+ 0x3793a651U, 0x3352bbe6U, 0x3e119d3fU, 0x3ad08088U, 0x2497d08dU,
+ 0x2056cd3aU, 0x2d15ebe3U, 0x29d4f654U, 0xc5a92679U, 0xc1683bceU,
+ 0xcc2b1d17U, 0xc8ea00a0U, 0xd6ad50a5U, 0xd26c4d12U, 0xdf2f6bcbU,
+ 0xdbee767cU, 0xe3a1cbc1U, 0xe760d676U, 0xea23f0afU, 0xeee2ed18U,
+ 0xf0a5bd1dU, 0xf464a0aaU, 0xf9278673U, 0xfde69bc4U, 0x89b8fd09U,
+ 0x8d79e0beU, 0x803ac667U, 0x84fbdbd0U, 0x9abc8bd5U, 0x9e7d9662U,
+ 0x933eb0bbU, 0x97ffad0cU, 0xafb010b1U, 0xab710d06U, 0xa6322bdfU,
+ 0xa2f33668U, 0xbcb4666dU, 0xb8757bdaU, 0xb5365d03U, 0xb1f740b4U
+};
+
+void
+CKSUM_Init(CKSUM_CTX *ctx)
+{
+ ctx->crc = 0;
+ ctx->len = 0;
+}
+
+#define UPDATE(crc, byte) do \
+ (crc) = ((crc) << 8) ^ crc32tab[((crc) >> 24) ^ (byte)]; \
+while(0)
+
+void
+CKSUM_Update(CKSUM_CTX *ctx, const unsigned char *buf, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ UPDATE(ctx->crc, buf[i]);
+ ctx->len += len;
+}
+
+void
+CKSUM_Final(CKSUM_CTX *ctx)
+{
+ off_t len = ctx->len;
+
+ /* add in number of bytes read and finish */
+ while (len != 0) {
+ UPDATE(ctx->crc, len & 0xff);
+ len >>= 8;
+ }
+ ctx->crc = ~ctx->crc;
+}
+
+char *
+CKSUM_End(CKSUM_CTX *ctx, char *outstr)
+{
+ CKSUM_Final(ctx);
+
+ if (outstr == NULL) {
+ if (asprintf(&outstr, "%u %lld", ctx->crc, ctx->len) == -1)
+ return (NULL);
+ } else {
+ (void)snprintf(outstr, (size_t)CKSUM_DIGEST_STRING_LENGTH,
+ "%u %lld", ctx->crc, ctx->len);
+ }
+
+ return (outstr);
+}
diff --git a/bin/md5/crc.h b/bin/md5/crc.h
new file mode 100644
index 0000000..2000c48
--- /dev/null
+++ b/bin/md5/crc.h
@@ -0,0 +1,31 @@
+/* $OpenBSD: crc.h,v 1.4 2019/01/25 00:19:25 millert Exp $ */
+
+/*
+ * Copyright (c) 2004 Todd C. Miller <millert@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define CKSUM_DIGEST_LENGTH 4
+#define CKSUM_DIGEST_STRING_LENGTH (10 + 1 + 20 + 1)
+
+typedef struct CKSUMContext {
+ u_int32_t crc;
+ off_t len;
+} CKSUM_CTX;
+
+void CKSUM_Init(CKSUM_CTX *);
+void CKSUM_Update(CKSUM_CTX *, const u_int8_t *, size_t);
+void CKSUM_Final(CKSUM_CTX *);
+char *CKSUM_End(CKSUM_CTX *, char *);
+char *CKSUM_Data(const u_int8_t *, size_t, char *);
diff --git a/bin/md5/md5.1 b/bin/md5/md5.1
new file mode 100644
index 0000000..fa860eb
--- /dev/null
+++ b/bin/md5/md5.1
@@ -0,0 +1,142 @@
+.\" $OpenBSD: md5.1,v 1.48 2019/01/25 00:19:25 millert Exp $
+.\"
+.\" Copyright (c) 2003, 2004, 2006 Todd C. Miller <millert@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.\" Sponsored in part by the Defense Advanced Research Projects
+.\" Agency (DARPA) and Air Force Research Laboratory, Air Force
+.\" Materiel Command, USAF, under agreement number F39502-99-1-0512.
+.\"
+.Dd $Mdocdate: January 25 2019 $
+.Dt MD5 1
+.Os
+.Sh NAME
+.Nm md5 ,
+.Nm sha1 ,
+.Nm sha256 ,
+.Nm sha512
+.Nd calculate a message digest (checksum) for a file
+.Sh SYNOPSIS
+.Nm md5
+.Op Fl bcpqrtx
+.Op Fl C Ar checklist
+.Op Fl h Ar hashfile
+.Op Fl s Ar string
+.Op Ar
+.Nm sha1
+.Op Fl bcpqrtx
+.Op Fl C Ar checklist
+.Op Fl h Ar hashfile
+.Op Fl s Ar string
+.Op Ar
+.Nm sha256
+.Op Fl bcpqrtx
+.Op Fl C Ar checklist
+.Op Fl h Ar hashfile
+.Op Fl s Ar string
+.Op Ar
+.Nm sha512
+.Op Fl bcpqrtx
+.Op Fl C Ar checklist
+.Op Fl h Ar hashfile
+.Op Fl s Ar string
+.Op Ar
+.Sh DESCRIPTION
+These utilities take as input a message of arbitrary length and produce
+as output a message digest (checksum) of the input.
+.Pp
+Two messages having the same message digest (a collision) have been produced
+for MD5 and for SHA-1, so their use is deprecated.
+.Pp
+The options for use with each command are as follows:
+.Bl -tag -width Ds
+.It Fl b
+Output checksums in base64 notation, not hexadecimal.
+.It Fl C Ar checklist
+Compare the checksum of each
+.Ar file
+against the checksums in the
+.Ar checklist .
+Any specified
+.Ar file
+that is not listed in the
+.Ar checklist
+will generate an error.
+.It Fl c
+If this option is specified, the
+.Ar file
+options become checklists.
+Each checklist should contain hash results in the normal format,
+which will be verified against the specified paths.
+Output consists of the digest used, the file name,
+and an OK, FAILED, or MISSING for the result of the comparison.
+This will validate any of the supported checksums (see
+.Xr cksum 1 ) .
+If no file is given, stdin is used.
+.It Fl h Ar hashfile
+Place the checksum into
+.Ar hashfile
+instead of stdout.
+.It Fl p
+Echoes stdin to stdout and appends the
+checksum to stdout.
+.It Fl q
+Only print the checksum (quiet mode) or if used in conjunction with the
+.Fl c
+flag, only print the failed cases.
+.It Fl r
+Reverse the format of the hash algorithm output, making
+it match the output format used by
+.Xr cksum 1 .
+.It Fl s Ar string
+Prints a checksum of the given
+.Ar string .
+.It Fl t
+Runs a built-in time trial.
+Specifying
+.Fl t
+multiple times results in the number of rounds being multiplied
+by 10 for each additional flag.
+.It Fl x
+Runs a built-in test script.
+.El
+.Sh EXIT STATUS
+These utilities exit 0 on success,
+and \*(Gt0 if an error occurs.
+.Sh SEE ALSO
+.Xr cksum 1
+.Sh STANDARDS
+.Rs
+.%A R. Rivest
+.%D April 1992
+.%R RFC 1321
+.%T The MD5 Message-Digest Algorithm
+.Re
+.Rs
+.%A J. Burrows
+.%O FIPS PUB 180-1
+.%T The Secure Hash Standard
+.Re
+.Pp
+.Rs
+.%A D. Eastlake
+.%A P. Jones
+.%D September 2001
+.%R RFC 3174
+.%T US Secure Hash Algorithm 1 (SHA1)
+.Re
+.Rs
+.%T Secure Hash Standard
+.%O FIPS PUB 180-2
+.Re
diff --git a/bin/md5/md5.c b/bin/md5/md5.c
new file mode 100644
index 0000000..069b2f5
--- /dev/null
+++ b/bin/md5/md5.c
@@ -0,0 +1,852 @@
+/* $OpenBSD: md5.c,v 1.95 2019/05/18 16:53:39 otto Exp $ */
+
+/*
+ * Copyright (c) 2001,2003,2005-2007,2010,2013,2014
+ * Todd C. Miller <millert@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <sys/resource.h>
+#include <netinet/in.h>
+#include <ctype.h>
+#include <err.h>
+#include <fcntl.h>
+#include <resolv.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <md5.h>
+#include <rmd160.h>
+#include <sha1.h>
+#include <sha2.h>
+#include "crc.h"
+
+#define STYLE_MD5 0
+#define STYLE_CKSUM 1
+#define STYLE_TERSE 2
+
+#define MAX_DIGEST_LEN 128
+
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
+
+union ANY_CTX {
+#if !defined(SHA2_ONLY)
+ CKSUM_CTX cksum;
+ MD5_CTX md5;
+ RMD160_CTX rmd160;
+ SHA1_CTX sha1;
+#endif /* !defined(SHA2_ONLY) */
+ SHA2_CTX sha2;
+};
+
+struct hash_function {
+ const char *name;
+ size_t digestlen;
+ int style;
+ int base64;
+ void *ctx; /* XXX - only used by digest_file() */
+ void (*init)(void *);
+ void (*update)(void *, const unsigned char *, size_t);
+ void (*final)(unsigned char *, void *);
+ char * (*end)(void *, char *);
+ TAILQ_ENTRY(hash_function) tailq;
+} functions[] = {
+#if !defined(SHA2_ONLY)
+ {
+ "CKSUM",
+ CKSUM_DIGEST_LENGTH,
+ STYLE_CKSUM,
+ -1,
+ NULL,
+ (void (*)(void *))CKSUM_Init,
+ (void (*)(void *, const unsigned char *, size_t))CKSUM_Update,
+ (void (*)(unsigned char *, void *))CKSUM_Final,
+ (char *(*)(void *, char *))CKSUM_End
+ },
+ {
+ "MD5",
+ MD5_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))MD5Init,
+ (void (*)(void *, const unsigned char *, size_t))MD5Update,
+ (void (*)(unsigned char *, void *))MD5Final,
+ (char *(*)(void *, char *))MD5End
+ },
+ {
+ "RMD160",
+ RMD160_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))RMD160Init,
+ (void (*)(void *, const unsigned char *, size_t))RMD160Update,
+ (void (*)(unsigned char *, void *))RMD160Final,
+ (char *(*)(void *, char *))RMD160End
+ },
+ {
+ "SHA1",
+ SHA1_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA1Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA1Update,
+ (void (*)(unsigned char *, void *))SHA1Final,
+ (char *(*)(void *, char *))SHA1End
+ },
+ {
+ "SHA224",
+ SHA224_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA224Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA224Update,
+ (void (*)(unsigned char *, void *))SHA224Final,
+ (char *(*)(void *, char *))SHA224End
+ },
+#endif /* !defined(SHA2_ONLY) */
+ {
+ "SHA256",
+ SHA256_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA256Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA256Update,
+ (void (*)(unsigned char *, void *))SHA256Final,
+ (char *(*)(void *, char *))SHA256End
+ },
+#if !defined(SHA2_ONLY)
+ {
+ "SHA384",
+ SHA384_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA384Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA384Update,
+ (void (*)(unsigned char *, void *))SHA384Final,
+ (char *(*)(void *, char *))SHA384End
+ },
+ {
+ "SHA512/256",
+ SHA512_256_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA512_256Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA512_256Update,
+ (void (*)(unsigned char *, void *))SHA512_256Final,
+ (char *(*)(void *, char *))SHA512_256End
+ },
+#endif /* !defined(SHA2_ONLY) */
+ {
+ "SHA512",
+ SHA512_DIGEST_LENGTH,
+ STYLE_MD5,
+ 0,
+ NULL,
+ (void (*)(void *))SHA512Init,
+ (void (*)(void *, const unsigned char *, size_t))SHA512Update,
+ (void (*)(unsigned char *, void *))SHA512Final,
+ (char *(*)(void *, char *))SHA512End
+ },
+ {
+ NULL,
+ }
+};
+
+TAILQ_HEAD(hash_list, hash_function);
+
+void digest_end(const struct hash_function *, void *, char *, size_t, int);
+int digest_file(const char *, struct hash_list *, int);
+void digest_print(const struct hash_function *, const char *, const char *);
+#if !defined(SHA2_ONLY)
+int digest_filelist(const char *, struct hash_function *, int, char **);
+void digest_printstr(const struct hash_function *, const char *, const char *);
+void digest_string(char *, struct hash_list *);
+void digest_test(struct hash_list *);
+void digest_time(struct hash_list *, int);
+#endif /* !defined(SHA2_ONLY) */
+void hash_insert(struct hash_list *, struct hash_function *, int);
+void usage(void) __attribute__((__noreturn__));
+
+extern char *__progname;
+int qflag = 0;
+FILE *ofile = NULL;
+
+int
+main(int argc, char **argv)
+{
+ struct hash_function *hf, *hftmp;
+ struct hash_list hl;
+ size_t len;
+ char *cp, *input_string, *selective_checklist;
+ const char *optstr;
+ int fl, error, base64;
+ int bflag, cflag, pflag, rflag, tflag, xflag;
+
+ if (pledge("stdio rpath wpath cpath", NULL) == -1)
+ err(1, "pledge");
+
+ TAILQ_INIT(&hl);
+ input_string = NULL;
+ selective_checklist = NULL;
+ error = bflag = cflag = pflag = qflag = rflag = tflag = xflag = 0;
+
+#if !defined(SHA2_ONLY)
+ if (strcmp(__progname, "cksum") == 0)
+ optstr = "a:bC:ch:pqrs:tx";
+ else
+#endif /* !defined(SHA2_ONLY) */
+ optstr = "bC:ch:pqrs:tx";
+
+ /* Check for -b option early since it changes behavior. */
+ while ((fl = getopt(argc, argv, optstr)) != -1) {
+ switch (fl) {
+ case 'b':
+ bflag = 1;
+ break;
+ case '?':
+ usage();
+ }
+ }
+ optind = 1;
+ optreset = 1;
+ while ((fl = getopt(argc, argv, optstr)) != -1) {
+ switch (fl) {
+ case 'a':
+ while ((cp = strsep(&optarg, " \t,")) != NULL) {
+ if (*cp == '\0')
+ continue;
+ base64 = -1;
+ for (hf = functions; hf->name != NULL; hf++) {
+ len = strlen(hf->name);
+ if (strncasecmp(cp, hf->name, len) != 0)
+ continue;
+ if (cp[len] == '\0') {
+ if (hf->base64 != -1)
+ base64 = bflag;
+ break; /* exact match */
+ }
+ if (cp[len + 1] == '\0' &&
+ (cp[len] == 'b' || cp[len] == 'x')) {
+ base64 =
+ cp[len] == 'b' ? 1 : 0;
+ break; /* match w/ suffix */
+ }
+ }
+ if (hf->name == NULL) {
+ warnx("unknown algorithm \"%s\"", cp);
+ usage();
+ }
+ if (hf->base64 == -1 && base64 != -1) {
+ warnx("%s doesn't support %s",
+ hf->name,
+ base64 ? "base64" : "hex");
+ usage();
+ }
+ /* Check for dupes. */
+ TAILQ_FOREACH(hftmp, &hl, tailq) {
+ if (hftmp->base64 == base64 &&
+ strcmp(hf->name, hftmp->name) == 0)
+ break;
+ }
+ if (hftmp == NULL)
+ hash_insert(&hl, hf, base64);
+ }
+ break;
+ case 'b':
+ /* has already been parsed */
+ break;
+ case 'h':
+ ofile = fopen(optarg, "w");
+ if (ofile == NULL)
+ err(1, "%s", optarg);
+ break;
+#if !defined(SHA2_ONLY)
+ case 'C':
+ selective_checklist = optarg;
+ break;
+ case 'c':
+ cflag = 1;
+ break;
+#endif /* !defined(SHA2_ONLY) */
+ case 'p':
+ pflag = 1;
+ break;
+ case 'q':
+ qflag = 1;
+ break;
+ case 'r':
+ rflag = 1;
+ break;
+ case 's':
+ input_string = optarg;
+ break;
+ case 't':
+ tflag++;
+ break;
+ case 'x':
+ xflag = 1;
+ break;
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (ofile == NULL)
+ ofile = stdout;
+
+ if (pledge("stdio rpath", NULL) == -1)
+ err(1, "pledge");
+
+ /* Most arguments are mutually exclusive */
+ fl = pflag + (tflag ? 1 : 0) + xflag + cflag + (input_string != NULL);
+ if (fl > 1 || (fl && argc && cflag == 0) || (rflag && qflag) ||
+ (selective_checklist != NULL && argc == 0))
+ usage();
+ if (selective_checklist || cflag) {
+ if (TAILQ_FIRST(&hl) != TAILQ_LAST(&hl, hash_list))
+ errx(1, "only a single algorithm may be specified "
+ "in -C or -c mode");
+ }
+
+ /* No algorithm specified, check the name we were called as. */
+ if (TAILQ_EMPTY(&hl)) {
+ for (hf = functions; hf->name != NULL; hf++) {
+ if (strcasecmp(hf->name, __progname) == 0)
+ break;
+ }
+ if (hf->name == NULL)
+ hf = &functions[0]; /* default to cksum */
+ hash_insert(&hl, hf, (hf->base64 == -1 ? 0 : bflag));
+ }
+
+ if (rflag || qflag) {
+ const int new_style = rflag ? STYLE_CKSUM : STYLE_TERSE;
+ TAILQ_FOREACH(hf, &hl, tailq) {
+ hf->style = new_style;
+ }
+ }
+
+#if !defined(SHA2_ONLY)
+ if (tflag)
+ digest_time(&hl, tflag);
+ else if (xflag)
+ digest_test(&hl);
+ else if (input_string)
+ digest_string(input_string, &hl);
+ else if (selective_checklist) {
+ int i;
+
+ error = digest_filelist(selective_checklist, TAILQ_FIRST(&hl),
+ argc, argv);
+ for (i = 0; i < argc; i++) {
+ if (argv[i] != NULL) {
+ warnx("%s does not exist in %s", argv[i],
+ selective_checklist);
+ error++;
+ }
+ }
+ } else if (cflag) {
+ if (argc == 0)
+ error = digest_filelist("-", TAILQ_FIRST(&hl), 0, NULL);
+ else
+ while (argc--)
+ error += digest_filelist(*argv++,
+ TAILQ_FIRST(&hl), 0, NULL);
+ } else
+#endif /* !defined(SHA2_ONLY) */
+ if (pflag || argc == 0)
+ error = digest_file("-", &hl, pflag);
+ else
+ while (argc--)
+ error += digest_file(*argv++, &hl, 0);
+
+ return(error ? EXIT_FAILURE : EXIT_SUCCESS);
+}
+
+void
+hash_insert(struct hash_list *hl, struct hash_function *hf, int base64)
+{
+ struct hash_function *hftmp;
+
+ hftmp = malloc(sizeof(*hftmp));
+ if (hftmp == NULL)
+ err(1, NULL);
+ *hftmp = *hf;
+ hftmp->base64 = base64;
+ TAILQ_INSERT_TAIL(hl, hftmp, tailq);
+}
+
+void
+digest_end(const struct hash_function *hf, void *ctx, char *buf, size_t bsize,
+ int base64)
+{
+ u_char *digest;
+
+ if (base64 == 1) {
+ if ((digest = malloc(hf->digestlen)) == NULL)
+ err(1, NULL);
+ hf->final(digest, ctx);
+ if (b64_ntop(digest, hf->digestlen, buf, bsize) == -1)
+ errx(1, "error encoding base64");
+ free(digest);
+ } else {
+ hf->end(ctx, buf);
+ }
+}
+
+#if !defined(SHA2_ONLY)
+void
+digest_string(char *string, struct hash_list *hl)
+{
+ struct hash_function *hf;
+ char digest[MAX_DIGEST_LEN + 1];
+ union ANY_CTX context;
+
+ TAILQ_FOREACH(hf, hl, tailq) {
+ hf->init(&context);
+ hf->update(&context, string, strlen(string));
+ digest_end(hf, &context, digest, sizeof(digest),
+ hf->base64);
+ digest_printstr(hf, string, digest);
+ }
+}
+#endif /* !defined(SHA2_ONLY) */
+
+void
+digest_print(const struct hash_function *hf, const char *what,
+ const char *digest)
+{
+ switch (hf->style) {
+ case STYLE_MD5:
+ (void)fprintf(ofile, "%s (%s) = %s\n", hf->name, what, digest);
+ break;
+ case STYLE_CKSUM:
+ (void)fprintf(ofile, "%s %s\n", digest, what);
+ break;
+ case STYLE_TERSE:
+ (void)fprintf(ofile, "%s\n", digest);
+ break;
+ }
+}
+
+#if !defined(SHA2_ONLY)
+void
+digest_printstr(const struct hash_function *hf, const char *what,
+ const char *digest)
+{
+ switch (hf->style) {
+ case STYLE_MD5:
+ (void)fprintf(ofile, "%s (\"%s\") = %s\n", hf->name, what, digest);
+ break;
+ case STYLE_CKSUM:
+ (void)fprintf(ofile, "%s %s\n", digest, what);
+ break;
+ case STYLE_TERSE:
+ (void)fprintf(ofile, "%s\n", digest);
+ break;
+ }
+}
+#endif /* !defined(SHA2_ONLY) */
+
+int
+digest_file(const char *file, struct hash_list *hl, int echo)
+{
+ struct hash_function *hf;
+ FILE *fp;
+ size_t nread;
+ u_char data[32 * 1024];
+ char digest[MAX_DIGEST_LEN + 1];
+
+ if (strcmp(file, "-") == 0)
+ fp = stdin;
+ else if ((fp = fopen(file, "r")) == NULL) {
+ warn("cannot open %s", file);
+ return(1);
+ }
+
+ TAILQ_FOREACH(hf, hl, tailq) {
+ if ((hf->ctx = malloc(sizeof(union ANY_CTX))) == NULL)
+ err(1, NULL);
+ hf->init(hf->ctx);
+ }
+ while ((nread = fread(data, 1UL, sizeof(data), fp)) != 0) {
+ if (echo) {
+ (void)fwrite(data, nread, 1UL, stdout);
+ if (fflush(stdout) != 0)
+ err(1, "stdout: write error");
+ }
+ TAILQ_FOREACH(hf, hl, tailq)
+ hf->update(hf->ctx, data, nread);
+ }
+ if (ferror(fp)) {
+ warn("%s: read error", file);
+ if (fp != stdin)
+ fclose(fp);
+ TAILQ_FOREACH(hf, hl, tailq) {
+ free(hf->ctx);
+ hf->ctx = NULL;
+ }
+ return(1);
+ }
+ if (fp != stdin)
+ fclose(fp);
+ TAILQ_FOREACH(hf, hl, tailq) {
+ digest_end(hf, hf->ctx, digest, sizeof(digest), hf->base64);
+ free(hf->ctx);
+ hf->ctx = NULL;
+ if (fp == stdin)
+ fprintf(ofile, "%s\n", digest);
+ else
+ digest_print(hf, file, digest);
+ }
+ return(0);
+}
+
+#if !defined(SHA2_ONLY)
+/*
+ * Parse through the input file looking for valid lines.
+ * If one is found, use this checksum and file as a reference and
+ * generate a new checksum against the file on the filesystem.
+ * Print out the result of each comparison.
+ */
+int
+digest_filelist(const char *file, struct hash_function *defhash, int selcount,
+ char **sel)
+{
+ int found, base64, error, cmp, i;
+ size_t algorithm_max, algorithm_min;
+ const char *algorithm;
+ char *filename, *checksum, *line, *p, *tmpline;
+ char digest[MAX_DIGEST_LEN + 1];
+ ssize_t linelen;
+ FILE *listfp, *fp;
+ size_t len, linesize, nread;
+ int *sel_found = NULL;
+ u_char data[32 * 1024];
+ union ANY_CTX context;
+ struct hash_function *hf;
+
+ if (strcmp(file, "-") == 0) {
+ listfp = stdin;
+ } else if ((listfp = fopen(file, "r")) == NULL) {
+ warn("cannot open %s", file);
+ return(1);
+ }
+
+ if (sel != NULL) {
+ sel_found = calloc((size_t)selcount, sizeof(*sel_found));
+ if (sel_found == NULL)
+ err(1, NULL);
+ }
+
+ algorithm_max = algorithm_min = strlen(functions[0].name);
+ for (hf = &functions[1]; hf->name != NULL; hf++) {
+ len = strlen(hf->name);
+ algorithm_max = MAXIMUM(algorithm_max, len);
+ algorithm_min = MINIMUM(algorithm_min, len);
+ }
+
+ error = found = 0;
+ line = NULL;
+ linesize = 0;
+ while ((linelen = getline(&line, &linesize, listfp)) != -1) {
+ tmpline = line;
+ base64 = 0;
+ if (line[linelen - 1] == '\n')
+ line[linelen - 1] = '\0';
+ while (isspace((unsigned char)*tmpline))
+ tmpline++;
+
+ /*
+ * Crack the line into an algorithm, filename, and checksum.
+ * Lines are of the form:
+ * ALGORITHM (FILENAME) = CHECKSUM
+ *
+ * Fallback on GNU form:
+ * CHECKSUM FILENAME
+ */
+ p = strchr(tmpline, ' ');
+ if (p != NULL && *(p + 1) == '(') {
+ /* BSD form */
+ *p = '\0';
+ algorithm = tmpline;
+ len = strlen(algorithm);
+ if (len > algorithm_max || len < algorithm_min)
+ continue;
+
+ filename = p + 2;
+ p = strrchr(filename, ')');
+ if (p == NULL || strncmp(p + 1, " = ", (size_t)3) != 0)
+ continue;
+ *p = '\0';
+
+ checksum = p + 4;
+ p = strpbrk(checksum, " \t\r");
+ if (p != NULL)
+ *p = '\0';
+
+ /*
+ * Check that the algorithm is one we recognize.
+ */
+ for (hf = functions; hf->name != NULL; hf++) {
+ if (strcasecmp(algorithm, hf->name) == 0)
+ break;
+ }
+ if (hf->name == NULL || *checksum == '\0')
+ continue;
+ /*
+ * Check the length to see if this could be
+ * a valid checksum. If hex, it will be 2x the
+ * size of the binary data. For base64, we have
+ * to check both with and without the '=' padding.
+ */
+ len = strlen(checksum);
+ if (len != hf->digestlen * 2) {
+ size_t len2;
+
+ if (checksum[len - 1] == '=') {
+ /* use padding */
+ len2 = 4 * ((hf->digestlen + 2) / 3);
+ } else {
+ /* no padding */
+ len2 = (4 * hf->digestlen + 2) / 3;
+ }
+ if (len != len2)
+ continue;
+ base64 = 1;
+ }
+ } else {
+ /* could be GNU form */
+ if ((hf = defhash) == NULL)
+ continue;
+ algorithm = hf->name;
+ checksum = tmpline;
+ if ((p = strchr(checksum, ' ')) == NULL)
+ continue;
+ if (hf->style == STYLE_CKSUM) {
+ if ((p = strchr(p + 1, ' ')) == NULL)
+ continue;
+ }
+ *p++ = '\0';
+ while (isspace((unsigned char)*p))
+ p++;
+ if (*p == '\0')
+ continue;
+ filename = p;
+ p = strpbrk(filename, "\t\r");
+ if (p != NULL)
+ *p = '\0';
+ }
+ found = 1;
+
+ /*
+ * If only a selection of files is wanted, proceed only
+ * if the filename matches one of those in the selection.
+ */
+ if (sel != NULL) {
+ for (i = 0; i < selcount; i++) {
+ if (strcmp(sel[i], filename) == 0) {
+ sel_found[i] = 1;
+ break;
+ }
+ }
+ if (i == selcount)
+ continue;
+ }
+
+ if ((fp = fopen(filename, "r")) == NULL) {
+ warn("cannot open %s", filename);
+ (void)printf("(%s) %s: %s\n", algorithm, filename,
+ (errno == ENOENT ? "MISSING" : "FAILED"));
+ error = 1;
+ continue;
+ }
+
+ hf->init(&context);
+ while ((nread = fread(data, 1UL, sizeof(data), fp)) > 0)
+ hf->update(&context, data, nread);
+ if (ferror(fp)) {
+ warn("%s: read error", file);
+ error = 1;
+ fclose(fp);
+ continue;
+ }
+ fclose(fp);
+ digest_end(hf, &context, digest, sizeof(digest), base64);
+
+ if (base64)
+ cmp = strncmp(checksum, digest, len);
+ else
+ cmp = strcasecmp(checksum, digest);
+ if (cmp == 0) {
+ if (qflag == 0)
+ (void)printf("(%s) %s: OK\n", algorithm,
+ filename);
+ } else {
+ (void)printf("(%s) %s: FAILED\n", algorithm, filename);
+ error = 1;
+ }
+ }
+ free(line);
+ if (ferror(listfp)) {
+ warn("%s: getline", file);
+ error = 1;
+ }
+ if (listfp != stdin)
+ fclose(listfp);
+ if (!found)
+ warnx("%s: no properly formatted checksum lines found", file);
+ if (sel_found != NULL) {
+ /*
+ * Mark found files by setting them to NULL so that we can
+ * detect files that are missing from the checklist later.
+ */
+ for (i = 0; i < selcount; i++) {
+ if (sel_found[i])
+ sel[i] = NULL;
+ }
+ free(sel_found);
+ }
+ return(error || !found);
+}
+
+#define TEST_BLOCK_LEN 10000
+#define TEST_BLOCK_COUNT 10000
+
+void
+digest_time(struct hash_list *hl, int times)
+{
+ struct hash_function *hf;
+ struct rusage start, stop;
+ struct timeval res;
+ union ANY_CTX context;
+ u_int i;
+ u_char data[TEST_BLOCK_LEN];
+ char digest[MAX_DIGEST_LEN + 1];
+ double elapsed;
+ int count = TEST_BLOCK_COUNT;
+ while (--times > 0 && count < INT_MAX / 10)
+ count *= 10;
+
+ TAILQ_FOREACH(hf, hl, tailq) {
+ (void)printf("%s time trial. Processing %d %d-byte blocks...",
+ hf->name, count, TEST_BLOCK_LEN);
+ fflush(stdout);
+
+ /* Initialize data based on block number. */
+ for (i = 0; i < TEST_BLOCK_LEN; i++)
+ data[i] = (u_char)(i & 0xff);
+
+ getrusage(RUSAGE_SELF, &start);
+ hf->init(&context);
+ for (i = 0; i < count; i++)
+ hf->update(&context, data, (size_t)TEST_BLOCK_LEN);
+ digest_end(hf, &context, digest, sizeof(digest), hf->base64);
+ getrusage(RUSAGE_SELF, &stop);
+ timersub(&stop.ru_utime, &start.ru_utime, &res);
+ elapsed = res.tv_sec + res.tv_usec / 1000000.0;
+
+ (void)printf("\nDigest = %s\n", digest);
+ (void)printf("Time = %f seconds\n", elapsed);
+ (void)printf("Speed = %f bytes/second\n",
+ (double)TEST_BLOCK_LEN * count / elapsed);
+ }
+}
+
+void
+digest_test(struct hash_list *hl)
+{
+ struct hash_function *hf;
+ union ANY_CTX context;
+ int i;
+ char digest[MAX_DIGEST_LEN + 1];
+ unsigned char buf[1000];
+ unsigned const char *test_strings[] = {
+ "",
+ "a",
+ "abc",
+ "message digest",
+ "abcdefghijklmnopqrstuvwxyz",
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ "0123456789",
+ "12345678901234567890123456789012345678901234567890123456789"
+ "012345678901234567890",
+ };
+
+ TAILQ_FOREACH(hf, hl, tailq) {
+ (void)printf("%s test suite:\n", hf->name);
+
+ for (i = 0; i < 8; i++) {
+ hf->init(&context);
+ hf->update(&context, test_strings[i],
+ strlen(test_strings[i]));
+ digest_end(hf, &context, digest, sizeof(digest),
+ hf->base64);
+ digest_printstr(hf, test_strings[i], digest);
+ }
+
+ /* Now simulate a string of a million 'a' characters. */
+ memset(buf, 'a', sizeof(buf));
+ hf->init(&context);
+ for (i = 0; i < 1000; i++)
+ hf->update(&context, buf, sizeof(buf));
+ digest_end(hf, &context, digest, sizeof(digest), hf->base64);
+ digest_print(hf, "one million 'a' characters",
+ digest);
+ }
+}
+#endif /* !defined(SHA2_ONLY) */
+
+void
+usage(void)
+{
+#if !defined(SHA2_ONLY)
+ if (strcmp(__progname, "cksum") == 0)
+ fprintf(stderr, "usage: %s [-bcpqrtx] [-a algorithms] [-C checklist] "
+ "[-h hashfile]\n"
+ " [-s string] [file ...]\n",
+ __progname);
+ else
+#endif /* !defined(SHA2_ONLY) */
+ fprintf(stderr, "usage:"
+ "\t%s [-bcpqrtx] [-C checklist] [-h hashfile] [-s string] "
+ "[file ...]\n",
+ __progname);
+
+ exit(EXIT_FAILURE);
+}
diff --git a/bin/pax/CVS/Entries b/bin/pax/CVS/Entries
new file mode 100644
index 0000000..0ba2835
--- /dev/null
+++ b/bin/pax/CVS/Entries
@@ -0,0 +1,24 @@
+/Makefile/1.13/Thu Sep 13 12:33:43 2018//
+/ar_io.c/1.63/Fri Jun 28 13:34:59 2019//
+/ar_subs.c/1.49/Fri Jun 28 13:34:59 2019//
+/buf_subs.c/1.31/Fri Jun 28 13:34:59 2019//
+/cpio.1/1.36/Thu Jan 16 16:46:46 2020//
+/cpio.c/1.33/Sat Sep 16 07:42:34 2017//
+/cpio.h/1.4/Mon Jun 2 23:32:08 2003//
+/extern.h/1.60/Mon Mar 23 20:04:19 2020//
+/file_subs.c/1.55/Mon Mar 23 20:04:19 2020//
+/ftree.c/1.42/Fri Jun 28 13:34:59 2019//
+/gen_subs.c/1.32/Fri Aug 26 05:06:14 2016//
+/getoldopt.c/1.9/Tue Oct 27 23:59:22 2009//
+/options.c/1.103/Fri Nov 15 20:34:17 2019//
+/pat_rep.c/1.43/Sat Sep 16 07:42:34 2017//
+/pax.1/1.75/Thu Jan 16 16:46:46 2020//
+/pax.c/1.53/Fri Jun 28 13:34:59 2019//
+/pax.h/1.29/Tue Sep 12 17:11:11 2017//
+/sel_subs.c/1.28/Mon Jun 24 03:33:09 2019//
+/tables.c/1.54/Fri Jun 28 05:35:34 2019//
+/tar.1/1.62/Thu Jan 16 16:46:46 2020//
+/tar.c/1.68/Mon Jun 24 03:33:09 2019//
+/tar.h/1.9/Wed Jan 8 06:43:34 2014//
+/tty_subs.c/1.17/Fri Aug 26 04:22:13 2016//
+D
diff --git a/bin/pax/CVS/Repository b/bin/pax/CVS/Repository
new file mode 100644
index 0000000..19b1a65
--- /dev/null
+++ b/bin/pax/CVS/Repository
@@ -0,0 +1 @@
+src/bin/pax
diff --git a/bin/pax/CVS/Root b/bin/pax/CVS/Root
new file mode 100644
index 0000000..3811072
--- /dev/null
+++ b/bin/pax/CVS/Root
@@ -0,0 +1 @@
+/cvs
diff --git a/bin/pax/Makefile b/bin/pax/Makefile
new file mode 100644
index 0000000..5dd36e2
--- /dev/null
+++ b/bin/pax/Makefile
@@ -0,0 +1,11 @@
+# $OpenBSD: Makefile,v 1.13 2018/09/13 12:33:43 millert Exp $
+
+WARNINGS=Yes
+PROG= pax
+SRCS= ar_io.c ar_subs.c buf_subs.c cpio.c file_subs.c ftree.c\
+ gen_subs.c getoldopt.c options.c pat_rep.c pax.c sel_subs.c tables.c\
+ tar.c tty_subs.c
+MAN= pax.1 tar.1 cpio.1
+LINKS= ${BINDIR}/pax ${BINDIR}/tar ${BINDIR}/pax ${BINDIR}/cpio
+
+.include <bsd.prog.mk>
diff --git a/bin/pax/ar_io.c b/bin/pax/ar_io.c
new file mode 100644
index 0000000..ddbd36e
--- /dev/null
+++ b/bin/pax/ar_io.c
@@ -0,0 +1,1288 @@
+/* $OpenBSD: ar_io.c,v 1.63 2019/06/28 13:34:59 deraadt Exp $ */
+/* $NetBSD: ar_io.c,v 1.5 1996/03/26 23:54:13 mrg Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mtio.h>
+#include <sys/wait.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * Routines which deal directly with the archive I/O device/file.
+ */
+
+#define DMOD 0666 /* default mode of created archives */
+#define EXT_MODE O_RDONLY /* open mode for list/extract */
+#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */
+#define APP_MODE O_RDWR /* mode for append */
+#define STDO "<STDOUT>" /* pseudo name for stdout */
+#define STDN "<STDIN>" /* pseudo name for stdin */
+static int arfd = -1; /* archive file descriptor */
+static int artyp = ISREG; /* archive type: file/FIFO/tape */
+static int arvol = 1; /* archive volume number */
+static int lstrval = -1; /* return value from last i/o */
+static int io_ok; /* i/o worked on volume after resync */
+static int did_io; /* did i/o ever occur on volume? */
+static int done; /* set via tty termination */
+static struct stat arsb; /* stat of archive device at open */
+static int invld_rec; /* tape has out of spec record size */
+static int wr_trail = 1; /* trailer was rewritten in append */
+static int can_unlnk = 0; /* do we unlink null archives? */
+const char *arcname; /* printable name of archive */
+const char *gzip_program; /* name of gzip program */
+static pid_t zpid = -1; /* pid of child process */
+int force_one_volume; /* 1 if we ignore volume changes */
+
+static int get_phys(void);
+extern sigset_t s_mask;
+static void ar_start_gzip(int, const char *, int);
+
+/*
+ * ar_open()
+ * Opens the next archive volume. Determines the type of the device and
+ * sets up block sizes as required by the archive device and the format.
+ * Note: we may be called with name == NULL on the first open only.
+ * Return:
+ * -1 on failure, 0 otherwise
+ */
+
+int
+ar_open(const char *name)
+{
+ struct mtget mb;
+
+ if (arfd != -1)
+ (void)close(arfd);
+ arfd = -1;
+ can_unlnk = did_io = io_ok = invld_rec = 0;
+ artyp = ISREG;
+ flcnt = 0;
+
+ /*
+ * open based on overall operation mode
+ */
+ switch (act) {
+ case LIST:
+ case EXTRACT:
+ if (name == NULL) {
+ arfd = STDIN_FILENO;
+ arcname = STDN;
+ } else if ((arfd = open(name, EXT_MODE, DMOD)) == -1)
+ syswarn(1, errno, "Failed open to read on %s", name);
+ if (arfd != -1 && gzip_program != NULL)
+ ar_start_gzip(arfd, gzip_program, 0);
+ break;
+ case ARCHIVE:
+ if (name == NULL) {
+ arfd = STDOUT_FILENO;
+ arcname = STDO;
+ } else if ((arfd = open(name, AR_MODE, DMOD)) == -1)
+ syswarn(1, errno, "Failed open to write on %s", name);
+ else
+ can_unlnk = 1;
+ if (arfd != -1 && gzip_program != NULL)
+ ar_start_gzip(arfd, gzip_program, 1);
+ break;
+ case APPND:
+ if (name == NULL) {
+ arfd = STDOUT_FILENO;
+ arcname = STDO;
+ } else if ((arfd = open(name, APP_MODE, DMOD)) == -1)
+ syswarn(1, errno, "Failed open to read/write on %s",
+ name);
+ break;
+ case COPY:
+ /*
+ * arfd not used in COPY mode
+ */
+ arcname = "<NONE>";
+ lstrval = 1;
+ return(0);
+ }
+ if (arfd < 0)
+ return(-1);
+
+ if (chdname != NULL)
+ if (chdir(chdname) == -1) {
+ syswarn(1, errno, "Failed chdir to %s", chdname);
+ return(-1);
+ }
+ /*
+ * set up is based on device type
+ */
+ if (fstat(arfd, &arsb) == -1) {
+ syswarn(1, errno, "Failed stat on %s", arcname);
+ (void)close(arfd);
+ arfd = -1;
+ can_unlnk = 0;
+ return(-1);
+ }
+ if (S_ISDIR(arsb.st_mode)) {
+ paxwarn(0, "Cannot write an archive on top of a directory %s",
+ arcname);
+ (void)close(arfd);
+ arfd = -1;
+ can_unlnk = 0;
+ return(-1);
+ }
+
+ if (S_ISCHR(arsb.st_mode))
+ artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE;
+ else if (S_ISBLK(arsb.st_mode))
+ artyp = ISBLK;
+ else if ((lseek(arfd, 0, SEEK_CUR) == -1) && (errno == ESPIPE))
+ artyp = ISPIPE;
+ else
+ artyp = ISREG;
+
+ /*
+ * make sure beyond any doubt that we can unlink only regular files
+ * we created
+ */
+ if (artyp != ISREG)
+ can_unlnk = 0;
+ /*
+ * if we are writing, we are done
+ */
+ if (act == ARCHIVE) {
+ blksz = rdblksz = wrblksz;
+ lstrval = 1;
+ return(0);
+ }
+
+ /*
+ * set default blksz on read. APPNDs writes rdblksz on the last volume
+ * On all new archive volumes, we shift to wrblksz (if the user
+ * specified one, otherwise we will continue to use rdblksz). We
+ * must set blocksize based on what kind of device the archive is
+ * stored.
+ */
+ switch (artyp) {
+ case ISTAPE:
+ /*
+ * Tape drives come in at least two flavors. Those that support
+ * variable sized records and those that have fixed sized
+ * records. They must be treated differently. For tape drives
+ * that support variable sized records, we must make large
+ * reads to make sure we get the entire record, otherwise we
+ * will just get the first part of the record (up to size we
+ * asked). Tapes with fixed sized records may or may not return
+ * multiple records in a single read. We really do not care
+ * what the physical record size is UNLESS we are going to
+ * append. (We will need the physical block size to rewrite
+ * the trailer). Only when we are appending do we go to the
+ * effort to figure out the true PHYSICAL record size.
+ */
+ blksz = rdblksz = MAXBLK;
+ break;
+ case ISPIPE:
+ case ISBLK:
+ case ISCHR:
+ /*
+ * Blocksize is not a major issue with these devices (but must
+ * be kept a multiple of 512). If the user specified a write
+ * block size, we use that to read. Under append, we must
+ * always keep blksz == rdblksz. Otherwise we go ahead and use
+ * the device optimal blocksize as (and if) returned by stat
+ * and if it is within pax specs.
+ */
+ if ((act == APPND) && wrblksz) {
+ blksz = rdblksz = wrblksz;
+ break;
+ }
+
+ if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) &&
+ ((arsb.st_blksize % BLKMULT) == 0))
+ rdblksz = arsb.st_blksize;
+ else
+ rdblksz = DEVBLK;
+ /*
+ * For performance go for large reads when we can without harm
+ */
+ if ((act == APPND) || (artyp == ISCHR))
+ blksz = rdblksz;
+ else
+ blksz = MAXBLK;
+ break;
+ case ISREG:
+ /*
+ * if the user specified wrblksz works, use it. Under appends
+ * we must always keep blksz == rdblksz
+ */
+ if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){
+ blksz = rdblksz = wrblksz;
+ break;
+ }
+ /*
+ * See if we can find the blocking factor from the file size
+ */
+ for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT)
+ if ((arsb.st_size % rdblksz) == 0)
+ break;
+ /*
+ * When we cannot find a match, we may have a flawed archive.
+ */
+ if (rdblksz <= 0)
+ rdblksz = FILEBLK;
+ /*
+ * for performance go for large reads when we can
+ */
+ if (act == APPND)
+ blksz = rdblksz;
+ else
+ blksz = MAXBLK;
+ break;
+ default:
+ /*
+ * should never happen, worst case, slow...
+ */
+ blksz = rdblksz = BLKMULT;
+ break;
+ }
+ lstrval = 1;
+ return(0);
+}
+
+/*
+ * ar_close(int int_sig)
+ * closes archive device, increments volume number, and prints i/o summary
+ * If in_sig is set we're in a signal handler and can't flush stdio.
+ */
+void
+ar_close(int in_sig)
+{
+ int status;
+
+ if (arfd < 0) {
+ did_io = io_ok = flcnt = 0;
+ return;
+ }
+ if (!in_sig)
+ fflush(listf);
+
+ /*
+ * Close archive file. This may take a LONG while on tapes (we may be
+ * forced to wait for the rewind to complete) so tell the user what is
+ * going on (this avoids the user hitting control-c thinking pax is
+ * broken).
+ */
+ if (vflag && (artyp == ISTAPE)) {
+ (void)dprintf(listfd,
+ "%s%s: Waiting for tape drive close to complete...",
+ vfpart ? "\n" : "", argv0);
+ }
+
+ /*
+ * if nothing was written to the archive (and we created it), we remove
+ * it
+ */
+ if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) &&
+ (arsb.st_size == 0)) {
+ (void)unlink(arcname);
+ can_unlnk = 0;
+ }
+
+ /*
+ * for a quick extract/list, pax frequently exits before the child
+ * process is done
+ */
+ if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) {
+ kill(zpid, SIGINT);
+ zpid = -1;
+ }
+
+ (void)close(arfd);
+
+ /* Do not exit before child to ensure data integrity */
+ if (zpid > 0) {
+ waitpid(zpid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ exit_val = 1;
+ }
+
+
+ if (vflag && (artyp == ISTAPE)) {
+ (void)write(listfd, "done.\n", sizeof("done.\n")-1);
+ vfpart = 0;
+ }
+ arfd = -1;
+
+ if (!io_ok && !did_io) {
+ flcnt = 0;
+ return;
+ }
+ did_io = io_ok = 0;
+
+ /*
+ * The volume number is only increased when the last device has data
+ * and we have already determined the archive format.
+ */
+ if (frmt != NULL)
+ ++arvol;
+
+ if (!vflag) {
+ flcnt = 0;
+ return;
+ }
+
+ /*
+ * Print out a summary of I/O for this archive volume.
+ */
+ if (vfpart) {
+ (void)write(listfd, "\n", 1);
+ vfpart = 0;
+ }
+
+ /*
+ * If we have not determined the format yet, we just say how many bytes
+ * we have skipped over looking for a header to id. there is no way we
+ * could have written anything yet.
+ */
+ if (frmt == NULL) {
+ (void)dprintf(listfd,
+ "%s: unknown format, %llu bytes skipped.\n", argv0, rdcnt);
+ flcnt = 0;
+ return;
+ }
+
+ if (op_mode == OP_PAX)
+ (void)dprintf(listfd, "%s: %s vol %d, %lu files,"
+ " %llu bytes read, %llu bytes written.\n",
+ argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt);
+#ifndef NOCPIO
+ else if (op_mode == OP_CPIO)
+ (void)dprintf(listfd, "%llu blocks\n",
+ (rdcnt ? rdcnt : wrcnt) / 5120);
+#endif /* !NOCPIO */
+ flcnt = 0;
+}
+
+/*
+ * ar_drain()
+ * drain any archive format independent padding from an archive read
+ * from a socket or a pipe. This is to prevent the process on the
+ * other side of the pipe from getting a SIGPIPE (pax will stop
+ * reading an archive once a format dependent trailer is detected).
+ */
+void
+ar_drain(void)
+{
+ int res;
+ char drbuf[MAXBLK];
+
+ /*
+ * we only drain from a pipe/socket. Other devices can be closed
+ * without reading up to end of file. We sure hope that pipe is closed
+ * on the other side so we will get an EOF.
+ */
+ if ((artyp != ISPIPE) || (lstrval <= 0))
+ return;
+
+ /*
+ * keep reading until pipe is drained
+ */
+ while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0)
+ continue;
+ lstrval = res;
+}
+
+/*
+ * ar_set_wr()
+ * Set up device right before switching from read to write in an append.
+ * device dependent code (if required) to do this should be added here.
+ * For all archive devices we are already positioned at the place we want
+ * to start writing when this routine is called.
+ * Return:
+ * 0 if all ready to write, -1 otherwise
+ */
+
+int
+ar_set_wr(void)
+{
+ off_t cpos;
+
+ /*
+ * we must make sure the trailer is rewritten on append, ar_next()
+ * will stop us if the archive containing the trailer was not written
+ */
+ wr_trail = 0;
+
+ /*
+ * Add any device dependent code as required here
+ */
+ if (artyp != ISREG)
+ return(0);
+ /*
+ * Ok we have an archive in a regular file. If we were rewriting a
+ * file, we must get rid of all the stuff after the current offset
+ * (it was not written by pax).
+ */
+ if (((cpos = lseek(arfd, 0, SEEK_CUR)) < 0) ||
+ (ftruncate(arfd, cpos) == -1)) {
+ syswarn(1, errno, "Unable to truncate archive file");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * ar_app_ok()
+ * check if the last volume in the archive allows appends. We cannot check
+ * this until we are ready to write since there is no spec that says all
+ * volumes in a single archive have to be of the same type...
+ * Return:
+ * 0 if we can append, -1 otherwise.
+ */
+
+int
+ar_app_ok(void)
+{
+ if (artyp == ISPIPE) {
+ paxwarn(1, "Cannot append to an archive obtained from a pipe.");
+ return(-1);
+ }
+
+ if (!invld_rec)
+ return(0);
+ paxwarn(1,"Cannot append, device record size %d does not support %s spec",
+ rdblksz, argv0);
+ return(-1);
+}
+
+/*
+ * ar_read()
+ * read up to a specified number of bytes from the archive into the
+ * supplied buffer. When dealing with tapes we may not always be able to
+ * read what we want.
+ * Return:
+ * Number of bytes in buffer. 0 for end of file, -1 for a read error.
+ */
+
+int
+ar_read(char *buf, int cnt)
+{
+ int res = 0;
+
+ /*
+ * if last i/o was in error, no more reads until reset or new volume
+ */
+ if (lstrval <= 0)
+ return(lstrval);
+
+ /*
+ * how we read must be based on device type
+ */
+ switch (artyp) {
+ case ISTAPE:
+ if ((res = read(arfd, buf, cnt)) > 0) {
+ /*
+ * CAUTION: tape systems may not always return the same
+ * sized records so we leave blksz == MAXBLK. The
+ * physical record size that a tape drive supports is
+ * very hard to determine in a uniform and portable
+ * manner.
+ */
+ io_ok = 1;
+ if (res != rdblksz) {
+ /*
+ * Record size changed. If this happens on
+ * any record after the first, we probably have
+ * a tape drive which has a fixed record size
+ * (we are getting multiple records in a single
+ * read). Watch out for record blocking that
+ * violates pax spec (must be a multiple of
+ * BLKMULT).
+ */
+ rdblksz = res;
+ if (rdblksz % BLKMULT)
+ invld_rec = 1;
+ }
+ return(res);
+ }
+ break;
+ case ISREG:
+ case ISBLK:
+ case ISCHR:
+ case ISPIPE:
+ default:
+ /*
+ * Files are so easy to deal with. These other things cannot
+ * be trusted at all. So when we are dealing with character
+ * devices and pipes we just take what they have ready for us
+ * and return. Trying to do anything else with them runs the
+ * risk of failure.
+ */
+ if ((res = read(arfd, buf, cnt)) > 0) {
+ io_ok = 1;
+ return(res);
+ }
+ break;
+ }
+
+ /*
+ * We are in trouble at this point, something is broken...
+ */
+ lstrval = res;
+ if (res < 0)
+ syswarn(1, errno, "Failed read on archive volume %d", arvol);
+ else
+ paxwarn(0, "End of archive volume %d reached", arvol);
+ return(res);
+}
+
+/*
+ * ar_write()
+ * Write a specified number of bytes in supplied buffer to the archive
+ * device so it appears as a single "block". Deals with errors and tries
+ * to recover when faced with short writes.
+ * Return:
+ * Number of bytes written. 0 indicates end of volume reached and with no
+ * flaws (as best that can be detected). A -1 indicates an unrecoverable
+ * error in the archive occurred.
+ */
+
+int
+ar_write(char *buf, int bsz)
+{
+ ssize_t res;
+ off_t cpos;
+
+ /*
+ * do not allow pax to create a "bad" archive. Once a write fails on
+ * an archive volume prevent further writes to it.
+ */
+ if (lstrval <= 0)
+ return(lstrval);
+
+ if ((res = write(arfd, buf, bsz)) == bsz) {
+ wr_trail = 1;
+ io_ok = 1;
+ return(bsz);
+ }
+ /*
+ * write broke, see what we can do with it. We try to send any partial
+ * writes that may violate pax spec to the next archive volume.
+ */
+ if (res == -1)
+ lstrval = res;
+ else
+ lstrval = 0;
+
+ switch (artyp) {
+ case ISREG:
+ if ((res > 0) && (res % BLKMULT)) {
+ /*
+ * try to fix up partial writes which are not BLKMULT
+ * in size by forcing the runt record to next archive
+ * volume
+ */
+ if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1)
+ break;
+ cpos -= res;
+ if (ftruncate(arfd, cpos) == -1)
+ break;
+ res = lstrval = 0;
+ break;
+ }
+ if (res >= 0)
+ break;
+ /*
+ * if file is out of space, handle it like a return of 0
+ */
+ if ((errno == ENOSPC) || (errno == EFBIG) || (errno == EDQUOT))
+ res = lstrval = 0;
+ break;
+ case ISTAPE:
+ case ISCHR:
+ case ISBLK:
+ if (res >= 0)
+ break;
+ if (errno == EACCES) {
+ paxwarn(0, "Write failed, archive is write protected.");
+ res = lstrval = 0;
+ return(0);
+ }
+ /*
+ * see if we reached the end of media, if so force a change to
+ * the next volume
+ */
+ if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO))
+ res = lstrval = 0;
+ break;
+ case ISPIPE:
+ default:
+ /*
+ * we cannot fix errors to these devices
+ */
+ break;
+ }
+
+ /*
+ * Better tell the user the bad news...
+ * if this is a block aligned archive format, we may have a bad archive
+ * if the format wants the header to start at a BLKMULT boundary. While
+ * we can deal with the mis-aligned data, it violates spec and other
+ * archive readers will likely fail. if the format is not block
+ * aligned, the user may be lucky (and the archive is ok).
+ */
+ if (res >= 0) {
+ if (res > 0)
+ wr_trail = 1;
+ io_ok = 1;
+ }
+
+ /*
+ * If we were trying to rewrite the trailer and it didn't work, we
+ * must quit right away.
+ */
+ if (!wr_trail && (res <= 0)) {
+ paxwarn(1,"Unable to append, trailer re-write failed. Quitting.");
+ return(res);
+ }
+
+ if (res == 0)
+ paxwarn(0, "End of archive volume %d reached", arvol);
+ else if (res < 0)
+ syswarn(1, errno, "Failed write to archive volume: %d", arvol);
+ else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0))
+ paxwarn(0,"WARNING: partial archive write. Archive MAY BE FLAWED");
+ else
+ paxwarn(1,"WARNING: partial archive write. Archive IS FLAWED");
+ return(res);
+}
+
+/*
+ * ar_rdsync()
+ * Try to move past a bad spot on a flawed archive as needed to continue
+ * I/O. Clears error flags to allow I/O to continue.
+ * Return:
+ * 0 when ok to try i/o again, -1 otherwise.
+ */
+
+int
+ar_rdsync(void)
+{
+ long fsbz;
+ off_t cpos;
+ off_t mpos;
+ struct mtop mb;
+
+ /*
+ * Fail resync attempts at user request (done) or if this is going to be
+ * an update/append to a existing archive. if last i/o hit media end,
+ * we need to go to the next volume not try a resync
+ */
+ if ((done > 0) || (lstrval == 0))
+ return(-1);
+
+ if ((act == APPND) || (act == ARCHIVE)) {
+ paxwarn(1, "Cannot allow updates to an archive with flaws.");
+ return(-1);
+ }
+ if (io_ok)
+ did_io = 1;
+
+ switch (artyp) {
+ case ISTAPE:
+ /*
+ * if the last i/o was a successful data transfer, we assume
+ * the fault is just a bad record on the tape that we are now
+ * past. If we did not get any data since the last resync try
+ * to move the tape forward one PHYSICAL record past any
+ * damaged tape section. Some tape drives are stubborn and need
+ * to be pushed.
+ */
+ if (io_ok) {
+ io_ok = 0;
+ lstrval = 1;
+ break;
+ }
+ mb.mt_op = MTFSR;
+ mb.mt_count = 1;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1)
+ break;
+ lstrval = 1;
+ break;
+ case ISREG:
+ case ISCHR:
+ case ISBLK:
+ /*
+ * try to step over the bad part of the device.
+ */
+ io_ok = 0;
+ if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG))
+ fsbz = BLKMULT;
+ if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1)
+ break;
+ mpos = fsbz - (cpos % fsbz);
+ if (lseek(arfd, mpos, SEEK_CUR) == -1)
+ break;
+ lstrval = 1;
+ break;
+ case ISPIPE:
+ default:
+ /*
+ * cannot recover on these archive device types
+ */
+ io_ok = 0;
+ break;
+ }
+ if (lstrval <= 0) {
+ paxwarn(1, "Unable to recover from an archive read failure.");
+ return(-1);
+ }
+ paxwarn(0, "Attempting to recover from an archive read failure.");
+ return(0);
+}
+
+/*
+ * ar_fow()
+ * Move the I/O position within the archive forward the specified number of
+ * bytes as supported by the device. If we cannot move the requested
+ * number of bytes, return the actual number of bytes moved in skipped.
+ * Return:
+ * 0 if moved the requested distance, -1 on complete failure, 1 on
+ * partial move (the amount moved is in skipped)
+ */
+
+int
+ar_fow(off_t sksz, off_t *skipped)
+{
+ off_t cpos;
+ off_t mpos;
+
+ *skipped = 0;
+ if (sksz <= 0)
+ return(0);
+
+ /*
+ * we cannot move forward at EOF or error
+ */
+ if (lstrval <= 0)
+ return(lstrval);
+
+ /*
+ * Safer to read forward on devices where it is hard to find the end of
+ * the media without reading to it. With tapes we cannot be sure of the
+ * number of physical blocks to skip (we do not know physical block
+ * size at this point), so we must only read forward on tapes!
+ */
+ if (artyp != ISREG)
+ return(0);
+
+ /*
+ * figure out where we are in the archive
+ */
+ if ((cpos = lseek(arfd, 0, SEEK_CUR)) >= 0) {
+ /*
+ * we can be asked to move farther than there are bytes in this
+ * volume, if so, just go to file end and let normal buf_fill()
+ * deal with the end of file (it will go to next volume by
+ * itself)
+ */
+ if ((mpos = cpos + sksz) > arsb.st_size) {
+ *skipped = arsb.st_size - cpos;
+ mpos = arsb.st_size;
+ } else
+ *skipped = sksz;
+ if (lseek(arfd, mpos, SEEK_SET) >= 0)
+ return(0);
+ }
+ syswarn(1, errno, "Forward positioning operation on archive failed");
+ lstrval = -1;
+ return(-1);
+}
+
+/*
+ * ar_rev()
+ * move the i/o position within the archive backwards the specified byte
+ * count as supported by the device. With tapes drives we RESET rdblksz to
+ * the PHYSICAL blocksize.
+ * NOTE: We should only be called to move backwards so we can rewrite the
+ * last records (the trailer) of an archive (APPEND).
+ * Return:
+ * 0 if moved the requested distance, -1 on complete failure
+ */
+
+int
+ar_rev(off_t sksz)
+{
+ off_t cpos;
+ struct mtop mb;
+ int phyblk;
+
+ /*
+ * make sure we do not have try to reverse on a flawed archive
+ */
+ if (lstrval < 0)
+ return(lstrval);
+
+ switch (artyp) {
+ case ISPIPE:
+ if (sksz <= 0)
+ break;
+ /*
+ * cannot go backwards on these critters
+ */
+ paxwarn(1, "Reverse positioning on pipes is not supported.");
+ lstrval = -1;
+ return(-1);
+ case ISREG:
+ case ISBLK:
+ case ISCHR:
+ default:
+ if (sksz <= 0)
+ break;
+
+ /*
+ * For things other than files, backwards movement has a very
+ * high probability of failure as we really do not know the
+ * true attributes of the device we are talking to (the device
+ * may not even have the ability to lseek() in any direction).
+ * First we figure out where we are in the archive.
+ */
+ if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) {
+ syswarn(1, errno,
+ "Unable to obtain current archive byte offset");
+ lstrval = -1;
+ return(-1);
+ }
+
+ /*
+ * we may try to go backwards past the start when the archive
+ * is only a single record. If this happens and we are on a
+ * multi-volume archive, we need to go to the end of the
+ * previous volume and continue our movement backwards from
+ * there.
+ */
+ if ((cpos -= sksz) < 0) {
+ if (arvol > 1) {
+ /*
+ * this should never happen
+ */
+ paxwarn(1,"Reverse position on previous volume.");
+ lstrval = -1;
+ return(-1);
+ }
+ cpos = 0;
+ }
+ if (lseek(arfd, cpos, SEEK_SET) == -1) {
+ syswarn(1, errno, "Unable to seek archive backwards");
+ lstrval = -1;
+ return(-1);
+ }
+ break;
+ case ISTAPE:
+ /*
+ * Calculate and move the proper number of PHYSICAL tape
+ * blocks. If the sksz is not an even multiple of the physical
+ * tape size, we cannot do the move (this should never happen).
+ * (We also cannot handle trailers spread over two vols.)
+ * get_phys() also makes sure we are in front of the filemark.
+ */
+ if ((phyblk = get_phys()) <= 0) {
+ lstrval = -1;
+ return(-1);
+ }
+
+ /*
+ * make sure future tape reads only go by physical tape block
+ * size (set rdblksz to the real size).
+ */
+ rdblksz = phyblk;
+
+ /*
+ * if no movement is required, just return (we must be after
+ * get_phys() so the physical blocksize is properly set)
+ */
+ if (sksz <= 0)
+ break;
+
+ /*
+ * ok we have to move. Make sure the tape drive can do it.
+ */
+ if (sksz % phyblk) {
+ paxwarn(1,
+ "Tape drive unable to backspace requested amount");
+ lstrval = -1;
+ return(-1);
+ }
+
+ /*
+ * move backwards the requested number of bytes
+ */
+ mb.mt_op = MTBSR;
+ mb.mt_count = sksz/phyblk;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1) {
+ syswarn(1,errno, "Unable to backspace tape %d blocks.",
+ mb.mt_count);
+ lstrval = -1;
+ return(-1);
+ }
+ break;
+ }
+ lstrval = 1;
+ return(0);
+}
+
+/*
+ * get_phys()
+ * Determine the physical block size on a tape drive. We need the physical
+ * block size so we know how many bytes we skip over when we move with
+ * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when
+ * return.
+ * This is one really SLOW routine...
+ * Return:
+ * physical block size if ok (ok > 0), -1 otherwise
+ */
+
+static int
+get_phys(void)
+{
+ int padsz = 0;
+ int res;
+ int phyblk;
+ struct mtop mb;
+ char scbuf[MAXBLK];
+
+ /*
+ * move to the file mark, and then back up one record and read it.
+ * this should tell us the physical record size the tape is using.
+ */
+ if (lstrval == 1) {
+ /*
+ * we know we are at file mark when we get back a 0 from
+ * read()
+ */
+ while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0)
+ padsz += res;
+ if (res == -1) {
+ syswarn(1, errno, "Unable to locate tape filemark.");
+ return(-1);
+ }
+ }
+
+ /*
+ * move backwards over the file mark so we are at the end of the
+ * last record.
+ */
+ mb.mt_op = MTBSF;
+ mb.mt_count = 1;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1) {
+ syswarn(1, errno, "Unable to backspace over tape filemark.");
+ return(-1);
+ }
+
+ /*
+ * move backwards so we are in front of the last record and read it to
+ * get physical tape blocksize.
+ */
+ mb.mt_op = MTBSR;
+ mb.mt_count = 1;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1) {
+ syswarn(1, errno, "Unable to backspace over last tape block.");
+ return(-1);
+ }
+ if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) {
+ syswarn(1, errno, "Cannot determine archive tape blocksize.");
+ return(-1);
+ }
+
+ /*
+ * read forward to the file mark, then back up in front of the filemark
+ * (this is a bit paranoid, but should be safe to do).
+ */
+ while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0)
+ continue;
+ if (res == -1) {
+ syswarn(1, errno, "Unable to locate tape filemark.");
+ return(-1);
+ }
+ mb.mt_op = MTBSF;
+ mb.mt_count = 1;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1) {
+ syswarn(1, errno, "Unable to backspace over tape filemark.");
+ return(-1);
+ }
+
+ /*
+ * set lstrval so we know that the filemark has not been seen
+ */
+ lstrval = 1;
+
+ /*
+ * return if there was no padding
+ */
+ if (padsz == 0)
+ return(phyblk);
+
+ /*
+ * make sure we can move backwards over the padding. (this should
+ * never fail).
+ */
+ if (padsz % phyblk) {
+ paxwarn(1, "Tape drive unable to backspace requested amount");
+ return(-1);
+ }
+
+ /*
+ * move backwards over the padding so the head is where it was when
+ * we were first called (if required).
+ */
+ mb.mt_op = MTBSR;
+ mb.mt_count = padsz/phyblk;
+ if (ioctl(arfd, MTIOCTOP, &mb) == -1) {
+ syswarn(1,errno,"Unable to backspace tape over %d pad blocks",
+ mb.mt_count);
+ return(-1);
+ }
+ return(phyblk);
+}
+
+/*
+ * ar_next()
+ * prompts the user for the next volume in this archive. For some devices
+ * we may allow the media to be changed. Otherwise a new archive is
+ * prompted for. By pax spec, if there is no controlling tty or an eof is
+ * read on tty input, we must quit pax.
+ * Return:
+ * 0 when ready to continue, -1 when all done
+ */
+
+int
+ar_next(void)
+{
+ char buf[PAXPATHLEN+2];
+ static int freeit = 0;
+ sigset_t o_mask;
+
+ /*
+ * WE MUST CLOSE THE DEVICE. A lot of devices must see last close, (so
+ * things like writing EOF etc will be done) (Watch out ar_close() can
+ * also be called via a signal handler, so we must prevent a race.
+ */
+ if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) == -1)
+ syswarn(0, errno, "Unable to set signal mask");
+ ar_close(0);
+ if (sigprocmask(SIG_SETMASK, &o_mask, NULL) == -1)
+ syswarn(0, errno, "Unable to restore signal mask");
+
+ if (done || !wr_trail || force_one_volume || op_mode == OP_TAR)
+ return(-1);
+
+ tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0);
+
+ /*
+ * if i/o is on stdin or stdout, we cannot reopen it (we do not know
+ * the name), the user will be forced to type it in.
+ */
+ if (strcmp(arcname, STDO) && strcmp(arcname, STDN) && (artyp != ISREG)
+ && (artyp != ISPIPE)) {
+ if (artyp == ISTAPE) {
+ tty_prnt("%s ready for archive tape volume: %d\n",
+ arcname, arvol);
+ tty_prnt("Load the NEXT TAPE on the tape drive");
+ } else {
+ tty_prnt("%s ready for archive volume: %d\n",
+ arcname, arvol);
+ tty_prnt("Load the NEXT STORAGE MEDIA (if required)");
+ }
+
+ if ((act == ARCHIVE) || (act == APPND))
+ tty_prnt(" and make sure it is WRITE ENABLED.\n");
+ else
+ tty_prnt("\n");
+
+ for (;;) {
+ tty_prnt("Type \"y\" to continue, \".\" to quit %s,",
+ argv0);
+ tty_prnt(" or \"s\" to switch to new device.\nIf you");
+ tty_prnt(" cannot change storage media, type \"s\"\n");
+ tty_prnt("Is the device ready and online? > ");
+
+ if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){
+ done = 1;
+ lstrval = -1;
+ tty_prnt("Quitting %s!\n", argv0);
+ vfpart = 0;
+ return(-1);
+ }
+
+ if ((buf[0] == '\0') || (buf[1] != '\0')) {
+ tty_prnt("%s unknown command, try again\n",buf);
+ continue;
+ }
+
+ switch (buf[0]) {
+ case 'y':
+ case 'Y':
+ /*
+ * we are to continue with the same device
+ */
+ if (ar_open(arcname) >= 0)
+ return(0);
+ tty_prnt("Cannot re-open %s, try again\n",
+ arcname);
+ continue;
+ case 's':
+ case 'S':
+ /*
+ * user wants to open a different device
+ */
+ tty_prnt("Switching to a different archive\n");
+ break;
+ default:
+ tty_prnt("%s unknown command, try again\n",buf);
+ continue;
+ }
+ break;
+ }
+ } else
+ tty_prnt("Ready for archive volume: %d\n", arvol);
+
+ /*
+ * have to go to a different archive
+ */
+ for (;;) {
+ tty_prnt("Input archive name or \".\" to quit %s.\n", argv0);
+ tty_prnt("Archive name > ");
+
+ if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) {
+ done = 1;
+ lstrval = -1;
+ tty_prnt("Quitting %s!\n", argv0);
+ vfpart = 0;
+ return(-1);
+ }
+ if (buf[0] == '\0') {
+ tty_prnt("Empty file name, try again\n");
+ continue;
+ }
+ if (!strcmp(buf, "..")) {
+ tty_prnt("Illegal file name: .. try again\n");
+ continue;
+ }
+ if (strlen(buf) > PAXPATHLEN) {
+ tty_prnt("File name too long, try again\n");
+ continue;
+ }
+
+ /*
+ * try to open new archive
+ */
+ if (ar_open(buf) >= 0) {
+ if (freeit) {
+ free((char *)arcname);
+ freeit = 0;
+ }
+ if ((arcname = strdup(buf)) == NULL) {
+ done = 1;
+ lstrval = -1;
+ paxwarn(0, "Cannot save archive name.");
+ return(-1);
+ }
+ freeit = 1;
+ break;
+ }
+ tty_prnt("Cannot open %s, try again\n", buf);
+ continue;
+ }
+ return(0);
+}
+
+/*
+ * ar_start_gzip()
+ * starts the gzip compression/decompression process as a child, using magic
+ * to keep the fd the same in the calling function (parent).
+ */
+void
+ar_start_gzip(int fd, const char *path, int wr)
+{
+ int fds[2];
+ const char *gzip_flags;
+
+ if (pipe(fds) == -1)
+ err(1, "could not pipe");
+ zpid = fork();
+ if (zpid == -1)
+ err(1, "could not fork");
+
+ /* parent */
+ if (zpid) {
+ if (wr)
+ dup2(fds[1], fd);
+ else
+ dup2(fds[0], fd);
+ close(fds[0]);
+ close(fds[1]);
+
+ if (pmode == 0 || (act != EXTRACT && act != COPY)) {
+ if (pledge("stdio rpath wpath cpath fattr dpath getpw proc tape",
+ NULL) == -1)
+ err(1, "pledge");
+ }
+ } else {
+ if (wr) {
+ dup2(fds[0], STDIN_FILENO);
+ dup2(fd, STDOUT_FILENO);
+ gzip_flags = "-c";
+ } else {
+ dup2(fds[1], STDOUT_FILENO);
+ dup2(fd, STDIN_FILENO);
+ gzip_flags = "-dc";
+ }
+ close(fds[0]);
+ close(fds[1]);
+
+ /* System compressors are more likely to use pledge(2) */
+ putenv("PATH=/usr/bin:/usr/local/bin");
+
+ if (execlp(path, path, gzip_flags, (char *)NULL) == -1)
+ err(1, "could not exec %s", path);
+ /* NOTREACHED */
+ }
+}
diff --git a/bin/pax/ar_subs.c b/bin/pax/ar_subs.c
new file mode 100644
index 0000000..f0a55ab
--- /dev/null
+++ b/bin/pax/ar_subs.c
@@ -0,0 +1,1277 @@
+/* $OpenBSD: ar_subs.c,v 1.49 2019/06/28 13:34:59 deraadt Exp $ */
+/* $NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include "pax.h"
+#include "extern.h"
+
+static void wr_archive(ARCHD *, int is_app);
+static int get_arc(void);
+static int next_head(ARCHD *);
+static int rd_gnu_string(ARCHD *);
+extern sigset_t s_mask;
+
+/*
+ * Routines which control the overall operation modes of pax as specified by
+ * the user: list, append, read ...
+ */
+
+static char hdbuf[BLKMULT]; /* space for archive header on read */
+u_long flcnt; /* number of files processed */
+
+/*
+ * list()
+ * list the contents of an archive which match user supplied pattern(s)
+ * (no pattern matches all).
+ */
+
+void
+list(void)
+{
+ ARCHD *arcn;
+ int res;
+ ARCHD archd;
+ time_t now;
+
+ arcn = &archd;
+ /*
+ * figure out archive type; pass any format specific options to the
+ * archive option processing routine; call the format init routine. We
+ * also save current time for ls_list() so we do not make a system
+ * call for each file we need to print. If verbose (vflag) start up
+ * the name and group caches.
+ */
+ if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
+ ((*frmt->st_rd)() < 0))
+ return;
+
+ now = time(NULL);
+
+ /*
+ * step through the archive until the format says it is done
+ */
+ while (next_head(arcn) == 0) {
+ if (rd_gnu_string(arcn))
+ continue;
+
+ /*
+ * check for pattern, and user specified options match.
+ * When all patterns are matched we are done.
+ */
+ if ((res = pat_match(arcn)) < 0)
+ break;
+
+ if ((res == 0) && (sel_chk(arcn) == 0)) {
+ /*
+ * pattern resulted in a selected file
+ */
+ if (pat_sel(arcn) < 0)
+ break;
+
+ /*
+ * modify the name as requested by the user if name
+ * survives modification, do a listing of the file
+ */
+ if ((res = mod_name(arcn)) < 0)
+ break;
+ if (res == 0)
+ ls_list(arcn, now, stdout);
+ }
+
+ /*
+ * skip to next archive format header using values calculated
+ * by the format header read routine
+ */
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ }
+
+ /*
+ * all done, let format have a chance to cleanup, and make sure that
+ * the patterns supplied by the user were all matched
+ */
+ (void)(*frmt->end_rd)();
+ (void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
+ ar_close(0);
+ pat_chk();
+}
+
+static int
+cmp_file_times(int mtime_flag, int ctime_flag, ARCHD *arcn, struct stat *sbp)
+{
+ struct stat sb;
+
+ if (sbp == NULL) {
+ if (lstat(arcn->name, &sb) != 0)
+ return (0);
+ sbp = &sb;
+ }
+
+ if (ctime_flag && mtime_flag)
+ return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=) &&
+ timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
+ else if (ctime_flag)
+ return (timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
+ else
+ return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=));
+}
+
+/*
+ * extract()
+ * extract the member(s) of an archive as specified by user supplied
+ * pattern(s) (no patterns extracts all members)
+ */
+
+void
+extract(void)
+{
+ ARCHD *arcn;
+ int res;
+ off_t cnt;
+ ARCHD archd;
+ int fd;
+ time_t now;
+
+ sltab_start();
+
+ arcn = &archd;
+ /*
+ * figure out archive type; pass any format specific options to the
+ * archive option processing routine; call the format init routine;
+ * start up the directory modification time and access mode database
+ */
+ if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
+ ((*frmt->st_rd)() < 0) || (dir_start() < 0))
+ return;
+
+ /*
+ * When we are doing interactive rename, we store the mapping of names
+ * so we can fix up hard links files later in the archive.
+ */
+ if (iflag && (name_start() < 0))
+ return;
+
+ now = time(NULL);
+
+ /*
+ * step through each entry on the archive until the format read routine
+ * says it is done
+ */
+ while (next_head(arcn) == 0) {
+ if (rd_gnu_string(arcn))
+ continue;
+
+ /*
+ * check for pattern, and user specified options match. When
+ * all the patterns are matched we are done
+ */
+ if ((res = pat_match(arcn)) < 0)
+ break;
+
+ if ((res > 0) || (sel_chk(arcn) != 0)) {
+ /*
+ * file is not selected. skip past any file data and
+ * padding and go back for the next archive member
+ */
+ (void)rd_skip(arcn->skip + arcn->pad);
+ continue;
+ }
+
+ /*
+ * with -u or -D only extract when the archive member is newer
+ * than the file with the same name in the file system (no
+ * test of being the same type is required).
+ * NOTE: this test is done BEFORE name modifications as
+ * specified by pax. this operation can be confusing to the
+ * user who might expect the test to be done on an existing
+ * file AFTER the name mod. In honesty the pax spec is probably
+ * flawed in this respect.
+ */
+ if ((uflag || Dflag) &&
+ cmp_file_times(uflag, Dflag, arcn, NULL)) {
+ (void)rd_skip(arcn->skip + arcn->pad);
+ continue;
+ }
+
+ /*
+ * this archive member is now been selected. modify the name.
+ */
+ if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0))
+ break;
+ if (res > 0) {
+ /*
+ * a bad name mod, skip and purge name from link table
+ */
+ purg_lnk(arcn);
+ (void)rd_skip(arcn->skip + arcn->pad);
+ continue;
+ }
+
+ /*
+ * Non standard -Y and -Z flag. When the existing file is
+ * same age or newer skip
+ */
+ if ((Yflag || Zflag) &&
+ cmp_file_times(Yflag, Zflag, arcn, NULL)) {
+ (void)rd_skip(arcn->skip + arcn->pad);
+ continue;
+ }
+
+ if (vflag) {
+ if (vflag > 1)
+ ls_list(arcn, now, listf);
+ else {
+ (void)safe_print(arcn->name, listf);
+ vfpart = 1;
+ }
+ }
+
+ /*
+ * if required, chdir around.
+ */
+ if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
+ if (chdir(arcn->pat->chdname) != 0)
+ syswarn(1, errno, "Cannot chdir to %s",
+ arcn->pat->chdname);
+
+ /*
+ * all ok, extract this member based on type
+ */
+ if (!PAX_IS_REG(arcn->type)) {
+ /*
+ * process archive members that are not regular files.
+ * throw out padding and any data that might follow the
+ * header (as determined by the format).
+ */
+ if (PAX_IS_HARDLINK(arcn->type))
+ res = lnk_creat(arcn);
+ else
+ res = node_creat(arcn);
+
+ (void)rd_skip(arcn->skip + arcn->pad);
+ if (res < 0)
+ purg_lnk(arcn);
+
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ goto popd;
+ }
+ /*
+ * we have a file with data here. If we can not create it, skip
+ * over the data and purge the name from hard link table
+ */
+ if ((fd = file_creat(arcn)) < 0) {
+ (void)rd_skip(arcn->skip + arcn->pad);
+ purg_lnk(arcn);
+ goto popd;
+ }
+ /*
+ * extract the file from the archive and skip over padding and
+ * any unprocessed data
+ */
+ res = rd_wrfile(arcn, fd, &cnt);
+ file_close(arcn, fd);
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ if (!res)
+ (void)rd_skip(cnt + arcn->pad);
+
+popd:
+ /*
+ * if required, chdir around.
+ */
+ if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
+ if (fchdir(cwdfd) != 0)
+ syswarn(1, errno,
+ "Can't fchdir to starting directory");
+ }
+
+ /*
+ * all done, restore directory modes and times as required; make sure
+ * all patterns supplied by the user were matched; block off signals
+ * to avoid chance for multiple entry into the cleanup code.
+ */
+ (void)(*frmt->end_rd)();
+ (void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
+ ar_close(0);
+ sltab_process(0);
+ proc_dir(0);
+ pat_chk();
+}
+
+/*
+ * wr_archive()
+ * Write an archive. used in both creating a new archive and appends on
+ * previously written archive.
+ */
+
+static void
+wr_archive(ARCHD *arcn, int is_app)
+{
+ int res;
+ int hlk;
+ int wr_one;
+ off_t cnt;
+ int (*wrf)(ARCHD *);
+ int fd = -1;
+ time_t now;
+
+ /*
+ * if this format supports hard link storage, start up the database
+ * that detects them.
+ */
+ if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0))
+ return;
+
+ /*
+ * if this is not append, and there are no files, we do not write a
+ * trailer
+ */
+ wr_one = is_app;
+
+ /*
+ * start up the file traversal code and format specific write
+ */
+ if (ftree_start() < 0) {
+ if (is_app)
+ goto trailer;
+ return;
+ } else if (((*frmt->st_wr)() < 0))
+ return;
+
+ wrf = frmt->wr;
+
+ /*
+ * When we are doing interactive rename, we store the mapping of names
+ * so we can fix up hard links files later in the archive.
+ */
+ if (iflag && (name_start() < 0))
+ return;
+
+ now = time(NULL);
+
+ /*
+ * while there are files to archive, process them one at at time
+ */
+ while (next_file(arcn) == 0) {
+ /*
+ * check if this file meets user specified options match.
+ */
+ if (sel_chk(arcn) != 0)
+ continue;
+ fd = -1;
+ if (uflag) {
+ /*
+ * only archive if this file is newer than a file with
+ * the same name that is already stored on the archive
+ */
+ if ((res = chk_ftime(arcn)) < 0)
+ break;
+ if (res > 0) {
+ ftree_skipped_newer(arcn);
+ continue;
+ }
+ }
+
+ /*
+ * this file is considered selected now. see if this is a hard
+ * link to a file already stored
+ */
+ ftree_sel(arcn);
+ if (hlk && (chk_lnk(arcn) < 0))
+ break;
+
+ if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) {
+ /*
+ * we will have to read this file. by opening it now we
+ * can avoid writing a header to the archive for a file
+ * we were later unable to read (we also purge it from
+ * the link table).
+ */
+ if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) {
+ syswarn(1,errno, "Unable to open %s to read",
+ arcn->org_name);
+ purg_lnk(arcn);
+ continue;
+ }
+ }
+
+ /*
+ * Now modify the name as requested by the user
+ */
+ if ((res = mod_name(arcn)) < 0) {
+ /*
+ * name modification says to skip this file, close the
+ * file and purge link table entry
+ */
+ rdfile_close(arcn, &fd);
+ purg_lnk(arcn);
+ break;
+ }
+
+ if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) {
+ /*
+ * unable to obtain the crc we need, close the file,
+ * purge link table entry
+ */
+ rdfile_close(arcn, &fd);
+ purg_lnk(arcn);
+ continue;
+ }
+
+ if (vflag) {
+ if (vflag > 1)
+ ls_list(arcn, now, listf);
+ else {
+ (void)safe_print(arcn->name, listf);
+ vfpart = 1;
+ }
+ }
+ ++flcnt;
+
+ /*
+ * looks safe to store the file, have the format specific
+ * routine write routine store the file header on the archive
+ */
+ if ((res = (*wrf)(arcn)) < 0) {
+ rdfile_close(arcn, &fd);
+ break;
+ }
+ wr_one = 1;
+ if (res > 0) {
+ /*
+ * format write says no file data needs to be stored
+ * so we are done messing with this file
+ */
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ rdfile_close(arcn, &fd);
+ continue;
+ }
+
+ /*
+ * Add file data to the archive, quit on write error. if we
+ * cannot write the entire file contents to the archive we
+ * must pad the archive to replace the missing file data
+ * (otherwise during an extract the file header for the file
+ * which FOLLOWS this one will not be where we expect it to
+ * be).
+ */
+ res = wr_rdfile(arcn, fd, &cnt);
+ rdfile_close(arcn, &fd);
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ if (res < 0)
+ break;
+
+ /*
+ * pad as required, cnt is number of bytes not written
+ */
+ if (((cnt > 0) && (wr_skip(cnt) < 0)) ||
+ ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0)))
+ break;
+ }
+
+trailer:
+ /*
+ * tell format to write trailer; pad to block boundary; reset directory
+ * mode/access times, and check if all patterns supplied by the user
+ * were matched. block off signals to avoid chance for multiple entry
+ * into the cleanup code
+ */
+ if (wr_one) {
+ (*frmt->end_wr)();
+ wr_fin();
+ }
+ (void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
+ ar_close(0);
+ if (tflag)
+ proc_dir(0);
+ ftree_chk();
+}
+
+/*
+ * append()
+ * Add file to previously written archive. Archive format specified by the
+ * user must agree with archive. The archive is read first to collect
+ * modification times (if -u) and locate the archive trailer. The archive
+ * is positioned in front of the record with the trailer and wr_archive()
+ * is called to add the new members.
+ * PAX IMPLEMENTATION DETAIL NOTE:
+ * -u is implemented by adding the new members to the end of the archive.
+ * Care is taken so that these do not end up as links to the older
+ * version of the same file already stored in the archive. It is expected
+ * when extraction occurs these newer versions will over-write the older
+ * ones stored "earlier" in the archive (this may be a bad assumption as
+ * it depends on the implementation of the program doing the extraction).
+ * It is really difficult to splice in members without either re-writing
+ * the entire archive (from the point were the old version was), or having
+ * assistance of the format specification in terms of a special update
+ * header that invalidates a previous archive record. The posix spec left
+ * the method used to implement -u unspecified. This pax is able to
+ * over write existing files that it creates.
+ */
+
+void
+append(void)
+{
+ ARCHD *arcn;
+ int res;
+ ARCHD archd;
+ FSUB *orgfrmt;
+ int udev;
+ off_t tlen;
+
+ arcn = &archd;
+ orgfrmt = frmt;
+
+ /*
+ * Do not allow an append operation if the actual archive is of a
+ * different format than the user specified format.
+ */
+ if (get_arc() < 0)
+ return;
+ if ((orgfrmt != NULL) && (orgfrmt != frmt)) {
+ paxwarn(1, "Cannot mix current archive format %s with %s",
+ frmt->name, orgfrmt->name);
+ return;
+ }
+
+ /*
+ * pass the format any options and start up format
+ */
+ if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0))
+ return;
+
+ /*
+ * if we only are adding members that are newer, we need to save the
+ * mod times for all files we see.
+ */
+ if (uflag && (ftime_start() < 0))
+ return;
+
+ /*
+ * some archive formats encode hard links by recording the device and
+ * file serial number (inode) but copy the file anyway (multiple times)
+ * to the archive. When we append, we run the risk that newly added
+ * files may have the same device and inode numbers as those recorded
+ * on the archive but during a previous run. If this happens, when the
+ * archive is extracted we get INCORRECT hard links. We avoid this by
+ * remapping the device numbers so that newly added files will never
+ * use the same device number as one found on the archive. remapping
+ * allows new members to safely have links among themselves. remapping
+ * also avoids problems with file inode (serial number) truncations
+ * when the inode number is larger than storage space in the archive
+ * header. See the remap routines for more details.
+ */
+ if ((udev = frmt->udev) && (dev_start() < 0))
+ return;
+
+ /*
+ * reading the archive may take a long time. If verbose tell the user
+ */
+ if (vflag) {
+ (void)fprintf(listf,
+ "%s: Reading archive to position at the end...", argv0);
+ vfpart = 1;
+ }
+
+ /*
+ * step through the archive until the format says it is done
+ */
+ while (next_head(arcn) == 0) {
+ /*
+ * check if this file meets user specified options.
+ */
+ if (sel_chk(arcn) != 0) {
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ continue;
+ }
+
+ if (uflag) {
+ /*
+ * see if this is the newest version of this file has
+ * already been seen, if so skip.
+ */
+ if ((res = chk_ftime(arcn)) < 0)
+ break;
+ if (res > 0) {
+ if (rd_skip(arcn->skip + arcn->pad) == 1)
+ break;
+ continue;
+ }
+ }
+
+ /*
+ * Store this device number. Device numbers seen during the
+ * read phase of append will cause newly appended files with a
+ * device number seen in the old part of the archive to be
+ * remapped to an unused device number.
+ */
+ if ((udev && (add_dev(arcn) < 0)) ||
+ (rd_skip(arcn->skip + arcn->pad) == 1))
+ break;
+ }
+
+ /*
+ * done, finish up read and get the number of bytes to back up so we
+ * can add new members. The format might have used the hard link table,
+ * purge it.
+ */
+ tlen = (*frmt->end_rd)();
+ lnk_end();
+
+ /*
+ * try to position for write, if this fails quit. if any error occurs,
+ * we will refuse to write
+ */
+ if (appnd_start(tlen) < 0)
+ return;
+
+ /*
+ * tell the user we are done reading.
+ */
+ if (vflag && vfpart) {
+ (void)fputs("done.\n", listf);
+ vfpart = 0;
+ }
+
+ /*
+ * go to the writing phase to add the new members
+ */
+ wr_archive(arcn, 1);
+}
+
+/*
+ * archive()
+ * write a new archive
+ */
+
+void
+archive(void)
+{
+ ARCHD archd;
+
+ /*
+ * if we only are adding members that are newer, we need to save the
+ * mod times for all files; set up for writing; pass the format any
+ * options write the archive
+ */
+ if ((uflag && (ftime_start() < 0)) || (wr_start() < 0))
+ return;
+ if ((*frmt->options)() < 0)
+ return;
+
+ wr_archive(&archd, 0);
+}
+
+/*
+ * copy()
+ * copy files from one part of the file system to another. this does not
+ * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an
+ * archive was written and then extracted in the destination directory
+ * (except the files are forced to be under the destination directory).
+ */
+
+void
+copy(void)
+{
+ ARCHD *arcn;
+ int res;
+ int fddest;
+ char *dest_pt;
+ size_t dlen;
+ size_t drem;
+ int fdsrc = -1;
+ struct stat sb;
+ ARCHD archd;
+ char dirbuf[PAXPATHLEN+1];
+
+ sltab_start();
+
+ arcn = &archd;
+ /*
+ * set up the destination dir path and make sure it is a directory. We
+ * make sure we have a trailing / on the destination
+ */
+ dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf));
+ if (dlen >= sizeof(dirbuf) ||
+ (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) {
+ paxwarn(1, "directory name is too long %s", dirptr);
+ return;
+ }
+ dest_pt = dirbuf + dlen;
+ if (*(dest_pt-1) != '/') {
+ *dest_pt++ = '/';
+ *dest_pt = '\0';
+ ++dlen;
+ }
+ drem = PAXPATHLEN - dlen;
+
+ if (stat(dirptr, &sb) == -1) {
+ syswarn(1, errno, "Cannot access destination directory %s",
+ dirptr);
+ return;
+ }
+ if (!S_ISDIR(sb.st_mode)) {
+ paxwarn(1, "Destination is not a directory %s", dirptr);
+ return;
+ }
+
+ /*
+ * start up the hard link table; file traversal routines and the
+ * modification time and access mode database
+ */
+ if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0))
+ return;
+
+ /*
+ * When we are doing interactive rename, we store the mapping of names
+ * so we can fix up hard links files later in the archive.
+ */
+ if (iflag && (name_start() < 0))
+ return;
+
+ /*
+ * set up to cp file trees
+ */
+ cp_start();
+
+ /*
+ * while there are files to archive, process them
+ */
+ while (next_file(arcn) == 0) {
+ fdsrc = -1;
+
+ /*
+ * check if this file meets user specified options
+ */
+ if (sel_chk(arcn) != 0)
+ continue;
+
+ /*
+ * if there is already a file in the destination directory with
+ * the same name and it is newer, skip the one stored on the
+ * archive.
+ * NOTE: this test is done BEFORE name modifications as
+ * specified by pax. this can be confusing to the user who
+ * might expect the test to be done on an existing file AFTER
+ * the name mod. In honesty the pax spec is probably flawed in
+ * this respect
+ */
+ if (uflag || Dflag) {
+ /*
+ * create the destination name
+ */
+ if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'),
+ drem + 1) > drem) {
+ paxwarn(1, "Destination pathname too long %s",
+ arcn->name);
+ continue;
+ }
+
+ /*
+ * if existing file is same age or newer skip
+ */
+ res = lstat(dirbuf, &sb);
+ *dest_pt = '\0';
+
+ if (res == 0) {
+ ftree_skipped_newer(arcn);
+ if (cmp_file_times(uflag, Dflag, arcn, &sb))
+ continue;
+ }
+ }
+
+ /*
+ * this file is considered selected. See if this is a hard link
+ * to a previous file; modify the name as requested by the
+ * user; set the final destination.
+ */
+ ftree_sel(arcn);
+ if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0))
+ break;
+ if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) {
+ /*
+ * skip file, purge from link table
+ */
+ purg_lnk(arcn);
+ continue;
+ }
+
+ /*
+ * Non standard -Y and -Z flag. When the existing file is
+ * same age or newer skip
+ */
+ if ((Yflag || Zflag) &&
+ cmp_file_times(Yflag, Zflag, arcn, NULL))
+ continue;
+
+ if (vflag) {
+ (void)safe_print(arcn->name, listf);
+ vfpart = 1;
+ }
+ ++flcnt;
+
+ /*
+ * try to create a hard link to the src file if requested
+ * but make sure we are not trying to overwrite ourselves.
+ */
+ if (lflag)
+ res = cross_lnk(arcn);
+ else
+ res = chk_same(arcn);
+ if (res <= 0) {
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ continue;
+ }
+
+ /*
+ * have to create a new file
+ */
+ if (!PAX_IS_REG(arcn->type)) {
+ /*
+ * create a link or special file
+ */
+ if (PAX_IS_HARDLINK(arcn->type))
+ res = lnk_creat(arcn);
+ else
+ res = node_creat(arcn);
+ if (res < 0)
+ purg_lnk(arcn);
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ continue;
+ }
+
+ /*
+ * have to copy a regular file to the destination directory.
+ * first open source file and then create the destination file
+ */
+ if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) {
+ syswarn(1, errno, "Unable to open %s to read",
+ arcn->org_name);
+ purg_lnk(arcn);
+ continue;
+ }
+ if ((fddest = file_creat(arcn)) < 0) {
+ rdfile_close(arcn, &fdsrc);
+ purg_lnk(arcn);
+ continue;
+ }
+
+ /*
+ * copy source file data to the destination file
+ */
+ cp_file(arcn, fdsrc, fddest);
+ file_close(arcn, fddest);
+ rdfile_close(arcn, &fdsrc);
+
+ if (vflag && vfpart) {
+ (void)putc('\n', listf);
+ vfpart = 0;
+ }
+ }
+
+ /*
+ * restore directory modes and times as required; make sure all
+ * patterns were selected block off signals to avoid chance for
+ * multiple entry into the cleanup code.
+ */
+ (void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
+ ar_close(0);
+ sltab_process(0);
+ proc_dir(0);
+ ftree_chk();
+}
+
+/*
+ * next_head()
+ * try to find a valid header in the archive. Uses format specific
+ * routines to extract the header and id the trailer. Trailers may be
+ * located within a valid header or in an invalid header (the location
+ * is format specific. The inhead field from the option table tells us
+ * where to look for the trailer).
+ * We keep reading (and resyncing) until we get enough contiguous data
+ * to check for a header. If we cannot find one, we shift by a byte
+ * add a new byte from the archive to the end of the buffer and try again.
+ * If we get a read error, we throw out what we have (as we must have
+ * contiguous data) and start over again.
+ * ASSUMED: headers fit within a BLKMULT header.
+ * Return:
+ * 0 if we got a header, -1 if we are unable to ever find another one
+ * (we reached the end of input, or we reached the limit on retries. see
+ * the specs for rd_wrbuf() for more details)
+ */
+
+static int
+next_head(ARCHD *arcn)
+{
+ int ret;
+ char *hdend;
+ int res;
+ int shftsz;
+ int hsz;
+ int in_resync = 0; /* set when we are in resync mode */
+ int cnt = 0; /* counter for trailer function */
+ int first = 1; /* on 1st read, EOF isn't premature. */
+
+ /*
+ * set up initial conditions, we want a whole frmt->hsz block as we
+ * have no data yet.
+ */
+ res = hsz = frmt->hsz;
+ hdend = hdbuf;
+ shftsz = hsz - 1;
+ for (;;) {
+ /*
+ * keep looping until we get a contiguous FULL buffer
+ * (frmt->hsz is the proper size)
+ */
+ for (;;) {
+ if ((ret = rd_wrbuf(hdend, res)) == res)
+ break;
+
+ /*
+ * If we read 0 bytes (EOF) from an archive when we
+ * expect to find a header, we have stepped upon
+ * an archive without the customary block of zeroes
+ * end marker. It's just stupid to error out on
+ * them, so exit gracefully.
+ */
+ if (first && ret == 0)
+ return(-1);
+ first = 0;
+
+ /*
+ * some kind of archive read problem, try to resync the
+ * storage device, better give the user the bad news.
+ */
+ if ((ret == 0) || (rd_sync() < 0)) {
+ paxwarn(1,"Premature end of file on archive read");
+ return(-1);
+ }
+ if (!in_resync) {
+ if (act == APPND) {
+ paxwarn(1,
+ "Archive I/O error, cannot continue");
+ return(-1);
+ }
+ paxwarn(1,"Archive I/O error. Trying to recover.");
+ ++in_resync;
+ }
+
+ /*
+ * oh well, throw it all out and start over
+ */
+ res = hsz;
+ hdend = hdbuf;
+ }
+
+ /*
+ * ok we have a contiguous buffer of the right size. Call the
+ * format read routine. If this was not a valid header and this
+ * format stores trailers outside of the header, call the
+ * format specific trailer routine to check for a trailer. We
+ * have to watch out that we do not mis-identify file data or
+ * block padding as a header or trailer. Format specific
+ * trailer functions must NOT check for the trailer while we
+ * are running in resync mode. Some trailer functions may tell
+ * us that this block cannot contain a valid header either, so
+ * we then throw out the entire block and start over.
+ */
+ if ((*frmt->rd)(arcn, hdbuf) == 0)
+ break;
+
+ if (!frmt->inhead) {
+ /*
+ * this format has trailers outside of valid headers
+ */
+ if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){
+ /*
+ * valid trailer found, drain input as required
+ */
+ ar_drain();
+ return(-1);
+ }
+
+ if (ret == 1) {
+ /*
+ * we are in resync and we were told to throw
+ * the whole block out because none of the
+ * bytes in this block can be used to form a
+ * valid header
+ */
+ res = hsz;
+ hdend = hdbuf;
+ continue;
+ }
+ }
+
+ /*
+ * Brute force section.
+ * not a valid header. We may be able to find a header yet. So
+ * we shift over by one byte, and set up to read one byte at a
+ * time from the archive and place it at the end of the buffer.
+ * We will keep moving byte at a time until we find a header or
+ * get a read error and have to start over.
+ */
+ if (!in_resync) {
+ if (act == APPND) {
+ paxwarn(1,"Unable to append, archive header flaw");
+ return(-1);
+ }
+ paxwarn(1,"Invalid header, starting valid header search.");
+ ++in_resync;
+ }
+ memmove(hdbuf, hdbuf+1, shftsz);
+ res = 1;
+ hdend = hdbuf + shftsz;
+ }
+
+ /*
+ * ok got a valid header, check for trailer if format encodes it in the
+ * the header. NOTE: the parameters are different than trailer routines
+ * which encode trailers outside of the header!
+ */
+ if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) {
+ /*
+ * valid trailer found, drain input as required
+ */
+ ar_drain();
+ return(-1);
+ }
+
+ ++flcnt;
+ return(0);
+}
+
+/*
+ * get_arc()
+ * Figure out what format an archive is. Handles archive with flaws by
+ * brute force searches for a legal header in any supported format. The
+ * format id routines have to be careful to NOT mis-identify a format.
+ * ASSUMED: headers fit within a BLKMULT header.
+ * Return:
+ * 0 if archive found -1 otherwise
+ */
+
+static int
+get_arc(void)
+{
+ int i;
+ int hdsz = 0;
+ int res;
+ int minhd = BLKMULT;
+ char *hdend;
+ int notice = 0;
+
+ /*
+ * find the smallest header size in all archive formats and then set up
+ * to read the archive.
+ */
+ for (i = 0; ford[i] >= 0; ++i) {
+ if (fsub[ford[i]].name != NULL && fsub[ford[i]].hsz < minhd)
+ minhd = fsub[ford[i]].hsz;
+ }
+ if (rd_start() < 0)
+ return(-1);
+ res = BLKMULT;
+ hdsz = 0;
+ hdend = hdbuf;
+ for (;;) {
+ for (;;) {
+ /*
+ * fill the buffer with at least the smallest header
+ */
+ i = rd_wrbuf(hdend, res);
+ if (i > 0)
+ hdsz += i;
+ if (hdsz >= minhd)
+ break;
+
+ /*
+ * if we cannot recover from a read error quit
+ */
+ if ((i == 0) || (rd_sync() < 0))
+ goto out;
+
+ /*
+ * when we get an error none of the data we already
+ * have can be used to create a legal header (we just
+ * got an error in the middle), so we throw it all out
+ * and refill the buffer with fresh data.
+ */
+ res = BLKMULT;
+ hdsz = 0;
+ hdend = hdbuf;
+ if (!notice) {
+ if (act == APPND)
+ return(-1);
+ paxwarn(1,"Cannot identify format. Searching...");
+ ++notice;
+ }
+ }
+
+ /*
+ * we have at least the size of the smallest header in any
+ * archive format. Look to see if we have a match. The array
+ * ford[] is used to specify the header id order to reduce the
+ * chance of incorrectly id'ing a valid header (some formats
+ * may be subsets of each other and the order would then be
+ * important).
+ */
+ for (i = 0; ford[i] >= 0; ++i) {
+ if (fsub[ford[i]].id == NULL ||
+ (*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
+ continue;
+ frmt = &(fsub[ford[i]]);
+ /*
+ * yuck, to avoid slow special case code in the extract
+ * routines, just push this header back as if it was
+ * not seen. We have left extra space at start of the
+ * buffer for this purpose. This is a bit ugly, but
+ * adding all the special case code is far worse.
+ */
+ pback(hdbuf, hdsz);
+ return(0);
+ }
+
+ /*
+ * We have a flawed archive, no match. we start searching, but
+ * we never allow additions to flawed archives
+ */
+ if (!notice) {
+ if (act == APPND)
+ return(-1);
+ paxwarn(1, "Cannot identify format. Searching...");
+ ++notice;
+ }
+
+ /*
+ * brute force search for a header that we can id.
+ * we shift through byte at a time. this is slow, but we cannot
+ * determine the nature of the flaw in the archive in a
+ * portable manner
+ */
+ if (--hdsz > 0) {
+ memmove(hdbuf, hdbuf+1, hdsz);
+ res = BLKMULT - hdsz;
+ hdend = hdbuf + hdsz;
+ } else {
+ res = BLKMULT;
+ hdend = hdbuf;
+ hdsz = 0;
+ }
+ }
+
+ out:
+ /*
+ * we cannot find a header, bow, apologize and quit
+ */
+ paxwarn(1, "Sorry, unable to determine archive format.");
+ return(-1);
+}
+
+/*
+ * rd_gnu_string()
+ * Read the file contents into an allocated string if it is a GNU tar
+ * long link/file.
+ * Return:
+ * 1 if gnu string read, 0 otherwise
+ */
+
+static int
+rd_gnu_string(ARCHD *arcn)
+{
+ char **strp;
+
+ switch (arcn->type) {
+ case PAX_GLF:
+ strp = &gnu_name_string;
+ break;
+ case PAX_GLL:
+ strp = &gnu_link_string;
+ break;
+ default:
+ strp = NULL;
+ break;
+ }
+ if (!strp)
+ return 0;
+ /*
+ * we need to read, to get the real filename
+ */
+ if (*strp)
+ err(1, "WARNING! Major Internal Error! GNU hack Failing!");
+ *strp = malloc(arcn->sb.st_size + 1);
+ if (*strp == NULL) {
+ paxwarn(1, "Out of memory");
+ (void)rd_skip(arcn->skip + arcn->pad);
+ } else if (rd_wrbuf(*strp, arcn->sb.st_size) < arcn->sb.st_size) {
+ free(*strp);
+ *strp = NULL;
+ } else {
+ (*strp)[arcn->sb.st_size] = '\0';
+ (void)rd_skip(arcn->pad);
+ }
+ return 1;
+}
diff --git a/bin/pax/buf_subs.c b/bin/pax/buf_subs.c
new file mode 100644
index 0000000..e84f9e0
--- /dev/null
+++ b/bin/pax/buf_subs.c
@@ -0,0 +1,983 @@
+/* $OpenBSD: buf_subs.c,v 1.31 2019/06/28 13:34:59 deraadt Exp $ */
+/* $NetBSD: buf_subs.c,v 1.5 1995/03/21 09:07:08 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * routines which implement archive and file buffering
+ */
+
+#define MINFBSZ 512 /* default block size for hole detect */
+#define MAXFLT 10 /* default media read error limit */
+
+/*
+ * Need to change bufmem to dynamic allocation when the upper
+ * limit on blocking size is removed (though that will violate pax spec)
+ * MAXBLK define and tests will also need to be updated.
+ */
+static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */
+static char *buf; /* normal start of i/o buffer */
+static char *bufend; /* end or last char in i/o buffer */
+static char *bufpt; /* read/write point in i/o buffer */
+int blksz = MAXBLK; /* block input/output size in bytes */
+int wrblksz; /* user spec output size in bytes */
+int maxflt = MAXFLT; /* MAX consecutive media errors */
+int rdblksz; /* first read blksize (tapes only) */
+off_t wrlimit; /* # of bytes written per archive vol */
+off_t wrcnt; /* # of bytes written on current vol */
+off_t rdcnt; /* # of bytes read on current vol */
+
+/*
+ * wr_start()
+ * set up the buffering system to operate in a write mode
+ * Return:
+ * 0 if ok, -1 if the user specified write block size violates pax spec
+ */
+
+int
+wr_start(void)
+{
+ buf = &(bufmem[BLKMULT]);
+ /*
+ * Check to make sure the write block size meets pax specs. If the user
+ * does not specify a blocksize, we use the format default blocksize.
+ * We must be picky on writes, so we do not allow the user to create an
+ * archive that might be hard to read elsewhere. If all ok, we then
+ * open the first archive volume
+ */
+ if (!wrblksz)
+ wrblksz = frmt->bsz;
+ if (wrblksz > MAXBLK) {
+ paxwarn(1, "Write block size of %d too large, maximium is: %d",
+ wrblksz, MAXBLK);
+ return(-1);
+ }
+ if (wrblksz % BLKMULT) {
+ paxwarn(1, "Write block size of %d is not a %d byte multiple",
+ wrblksz, BLKMULT);
+ return(-1);
+ }
+ if (wrblksz > MAXBLK_POSIX) {
+ paxwarn(0, "Write block size of %d larger than POSIX max %d, archive may not be portable",
+ wrblksz, MAXBLK_POSIX);
+ return(-1);
+ }
+
+ /*
+ * we only allow wrblksz to be used with all archive operations
+ */
+ blksz = rdblksz = wrblksz;
+ if ((ar_open(arcname) < 0) && (ar_next() < 0))
+ return(-1);
+ wrcnt = 0;
+ bufend = buf + wrblksz;
+ bufpt = buf;
+ return(0);
+}
+
+/*
+ * rd_start()
+ * set up buffering system to read an archive
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+int
+rd_start(void)
+{
+ /*
+ * leave space for the header pushback (see get_arc()). If we are
+ * going to append and user specified a write block size, check it
+ * right away
+ */
+ buf = &(bufmem[BLKMULT]);
+ if ((act == APPND) && wrblksz) {
+ if (wrblksz > MAXBLK) {
+ paxwarn(1,"Write block size %d too large, maximium is: %d",
+ wrblksz, MAXBLK);
+ return(-1);
+ }
+ if (wrblksz % BLKMULT) {
+ paxwarn(1, "Write block size %d is not a %d byte multiple",
+ wrblksz, BLKMULT);
+ return(-1);
+ }
+ }
+
+ /*
+ * open the archive
+ */
+ if ((ar_open(arcname) < 0) && (ar_next() < 0))
+ return(-1);
+ bufend = buf + rdblksz;
+ bufpt = bufend;
+ rdcnt = 0;
+ return(0);
+}
+
+/*
+ * cp_start()
+ * set up buffer system for copying within the file system
+ */
+
+void
+cp_start(void)
+{
+ buf = &(bufmem[BLKMULT]);
+ rdblksz = blksz = MAXBLK;
+}
+
+/*
+ * appnd_start()
+ * Set up the buffering system to append new members to an archive that
+ * was just read. The last block(s) of an archive may contain a format
+ * specific trailer. To append a new member, this trailer has to be
+ * removed from the archive. The first byte of the trailer is replaced by
+ * the start of the header of the first file added to the archive. The
+ * format specific end read function tells us how many bytes to move
+ * backwards in the archive to be positioned BEFORE the trailer. Two
+ * different position have to be adjusted, the O.S. file offset (e.g. the
+ * position of the tape head) and the write point within the data we have
+ * stored in the read (soon to become write) buffer. We may have to move
+ * back several records (the number depends on the size of the archive
+ * record and the size of the format trailer) to read up the record where
+ * the first byte of the trailer is recorded. Trailers may span (and
+ * overlap) record boundaries.
+ * We first calculate which record has the first byte of the trailer. We
+ * move the OS file offset back to the start of this record and read it
+ * up. We set the buffer write pointer to be at this byte (the byte where
+ * the trailer starts). We then move the OS file pointer back to the
+ * start of this record so a flush of this buffer will replace the record
+ * in the archive.
+ * A major problem is rewriting this last record. For archives stored
+ * on disk files, this is trivial. However, many devices are really picky
+ * about the conditions under which they will allow a write to occur.
+ * Often devices restrict the conditions where writes can be made,
+ * so it may not be feasible to append archives stored on all types of
+ * devices.
+ * Return:
+ * 0 for success, -1 for failure
+ */
+
+int
+appnd_start(off_t skcnt)
+{
+ int res;
+ off_t cnt;
+
+ if (exit_val != 0) {
+ paxwarn(0, "Cannot append to an archive that may have flaws.");
+ return(-1);
+ }
+ /*
+ * if the user did not specify a write blocksize, inherit the size used
+ * in the last archive volume read. (If a is set we still use rdblksz
+ * until next volume, cannot shift sizes within a single volume).
+ */
+ if (!wrblksz)
+ wrblksz = blksz = rdblksz;
+ else
+ blksz = rdblksz;
+
+ /*
+ * make sure that this volume allows appends
+ */
+ if (ar_app_ok() < 0)
+ return(-1);
+
+ /*
+ * Calculate bytes to move back and move in front of record where we
+ * need to start writing from. Remember we have to add in any padding
+ * that might be in the buffer after the trailer in the last block. We
+ * travel skcnt + padding ROUNDED UP to blksize.
+ */
+ skcnt += bufend - bufpt;
+ if ((cnt = (skcnt/blksz) * blksz) < skcnt)
+ cnt += blksz;
+ if (ar_rev(cnt) < 0)
+ goto out;
+
+ /*
+ * We may have gone too far if there is valid data in the block we are
+ * now in front of, read up the block and position the pointer after
+ * the valid data.
+ */
+ if ((cnt -= skcnt) > 0) {
+ /*
+ * watch out for stupid tape drives. ar_rev() will set rdblksz
+ * to be real physical blocksize so we must loop until we get
+ * the old rdblksz (now in blksz). If ar_rev() fouls up the
+ * determination of the physical block size, we will fail.
+ */
+ bufpt = buf;
+ bufend = buf + blksz;
+ while (bufpt < bufend) {
+ if ((res = ar_read(bufpt, rdblksz)) <= 0)
+ goto out;
+ bufpt += res;
+ }
+ if (ar_rev(bufpt - buf) < 0)
+ goto out;
+ bufpt = buf + cnt;
+ bufend = buf + blksz;
+ } else {
+ /*
+ * buffer is empty
+ */
+ bufend = buf + blksz;
+ bufpt = buf;
+ }
+ rdblksz = blksz;
+ rdcnt -= skcnt;
+ wrcnt = 0;
+
+ /*
+ * At this point we are ready to write. If the device requires special
+ * handling to write at a point were previously recorded data resides,
+ * that is handled in ar_set_wr(). From now on we operate under normal
+ * ARCHIVE mode (write) conditions
+ */
+ if (ar_set_wr() < 0)
+ return(-1);
+ act = ARCHIVE;
+ return(0);
+
+ out:
+ paxwarn(1, "Unable to rewrite archive trailer, cannot append.");
+ return(-1);
+}
+
+/*
+ * rd_sync()
+ * A read error occurred on this archive volume. Resync the buffer and
+ * try to reset the device (if possible) so we can continue to read. Keep
+ * trying to do this until we get a valid read, or we reach the limit on
+ * consecutive read faults (at which point we give up). The user can
+ * adjust the read error limit through a command line option.
+ * Returns:
+ * 0 on success, and -1 on failure
+ */
+
+int
+rd_sync(void)
+{
+ int errcnt = 0;
+ int res;
+
+ /*
+ * if the user says bail out on first fault, we are out of here...
+ */
+ if (maxflt == 0)
+ return(-1);
+ if (act == APPND) {
+ paxwarn(1, "Unable to append when there are archive read errors.");
+ return(-1);
+ }
+
+ /*
+ * poke at device and try to get past media error
+ */
+ if (ar_rdsync() < 0) {
+ if (ar_next() < 0)
+ return(-1);
+ else
+ rdcnt = 0;
+ }
+
+ for (;;) {
+ if ((res = ar_read(buf, blksz)) > 0) {
+ /*
+ * All right! got some data, fill that buffer
+ */
+ bufpt = buf;
+ bufend = buf + res;
+ rdcnt += res;
+ return(0);
+ }
+
+ /*
+ * Oh well, yet another failed read...
+ * if error limit reached, ditch. o.w. poke device to move past
+ * bad media and try again. if media is badly damaged, we ask
+ * the poor (and upset user at this point) for the next archive
+ * volume. remember the goal on reads is to get the most we
+ * can extract out of the archive.
+ */
+ if ((maxflt > 0) && (++errcnt > maxflt))
+ paxwarn(0,"Archive read error limit (%d) reached",maxflt);
+ else if (ar_rdsync() == 0)
+ continue;
+ if (ar_next() < 0)
+ break;
+ rdcnt = 0;
+ errcnt = 0;
+ }
+ return(-1);
+}
+
+/*
+ * pback()
+ * push the data used during the archive id phase back into the I/O
+ * buffer. This is required as we cannot be sure that the header does NOT
+ * overlap a block boundary (as in the case we are trying to recover a
+ * flawed archived). This was not designed to be used for any other
+ * purpose. (What software engineering, HA!)
+ * WARNING: do not even THINK of pback greater than BLKMULT, unless the
+ * pback space is increased.
+ */
+
+void
+pback(char *pt, int cnt)
+{
+ bufpt -= cnt;
+ memcpy(bufpt, pt, cnt);
+}
+
+/*
+ * rd_skip()
+ * skip forward in the archive during a archive read. Used to get quickly
+ * past file data and padding for files the user did NOT select.
+ * Return:
+ * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected.
+ */
+
+int
+rd_skip(off_t skcnt)
+{
+ off_t res;
+ off_t cnt;
+ off_t skipped = 0;
+
+ /*
+ * consume what data we have in the buffer. If we have to move forward
+ * whole records, we call the low level skip function to see if we can
+ * move within the archive without doing the expensive reads on data we
+ * do not want.
+ */
+ if (skcnt == 0)
+ return(0);
+ res = MINIMUM((bufend - bufpt), skcnt);
+ bufpt += res;
+ skcnt -= res;
+
+ /*
+ * if skcnt is now 0, then no additional i/o is needed
+ */
+ if (skcnt == 0)
+ return(0);
+
+ /*
+ * We have to read more, calculate complete and partial record reads
+ * based on rdblksz. we skip over "cnt" complete records
+ */
+ res = skcnt%rdblksz;
+ cnt = (skcnt/rdblksz) * rdblksz;
+
+ /*
+ * if the skip fails, we will have to resync. ar_fow will tell us
+ * how much it can skip over. We will have to read the rest.
+ */
+ if (ar_fow(cnt, &skipped) < 0)
+ return(-1);
+ res += cnt - skipped;
+ rdcnt += skipped;
+
+ /*
+ * what is left we have to read (which may be the whole thing if
+ * ar_fow() told us the device can only read to skip records);
+ */
+ while (res > 0) {
+ cnt = bufend - bufpt;
+ /*
+ * if the read fails, we will have to resync
+ */
+ if ((cnt <= 0) && ((cnt = buf_fill()) < 0))
+ return(-1);
+ if (cnt == 0)
+ return(1);
+ cnt = MINIMUM(cnt, res);
+ bufpt += cnt;
+ res -= cnt;
+ }
+ return(0);
+}
+
+/*
+ * wr_fin()
+ * flush out any data (and pad if required) the last block. We always pad
+ * with zero (even though we do not have to). Padding with 0 makes it a
+ * lot easier to recover if the archive is damaged. zero padding SHOULD
+ * BE a requirement....
+ */
+
+void
+wr_fin(void)
+{
+ if (bufpt > buf) {
+ memset(bufpt, 0, bufend - bufpt);
+ bufpt = bufend;
+ (void)buf_flush(blksz);
+ }
+}
+
+/*
+ * wr_rdbuf()
+ * fill the write buffer from data passed to it in a buffer (usually used
+ * by format specific write routines to pass a file header). On failure we
+ * punt. We do not allow the user to continue to write flawed archives.
+ * We assume these headers are not very large (the memory copy we use is
+ * a bit expensive).
+ * Return:
+ * 0 if buffer was filled ok, -1 o.w. (buffer flush failure)
+ */
+
+int
+wr_rdbuf(char *out, int outcnt)
+{
+ int cnt;
+
+ /*
+ * while there is data to copy copy into the write buffer. when the
+ * write buffer fills, flush it to the archive and continue
+ */
+ while (outcnt > 0) {
+ cnt = bufend - bufpt;
+ if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0))
+ return(-1);
+ /*
+ * only move what we have space for
+ */
+ cnt = MINIMUM(cnt, outcnt);
+ memcpy(bufpt, out, cnt);
+ bufpt += cnt;
+ out += cnt;
+ outcnt -= cnt;
+ }
+ return(0);
+}
+
+/*
+ * rd_wrbuf()
+ * copy from the read buffer into a supplied buffer a specified number of
+ * bytes. If the read buffer is empty fill it and continue to copy.
+ * usually used to obtain a file header for processing by a format
+ * specific read routine.
+ * Return
+ * number of bytes copied to the buffer, 0 indicates EOF on archive volume,
+ * -1 is a read error
+ */
+
+int
+rd_wrbuf(char *in, int cpcnt)
+{
+ int res;
+ int cnt;
+ int incnt = cpcnt;
+
+ /*
+ * loop until we fill the buffer with the requested number of bytes
+ */
+ while (incnt > 0) {
+ cnt = bufend - bufpt;
+ if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) {
+ /*
+ * read error, return what we got (or the error if
+ * no data was copied). The caller must know that an
+ * error occurred and has the best knowledge what to
+ * do with it
+ */
+ if ((res = cpcnt - incnt) > 0)
+ return(res);
+ return(cnt);
+ }
+
+ /*
+ * calculate how much data to copy based on whats left and
+ * state of buffer
+ */
+ cnt = MINIMUM(cnt, incnt);
+ memcpy(in, bufpt, cnt);
+ bufpt += cnt;
+ incnt -= cnt;
+ in += cnt;
+ }
+ return(cpcnt);
+}
+
+/*
+ * wr_skip()
+ * skip forward during a write. In other words add padding to the file.
+ * we add zero filled padding as it makes flawed archives much easier to
+ * recover from. the caller tells us how many bytes of padding to add
+ * This routine was not designed to add HUGE amount of padding, just small
+ * amounts (a few 512 byte blocks at most)
+ * Return:
+ * 0 if ok, -1 if there was a buf_flush failure
+ */
+
+int
+wr_skip(off_t skcnt)
+{
+ int cnt;
+
+ /*
+ * loop while there is more padding to add
+ */
+ while (skcnt > 0) {
+ cnt = bufend - bufpt;
+ if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0))
+ return(-1);
+ cnt = MINIMUM(cnt, skcnt);
+ memset(bufpt, 0, cnt);
+ bufpt += cnt;
+ skcnt -= cnt;
+ }
+ return(0);
+}
+
+/*
+ * wr_rdfile()
+ * fill write buffer with the contents of a file. We are passed an open
+ * file descriptor to the file an the archive structure that describes the
+ * file we are storing. The variable "left" is modified to contain the
+ * number of bytes of the file we were NOT able to write to the archive.
+ * it is important that we always write EXACTLY the number of bytes that
+ * the format specific write routine told us to. The file can also get
+ * bigger, so reading to the end of file would create an improper archive,
+ * we just detect this case and warn the user. We never create a bad
+ * archive if we can avoid it. Of course trying to archive files that are
+ * active is asking for trouble. It we fail, we pass back how much we
+ * could NOT copy and let the caller deal with it.
+ * Return:
+ * 0 ok, -1 if archive write failure. a short read of the file returns a
+ * 0, but "left" is set to be greater than zero.
+ */
+
+int
+wr_rdfile(ARCHD *arcn, int ifd, off_t *left)
+{
+ int cnt;
+ int res = 0;
+ off_t size = arcn->sb.st_size;
+ struct stat sb;
+
+ /*
+ * while there are more bytes to write
+ */
+ while (size > 0) {
+ cnt = bufend - bufpt;
+ if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) {
+ *left = size;
+ return(-1);
+ }
+ cnt = MINIMUM(cnt, size);
+ if ((res = read(ifd, bufpt, cnt)) <= 0)
+ break;
+ size -= res;
+ bufpt += res;
+ }
+
+ /*
+ * better check the file did not change during this operation
+ * or the file read failed.
+ */
+ if (res < 0)
+ syswarn(1, errno, "Read fault on %s", arcn->org_name);
+ else if (size != 0)
+ paxwarn(1, "File changed size during read %s", arcn->org_name);
+ else if (fstat(ifd, &sb) == -1)
+ syswarn(1, errno, "Failed stat on %s", arcn->org_name);
+ else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=))
+ paxwarn(1, "File %s was modified during copy to archive",
+ arcn->org_name);
+ *left = size;
+ return(0);
+}
+
+/*
+ * rd_wrfile()
+ * extract the contents of a file from the archive. If we are unable to
+ * extract the entire file (due to failure to write the file) we return
+ * the numbers of bytes we did NOT process. This way the caller knows how
+ * many bytes to skip past to find the next archive header. If the failure
+ * was due to an archive read, we will catch that when we try to skip. If
+ * the format supplies a file data crc value, we calculate the actual crc
+ * so that it can be compared to the value stored in the header
+ * NOTE:
+ * We call a special function to write the file. This function attempts to
+ * restore file holes (blocks of zeros) into the file. When files are
+ * sparse this saves space, and is a LOT faster. For non sparse files
+ * the performance hit is small. As of this writing, no archive supports
+ * information on where the file holes are.
+ * Return:
+ * 0 ok, -1 if archive read failure. if we cannot write the entire file,
+ * we return a 0 but "left" is set to be the amount unwritten
+ */
+
+int
+rd_wrfile(ARCHD *arcn, int ofd, off_t *left)
+{
+ int cnt = 0;
+ off_t size = arcn->sb.st_size;
+ int res = 0;
+ char *fnm = arcn->name;
+ int isem = 1;
+ int rem;
+ int sz = MINFBSZ;
+ struct stat sb;
+ u_int32_t crc = 0;
+
+ /*
+ * pass the blocksize of the file being written to the write routine,
+ * if the size is zero, use the default MINFBSZ
+ */
+ if (fstat(ofd, &sb) == 0) {
+ if (sb.st_blksize > 0)
+ sz = (int)sb.st_blksize;
+ } else
+ syswarn(0,errno,"Unable to obtain block size for file %s",fnm);
+ rem = sz;
+ *left = 0;
+
+ /*
+ * Copy the archive to the file the number of bytes specified. We have
+ * to assume that we want to recover file holes as none of the archive
+ * formats can record the location of file holes.
+ */
+ while (size > 0) {
+ cnt = bufend - bufpt;
+ /*
+ * if we get a read error, we do not want to skip, as we may
+ * miss a header, so we do not set left, but if we get a write
+ * error, we do want to skip over the unprocessed data.
+ */
+ if ((cnt <= 0) && ((cnt = buf_fill()) <= 0))
+ break;
+ cnt = MINIMUM(cnt, size);
+ if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) {
+ *left = size;
+ break;
+ }
+
+ if (docrc) {
+ /*
+ * update the actual crc value
+ */
+ cnt = res;
+ while (--cnt >= 0)
+ crc += *bufpt++ & 0xff;
+ } else
+ bufpt += res;
+ size -= res;
+ }
+
+ /*
+ * if the last block has a file hole (all zero), we must make sure this
+ * gets updated in the file. We force the last block of zeros to be
+ * written. just closing with the file offset moved forward may not put
+ * a hole at the end of the file.
+ */
+ if (isem && (arcn->sb.st_size > 0))
+ file_flush(ofd, fnm, isem);
+
+ /*
+ * if we failed from archive read, we do not want to skip
+ */
+ if ((size > 0) && (*left == 0))
+ return(-1);
+
+ /*
+ * some formats record a crc on file data. If so, then we compare the
+ * calculated crc to the crc stored in the archive
+ */
+ if (docrc && (size == 0) && (arcn->crc != crc))
+ paxwarn(1,"Actual crc does not match expected crc %s",arcn->name);
+ return(0);
+}
+
+/*
+ * cp_file()
+ * copy the contents of one file to another. used during -rw phase of pax
+ * just as in rd_wrfile() we use a special write function to write the
+ * destination file so we can properly copy files with holes.
+ */
+
+void
+cp_file(ARCHD *arcn, int fd1, int fd2)
+{
+ int cnt;
+ off_t cpcnt = 0;
+ int res = 0;
+ char *fnm = arcn->name;
+ int no_hole = 0;
+ int isem = 1;
+ int rem;
+ int sz = MINFBSZ;
+ struct stat sb;
+
+ /*
+ * check for holes in the source file. If none, we will use regular
+ * write instead of file write.
+ */
+ if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size)
+ ++no_hole;
+
+ /*
+ * pass the blocksize of the file being written to the write routine,
+ * if the size is zero, use the default MINFBSZ
+ */
+ if (fstat(fd2, &sb) == 0) {
+ if (sb.st_blksize > 0)
+ sz = sb.st_blksize;
+ } else
+ syswarn(0,errno,"Unable to obtain block size for file %s",fnm);
+ rem = sz;
+
+ /*
+ * read the source file and copy to destination file until EOF
+ */
+ for (;;) {
+ if ((cnt = read(fd1, buf, blksz)) <= 0)
+ break;
+ if (no_hole)
+ res = write(fd2, buf, cnt);
+ else
+ res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm);
+ if (res != cnt)
+ break;
+ cpcnt += cnt;
+ }
+
+ /*
+ * check to make sure the copy is valid.
+ */
+ if (res < 0)
+ syswarn(1, errno, "Failed write during copy of %s to %s",
+ arcn->org_name, arcn->name);
+ else if (cpcnt != arcn->sb.st_size)
+ paxwarn(1, "File %s changed size during copy to %s",
+ arcn->org_name, arcn->name);
+ else if (fstat(fd1, &sb) == -1)
+ syswarn(1, errno, "Failed stat of %s", arcn->org_name);
+ else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=))
+ paxwarn(1, "File %s was modified during copy to %s",
+ arcn->org_name, arcn->name);
+
+ /*
+ * if the last block has a file hole (all zero), we must make sure this
+ * gets updated in the file. We force the last block of zeros to be
+ * written. just closing with the file offset moved forward may not put
+ * a hole at the end of the file.
+ */
+ if (!no_hole && isem && (arcn->sb.st_size > 0))
+ file_flush(fd2, fnm, isem);
+}
+
+/*
+ * buf_fill()
+ * fill the read buffer with the next record (or what we can get) from
+ * the archive volume.
+ * Return:
+ * Number of bytes of data in the read buffer, -1 for read error, and
+ * 0 when finished (user specified termination in ar_next()).
+ */
+
+int
+buf_fill(void)
+{
+ int cnt;
+ static int fini = 0;
+
+ if (fini)
+ return(0);
+
+ for (;;) {
+ /*
+ * try to fill the buffer. on error the next archive volume is
+ * opened and we try again.
+ */
+ if ((cnt = ar_read(buf, blksz)) > 0) {
+ bufpt = buf;
+ bufend = buf + cnt;
+ rdcnt += cnt;
+ return(cnt);
+ }
+
+ /*
+ * errors require resync, EOF goes to next archive
+ * but in case we have not determined yet the format,
+ * this means that we have a very short file, so we
+ * are done again.
+ */
+ if (cnt < 0)
+ break;
+ if (frmt == NULL || ar_next() < 0) {
+ fini = 1;
+ return(0);
+ }
+ rdcnt = 0;
+ }
+ exit_val = 1;
+ return(-1);
+}
+
+/*
+ * buf_flush()
+ * force the write buffer to the archive. We are passed the number of
+ * bytes in the buffer at the point of the flush. When we change archives
+ * the record size might change. (either larger or smaller).
+ * Return:
+ * 0 if all is ok, -1 when a write error occurs.
+ */
+
+int
+buf_flush(int bufcnt)
+{
+ int cnt;
+ int push = 0;
+ int totcnt = 0;
+
+ /*
+ * if we have reached the user specified byte count for each archive
+ * volume, prompt for the next volume. (The non-standard -R flag).
+ * NOTE: If the wrlimit is smaller than wrcnt, we will always write
+ * at least one record. We always round limit UP to next blocksize.
+ */
+ if ((wrlimit > 0) && (wrcnt > wrlimit)) {
+ paxwarn(0, "User specified archive volume byte limit reached.");
+ if (ar_next() < 0) {
+ wrcnt = 0;
+ exit_val = 1;
+ return(-1);
+ }
+ wrcnt = 0;
+
+ /*
+ * The new archive volume might have changed the size of the
+ * write blocksize. if so we figure out if we need to write
+ * (one or more times), or if there is now free space left in
+ * the buffer (it is no longer full). bufcnt has the number of
+ * bytes in the buffer, (the blocksize, at the point we were
+ * CALLED). Push has the amount of "extra" data in the buffer
+ * if the block size has shrunk from a volume change.
+ */
+ bufend = buf + blksz;
+ if (blksz > bufcnt)
+ return(0);
+ if (blksz < bufcnt)
+ push = bufcnt - blksz;
+ }
+
+ /*
+ * We have enough data to write at least one archive block
+ */
+ for (;;) {
+ /*
+ * write a block and check if it all went out ok
+ */
+ cnt = ar_write(buf, blksz);
+ if (cnt == blksz) {
+ /*
+ * the write went ok
+ */
+ wrcnt += cnt;
+ totcnt += cnt;
+ if (push > 0) {
+ /* we have extra data to push to the front.
+ * check for more than 1 block of push, and if
+ * so we loop back to write again
+ */
+ memcpy(buf, bufend, push);
+ bufpt = buf + push;
+ if (push >= blksz) {
+ push -= blksz;
+ continue;
+ }
+ } else
+ bufpt = buf;
+ return(totcnt);
+ } else if (cnt > 0) {
+ /*
+ * Oh drat we got a partial write!
+ * if format does not care about alignment let it go,
+ * we warned the user in ar_write().... but this means
+ * the last record on this volume violates pax spec....
+ */
+ totcnt += cnt;
+ wrcnt += cnt;
+ bufpt = buf + cnt;
+ cnt = bufcnt - cnt;
+ memcpy(buf, bufpt, cnt);
+ bufpt = buf + cnt;
+ if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0))
+ return(totcnt);
+ break;
+ }
+
+ /*
+ * All done, go to next archive
+ */
+ wrcnt = 0;
+ if (ar_next() < 0)
+ break;
+
+ /*
+ * The new archive volume might also have changed the block
+ * size. if so, figure out if we have too much or too little
+ * data for using the new block size
+ */
+ bufend = buf + blksz;
+ if (blksz > bufcnt)
+ return(0);
+ if (blksz < bufcnt)
+ push = bufcnt - blksz;
+ }
+
+ /*
+ * write failed, stop pax. we must not create a bad archive!
+ */
+ exit_val = 1;
+ return(-1);
+}
diff --git a/bin/pax/cpio.1 b/bin/pax/cpio.1
new file mode 100644
index 0000000..89a6e36
--- /dev/null
+++ b/bin/pax/cpio.1
@@ -0,0 +1,309 @@
+.\" $OpenBSD: cpio.1,v 1.36 2020/01/16 16:46:46 schwarze Exp $
+.\"
+.\" Copyright (c) 1997 SigmaSoft, Th. Lockert
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd $Mdocdate: January 16 2020 $
+.Dt CPIO 1
+.Os
+.Sh NAME
+.Nm cpio
+.Nd copy file archives in and out
+.Sh SYNOPSIS
+.Nm cpio
+.Fl o
+.Op Fl AaBcjLvZz
+.Op Fl C Ar bytes
+.Op Fl F Ar archive
+.Op Fl H Ar format
+.Op Fl O Ar archive
+.No \*(Lt Ar name-list
+.Op \*(Gt Ar archive
+.Nm cpio
+.Fl i
+.Op Fl 6BbcdfjmrSstuvZz
+.Op Fl C Ar bytes
+.Op Fl E Ar file
+.Op Fl F Ar archive
+.Op Fl H Ar format
+.Op Fl I Ar archive
+.Op Ar pattern ...
+.Op \*(Lt Ar archive
+.Nm cpio
+.Fl p
+.Op Fl adLlmuv
+.Ar destination-directory
+.No \*(Lt Ar name-list
+.Sh DESCRIPTION
+The
+.Nm
+command copies files to and from a
+.Nm
+archive.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl o
+Create an archive.
+Reads the list of files to store in the
+archive from standard input, and writes the archive on standard
+output.
+.Bl -tag -width Ds
+.It Fl A
+Append to the specified archive.
+.It Fl a
+Reset the access times on files that have been copied to the
+archive.
+.It Fl B
+Set block size of output to 5120 bytes.
+.It Fl C Ar bytes
+Set the block size of output to
+.Ar bytes .
+.It Fl c
+Use ASCII format for
+.Nm
+header for portability.
+.It Fl F Ar archive
+Use the specified file as the input for the archive.
+.It Fl H Ar format
+Write the archive in the specified format.
+Recognized formats are:
+.Pp
+.Bl -tag -width sv4cpio -compact
+.It Ar bcpio
+Old binary
+.Nm
+format.
+.It Ar cpio
+Old octal character
+.Nm
+format.
+.It Ar sv4cpio
+SVR4 hex
+.Nm
+format.
+.It Ar tar
+Old tar format.
+.It Ar ustar
+POSIX ustar format.
+.El
+.It Fl j
+Compress archive using the bzip2 format.
+The bzip2 utility must be installed separately.
+.It Fl L
+Follow symbolic links.
+.It Fl O Ar archive
+Use the specified file name as the archive to write to.
+.It Fl v
+Be verbose about operations.
+List filenames as they are written to the archive.
+.It Fl Z
+Compress archive using
+.Xr compress 1
+format.
+.It Fl z
+Compress archive using
+.Xr gzip 1
+format.
+.El
+.It Fl i
+Restore files from an archive.
+Reads the archive file from
+standard input and extracts files matching the
+.Ar patterns
+that were specified on the command line.
+.Bl -tag -width Ds
+.It Fl 6
+Process old-style
+.Nm
+format archives.
+.It Fl B
+Set the block size of the archive being read to 5120 bytes.
+.It Fl b
+Do byte and word swapping after reading in data from the
+archive, for restoring archives created on systems with
+a different byte order.
+.It Fl C Ar bytes
+Read archive written with a block size of
+.Ar bytes .
+.It Fl c
+Expect the archive headers to be in ASCII format.
+.It Fl d
+Create any intermediate directories as needed during
+restore.
+.It Fl E Ar file
+Read list of file name patterns to extract or list from
+.Ar file .
+.It Fl F Ar archive , Fl I Ar archive
+Use the specified file as the input for the archive.
+.It Fl f
+Restore all files except those matching the
+.Ar patterns
+given on the command line.
+.It Fl H Ar format
+Read an archive of the specified format.
+Recognized formats are:
+.Pp
+.Bl -tag -width sv4cpio -compact
+.It Ar bcpio
+Old binary
+.Nm
+format.
+.It Ar cpio
+Old octal character
+.Nm
+format.
+.It Ar sv4cpio
+SVR4 hex
+.Nm
+format.
+.It Ar tar
+Old tar format.
+.It Ar ustar
+POSIX ustar format.
+.El
+.It Fl j
+Uncompress archive using the bzip2 format.
+The bzip2 utility must be installed separately.
+.It Fl m
+Restore modification times on files.
+.It Fl r
+Rename restored files interactively.
+.It Fl S
+Swap words after reading data from the archive.
+.It Fl s
+Swap bytes after reading data from the archive.
+.It Fl t
+Only list the contents of the archive, no files or
+directories will be created.
+.It Fl u
+Overwrite files even when the file in the archive is
+older than the one that will be overwritten.
+.It Fl v
+Be verbose about operations.
+List filenames as they are copied in from the archive.
+.It Fl Z
+Uncompress archive using
+.Xr compress 1
+format.
+.It Fl z
+Uncompress archive using
+.Xr gzip 1
+format.
+.El
+.It Fl p
+Copy files from one location to another in a single pass.
+The list of files to copy are read from standard input and
+written out to a directory relative to the specified
+.Ar directory
+argument.
+.Bl -tag -width Ds
+.It Fl a
+Reset the access times on files that have been copied.
+.It Fl d
+Create any intermediate directories as needed to write
+the files at the new location.
+.It Fl L
+Follow symbolic links.
+.It Fl l
+When possible, link files rather than creating an
+extra copy.
+.It Fl m
+Restore modification times on files.
+.It Fl u
+Overwrite files even when the original file being copied is
+older than the one that will be overwritten.
+.It Fl v
+Be verbose about operations.
+List filenames as they are copied.
+.El
+.El
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev TMPDIR
+Path in which to store temporary files.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It 0
+All files were processed successfully.
+.It 1
+An error occurred.
+.El
+.Sh DIAGNOSTICS
+Whenever
+.Nm
+cannot create a file or a link when extracting an archive or cannot
+find a file while writing an archive, or cannot preserve the user
+ID, group ID, file mode, or access and modification times when the
+.Fl p
+option is specified, a diagnostic message is written to standard
+error and a non-zero exit value will be returned, but processing
+will continue.
+In the case where
+.Nm
+cannot create a link to a file,
+.Nm
+will not create a second copy of the file.
+.Pp
+If the extraction of a file from an archive is prematurely terminated
+by a signal or error,
+.Nm
+may have only partially extracted the file the user wanted.
+Additionally, the file modes of extracted files and directories may
+have incorrect file bits, and the modification and access times may
+be wrong.
+.Pp
+If the creation of an archive is prematurely terminated by a signal
+or error,
+.Nm
+may have only partially created the archive, which may violate the
+specific archive format specification.
+.Sh SEE ALSO
+.Xr pax 1 ,
+.Xr tar 1
+.Sh AUTHORS
+.An Keith Muller
+at the University of California, San Diego.
+.Sh CAVEATS
+Different file formats have different maximum file sizes.
+It is recommended that a format such as cpio or ustar
+be used for larger files.
+.Bl -column "File format" "Maximum file size" -offset indent
+.It Sy "File format" Ta Sy "Maximum file size"
+.It bcpio Ta "4 Gigabytes"
+.It sv4cpio Ta "4 Gigabytes"
+.It cpio Ta "8 Gigabytes"
+.It tar Ta "8 Gigabytes"
+.It ustar Ta "8 Gigabytes"
+.El
+.Sh BUGS
+The
+.Fl s
+and
+.Fl S
+options are currently not implemented.
diff --git a/bin/pax/cpio.c b/bin/pax/cpio.c
new file mode 100644
index 0000000..769a9df
--- /dev/null
+++ b/bin/pax/cpio.c
@@ -0,0 +1,1106 @@
+/* $OpenBSD: cpio.c,v 1.33 2017/09/16 07:42:34 otto Exp $ */
+/* $NetBSD: cpio.c,v 1.5 1995/03/21 09:07:13 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#ifndef major
+#include <sys/sysmacros.h>
+#endif
+
+#include "pax.h"
+#include "cpio.h"
+#include "extern.h"
+
+static int rd_nm(ARCHD *, int);
+static int rd_ln_nm(ARCHD *);
+static int com_rd(ARCHD *);
+
+/*
+ * Routines which support the different cpio versions
+ */
+
+static int swp_head; /* binary cpio header byte swap */
+
+/*
+ * Routines common to all versions of cpio
+ */
+
+/*
+ * cpio_strd()
+ * Fire up the hard link detection code
+ * Return:
+ * 0 if ok -1 otherwise (the return values of lnk_start())
+ */
+
+int
+cpio_strd(void)
+{
+ return(lnk_start());
+}
+
+/*
+ * cpio_trail()
+ * Called to determine if a header block is a valid trailer. We are
+ * passed the block, the in_sync flag (which tells us we are in resync
+ * mode; looking for a valid header), and cnt (which starts at zero)
+ * which is used to count the number of empty blocks we have seen so far.
+ * Return:
+ * 0 if a valid trailer, -1 if not a valid trailer,
+ */
+
+int
+cpio_trail(ARCHD *arcn, char *notused, int notused2, int *notused3)
+{
+ /*
+ * look for trailer id in file we are about to process
+ */
+ if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0))
+ return(0);
+ return(-1);
+}
+
+/*
+ * com_rd()
+ * operations common to all cpio read functions.
+ * Return:
+ * 0
+ */
+
+static int
+com_rd(ARCHD *arcn)
+{
+ arcn->skip = 0;
+ arcn->pat = NULL;
+ arcn->org_name = arcn->name;
+ switch (arcn->sb.st_mode & C_IFMT) {
+ case C_ISFIFO:
+ arcn->type = PAX_FIF;
+ break;
+ case C_ISDIR:
+ arcn->type = PAX_DIR;
+ break;
+ case C_ISBLK:
+ arcn->type = PAX_BLK;
+ break;
+ case C_ISCHR:
+ arcn->type = PAX_CHR;
+ break;
+ case C_ISLNK:
+ arcn->type = PAX_SLK;
+ break;
+ case C_ISOCK:
+ arcn->type = PAX_SCK;
+ break;
+ case C_ISCTG:
+ case C_ISREG:
+ default:
+ /*
+ * we have file data, set up skip (pad is set in the format
+ * specific sections)
+ */
+ arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG;
+ arcn->type = PAX_REG;
+ arcn->skip = arcn->sb.st_size;
+ break;
+ }
+ if (chk_lnk(arcn) < 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * cpio_endwr()
+ * write the special file with the name trailer in the proper format
+ * Return:
+ * result of the write of the trailer from the cpio specific write func
+ */
+
+int
+cpio_endwr(void)
+{
+ ARCHD last;
+
+ /*
+ * create a trailer request and call the proper format write function
+ */
+ memset(&last, 0, sizeof(last));
+ last.nlen = sizeof(TRAILER) - 1;
+ last.type = PAX_REG;
+ last.sb.st_nlink = 1;
+ (void)strlcpy(last.name, TRAILER, sizeof(last.name));
+ return((*frmt->wr)(&last));
+}
+
+/*
+ * rd_nm()
+ * read in the file name which follows the cpio header
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+static int
+rd_nm(ARCHD *arcn, int nsz)
+{
+ /*
+ * do not even try bogus values
+ */
+ if ((nsz == 0) || ((size_t)nsz > sizeof(arcn->name))) {
+ paxwarn(1, "Cpio file name length %d is out of range", nsz);
+ return(-1);
+ }
+
+ /*
+ * read the name and make sure it is not empty and is \0 terminated
+ */
+ if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') ||
+ (arcn->name[0] == '\0')) {
+ paxwarn(1, "Cpio file name in header is corrupted");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * rd_ln_nm()
+ * read in the link name for a file with links. The link name is stored
+ * like file data (and is NOT \0 terminated!)
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+static int
+rd_ln_nm(ARCHD *arcn)
+{
+ /*
+ * check the length specified for bogus values
+ */
+ if ((arcn->sb.st_size <= 0) ||
+ (arcn->sb.st_size >= (off_t)sizeof(arcn->ln_name))) {
+ paxwarn(1, "Cpio link name length is invalid: %zu",
+ arcn->sb.st_size);
+ return(-1);
+ }
+
+ /*
+ * read in the link name and \0 terminate it
+ */
+ if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) !=
+ (int)arcn->sb.st_size) {
+ paxwarn(1, "Cpio link name read error");
+ return(-1);
+ }
+ arcn->ln_nlen = arcn->sb.st_size;
+ arcn->ln_name[arcn->ln_nlen] = '\0';
+
+ /*
+ * watch out for those empty link names
+ */
+ if (arcn->ln_name[0] == '\0') {
+ paxwarn(1, "Cpio link name is corrupt");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * Routines common to the extended byte oriented cpio format
+ */
+
+/*
+ * cpio_id()
+ * determine if a block given to us is a valid extended byte oriented
+ * cpio header
+ * Return:
+ * 0 if a valid header, -1 otherwise
+ */
+
+int
+cpio_id(char *blk, int size)
+{
+ if ((size < (int)sizeof(HD_CPIO)) ||
+ (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0))
+ return(-1);
+ return(0);
+}
+
+/*
+ * cpio_rd()
+ * determine if a buffer is a byte oriented extended cpio archive entry.
+ * convert and store the values in the ARCHD parameter.
+ * Return:
+ * 0 if a valid header, -1 otherwise.
+ */
+
+int
+cpio_rd(ARCHD *arcn, char *buf)
+{
+ int nsz;
+ unsigned long long val;
+ HD_CPIO *hd;
+
+ /*
+ * check that this is a valid header, if not return -1
+ */
+ if (cpio_id(buf, sizeof(HD_CPIO)) < 0)
+ return(-1);
+ hd = (HD_CPIO *)buf;
+
+ /*
+ * byte oriented cpio (posix) does not have padding! extract the octal
+ * ascii fields from the header
+ */
+ arcn->pad = 0;
+ arcn->sb.st_dev = (dev_t)asc_ul(hd->c_dev, sizeof(hd->c_dev), OCT);
+ arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), OCT);
+ arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), OCT);
+ arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), OCT);
+ arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), OCT);
+ arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink),
+ OCT);
+ arcn->sb.st_rdev = (dev_t)asc_ul(hd->c_rdev, sizeof(hd->c_rdev), OCT);
+ val = asc_ull(hd->c_mtime, sizeof(hd->c_mtime), OCT);
+ if (val > MAX_TIME_T)
+ arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */
+ else
+ arcn->sb.st_mtime = val;
+ arcn->sb.st_mtim.tv_nsec = 0;
+ arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim;
+ arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize,sizeof(hd->c_filesize),
+ OCT);
+
+ /*
+ * check name size and if valid, read in the name of this entry (name
+ * follows header in the archive)
+ */
+ if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2)
+ return(-1);
+ arcn->nlen = nsz - 1;
+ if (rd_nm(arcn, nsz) < 0)
+ return(-1);
+
+ if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) {
+ /*
+ * no link name to read for this file
+ */
+ arcn->ln_nlen = 0;
+ arcn->ln_name[0] = '\0';
+ return(com_rd(arcn));
+ }
+
+ /*
+ * check link name size and read in the link name. Link names are
+ * stored like file data.
+ */
+ if (rd_ln_nm(arcn) < 0)
+ return(-1);
+
+ /*
+ * we have a valid header (with a link)
+ */
+ return(com_rd(arcn));
+}
+
+/*
+ * cpio_endrd()
+ * no cleanup needed here, just return size of the trailer (for append)
+ * Return:
+ * size of trailer header in this format
+ */
+
+off_t
+cpio_endrd(void)
+{
+ return sizeof(HD_CPIO) + sizeof(TRAILER);
+}
+
+/*
+ * cpio_stwr()
+ * start up the device mapping table
+ * Return:
+ * 0 if ok, -1 otherwise (what dev_start() returns)
+ */
+
+int
+cpio_stwr(void)
+{
+ return(dev_start());
+}
+
+/*
+ * cpio_wr()
+ * copy the data in the ARCHD to buffer in extended byte oriented cpio
+ * format.
+ * Return
+ * 0 if file has data to be written after the header, 1 if file has NO
+ * data to write after the header, -1 if archive write failed
+ */
+
+int
+cpio_wr(ARCHD *arcn)
+{
+ HD_CPIO *hd;
+ int nsz;
+ char hdblk[sizeof(HD_CPIO)];
+
+ /*
+ * check and repair truncated device and inode fields in the header
+ */
+ if (map_dev(arcn, CPIO_MASK, CPIO_MASK) < 0)
+ return(-1);
+
+ arcn->pad = 0;
+ nsz = arcn->nlen + 1;
+ hd = (HD_CPIO *)hdblk;
+ if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR))
+ arcn->sb.st_rdev = 0;
+
+ switch (arcn->type) {
+ case PAX_CTG:
+ case PAX_REG:
+ case PAX_HRG:
+ /*
+ * set data size for file data
+ */
+ if (ull_asc(arcn->sb.st_size, hd->c_filesize,
+ sizeof(hd->c_filesize), OCT)) {
+ paxwarn(1,"File is too large for cpio format %s",
+ arcn->org_name);
+ return(1);
+ }
+ break;
+ case PAX_SLK:
+ /*
+ * set data size to hold link name
+ */
+ if (ul_asc(arcn->ln_nlen, hd->c_filesize,
+ sizeof(hd->c_filesize), OCT))
+ goto out;
+ break;
+ default:
+ /*
+ * all other file types have no file data
+ */
+ if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), OCT))
+ goto out;
+ break;
+ }
+
+ /*
+ * copy the values to the header using octal ascii
+ */
+ if (ul_asc(MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) ||
+ ul_asc(arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), OCT) ||
+ ul_asc(arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), OCT) ||
+ ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), OCT) ||
+ ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), OCT) ||
+ ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), OCT) ||
+ ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), OCT) ||
+ ul_asc(arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), OCT) ||
+ ull_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime,
+ sizeof(hd->c_mtime), OCT) ||
+ ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT))
+ goto out;
+
+ /*
+ * write the file name to the archive
+ */
+ if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) ||
+ (wr_rdbuf(arcn->name, nsz) < 0)) {
+ paxwarn(1, "Unable to write cpio header for %s", arcn->org_name);
+ return(-1);
+ }
+
+ /*
+ * if this file has data, we are done. The caller will write the file
+ * data, if we are link tell caller we are done, go to next file
+ */
+ if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG))
+ return(0);
+ if (arcn->type != PAX_SLK)
+ return(1);
+
+ /*
+ * write the link name to the archive, tell the caller to go to the
+ * next file as we are done.
+ */
+ if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) {
+ paxwarn(1,"Unable to write cpio link name for %s",arcn->org_name);
+ return(-1);
+ }
+ return(1);
+
+ out:
+ /*
+ * header field is out of range
+ */
+ paxwarn(1, "Cpio header field is too small to store file %s",
+ arcn->org_name);
+ return(1);
+}
+
+/*
+ * Routines common to the system VR4 version of cpio (with/without file CRC)
+ */
+
+/*
+ * vcpio_id()
+ * determine if a block given to us is a valid system VR4 cpio header
+ * WITHOUT crc. WATCH it the magic cookies are in OCTAL, the header
+ * uses HEX
+ * Return:
+ * 0 if a valid header, -1 otherwise
+ */
+
+int
+vcpio_id(char *blk, int size)
+{
+ if ((size < (int)sizeof(HD_VCPIO)) ||
+ (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0))
+ return(-1);
+ return(0);
+}
+
+/*
+ * crc_id()
+ * determine if a block given to us is a valid system VR4 cpio header
+ * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX
+ * Return:
+ * 0 if a valid header, -1 otherwise
+ */
+
+int
+crc_id(char *blk, int size)
+{
+ if ((size < (int)sizeof(HD_VCPIO)) ||
+ (strncmp(blk, AVCMAGIC, sizeof(AVCMAGIC) - 1) != 0))
+ return(-1);
+ return(0);
+}
+
+/*
+ * crc_strd()
+ w set file data CRC calculations. Fire up the hard link detection code
+ * Return:
+ * 0 if ok -1 otherwise (the return values of lnk_start())
+ */
+
+int
+crc_strd(void)
+{
+ docrc = 1;
+ return(lnk_start());
+}
+
+/*
+ * vcpio_rd()
+ * determine if a buffer is a system VR4 archive entry. (with/without CRC)
+ * convert and store the values in the ARCHD parameter.
+ * Return:
+ * 0 if a valid header, -1 otherwise.
+ */
+
+int
+vcpio_rd(ARCHD *arcn, char *buf)
+{
+ HD_VCPIO *hd;
+ dev_t devminor;
+ dev_t devmajor;
+ int nsz;
+
+ /*
+ * during the id phase it was determined if we were using CRC, use the
+ * proper id routine.
+ */
+ if (docrc) {
+ if (crc_id(buf, sizeof(HD_VCPIO)) < 0)
+ return(-1);
+ } else {
+ if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0)
+ return(-1);
+ }
+
+ hd = (HD_VCPIO *)buf;
+ arcn->pad = 0;
+
+ /*
+ * extract the hex ascii fields from the header
+ */
+ arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), HEX);
+ arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), HEX);
+ arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), HEX);
+ arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), HEX);
+ arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime,sizeof(hd->c_mtime),HEX);
+ arcn->sb.st_mtim.tv_nsec = 0;
+ arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim;
+ arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize,
+ sizeof(hd->c_filesize), HEX);
+ arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink),
+ HEX);
+ devmajor = (dev_t)asc_ul(hd->c_maj, sizeof(hd->c_maj), HEX);
+ devminor = (dev_t)asc_ul(hd->c_min, sizeof(hd->c_min), HEX);
+ arcn->sb.st_dev = TODEV(devmajor, devminor);
+ devmajor = (dev_t)asc_ul(hd->c_rmaj, sizeof(hd->c_maj), HEX);
+ devminor = (dev_t)asc_ul(hd->c_rmin, sizeof(hd->c_min), HEX);
+ arcn->sb.st_rdev = TODEV(devmajor, devminor);
+ arcn->crc = asc_ul(hd->c_chksum, sizeof(hd->c_chksum), HEX);
+
+ /*
+ * check the length of the file name, if ok read it in, return -1 if
+ * bogus
+ */
+ if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2)
+ return(-1);
+ arcn->nlen = nsz - 1;
+ if (rd_nm(arcn, nsz) < 0)
+ return(-1);
+
+ /*
+ * skip padding. header + filename is aligned to 4 byte boundaries
+ */
+ if (rd_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0)
+ return(-1);
+
+ /*
+ * if not a link (or a file with no data), calculate pad size (for
+ * padding which follows the file data), clear the link name and return
+ */
+ if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) {
+ /*
+ * we have a valid header (not a link)
+ */
+ arcn->ln_nlen = 0;
+ arcn->ln_name[0] = '\0';
+ arcn->pad = VCPIO_PAD(arcn->sb.st_size);
+ return(com_rd(arcn));
+ }
+
+ /*
+ * read in the link name and skip over the padding
+ */
+ if ((rd_ln_nm(arcn) < 0) ||
+ (rd_skip(VCPIO_PAD(arcn->sb.st_size)) < 0))
+ return(-1);
+
+ /*
+ * we have a valid header (with a link)
+ */
+ return(com_rd(arcn));
+}
+
+/*
+ * vcpio_endrd()
+ * no cleanup needed here, just return size of the trailer (for append)
+ * Return:
+ * size of trailer header in this format
+ */
+
+off_t
+vcpio_endrd(void)
+{
+ return sizeof(HD_VCPIO) + sizeof(TRAILER) +
+ (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER)));
+}
+
+/*
+ * crc_stwr()
+ * start up the device mapping table, enable crc file calculation
+ * Return:
+ * 0 if ok, -1 otherwise (what dev_start() returns)
+ */
+
+int
+crc_stwr(void)
+{
+ docrc = 1;
+ return(dev_start());
+}
+
+/*
+ * vcpio_wr()
+ * copy the data in the ARCHD to buffer in system VR4 cpio
+ * (with/without crc) format.
+ * Return
+ * 0 if file has data to be written after the header, 1 if file has
+ * NO data to write after the header, -1 if archive write failed
+ */
+
+int
+vcpio_wr(ARCHD *arcn)
+{
+ HD_VCPIO *hd;
+ unsigned int nsz;
+ char hdblk[sizeof(HD_VCPIO)];
+
+ /*
+ * check and repair truncated device and inode fields in the cpio
+ * header
+ */
+ if (map_dev(arcn, VCPIO_MASK, VCPIO_MASK) < 0)
+ return(-1);
+ nsz = arcn->nlen + 1;
+ hd = (HD_VCPIO *)hdblk;
+ if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR))
+ arcn->sb.st_rdev = 0;
+
+ /*
+ * add the proper magic value depending whether we were asked for
+ * file data crc's, and the crc if needed.
+ */
+ if (docrc) {
+ if (ul_asc(VCMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) ||
+ ul_asc(arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), HEX))
+ goto out;
+ } else {
+ if (ul_asc(VMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) ||
+ ul_asc(0, hd->c_chksum, sizeof(hd->c_chksum),HEX))
+ goto out;
+ }
+
+ switch (arcn->type) {
+ case PAX_CTG:
+ case PAX_REG:
+ case PAX_HRG:
+ /*
+ * caller will copy file data to the archive. tell him how
+ * much to pad.
+ */
+ arcn->pad = VCPIO_PAD(arcn->sb.st_size);
+ if (ull_asc(arcn->sb.st_size, hd->c_filesize,
+ sizeof(hd->c_filesize), HEX)) {
+ paxwarn(1,"File is too large for sv4cpio format %s",
+ arcn->org_name);
+ return(1);
+ }
+ break;
+ case PAX_SLK:
+ /*
+ * no file data for the caller to process, the file data has
+ * the size of the link
+ */
+ arcn->pad = 0;
+ if (ul_asc(arcn->ln_nlen, hd->c_filesize,
+ sizeof(hd->c_filesize), HEX))
+ goto out;
+ break;
+ default:
+ /*
+ * no file data for the caller to process
+ */
+ arcn->pad = 0;
+ if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), HEX))
+ goto out;
+ break;
+ }
+
+ /*
+ * set the other fields in the header
+ */
+ if (ul_asc(arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), HEX) ||
+ ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), HEX) ||
+ ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), HEX) ||
+ ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), HEX) ||
+ ul_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime,
+ sizeof(hd->c_mtime), HEX) ||
+ ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), HEX) ||
+ ul_asc(MAJOR(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), HEX) ||
+ ul_asc(MINOR(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), HEX) ||
+ ul_asc(MAJOR(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), HEX) ||
+ ul_asc(MINOR(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), HEX) ||
+ ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX))
+ goto out;
+
+ /*
+ * write the header, the file name and padding as required.
+ */
+ if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) ||
+ (wr_rdbuf(arcn->name, (int)nsz) < 0) ||
+ (wr_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0)) {
+ paxwarn(1,"Could not write sv4cpio header for %s",arcn->org_name);
+ return(-1);
+ }
+
+ /*
+ * if we have file data, tell the caller we are done, copy the file
+ */
+ if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG))
+ return(0);
+
+ /*
+ * if we are not a link, tell the caller we are done, go to next file
+ */
+ if (arcn->type != PAX_SLK)
+ return(1);
+
+ /*
+ * write the link name, tell the caller we are done.
+ */
+ if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) ||
+ (wr_skip(VCPIO_PAD(arcn->ln_nlen)) < 0)) {
+ paxwarn(1,"Could not write sv4cpio link name for %s",
+ arcn->org_name);
+ return(-1);
+ }
+ return(1);
+
+ out:
+ /*
+ * header field is out of range
+ */
+ paxwarn(1,"Sv4cpio header field is too small for file %s",arcn->org_name);
+ return(1);
+}
+
+/*
+ * Routines common to the old binary header cpio
+ */
+
+/*
+ * bcpio_id()
+ * determine if a block given to us is a old binary cpio header
+ * (with/without header byte swapping)
+ * Return:
+ * 0 if a valid header, -1 otherwise
+ */
+
+int
+bcpio_id(char *blk, int size)
+{
+ if (size < (int)sizeof(HD_BCPIO))
+ return(-1);
+
+ /*
+ * check both normal and byte swapped magic cookies
+ */
+ if (((u_short)SHRT_EXT(blk)) == MAGIC)
+ return(0);
+ if (((u_short)RSHRT_EXT(blk)) == MAGIC) {
+ if (!swp_head)
+ ++swp_head;
+ return(0);
+ }
+ return(-1);
+}
+
+/*
+ * bcpio_rd()
+ * determine if a buffer is a old binary archive entry. (it may have byte
+ * swapped header) convert and store the values in the ARCHD parameter.
+ * This is a very old header format and should not really be used.
+ * Return:
+ * 0 if a valid header, -1 otherwise.
+ */
+
+int
+bcpio_rd(ARCHD *arcn, char *buf)
+{
+ HD_BCPIO *hd;
+ int nsz;
+
+ /*
+ * check the header
+ */
+ if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0)
+ return(-1);
+
+ arcn->pad = 0;
+ hd = (HD_BCPIO *)buf;
+ if (swp_head) {
+ /*
+ * header has swapped bytes on 16 bit boundaries
+ */
+ arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev));
+ arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino));
+ arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode));
+ arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid));
+ arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid));
+ arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink));
+ arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev));
+ arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1));
+ arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) |
+ ((time_t)(RSHRT_EXT(hd->h_mtime_2)));
+ arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1));
+ arcn->sb.st_size = (arcn->sb.st_size << 16) |
+ ((off_t)(RSHRT_EXT(hd->h_filesize_2)));
+ nsz = (int)(RSHRT_EXT(hd->h_namesize));
+ } else {
+ arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev));
+ arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino));
+ arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode));
+ arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid));
+ arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid));
+ arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink));
+ arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev));
+ arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1));
+ arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) |
+ ((time_t)(SHRT_EXT(hd->h_mtime_2)));
+ arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1));
+ arcn->sb.st_size = (arcn->sb.st_size << 16) |
+ ((off_t)(SHRT_EXT(hd->h_filesize_2)));
+ nsz = (int)(SHRT_EXT(hd->h_namesize));
+ }
+ arcn->sb.st_mtim.tv_nsec = 0;
+ arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim;
+
+ /*
+ * check the file name size, if bogus give up. otherwise read the file
+ * name
+ */
+ if (nsz < 2)
+ return(-1);
+ arcn->nlen = nsz - 1;
+ if (rd_nm(arcn, nsz) < 0)
+ return(-1);
+
+ /*
+ * header + file name are aligned to 2 byte boundaries, skip if needed
+ */
+ if (rd_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0)
+ return(-1);
+
+ /*
+ * if not a link (or a file with no data), calculate pad size (for
+ * padding which follows the file data), clear the link name and return
+ */
+ if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){
+ /*
+ * we have a valid header (not a link)
+ */
+ arcn->ln_nlen = 0;
+ arcn->ln_name[0] = '\0';
+ arcn->pad = BCPIO_PAD(arcn->sb.st_size);
+ return(com_rd(arcn));
+ }
+
+ if ((rd_ln_nm(arcn) < 0) ||
+ (rd_skip(BCPIO_PAD(arcn->sb.st_size)) < 0))
+ return(-1);
+
+ /*
+ * we have a valid header (with a link)
+ */
+ return(com_rd(arcn));
+}
+
+/*
+ * bcpio_endrd()
+ * no cleanup needed here, just return size of the trailer (for append)
+ * Return:
+ * size of trailer header in this format
+ */
+
+off_t
+bcpio_endrd(void)
+{
+ return sizeof(HD_BCPIO) + sizeof(TRAILER) +
+ (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER)));
+}
+
+/*
+ * bcpio_wr()
+ * copy the data in the ARCHD to buffer in old binary cpio format
+ * There is a real chance of field overflow with this critter. So we
+ * always check the conversion is ok. nobody in their right mind
+ * should write an archive in this format...
+ * Return
+ * 0 if file has data to be written after the header, 1 if file has NO
+ * data to write after the header, -1 if archive write failed
+ */
+
+int
+bcpio_wr(ARCHD *arcn)
+{
+ HD_BCPIO *hd;
+ int nsz;
+ char hdblk[sizeof(HD_BCPIO)];
+ off_t t_offt;
+ int t_int;
+ time_t t_timet;
+
+ /*
+ * check and repair truncated device and inode fields in the cpio
+ * header
+ */
+ if (map_dev(arcn, BCPIO_MASK, BCPIO_MASK) < 0)
+ return(-1);
+
+ if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR))
+ arcn->sb.st_rdev = 0;
+ hd = (HD_BCPIO *)hdblk;
+
+ switch (arcn->type) {
+ case PAX_CTG:
+ case PAX_REG:
+ case PAX_HRG:
+ /*
+ * caller will copy file data to the archive. tell him how
+ * much to pad.
+ */
+ arcn->pad = BCPIO_PAD(arcn->sb.st_size);
+ hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size);
+ hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size);
+ hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size);
+ hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size);
+ t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1));
+ t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2)));
+ if (arcn->sb.st_size != t_offt) {
+ paxwarn(1,"File is too large for bcpio format %s",
+ arcn->org_name);
+ return(1);
+ }
+ break;
+ case PAX_SLK:
+ /*
+ * no file data for the caller to process, the file data has
+ * the size of the link
+ */
+ arcn->pad = 0;
+ hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen);
+ hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen);
+ hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen);
+ hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen);
+ t_int = (int)(SHRT_EXT(hd->h_filesize_1));
+ t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2)));
+ if (arcn->ln_nlen != t_int)
+ goto out;
+ break;
+ default:
+ /*
+ * no file data for the caller to process
+ */
+ arcn->pad = 0;
+ hd->h_filesize_1[0] = (char)0;
+ hd->h_filesize_1[1] = (char)0;
+ hd->h_filesize_2[0] = (char)0;
+ hd->h_filesize_2[1] = (char)0;
+ break;
+ }
+
+ /*
+ * build up the rest of the fields
+ */
+ hd->h_magic[0] = CHR_WR_2(MAGIC);
+ hd->h_magic[1] = CHR_WR_3(MAGIC);
+ hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev);
+ hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev);
+ if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev)))
+ goto out;
+ hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino);
+ hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino);
+ if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino)))
+ goto out;
+ hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode);
+ hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode);
+ if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode)))
+ goto out;
+ hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid);
+ hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid);
+ if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid)))
+ goto out;
+ hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid);
+ hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid);
+ if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid)))
+ goto out;
+ hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink);
+ hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink);
+ if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink)))
+ goto out;
+ hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev);
+ hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev);
+ if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev)))
+ goto out;
+ if (arcn->sb.st_mtime > 0) {
+ hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime);
+ hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime);
+ hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime);
+ hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime);
+ t_timet = (time_t)SHRT_EXT(hd->h_mtime_1);
+ t_timet = t_timet << 16 | (time_t)SHRT_EXT(hd->h_mtime_2);
+ if (arcn->sb.st_mtime != t_timet)
+ goto out;
+ } else {
+ hd->h_mtime_1[0] = hd->h_mtime_1[1] = 0;
+ hd->h_mtime_2[0] = hd->h_mtime_2[1] = 0;
+ }
+ nsz = arcn->nlen + 1;
+ hd->h_namesize[0] = CHR_WR_2(nsz);
+ hd->h_namesize[1] = CHR_WR_3(nsz);
+ if (nsz != (int)(SHRT_EXT(hd->h_namesize)))
+ goto out;
+
+ /*
+ * write the header, the file name and padding as required.
+ */
+ if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) ||
+ (wr_rdbuf(arcn->name, nsz) < 0) ||
+ (wr_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0)) {
+ paxwarn(1, "Could not write bcpio header for %s", arcn->org_name);
+ return(-1);
+ }
+
+ /*
+ * if we have file data, tell the caller we are done
+ */
+ if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG))
+ return(0);
+
+ /*
+ * if we are not a link, tell the caller we are done, go to next file
+ */
+ if (arcn->type != PAX_SLK)
+ return(1);
+
+ /*
+ * write the link name, tell the caller we are done.
+ */
+ if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) ||
+ (wr_skip(BCPIO_PAD(arcn->ln_nlen)) < 0)) {
+ paxwarn(1,"Could not write bcpio link name for %s",arcn->org_name);
+ return(-1);
+ }
+ return(1);
+
+ out:
+ /*
+ * header field is out of range
+ */
+ paxwarn(1,"Bcpio header field is too small for file %s", arcn->org_name);
+ return(1);
+}
diff --git a/bin/pax/cpio.h b/bin/pax/cpio.h
new file mode 100644
index 0000000..dfbd03f
--- /dev/null
+++ b/bin/pax/cpio.h
@@ -0,0 +1,150 @@
+/* $OpenBSD: cpio.h,v 1.4 2003/06/02 23:32:08 millert Exp $ */
+/* $NetBSD: cpio.h,v 1.3 1995/03/21 09:07:15 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cpio.h 8.1 (Berkeley) 5/31/93
+ */
+
+/*
+ * Defines common to all versions of cpio
+ */
+#define TRAILER "TRAILER!!!" /* name in last archive record */
+
+/*
+ * Header encoding of the different file types
+ */
+#define C_ISDIR 040000 /* Directory */
+#define C_ISFIFO 010000 /* FIFO */
+#define C_ISREG 0100000 /* Regular file */
+#define C_ISBLK 060000 /* Block special file */
+#define C_ISCHR 020000 /* Character special file */
+#define C_ISCTG 0110000 /* Reserved for contiguous files */
+#define C_ISLNK 0120000 /* Reserved for symbolic links */
+#define C_ISOCK 0140000 /* Reserved for sockets */
+#define C_IFMT 0170000 /* type of file */
+
+/*
+ * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990
+ */
+typedef struct {
+ char c_magic[6]; /* magic cookie */
+ char c_dev[6]; /* device number */
+ char c_ino[6]; /* inode number */
+ char c_mode[6]; /* file type/access */
+ char c_uid[6]; /* owners uid */
+ char c_gid[6]; /* owners gid */
+ char c_nlink[6]; /* # of links at archive creation */
+ char c_rdev[6]; /* block/char major/minor # */
+ char c_mtime[11]; /* modification time */
+ char c_namesize[6]; /* length of pathname */
+ char c_filesize[11]; /* length of file in bytes */
+} HD_CPIO;
+
+#define MAGIC 070707 /* transportable archive id */
+
+#ifdef _PAX_
+#define AMAGIC "070707" /* ascii equivalent string of MAGIC */
+#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */
+ /* used for dev/inode remaps */
+#endif /* _PAX_ */
+
+/*
+ * Binary cpio header structure
+ *
+ * CAUTION! CAUTION! CAUTION!
+ * Each field really represents a 16 bit short (NOT ASCII). Described as
+ * an array of chars in an attempt to improve portability!!
+ */
+typedef struct {
+ u_char h_magic[2];
+ u_char h_dev[2];
+ u_char h_ino[2];
+ u_char h_mode[2];
+ u_char h_uid[2];
+ u_char h_gid[2];
+ u_char h_nlink[2];
+ u_char h_rdev[2];
+ u_char h_mtime_1[2];
+ u_char h_mtime_2[2];
+ u_char h_namesize[2];
+ u_char h_filesize_1[2];
+ u_char h_filesize_2[2];
+} HD_BCPIO;
+
+#ifdef _PAX_
+/*
+ * extraction and creation macros for binary cpio
+ */
+#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff))
+#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff))
+#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff))
+#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff))
+#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff))
+#define CHR_WR_3(val) ((char)((val) & 0xff))
+
+/*
+ * binary cpio masks and pads
+ */
+#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */
+#define BCPIO_MASK 0xffff /* mask for dev/ino fields */
+#endif /* _PAX_ */
+
+/*
+ * System VR4 cpio header structure (with/without file data crc)
+ */
+typedef struct {
+ char c_magic[6]; /* magic cookie */
+ char c_ino[8]; /* inode number */
+ char c_mode[8]; /* file type/access */
+ char c_uid[8]; /* owners uid */
+ char c_gid[8]; /* owners gid */
+ char c_nlink[8]; /* # of links at archive creation */
+ char c_mtime[8]; /* modification time */
+ char c_filesize[8]; /* length of file in bytes */
+ char c_maj[8]; /* block/char major # */
+ char c_min[8]; /* block/char minor # */
+ char c_rmaj[8]; /* special file major # */
+ char c_rmin[8]; /* special file minor # */
+ char c_namesize[8]; /* length of pathname */
+ char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */
+} HD_VCPIO;
+
+#define VMAGIC 070701 /* sVr4 new portable archive id */
+#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */
+#ifdef _PAX_
+#define AVMAGIC "070701" /* ascii string of above */
+#define AVCMAGIC "070702" /* ascii string of above */
+#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */
+#define VCPIO_MASK 0xffffffff /* mask for dev/ino fields */
+#endif /* _PAX_ */
diff --git a/bin/pax/extern.h b/bin/pax/extern.h
new file mode 100644
index 0000000..67a21d8
--- /dev/null
+++ b/bin/pax/extern.h
@@ -0,0 +1,310 @@
+/* $OpenBSD: extern.h,v 1.60 2020/03/23 20:04:19 espie Exp $ */
+/* $NetBSD: extern.h,v 1.5 1996/03/26 23:54:16 mrg Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.2 (Berkeley) 4/18/94
+ */
+
+/*
+ * External references from each source file
+ */
+
+/*
+ * ar_io.c
+ */
+extern const char *arcname;
+extern const char *gzip_program;
+extern int force_one_volume;
+int ar_open(const char *);
+void ar_close(int _in_sig);
+void ar_drain(void);
+int ar_set_wr(void);
+int ar_app_ok(void);
+int ar_read(char *, int);
+int ar_write(char *, int);
+int ar_rdsync(void);
+int ar_fow(off_t, off_t *);
+int ar_rev(off_t );
+int ar_next(void);
+
+/*
+ * ar_subs.c
+ */
+extern u_long flcnt;
+void list(void);
+void extract(void);
+void append(void);
+void archive(void);
+void copy(void);
+
+/*
+ * buf_subs.c
+ */
+extern int blksz;
+extern int wrblksz;
+extern int maxflt;
+extern int rdblksz;
+extern off_t wrlimit;
+extern off_t rdcnt;
+extern off_t wrcnt;
+int wr_start(void);
+int rd_start(void);
+void cp_start(void);
+int appnd_start(off_t);
+int rd_sync(void);
+void pback(char *, int);
+int rd_skip(off_t);
+void wr_fin(void);
+int wr_rdbuf(char *, int);
+int rd_wrbuf(char *, int);
+int wr_skip(off_t);
+int wr_rdfile(ARCHD *, int, off_t *);
+int rd_wrfile(ARCHD *, int, off_t *);
+void cp_file(ARCHD *, int, int);
+int buf_fill(void);
+int buf_flush(int);
+
+/*
+ * cpio.c
+ */
+int cpio_strd(void);
+int cpio_trail(ARCHD *, char *, int, int *);
+int cpio_endwr(void);
+int cpio_id(char *, int);
+int cpio_rd(ARCHD *, char *);
+off_t cpio_endrd(void);
+int cpio_stwr(void);
+int cpio_wr(ARCHD *);
+int vcpio_id(char *, int);
+int crc_id(char *, int);
+int crc_strd(void);
+int vcpio_rd(ARCHD *, char *);
+off_t vcpio_endrd(void);
+int crc_stwr(void);
+int vcpio_wr(ARCHD *);
+int bcpio_id(char *, int);
+int bcpio_rd(ARCHD *, char *);
+off_t bcpio_endrd(void);
+int bcpio_wr(ARCHD *);
+
+/*
+ * file_subs.c
+ */
+int file_creat(ARCHD *);
+void file_close(ARCHD *, int);
+int lnk_creat(ARCHD *);
+int cross_lnk(ARCHD *);
+int chk_same(ARCHD *);
+int node_creat(ARCHD *);
+int unlnk_exist(char *, int);
+int chk_path(char *, uid_t, gid_t, int);
+void set_ftime(const char *, const struct timespec *,
+ const struct timespec *, int);
+void fset_ftime(const char *, int, const struct timespec *,
+ const struct timespec *, int);
+int set_ids(char *, uid_t, gid_t);
+int fset_ids(char *, int, uid_t, gid_t);
+void set_pmode(char *, mode_t);
+void fset_pmode(char *, int, mode_t);
+int set_attr(const struct file_times *, int _force_times, mode_t, int _do_mode,
+ int _in_sig);
+int file_write(int, char *, int, int *, int *, int, char *);
+void file_flush(int, char *, int);
+void rdfile_close(ARCHD *, int *);
+int set_crc(ARCHD *, int);
+
+/*
+ * ftree.c
+ */
+int ftree_start(void);
+int ftree_add(char *, int);
+void ftree_sel(ARCHD *);
+void ftree_skipped_newer(ARCHD *);
+void ftree_chk(void);
+int next_file(ARCHD *);
+
+/*
+ * gen_subs.c
+ */
+void ls_list(ARCHD *, time_t, FILE *);
+void ls_tty(ARCHD *);
+void safe_print(const char *, FILE *);
+u_long asc_ul(char *, int, int);
+int ul_asc(u_long, char *, int, int);
+unsigned long long asc_ull(char *, int, int);
+int ull_asc(unsigned long long, char *, int, int);
+size_t fieldcpy(char *, size_t, const char *, size_t);
+
+/*
+ * getoldopt.c
+ */
+int getoldopt(int, char **, const char *);
+
+/*
+ * options.c
+ */
+extern FSUB fsub[];
+extern int ford[];
+void options(int, char **);
+OPLIST * opt_next(void);
+int opt_add(const char *);
+int bad_opt(void);
+extern char *chdname;
+
+/*
+ * pat_rep.c
+ */
+int rep_add(char *);
+int pat_add(char *, char *);
+void pat_chk(void);
+int pat_sel(ARCHD *);
+int pat_match(ARCHD *);
+int mod_name(ARCHD *);
+int set_dest(ARCHD *, char *, int);
+int has_dotdot(const char *);
+
+/*
+ * pax.c
+ */
+extern int act;
+extern FSUB *frmt;
+extern int cflag;
+extern int cwdfd;
+extern int dflag;
+extern int iflag;
+extern int kflag;
+extern int lflag;
+extern int nflag;
+extern int tflag;
+extern int uflag;
+extern int vflag;
+extern int Dflag;
+extern int Hflag;
+extern int Lflag;
+extern int Nflag;
+extern int Xflag;
+extern int Yflag;
+extern int Zflag;
+extern int zeroflag;
+extern int vfpart;
+extern int patime;
+extern int pmtime;
+extern int nodirs;
+extern int pmode;
+extern int pids;
+extern int rmleadslash;
+extern int exit_val;
+extern int docrc;
+extern char *dirptr;
+extern char *argv0;
+extern enum op_mode { OP_PAX, OP_TAR, OP_CPIO } op_mode;
+extern FILE *listf;
+extern int listfd;
+extern char *tempfile;
+extern char *tempbase;
+extern int havechd;
+
+void sig_cleanup(int);
+
+/*
+ * sel_subs.c
+ */
+int sel_chk(ARCHD *);
+int grp_add(char *);
+int usr_add(char *);
+int trng_add(char *);
+
+/*
+ * tables.c
+ */
+int lnk_start(void);
+int chk_lnk(ARCHD *);
+void purg_lnk(ARCHD *);
+void lnk_end(void);
+int ftime_start(void);
+int chk_ftime(ARCHD *);
+int sltab_start(void);
+int sltab_add_sym(const char *_path, const char *_value, mode_t _mode);
+int sltab_add_link(const char *, const struct stat *);
+void sltab_process(int _in_sig);
+int name_start(void);
+int add_name(char *, int, char *);
+void sub_name(char *, int *, int);
+#ifndef NOCPIO
+int dev_start(void);
+int add_dev(ARCHD *);
+int map_dev(ARCHD *, u_long, u_long);
+#else
+# define dev_start() 0
+# define add_dev(x) 0
+# define map_dev(x,y,z) 0
+#endif /* NOCPIO */
+int atdir_start(void);
+void atdir_end(void);
+void add_atdir(char *, dev_t, ino_t, const struct timespec *,
+ const struct timespec *);
+int do_atdir(const char *, dev_t, ino_t);
+int dir_start(void);
+void add_dir(char *, struct stat *, int);
+void delete_dir(dev_t, ino_t);
+void proc_dir(int _in_sig);
+u_int st_hash(const char *, int, int);
+
+/*
+ * tar.c
+ */
+extern int tar_nodir;
+extern char *gnu_name_string, *gnu_link_string;
+int tar_endwr(void);
+off_t tar_endrd(void);
+int tar_trail(ARCHD *, char *, int, int *);
+int tar_id(char *, int);
+int tar_opt(void);
+int tar_rd(ARCHD *, char *);
+int tar_wr(ARCHD *);
+int ustar_id(char *, int);
+int ustar_rd(ARCHD *, char *);
+int ustar_wr(ARCHD *);
+
+/*
+ * tty_subs.c
+ */
+int tty_init(void);
+void tty_prnt(const char *, ...)
+ __attribute__((nonnull(1), format(printf, 1, 2)));
+int tty_read(char *, int);
+void paxwarn(int, const char *, ...)
+ __attribute__((nonnull(2), format(printf, 2, 3)));
+void syswarn(int, int, const char *, ...)
+ __attribute__((nonnull(3), format(printf, 3, 4)));
diff --git a/bin/pax/file_subs.c b/bin/pax/file_subs.c
new file mode 100644
index 0000000..2c0994f
--- /dev/null
+++ b/bin/pax/file_subs.c
@@ -0,0 +1,1106 @@
+/* $OpenBSD: file_subs.c,v 1.55 2020/03/23 20:04:19 espie Exp $ */
+/* $NetBSD: file_subs.c,v 1.4 1995/03/21 09:07:18 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "pax.h"
+#include "extern.h"
+
+static int
+mk_link(char *, struct stat *, char *, int);
+
+/*
+ * routines that deal with file operations such as: creating, removing;
+ * and setting access modes, uid/gid and times of files
+ */
+
+/*
+ * file_creat()
+ * Create and open a file.
+ * Return:
+ * file descriptor or -1 for failure
+ */
+
+int
+file_creat(ARCHD *arcn)
+{
+ int fd = -1;
+ mode_t file_mode;
+ int oerrno;
+
+ /*
+ * Assume file doesn't exist, so just try to create it, most times this
+ * works. We have to take special handling when the file does exist. To
+ * detect this, we use O_EXCL. For example when trying to create a
+ * file and a character device or fifo exists with the same name, we
+ * can accidently open the device by mistake (or block waiting to open).
+ * If we find that the open has failed, then spend the effort to
+ * figure out why. This strategy was found to have better average
+ * performance in common use than checking the file (and the path)
+ * first with lstat.
+ */
+ file_mode = arcn->sb.st_mode & FILEBITS;
+ if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_EXCL,
+ file_mode)) >= 0)
+ return(fd);
+
+ /*
+ * the file seems to exist. First we try to get rid of it (found to be
+ * the second most common failure when traced). If this fails, only
+ * then we go to the expense to check and create the path to the file
+ */
+ if (unlnk_exist(arcn->name, arcn->type) != 0)
+ return(-1);
+
+ for (;;) {
+ /*
+ * try to open it again, if this fails, check all the nodes in
+ * the path and give it a final try. if chk_path() finds that
+ * it cannot fix anything, we will skip the last attempt
+ */
+ if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC,
+ file_mode)) >= 0)
+ break;
+ oerrno = errno;
+ if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) {
+ syswarn(1, oerrno, "Unable to create %s", arcn->name);
+ return(-1);
+ }
+ }
+ return(fd);
+}
+
+/*
+ * file_close()
+ * Close file descriptor to a file just created by pax. Sets modes,
+ * ownership and times as required.
+ * Return:
+ * 0 for success, -1 for failure
+ */
+
+void
+file_close(ARCHD *arcn, int fd)
+{
+ int res = 0;
+
+ if (fd < 0)
+ return;
+
+ /*
+ * set owner/groups first as this may strip off mode bits we want
+ * then set file permission modes. Then set file access and
+ * modification times.
+ */
+ if (pids)
+ res = fset_ids(arcn->name, fd, arcn->sb.st_uid,
+ arcn->sb.st_gid);
+
+ /*
+ * IMPORTANT SECURITY NOTE:
+ * if not preserving mode or we cannot set uid/gid, then PROHIBIT
+ * set uid/gid bits
+ */
+ if (!pmode || res)
+ arcn->sb.st_mode &= ~(SETBITS);
+ if (pmode)
+ fset_pmode(arcn->name, fd, arcn->sb.st_mode);
+ if (patime || pmtime)
+ fset_ftime(arcn->name, fd, &arcn->sb.st_mtim,
+ &arcn->sb.st_atim, 0);
+ if (close(fd) == -1)
+ syswarn(0, errno, "Unable to close file descriptor on %s",
+ arcn->name);
+}
+
+/*
+ * lnk_creat()
+ * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name
+ * must exist;
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+int
+lnk_creat(ARCHD *arcn)
+{
+ struct stat sb;
+ int res;
+
+ /*
+ * we may be running as root, so we have to be sure that link target
+ * is not a directory, so we lstat and check
+ */
+ if (lstat(arcn->ln_name, &sb) == -1) {
+ syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name,
+ arcn->name);
+ return(-1);
+ }
+
+ if (S_ISDIR(sb.st_mode)) {
+ paxwarn(1, "A hard link to the directory %s is not allowed",
+ arcn->ln_name);
+ return(-1);
+ }
+
+ res = mk_link(arcn->ln_name, &sb, arcn->name, 0);
+ if (res == 0) {
+ /* check for a hardlink to a placeholder symlink */
+ res = sltab_add_link(arcn->name, &sb);
+
+ if (res < 0) {
+ /* arrgh, it failed, clean up */
+ unlink(arcn->name);
+ }
+ }
+
+ return (res);
+}
+
+/*
+ * cross_lnk()
+ * Create a hard link to arcn->org_name from arcn->name. Only used in copy
+ * with the -l flag. No warning or error if this does not succeed (we will
+ * then just create the file)
+ * Return:
+ * 1 if copy() should try to create this file node
+ * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self).
+ */
+
+int
+cross_lnk(ARCHD *arcn)
+{
+ /*
+ * try to make a link to original file (-l flag in copy mode). make
+ * sure we do not try to link to directories in case we are running as
+ * root (and it might succeed).
+ */
+ if (arcn->type == PAX_DIR)
+ return(1);
+ return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1));
+}
+
+/*
+ * chk_same()
+ * In copy mode if we are not trying to make hard links between the src
+ * and destinations, make sure we are not going to overwrite ourselves by
+ * accident. This slows things down a little, but we have to protect all
+ * those people who make typing errors.
+ * Return:
+ * 1 the target does not exist, go ahead and copy
+ * 0 skip it file exists (-k) or may be the same as source file
+ */
+
+int
+chk_same(ARCHD *arcn)
+{
+ struct stat sb;
+
+ /*
+ * if file does not exist, return. if file exists and -k, skip it
+ * quietly
+ */
+ if (lstat(arcn->name, &sb) == -1)
+ return(1);
+ if (kflag)
+ return(0);
+
+ /*
+ * better make sure the user does not have src == dest by mistake
+ */
+ if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) {
+ paxwarn(1, "Unable to copy %s, file would overwrite itself",
+ arcn->name);
+ return(0);
+ }
+ return(1);
+}
+
+/*
+ * mk_link()
+ * try to make a hard link between two files. if ign set, we do not
+ * complain.
+ * Return:
+ * 0 if successful (or we are done with this file but no error, such as
+ * finding the from file exists and the user has set -k).
+ * 1 when ign was set to indicates we could not make the link but we
+ * should try to copy/extract the file as that might work (and is an
+ * allowed option). -1 an error occurred.
+ */
+
+static int
+mk_link(char *to, struct stat *to_sb, char *from, int ign)
+{
+ struct stat sb;
+ int oerrno;
+
+ /*
+ * if from file exists, it has to be unlinked to make the link. If the
+ * file exists and -k is set, skip it quietly
+ */
+ if (lstat(from, &sb) == 0) {
+ if (kflag)
+ return(0);
+
+ /*
+ * make sure it is not the same file, protect the user
+ */
+ if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) {
+ paxwarn(1, "Unable to link file %s to itself", to);
+ return(-1);
+ }
+
+ /*
+ * try to get rid of the file, based on the type
+ */
+ if (S_ISDIR(sb.st_mode)) {
+ if (rmdir(from) == -1) {
+ syswarn(1, errno, "Unable to remove %s", from);
+ return(-1);
+ }
+ delete_dir(sb.st_dev, sb.st_ino);
+ } else if (unlink(from) == -1) {
+ if (!ign) {
+ syswarn(1, errno, "Unable to remove %s", from);
+ return(-1);
+ }
+ return(1);
+ }
+ }
+
+ /*
+ * from file is gone (or did not exist), try to make the hard link.
+ * if it fails, check the path and try it again (if chk_path() says to
+ * try again)
+ */
+ for (;;) {
+ if (linkat(AT_FDCWD, to, AT_FDCWD, from, 0) == 0)
+ break;
+ oerrno = errno;
+ if (!nodirs && chk_path(from, to_sb->st_uid, to_sb->st_gid, ign) == 0)
+ continue;
+ if (!ign) {
+ syswarn(1, oerrno, "Could not link to %s from %s", to,
+ from);
+ return(-1);
+ }
+ return(1);
+ }
+
+ /*
+ * all right the link was made
+ */
+ return(0);
+}
+
+/*
+ * node_creat()
+ * create an entry in the file system (other than a file or hard link).
+ * If successful, sets uid/gid modes and times as required.
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+int
+node_creat(ARCHD *arcn)
+{
+ int res;
+ int ign = 0;
+ int oerrno;
+ int pass = 0;
+ mode_t file_mode;
+ struct stat sb;
+ char target[PATH_MAX];
+ char *nm = arcn->name;
+ int len, defer_pmode = 0;
+
+ /*
+ * create node based on type, if that fails try to unlink the node and
+ * try again. finally check the path and try again. As noted in the
+ * file and link creation routines, this method seems to exhibit the
+ * best performance in general use workloads.
+ */
+ file_mode = arcn->sb.st_mode & FILEBITS;
+
+ for (;;) {
+ switch (arcn->type) {
+ case PAX_DIR:
+ /*
+ * If -h (or -L) was given in tar-mode, follow the
+ * potential symlink chain before trying to create the
+ * directory.
+ */
+ if (op_mode == OP_TAR && Lflag) {
+ while (lstat(nm, &sb) == 0 &&
+ S_ISLNK(sb.st_mode)) {
+ len = readlink(nm, target,
+ sizeof target - 1);
+ if (len == -1) {
+ syswarn(0, errno,
+ "cannot follow symlink %s in chain for %s",
+ nm, arcn->name);
+ res = -1;
+ goto badlink;
+ }
+ target[len] = '\0';
+ nm = target;
+ }
+ }
+ res = mkdir(nm, file_mode);
+
+badlink:
+ if (ign)
+ res = 0;
+ break;
+ case PAX_CHR:
+ file_mode |= S_IFCHR;
+ res = mknod(nm, file_mode, arcn->sb.st_rdev);
+ break;
+ case PAX_BLK:
+ file_mode |= S_IFBLK;
+ res = mknod(nm, file_mode, arcn->sb.st_rdev);
+ break;
+ case PAX_FIF:
+ res = mkfifo(nm, file_mode);
+ break;
+ case PAX_SCK:
+ /*
+ * Skip sockets, operation has no meaning under BSD
+ */
+ paxwarn(0,
+ "%s skipped. Sockets cannot be copied or extracted",
+ nm);
+ return(-1);
+ case PAX_SLK:
+ if (arcn->ln_name[0] != '/' &&
+ !has_dotdot(arcn->ln_name))
+ res = symlink(arcn->ln_name, nm);
+ else {
+ /*
+ * absolute symlinks and symlinks with ".."
+ * have to be deferred to prevent the archive
+ * from bootstrapping itself to outside the
+ * working directory.
+ */
+ res = sltab_add_sym(nm, arcn->ln_name,
+ arcn->sb.st_mode);
+ if (res == 0)
+ defer_pmode = 1;
+ }
+ break;
+ case PAX_CTG:
+ case PAX_HLK:
+ case PAX_HRG:
+ case PAX_REG:
+ default:
+ /*
+ * we should never get here
+ */
+ paxwarn(0, "%s has an unknown file type, skipping",
+ nm);
+ return(-1);
+ }
+
+ /*
+ * if we were able to create the node break out of the loop,
+ * otherwise try to unlink the node and try again. if that
+ * fails check the full path and try a final time.
+ */
+ if (res == 0)
+ break;
+
+ /*
+ * we failed to make the node
+ */
+ oerrno = errno;
+ if ((ign = unlnk_exist(nm, arcn->type)) < 0)
+ return(-1);
+
+ if (++pass <= 1)
+ continue;
+
+ if (nodirs || chk_path(nm,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) {
+ syswarn(1, oerrno, "Could not create: %s", nm);
+ return(-1);
+ }
+ }
+
+ /*
+ * we were able to create the node. set uid/gid, modes and times
+ */
+ if (pids)
+ res = set_ids(nm, arcn->sb.st_uid, arcn->sb.st_gid);
+ else
+ res = 0;
+
+ /*
+ * IMPORTANT SECURITY NOTE:
+ * if not preserving mode or we cannot set uid/gid, then PROHIBIT any
+ * set uid/gid bits
+ */
+ if (!pmode || res)
+ arcn->sb.st_mode &= ~(SETBITS);
+ if (pmode && !defer_pmode)
+ set_pmode(nm, arcn->sb.st_mode);
+
+ if (arcn->type == PAX_DIR && op_mode != OP_CPIO) {
+ /*
+ * Dirs must be processed again at end of extract to set times
+ * and modes to agree with those stored in the archive. However
+ * to allow extract to continue, we may have to also set owner
+ * rights. This allows nodes in the archive that are children
+ * of this directory to be extracted without failure. Both time
+ * and modes will be fixed after the entire archive is read and
+ * before pax exits. To do that safely, we want the dev+ino
+ * of the directory we created.
+ */
+ if (lstat(nm, &sb) == -1) {
+ syswarn(0, errno,"Could not access %s (stat)", nm);
+ } else if (access(nm, R_OK | W_OK | X_OK) == -1) {
+ /*
+ * We have to add rights to the dir, so we make
+ * sure to restore the mode. The mode must be
+ * restored AS CREATED and not as stored if
+ * pmode is not set.
+ */
+ set_pmode(nm,
+ ((sb.st_mode & FILEBITS) | S_IRWXU));
+ if (!pmode)
+ arcn->sb.st_mode = sb.st_mode;
+
+ /*
+ * we have to force the mode to what was set
+ * here, since we changed it from the default
+ * as created.
+ */
+ arcn->sb.st_dev = sb.st_dev;
+ arcn->sb.st_ino = sb.st_ino;
+ add_dir(nm, &(arcn->sb), 1);
+ } else if (pmode || patime || pmtime) {
+ arcn->sb.st_dev = sb.st_dev;
+ arcn->sb.st_ino = sb.st_ino;
+ add_dir(nm, &(arcn->sb), 0);
+ }
+ } else if (patime || pmtime)
+ set_ftime(nm, &arcn->sb.st_mtim, &arcn->sb.st_atim, 0);
+ return(0);
+}
+
+/*
+ * unlnk_exist()
+ * Remove node from file system with the specified name. We pass the type
+ * of the node that is going to replace it. When we try to create a
+ * directory and find that it already exists, we allow processing to
+ * continue as proper modes etc will always be set for it later on.
+ * Return:
+ * 0 is ok to proceed, no file with the specified name exists
+ * -1 we were unable to remove the node, or we should not remove it (-k)
+ * 1 we found a directory and we were going to create a directory.
+ */
+
+int
+unlnk_exist(char *name, int type)
+{
+ struct stat sb;
+
+ /*
+ * the file does not exist, or -k we are done
+ */
+ if (lstat(name, &sb) == -1)
+ return(0);
+ if (kflag)
+ return(-1);
+
+ if (S_ISDIR(sb.st_mode)) {
+ /*
+ * try to remove a directory, if it fails and we were going to
+ * create a directory anyway, tell the caller (return a 1)
+ */
+ if (rmdir(name) == -1) {
+ if (type == PAX_DIR)
+ return(1);
+ syswarn(1,errno,"Unable to remove directory %s", name);
+ return(-1);
+ }
+ delete_dir(sb.st_dev, sb.st_ino);
+ return(0);
+ }
+
+ /*
+ * try to get rid of all non-directory type nodes
+ */
+ if (unlink(name) == -1) {
+ syswarn(1, errno, "Could not unlink %s", name);
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * chk_path()
+ * We were trying to create some kind of node in the file system and it
+ * failed. chk_path() makes sure the path up to the node exists and is
+ * writeable. When we have to create a directory that is missing along the
+ * path somewhere, the directory we create will be set to the same
+ * uid/gid as the file has (when uid and gid are being preserved).
+ * NOTE: this routine is a real performance loss. It is only used as a
+ * last resort when trying to create entries in the file system.
+ * Return:
+ * -1 when it could find nothing it is allowed to fix.
+ * 0 otherwise
+ */
+
+int
+chk_path(char *name, uid_t st_uid, gid_t st_gid, int ign)
+{
+ char *spt = name;
+ char *next;
+ struct stat sb;
+ int retval = -1;
+
+ /*
+ * watch out for paths with nodes stored directly in / (e.g. /bozo)
+ */
+ while (*spt == '/')
+ ++spt;
+
+ for (;;) {
+ /*
+ * work forward from the first / and check each part of the path
+ */
+ spt = strchr(spt, '/');
+ if (spt == NULL)
+ break;
+
+ /*
+ * skip over duplicate slashes; stop if there're only
+ * trailing slashes left
+ */
+ next = spt + 1;
+ while (*next == '/')
+ next++;
+ if (*next == '\0')
+ break;
+
+ *spt = '\0';
+
+ /*
+ * if it exists we assume it is a directory, it is not within
+ * the spec (at least it seems to read that way) to alter the
+ * file system for nodes NOT EXPLICITLY stored on the archive.
+ * If that assumption is changed, you would test the node here
+ * and figure out how to get rid of it (probably like some
+ * recursive unlink()) or fix up the directory permissions if
+ * required (do an access()).
+ */
+ if (lstat(name, &sb) == 0) {
+ *spt = '/';
+ spt = next;
+ continue;
+ }
+
+ /*
+ * the path fails at this point, see if we can create the
+ * needed directory and continue on
+ */
+ if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) == -1) {
+ if (!ign)
+ syswarn(1, errno, "Unable to mkdir %s", name);
+ *spt = '/';
+ retval = -1;
+ break;
+ }
+
+ /*
+ * we were able to create the directory. We will tell the
+ * caller that we found something to fix, and it is ok to try
+ * and create the node again.
+ */
+ retval = 0;
+ if (pids)
+ (void)set_ids(name, st_uid, st_gid);
+
+ /*
+ * make sure the user doesn't have some strange umask that
+ * causes this newly created directory to be unusable. We fix
+ * the modes and restore them back to the creation default at
+ * the end of pax
+ */
+ if ((access(name, R_OK | W_OK | X_OK) == -1) &&
+ (lstat(name, &sb) == 0)) {
+ set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU));
+ add_dir(name, &sb, 1);
+ }
+ *spt = '/';
+ spt = next;
+ continue;
+ }
+ return(retval);
+}
+
+/*
+ * set_ftime()
+ * Set the access time and modification time for a named file. If frc
+ * is non-zero we force these times to be set even if the user did not
+ * request access and/or modification time preservation (this is also
+ * used by -t to reset access times).
+ * When ign is zero, only those times the user has asked for are set, the
+ * other ones are left alone.
+ */
+
+void
+set_ftime(const char *fnm, const struct timespec *mtimp,
+ const struct timespec *atimp, int frc)
+{
+ struct timespec tv[2];
+
+ tv[0] = *atimp;
+ tv[1] = *mtimp;
+
+ if (!frc) {
+ /*
+ * if we are not forcing, only set those times the user wants
+ * set.
+ */
+ if (!patime)
+ tv[0].tv_nsec = UTIME_OMIT;
+ if (!pmtime)
+ tv[1].tv_nsec = UTIME_OMIT;
+ }
+
+ /*
+ * set the times
+ */
+ if (utimensat(AT_FDCWD, fnm, tv, AT_SYMLINK_NOFOLLOW) < 0)
+ syswarn(1, errno, "Access/modification time set failed on: %s",
+ fnm);
+}
+
+void
+fset_ftime(const char *fnm, int fd, const struct timespec *mtimp,
+ const struct timespec *atimp, int frc)
+{
+ struct timespec tv[2];
+
+
+ tv[0] = *atimp;
+ tv[1] = *mtimp;
+
+ if (!frc) {
+ /*
+ * if we are not forcing, only set those times the user wants
+ * set.
+ */
+ if (!patime)
+ tv[0].tv_nsec = UTIME_OMIT;
+ if (!pmtime)
+ tv[1].tv_nsec = UTIME_OMIT;
+ }
+ /*
+ * set the times
+ */
+ if (futimens(fd, tv) == -1)
+ syswarn(1, errno, "Access/modification time set failed on: %s",
+ fnm);
+}
+
+/*
+ * set_ids()
+ * set the uid and gid of a file system node
+ * Return:
+ * 0 when set, -1 on failure
+ */
+
+int
+set_ids(char *fnm, uid_t uid, gid_t gid)
+{
+ if (fchownat(AT_FDCWD, fnm, uid, gid, AT_SYMLINK_NOFOLLOW) == -1) {
+ /*
+ * ignore EPERM unless in verbose mode or being run by root.
+ * if running as pax, POSIX requires a warning.
+ */
+ if (op_mode == OP_PAX || errno != EPERM || vflag ||
+ geteuid() == 0)
+ syswarn(1, errno, "Unable to set file uid/gid of %s",
+ fnm);
+ return(-1);
+ }
+ return(0);
+}
+
+int
+fset_ids(char *fnm, int fd, uid_t uid, gid_t gid)
+{
+ if (fchown(fd, uid, gid) == -1) {
+ /*
+ * ignore EPERM unless in verbose mode or being run by root.
+ * if running as pax, POSIX requires a warning.
+ */
+ if (op_mode == OP_PAX || errno != EPERM || vflag ||
+ geteuid() == 0)
+ syswarn(1, errno, "Unable to set file uid/gid of %s",
+ fnm);
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * set_pmode()
+ * Set file access mode
+ */
+
+void
+set_pmode(char *fnm, mode_t mode)
+{
+ mode &= ABITS;
+ if (fchmodat(AT_FDCWD, fnm, mode, AT_SYMLINK_NOFOLLOW) == -1 && errno != EOPNOTSUPP)
+ syswarn(1, errno, "Could not set permissions on %s", fnm);
+}
+
+void
+fset_pmode(char *fnm, int fd, mode_t mode)
+{
+ mode &= ABITS;
+ if (fchmod(fd, mode) == -1)
+ syswarn(1, errno, "Could not set permissions on %s", fnm);
+}
+
+/*
+ * set_attr()
+ * Given a DIRDATA, restore the mode and times as indicated, but
+ * only after verifying that it's the directory that we wanted.
+ */
+int
+set_attr(const struct file_times *ft, int force_times, mode_t mode,
+ int do_mode, int in_sig)
+{
+ struct stat sb;
+ int fd, r;
+
+ if (!do_mode && !force_times && !patime && !pmtime)
+ return (0);
+
+ /*
+ * We could legitimately go through a symlink here,
+ * so do *not* use O_NOFOLLOW. The dev+ino check will
+ * protect us from evil.
+ */
+ fd = open(ft->ft_name, O_RDONLY | O_DIRECTORY);
+ if (fd == -1) {
+ if (!in_sig)
+ syswarn(1, errno, "Unable to restore mode and times"
+ " for directory: %s", ft->ft_name);
+ return (-1);
+ }
+
+ if (fstat(fd, &sb) == -1) {
+ if (!in_sig)
+ syswarn(1, errno, "Unable to stat directory: %s",
+ ft->ft_name);
+ r = -1;
+ } else if (ft->ft_ino != sb.st_ino || ft->ft_dev != sb.st_dev) {
+ if (!in_sig)
+ paxwarn(1, "Directory vanished before restoring"
+ " mode and times: %s", ft->ft_name);
+ r = -1;
+ } else {
+ /* Whew, it's a match! Is there anything to change? */
+ if (do_mode && (mode & ABITS) != (sb.st_mode & ABITS))
+ fset_pmode(ft->ft_name, fd, mode);
+ if (((force_times || patime) &&
+ timespeccmp(&ft->ft_atim, &sb.st_atim, !=)) ||
+ ((force_times || pmtime) &&
+ timespeccmp(&ft->ft_mtim, &sb.st_mtim, !=)))
+ fset_ftime(ft->ft_name, fd, &ft->ft_mtim,
+ &ft->ft_atim, force_times);
+ r = 0;
+ }
+ close(fd);
+
+ return (r);
+}
+
+
+/*
+ * file_write()
+ * Write/copy a file (during copy or archive extract). This routine knows
+ * how to copy files with lseek holes in it. (Which are read as file
+ * blocks containing all 0's but do not have any file blocks associated
+ * with the data). Typical examples of these are files created by dbm
+ * variants (.pag files). While the file size of these files are huge, the
+ * actual storage is quite small (the files are sparse). The problem is
+ * the holes read as all zeros so are probably stored on the archive that
+ * way (there is no way to determine if the file block is really a hole,
+ * we only know that a file block of all zero's can be a hole).
+ * At this writing, no major archive format knows how to archive files
+ * with holes. However, on extraction (or during copy, -rw) we have to
+ * deal with these files. Without detecting the holes, the files can
+ * consume a lot of file space if just written to disk. This replacement
+ * for write when passed the basic allocation size of a file system block,
+ * uses lseek whenever it detects the input data is all 0 within that
+ * file block. In more detail, the strategy is as follows:
+ * While the input is all zero keep doing an lseek. Keep track of when we
+ * pass over file block boundaries. Only write when we hit a non zero
+ * input. once we have written a file block, we continue to write it to
+ * the end (we stop looking at the input). When we reach the start of the
+ * next file block, start checking for zero blocks again. Working on file
+ * block boundaries significantly reduces the overhead when copying files
+ * that are NOT very sparse. This overhead (when compared to a write) is
+ * almost below the measurement resolution on many systems. Without it,
+ * files with holes cannot be safely copied. It does has a side effect as
+ * it can put holes into files that did not have them before, but that is
+ * not a problem since the file contents are unchanged (in fact it saves
+ * file space). (Except on paging files for diskless clients. But since we
+ * cannot determine one of those file from here, we ignore them). If this
+ * ever ends up on a system where CTG files are supported and the holes
+ * are not desired, just do a conditional test in those routines that
+ * call file_write() and have it call write() instead. BEFORE CLOSING THE
+ * FILE, make sure to call file_flush() when the last write finishes with
+ * an empty block. A lot of file systems will not create an lseek hole at
+ * the end. In this case we drop a single 0 at the end to force the
+ * trailing 0's in the file.
+ * ---Parameters---
+ * rem: how many bytes left in this file system block
+ * isempt: have we written to the file block yet (is it empty)
+ * sz: basic file block allocation size
+ * cnt: number of bytes on this write
+ * str: buffer to write
+ * Return:
+ * number of bytes written, -1 on write (or lseek) error.
+ */
+
+int
+file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz,
+ char *name)
+{
+ char *pt;
+ char *end;
+ int wcnt;
+ char *st = str;
+
+ /*
+ * while we have data to process
+ */
+ while (cnt) {
+ if (!*rem) {
+ /*
+ * We are now at the start of file system block again
+ * (or what we think one is...). start looking for
+ * empty blocks again
+ */
+ *isempt = 1;
+ *rem = sz;
+ }
+
+ /*
+ * only examine up to the end of the current file block or
+ * remaining characters to write, whatever is smaller
+ */
+ wcnt = MINIMUM(cnt, *rem);
+ cnt -= wcnt;
+ *rem -= wcnt;
+ if (*isempt) {
+ /*
+ * have not written to this block yet, so we keep
+ * looking for zero's
+ */
+ pt = st;
+ end = st + wcnt;
+
+ /*
+ * look for a zero filled buffer
+ */
+ while ((pt < end) && (*pt == '\0'))
+ ++pt;
+
+ if (pt == end) {
+ /*
+ * skip, buf is empty so far
+ */
+ if (fd > -1 &&
+ lseek(fd, wcnt, SEEK_CUR) < 0) {
+ syswarn(1,errno,"File seek on %s",
+ name);
+ return(-1);
+ }
+ st = pt;
+ continue;
+ }
+ /*
+ * drat, the buf is not zero filled
+ */
+ *isempt = 0;
+ }
+
+ /*
+ * have non-zero data in this file system block, have to write
+ */
+ if (write(fd, st, wcnt) != wcnt) {
+ syswarn(1, errno, "Failed write to file %s", name);
+ return(-1);
+ }
+ st += wcnt;
+ }
+ return(st - str);
+}
+
+/*
+ * file_flush()
+ * when the last file block in a file is zero, many file systems will not
+ * let us create a hole at the end. To get the last block with zeros, we
+ * write the last BYTE with a zero (back up one byte and write a zero).
+ */
+
+void
+file_flush(int fd, char *fname, int isempt)
+{
+ static char blnk[] = "\0";
+
+ /*
+ * silly test, but make sure we are only called when the last block is
+ * filled with all zeros.
+ */
+ if (!isempt)
+ return;
+
+ /*
+ * move back one byte and write a zero
+ */
+ if (lseek(fd, -1, SEEK_CUR) < 0) {
+ syswarn(1, errno, "Failed seek on file %s", fname);
+ return;
+ }
+
+ if (write(fd, blnk, 1) == -1)
+ syswarn(1, errno, "Failed write to file %s", fname);
+}
+
+/*
+ * rdfile_close()
+ * close a file we have been reading (to copy or archive). If we have to
+ * reset access time (tflag) do so (the times are stored in arcn).
+ */
+
+void
+rdfile_close(ARCHD *arcn, int *fd)
+{
+ /*
+ * make sure the file is open
+ */
+ if (*fd < 0)
+ return;
+
+ /*
+ * user wants last access time reset
+ */
+ if (tflag)
+ fset_ftime(arcn->org_name, *fd, &arcn->sb.st_mtim,
+ &arcn->sb.st_atim, 1);
+
+ (void)close(*fd);
+ *fd = -1;
+}
+
+/*
+ * set_crc()
+ * read a file to calculate its crc. This is a real drag. Archive formats
+ * that have this, end up reading the file twice (we have to write the
+ * header WITH the crc before writing the file contents. Oh well...
+ * Return:
+ * 0 if was able to calculate the crc, -1 otherwise
+ */
+
+int
+set_crc(ARCHD *arcn, int fd)
+{
+ int i;
+ int res;
+ off_t cpcnt = 0;
+ size_t size;
+ u_int32_t crc = 0;
+ char tbuf[FILEBLK];
+ struct stat sb;
+
+ if (fd < 0) {
+ /*
+ * hmm, no fd, should never happen. well no crc then.
+ */
+ arcn->crc = 0;
+ return(0);
+ }
+
+ if ((size = arcn->sb.st_blksize) > sizeof(tbuf))
+ size = sizeof(tbuf);
+
+ /*
+ * read all the bytes we think that there are in the file. If the user
+ * is trying to archive an active file, forget this file.
+ */
+ for (;;) {
+ if ((res = read(fd, tbuf, size)) <= 0)
+ break;
+ cpcnt += res;
+ for (i = 0; i < res; ++i)
+ crc += (tbuf[i] & 0xff);
+ }
+
+ /*
+ * safety check. we want to avoid archiving files that are active as
+ * they can create inconsistent archive copies.
+ */
+ if (cpcnt != arcn->sb.st_size)
+ paxwarn(1, "File changed size %s", arcn->org_name);
+ else if (fstat(fd, &sb) == -1)
+ syswarn(1, errno, "Failed stat on %s", arcn->org_name);
+ else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=))
+ paxwarn(1, "File %s was modified during read", arcn->org_name);
+ else if (lseek(fd, 0, SEEK_SET) < 0)
+ syswarn(1, errno, "File rewind failed on: %s", arcn->org_name);
+ else {
+ arcn->crc = crc;
+ return(0);
+ }
+ return(-1);
+}
diff --git a/bin/pax/ftree.c b/bin/pax/ftree.c
new file mode 100644
index 0000000..b780dbb
--- /dev/null
+++ b/bin/pax/ftree.c
@@ -0,0 +1,566 @@
+/* $OpenBSD: ftree.c,v 1.42 2019/06/28 13:34:59 deraadt Exp $ */
+/* $NetBSD: ftree.c,v 1.4 1995/03/21 09:07:21 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fts.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * Data structure used to store the file args to be handed to fts().
+ * It keeps track of which args generated a "selected" member.
+ */
+typedef struct ftree {
+ char *fname; /* file tree name */
+ int refcnt; /* has tree had a selected file? */
+ int newercnt; /* skipped due to -u/-D */
+ int chflg; /* change directory flag */
+ struct ftree *fow; /* pointer to next entry on list */
+} FTREE;
+
+
+/*
+ * routines to interface with the fts library function.
+ *
+ * file args supplied to pax are stored on a single linked list (of type FTREE)
+ * and given to fts to be processed one at a time. pax "selects" files from
+ * the expansion of each arg into the corresponding file tree (if the arg is a
+ * directory, otherwise the node itself is just passed to pax). The selection
+ * is modified by the -n and -u flags. The user is informed when a specific
+ * file arg does not generate any selected files. -n keeps expanding the file
+ * tree arg until one of its files is selected, then skips to the next file
+ * arg. when the user does not supply the file trees as command line args to
+ * pax, they are read from stdin
+ */
+
+static FTS *ftsp = NULL; /* current FTS handle */
+static int ftsopts; /* options to be used on fts_open */
+static char *farray[2]; /* array for passing each arg to fts */
+static FTREE *fthead = NULL; /* head of linked list of file args */
+static FTREE *fttail = NULL; /* tail of linked list of file args */
+static FTREE *ftcur = NULL; /* current file arg being processed */
+static FTSENT *ftent = NULL; /* current file tree entry */
+static int ftree_skip; /* when set skip to next file arg */
+
+static int ftree_arg(void);
+static char *getpathname(char *, int);
+
+/*
+ * ftree_start()
+ * initialize the options passed to fts_open() during this run of pax
+ * options are based on the selection of pax options by the user
+ * fts_start() also calls fts_arg() to open the first valid file arg. We
+ * also attempt to reset directory access times when -t (tflag) is set.
+ * Return:
+ * 0 if there is at least one valid file arg to process, -1 otherwise
+ */
+
+int
+ftree_start(void)
+{
+ /*
+ * set up the operation mode of fts, open the first file arg. We must
+ * use FTS_NOCHDIR, as the user may have to open multiple archives and
+ * if fts did a chdir off into the boondocks, we may create an archive
+ * volume in an place where the user did not expect to.
+ */
+ ftsopts = FTS_NOCHDIR;
+
+ /*
+ * optional user flags that effect file traversal
+ * -H command line symlink follow only (half follow)
+ * -L follow sylinks (logical)
+ * -P do not follow sylinks (physical). This is the default.
+ * -X do not cross over mount points
+ * -t preserve access times on files read.
+ * -n select only the first member of a file tree when a match is found
+ * -d do not extract subtrees rooted at a directory arg.
+ */
+ if (Lflag)
+ ftsopts |= FTS_LOGICAL;
+ else
+ ftsopts |= FTS_PHYSICAL;
+ if (Hflag)
+ ftsopts |= FTS_COMFOLLOW;
+ if (Xflag)
+ ftsopts |= FTS_XDEV;
+
+ if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) {
+ paxwarn(1, "Unable to allocate memory for file name buffer");
+ return(-1);
+ }
+
+ if (ftree_arg() < 0)
+ return(-1);
+ if (tflag && (atdir_start() < 0))
+ return(-1);
+ return(0);
+}
+
+/*
+ * ftree_add()
+ * add the arg to the linked list of files to process. Each will be
+ * processed by fts one at a time
+ * Return:
+ * 0 if added to the linked list, -1 if failed
+ */
+
+int
+ftree_add(char *str, int chflg)
+{
+ FTREE *ft;
+ int len;
+
+ /*
+ * simple check for bad args
+ */
+ if ((str == NULL) || (*str == '\0')) {
+ paxwarn(0, "Invalid file name argument");
+ return(-1);
+ }
+
+ /*
+ * allocate FTREE node and add to the end of the linked list (args are
+ * processed in the same order they were passed to pax). Get rid of any
+ * trailing / the user may pass us. (watch out for / by itself).
+ */
+ if ((ft = malloc(sizeof(FTREE))) == NULL) {
+ paxwarn(0, "Unable to allocate memory for filename");
+ return(-1);
+ }
+
+ if (((len = strlen(str) - 1) > 0) && (str[len] == '/'))
+ str[len] = '\0';
+ ft->fname = str;
+ ft->refcnt = 0;
+ ft->newercnt = 0;
+ ft->chflg = chflg;
+ ft->fow = NULL;
+ if (fthead == NULL) {
+ fttail = fthead = ft;
+ return(0);
+ }
+ fttail->fow = ft;
+ fttail = ft;
+ return(0);
+}
+
+/*
+ * ftree_sel()
+ * this entry has been selected by pax. bump up reference count and handle
+ * -n and -d processing.
+ */
+
+void
+ftree_sel(ARCHD *arcn)
+{
+ /*
+ * set reference bit for this pattern. This linked list is only used
+ * when file trees are supplied pax as args. The list is not used when
+ * the trees are read from stdin.
+ */
+ if (ftcur != NULL)
+ ftcur->refcnt = 1;
+
+ /*
+ * if -n we are done with this arg, force a skip to the next arg when
+ * pax asks for the next file in next_file().
+ * if -d we tell fts only to match the directory (if the arg is a dir)
+ * and not the entire file tree rooted at that point.
+ */
+ if (nflag)
+ ftree_skip = 1;
+
+ if (!dflag || (arcn->type != PAX_DIR))
+ return;
+
+ if (ftent != NULL)
+ (void)fts_set(ftsp, ftent, FTS_SKIP);
+}
+
+/*
+ * ftree_skipped_newer()
+ * file has been skipped because a newer file exists and -u/-D given
+ */
+
+void
+ftree_skipped_newer(ARCHD *arcn)
+{
+ /* skipped due to -u/-D, mark accordingly */
+ if (ftcur != NULL)
+ ftcur->newercnt = 1;
+}
+
+/*
+ * ftree_chk()
+ * called at end on pax execution. Prints all those file args that did not
+ * have a selected member (reference count still 0)
+ */
+
+void
+ftree_chk(void)
+{
+ FTREE *ft;
+ int wban = 0;
+
+ /*
+ * make sure all dir access times were reset.
+ */
+ if (tflag)
+ atdir_end();
+
+ /*
+ * walk down list and check reference count. Print out those members
+ * that never had a match
+ */
+ for (ft = fthead; ft != NULL; ft = ft->fow) {
+ if ((ft->refcnt > 0) || ft->newercnt > 0 || ft->chflg)
+ continue;
+ if (wban == 0) {
+ paxwarn(1,"WARNING! These file names were not selected:");
+ ++wban;
+ }
+ (void)fprintf(stderr, "%s\n", ft->fname);
+ }
+}
+
+/*
+ * ftree_arg()
+ * Get the next file arg for fts to process. Can be from either the linked
+ * list or read from stdin when the user did not them as args to pax. Each
+ * arg is processed until the first successful fts_open().
+ * Return:
+ * 0 when the next arg is ready to go, -1 if out of file args (or EOF on
+ * stdin).
+ */
+
+static int
+ftree_arg(void)
+{
+
+ /*
+ * close off the current file tree
+ */
+ if (ftsp != NULL) {
+ (void)fts_close(ftsp);
+ ftsp = NULL;
+ }
+
+ /*
+ * keep looping until we get a valid file tree to process. Stop when we
+ * reach the end of the list (or get an eof on stdin)
+ */
+ for (;;) {
+ if (fthead == NULL) {
+ /*
+ * the user didn't supply any args, get the file trees
+ * to process from stdin;
+ */
+ if (getpathname(farray[0], PAXPATHLEN+1) == NULL)
+ return(-1);
+ } else {
+ /*
+ * the user supplied the file args as arguments to pax
+ */
+ if (ftcur == NULL)
+ ftcur = fthead;
+ else if ((ftcur = ftcur->fow) == NULL)
+ return(-1);
+ if (ftcur->chflg) {
+ /* First fchdir() back... */
+ if (fchdir(cwdfd) == -1) {
+ syswarn(1, errno,
+ "Can't fchdir to starting directory");
+ return(-1);
+ }
+ if (chdir(ftcur->fname) == -1) {
+ syswarn(1, errno, "Can't chdir to %s",
+ ftcur->fname);
+ return(-1);
+ }
+ continue;
+ } else
+ farray[0] = ftcur->fname;
+ }
+
+ /*
+ * watch it, fts wants the file arg stored in a array of char
+ * ptrs, with the last one a null. we use a two element array
+ * and set farray[0] to point at the buffer with the file name
+ * in it. We cannot pass all the file args to fts at one shot
+ * as we need to keep a handle on which file arg generates what
+ * files (the -n and -d flags need this). If the open is
+ * successful, return a 0.
+ */
+ if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL)
+ break;
+ }
+ return(0);
+}
+
+/*
+ * next_file()
+ * supplies the next file to process in the supplied archd structure.
+ * Return:
+ * 0 when contents of arcn have been set with the next file, -1 when done.
+ */
+
+int
+next_file(ARCHD *arcn)
+{
+ int cnt;
+
+ /*
+ * ftree_sel() might have set the ftree_skip flag if the user has the
+ * -n option and a file was selected from this file arg tree. (-n says
+ * only one member is matched for each pattern) ftree_skip being 1
+ * forces us to go to the next arg now.
+ */
+ if (ftree_skip) {
+ /*
+ * clear and go to next arg
+ */
+ ftree_skip = 0;
+ if (ftree_arg() < 0)
+ return(-1);
+ }
+
+ /*
+ * loop until we get a valid file to process
+ */
+ for (;;) {
+ if ((ftent = fts_read(ftsp)) == NULL) {
+ if (errno)
+ syswarn(1, errno, "next_file");
+ /*
+ * out of files in this tree, go to next arg, if none
+ * we are done
+ */
+ if (ftree_arg() < 0)
+ return(-1);
+ continue;
+ }
+
+ /*
+ * handle each type of fts_read() flag
+ */
+ switch (ftent->fts_info) {
+ case FTS_D:
+ case FTS_DEFAULT:
+ case FTS_F:
+ case FTS_SL:
+ case FTS_SLNONE:
+ /*
+ * these are all ok
+ */
+ break;
+ case FTS_DP:
+ /*
+ * already saw this directory. If the user wants file
+ * access times reset, we use this to restore the
+ * access time for this directory since this is the
+ * last time we will see it in this file subtree
+ * remember to force the time (this is -t on a read
+ * directory, not a created directory).
+ */
+ if (!tflag)
+ continue;
+ do_atdir(ftent->fts_path, ftent->fts_statp->st_dev,
+ ftent->fts_statp->st_ino);
+ continue;
+ case FTS_DC:
+ /*
+ * fts claims a file system cycle
+ */
+ paxwarn(1,"File system cycle found at %s",ftent->fts_path);
+ continue;
+ case FTS_DNR:
+ syswarn(1, ftent->fts_errno,
+ "Unable to read directory %s", ftent->fts_path);
+ continue;
+ case FTS_ERR:
+ syswarn(1, ftent->fts_errno,
+ "File system traversal error");
+ continue;
+ case FTS_NS:
+ case FTS_NSOK:
+ syswarn(1, ftent->fts_errno,
+ "Unable to access %s", ftent->fts_path);
+ continue;
+ }
+
+ /*
+ * ok got a file tree node to process. copy info into arcn
+ * structure (initialize as required)
+ */
+ arcn->skip = 0;
+ arcn->pad = 0;
+ arcn->ln_nlen = 0;
+ arcn->ln_name[0] = '\0';
+ memcpy(&arcn->sb, ftent->fts_statp, sizeof(arcn->sb));
+
+ /*
+ * file type based set up and copy into the arcn struct
+ * SIDE NOTE:
+ * we try to reset the access time on all files and directories
+ * we may read when the -t flag is specified. files are reset
+ * when we close them after copying. we reset the directories
+ * when we are done with their file tree (we also clean up at
+ * end in case we cut short a file tree traversal). However
+ * there is no way to reset access times on symlinks.
+ */
+ switch (S_IFMT & arcn->sb.st_mode) {
+ case S_IFDIR:
+ arcn->type = PAX_DIR;
+ if (!tflag)
+ break;
+ add_atdir(ftent->fts_path, arcn->sb.st_dev,
+ arcn->sb.st_ino, &arcn->sb.st_mtim,
+ &arcn->sb.st_atim);
+ break;
+ case S_IFCHR:
+ arcn->type = PAX_CHR;
+ break;
+ case S_IFBLK:
+ arcn->type = PAX_BLK;
+ break;
+ case S_IFREG:
+ /*
+ * only regular files with have data to store on the
+ * archive. all others will store a zero length skip.
+ * the skip field is used by pax for actual data it has
+ * to read (or skip over).
+ */
+ arcn->type = PAX_REG;
+ arcn->skip = arcn->sb.st_size;
+ break;
+ case S_IFLNK:
+ arcn->type = PAX_SLK;
+ /*
+ * have to read the symlink path from the file
+ */
+ if ((cnt = readlink(ftent->fts_path, arcn->ln_name,
+ PAXPATHLEN)) == -1) {
+ syswarn(1, errno, "Unable to read symlink %s",
+ ftent->fts_path);
+ continue;
+ }
+ /*
+ * set link name length, watch out readlink does not
+ * NUL terminate the link path
+ */
+ arcn->ln_name[cnt] = '\0';
+ arcn->ln_nlen = cnt;
+ break;
+ case S_IFSOCK:
+ /*
+ * under BSD storing a socket is senseless but we will
+ * let the format specific write function make the
+ * decision of what to do with it.
+ */
+ arcn->type = PAX_SCK;
+ break;
+ case S_IFIFO:
+ arcn->type = PAX_FIF;
+ break;
+ }
+ break;
+ }
+
+ /*
+ * copy file name, set file name length
+ */
+ arcn->nlen = strlcpy(arcn->name, ftent->fts_path, sizeof(arcn->name));
+ if ((size_t)arcn->nlen >= sizeof(arcn->name))
+ arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */
+ arcn->org_name = ftent->fts_path;
+ return(0);
+}
+
+/*
+ * getpathname()
+ * Reads a pathname from stdin, handling NUL- or newline-termination.
+ * Return:
+ * NULL at end of file, otherwise the NUL-terminated buffer.
+ */
+
+static char *
+getpathname(char *buf, int buflen)
+{
+ char *bp, *ep;
+ int ch, term;
+
+ if (zeroflag) {
+ /*
+ * Read a NUL-terminated pathname, being especially
+ * paranoid about proper termination and pathname length.
+ */
+ for (bp = buf, ep = buf + buflen; bp < ep; bp++) {
+ if ((ch = getchar()) == EOF) {
+ if (bp != buf)
+ paxwarn(1, "Ignoring unterminated "
+ "pathname at EOF");
+ return(NULL);
+ }
+ if ((*bp = ch) == '\0')
+ return(buf);
+ }
+ /* Too long - skip this path */
+ *--bp = '\0';
+ term = '\0';
+ } else {
+ if (fgets(buf, buflen, stdin) == NULL)
+ return(NULL);
+ if ((bp = strchr(buf, '\n')) != NULL || feof(stdin)) {
+ if (bp != NULL)
+ *bp = '\0';
+ return(buf);
+ }
+ /* Too long - skip this path */
+ term = '\n';
+ }
+ while ((ch = getchar()) != term && ch != EOF)
+ continue;
+ paxwarn(1, "Ignoring too-long pathname: %s", buf);
+ return(NULL);
+}
diff --git a/bin/pax/gen_subs.c b/bin/pax/gen_subs.c
new file mode 100644
index 0000000..7eb8200
--- /dev/null
+++ b/bin/pax/gen_subs.c
@@ -0,0 +1,401 @@
+/* $OpenBSD: gen_subs.c,v 1.32 2016/08/26 05:06:14 guenther Exp $ */
+/* $NetBSD: gen_subs.c,v 1.5 1995/03/21 09:07:26 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <utmp.h>
+#include <vis.h>
+#ifndef major
+#include <sys/sysmacros.h>
+#endif
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * a collection of general purpose subroutines used by pax
+ */
+
+/*
+ * constants used by ls_list() when printing out archive members
+ */
+#define MODELEN 20
+#define DATELEN 64
+#define SECSPERDAY (24 * 60 * 60)
+#define SIXMONTHS (SECSPERDAY * 365 / 2)
+#define CURFRMT "%b %e %H:%M"
+#define OLDFRMT "%b %e %Y"
+#define NAME_WIDTH 8
+#define TIMEFMT(t, now) \
+ (((t) + SIXMONTHS <= (now) || (t) > (now)) ? OLDFRMT : CURFRMT)
+
+/*
+ * ls_list()
+ * list the members of an archive in ls format
+ */
+
+void
+ls_list(ARCHD *arcn, time_t now, FILE *fp)
+{
+ struct stat *sbp;
+ char f_mode[MODELEN];
+ char f_date[DATELEN];
+ int term;
+
+ term = zeroflag ? '\0' : '\n'; /* path termination character */
+
+ /*
+ * if not verbose, just print the file name
+ */
+ if (!vflag) {
+ if (zeroflag)
+ (void)fputs(arcn->name, fp);
+ else
+ safe_print(arcn->name, fp);
+ (void)putc(term, fp);
+ (void)fflush(fp);
+ return;
+ }
+
+ /*
+ * user wants long mode
+ */
+ sbp = &(arcn->sb);
+ strmode(sbp->st_mode, f_mode);
+
+ /*
+ * print file mode, link count, uid, gid and time
+ */
+ if (strftime(f_date, sizeof(f_date), TIMEFMT(sbp->st_mtime, now),
+ localtime(&(sbp->st_mtime))) == 0)
+ f_date[0] = '\0';
+ (void)fprintf(fp, "%s%2u %-*.*s %-*.*s ", f_mode, (unsigned)sbp->st_nlink,
+ NAME_WIDTH, UT_NAMESIZE, user_from_uid(sbp->st_uid, 0),
+ NAME_WIDTH, UT_NAMESIZE, group_from_gid(sbp->st_gid, 0));
+
+ /*
+ * print device id's for devices, or sizes for other nodes
+ */
+ if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK))
+ (void)fprintf(fp, "%4lu, %4lu ",
+ (unsigned long)MAJOR(sbp->st_rdev),
+ (unsigned long)MINOR(sbp->st_rdev));
+ else {
+ (void)fprintf(fp, "%9zu ", sbp->st_size);
+ }
+
+ /*
+ * print name and link info for hard and soft links
+ */
+ (void)fputs(f_date, fp);
+ (void)putc(' ', fp);
+ safe_print(arcn->name, fp);
+ if (PAX_IS_HARDLINK(arcn->type)) {
+ fputs(" == ", fp);
+ safe_print(arcn->ln_name, fp);
+ } else if (arcn->type == PAX_SLK) {
+ fputs(" -> ", fp);
+ safe_print(arcn->ln_name, fp);
+ }
+ (void)putc(term, fp);
+ (void)fflush(fp);
+}
+
+/*
+ * tty_ls()
+ * print a short summary of file to tty.
+ */
+
+void
+ls_tty(ARCHD *arcn)
+{
+ char f_date[DATELEN];
+ char f_mode[MODELEN];
+ time_t now = time(NULL);
+
+ /*
+ * convert time to string, and print
+ */
+ if (strftime(f_date, DATELEN, TIMEFMT(arcn->sb.st_mtime, now),
+ localtime(&(arcn->sb.st_mtime))) == 0)
+ f_date[0] = '\0';
+ strmode(arcn->sb.st_mode, f_mode);
+ tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name);
+}
+
+void
+safe_print(const char *str, FILE *fp)
+{
+ char visbuf[5];
+ const char *cp;
+
+ /*
+ * if printing to a tty, use vis(3) to print special characters.
+ */
+ if (isatty(fileno(fp))) {
+ for (cp = str; *cp; cp++) {
+ (void)vis(visbuf, cp[0], VIS_CSTYLE, cp[1]);
+ (void)fputs(visbuf, fp);
+ }
+ } else {
+ (void)fputs(str, fp);
+ }
+}
+
+/*
+ * asc_ul()
+ * convert hex/octal character string into a u_long. We do not have to
+ * check for overflow! (the headers in all supported formats are not large
+ * enough to create an overflow).
+ * NOTE: strings passed to us are NOT TERMINATED.
+ * Return:
+ * unsigned long value
+ */
+
+u_long
+asc_ul(char *str, int len, int base)
+{
+ char *stop;
+ u_long tval = 0;
+
+ stop = str + len;
+
+ /*
+ * skip over leading blanks and zeros
+ */
+ while ((str < stop) && ((*str == ' ') || (*str == '0')))
+ ++str;
+
+ /*
+ * for each valid digit, shift running value (tval) over to next digit
+ * and add next digit
+ */
+ if (base == HEX) {
+ while (str < stop) {
+ if ((*str >= '0') && (*str <= '9'))
+ tval = (tval << 4) + (*str++ - '0');
+ else if ((*str >= 'A') && (*str <= 'F'))
+ tval = (tval << 4) + 10 + (*str++ - 'A');
+ else if ((*str >= 'a') && (*str <= 'f'))
+ tval = (tval << 4) + 10 + (*str++ - 'a');
+ else
+ break;
+ }
+ } else {
+ while ((str < stop) && (*str >= '0') && (*str <= '7'))
+ tval = (tval << 3) + (*str++ - '0');
+ }
+ return(tval);
+}
+
+/*
+ * ul_asc()
+ * convert an unsigned long into an hex/oct ascii string. pads with LEADING
+ * ascii 0's to fill string completely
+ * NOTE: the string created is NOT TERMINATED.
+ */
+
+int
+ul_asc(u_long val, char *str, int len, int base)
+{
+ char *pt;
+ u_long digit;
+
+ /*
+ * WARNING str is not '\0' terminated by this routine
+ */
+ pt = str + len - 1;
+
+ /*
+ * do a tailwise conversion (start at right most end of string to place
+ * least significant digit). Keep shifting until conversion value goes
+ * to zero (all digits were converted)
+ */
+ if (base == HEX) {
+ while (pt >= str) {
+ if ((digit = (val & 0xf)) < 10)
+ *pt-- = '0' + (char)digit;
+ else
+ *pt-- = 'a' + (char)(digit - 10);
+ val >>= 4;
+ if (val == 0)
+ break;
+ }
+ } else {
+ while (pt >= str) {
+ *pt-- = '0' + (char)(val & 0x7);
+ val >>= 3;
+ if (val == 0)
+ break;
+ }
+ }
+
+ /*
+ * pad with leading ascii ZEROS. We return -1 if we ran out of space.
+ */
+ while (pt >= str)
+ *pt-- = '0';
+ if (val != 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * asc_ull()
+ * Convert hex/octal character string into a unsigned long long.
+ * We do not have to check for overflow! (The headers in all
+ * supported formats are not large enough to create an overflow).
+ * NOTE: strings passed to us are NOT TERMINATED.
+ * Return:
+ * unsigned long long value
+ */
+
+unsigned long long
+asc_ull(char *str, int len, int base)
+{
+ char *stop;
+ unsigned long long tval = 0;
+
+ stop = str + len;
+
+ /*
+ * skip over leading blanks and zeros
+ */
+ while ((str < stop) && ((*str == ' ') || (*str == '0')))
+ ++str;
+
+ /*
+ * for each valid digit, shift running value (tval) over to next digit
+ * and add next digit
+ */
+ if (base == HEX) {
+ while (str < stop) {
+ if ((*str >= '0') && (*str <= '9'))
+ tval = (tval << 4) + (*str++ - '0');
+ else if ((*str >= 'A') && (*str <= 'F'))
+ tval = (tval << 4) + 10 + (*str++ - 'A');
+ else if ((*str >= 'a') && (*str <= 'f'))
+ tval = (tval << 4) + 10 + (*str++ - 'a');
+ else
+ break;
+ }
+ } else {
+ while ((str < stop) && (*str >= '0') && (*str <= '7'))
+ tval = (tval << 3) + (*str++ - '0');
+ }
+ return(tval);
+}
+
+/*
+ * ull_asc()
+ * Convert an unsigned long long into a hex/oct ascii string.
+ * Pads with LEADING ascii 0's to fill string completely
+ * NOTE: the string created is NOT TERMINATED.
+ */
+
+int
+ull_asc(unsigned long long val, char *str, int len, int base)
+{
+ char *pt;
+ unsigned long long digit;
+
+ /*
+ * WARNING str is not '\0' terminated by this routine
+ */
+ pt = str + len - 1;
+
+ /*
+ * do a tailwise conversion (start at right most end of string to place
+ * least significant digit). Keep shifting until conversion value goes
+ * to zero (all digits were converted)
+ */
+ if (base == HEX) {
+ while (pt >= str) {
+ if ((digit = (val & 0xf)) < 10)
+ *pt-- = '0' + (char)digit;
+ else
+ *pt-- = 'a' + (char)(digit - 10);
+ val >>= 4;
+ if (val == 0)
+ break;
+ }
+ } else {
+ while (pt >= str) {
+ *pt-- = '0' + (char)(val & 0x7);
+ val >>= 3;
+ if (val == 0)
+ break;
+ }
+ }
+
+ /*
+ * pad with leading ascii ZEROS. We return -1 if we ran out of space.
+ */
+ while (pt >= str)
+ *pt-- = '0';
+ if (val != 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * Copy at max min(bufz, fieldsz) chars from field to buf, stopping
+ * at the first NUL char. NUL terminate buf if there is room left.
+ */
+size_t
+fieldcpy(char *buf, size_t bufsz, const char *field, size_t fieldsz)
+{
+ char *p = buf;
+ const char *q = field;
+ size_t i = 0;
+
+ if (fieldsz > bufsz)
+ fieldsz = bufsz;
+ while (i < fieldsz && *q != '\0') {
+ *p++ = *q++;
+ i++;
+ }
+ if (i < bufsz)
+ *p = '\0';
+ return(i);
+}
diff --git a/bin/pax/getoldopt.c b/bin/pax/getoldopt.c
new file mode 100644
index 0000000..8ceb189
--- /dev/null
+++ b/bin/pax/getoldopt.c
@@ -0,0 +1,69 @@
+/* $OpenBSD: getoldopt.c,v 1.9 2009/10/27 23:59:22 deraadt Exp $ */
+/* $NetBSD: getoldopt.c,v 1.3 1995/03/21 09:07:28 cgd Exp $ */
+
+/*
+ * Plug-compatible replacement for getopt() for parsing tar-like
+ * arguments. If the first argument begins with "-", it uses getopt;
+ * otherwise, it uses the old rules used by tar, dump, and ps.
+ *
+ * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) and placed
+ * in the Public Domain for your edification and enjoyment.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "pax.h"
+#include "extern.h"
+
+int
+getoldopt(int argc, char **argv, const char *optstring)
+{
+ static char *key; /* Points to next keyletter */
+ static char use_getopt; /* !=0 if argv[1][0] was '-' */
+ char c;
+ char *place;
+
+ optarg = NULL;
+
+ if (key == NULL) { /* First time */
+ if (argc < 2)
+ return (-1);
+ key = argv[1];
+ if (*key == '-')
+ use_getopt++;
+ else
+ optind = 2;
+ }
+
+ if (use_getopt)
+ return (getopt(argc, argv, optstring));
+
+ c = *key++;
+ if (c == '\0') {
+ key--;
+ return (-1);
+ }
+ place = strchr(optstring, c);
+
+ if (place == NULL || c == ':') {
+ fprintf(stderr, "%s: unknown option %c\n", argv[0], c);
+ return ('?');
+ }
+
+ place++;
+ if (*place == ':') {
+ if (optind < argc) {
+ optarg = argv[optind];
+ optind++;
+ } else {
+ fprintf(stderr, "%s: %c argument missing\n",
+ argv[0], c);
+ return ('?');
+ }
+ }
+
+ return (c);
+}
diff --git a/bin/pax/options.c b/bin/pax/options.c
new file mode 100644
index 0000000..917414c
--- /dev/null
+++ b/bin/pax/options.c
@@ -0,0 +1,1788 @@
+/* $OpenBSD: options.c,v 1.103 2019/11/15 20:34:17 naddy Exp $ */
+/* $NetBSD: options.c,v 1.6 1996/03/26 23:54:18 mrg Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <limits.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "cpio.h"
+#include "tar.h"
+#include "extern.h"
+
+/*
+ * argv[0] names. Used for tar and cpio emulation
+ */
+
+#define NM_TAR "tar"
+#define NM_CPIO "cpio"
+#define NM_PAX "pax"
+
+/*
+ * Constants used to specify the legal sets of flags in pax. For each major
+ * operation mode of pax, a set of illegal flags is defined. If any one of
+ * those illegal flags are found set, we scream and exit
+ */
+
+/*
+ * flags (one for each option).
+ */
+#define AF 0x00000001
+#define BF 0x00000002
+#define CF 0x00000004
+#define DF 0x00000008
+#define FF 0x00000010
+#define IF 0x00000020
+#define KF 0x00000040
+#define LF 0x00000080
+#define NF 0x00000100
+#define OF 0x00000200
+#define PF 0x00000400
+#define RF 0x00000800
+#define SF 0x00001000
+#define TF 0x00002000
+#define UF 0x00004000
+#define VF 0x00008000
+#define WF 0x00010000
+#define XF 0x00020000
+#define CBF 0x00040000 /* nonstandard extension */
+#define CDF 0x00080000 /* nonstandard extension */
+#define CEF 0x00100000 /* nonstandard extension */
+#define CGF 0x00200000 /* nonstandard extension */
+#define CHF 0x00400000 /* nonstandard extension */
+#define CLF 0x00800000 /* nonstandard extension */
+#define CPF 0x01000000 /* nonstandard extension */
+#define CTF 0x02000000 /* nonstandard extension */
+#define CUF 0x04000000 /* nonstandard extension */
+#define CXF 0x08000000
+#define CYF 0x10000000 /* nonstandard extension */
+#define CZF 0x20000000 /* nonstandard extension */
+#define C0F 0x40000000 /* nonstandard extension */
+
+/*
+ * ascii string indexed by bit position above (alter the above and you must
+ * alter this string) used to tell the user what flags caused us to complain
+ */
+#define FLGCH "abcdfiklnoprstuvwxBDEGHLPTUXYZ0"
+
+/*
+ * legal pax operation bit patterns
+ */
+
+#define ISLIST(x) (((x) & (RF|WF)) == 0)
+#define ISEXTRACT(x) (((x) & (RF|WF)) == RF)
+#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF)
+#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF))
+#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF))
+#define ISWRITE(x) (((x) & (RF|WF)) == WF)
+
+/*
+ * Illegal option flag subsets based on pax operation
+ */
+
+#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CPF|CXF)
+#define BDARCH (CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF)
+#define BDCOPY (AF|BF|FF|OF|XF|CBF|CEF)
+#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CPF|CXF|CYF|CZF)
+
+
+/*
+ * Routines which handle command line options
+ */
+
+static char flgch[] = FLGCH; /* list of all possible flags */
+static OPLIST *ophead = NULL; /* head for format specific options -x */
+static OPLIST *optail = NULL; /* option tail */
+
+static int no_op(void);
+static void printflg(unsigned int);
+static off_t str_offt(char *);
+static char *get_line(FILE *fp);
+static void pax_options(int, char **);
+static void pax_usage(void);
+static void tar_options(int, char **);
+static void tar_usage(void);
+#ifndef NOCPIO
+static void cpio_options(int, char **);
+static void cpio_usage(void);
+#endif
+
+static int compress_id(char *_blk, int _size);
+static int gzip_id(char *_blk, int _size);
+static int bzip2_id(char *_blk, int _size);
+static int xz_id(char *_blk, int _size);
+
+#define GZIP_CMD "gzip" /* command to run as gzip */
+#define COMPRESS_CMD "compress" /* command to run as compress */
+#define BZIP2_CMD "bzip2" /* command to run as bzip2 */
+#define XZ_CMD "xz" /* command to run as xz */
+
+/*
+ * Format specific routine table
+ * (see pax.h for description of each function)
+ *
+ * name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read,
+ * read, end_read, st_write, write, end_write, trail,
+ * rd_data, wr_data, options
+ */
+
+FSUB fsub[] = {
+#ifdef NOCPIO
+/* 0: OLD BINARY CPIO */
+ { },
+/* 1: OLD OCTAL CHARACTER CPIO */
+ { },
+/* 2: SVR4 HEX CPIO */
+ { },
+/* 3: SVR4 HEX CPIO WITH CRC */
+ { },
+#else
+/* 0: OLD BINARY CPIO */
+ {"bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd,
+ bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, cpio_trail,
+ bad_opt},
+
+/* 1: OLD OCTAL CHARACTER CPIO */
+ {"cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd,
+ cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, cpio_trail,
+ bad_opt},
+
+/* 2: SVR4 HEX CPIO */
+ {"sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd,
+ vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, cpio_trail,
+ bad_opt},
+
+/* 3: SVR4 HEX CPIO WITH CRC */
+ {"sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, crc_id, crc_strd,
+ vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, cpio_trail,
+ bad_opt},
+#endif
+/* 4: OLD TAR */
+ {"tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op,
+ tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, tar_trail,
+ tar_opt},
+
+/* 5: POSIX USTAR */
+ {"ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, no_op,
+ ustar_rd, tar_endrd, no_op, ustar_wr, tar_endwr, tar_trail,
+ tar_opt},
+
+#ifdef SMALL
+/* 6: compress, to detect failure to use -Z */
+ { },
+/* 7: xz, to detect failure to decompress it */
+ { },
+/* 8: bzip2, to detect failure to use -j */
+ { },
+/* 9: gzip, to detect failure to use -z */
+ { },
+#else
+/* 6: compress, to detect failure to use -Z */
+ {NULL, 0, 4, 0, 0, 0, 0, compress_id},
+/* 7: xz, to detect failure to decompress it */
+ {NULL, 0, 4, 0, 0, 0, 0, xz_id},
+/* 8: bzip2, to detect failure to use -j */
+ {NULL, 0, 4, 0, 0, 0, 0, bzip2_id},
+/* 9: gzip, to detect failure to use -z */
+ {NULL, 0, 4, 0, 0, 0, 0, gzip_id},
+#endif
+};
+#define F_OCPIO 0 /* format when called as cpio -6 */
+#define F_ACPIO 1 /* format when called as cpio -c */
+#define F_CPIO 3 /* format when called as cpio */
+#define F_OTAR 4 /* format when called as tar -o */
+#define F_TAR 5 /* format when called as tar */
+#define DEFLT 5 /* default write format from list above */
+
+/*
+ * ford is the archive search order used by get_arc() to determine what kind
+ * of archive we are dealing with. This helps to properly id archive formats
+ * some formats may be subsets of others....
+ */
+int ford[] = {5, 4, 9, 8, 7, 6, 3, 2, 1, 0, -1};
+
+/*
+ * Do we have -C anywhere and what is it?
+ */
+int havechd = 0;
+char *chdname = NULL;
+
+/*
+ * options()
+ * figure out if we are pax, tar or cpio. Call the appropriate options
+ * parser
+ */
+
+void
+options(int argc, char **argv)
+{
+ extern char *__progname;
+
+ /*
+ * Are we acting like pax, tar or cpio (based on argv[0])
+ */
+ argv0 = __progname;
+
+ if (strcmp(NM_TAR, argv0) == 0) {
+ op_mode = OP_TAR;
+ tar_options(argc, argv);
+ return;
+ }
+#ifndef NOCPIO
+ else if (strcmp(NM_CPIO, argv0) == 0) {
+ op_mode = OP_CPIO;
+ cpio_options(argc, argv);
+ return;
+ }
+#endif /* !NOCPIO */
+ /*
+ * assume pax as the default
+ */
+ argv0 = NM_PAX;
+ op_mode = OP_PAX;
+ pax_options(argc, argv);
+}
+
+/*
+ * pax_options()
+ * look at the user specified flags. set globals as required and check if
+ * the user specified a legal set of flags. If not, complain and exit
+ */
+
+static void
+pax_options(int argc, char **argv)
+{
+ int c;
+ unsigned i;
+ unsigned int flg = 0;
+ unsigned int bflg = 0;
+ const char *errstr;
+ char *pt;
+
+ /*
+ * process option flags
+ */
+ while ((c=getopt(argc,argv,"ab:cdf:ijklno:p:rs:tuvwx:zB:DE:G:HJLOPT:U:XYZ0"))
+ != -1) {
+ switch (c) {
+ case 'a':
+ /*
+ * append
+ */
+ flg |= AF;
+ break;
+ case 'b':
+ /*
+ * specify blocksize
+ */
+ flg |= BF;
+ if ((wrblksz = (int)str_offt(optarg)) <= 0) {
+ paxwarn(1, "Invalid block size %s", optarg);
+ pax_usage();
+ }
+ break;
+ case 'c':
+ /*
+ * inverse match on patterns
+ */
+ cflag = 1;
+ flg |= CF;
+ break;
+ case 'd':
+ /*
+ * match only dir on extract, not the subtree at dir
+ */
+ dflag = 1;
+ flg |= DF;
+ break;
+ case 'f':
+ /*
+ * filename where the archive is stored
+ */
+ arcname = optarg;
+ flg |= FF;
+ break;
+ case 'i':
+ /*
+ * interactive file rename
+ */
+ iflag = 1;
+ flg |= IF;
+ break;
+ case 'j':
+ /*
+ * use bzip2. Non standard option.
+ */
+ gzip_program = BZIP2_CMD;
+ break;
+ case 'k':
+ /*
+ * do not clobber files that exist
+ */
+ kflag = 1;
+ flg |= KF;
+ break;
+ case 'l':
+ /*
+ * try to link src to dest with copy (-rw)
+ */
+ lflag = 1;
+ flg |= LF;
+ break;
+ case 'n':
+ /*
+ * select first match for a pattern only
+ */
+ nflag = 1;
+ flg |= NF;
+ break;
+ case 'o':
+ /*
+ * pass format specific options
+ */
+ flg |= OF;
+ if (opt_add(optarg) < 0)
+ pax_usage();
+ break;
+ case 'p':
+ /*
+ * specify file characteristic options
+ */
+ for (pt = optarg; *pt != '\0'; ++pt) {
+ switch (*pt) {
+ case 'a':
+ /*
+ * do not preserve access time
+ */
+ patime = 0;
+ break;
+ case 'e':
+ /*
+ * preserve user id, group id, file
+ * mode, access/modification times
+ */
+ pids = 1;
+ pmode = 1;
+ patime = 1;
+ pmtime = 1;
+ break;
+ case 'm':
+ /*
+ * do not preserve modification time
+ */
+ pmtime = 0;
+ break;
+ case 'o':
+ /*
+ * preserve uid/gid
+ */
+ pids = 1;
+ break;
+ case 'p':
+ /*
+ * preserve file mode bits
+ */
+ pmode = 1;
+ break;
+ default:
+ paxwarn(1, "Invalid -p string: %c", *pt);
+ pax_usage();
+ break;
+ }
+ }
+ flg |= PF;
+ break;
+ case 'r':
+ /*
+ * read the archive
+ */
+ flg |= RF;
+ break;
+ case 's':
+ /*
+ * file name substitution name pattern
+ */
+ if (rep_add(optarg) < 0) {
+ pax_usage();
+ break;
+ }
+ flg |= SF;
+ break;
+ case 't':
+ /*
+ * preserve access time on filesystem nodes we read
+ */
+ tflag = 1;
+ flg |= TF;
+ break;
+ case 'u':
+ /*
+ * ignore those older files
+ */
+ uflag = 1;
+ flg |= UF;
+ break;
+ case 'v':
+ /*
+ * verbose operation mode
+ */
+ vflag = 1;
+ flg |= VF;
+ break;
+ case 'w':
+ /*
+ * write an archive
+ */
+ flg |= WF;
+ break;
+ case 'x':
+ /*
+ * specify an archive format on write
+ */
+ for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i)
+ if (fsub[i].name != NULL &&
+ strcmp(fsub[i].name, optarg) == 0)
+ break;
+ if (i < sizeof(fsub)/sizeof(FSUB)) {
+ frmt = &fsub[i];
+ flg |= XF;
+ break;
+ }
+ paxwarn(1, "Unknown -x format: %s", optarg);
+ (void)fputs("pax: Known -x formats are:", stderr);
+ for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i)
+ if (fsub[i].name != NULL)
+ (void)fprintf(stderr, " %s",
+ fsub[i].name);
+ (void)fputs("\n\n", stderr);
+ pax_usage();
+ break;
+ case 'z':
+ /*
+ * use gzip. Non standard option.
+ */
+ gzip_program = GZIP_CMD;
+ break;
+ case 'B':
+ /*
+ * non-standard option on number of bytes written on a
+ * single archive volume.
+ */
+ if ((wrlimit = str_offt(optarg)) <= 0) {
+ paxwarn(1, "Invalid write limit %s", optarg);
+ pax_usage();
+ }
+ if (wrlimit % BLKMULT) {
+ paxwarn(1, "Write limit is not a %d byte multiple",
+ BLKMULT);
+ pax_usage();
+ }
+ flg |= CBF;
+ break;
+ case 'D':
+ /*
+ * On extraction check file inode change time before the
+ * modification of the file name. Non standard option.
+ */
+ Dflag = 1;
+ flg |= CDF;
+ break;
+ case 'E':
+ /*
+ * non-standard limit on read faults
+ * 0 indicates stop after first error, values
+ * indicate a limit
+ */
+ flg |= CEF;
+ maxflt = strtonum(optarg, 0, INT_MAX, &errstr);
+ if (errstr) {
+ paxwarn(1, "Error count value: %s", errstr);
+ pax_usage();
+ }
+ break;
+ case 'G':
+ /*
+ * non-standard option for selecting files within an
+ * archive by group (gid or name)
+ */
+ if (grp_add(optarg) < 0) {
+ pax_usage();
+ break;
+ }
+ flg |= CGF;
+ break;
+ case 'H':
+ /*
+ * follow command line symlinks only
+ */
+ Hflag = 1;
+ flg |= CHF;
+ break;
+ case 'J':
+ /*
+ * use xz. Non standard option.
+ */
+ gzip_program = XZ_CMD;
+ break;
+ case 'L':
+ /*
+ * follow symlinks
+ */
+ Lflag = 1;
+ flg |= CLF;
+ break;
+ case 'O':
+ /*
+ * Force one volume. Non standard option.
+ */
+ force_one_volume = 1;
+ break;
+ case 'P':
+ /*
+ * do NOT follow symlinks (default)
+ */
+ Lflag = 0;
+ flg |= CPF;
+ break;
+ case 'T':
+ /*
+ * non-standard option for selecting files within an
+ * archive by modification time range (lower,upper)
+ */
+ if (trng_add(optarg) < 0) {
+ pax_usage();
+ break;
+ }
+ flg |= CTF;
+ break;
+ case 'U':
+ /*
+ * non-standard option for selecting files within an
+ * archive by user (uid or name)
+ */
+ if (usr_add(optarg) < 0) {
+ pax_usage();
+ break;
+ }
+ flg |= CUF;
+ break;
+ case 'X':
+ /*
+ * do not pass over mount points in the file system
+ */
+ Xflag = 1;
+ flg |= CXF;
+ break;
+ case 'Y':
+ /*
+ * On extraction check file inode change time after the
+ * modification of the file name. Non standard option.
+ */
+ Yflag = 1;
+ flg |= CYF;
+ break;
+ case 'Z':
+ /*
+ * On extraction check modification time after the
+ * modification of the file name. Non standard option.
+ */
+ Zflag = 1;
+ flg |= CZF;
+ break;
+ case '0':
+ /*
+ * Use \0 as pathname terminator.
+ * (For use with the -print0 option of find(1).)
+ */
+ zeroflag = 1;
+ flg |= C0F;
+ break;
+ default:
+ pax_usage();
+ break;
+ }
+ }
+
+ /*
+ * figure out the operation mode of pax read,write,extract,copy,append
+ * or list. check that we have not been given a bogus set of flags
+ * for the operation mode.
+ */
+ if (ISLIST(flg)) {
+ act = LIST;
+ listf = stdout;
+ bflg = flg & BDLIST;
+ } else if (ISEXTRACT(flg)) {
+ act = EXTRACT;
+ bflg = flg & BDEXTR;
+ } else if (ISARCHIVE(flg)) {
+ act = ARCHIVE;
+ bflg = flg & BDARCH;
+ } else if (ISAPPND(flg)) {
+ act = APPND;
+ bflg = flg & BDARCH;
+ } else if (ISCOPY(flg)) {
+ act = COPY;
+ bflg = flg & BDCOPY;
+ } else
+ pax_usage();
+ if (bflg) {
+ printflg(flg);
+ pax_usage();
+ }
+
+ /*
+ * if we are writing (ARCHIVE) we use the default format if the user
+ * did not specify a format. when we write during an APPEND, we will
+ * adopt the format of the existing archive if none was supplied.
+ */
+ if (!(flg & XF) && (act == ARCHIVE))
+ frmt = &(fsub[DEFLT]);
+
+ /*
+ * process the args as they are interpreted by the operation mode
+ */
+ switch (act) {
+ case LIST:
+ case EXTRACT:
+ for (; optind < argc; optind++)
+ if (pat_add(argv[optind], NULL) < 0)
+ pax_usage();
+ break;
+ case COPY:
+ if (optind >= argc) {
+ paxwarn(0, "Destination directory was not supplied");
+ pax_usage();
+ }
+ --argc;
+ dirptr = argv[argc];
+ /* FALL THROUGH */
+ case ARCHIVE:
+ case APPND:
+ for (; optind < argc; optind++)
+ if (ftree_add(argv[optind], 0) < 0)
+ pax_usage();
+ /*
+ * no read errors allowed on updates/append operation!
+ */
+ maxflt = 0;
+ break;
+ }
+}
+
+
+/*
+ * tar_options()
+ * look at the user specified flags. set globals as required and check if
+ * the user specified a legal set of flags. If not, complain and exit
+ */
+
+static void
+tar_options(int argc, char **argv)
+{
+ int c;
+ int Oflag = 0;
+ int nincfiles = 0;
+ int incfiles_max = 0;
+ struct incfile {
+ char *file;
+ char *dir;
+ };
+ struct incfile *incfiles = NULL;
+
+ /*
+ * Set default values.
+ */
+ rmleadslash = 1;
+
+ /*
+ * process option flags
+ */
+ while ((c = getoldopt(argc, argv,
+ "b:cef:hjmopqruts:vwxzBC:HI:JLNOPXZ014578")) != -1) {
+ switch (c) {
+ case 'b':
+ /*
+ * specify blocksize in 512-byte blocks
+ */
+ if ((wrblksz = (int)str_offt(optarg)) <= 0) {
+ paxwarn(1, "Invalid block size %s", optarg);
+ tar_usage();
+ }
+ wrblksz *= 512; /* XXX - check for int oflow */
+ break;
+ case 'c':
+ /*
+ * create an archive
+ */
+ act = ARCHIVE;
+ break;
+ case 'e':
+ /*
+ * stop after first error
+ */
+ maxflt = 0;
+ break;
+ case 'f':
+ /*
+ * filename where the archive is stored
+ */
+ arcname = optarg;
+ break;
+ case 'h':
+ /*
+ * follow symlinks
+ */
+ Lflag = 1;
+ break;
+ case 'j':
+ /*
+ * use bzip2. Non standard option.
+ */
+ gzip_program = BZIP2_CMD;
+ break;
+ case 'm':
+ /*
+ * do not preserve modification time
+ */
+ pmtime = 0;
+ break;
+ case 'O':
+ Oflag = 1;
+ break;
+ case 'o':
+ Oflag = 2;
+ tar_nodir = 1;
+ break;
+ case 'p':
+ /*
+ * preserve uid/gid and file mode, regardless of umask
+ */
+ pmode = 1;
+ pids = 1;
+ break;
+ case 'q':
+ /*
+ * select first match for a pattern only
+ */
+ nflag = 1;
+ break;
+ case 'r':
+ case 'u':
+ /*
+ * append to the archive
+ */
+ act = APPND;
+ break;
+ case 's':
+ /*
+ * file name substitution name pattern
+ */
+ if (rep_add(optarg) < 0) {
+ tar_usage();
+ break;
+ }
+ break;
+ case 't':
+ /*
+ * list contents of the tape
+ */
+ act = LIST;
+ break;
+ case 'v':
+ /*
+ * verbose operation mode
+ */
+ vflag++;
+ break;
+ case 'w':
+ /*
+ * interactive file rename
+ */
+ iflag = 1;
+ break;
+ case 'x':
+ /*
+ * extract an archive, preserving mode,
+ * and mtime if possible.
+ */
+ act = EXTRACT;
+ pmtime = 1;
+ break;
+ case 'z':
+ /*
+ * use gzip. Non standard option.
+ */
+ gzip_program = GZIP_CMD;
+ break;
+ case 'B':
+ /*
+ * Nothing to do here, this is pax default
+ */
+ break;
+ case 'C':
+ havechd++;
+ chdname = optarg;
+ break;
+ case 'H':
+ /*
+ * follow command line symlinks only
+ */
+ Hflag = 1;
+ break;
+ case 'I':
+ if (++nincfiles > incfiles_max) {
+ size_t n = nincfiles + 3;
+ struct incfile *p;
+
+ p = reallocarray(incfiles, n,
+ sizeof(*incfiles));
+ if (p == NULL) {
+ paxwarn(0, "Unable to allocate space "
+ "for option list");
+ exit(1);
+ }
+ incfiles = p;
+ incfiles_max = n;
+ }
+ incfiles[nincfiles - 1].file = optarg;
+ incfiles[nincfiles - 1].dir = chdname;
+ break;
+ case 'J':
+ /*
+ * use xz. Non standard option.
+ */
+ gzip_program = XZ_CMD;
+ break;
+ case 'L':
+ /*
+ * follow symlinks
+ */
+ Lflag = 1;
+ break;
+ case 'N':
+ /* numeric uid and gid only */
+ Nflag = 1;
+ break;
+ case 'P':
+ /*
+ * do not remove leading '/' from pathnames
+ */
+ rmleadslash = 0;
+ break;
+ case 'X':
+ /*
+ * do not pass over mount points in the file system
+ */
+ Xflag = 1;
+ break;
+ case 'Z':
+ /*
+ * use compress.
+ */
+ gzip_program = COMPRESS_CMD;
+ break;
+ case '0':
+ arcname = DEV_0;
+ break;
+ case '1':
+ arcname = DEV_1;
+ break;
+ case '4':
+ arcname = DEV_4;
+ break;
+ case '5':
+ arcname = DEV_5;
+ break;
+ case '7':
+ arcname = DEV_7;
+ break;
+ case '8':
+ arcname = DEV_8;
+ break;
+ default:
+ tar_usage();
+ break;
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if ((arcname == NULL) || (*arcname == '\0')) {
+ arcname = getenv("TAPE");
+ if ((arcname == NULL) || (*arcname == '\0'))
+ arcname = "-";
+ }
+ if ((arcname[0] == '-') && (arcname[1]== '\0'))
+ arcname = NULL;
+
+ /*
+ * Traditional tar behaviour: list-like output goes to stdout unless
+ * writing the archive there. (pax uses stderr unless in list mode)
+ */
+ if (act == LIST || act == EXTRACT || arcname != NULL)
+ listf = stdout;
+
+ /* Traditional tar behaviour (pax wants to read file list from stdin) */
+ if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0)
+ exit(0);
+
+ /*
+ * process the args as they are interpreted by the operation mode
+ */
+ switch (act) {
+ case LIST:
+ case EXTRACT:
+ default:
+ {
+ int sawpat = 0;
+ char *file, *dir;
+
+ while (nincfiles || *argv != NULL) {
+ /*
+ * If we queued up any include files,
+ * pull them in now. Otherwise, check
+ * for -I and -C positional flags.
+ * Anything else must be a file to
+ * extract.
+ */
+ if (nincfiles) {
+ file = incfiles->file;
+ dir = incfiles->dir;
+ incfiles++;
+ nincfiles--;
+ } else if (strcmp(*argv, "-I") == 0) {
+ if (*++argv == NULL)
+ break;
+ file = *argv++;
+ dir = chdname;
+ } else
+ file = NULL;
+ if (file != NULL) {
+ FILE *fp;
+ char *str;
+
+ if (strcmp(file, "-") == 0)
+ fp = stdin;
+ else if ((fp = fopen(file, "r")) == NULL) {
+ syswarn(1, errno,
+ "Unable to open %s", file);
+ tar_usage();
+ }
+ while ((str = get_line(fp)) != NULL) {
+ if (pat_add(str, dir) < 0)
+ tar_usage();
+ sawpat = 1;
+ }
+ if (ferror(fp)) {
+ syswarn(1, errno,
+ "Unable to read from %s",
+ strcmp(file, "-") ? file :
+ "stdin");
+ tar_usage();
+ }
+ if (strcmp(file, "-") != 0)
+ fclose(fp);
+ } else if (strcmp(*argv, "-C") == 0) {
+ if (*++argv == NULL)
+ break;
+ chdname = *argv++;
+ havechd++;
+ } else if (pat_add(*argv++, chdname) < 0)
+ tar_usage();
+ else
+ sawpat = 1;
+ }
+ /*
+ * if patterns were added, we are doing chdir()
+ * on a file-by-file basis, else, just one
+ * global chdir (if any) after opening input.
+ */
+ if (sawpat > 0)
+ chdname = NULL;
+ }
+ break;
+ case ARCHIVE:
+ case APPND:
+ frmt = &(fsub[Oflag ? F_OTAR : F_TAR]);
+
+ if (chdname != NULL) { /* initial chdir() */
+ if (ftree_add(chdname, 1) < 0)
+ tar_usage();
+ }
+
+ while (nincfiles || *argv != NULL) {
+ char *file, *dir;
+
+ /*
+ * If we queued up any include files, pull them in
+ * now. Otherwise, check for -I and -C positional
+ * flags. Anything else must be a file to include
+ * in the archive.
+ */
+ if (nincfiles) {
+ file = incfiles->file;
+ dir = incfiles->dir;
+ incfiles++;
+ nincfiles--;
+ } else if (strcmp(*argv, "-I") == 0) {
+ if (*++argv == NULL)
+ break;
+ file = *argv++;
+ dir = NULL;
+ } else
+ file = NULL;
+ if (file != NULL) {
+ FILE *fp;
+ char *str;
+
+ /* Set directory if needed */
+ if (dir) {
+ if (ftree_add(dir, 1) < 0)
+ tar_usage();
+ }
+
+ if (strcmp(file, "-") == 0)
+ fp = stdin;
+ else if ((fp = fopen(file, "r")) == NULL) {
+ syswarn(1, errno, "Unable to open %s",
+ file);
+ tar_usage();
+ }
+ while ((str = get_line(fp)) != NULL) {
+ if (ftree_add(str, 0) < 0)
+ tar_usage();
+ }
+ if (ferror(fp)) {
+ syswarn(1, errno,
+ "Unable to read from %s",
+ strcmp(file, "-") ? file : "stdin");
+ tar_usage();
+ }
+ if (strcmp(file, "-") != 0)
+ fclose(fp);
+ } else if (strcmp(*argv, "-C") == 0) {
+ if (*++argv == NULL)
+ break;
+ if (ftree_add(*argv++, 1) < 0)
+ tar_usage();
+ havechd++;
+ } else if (ftree_add(*argv++, 0) < 0)
+ tar_usage();
+ }
+ /*
+ * no read errors allowed on updates/append operation!
+ */
+ maxflt = 0;
+ break;
+ }
+}
+
+int mkpath(char *);
+
+int
+mkpath(path)
+ char *path;
+{
+ struct stat sb;
+ char *slash;
+ int done = 0;
+
+ slash = path;
+
+ while (!done) {
+ slash += strspn(slash, "/");
+ slash += strcspn(slash, "/");
+
+ done = (*slash == '\0');
+ *slash = '\0';
+
+ if (stat(path, &sb)) {
+ if (errno != ENOENT || mkdir(path, 0777)) {
+ paxwarn(1, "%s", path);
+ return (-1);
+ }
+ } else if (!S_ISDIR(sb.st_mode)) {
+ syswarn(1, ENOTDIR, "%s", path);
+ return (-1);
+ }
+
+ if (!done)
+ *slash = '/';
+ }
+
+ return (0);
+}
+
+#ifndef NOCPIO
+/*
+ * cpio_options()
+ * look at the user specified flags. set globals as required and check if
+ * the user specified a legal set of flags. If not, complain and exit
+ */
+
+static void
+cpio_options(int argc, char **argv)
+{
+ const char *errstr;
+ int c, list_only = 0;
+ unsigned i;
+ char *str;
+ FILE *fp;
+
+ kflag = 1;
+ pids = 1;
+ pmode = 1;
+ pmtime = 0;
+ arcname = NULL;
+ dflag = 1;
+ act = -1;
+ nodirs = 1;
+ while ((c=getopt(argc,argv,"abcdfijklmoprstuvzABC:E:F:H:I:JLO:SZ6")) != -1)
+ switch (c) {
+ case 'a':
+ /*
+ * preserve access time on files read
+ */
+ tflag = 1;
+ break;
+ case 'b':
+ /*
+ * swap bytes and half-words when reading data
+ */
+ break;
+ case 'c':
+ /*
+ * ASCII cpio header
+ */
+ frmt = &(fsub[F_ACPIO]);
+ break;
+ case 'd':
+ /*
+ * create directories as needed
+ */
+ nodirs = 0;
+ break;
+ case 'f':
+ /*
+ * invert meaning of pattern list
+ */
+ cflag = 1;
+ break;
+ case 'i':
+ /*
+ * restore an archive
+ */
+ act = EXTRACT;
+ break;
+ case 'j':
+ /*
+ * use bzip2. Non standard option.
+ */
+ gzip_program = BZIP2_CMD;
+ break;
+ case 'k':
+ break;
+ case 'l':
+ /*
+ * use links instead of copies when possible
+ */
+ lflag = 1;
+ break;
+ case 'm':
+ /*
+ * preserve modification time
+ */
+ pmtime = 1;
+ break;
+ case 'o':
+ /*
+ * create an archive
+ */
+ act = ARCHIVE;
+ if (frmt == NULL)
+ frmt = &(fsub[F_CPIO]);
+ break;
+ case 'p':
+ /*
+ * copy-pass mode
+ */
+ act = COPY;
+ break;
+ case 'r':
+ /*
+ * interactively rename files
+ */
+ iflag = 1;
+ break;
+ case 's':
+ /*
+ * swap bytes after reading data
+ */
+ break;
+ case 't':
+ /*
+ * list contents of archive
+ */
+ list_only = 1;
+ break;
+ case 'u':
+ /*
+ * replace newer files
+ */
+ kflag = 0;
+ break;
+ case 'v':
+ /*
+ * verbose operation mode
+ */
+ vflag = 1;
+ break;
+ case 'z':
+ /*
+ * use gzip. Non standard option.
+ */
+ gzip_program = GZIP_CMD;
+ break;
+ case 'A':
+ /*
+ * append mode
+ */
+ act = APPND;
+ break;
+ case 'B':
+ /*
+ * Use 5120 byte block size
+ */
+ wrblksz = 5120;
+ break;
+ case 'C':
+ /*
+ * set block size in bytes
+ */
+ wrblksz = strtonum(optarg, 0, INT_MAX, &errstr);
+ if (errstr) {
+ paxwarn(1, "Invalid block size %s: %s",
+ optarg, errstr);
+ pax_usage();
+ }
+ break;
+ case 'E':
+ /*
+ * file with patterns to extract or list
+ */
+ if ((fp = fopen(optarg, "r")) == NULL) {
+ syswarn(1, errno, "Unable to open %s",
+ optarg);
+ cpio_usage();
+ }
+ while ((str = get_line(fp)) != NULL) {
+ pat_add(str, NULL);
+ }
+ if (ferror(fp)) {
+ syswarn(1, errno,
+ "Unable to read from %s", optarg);
+ cpio_usage();
+ }
+ fclose(fp);
+ break;
+ case 'F':
+ case 'I':
+ case 'O':
+ /*
+ * filename where the archive is stored
+ */
+ if ((optarg[0] == '-') && (optarg[1]== '\0')) {
+ /*
+ * treat a - as stdin
+ */
+ arcname = NULL;
+ break;
+ }
+ arcname = optarg;
+ break;
+ case 'H':
+ /*
+ * specify an archive format on write
+ */
+ for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i)
+ if (fsub[i].name != NULL &&
+ strcmp(fsub[i].name, optarg) == 0)
+ break;
+ if (i < sizeof(fsub)/sizeof(FSUB)) {
+ frmt = &fsub[i];
+ break;
+ }
+ paxwarn(1, "Unknown -H format: %s", optarg);
+ (void)fputs("cpio: Known -H formats are:", stderr);
+ for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i)
+ if (fsub[i].name != NULL)
+ (void)fprintf(stderr, " %s",
+ fsub[i].name);
+ (void)fputs("\n\n", stderr);
+ cpio_usage();
+ break;
+ case 'J':
+ /*
+ * use xz. Non standard option.
+ */
+ gzip_program = XZ_CMD;
+ break;
+ case 'L':
+ /*
+ * follow symbolic links
+ */
+ Lflag = 1;
+ break;
+ case 'S':
+ /*
+ * swap halfwords after reading data
+ */
+ break;
+ case 'Z':
+ /*
+ * use compress. Non standard option.
+ */
+ gzip_program = COMPRESS_CMD;
+ break;
+ case '6':
+ /*
+ * process Version 6 cpio format
+ */
+ frmt = &(fsub[F_OCPIO]);
+ break;
+ case '?':
+ default:
+ cpio_usage();
+ break;
+ }
+ argc -= optind;
+ argv += optind;
+
+ /*
+ * process the args as they are interpreted by the operation mode
+ */
+ switch (act) {
+ case EXTRACT:
+ if (list_only) {
+ act = LIST;
+
+ /*
+ * cpio is like pax: list to stderr
+ * unless in list mode
+ */
+ listf = stdout;
+ }
+ while (*argv != NULL)
+ if (pat_add(*argv++, NULL) < 0)
+ cpio_usage();
+ break;
+ case COPY:
+ if (*argv == NULL) {
+ paxwarn(0, "Destination directory was not supplied");
+ cpio_usage();
+ }
+ dirptr = *argv;
+ if (mkpath(dirptr) < 0)
+ cpio_usage();
+ --argc;
+ ++argv;
+ /* FALL THROUGH */
+ case ARCHIVE:
+ case APPND:
+ if (*argv != NULL)
+ cpio_usage();
+ /*
+ * no read errors allowed on updates/append operation!
+ */
+ maxflt = 0;
+ while ((str = get_line(stdin)) != NULL) {
+ ftree_add(str, 0);
+ }
+ if (ferror(stdin)) {
+ syswarn(1, errno, "Unable to read from %s",
+ "stdin");
+ cpio_usage();
+ }
+ break;
+ default:
+ cpio_usage();
+ break;
+ }
+}
+#endif /* !NOCPIO */
+
+/*
+ * printflg()
+ * print out those invalid flag sets found to the user
+ */
+
+static void
+printflg(unsigned int flg)
+{
+ int nxt;
+ int pos = 0;
+
+ (void)fprintf(stderr,"%s: Invalid combination of options:", argv0);
+ while ((nxt = ffs(flg)) != 0) {
+ flg >>= nxt;
+ pos += nxt;
+ (void)fprintf(stderr, " -%c", flgch[pos-1]);
+ }
+ (void)putc('\n', stderr);
+}
+
+/*
+ * opt_next()
+ * called by format specific options routines to get each format specific
+ * flag and value specified with -o
+ * Return:
+ * pointer to next OPLIST entry or NULL (end of list).
+ */
+
+OPLIST *
+opt_next(void)
+{
+ OPLIST *opt;
+
+ if ((opt = ophead) != NULL)
+ ophead = ophead->fow;
+ return(opt);
+}
+
+/*
+ * bad_opt()
+ * generic routine used to complain about a format specific options
+ * when the format does not support options.
+ */
+
+int
+bad_opt(void)
+{
+ OPLIST *opt;
+
+ if (ophead == NULL)
+ return(0);
+ /*
+ * print all we were given
+ */
+ paxwarn(1,"These format options are not supported");
+ while ((opt = opt_next()) != NULL)
+ (void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value);
+ pax_usage();
+ return(0);
+}
+
+/*
+ * opt_add()
+ * breaks the value supplied to -o into a option name and value. options
+ * are given to -o in the form -o name-value,name=value
+ * multiple -o may be specified.
+ * Return:
+ * 0 if format in name=value format, -1 if -o is passed junk
+ */
+
+int
+opt_add(const char *str)
+{
+ OPLIST *opt;
+ char *frpt;
+ char *pt;
+ char *endpt;
+ char *dstr;
+
+ if ((str == NULL) || (*str == '\0')) {
+ paxwarn(0, "Invalid option name");
+ return(-1);
+ }
+ if ((dstr = strdup(str)) == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ return(-1);
+ }
+ frpt = endpt = dstr;
+
+ /*
+ * break into name and values pieces and stuff each one into a
+ * OPLIST structure. When we know the format, the format specific
+ * option function will go through this list
+ */
+ while ((frpt != NULL) && (*frpt != '\0')) {
+ if ((endpt = strchr(frpt, ',')) != NULL)
+ *endpt = '\0';
+ if ((pt = strchr(frpt, '=')) == NULL) {
+ paxwarn(0, "Invalid options format");
+ free(dstr);
+ return(-1);
+ }
+ if ((opt = malloc(sizeof(OPLIST))) == NULL) {
+ paxwarn(0, "Unable to allocate space for option list");
+ free(dstr);
+ return(-1);
+ }
+ dstr = NULL; /* parts of string going onto the OPLIST */
+ *pt++ = '\0';
+ opt->name = frpt;
+ opt->value = pt;
+ opt->fow = NULL;
+ if (endpt != NULL)
+ frpt = endpt + 1;
+ else
+ frpt = NULL;
+ if (ophead == NULL) {
+ optail = ophead = opt;
+ continue;
+ }
+ optail->fow = opt;
+ optail = opt;
+ }
+ free(dstr);
+ return(0);
+}
+
+/*
+ * str_offt()
+ * Convert an expression of the following forms to an off_t > 0.
+ * 1) A positive decimal number.
+ * 2) A positive decimal number followed by a b (mult by 512).
+ * 3) A positive decimal number followed by a k (mult by 1024).
+ * 4) A positive decimal number followed by a m (mult by 512).
+ * 5) A positive decimal number followed by a w (mult by sizeof int)
+ * 6) Two or more positive decimal numbers (with/without k,b or w).
+ * separated by x (also * for backwards compatibility), specifying
+ * the product of the indicated values.
+ * Return:
+ * 0 for an error, a positive value o.w.
+ */
+
+static off_t
+str_offt(char *val)
+{
+ char *expr;
+ off_t num, t;
+
+ num = strtoll(val, &expr, 0);
+ if ((num == LLONG_MAX) || (num <= 0) || (expr == val))
+ return(0);
+
+ switch (*expr) {
+ case 'b':
+ t = num;
+ num *= 512;
+ if (t > num)
+ return(0);
+ ++expr;
+ break;
+ case 'k':
+ t = num;
+ num *= 1024;
+ if (t > num)
+ return(0);
+ ++expr;
+ break;
+ case 'm':
+ t = num;
+ num *= 1048576;
+ if (t > num)
+ return(0);
+ ++expr;
+ break;
+ case 'w':
+ t = num;
+ num *= sizeof(int);
+ if (t > num)
+ return(0);
+ ++expr;
+ break;
+ }
+
+ switch (*expr) {
+ case '\0':
+ break;
+ case '*':
+ case 'x':
+ t = num;
+ num *= str_offt(expr + 1);
+ if (t > num)
+ return(0);
+ break;
+ default:
+ return(0);
+ }
+ return(num);
+}
+
+char *
+get_line(FILE *f)
+{
+ char *str = NULL;
+ size_t size = 0;
+ ssize_t len;
+
+ do {
+ len = getline(&str, &size, f);
+ if (len == -1) {
+ free(str);
+ return NULL;
+ }
+ if (str[len - 1] == '\n')
+ str[len - 1] = '\0';
+ } while (str[0] == '\0');
+ return str;
+}
+
+/*
+ * no_op()
+ * for those option functions where the archive format has nothing to do.
+ * Return:
+ * 0
+ */
+
+static int
+no_op(void)
+{
+ return(0);
+}
+
+/*
+ * pax_usage()
+ * print the usage summary to the user
+ */
+
+void
+pax_usage(void)
+{
+ (void)fputs(
+ "usage: pax [-0cdjnOvz] [-E limit] [-f archive] [-G group] [-s replstr]\n"
+ " [-T range] [-U user] [pattern ...]\n"
+ " pax -r [-0cDdijknOuvYZz] [-E limit] [-f archive] [-G group] [-o options]\n"
+ " [-p string] [-s replstr] [-T range] [-U user] [pattern ...]\n"
+ " pax -w [-0adHijLOPtuvXz] [-B bytes] [-b blocksize] [-f archive]\n"
+ " [-G group] [-o options] [-s replstr] [-T range] [-U user]\n"
+ " [-x format] [file ...]\n"
+ " pax -rw [-0DdHikLlnOPtuvXYZ] [-G group] [-p string] [-s replstr]\n"
+ " [-T range] [-U user] [file ...] directory\n",
+ stderr);
+ exit(1);
+}
+
+/*
+ * tar_usage()
+ * print the usage summary to the user
+ */
+
+void
+tar_usage(void)
+{
+ (void)fputs(
+ "usage: tar {crtux}[014578befHhjLmNOoPpqsvwXZz]\n"
+ " [blocking-factor | archive | replstr] [-C directory] [-I file]\n"
+ " [file ...]\n"
+ " tar {-crtux} [-014578eHhjLmNOoPpqvwXZz] [-b blocking-factor]\n"
+ " [-C directory] [-f archive] [-I file] [-s replstr] [file ...]\n",
+ stderr);
+ exit(1);
+}
+
+#ifndef NOCPIO
+/*
+ * cpio_usage()
+ * print the usage summary to the user
+ */
+
+void
+cpio_usage(void)
+{
+ (void)fputs(
+ "usage: cpio -o [-AaBcjLvZz] [-C bytes] [-F archive] [-H format]\n"
+ " [-O archive] < name-list [> archive]\n"
+ " cpio -i [-6BbcdfjmrSstuvZz] [-C bytes] [-E file] [-F archive] [-H format]\n"
+ " [-I archive] [pattern ...] [< archive]\n"
+ " cpio -p [-adLlmuv] destination-directory < name-list\n",
+ stderr);
+ exit(1);
+}
+#endif /* !NOCPIO */
+
+#ifndef SMALL
+static int
+compress_id(char *blk, int size)
+{
+ if (size >= 2 && blk[0] == '\037' && blk[1] == '\235') {
+ paxwarn(0, "input compressed with %s; use the -%c option"
+ " to decompress it", "compress", 'Z');
+ exit(1);
+ }
+ return (-1);
+}
+
+static int
+gzip_id(char *blk, int size)
+{
+ if (size >= 2 && blk[0] == '\037' && blk[1] == '\213') {
+ paxwarn(0, "input compressed with %s; use the -%c option"
+ " to decompress it", "gzip", 'z');
+ exit(1);
+ }
+ return (-1);
+}
+
+static int
+bzip2_id(char *blk, int size)
+{
+ if (size >= 3 && blk[0] == 'B' && blk[1] == 'Z' && blk[2] == 'h') {
+ paxwarn(0, "input compressed with %s; use the -%c option"
+ " to decompress it", "bzip2", 'j');
+ exit(1);
+ }
+ return (-1);
+}
+
+static int
+xz_id(char *blk, int size)
+{
+ if (size >= 6 && memcmp(blk, "\xFD\x37\x7A\x58\x5A", 6) == 0) {
+ paxwarn(0, "input compressed with xz");
+ exit(1);
+ }
+ return (-1);
+}
+#endif /* !SMALL */
diff --git a/bin/pax/pat_rep.c b/bin/pax/pat_rep.c
new file mode 100644
index 0000000..deddca0
--- /dev/null
+++ b/bin/pax/pat_rep.c
@@ -0,0 +1,1108 @@
+/* $OpenBSD: pat_rep.c,v 1.43 2017/09/16 07:42:34 otto Exp $ */
+/* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * data structure for storing user supplied replacement strings (-s)
+ */
+typedef struct replace {
+ char *nstr; /* the new string we will substitute with */
+ regex_t rcmp; /* compiled regular expression used to match */
+ int flgs; /* print conversions? global in operation? */
+#define PRNT 0x1
+#define GLOB 0x2
+ struct replace *fow; /* pointer to next pattern */
+} REPLACE;
+
+/*
+ * routines to handle pattern matching, name modification (regular expression
+ * substitution and interactive renames), and destination name modification for
+ * copy (-rw). Both file name and link names are adjusted as required in these
+ * routines.
+ */
+
+#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
+static PATTERN *pathead = NULL; /* file pattern match list head */
+static PATTERN *pattail = NULL; /* file pattern match list tail */
+static REPLACE *rephead = NULL; /* replacement string list head */
+static REPLACE *reptail = NULL; /* replacement string list tail */
+
+static int rep_name(char *, size_t, int *, int);
+static int tty_rename(ARCHD *);
+static int fix_path(char *, int *, char *, int);
+static int fn_match(char *, char *, char **);
+static char * range_match(char *, int);
+static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
+
+/*
+ * rep_add()
+ * parses the -s replacement string; compiles the regular expression
+ * and stores the compiled value and it's replacement string together in
+ * replacement string list. Input to this function is of the form:
+ * /old/new/pg
+ * The first char in the string specifies the delimiter used by this
+ * replacement string. "Old" is a regular expression in "ed" format which
+ * is compiled by regcomp() and is applied to filenames. "new" is the
+ * substitution string; p and g are options flags for printing and global
+ * replacement (over the single filename)
+ * Return:
+ * 0 if a proper replacement string and regular expression was added to
+ * the list of replacement patterns; -1 otherwise.
+ */
+
+int
+rep_add(char *str)
+{
+ char *pt1;
+ char *pt2;
+ REPLACE *rep;
+ int res;
+ char rebuf[BUFSIZ];
+
+ /*
+ * throw out the bad parameters
+ */
+ if ((str == NULL) || (*str == '\0')) {
+ paxwarn(1, "Empty replacement string");
+ return(-1);
+ }
+
+ /*
+ * first character in the string specifies what the delimiter is for
+ * this expression
+ */
+ for (pt1 = str+1; *pt1; pt1++) {
+ if (*pt1 == '\\') {
+ pt1++;
+ continue;
+ }
+ if (*pt1 == *str)
+ break;
+ }
+ if (*pt1 == '\0') {
+ paxwarn(1, "Invalid replacement string %s", str);
+ return(-1);
+ }
+
+ /*
+ * allocate space for the node that handles this replacement pattern
+ * and split out the regular expression and try to compile it
+ */
+ if ((rep = malloc(sizeof(REPLACE))) == NULL) {
+ paxwarn(1, "Unable to allocate memory for replacement string");
+ return(-1);
+ }
+
+ *pt1 = '\0';
+ if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
+ regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
+ paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
+ free(rep);
+ return(-1);
+ }
+
+ /*
+ * put the delimiter back in case we need an error message and
+ * locate the delimiter at the end of the replacement string
+ * we then point the node at the new substitution string
+ */
+ *pt1++ = *str;
+ for (pt2 = pt1; *pt2; pt2++) {
+ if (*pt2 == '\\') {
+ pt2++;
+ continue;
+ }
+ if (*pt2 == *str)
+ break;
+ }
+ if (*pt2 == '\0') {
+ regfree(&(rep->rcmp));
+ free(rep);
+ paxwarn(1, "Invalid replacement string %s", str);
+ return(-1);
+ }
+
+ *pt2 = '\0';
+ rep->nstr = pt1;
+ pt1 = pt2++;
+ rep->flgs = 0;
+
+ /*
+ * set the options if any
+ */
+ while (*pt2 != '\0') {
+ switch (*pt2) {
+ case 'g':
+ case 'G':
+ rep->flgs |= GLOB;
+ break;
+ case 'p':
+ case 'P':
+ rep->flgs |= PRNT;
+ break;
+ default:
+ regfree(&(rep->rcmp));
+ free(rep);
+ *pt1 = *str;
+ paxwarn(1, "Invalid replacement string option %s", str);
+ return(-1);
+ }
+ ++pt2;
+ }
+
+ /*
+ * all done, link it in at the end
+ */
+ rep->fow = NULL;
+ if (rephead == NULL) {
+ reptail = rephead = rep;
+ return(0);
+ }
+ reptail->fow = rep;
+ reptail = rep;
+ return(0);
+}
+
+/*
+ * pat_add()
+ * add a pattern match to the pattern match list. Pattern matches are used
+ * to select which archive members are extracted. (They appear as
+ * arguments to pax in the list and read modes). If no patterns are
+ * supplied to pax, all members in the archive will be selected (and the
+ * pattern match list is empty).
+ * Return:
+ * 0 if the pattern was added to the list, -1 otherwise
+ */
+
+int
+pat_add(char *str, char *chdirname)
+{
+ PATTERN *pt;
+
+ /*
+ * throw out the junk
+ */
+ if ((str == NULL) || (*str == '\0')) {
+ paxwarn(1, "Empty pattern string");
+ return(-1);
+ }
+
+ /*
+ * allocate space for the pattern and store the pattern. the pattern is
+ * part of argv so do not bother to copy it, just point at it. Add the
+ * node to the end of the pattern list
+ */
+ if ((pt = malloc(sizeof(PATTERN))) == NULL) {
+ paxwarn(1, "Unable to allocate memory for pattern string");
+ return(-1);
+ }
+
+ pt->pstr = str;
+ pt->pend = NULL;
+ pt->plen = strlen(str);
+ pt->fow = NULL;
+ pt->flgs = 0;
+ pt->chdname = chdirname;
+
+ if (pathead == NULL) {
+ pattail = pathead = pt;
+ return(0);
+ }
+ pattail->fow = pt;
+ pattail = pt;
+ return(0);
+}
+
+/*
+ * pat_chk()
+ * complain if any the user supplied pattern did not result in a match to
+ * a selected archive member.
+ */
+
+void
+pat_chk(void)
+{
+ PATTERN *pt;
+ int wban = 0;
+
+ /*
+ * walk down the list checking the flags to make sure MTCH was set,
+ * if not complain
+ */
+ for (pt = pathead; pt != NULL; pt = pt->fow) {
+ if (pt->flgs & MTCH)
+ continue;
+ if (!wban) {
+ paxwarn(1, "WARNING! These patterns were not matched:");
+ ++wban;
+ }
+ (void)fprintf(stderr, "%s\n", pt->pstr);
+ }
+}
+
+/*
+ * pat_sel()
+ * the archive member which matches a pattern was selected. Mark the
+ * pattern as having selected an archive member. arcn->pat points at the
+ * pattern that was matched. arcn->pat is set in pat_match()
+ *
+ * NOTE: When the -c option is used, we are called when there was no match
+ * by pat_match() (that means we did match before the inverted sense of
+ * the logic). Now this seems really strange at first, but with -c we
+ * need to keep track of those patterns that cause an archive member to NOT
+ * be selected (it found an archive member with a specified pattern)
+ * Return:
+ * 0 if the pattern pointed at by arcn->pat was tagged as creating a
+ * match, -1 otherwise.
+ */
+
+int
+pat_sel(ARCHD *arcn)
+{
+ PATTERN *pt;
+ PATTERN **ppt;
+ size_t len;
+
+ /*
+ * if no patterns just return
+ */
+ if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
+ return(0);
+
+ /*
+ * when we are NOT limited to a single match per pattern mark the
+ * pattern and return
+ */
+ if (!nflag) {
+ pt->flgs |= MTCH;
+ return(0);
+ }
+
+ /*
+ * we reach this point only when we allow a single selected match per
+ * pattern, if the pattern matches a directory and we do not have -d
+ * (dflag) we are done with this pattern. We may also be handed a file
+ * in the subtree of a directory. in that case when we are operating
+ * with -d, this pattern was already selected and we are done
+ */
+ if (pt->flgs & DIR_MTCH)
+ return(0);
+
+ if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
+ /*
+ * ok we matched a directory and we are allowing
+ * subtree matches but because of the -n only its children will
+ * match. This is tagged as a DIR_MTCH type.
+ * WATCH IT, the code assumes that pt->pend points
+ * into arcn->name and arcn->name has not been modified.
+ * If not we will have a big mess. Yup this is another kludge
+ */
+
+ /*
+ * if this was a prefix match, remove trailing part of path
+ * so we can copy it. Future matches will be exact prefix match
+ */
+ if (pt->pend != NULL)
+ *pt->pend = '\0';
+
+ if ((pt->pstr = strdup(arcn->name)) == NULL) {
+ paxwarn(1, "Pattern select out of memory");
+ if (pt->pend != NULL)
+ *pt->pend = '/';
+ pt->pend = NULL;
+ return(-1);
+ }
+
+ /*
+ * put the trailing / back in the source string
+ */
+ if (pt->pend != NULL) {
+ *pt->pend = '/';
+ pt->pend = NULL;
+ }
+ pt->plen = strlen(pt->pstr);
+
+ /*
+ * strip off any trailing /, this should really never happen
+ */
+ len = pt->plen - 1;
+ if (*(pt->pstr + len) == '/') {
+ *(pt->pstr + len) = '\0';
+ pt->plen = len;
+ }
+ pt->flgs = DIR_MTCH | MTCH;
+ arcn->pat = pt;
+ return(0);
+ }
+
+ /*
+ * we are then done with this pattern, so we delete it from the list
+ * because it can never be used for another match.
+ * Seems kind of strange to do for a -c, but the pax spec is really
+ * vague on the interaction of -c, -n and -d. We assume that when -c
+ * and the pattern rejects a member (i.e. it matched it) it is done.
+ * In effect we place the order of the flags as having -c last.
+ */
+ pt = pathead;
+ ppt = &pathead;
+ while ((pt != NULL) && (pt != arcn->pat)) {
+ ppt = &(pt->fow);
+ pt = pt->fow;
+ }
+
+ if (pt == NULL) {
+ /*
+ * should never happen....
+ */
+ paxwarn(1, "Pattern list inconsistent");
+ return(-1);
+ }
+ *ppt = pt->fow;
+ free(pt);
+ arcn->pat = NULL;
+ return(0);
+}
+
+/*
+ * pat_match()
+ * see if this archive member matches any supplied pattern, if a match
+ * is found, arcn->pat is set to point at the potential pattern. Later if
+ * this archive member is "selected" we process and mark the pattern as
+ * one which matched a selected archive member (see pat_sel())
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be
+ * skipped and -1 if we are done with all patterns (and pax should quit
+ * looking for more members)
+ */
+
+int
+pat_match(ARCHD *arcn)
+{
+ PATTERN *pt;
+
+ arcn->pat = NULL;
+
+ /*
+ * if there are no more patterns and we have -n (and not -c) we are
+ * done. otherwise with no patterns to match, matches all
+ */
+ if (pathead == NULL) {
+ if (nflag && !cflag)
+ return(-1);
+ return(0);
+ }
+
+ /*
+ * have to search down the list one at a time looking for a match.
+ */
+ pt = pathead;
+ while (pt != NULL) {
+ /*
+ * check for a file name match unless we have DIR_MTCH set in
+ * this pattern then we want a prefix match
+ */
+ if (pt->flgs & DIR_MTCH) {
+ /*
+ * this pattern was matched before to a directory
+ * as we must have -n set for this (but not -d). We can
+ * only match CHILDREN of that directory so we must use
+ * an exact prefix match (no wildcards).
+ */
+ if ((arcn->name[pt->plen] == '/') &&
+ (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
+ break;
+ } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
+ break;
+ pt = pt->fow;
+ }
+
+ /*
+ * return the result, remember that cflag (-c) inverts the sense of a
+ * match
+ */
+ if (pt == NULL)
+ return(cflag ? 0 : 1);
+
+ /*
+ * we had a match, now when we invert the sense (-c) we reject this
+ * member. However we have to tag the pattern a being successful, (in a
+ * match, not in selecting a archive member) so we call pat_sel() here.
+ */
+ arcn->pat = pt;
+ if (!cflag)
+ return(0);
+
+ if (pat_sel(arcn) < 0)
+ return(-1);
+ arcn->pat = NULL;
+ return(1);
+}
+
+/*
+ * fn_match()
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be
+ * skipped and -1 if we are done with all patterns (and pax should quit
+ * looking for more members)
+ * Note: *pend may be changed to show where the prefix ends.
+ */
+
+static int
+fn_match(char *pattern, char *string, char **pend)
+{
+ char c;
+ char test;
+
+ *pend = NULL;
+ for (;;) {
+ switch (c = *pattern++) {
+ case '\0':
+ /*
+ * Ok we found an exact match
+ */
+ if (*string == '\0')
+ return(0);
+
+ /*
+ * Check if it is a prefix match
+ */
+ if ((dflag == 1) || (*string != '/'))
+ return(-1);
+
+ /*
+ * It is a prefix match, remember where the trailing
+ * / is located
+ */
+ *pend = string;
+ return(0);
+ case '?':
+ if ((test = *string++) == '\0')
+ return (-1);
+ break;
+ case '*':
+ c = *pattern;
+ /*
+ * Collapse multiple *'s.
+ */
+ while (c == '*')
+ c = *++pattern;
+
+ /*
+ * Optimized hack for pattern with a * at the end
+ */
+ if (c == '\0')
+ return (0);
+
+ /*
+ * General case, use recursion.
+ */
+ while ((test = *string) != '\0') {
+ if (!fn_match(pattern, string, pend))
+ return (0);
+ ++string;
+ }
+ return (-1);
+ case '[':
+ /*
+ * range match
+ */
+ if (((test = *string++) == '\0') ||
+ ((pattern = range_match(pattern, test)) == NULL))
+ return (-1);
+ break;
+ case '\\':
+ if ((c = *pattern++) == '\0')
+ return (-1);
+ /* FALLTHROUGH */
+ default:
+ if (c != *string++)
+ return (-1);
+ break;
+ }
+ }
+ /* NOTREACHED */
+}
+
+static char *
+range_match(char *pattern, int test)
+{
+ char c;
+ char c2;
+ int negate;
+ int ok = 0;
+
+ if ((negate = (*pattern == '!')) != 0)
+ ++pattern;
+
+ while ((c = *pattern++) != ']') {
+ /*
+ * Illegal pattern
+ */
+ if (c == '\0')
+ return (NULL);
+
+ if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') &&
+ (c2 != ']')) {
+ if ((c <= test) && (test <= c2))
+ ok = 1;
+ pattern += 2;
+ } else if (c == test)
+ ok = 1;
+ }
+ return (ok == negate ? NULL : pattern);
+}
+
+/*
+ * has_dotdot()
+ * Returns true iff the supplied path contains a ".." component.
+ */
+
+int
+has_dotdot(const char *path)
+{
+ const char *p = path;
+
+ while ((p = strstr(p, "..")) != NULL) {
+ if ((p == path || p[-1] == '/') &&
+ (p[2] == '/' || p[2] == '\0'))
+ return (1);
+ p += 2;
+ }
+ return (0);
+}
+
+/*
+ * mod_name()
+ * modify a selected file name. first attempt to apply replacement string
+ * expressions, then apply interactive file rename. We apply replacement
+ * string expressions to both filenames and file links (if we didn't the
+ * links would point to the wrong place, and we could never be able to
+ * move an archive that has a file link in it). When we rename files
+ * interactively, we store that mapping (old name to user input name) so
+ * if we spot any file links to the old file name in the future, we will
+ * know exactly how to fix the file link.
+ * Return:
+ * 0 continue to process file, 1 skip this file, -1 pax is finished
+ */
+
+int
+mod_name(ARCHD *arcn)
+{
+ int res = 0;
+
+ /*
+ * Strip off leading '/' if appropriate.
+ * Currently, this option is only set for the tar format.
+ */
+ while (rmleadslash && arcn->name[0] == '/') {
+ if (arcn->name[1] == '\0') {
+ arcn->name[0] = '.';
+ } else {
+ (void)memmove(arcn->name, &arcn->name[1],
+ strlen(arcn->name));
+ arcn->nlen--;
+ }
+ if (rmleadslash < 2) {
+ rmleadslash = 2;
+ paxwarn(0, "Removing leading / from absolute path names in the archive");
+ }
+ }
+ while (rmleadslash && arcn->ln_name[0] == '/' &&
+ PAX_IS_HARDLINK(arcn->type)) {
+ if (arcn->ln_name[1] == '\0') {
+ arcn->ln_name[0] = '.';
+ } else {
+ (void)memmove(arcn->ln_name, &arcn->ln_name[1],
+ strlen(arcn->ln_name));
+ arcn->ln_nlen--;
+ }
+ if (rmleadslash < 2) {
+ rmleadslash = 2;
+ paxwarn(0, "Removing leading / from absolute path names in the archive");
+ }
+ }
+ if (rmleadslash) {
+ const char *last = NULL;
+ const char *p = arcn->name;
+
+ while ((p = strstr(p, "..")) != NULL) {
+ if ((p == arcn->name || p[-1] == '/') &&
+ (p[2] == '/' || p[2] == '\0'))
+ last = p + 2;
+ p += 2;
+ }
+ if (last != NULL) {
+ last++;
+ paxwarn(1, "Removing leading \"%.*s\"",
+ (int)(last - arcn->name), arcn->name);
+ arcn->nlen = strlen(last);
+ if (arcn->nlen > 0)
+ memmove(arcn->name, last, arcn->nlen + 1);
+ else {
+ arcn->name[0] = '.';
+ arcn->name[1] = '\0';
+ arcn->nlen = 1;
+ }
+ }
+ }
+
+ /*
+ * IMPORTANT: We have a problem. what do we do with symlinks?
+ * Modifying a hard link name makes sense, as we know the file it
+ * points at should have been seen already in the archive (and if it
+ * wasn't seen because of a read error or a bad archive, we lose
+ * anyway). But there are no such requirements for symlinks. On one
+ * hand the symlink that refers to a file in the archive will have to
+ * be modified to so it will still work at its new location in the
+ * file system. On the other hand a symlink that points elsewhere (and
+ * should continue to do so) should not be modified. There is clearly
+ * no perfect solution here. So we handle them like hardlinks. Clearly
+ * a replacement made by the interactive rename mapping is very likely
+ * to be correct since it applies to a single file and is an exact
+ * match. The regular expression replacements are a little harder to
+ * justify though. We claim that the symlink name is only likely
+ * to be replaced when it points within the file tree being moved and
+ * in that case it should be modified. what we really need to do is to
+ * call an oracle here. :)
+ */
+ if (rephead != NULL) {
+ /*
+ * we have replacement strings, modify the name and the link
+ * name if any.
+ */
+ if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0)
+ return(res);
+
+ if (PAX_IS_LINK(arcn->type)) {
+ if ((res = rep_name(arcn->ln_name,
+ sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0)
+ return(res);
+ }
+ }
+
+ if (iflag) {
+ /*
+ * perform interactive file rename, then map the link if any
+ */
+ if ((res = tty_rename(arcn)) != 0)
+ return(res);
+ if (PAX_IS_LINK(arcn->type))
+ sub_name(arcn->ln_name, &(arcn->ln_nlen),
+ sizeof(arcn->ln_name));
+ }
+ return(res);
+}
+
+/*
+ * tty_rename()
+ * Prompt the user for a replacement file name. A "." keeps the old name,
+ * a empty line skips the file, and an EOF on reading the tty, will cause
+ * pax to stop processing and exit. Otherwise the file name input, replaces
+ * the old one.
+ * Return:
+ * 0 process this file, 1 skip this file, -1 we need to exit pax
+ */
+
+static int
+tty_rename(ARCHD *arcn)
+{
+ char tmpname[PAXPATHLEN+2];
+ int res;
+
+ /*
+ * prompt user for the replacement name for a file, keep trying until
+ * we get some reasonable input. Archives may have more than one file
+ * on them with the same name (from updates etc). We print verbose info
+ * on the file so the user knows what is up.
+ */
+ tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
+
+ for (;;) {
+ ls_tty(arcn);
+ tty_prnt("Input new name, or a \".\" to keep the old name, ");
+ tty_prnt("or a \"return\" to skip this file.\n");
+ tty_prnt("Input > ");
+ if (tty_read(tmpname, sizeof(tmpname)) < 0)
+ return(-1);
+ if (strcmp(tmpname, "..") == 0) {
+ tty_prnt("Try again, illegal file name: ..\n");
+ continue;
+ }
+ if (strlen(tmpname) > PAXPATHLEN) {
+ tty_prnt("Try again, file name too long\n");
+ continue;
+ }
+ break;
+ }
+
+ /*
+ * empty file name, skips this file. a "." leaves it alone
+ */
+ if (tmpname[0] == '\0') {
+ tty_prnt("Skipping file.\n");
+ return(1);
+ }
+ if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
+ tty_prnt("Processing continues, name unchanged.\n");
+ return(0);
+ }
+
+ /*
+ * ok the name changed. We may run into links that point at this
+ * file later. we have to remember where the user sent the file
+ * in order to repair any links.
+ */
+ tty_prnt("Processing continues, name changed to: %s\n", tmpname);
+ res = add_name(arcn->name, arcn->nlen, tmpname);
+ arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
+ if ((size_t)arcn->nlen >= sizeof(arcn->name))
+ arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */
+ if (res < 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * set_dest()
+ * fix up the file name and the link name (if any) so this file will land
+ * in the destination directory (used during copy() -rw).
+ * Return:
+ * 0 if ok, -1 if failure (name too long)
+ */
+
+int
+set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
+{
+ if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
+ return(-1);
+
+ /*
+ * It is really hard to deal with symlinks here, we cannot be sure
+ * if the name they point was moved (or will be moved). It is best to
+ * leave them alone.
+ */
+ if (!PAX_IS_HARDLINK(arcn->type))
+ return(0);
+
+ if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * fix_path
+ * concatenate dir_name and or_name and store the result in or_name (if
+ * it fits). This is one ugly function.
+ * Return:
+ * 0 if ok, -1 if the final name is too long
+ */
+
+static int
+fix_path(char *or_name, int *or_len, char *dir_name, int dir_len)
+{
+ char *src;
+ char *dest;
+ char *start;
+ int len;
+
+ /*
+ * we shift the or_name to the right enough to tack in the dir_name
+ * at the front. We make sure we have enough space for it all before
+ * we start. since dest always ends in a slash, we skip of or_name
+ * if it also starts with one.
+ */
+ start = or_name;
+ src = start + *or_len;
+ dest = src + dir_len;
+ if (*start == '/') {
+ ++start;
+ --dest;
+ }
+ if ((len = dest - or_name) > PAXPATHLEN) {
+ paxwarn(1, "File name %s/%s, too long", dir_name, start);
+ return(-1);
+ }
+ *or_len = len;
+
+ /*
+ * enough space, shift
+ */
+ while (src >= start)
+ *dest-- = *src--;
+ src = dir_name + dir_len - 1;
+
+ /*
+ * splice in the destination directory name
+ */
+ while (src >= dir_name)
+ *dest-- = *src--;
+
+ *(or_name + len) = '\0';
+ return(0);
+}
+
+/*
+ * rep_name()
+ * walk down the list of replacement strings applying each one in order.
+ * when we find one with a successful substitution, we modify the name
+ * as specified. if required, we print the results. if the resulting name
+ * is empty, we will skip this archive member. We use the regexp(3)
+ * routines (regexp() ought to win a prize as having the most cryptic
+ * library function manual page).
+ * --Parameters--
+ * name is the file name we are going to apply the regular expressions to
+ * (and may be modified)
+ * nsize is the size of the name buffer.
+ * nlen is the length of this name (and is modified to hold the length of
+ * the final string).
+ * prnt is a flag that says whether to print the final result.
+ * Return:
+ * 0 if substitution was successful, 1 if we are to skip the file (the name
+ * ended up empty)
+ */
+
+static int
+rep_name(char *name, size_t nsize, int *nlen, int prnt)
+{
+ REPLACE *pt;
+ char *inpt;
+ char *outpt;
+ char *endpt;
+ char *rpt;
+ int found = 0;
+ int res;
+ regmatch_t pm[MAXSUBEXP];
+ char nname[PAXPATHLEN+1]; /* final result of all replacements */
+ char buf1[PAXPATHLEN+1]; /* where we work on the name */
+
+ /*
+ * copy the name into buf1, where we will work on it. We need to keep
+ * the orig string around so we can print out the result of the final
+ * replacement. We build up the final result in nname. inpt points at
+ * the string we apply the regular expression to. prnt is used to
+ * suppress printing when we handle replacements on the link field
+ * (the user already saw that substitution go by)
+ */
+ pt = rephead;
+ (void)strlcpy(buf1, name, sizeof(buf1));
+ inpt = buf1;
+ outpt = nname;
+ endpt = outpt + PAXPATHLEN;
+
+ /*
+ * try each replacement string in order
+ */
+ while (pt != NULL) {
+ do {
+ char *oinpt = inpt;
+ /*
+ * check for a successful substitution, if not go to
+ * the next pattern, or cleanup if we were global
+ */
+ if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
+ break;
+
+ /*
+ * ok we found one. We have three parts, the prefix
+ * which did not match, the section that did and the
+ * tail (that also did not match). Copy the prefix to
+ * the final output buffer (watching to make sure we
+ * do not create a string too long).
+ */
+ found = 1;
+ rpt = inpt + pm[0].rm_so;
+
+ while ((inpt < rpt) && (outpt < endpt))
+ *outpt++ = *inpt++;
+ if (outpt == endpt)
+ break;
+
+ /*
+ * for the second part (which matched the regular
+ * expression) apply the substitution using the
+ * replacement string and place it the prefix in the
+ * final output. If we have problems, skip it.
+ */
+ if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt))
+ < 0) {
+ if (prnt)
+ paxwarn(1, "Replacement name error %s",
+ name);
+ return(1);
+ }
+ outpt += res;
+
+ /*
+ * we set up to look again starting at the first
+ * character in the tail (of the input string right
+ * after the last character matched by the regular
+ * expression (inpt always points at the first char in
+ * the string to process). If we are not doing a global
+ * substitution, we will use inpt to copy the tail to
+ * the final result. Make sure we do not overrun the
+ * output buffer
+ */
+ inpt += pm[0].rm_eo - pm[0].rm_so;
+
+ if ((outpt == endpt) || (*inpt == '\0'))
+ break;
+
+ /*
+ * if the user wants global we keep trying to
+ * substitute until it fails, then we are done.
+ */
+ } while (pt->flgs & GLOB);
+
+ if (found)
+ break;
+
+ /*
+ * a successful substitution did NOT occur, try the next one
+ */
+ pt = pt->fow;
+ }
+
+ if (found) {
+ /*
+ * we had a substitution, copy the last tail piece (if there is
+ * room) to the final result
+ */
+ while ((outpt < endpt) && (*inpt != '\0'))
+ *outpt++ = *inpt++;
+
+ *outpt = '\0';
+ if ((outpt == endpt) && (*inpt != '\0')) {
+ if (prnt)
+ paxwarn(1,"Replacement name too long %s >> %s",
+ name, nname);
+ return(1);
+ }
+
+ /*
+ * inform the user of the result if wanted
+ */
+ if (prnt && (pt->flgs & PRNT)) {
+ if (*nname == '\0')
+ (void)fprintf(stderr,"%s >> <empty string>\n",
+ name);
+ else
+ (void)fprintf(stderr,"%s >> %s\n", name, nname);
+ }
+
+ /*
+ * if empty inform the caller this file is to be skipped
+ * otherwise copy the new name over the orig name and return
+ */
+ if (*nname == '\0')
+ return(1);
+ *nlen = strlcpy(name, nname, nsize);
+ }
+ return(0);
+}
+
+/*
+ * resub()
+ * apply the replacement to the matched expression. expand out the old
+ * style ed(1) subexpression expansion.
+ * Return:
+ * -1 if error, or the number of characters added to the destination.
+ */
+
+static int
+resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
+ char *destend)
+{
+ char *spt;
+ char *dpt;
+ char c;
+ regmatch_t *pmpt;
+ int len;
+ int subexcnt;
+
+ spt = src;
+ dpt = dest;
+ subexcnt = rp->re_nsub;
+ while ((dpt < destend) && ((c = *spt++) != '\0')) {
+ /*
+ * see if we just have an ordinary replacement character
+ * or we refer to a subexpression.
+ */
+ if (c == '&') {
+ pmpt = pm;
+ } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
+ /*
+ * make sure there is a subexpression as specified
+ */
+ if ((len = *spt++ - '0') > subexcnt)
+ return(-1);
+ pmpt = pm + len;
+ } else {
+ /*
+ * Ordinary character, just copy it
+ */
+ if ((c == '\\') && (*spt != '\0'))
+ c = *spt++;
+ *dpt++ = c;
+ continue;
+ }
+
+ /*
+ * continue if the subexpression is bogus
+ */
+ if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
+ ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
+ continue;
+
+ /*
+ * copy the subexpression to the destination.
+ * fail if we run out of space or the match string is damaged
+ */
+ if (len > (destend - dpt))
+ return (-1);
+ strncpy(dpt, inpt + pmpt->rm_so, len);
+ dpt += len;
+ }
+ return(dpt - dest);
+}
diff --git a/bin/pax/pax.1 b/bin/pax/pax.1
new file mode 100644
index 0000000..d146a96
--- /dev/null
+++ b/bin/pax/pax.1
@@ -0,0 +1,1112 @@
+.\" $OpenBSD: pax.1,v 1.75 2020/01/16 16:46:46 schwarze Exp $
+.\" $NetBSD: pax.1,v 1.3 1995/03/21 09:07:37 cgd Exp $
+.\"
+.\" Copyright (c) 1992 Keith Muller.
+.\" Copyright (c) 1992, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Keith Muller of the University of California, San Diego.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)pax.1 8.4 (Berkeley) 4/18/94
+.\"
+.Dd $Mdocdate: January 16 2020 $
+.Dt PAX 1
+.Os
+.Sh NAME
+.Nm pax
+.Nd read and write file archives and copy directory hierarchies
+.Sh SYNOPSIS
+.Nm pax
+.Op Fl 0cdjnOvz
+.Op Fl E Ar limit
+.Op Fl f Ar archive
+.Op Fl G Ar group
+.Op Fl s Ar replstr
+.Op Fl T Ar range
+.Op Fl U Ar user
+.Op Ar pattern ...
+.Nm pax
+.Fl r
+.Op Fl 0cDdijknOuvYZz
+.Op Fl E Ar limit
+.Op Fl f Ar archive
+.Op Fl G Ar group
+.Op Fl o Ar options
+.Op Fl p Ar string
+.Op Fl s Ar replstr
+.Op Fl T Ar range
+.Op Fl U Ar user
+.Op Ar pattern ...
+.Nm pax
+.Fl w
+.Op Fl 0adHijLOPtuvXz
+.Op Fl B Ar bytes
+.Op Fl b Ar blocksize
+.Op Fl f Ar archive
+.Op Fl G Ar group
+.Op Fl o Ar options
+.Op Fl s Ar replstr
+.Op Fl T Ar range
+.Op Fl U Ar user
+.Op Fl x Ar format
+.Op Ar
+.Nm pax
+.Fl rw
+.Op Fl 0DdHijkLlnOPtuvXYZ
+.Op Fl G Ar group
+.Op Fl p Ar string
+.Op Fl s Ar replstr
+.Op Fl T Ar range
+.Op Fl U Ar user
+.Op Ar
+.Ar directory
+.Sh DESCRIPTION
+.Nm
+will read, write, and list the members of an archive file
+and will copy directory hierarchies.
+.Nm
+operation is independent of the specific archive format
+and supports a wide variety of different archive formats.
+A list of supported archive formats can be found under the description of the
+.Fl x
+option.
+.Pp
+The presence of the
+.Fl r
+and the
+.Fl w
+options specifies which of the following functional modes
+.Nm
+will operate under:
+.Em list , read , write ,
+and
+.Em copy .
+.Bl -tag -width 6n
+.It Aq none
+.Em List .
+.Nm
+will write to standard output
+a table of contents of the members of the archive file read from
+standard input, whose pathnames match the specified
+.Ar pattern
+arguments.
+The table of contents contains one filename per line
+and is written using single line buffering.
+.It Fl r
+.Em Read .
+.Nm
+extracts the members of the archive file read from the standard input,
+with pathnames matching the specified
+.Ar pattern
+arguments.
+The archive format and blocking is automatically determined on input.
+When an extracted file is a directory, the entire file hierarchy
+rooted at that directory is extracted.
+All extracted files are created relative to the current file hierarchy.
+The setting of ownership, access and modification times, and file mode of
+the extracted files are discussed in more detail under the
+.Fl p
+option.
+.It Fl w
+.Em Write .
+.Nm
+writes an archive containing the
+.Ar file
+operands to standard output
+using the specified archive format.
+When no
+.Ar file
+operands are specified, a list of files to copy with one per line is read from
+standard input.
+When a
+.Ar file
+operand is also a directory, the entire file hierarchy rooted
+at that directory will be included.
+.It Fl rw
+.Em Copy .
+.Nm
+copies the
+.Ar file
+operands to the destination
+.Ar directory .
+When no
+.Ar file
+operands are specified, a list of files to copy with one per line is read from
+the standard input.
+When a
+.Ar file
+operand is also a directory the entire file
+hierarchy rooted at that directory will be included.
+The effect of the
+.Em copy
+is as if the copied files were written to an archive file and then
+subsequently extracted, except that there may be hard links between
+the original and the copied files (see the
+.Fl l
+option below).
+.Pp
+.Sy Warning :
+The destination
+.Ar directory
+must not be one of the
+.Ar file
+operands or a member of a file hierarchy rooted at one of the
+.Ar file
+operands.
+The result of a
+.Em copy
+under these conditions is unpredictable.
+.El
+.Pp
+While processing a damaged archive during a read or list operation,
+.Nm
+will attempt to recover from media defects and will search through the archive
+to locate and process the largest number of archive members possible (see the
+.Fl E
+option for more details on error handling).
+.Pp
+The
+.Ar directory
+operand specifies a destination directory pathname.
+If the
+.Ar directory
+operand does not exist, or it is not writable by the user,
+or it is not of type directory,
+.Nm
+will exit with a non-zero exit status.
+.Pp
+The
+.Ar pattern
+operand is used to select one or more pathnames of archive members.
+Archive members are selected using the pattern matching notation described
+by
+.Xr glob 7 .
+When the
+.Ar pattern
+operand is not supplied, all members of the archive will be selected.
+When a
+.Ar pattern
+matches a directory, the entire file hierarchy rooted at that directory will
+be selected.
+When a
+.Ar pattern
+operand does not select at least one archive member,
+.Nm
+will write these
+.Ar pattern
+operands in a diagnostic message to standard error
+and then exit with a non-zero exit status.
+.Pp
+The
+.Ar file
+operand specifies the pathname of a file to be copied or archived.
+When a
+.Ar file
+operand does not select at least one archive member,
+.Nm
+will write these
+.Ar file
+operand pathnames in a diagnostic message to standard error
+and then exit with a non-zero exit status.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl 0
+Use the NUL
+.Pq Ql \e0
+character as a pathname terminator, instead of newline
+.Pq Ql \en .
+This applies only to the pathnames read from standard input in
+the write and copy modes,
+and to the pathnames written to standard output in list mode.
+This option is expected to be used in concert with the
+.Fl print0
+function in
+.Xr find 1
+or the
+.Fl 0
+flag in
+.Xr xargs 1 .
+.It Fl a
+Append the given
+.Ar file
+operands
+to the end of an archive that was previously written.
+If an archive format is not specified with a
+.Fl x
+option, the format currently being used in the archive will be selected.
+Any attempt to append to an archive in a format different from the
+format already used in the archive will cause
+.Nm
+to exit immediately
+with a non-zero exit status.
+The blocking size used in the archive volume where writing starts
+will continue to be used for the remainder of that archive volume.
+.Pp
+.Sy Warning :
+Many storage devices are not able to support the operations necessary
+to perform an append operation.
+Any attempt to append to an archive stored on such a device may damage the
+archive or have other unpredictable results.
+Tape drives in particular are more likely to not support an append operation.
+An archive stored in a regular file system file or on a disk device will
+usually support an append operation.
+.It Fl B Ar bytes
+Limit the number of bytes written to a single archive volume to
+.Ar bytes .
+The
+.Ar bytes
+limit can end with
+.Sq Li m ,
+.Sq Li k ,
+or
+.Sq Li b
+to specify multiplication by 1048576 (1M), 1024 (1K) or 512, respectively.
+A pair of
+.Ar bytes
+limits can be separated by
+.Sq Li x
+to indicate a product.
+.Pp
+.Sy Warning :
+Only use this option when writing an archive to a device which supports
+an end of file read condition based on last (or largest) write offset
+(such as a regular file or a tape drive).
+The use of this option with a floppy or hard disk is not recommended.
+.It Fl b Ar blocksize
+When writing an archive,
+block the output at a positive decimal integer number of
+bytes per write to the archive file.
+The
+.Ar blocksize
+must be a multiple of 512 bytes with a maximum of 64512 bytes.
+Archive block sizes larger than 32256 bytes violate the POSIX
+standard and will not be portable to all systems.
+A
+.Ar blocksize
+can end with
+.Sq Li k
+or
+.Sq Li b
+to specify multiplication by 1024 (1K) or 512, respectively.
+A pair of blocksizes can be separated by
+.Sq Li x
+to indicate a product.
+A specific archive device may impose additional restrictions on the size
+of blocking it will support.
+When blocking is not specified, the default
+.Ar blocksize
+is dependent on the specific archive format being used (see the
+.Fl x
+option).
+.It Fl c
+Match all file or archive members
+.Em except
+those specified by the
+.Ar pattern
+and
+.Ar file
+operands.
+.It Fl D
+This option is the same as the
+.Fl u
+option, except that the file inode change time is checked instead of the
+file modification time.
+The file inode change time can be used to select files whose inode information
+(e.g., UID, GID, etc.) is newer than a copy of the file in the destination
+.Ar directory .
+.It Fl d
+Cause files of type directory being copied or archived, or archive members of
+type directory being extracted, to match only the directory file or archive
+member and not the file hierarchy rooted at the directory.
+.It Fl E Ar limit
+Limit the number of consecutive read faults while trying to read a flawed
+archive to
+.Ar limit .
+With a positive
+.Ar limit ,
+.Nm
+will attempt to recover from an archive read error and will
+continue processing starting with the next file stored in the archive.
+A
+.Ar limit
+of 0 will cause
+.Nm
+to stop operation after the first read error is detected on an archive volume.
+The default
+.Ar limit
+is a small positive number of retries.
+.It Fl f Ar archive
+Specify
+.Ar archive
+as the pathname of the input or output archive, overriding the default
+standard input (for list and read)
+or standard output
+(for write).
+A single archive may span multiple files and different archive devices.
+When required,
+.Nm
+will prompt for the pathname of the file or device of the next volume in the
+archive.
+.It Fl G Ar group
+Select a file based on its
+.Ar group
+name, or when starting with a
+.Cm # ,
+a numeric GID.
+A
+.Ql \e
+can be used to escape the
+.Cm # .
+Multiple
+.Fl G
+options may be supplied and checking stops with the first match.
+.It Fl H
+Follow only command-line symbolic links while performing a physical file
+system traversal.
+.It Fl i
+Interactively rename files or archive members.
+For each archive member matching a
+.Ar pattern
+operand or each file matching a
+.Ar file
+operand,
+.Nm
+will prompt to
+.Pa /dev/tty
+giving the name of the file, its file mode, and its modification time.
+.Nm
+will then read a line from
+.Pa /dev/tty .
+If this line is blank, the file or archive member is skipped.
+If this line consists of a single period, the
+file or archive member is processed with no modification to its name.
+Otherwise, its name is replaced with the contents of the line.
+.Nm
+will immediately exit with a non-zero exit status if
+.Dv EOF
+is encountered when reading a response or if
+.Pa /dev/tty
+cannot be opened for reading and writing.
+.It Fl j
+Use bzip2 to compress (decompress) the archive while writing (reading).
+The bzip2 utility must be installed separately.
+Incompatible with
+.Fl a .
+.It Fl k
+Do not overwrite existing files.
+.It Fl L
+Follow all symbolic links to perform a logical file system traversal.
+.It Fl l
+(The lowercase letter
+.Dq ell . )
+Link files.
+In copy mode
+.Pq Fl r Fl w ,
+hard links are made between the source and destination file hierarchies
+whenever possible.
+.It Fl n
+Select the first archive member that matches each
+.Ar pattern
+operand.
+No more than one archive member is matched for each
+.Ar pattern .
+When members of type directory are matched, the file hierarchy rooted at that
+directory is also matched (unless
+.Fl d
+is also specified).
+.It Fl O
+Force the archive to be one volume.
+If a volume ends prematurely,
+.Nm
+will not prompt for a new volume.
+This option can be useful for
+automated tasks where error recovery cannot be performed by a human.
+.It Fl o Ar options
+Information to modify the algorithm for extracting or writing archive files
+which is specific to the archive format specified by
+.Fl x .
+In general,
+.Ar options
+take the form:
+.Ar name Ns = Ns Ar value .
+.Pp
+The following options are available for the
+.Cm ustar
+and old
+.Bx
+.Cm tar
+formats:
+.Pp
+.Bl -tag -width Ds -compact
+.It Cm write_opt=nodir
+When writing archives, omit the storage of directories.
+.El
+.It Fl P
+Do not follow symbolic links, perform a physical file system traversal.
+This is the default mode.
+.It Fl p Ar string
+Specify one or more file characteristic options (privileges).
+The
+.Ar string
+option-argument is a string specifying file characteristics to be retained or
+discarded on extraction.
+The string consists of the specification characters
+.Cm a , e , m , o ,
+and
+.Cm p .
+Multiple characteristics can be concatenated within the same string
+and multiple
+.Fl p
+options can be specified.
+The meanings of the specification characters are as follows:
+.Bl -tag -width 2n
+.It Cm a
+Do not preserve file access times.
+By default, file access times are preserved whenever possible.
+.It Cm e
+.Dq Preserve everything ,
+the user ID, group ID, file mode bits,
+file access time, and file modification time.
+This is intended to be used by root,
+someone with all the appropriate privileges, in order to preserve all
+aspects of the files as they are recorded in the archive.
+The
+.Cm e
+flag is the sum of the
+.Cm o
+and
+.Cm p
+flags.
+.It Cm m
+Do not preserve file modification times.
+By default, file modification times are preserved whenever possible.
+.It Cm o
+Preserve the user ID and group ID.
+.It Cm p
+.Dq Preserve
+the file mode bits.
+This is intended to be used by a user with regular privileges
+who wants to preserve all aspects of the file other than the ownership.
+The file times are preserved by default, but two other flags are offered to
+disable this and use the time of extraction instead.
+.El
+.Pp
+In the preceding list,
+.Sq preserve
+indicates that an attribute stored in the archive is given to the
+extracted file, subject to the permissions of the invoking
+process.
+Otherwise the attribute of the extracted file is determined as
+part of the normal file creation action.
+If neither the
+.Cm e
+nor the
+.Cm o
+specification character is specified, or the user ID and group ID are not
+preserved for any reason,
+.Nm
+will not set the
+.Dv S_ISUID
+(setuid) and
+.Dv S_ISGID
+(setgid) bits of the file mode.
+If the preservation of any of these items fails for any reason,
+.Nm
+will write a diagnostic message to standard error.
+Failure to preserve these items will affect the final exit status,
+but will not cause the extracted file to be deleted.
+If the file characteristic letters in any of the string option-arguments are
+duplicated or conflict with each other, the one(s) given last will take
+precedence.
+For example, if
+.Fl p Ar eme
+is specified, file modification times are still preserved.
+.It Fl r
+Read an archive file from standard input
+and extract the specified
+.Ar file
+operands.
+If any intermediate directories are needed in order to extract an archive
+member, these directories will be created as if
+.Xr mkdir 2
+was called with the bitwise OR of
+.Dv S_IRWXU , S_IRWXG ,
+and
+.Dv S_IRWXO
+as the mode argument.
+When the selected archive format supports the specification of linked
+files and these files cannot be linked while the archive is being extracted,
+.Nm
+will write a diagnostic message to standard error
+and exit with a non-zero exit status at the completion of operation.
+.It Fl s Ar replstr
+Modify the archive member names according to the substitution expression
+.Ar replstr ,
+using the syntax of the
+.Xr ed 1
+utility regular expressions.
+.Ar file
+or
+.Ar pattern
+arguments may be given to restrict the list of archive members to those
+specified.
+.Pp
+The format of these regular expressions is:
+.Pp
+.Dl /old/new/[gp]
+.Pp
+As in
+.Xr ed 1 ,
+.Ar old
+is a basic regular expression (see
+.Xr re_format 7 )
+and
+.Ar new
+can contain an ampersand
+.Pq Ql & ,
+.Ql \e Ns Em n
+(where
+.Em n
+is a digit) back-references,
+or subexpression matching.
+The
+.Ar old
+string may also contain newline characters.
+Any non-null character can be used as a delimiter
+.Po
+.Ql /
+is shown here
+.Pc .
+Multiple
+.Fl s
+expressions can be specified.
+The expressions are applied in the order they are specified on the
+command line, terminating with the first successful substitution.
+.Pp
+The optional trailing
+.Cm g
+continues to apply the substitution expression to the pathname substring,
+which starts with the first character following the end of the last successful
+substitution.
+The first unsuccessful substitution stops the operation of the
+.Cm g
+option.
+The optional trailing
+.Cm p
+will cause the final result of a successful substitution to be written to
+standard error in the following format:
+.Pp
+.D1 Em original-pathname No >> Em new-pathname
+.Pp
+File or archive member names that substitute to the empty string
+are not selected and will be skipped.
+.It Fl T Ar range
+Allow files to be selected based on a file modification or inode change
+time falling within the specified time range.
+The range has the format:
+.Sm off
+.Bd -filled -offset indent
+.Op Ar from_date
+.Op \&, Ar to_date
+.Op / Oo Cm c Oc Op Cm m
+.Ed
+.Sm on
+.Pp
+The dates specified by
+.Ar from_date
+to
+.Ar to_date
+are inclusive.
+If only a
+.Ar from_date
+is supplied, all files with a modification or inode change time
+equal to or younger are selected.
+If only a
+.Ar to_date
+is supplied, all files with a modification or inode change time
+equal to or older will be selected.
+When the
+.Ar from_date
+is equal to the
+.Ar to_date ,
+only files with a modification or inode change time of exactly that
+time will be selected.
+.Pp
+When
+.Nm
+is in write or copy mode, the optional trailing field
+.Oo Cm c Oc Ns Op Cm m
+can be used to determine which file time (inode change, file modification or
+both) are used in the comparison.
+If neither is specified, the default is to use file modification time only.
+The
+.Cm m
+specifies the comparison of file modification time (the time when
+the file was last written).
+The
+.Cm c
+specifies the comparison of inode change time (the time when the file
+inode was last changed; e.g., a change of owner, group, mode, etc).
+When
+.Cm c
+and
+.Cm m
+are both specified, then the modification and inode change times are
+both compared.
+.Pp
+The inode change time comparison is useful in selecting files whose
+attributes were recently changed or selecting files which were recently
+created and had their modification time reset to an older time (as what
+happens when a file is extracted from an archive and the modification time
+is preserved).
+Time comparisons using both file times is useful when
+.Nm
+is used to create a time based incremental archive (only files that were
+changed during a specified time range will be archived).
+.Pp
+A time range is made up of six different fields and each field must contain two
+digits.
+The format is:
+.Pp
+.Dl [[[[[cc]yy]mm]dd]HH]MM[.SS]
+.Pp
+Where
+.Ar cc
+is the first two digits of the year (the century),
+.Ar yy
+is the last two digits of the year,
+the first
+.Ar mm
+is the month (from 01 to 12),
+.Ar dd
+is the day of the month (from 01 to 31),
+.Ar HH
+is the hour of the day (from 00 to 23),
+.Ar MM
+is the minute (from 00 to 59),
+and
+.Ar SS
+is the seconds (from 00 to 59).
+The minute field
+.Ar MM
+is required, while the other fields are optional and must be added in the
+following order:
+.Ar HH , dd , mm ,
+.Ar yy , cc .
+.Pp
+The
+.Ar SS
+field may be added independently of the other fields.
+Time ranges are relative to the current time, so
+.Ic -T 1234/cm
+would select all files with a modification or inode change time
+of 12:34 PM today or later.
+Multiple
+.Fl T
+time range can be supplied and checking stops with the first match.
+.It Fl t
+Reset the access times of any file or directory read or accessed by
+.Nm
+to be the same as they were before being read or accessed by
+.Nm pax .
+.It Fl U Ar user
+Select a file based on its
+.Ar user
+name, or when starting with a
+.Cm # ,
+a numeric UID.
+A
+.Ql \e
+can be used to escape the
+.Cm # .
+Multiple
+.Fl U
+options may be supplied and checking stops with the first match.
+.It Fl u
+Ignore files that are older (having a less recent file modification time)
+than a pre-existing file or archive member with the same name.
+During read,
+an archive member with the same name as a file in the file system will be
+extracted if the archive member is newer than the file.
+During write,
+a file system member with the same name as an archive member will be
+written to the archive if it is newer than the archive member.
+During copy,
+the file in the destination hierarchy is replaced by the file in the source
+hierarchy or by a link to the file in the source hierarchy if the file in
+the source hierarchy is newer.
+.It Fl v
+During a list operation, produce a verbose table of contents using the format of the
+.Xr ls 1
+utility with the
+.Fl l
+option.
+For pathnames representing a hard link to a previous member of the archive,
+the output has the format:
+.Pp
+.Dl Em ls -l listing No == Em link-name
+.Pp
+For pathnames representing a symbolic link, the output has the format:
+.Pp
+.Dl Em ls -l listing No -> Em link-name
+.Pp
+Where
+.Em ls -l listing
+is the output format specified by the
+.Xr ls 1
+utility when used with the
+.Fl l
+option.
+Otherwise for all the other operational modes
+(read, write, and copy),
+pathnames are written and flushed to standard error
+without a trailing newline
+as soon as processing begins on that file or
+archive member.
+The trailing newline
+is not buffered and is written only after the file has been read or written.
+.It Fl w
+Write files to the standard output
+in the specified archive format.
+When no
+.Ar file
+operands are specified, standard input
+is read for a list of pathnames with one per line without any leading or
+trailing
+.Aq blanks .
+.It Fl X
+When traversing the file hierarchy specified by a pathname,
+do not descend into directories that have a different device ID.
+See the
+.Li st_dev
+field as described in
+.Xr stat 2
+for more information about device IDs.
+.It Fl x Ar format
+Specify the output archive format, with the default format being
+.Cm ustar .
+.Nm
+currently supports the following formats:
+.Bl -tag -width "sv4cpio"
+.It Cm bcpio
+The old binary cpio format.
+The default blocksize for this format is 5120 bytes.
+This format is not very portable and should not be used when other formats
+are available.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm cpio
+The extended cpio interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4cpio
+The System V release 4 cpio.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm sv4crc
+The System V release 4 cpio with file CRC checksums.
+The default blocksize for this format is 5120 bytes.
+Inode and device information about a file (used for detecting file hard links
+by this format), which may be truncated by this format, is detected by
+.Nm
+and is repaired.
+.It Cm tar
+The old
+.Bx
+tar format as found in
+.Bx 4.3 .
+The default blocksize for this format is 10240 bytes.
+Pathnames stored by this format must be 100 characters or less in length.
+Only regular files, hard links, soft links, and directories
+will be archived (other file system types are not supported).
+For backwards compatibility with even older tar formats, a
+.Fl o
+option can be used when writing an archive to omit the storage of directories.
+This option takes the form:
+.Pp
+.Dl Fl o Cm write_opt=nodir
+.It Cm ustar
+The extended tar interchange format specified in the
+.St -p1003.2
+standard.
+The default blocksize for this format is 10240 bytes.
+Filenames stored by this format must be 100 characters or less in length;
+the total pathname must be 256 characters or less.
+.El
+.Pp
+.Nm
+will detect and report any file that it is unable to store or extract
+as the result of any specific archive format restrictions.
+The individual archive formats may impose additional restrictions on use.
+Typical archive format restrictions include (but are not limited to):
+file pathname length, file size, link pathname length, and the type of the
+file.
+.It Fl Y
+This option is the same as the
+.Fl D
+option, except that the inode change time is checked using the
+pathname created after all the file name modifications have completed.
+.It Fl Z
+This option is the same as the
+.Fl u
+option, except that the modification time is checked using the
+pathname created after all the file name modifications have completed.
+.It Fl z
+Use
+.Xr gzip 1
+to compress (decompress) the archive while writing (reading).
+Incompatible with
+.Fl a .
+.El
+.Pp
+The options that operate on the names of files or archive members
+.Po Fl c ,
+.Fl i ,
+.Fl j ,
+.Fl n ,
+.Fl s ,
+.Fl u ,
+.Fl v ,
+.Fl D ,
+.Fl G ,
+.Fl T ,
+.Fl U ,
+.Fl Y ,
+and
+.Fl Z
+.Pc
+interact as follows.
+.Pp
+When extracting files during a read operation, archive members are
+.Sq selected ,
+based only on the user specified pattern operands as modified by the
+.Fl c ,
+.Fl n ,
+.Fl u ,
+.Fl D ,
+.Fl G ,
+.Fl T ,
+.Fl U
+options.
+Then any
+.Fl s
+and
+.Fl i
+options will modify in that order, the names of these selected files.
+Then the
+.Fl Y
+and
+.Fl Z
+options will be applied based on the final pathname.
+Finally, the
+.Fl v
+option will write the names resulting from these modifications.
+.Pp
+When archiving files during a write operation,
+or copying files during a copy operation,
+archive members are
+.Sq selected ,
+based only on the user specified pathnames as modified by the
+.Fl n ,
+.Fl u ,
+.Fl D ,
+.Fl G ,
+.Fl T ,
+and
+.Fl U
+options (the
+.Fl D
+option only applies during a copy operation).
+Then any
+.Fl s
+and
+.Fl i
+options will modify in that order, the names of these selected files.
+Then during a copy operation the
+.Fl Y
+and the
+.Fl Z
+options will be applied based on the final pathname.
+Finally, the
+.Fl v
+option will write the names resulting from these modifications.
+.Pp
+When one or both of the
+.Fl u
+or
+.Fl D
+options are specified along with the
+.Fl n
+option, a file is not considered selected unless it is newer
+than the file to which it is compared.
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev TMPDIR
+Path in which to store temporary files.
+.El
+.Sh EXIT STATUS
+.Ex -std pax
+.Sh EXAMPLES
+Copy the contents of the current directory to the device
+.Pa /dev/rst0 :
+.Pp
+.Dl $ pax -w -f /dev/rst0 \&.
+.Pp
+Give the verbose table of contents for an archive stored in
+.Pa filename :
+.Pp
+.Dl $ pax -v -f filename
+.Pp
+This sequence of commands will copy the entire
+.Pa olddir
+directory hierarchy to
+.Pa newdir :
+.Bd -literal -offset indent
+$ mkdir newdir
+$ cd olddir
+$ pax -rw . ../newdir
+.Ed
+.Pp
+Extract files from the archive
+.Pa a.pax .
+Files rooted in
+.Pa /usr
+are extracted relative to the current working directory;
+all other files are extracted to their unmodified path.
+.Pp
+.Dl $ pax -r -s ',^/usr/,,' -f a.pax
+.Pp
+This can be used to interactively select the files to copy from the
+current directory to
+.Pa dest_dir :
+.Pp
+.Dl $ pax -rw -i \&. dest_dir
+.Pp
+Extract all files from the archive
+.Pa a.pax
+which are owned by
+.Em root
+with group
+.Em bin
+and preserve all file permissions:
+.Pp
+.Dl $ pax -r -pe -U root -G bin -f a.pax
+.Pp
+Update (and list) only those files in the destination directory
+.Pa /backup
+which are older (less recent inode change or file modification times) than
+files with the same name found in the source file tree
+.Pa home :
+.Pp
+.Dl $ pax -r -w -v -Y -Z home /backup
+.Sh DIAGNOSTICS
+Whenever
+.Nm
+cannot create a file or a link when reading an archive or cannot
+find a file when writing an archive, or cannot preserve the user ID,
+group ID, or file mode when the
+.Fl p
+option is specified, a diagnostic message is written to standard error
+and a non-zero exit status will be returned, but processing will continue.
+In the case where
+.Nm
+cannot create a link to a file,
+.Nm
+will not create a second copy of the file.
+.Pp
+If the extraction of a file from an archive is prematurely terminated by
+a signal or error,
+.Nm
+may have only partially extracted a file the user wanted.
+Additionally, the file modes of extracted files and directories
+may have incorrect file bits, and the modification and access times may be
+wrong.
+.Pp
+If the creation of an archive is prematurely terminated by a signal or error,
+.Nm
+may have only partially created the archive, which may violate the specific
+archive format specification.
+.Pp
+If while doing a copy,
+.Nm
+detects a file is about to overwrite itself, the file is not copied,
+a diagnostic message is written to standard error
+and when
+.Nm
+completes it will exit with a non-zero exit status.
+.Sh SEE ALSO
+.Xr cpio 1 ,
+.Xr tar 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification,
+except that the
+.Cm pax
+archive format and the
+.Cm listopt
+keyword are unsupported.
+.Pp
+The flags
+.Op Fl 0BDEGjOPTUYZz ,
+the archive formats
+.Cm bcpio ,
+.Cm sv4cpio ,
+.Cm sv4crc ,
+and
+.Cm tar ,
+the
+.Cm b , k ,
+and
+.Cm x
+additions to the
+.Fl b
+flag,
+and the flawed archive handling during list and read operations
+are extensions to that specification.
+.Sh HISTORY
+A
+.Nm
+utility appeared in
+.Bx 4.4 .
+.Sh AUTHORS
+.An Keith Muller
+at the University of California, San Diego.
diff --git a/bin/pax/pax.c b/bin/pax/pax.c
new file mode 100644
index 0000000..4d0fc68
--- /dev/null
+++ b/bin/pax/pax.c
@@ -0,0 +1,446 @@
+/* $OpenBSD: pax.c,v 1.53 2019/06/28 13:34:59 deraadt Exp $ */
+/* $NetBSD: pax.c,v 1.5 1996/03/26 23:54:20 mrg Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <paths.h>
+#include <pwd.h>
+#include <stdio.h>
+
+#include "pax.h"
+#include "extern.h"
+static int gen_init(void);
+
+/*
+ * PAX main routines, general globals and some simple start up routines
+ */
+
+/*
+ * Variables that can be accessed by any routine within pax
+ */
+int act = DEFOP; /* read/write/append/copy */
+FSUB *frmt = NULL; /* archive format type */
+int cflag; /* match all EXCEPT pattern/file */
+int cwdfd; /* starting cwd */
+int dflag; /* directory member match only */
+int iflag; /* interactive file/archive rename */
+int kflag; /* do not overwrite existing files */
+int lflag; /* use hard links when possible */
+int nflag; /* select first archive member match */
+int tflag; /* restore access time after read */
+int uflag; /* ignore older modification time files */
+int vflag; /* produce verbose output */
+int Dflag; /* same as uflag except inode change time */
+int Hflag; /* follow command line symlinks (write only) */
+int Lflag; /* follow symlinks when writing */
+int Nflag; /* only use numeric uid and gid */
+int Xflag; /* archive files with same device id only */
+int Yflag; /* same as Dflag except after name mode */
+int Zflag; /* same as uflag except after name mode */
+int zeroflag; /* use \0 as pathname terminator */
+int vfpart; /* is partial verbose output in progress */
+int patime = 1; /* preserve file access time */
+int pmtime = 1; /* preserve file modification times */
+int nodirs; /* do not create directories as needed */
+int pmode; /* preserve file mode bits */
+int pids; /* preserve file uid/gid */
+int rmleadslash = 0; /* remove leading '/' from pathnames */
+int exit_val; /* exit value */
+int docrc; /* check/create file crc */
+char *dirptr; /* destination dir in a copy */
+char *argv0; /* root of argv[0] */
+enum op_mode op_mode; /* what program are we acting as? */
+sigset_t s_mask; /* signal mask for cleanup critical sect */
+FILE *listf; /* file pointer to print file list to */
+int listfd = STDERR_FILENO; /* fd matching listf, for sighandler output */
+char *tempfile; /* tempfile to use for mkstemp(3) */
+char *tempbase; /* basename of tempfile to use for mkstemp(3) */
+
+/*
+ * PAX - Portable Archive Interchange
+ *
+ * A utility to read, write, and write lists of the members of archive
+ * files and copy directory hierarchies. A variety of archive formats
+ * are supported (some are described in POSIX 1003.1 10.1):
+ *
+ * ustar - 10.1.1 extended tar interchange format
+ * cpio - 10.1.2 extended cpio interchange format
+ * tar - old BSD 4.3 tar format
+ * binary cpio - old cpio with binary header format
+ * sysVR4 cpio - with and without CRC
+ *
+ * This version is a superset of IEEE Std 1003.2b-d3
+ *
+ * Summary of Extensions to the IEEE Standard:
+ *
+ * 1 READ ENHANCEMENTS
+ * 1.1 Operations which read archives will continue to operate even when
+ * processing archives which may be damaged, truncated, or fail to meet
+ * format specs in several different ways. Damaged sections of archives
+ * are detected and avoided if possible. Attempts will be made to resync
+ * archive read operations even with badly damaged media.
+ * 1.2 Blocksize requirements are not strictly enforced on archive read.
+ * Tapes which have variable sized records can be read without errors.
+ * 1.3 The user can specify via the non-standard option flag -E if error
+ * resync operation should stop on a media error, try a specified number
+ * of times to correct, or try to correct forever.
+ * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks
+ * of all zeros will be restored with holes appropriate for the target
+ * filesystem
+ * 1.5 The user is notified whenever something is found during archive
+ * read operations which violates spec (but the read will continue).
+ * 1.6 Multiple archive volumes can be read and may span over different
+ * archive devices
+ * 1.7 Rigidly restores all file attributes exactly as they are stored on the
+ * archive.
+ * 1.8 Modification change time ranges can be specified via multiple -T
+ * options. These allow a user to select files whose modification time
+ * lies within a specific time range.
+ * 1.9 Files can be selected based on owner (user name or uid) via one or more
+ * -U options.
+ * 1.10 Files can be selected based on group (group name or gid) via one o
+ * more -G options.
+ * 1.11 File modification time can be checked against existing file after
+ * name modification (-Z)
+ *
+ * 2 WRITE ENHANCEMENTS
+ * 2.1 Write operation will stop instead of allowing a user to create a flawed
+ * flawed archive (due to any problem).
+ * 2.2 Archives written by pax are forced to strictly conform to both the
+ * archive and pax the specific format specifications.
+ * 2.3 Blocking size and format is rigidly enforced on writes.
+ * 2.4 Formats which may exhibit header overflow problems (they have fields
+ * too small for large file systems, such as inode number storage), use
+ * routines designed to repair this problem. These techniques still
+ * conform to both pax and format specifications, but no longer truncate
+ * these fields. This removes any restrictions on using these archive
+ * formats on large file systems.
+ * 2.5 Multiple archive volumes can be written and may span over different
+ * archive devices
+ * 2.6 A archive volume record limit allows the user to specify the number
+ * of bytes stored on an archive volume. When reached the user is
+ * prompted for the next archive volume. This is specified with the
+ * non-standard -B flag. The limit is rounded up to the next blocksize.
+ * 2.7 All archive padding during write use zero filled sections. This makes
+ * it much easier to pull data out of flawed archive during read
+ * operations.
+ * 2.8 Access time reset with the -t applies to all file nodes (including
+ * directories).
+ * 2.9 Symbolic links can be followed with -L (optional in the spec).
+ * 2.10 Modification or inode change time ranges can be specified via
+ * multiple -T options. These allow a user to select files whose
+ * modification or inode change time lies within a specific time range.
+ * 2.11 Files can be selected based on owner (user name or uid) via one or more
+ * -U options.
+ * 2.12 Files can be selected based on group (group name or gid) via one o
+ * more -G options.
+ * 2.13 Symlinks which appear on the command line can be followed (without
+ * following other symlinks; -H flag)
+ *
+ * 3 COPY ENHANCEMENTS
+ * 3.1 Sparse files (lseek holes) can be copied without expanding the holes
+ * into zero filled blocks. The file copy is created with holes which are
+ * appropriate for the target filesystem
+ * 3.2 Access time as well as modification time on copied file trees can be
+ * preserved with the appropriate -p options.
+ * 3.3 Access time reset with the -t applies to all file nodes (including
+ * directories).
+ * 3.4 Symbolic links can be followed with -L (optional in the spec).
+ * 3.5 Modification or inode change time ranges can be specified via
+ * multiple -T options. These allow a user to select files whose
+ * modification or inode change time lies within a specific time range.
+ * 3.6 Files can be selected based on owner (user name or uid) via one or more
+ * -U options.
+ * 3.7 Files can be selected based on group (group name or gid) via one o
+ * more -G options.
+ * 3.8 Symlinks which appear on the command line can be followed (without
+ * following other symlinks; -H flag)
+ * 3.9 File inode change time can be checked against existing file before
+ * name modification (-D)
+ * 3.10 File inode change time can be checked against existing file after
+ * name modification (-Y)
+ * 3.11 File modification time can be checked against existing file after
+ * name modification (-Z)
+ *
+ * 4 GENERAL ENHANCEMENTS
+ * 4.1 Internal structure is designed to isolate format dependent and
+ * independent functions. Formats are selected via a format driver table.
+ * This encourages the addition of new archive formats by only having to
+ * write those routines which id, read and write the archive header.
+ */
+
+/*
+ * main()
+ * parse options, set up and operate as specified by the user.
+ * any operational flaw will set exit_val to non-zero
+ * Return: 0 if ok, 1 otherwise
+ */
+
+int
+main(int argc, char **argv)
+{
+ char *tmpdir;
+ size_t tdlen;
+
+ listf = stderr;
+
+ /*
+ * Keep a reference to cwd, so we can always come back home.
+ */
+ cwdfd = open(".", O_RDONLY | O_CLOEXEC);
+ if (cwdfd == -1) {
+ syswarn(1, errno, "Can't open current working directory.");
+ return(exit_val);
+ }
+
+ /*
+ * Where should we put temporary files?
+ */
+ if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
+ tmpdir = _PATH_TMP;
+ tdlen = strlen(tmpdir);
+ while (tdlen > 0 && tmpdir[tdlen - 1] == '/')
+ tdlen--;
+ tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE));
+ if (tempfile == NULL) {
+ paxwarn(1, "Cannot allocate memory for temp file name.");
+ return(exit_val);
+ }
+ if (tdlen)
+ memcpy(tempfile, tmpdir, tdlen);
+ tempbase = tempfile + tdlen;
+ *tempbase++ = '/';
+
+ /*
+ * keep passwd and group files open for faster lookups.
+ */
+ setpassent(1);
+ setgroupent(1);
+
+ /*
+ * parse options, determine operational mode, general init
+ */
+ options(argc, argv);
+ if ((gen_init() < 0) || (tty_init() < 0))
+ return(exit_val);
+
+ /*
+ * pmode needs to restore setugid bits when extracting or copying,
+ * so can't pledge at all then.
+ */
+ if (pmode == 0 || (act != EXTRACT && act != COPY)) {
+ if (pledge("stdio rpath wpath cpath fattr dpath getpw proc exec tape",
+ NULL) == -1)
+ err(1, "pledge");
+
+ /* Copy mode, or no gzip -- don't need to fork/exec. */
+ if (gzip_program == NULL || act == COPY) {
+ if (pledge("stdio rpath wpath cpath fattr dpath getpw tape",
+ NULL) == -1)
+ err(1, "pledge");
+ }
+ }
+
+ /*
+ * select a primary operation mode
+ */
+ switch (act) {
+ case EXTRACT:
+ extract();
+ break;
+ case ARCHIVE:
+ archive();
+ break;
+ case APPND:
+ if (gzip_program != NULL)
+ errx(1, "can not gzip while appending");
+ append();
+ break;
+ case COPY:
+ copy();
+ break;
+ default:
+ case LIST:
+ list();
+ break;
+ }
+ return(exit_val);
+}
+
+/*
+ * sig_cleanup()
+ * when interrupted we try to do whatever delayed processing we can.
+ * This is not critical, but we really ought to limit our damage when we
+ * are aborted by the user.
+ * Return:
+ * never....
+ */
+
+void
+sig_cleanup(int which_sig)
+{
+ /*
+ * restore modes and times for any dirs we may have created
+ * or any dirs we may have read.
+ */
+
+ /* paxwarn() uses stdio; fake it as well as we can */
+ if (which_sig == SIGXCPU)
+ dprintf(STDERR_FILENO, "\nCPU time limit reached, cleaning up.\n");
+ else
+ dprintf(STDERR_FILENO, "\nSignal caught, cleaning up.\n");
+
+ ar_close(1);
+ sltab_process(1);
+ proc_dir(1);
+ if (tflag)
+ atdir_end();
+ _exit(1);
+}
+
+/*
+ * setup_sig()
+ * set a signal to be caught, but only if it isn't being ignored already
+ */
+
+static int
+setup_sig(int sig, const struct sigaction *n_hand)
+{
+ struct sigaction o_hand;
+
+ if (sigaction(sig, NULL, &o_hand) == -1)
+ return (-1);
+
+ if (o_hand.sa_handler == SIG_IGN)
+ return (0);
+
+ return (sigaction(sig, n_hand, NULL));
+}
+
+/*
+ * gen_init()
+ * general setup routines. Not all are required, but they really help
+ * when dealing with a medium to large sized archives.
+ */
+
+static int
+gen_init(void)
+{
+ struct rlimit reslimit;
+ struct sigaction n_hand;
+
+ /*
+ * Really needed to handle large archives. We can run out of memory for
+ * internal tables really fast when we have a whole lot of files...
+ */
+ if (getrlimit(RLIMIT_DATA , &reslimit) == 0){
+ reslimit.rlim_cur = reslimit.rlim_max;
+ (void)setrlimit(RLIMIT_DATA , &reslimit);
+ }
+
+ /*
+ * should file size limits be waived? if the os limits us, this is
+ * needed if we want to write a large archive
+ */
+ if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){
+ reslimit.rlim_cur = reslimit.rlim_max;
+ (void)setrlimit(RLIMIT_FSIZE , &reslimit);
+ }
+
+ /*
+ * increase the size the stack can grow to
+ */
+ if (getrlimit(RLIMIT_STACK , &reslimit) == 0){
+ reslimit.rlim_cur = reslimit.rlim_max;
+ (void)setrlimit(RLIMIT_STACK , &reslimit);
+ }
+
+ /*
+ * not really needed, but doesn't hurt
+ */
+ if (getrlimit(RLIMIT_RSS , &reslimit) == 0){
+ reslimit.rlim_cur = reslimit.rlim_max;
+ (void)setrlimit(RLIMIT_RSS , &reslimit);
+ }
+
+ /*
+ * signal handling to reset stored directory times and modes. Since
+ * we deal with broken pipes via failed writes we ignore it. We also
+ * deal with any file size limit through failed writes. Cpu time
+ * limits are caught and a cleanup is forced.
+ */
+ if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) ||
+ (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) ||
+ (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) ||
+ (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) {
+ paxwarn(1, "Unable to set up signal mask");
+ return(-1);
+ }
+
+ /* snag the fd to be used from the signal handler */
+ listfd = fileno(listf);
+
+ memset(&n_hand, 0, sizeof n_hand);
+ n_hand.sa_mask = s_mask;
+ n_hand.sa_flags = 0;
+ n_hand.sa_handler = sig_cleanup;
+
+ if (setup_sig(SIGHUP, &n_hand) ||
+ setup_sig(SIGTERM, &n_hand) ||
+ setup_sig(SIGINT, &n_hand) ||
+ setup_sig(SIGQUIT, &n_hand) ||
+ setup_sig(SIGXCPU, &n_hand))
+ goto out;
+
+ n_hand.sa_handler = SIG_IGN;
+ if ((sigaction(SIGPIPE, &n_hand, NULL) == -1) ||
+ (sigaction(SIGXFSZ, &n_hand, NULL) == -1))
+ goto out;
+ return(0);
+
+ out:
+ syswarn(1, errno, "Unable to set up signal handler");
+ return(-1);
+}
diff --git a/bin/pax/pax.h b/bin/pax/pax.h
new file mode 100644
index 0000000..65d445a
--- /dev/null
+++ b/bin/pax/pax.h
@@ -0,0 +1,262 @@
+/* $OpenBSD: pax.h,v 1.29 2017/09/12 17:11:11 otto Exp $ */
+/* $NetBSD: pax.h,v 1.3 1995/03/21 09:07:41 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pax.h 8.2 (Berkeley) 4/18/94
+ */
+
+/*
+ * BSD PAX global data structures and constants.
+ */
+
+#define MAXBLK 64512 /* MAX blocksize supported (posix SPEC) */
+ /* WARNING: increasing MAXBLK past 32256 */
+ /* will violate posix spec. */
+#define MAXBLK_POSIX 32256 /* MAX blocksize supported as per POSIX */
+#define BLKMULT 512 /* blocksize must be even mult of 512 bytes */
+ /* Don't even think of changing this */
+#define DEVBLK 8192 /* default read blksize for devices */
+#define FILEBLK 10240 /* default read blksize for files */
+#define PAXPATHLEN 3072 /* maximum path length for pax. MUST be */
+ /* longer than the system PATH_MAX */
+
+/*
+ * Pax modes of operation
+ */
+#define LIST 0 /* List the file in an archive */
+#define EXTRACT 1 /* extract the files in an archive */
+#define ARCHIVE 2 /* write a new archive */
+#define APPND 3 /* append to the end of an archive */
+#define COPY 4 /* copy files to destination dir */
+#define DEFOP LIST /* if no flags default is to LIST */
+
+/*
+ * Device type of the current archive volume
+ */
+#define ISREG 0 /* regular file */
+#define ISCHR 1 /* character device */
+#define ISBLK 2 /* block device */
+#define ISTAPE 3 /* tape drive */
+#define ISPIPE 4 /* pipe/socket */
+
+/*
+ * Pattern matching structure
+ *
+ * Used to store command line patterns
+ */
+typedef struct pattern {
+ char *pstr; /* pattern to match, user supplied */
+ char *pend; /* end of a prefix match */
+ char *chdname; /* the dir to change to if not NULL. */
+ size_t plen; /* length of pstr */
+ int flgs; /* processing/state flags */
+#define MTCH 0x1 /* pattern has been matched */
+#define DIR_MTCH 0x2 /* pattern matched a directory */
+ struct pattern *fow; /* next pattern */
+} PATTERN;
+
+/*
+ * General Archive Structure (used internal to pax)
+ *
+ * This structure is used to pass information about archive members between
+ * the format independent routines and the format specific routines. When
+ * new archive formats are added, they must accept requests and supply info
+ * encoded in a structure of this type. The name fields are declared statically
+ * here, as there is only ONE of these floating around, size is not a major
+ * consideration. Eventually converting the name fields to a dynamic length
+ * may be required if and when the supporting operating system removes all
+ * restrictions on the length of pathnames it will resolve.
+ */
+typedef struct {
+ int nlen; /* file name length */
+ char name[PAXPATHLEN+1]; /* file name */
+ int ln_nlen; /* link name length */
+ char ln_name[PAXPATHLEN+1]; /* name to link to (if any) */
+ char *org_name; /* orig name in file system */
+ PATTERN *pat; /* ptr to pattern match (if any) */
+ struct stat sb; /* stat buffer see stat(2) */
+ off_t pad; /* bytes of padding after file xfer */
+ off_t skip; /* bytes of real data after header */
+ /* IMPORTANT. The st_size field does */
+ /* not always indicate the amount of */
+ /* data following the header. */
+ u_int32_t crc; /* file crc */
+ int type; /* type of file node */
+#define PAX_DIR 1 /* directory */
+#define PAX_CHR 2 /* character device */
+#define PAX_BLK 3 /* block device */
+#define PAX_REG 4 /* regular file */
+#define PAX_SLK 5 /* symbolic link */
+#define PAX_SCK 6 /* socket */
+#define PAX_FIF 7 /* fifo */
+#define PAX_HLK 8 /* hard link */
+#define PAX_HRG 9 /* hard link to a regular file */
+#define PAX_CTG 10 /* high performance file */
+#define PAX_GLL 11 /* GNU long symlink */
+#define PAX_GLF 12 /* GNU long file */
+} ARCHD;
+
+#define PAX_IS_REG(type) ((type) == PAX_REG || (type) == PAX_CTG)
+#define PAX_IS_HARDLINK(type) ((type) == PAX_HLK || (type) == PAX_HRG)
+#define PAX_IS_LINK(type) ((type) == PAX_SLK || PAX_IS_HARDLINK(type))
+
+/*
+ * Format Specific Routine Table
+ *
+ * The format specific routine table allows new archive formats to be quickly
+ * added. Overall pax operation is independent of the actual format used to
+ * form the archive. Only those routines which deal directly with the archive
+ * are tailored to the oddities of the specific format. All other routines are
+ * independent of the archive format. Data flow in and out of the format
+ * dependent routines pass pointers to ARCHD structure (described below).
+ */
+typedef struct {
+ char *name; /* name of format, this is the name the user */
+ /* gives to -x option to select it. */
+ int bsz; /* default block size. used when the user */
+ /* does not specify a blocksize for writing */
+ /* Appends continue to with the blocksize */
+ /* the archive is currently using. */
+ int hsz; /* Header size in bytes. this is the size of */
+ /* the smallest header this format supports. */
+ /* Headers are assumed to fit in a BLKMULT. */
+ /* If they are bigger, get_head() and */
+ /* get_arc() must be adjusted */
+ int udev; /* does append require unique dev/ino? some */
+ /* formats use the device and inode fields */
+ /* to specify hard links. when members in */
+ /* the archive have the same inode/dev they */
+ /* are assumed to be hard links. During */
+ /* append we may have to generate unique ids */
+ /* to avoid creating incorrect hard links */
+ int hlk; /* does archive store hard links info? if */
+ /* not, we do not bother to look for them */
+ /* during archive write operations */
+ int blkalgn; /* writes must be aligned to blkalgn boundary */
+ int inhead; /* is the trailer encoded in a valid header? */
+ /* if not, trailers are assumed to be found */
+ /* in invalid headers (i.e like tar) */
+ int (*id)(char *, /* checks if a buffer is a valid header */
+ int); /* returns 1 if it is, o.w. returns a 0 */
+ int (*st_rd)(void); /* initialize routine for read. so format */
+ /* can set up tables etc before it starts */
+ /* reading an archive */
+ int (*rd)(ARCHD *, /* read header routine. passed a pointer to */
+ char *); /* ARCHD. It must extract the info from the */
+ /* format and store it in the ARCHD struct. */
+ /* This routine is expected to fill all the */
+ /* fields in the ARCHD (including stat buf) */
+ /* 0 is returned when a valid header is */
+ /* found. -1 when not valid. This routine */
+ /* set the skip and pad fields so the format */
+ /* independent routines know the amount of */
+ /* padding and the number of bytes of data */
+ /* which follow the header. This info is */
+ /* used skip to the next file header */
+ off_t (*end_rd)(void); /* read cleanup. Allows format to clean up */
+ /* and MUST RETURN THE LENGTH OF THE TRAILER */
+ /* RECORD (so append knows how many bytes */
+ /* to move back to rewrite the trailer) */
+ int (*st_wr)(void); /* initialize routine for write operations */
+ int (*wr)(ARCHD *); /* write archive header. Passed an ARCHD */
+ /* filled with the specs on the next file to */
+ /* archived. Returns a 1 if no file data is */
+ /* is to be stored; 0 if file data is to be */
+ /* added. A -1 is returned if a write */
+ /* operation to the archive failed. this */
+ /* function sets the skip and pad fields so */
+ /* the proper padding can be added after */
+ /* file data. This routine must NEVER write */
+ /* a flawed archive header. */
+ int (*end_wr)(void); /* end write. write the trailer and do any */
+ /* other format specific functions needed */
+ /* at the end of an archive write */
+ int (*trail)(ARCHD *, /* returns 0 if a valid trailer, -1 if not */
+ char *, int, /* For formats which encode the trailer */
+ int *); /* outside of a valid header, a return value */
+ /* of 1 indicates that the block passed to */
+ /* it can never contain a valid header (skip */
+ /* this block, no point in looking at it) */
+ /* CAUTION: parameters to this function are */
+ /* different for trailers inside or outside */
+ /* of headers. See get_head() for details */
+ int (*options)(void); /* process format specific options (-o) */
+} FSUB;
+
+/*
+ * Time data for a given file. This is usually embedded in a structure
+ * indexed by dev+ino, by name, by order in the archive, etc. set_attr()
+ * takes one of these and will only change the times or mode if the file
+ * at the given name has the indicated dev+ino.
+ */
+struct file_times {
+ ino_t ft_ino; /* inode number to verify */
+ struct timespec ft_mtim; /* times to set */
+ struct timespec ft_atim;
+ char *ft_name; /* name of file to set the times on */
+ dev_t ft_dev; /* device number to verify */
+};
+
+/*
+ * Format Specific Options List
+ *
+ * Used to pass format options to the format options handler
+ */
+typedef struct oplist {
+ char *name; /* option variable name e.g. name= */
+ char *value; /* value for option variable */
+ struct oplist *fow; /* next option */
+} OPLIST;
+
+/*
+ * General Macros
+ */
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+#define MAJOR(x) major(x)
+#define MINOR(x) minor(x)
+#define TODEV(x, y) makedev((x), (y))
+
+#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
+#define SETBITS (S_ISUID | S_ISGID)
+#define ABITS (FILEBITS | SETBITS)
+
+/*
+ * General Defines
+ */
+#define HEX 16
+#define OCT 8
+#define _PAX_ 1
+#define _TFILE_BASE "paxXXXXXXXXXX"
+#define MAX_TIME_T (sizeof(time_t) == sizeof(long long) ? \
+ LLONG_MAX : INT_MAX)
diff --git a/bin/pax/sel_subs.c b/bin/pax/sel_subs.c
new file mode 100644
index 0000000..578c445
--- /dev/null
+++ b/bin/pax/sel_subs.c
@@ -0,0 +1,632 @@
+/* $OpenBSD: sel_subs.c,v 1.28 2019/06/24 03:33:09 deraadt Exp $ */
+/* $NetBSD: sel_subs.c,v 1.5 1995/03/21 09:07:42 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * data structure for storing uid/grp selects (-U, -G non standard options)
+ */
+
+#define USR_TB_SZ 317 /* user selection table size */
+#define GRP_TB_SZ 317 /* user selection table size */
+
+typedef struct usrt {
+ uid_t uid;
+ struct usrt *fow; /* next uid */
+} USRT;
+
+typedef struct grpt {
+ gid_t gid;
+ struct grpt *fow; /* next gid */
+} GRPT;
+
+/*
+ * data structure for storing user supplied time ranges (-T option)
+ */
+
+#define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2;
+
+typedef struct time_rng {
+ time_t low_time; /* lower inclusive time limit */
+ time_t high_time; /* higher inclusive time limit */
+ int flgs; /* option flags */
+#define HASLOW 0x01 /* has lower time limit */
+#define HASHIGH 0x02 /* has higher time limit */
+#define CMPMTME 0x04 /* compare file modification time */
+#define CMPCTME 0x08 /* compare inode change time */
+#define CMPBOTH (CMPMTME|CMPCTME) /* compare inode and mod time */
+ struct time_rng *fow; /* next pattern */
+} TIME_RNG;
+
+static int str_sec(const char *, time_t *);
+static int usr_match(ARCHD *);
+static int grp_match(ARCHD *);
+static int trng_match(ARCHD *);
+
+static TIME_RNG *trhead = NULL; /* time range list head */
+static TIME_RNG *trtail = NULL; /* time range list tail */
+static USRT **usrtb = NULL; /* user selection table */
+static GRPT **grptb = NULL; /* group selection table */
+
+/*
+ * Routines for selection of archive members
+ */
+
+/*
+ * sel_chk()
+ * check if this file matches a specified uid, gid or time range
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be skipped
+ */
+
+int
+sel_chk(ARCHD *arcn)
+{
+ if (((usrtb != NULL) && usr_match(arcn)) ||
+ ((grptb != NULL) && grp_match(arcn)) ||
+ ((trhead != NULL) && trng_match(arcn)))
+ return(1);
+ return(0);
+}
+
+/*
+ * User/group selection routines
+ *
+ * Routines to handle user selection of files based on the file uid/gid. To
+ * add an entry, the user supplies either the name or the uid/gid starting with
+ * a # on the command line. A \# will escape the #.
+ */
+
+/*
+ * usr_add()
+ * add a user match to the user match hash table
+ * Return:
+ * 0 if added ok, -1 otherwise;
+ */
+
+int
+usr_add(char *str)
+{
+ u_int indx;
+ USRT *pt;
+ uid_t uid;
+
+ /*
+ * create the table if it doesn't exist
+ */
+ if ((str == NULL) || (*str == '\0'))
+ return(-1);
+ if ((usrtb == NULL) &&
+ ((usrtb = calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) {
+ paxwarn(1, "Unable to allocate memory for user selection table");
+ return(-1);
+ }
+
+ /*
+ * figure out user spec
+ */
+ if (str[0] != '#') {
+ /*
+ * it is a user name, \# escapes # as first char in user name
+ */
+ if ((str[0] == '\\') && (str[1] == '#'))
+ ++str;
+ if (uid_from_user(str, &uid) == -1) {
+ paxwarn(1, "Unable to find uid for user: %s", str);
+ return(-1);
+ }
+ } else
+ uid = (uid_t)strtoul(str+1, NULL, 10);
+ endpwent();
+
+ /*
+ * hash it and go down the hash chain (if any) looking for it
+ */
+ indx = ((unsigned)uid) % USR_TB_SZ;
+ if ((pt = usrtb[indx]) != NULL) {
+ while (pt != NULL) {
+ if (pt->uid == uid)
+ return(0);
+ pt = pt->fow;
+ }
+ }
+
+ /*
+ * uid is not yet in the table, add it to the front of the chain
+ */
+ if ((pt = malloc(sizeof(USRT))) != NULL) {
+ pt->uid = uid;
+ pt->fow = usrtb[indx];
+ usrtb[indx] = pt;
+ return(0);
+ }
+ paxwarn(1, "User selection table out of memory");
+ return(-1);
+}
+
+/*
+ * usr_match()
+ * check if this files uid matches a selected uid.
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be skipped
+ */
+
+static int
+usr_match(ARCHD *arcn)
+{
+ USRT *pt;
+
+ /*
+ * hash and look for it in the table
+ */
+ pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ];
+ while (pt != NULL) {
+ if (pt->uid == arcn->sb.st_uid)
+ return(0);
+ pt = pt->fow;
+ }
+
+ /*
+ * not found
+ */
+ return(1);
+}
+
+/*
+ * grp_add()
+ * add a group match to the group match hash table
+ * Return:
+ * 0 if added ok, -1 otherwise;
+ */
+
+int
+grp_add(char *str)
+{
+ u_int indx;
+ GRPT *pt;
+ gid_t gid;
+
+ /*
+ * create the table if it doesn't exist
+ */
+ if ((str == NULL) || (*str == '\0'))
+ return(-1);
+ if ((grptb == NULL) &&
+ ((grptb = calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) {
+ paxwarn(1, "Unable to allocate memory fo group selection table");
+ return(-1);
+ }
+
+ /*
+ * figure out group spec
+ */
+ if (str[0] != '#') {
+ /*
+ * it is a group name, \# escapes # as first char in group name
+ */
+ if ((str[0] == '\\') && (str[1] == '#'))
+ ++str;
+ if (gid_from_group(str, &gid) == -1) {
+ paxwarn(1,"Cannot determine gid for group name: %s", str);
+ return(-1);
+ }
+ } else
+ gid = (gid_t)strtoul(str+1, NULL, 10);
+ endgrent();
+
+ /*
+ * hash it and go down the hash chain (if any) looking for it
+ */
+ indx = ((unsigned)gid) % GRP_TB_SZ;
+ if ((pt = grptb[indx]) != NULL) {
+ while (pt != NULL) {
+ if (pt->gid == gid)
+ return(0);
+ pt = pt->fow;
+ }
+ }
+
+ /*
+ * gid not in the table, add it to the front of the chain
+ */
+ if ((pt = malloc(sizeof(GRPT))) != NULL) {
+ pt->gid = gid;
+ pt->fow = grptb[indx];
+ grptb[indx] = pt;
+ return(0);
+ }
+ paxwarn(1, "Group selection table out of memory");
+ return(-1);
+}
+
+/*
+ * grp_match()
+ * check if this files gid matches a selected gid.
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be skipped
+ */
+
+static int
+grp_match(ARCHD *arcn)
+{
+ GRPT *pt;
+
+ /*
+ * hash and look for it in the table
+ */
+ pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ];
+ while (pt != NULL) {
+ if (pt->gid == arcn->sb.st_gid)
+ return(0);
+ pt = pt->fow;
+ }
+
+ /*
+ * not found
+ */
+ return(1);
+}
+
+/*
+ * Time range selection routines
+ *
+ * Routines to handle user selection of files based on the modification and/or
+ * inode change time falling within a specified time range (the non-standard
+ * -T flag). The user may specify any number of different file time ranges.
+ * Time ranges are checked one at a time until a match is found (if at all).
+ * If the file has a mtime (and/or ctime) which lies within one of the time
+ * ranges, the file is selected. Time ranges may have a lower and/or a upper
+ * value. These ranges are inclusive. When no time ranges are supplied to pax
+ * with the -T option, all members in the archive will be selected by the time
+ * range routines. When only a lower range is supplied, only files with a
+ * mtime (and/or ctime) equal to or younger are selected. When only a upper
+ * range is supplied, only files with a mtime (and/or ctime) equal to or older
+ * are selected. When the lower time range is equal to the upper time range,
+ * only files with a mtime (or ctime) of exactly that time are selected.
+ */
+
+/*
+ * trng_add()
+ * add a time range match to the time range list.
+ * This is a non-standard pax option. Lower and upper ranges are in the
+ * format: [[[[[cc]yy]mm]dd]HH]MM[.SS] and are comma separated.
+ * Time ranges are based on current time, so 1234 would specify a time of
+ * 12:34 today.
+ * Return:
+ * 0 if the time range was added to the list, -1 otherwise
+ */
+
+int
+trng_add(char *str)
+{
+ TIME_RNG *pt;
+ char *up_pt = NULL;
+ char *stpt;
+ char *flgpt;
+ int dot = 0;
+
+ /*
+ * throw out the badly formed time ranges
+ */
+ if ((str == NULL) || (*str == '\0')) {
+ paxwarn(1, "Empty time range string");
+ return(-1);
+ }
+
+ /*
+ * locate optional flags suffix /{cm}.
+ */
+ if ((flgpt = strrchr(str, '/')) != NULL)
+ *flgpt++ = '\0';
+
+ for (stpt = str; *stpt != '\0'; ++stpt) {
+ if ((*stpt >= '0') && (*stpt <= '9'))
+ continue;
+ if ((*stpt == ',') && (up_pt == NULL)) {
+ *stpt = '\0';
+ up_pt = stpt + 1;
+ dot = 0;
+ continue;
+ }
+
+ /*
+ * allow only one dot per range (secs)
+ */
+ if ((*stpt == '.') && (!dot)) {
+ ++dot;
+ continue;
+ }
+ paxwarn(1, "Improperly specified time range: %s", str);
+ goto out;
+ }
+
+ /*
+ * allocate space for the time range and store the limits
+ */
+ if ((pt = malloc(sizeof(TIME_RNG))) == NULL) {
+ paxwarn(1, "Unable to allocate memory for time range");
+ return(-1);
+ }
+
+ /*
+ * by default we only will check file mtime, but user can specify
+ * mtime, ctime (inode change time) or both.
+ */
+ if ((flgpt == NULL) || (*flgpt == '\0'))
+ pt->flgs = CMPMTME;
+ else {
+ pt->flgs = 0;
+ while (*flgpt != '\0') {
+ switch (*flgpt) {
+ case 'M':
+ case 'm':
+ pt->flgs |= CMPMTME;
+ break;
+ case 'C':
+ case 'c':
+ pt->flgs |= CMPCTME;
+ break;
+ default:
+ paxwarn(1, "Bad option %c with time range %s",
+ *flgpt, str);
+ free(pt);
+ goto out;
+ }
+ ++flgpt;
+ }
+ }
+
+ /*
+ * start off with the current time
+ */
+ pt->low_time = pt->high_time = time(NULL);
+ if (*str != '\0') {
+ /*
+ * add lower limit
+ */
+ if (str_sec(str, &(pt->low_time)) < 0) {
+ paxwarn(1, "Illegal lower time range %s", str);
+ free(pt);
+ goto out;
+ }
+ pt->flgs |= HASLOW;
+ }
+
+ if ((up_pt != NULL) && (*up_pt != '\0')) {
+ /*
+ * add upper limit
+ */
+ if (str_sec(up_pt, &(pt->high_time)) < 0) {
+ paxwarn(1, "Illegal upper time range %s", up_pt);
+ free(pt);
+ goto out;
+ }
+ pt->flgs |= HASHIGH;
+
+ /*
+ * check that the upper and lower do not overlap
+ */
+ if (pt->flgs & HASLOW) {
+ if (pt->low_time > pt->high_time) {
+ paxwarn(1, "Upper %s and lower %s time overlap",
+ up_pt, str);
+ free(pt);
+ return(-1);
+ }
+ }
+ }
+
+ pt->fow = NULL;
+ if (trhead == NULL) {
+ trtail = trhead = pt;
+ return(0);
+ }
+ trtail->fow = pt;
+ trtail = pt;
+ return(0);
+
+ out:
+ paxwarn(1, "Time range format is: [[[[[cc]yy]mm]dd]HH]MM[.SS][/[c][m]]");
+ return(-1);
+}
+
+/*
+ * trng_match()
+ * check if this files mtime/ctime falls within any supplied time range.
+ * Return:
+ * 0 if this archive member should be processed, 1 if it should be skipped
+ */
+
+static int
+trng_match(ARCHD *arcn)
+{
+ TIME_RNG *pt;
+
+ /*
+ * have to search down the list one at a time looking for a match.
+ * remember time range limits are inclusive.
+ */
+ pt = trhead;
+ while (pt != NULL) {
+ switch (pt->flgs & CMPBOTH) {
+ case CMPBOTH:
+ /*
+ * user wants both mtime and ctime checked for this
+ * time range
+ */
+ if (((pt->flgs & HASLOW) &&
+ (arcn->sb.st_mtime < pt->low_time) &&
+ (arcn->sb.st_ctime < pt->low_time)) ||
+ ((pt->flgs & HASHIGH) &&
+ (arcn->sb.st_mtime > pt->high_time) &&
+ (arcn->sb.st_ctime > pt->high_time))) {
+ pt = pt->fow;
+ continue;
+ }
+ break;
+ case CMPCTME:
+ /*
+ * user wants only ctime checked for this time range
+ */
+ if (((pt->flgs & HASLOW) &&
+ (arcn->sb.st_ctime < pt->low_time)) ||
+ ((pt->flgs & HASHIGH) &&
+ (arcn->sb.st_ctime > pt->high_time))) {
+ pt = pt->fow;
+ continue;
+ }
+ break;
+ case CMPMTME:
+ default:
+ /*
+ * user wants only mtime checked for this time range
+ */
+ if (((pt->flgs & HASLOW) &&
+ (arcn->sb.st_mtime < pt->low_time)) ||
+ ((pt->flgs & HASHIGH) &&
+ (arcn->sb.st_mtime > pt->high_time))) {
+ pt = pt->fow;
+ continue;
+ }
+ break;
+ }
+ break;
+ }
+
+ if (pt == NULL)
+ return(1);
+ return(0);
+}
+
+/*
+ * str_sec()
+ * Convert a time string in the format of [[[[[cc]yy]mm]dd]HH]MM[.SS] to
+ * seconds UTC. Tval already has current time loaded into it at entry.
+ * Return:
+ * 0 if converted ok, -1 otherwise
+ */
+
+static int
+str_sec(const char *p, time_t *tval)
+{
+ struct tm *lt;
+ const char *dot, *t;
+ size_t len;
+ int bigyear;
+ int yearset;
+
+ yearset = 0;
+ len = strlen(p);
+
+ for (t = p, dot = NULL; *t; ++t) {
+ if (isdigit((unsigned char)*t))
+ continue;
+ if (*t == '.' && dot == NULL) {
+ dot = t;
+ continue;
+ }
+ return(-1);
+ }
+
+ lt = localtime(tval);
+
+ if (dot != NULL) { /* .SS */
+ if (strlen(++dot) != 2)
+ return(-1);
+ lt->tm_sec = ATOI2(dot);
+ if (lt->tm_sec > 61)
+ return(-1);
+ len -= 3;
+ } else
+ lt->tm_sec = 0;
+
+ switch (len) {
+ case 12: /* cc */
+ bigyear = ATOI2(p);
+ lt->tm_year = (bigyear * 100) - 1900;
+ yearset = 1;
+ /* FALLTHROUGH */
+ case 10: /* yy */
+ if (yearset) {
+ lt->tm_year += ATOI2(p);
+ } else {
+ lt->tm_year = ATOI2(p);
+ if (lt->tm_year < 69) /* hack for 2000 ;-} */
+ lt->tm_year += (2000 - 1900);
+ }
+ /* FALLTHROUGH */
+ case 8: /* mm */
+ lt->tm_mon = ATOI2(p);
+ if ((lt->tm_mon > 12) || !lt->tm_mon)
+ return(-1);
+ --lt->tm_mon; /* time struct is 0 - 11 */
+ /* FALLTHROUGH */
+ case 6: /* dd */
+ lt->tm_mday = ATOI2(p);
+ if ((lt->tm_mday > 31) || !lt->tm_mday)
+ return(-1);
+ /* FALLTHROUGH */
+ case 4: /* HH */
+ lt->tm_hour = ATOI2(p);
+ if (lt->tm_hour > 23)
+ return(-1);
+ /* FALLTHROUGH */
+ case 2: /* MM */
+ lt->tm_min = ATOI2(p);
+ if (lt->tm_min > 59)
+ return(-1);
+ break;
+ default:
+ return(-1);
+ }
+
+ /* convert broken-down time to UTC clock time seconds */
+ if ((*tval = mktime(lt)) == -1)
+ return(-1);
+ return(0);
+}
diff --git a/bin/pax/tables.c b/bin/pax/tables.c
new file mode 100644
index 0000000..0a7b71f
--- /dev/null
+++ b/bin/pax/tables.c
@@ -0,0 +1,1786 @@
+/* $OpenBSD: tables.c,v 1.54 2019/06/28 05:35:34 deraadt Exp $ */
+/* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * Routines for controlling the contents of all the different databases pax
+ * keeps. Tables are dynamically created only when they are needed. The
+ * goal was speed and the ability to work with HUGE archives. The databases
+ * were kept simple, but do have complex rules for when the contents change.
+ * As of this writing, the posix library functions were more complex than
+ * needed for this application (pax databases have very short lifetimes and
+ * do not survive after pax is finished). Pax is required to handle very
+ * large archives. These database routines carefully combine memory usage and
+ * temporary file storage in ways which will not significantly impact runtime
+ * performance while allowing the largest possible archives to be handled.
+ * Trying to force the fit to the posix database routines was not considered
+ * time well spent.
+ */
+
+/*
+ * data structures and constants used by the different databases kept by pax
+ */
+
+/*
+ * Hash Table Sizes MUST BE PRIME, if set too small performance suffers.
+ * Probably safe to expect 500000 inodes per tape. Assuming good key
+ * distribution (inodes) chains of under 50 long (worst case) is ok.
+ */
+#define L_TAB_SZ 2503 /* hard link hash table size */
+#define F_TAB_SZ 50503 /* file time hash table size */
+#define N_TAB_SZ 541 /* interactive rename hash table */
+#define D_TAB_SZ 317 /* unique device mapping table */
+#define A_TAB_SZ 317 /* ftree dir access time reset table */
+#define SL_TAB_SZ 317 /* escape symlink tables */
+#define MAXKEYLEN 64 /* max number of chars for hash */
+#define DIRP_SIZE 64 /* initial size of created dir table */
+
+/*
+ * file hard link structure (hashed by dev/ino and chained) used to find the
+ * hard links in a file system or with some archive formats (cpio)
+ */
+typedef struct hrdlnk {
+ ino_t ino; /* files inode number */
+ char *name; /* name of first file seen with this ino/dev */
+ dev_t dev; /* files device number */
+ u_long nlink; /* expected link count */
+ struct hrdlnk *fow;
+} HRDLNK;
+
+/*
+ * Archive write update file time table (the -u, -C flag), hashed by filename.
+ * Filenames are stored in a scratch file at seek offset into the file. The
+ * file time (mod time) and the file name length (for a quick check) are
+ * stored in a hash table node. We were forced to use a scratch file because
+ * with -u, the mtime for every node in the archive must always be available
+ * to compare against (and this data can get REALLY large with big archives).
+ * By being careful to read only when we have a good chance of a match, the
+ * performance loss is not measurable (and the size of the archive we can
+ * handle is greatly increased).
+ */
+typedef struct ftm {
+ off_t seek; /* location in scratch file */
+ struct timespec mtim; /* files last modification time */
+ struct ftm *fow;
+ int namelen; /* file name length */
+} FTM;
+
+/*
+ * Interactive rename table (-i flag), hashed by orig filename.
+ * We assume this will not be a large table as this mapping data can only be
+ * obtained through interactive input by the user. Nobody is going to type in
+ * changes for 500000 files? We use chaining to resolve collisions.
+ */
+
+typedef struct namt {
+ char *oname; /* old name */
+ char *nname; /* new name typed in by the user */
+ struct namt *fow;
+} NAMT;
+
+/*
+ * Unique device mapping tables. Some protocols (e.g. cpio) require that the
+ * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they
+ * are links to the same file. Appending to archives can break this. For those
+ * protocols that have this requirement we map c_dev to a unique value not seen
+ * in the archive when we append. We also try to handle inode truncation with
+ * this table. (When the inode field in the archive header are too small, we
+ * remap the dev on writes to remove accidental collisions).
+ *
+ * The list is hashed by device number using chain collision resolution. Off of
+ * each DEVT are linked the various remaps for this device based on those bits
+ * in the inode which were truncated. For example if we are just remapping to
+ * avoid a device number during an update append, off the DEVT we would have
+ * only a single DLIST that has a truncation id of 0 (no inode bits were
+ * stripped for this device so far). When we spot inode truncation we create
+ * a new mapping based on the set of bits in the inode which were stripped off.
+ * so if the top four bits of the inode are stripped and they have a pattern of
+ * 0110...... (where . are those bits not truncated) we would have a mapping
+ * assigned for all inodes that has the same 0110.... pattern (with this dev
+ * number of course). This keeps the mapping sparse and should be able to store
+ * close to the limit of files which can be represented by the optimal
+ * combination of dev and inode bits, and without creating a fouled up archive.
+ * Note we also remap truncated devs in the same way (an exercise for the
+ * dedicated reader; always wanted to say that...:)
+ */
+
+typedef struct devt {
+ dev_t dev; /* the orig device number we now have to map */
+ struct devt *fow; /* new device map list */
+ struct dlist *list; /* map list based on inode truncation bits */
+} DEVT;
+
+typedef struct dlist {
+ ino_t trunc_bits; /* truncation pattern for a specific map */
+ dev_t dev; /* the new device id we use */
+ struct dlist *fow;
+} DLIST;
+
+/*
+ * ftree directory access time reset table. When we are done with a
+ * subtree we reset the access and mod time of the directory when the tflag is
+ * set. Not really explicitly specified in the pax spec, but easy and fast to
+ * do (and this may have even been intended in the spec, it is not clear).
+ * table is hashed by inode with chaining.
+ */
+
+typedef struct atdir {
+ struct file_times ft;
+ struct atdir *fow;
+} ATDIR;
+
+/*
+ * created directory time and mode storage entry. After pax is finished during
+ * extraction or copy, we must reset directory access modes and times that
+ * may have been modified after creation (they no longer have the specified
+ * times and/or modes). We must reset time in the reverse order of creation,
+ * because entries are added from the top of the file tree to the bottom.
+ * We MUST reset times from leaf to root (it will not work the other
+ * direction).
+ */
+
+typedef struct dirdata {
+ struct file_times ft;
+ u_int16_t mode; /* file mode to restore */
+ u_int16_t frc_mode; /* do we force mode settings? */
+} DIRDATA;
+
+static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */
+static FTM **ftab = NULL; /* file time table for updating arch */
+static NAMT **ntab = NULL; /* interactive rename storage table */
+#ifndef NOCPIO
+static DEVT **dtab = NULL; /* device/inode mapping tables */
+#endif
+static ATDIR **atab = NULL; /* file tree directory time reset table */
+static DIRDATA *dirp = NULL; /* storage for setting created dir time/mode */
+static size_t dirsize; /* size of dirp table */
+static size_t dircnt = 0; /* entries in dir time/mode storage */
+static int ffd = -1; /* tmp file for file time table name storage */
+
+/*
+ * hard link table routines
+ *
+ * The hard link table tries to detect hard links to files using the device and
+ * inode values. We do this when writing an archive, so we can tell the format
+ * write routine that this file is a hard link to another file. The format
+ * write routine then can store this file in whatever way it wants (as a hard
+ * link if the format supports that like tar, or ignore this info like cpio).
+ * (Actually a field in the format driver table tells us if the format wants
+ * hard link info. if not, we do not waste time looking for them). We also use
+ * the same table when reading an archive. In that situation, this table is
+ * used by the format read routine to detect hard links from stored dev and
+ * inode numbers (like cpio). This will allow pax to create a link when one
+ * can be detected by the archive format.
+ */
+
+/*
+ * lnk_start
+ * Creates the hard link table.
+ * Return:
+ * 0 if created, -1 if failure
+ */
+
+int
+lnk_start(void)
+{
+ if (ltab != NULL)
+ return(0);
+ if ((ltab = calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
+ paxwarn(1, "Cannot allocate memory for hard link table");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * chk_lnk()
+ * Looks up entry in hard link hash table. If found, it copies the name
+ * of the file it is linked to (we already saw that file) into ln_name.
+ * lnkcnt is decremented and if goes to 1 the node is deleted from the
+ * database. (We have seen all the links to this file). If not found,
+ * we add the file to the database if it has the potential for having
+ * hard links to other files we may process (it has a link count > 1)
+ * Return:
+ * if found returns 1; if not found returns 0; -1 on error
+ */
+
+int
+chk_lnk(ARCHD *arcn)
+{
+ HRDLNK *pt;
+ HRDLNK **ppt;
+ u_int indx;
+
+ if (ltab == NULL)
+ return(-1);
+ /*
+ * ignore those nodes that cannot have hard links
+ */
+ if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
+ return(0);
+
+ /*
+ * hash inode number and look for this file
+ */
+ indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
+ if ((pt = ltab[indx]) != NULL) {
+ /*
+ * its hash chain in not empty, walk down looking for it
+ */
+ ppt = &(ltab[indx]);
+ while (pt != NULL) {
+ if ((pt->ino == arcn->sb.st_ino) &&
+ (pt->dev == arcn->sb.st_dev))
+ break;
+ ppt = &(pt->fow);
+ pt = pt->fow;
+ }
+
+ if (pt != NULL) {
+ /*
+ * found a link. set the node type and copy in the
+ * name of the file it is to link to. we need to
+ * handle hardlinks to regular files differently than
+ * other links.
+ */
+ arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name,
+ sizeof(arcn->ln_name));
+ /* XXX truncate? */
+ if ((size_t)arcn->nlen >= sizeof(arcn->name))
+ arcn->nlen = sizeof(arcn->name) - 1;
+ if (arcn->type == PAX_REG)
+ arcn->type = PAX_HRG;
+ else
+ arcn->type = PAX_HLK;
+
+ /*
+ * if we have found all the links to this file, remove
+ * it from the database
+ */
+ if (--pt->nlink <= 1) {
+ *ppt = pt->fow;
+ free(pt->name);
+ free(pt);
+ }
+ return(1);
+ }
+ }
+
+ /*
+ * we never saw this file before. It has links so we add it to the
+ * front of this hash chain
+ */
+ if ((pt = malloc(sizeof(HRDLNK))) != NULL) {
+ if ((pt->name = strdup(arcn->name)) != NULL) {
+ pt->dev = arcn->sb.st_dev;
+ pt->ino = arcn->sb.st_ino;
+ pt->nlink = arcn->sb.st_nlink;
+ pt->fow = ltab[indx];
+ ltab[indx] = pt;
+ return(0);
+ }
+ free(pt);
+ }
+
+ paxwarn(1, "Hard link table out of memory");
+ return(-1);
+}
+
+/*
+ * purg_lnk
+ * remove reference for a file that we may have added to the data base as
+ * a potential source for hard links. We ended up not using the file, so
+ * we do not want to accidently point another file at it later on.
+ */
+
+void
+purg_lnk(ARCHD *arcn)
+{
+ HRDLNK *pt;
+ HRDLNK **ppt;
+ u_int indx;
+
+ if (ltab == NULL)
+ return;
+ /*
+ * do not bother to look if it could not be in the database
+ */
+ if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
+ PAX_IS_HARDLINK(arcn->type))
+ return;
+
+ /*
+ * find the hash chain for this inode value, if empty return
+ */
+ indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
+ if ((pt = ltab[indx]) == NULL)
+ return;
+
+ /*
+ * walk down the list looking for the inode/dev pair, unlink and
+ * free if found
+ */
+ ppt = &(ltab[indx]);
+ while (pt != NULL) {
+ if ((pt->ino == arcn->sb.st_ino) &&
+ (pt->dev == arcn->sb.st_dev))
+ break;
+ ppt = &(pt->fow);
+ pt = pt->fow;
+ }
+ if (pt == NULL)
+ return;
+
+ /*
+ * remove and free it
+ */
+ *ppt = pt->fow;
+ free(pt->name);
+ free(pt);
+}
+
+/*
+ * lnk_end()
+ * pull apart a existing link table so we can reuse it. We do this between
+ * read and write phases of append with update. (The format may have
+ * used the link table, and we need to start with a fresh table for the
+ * write phase
+ */
+
+void
+lnk_end(void)
+{
+ int i;
+ HRDLNK *pt;
+ HRDLNK *ppt;
+
+ if (ltab == NULL)
+ return;
+
+ for (i = 0; i < L_TAB_SZ; ++i) {
+ if (ltab[i] == NULL)
+ continue;
+ pt = ltab[i];
+ ltab[i] = NULL;
+
+ /*
+ * free up each entry on this chain
+ */
+ while (pt != NULL) {
+ ppt = pt;
+ pt = ppt->fow;
+ free(ppt->name);
+ free(ppt);
+ }
+ }
+}
+
+/*
+ * modification time table routines
+ *
+ * The modification time table keeps track of last modification times for all
+ * files stored in an archive during a write phase when -u is set. We only
+ * add a file to the archive if it is newer than a file with the same name
+ * already stored on the archive (if there is no other file with the same
+ * name on the archive it is added). This applies to writes and appends.
+ * An append with an -u must read the archive and store the modification time
+ * for every file on that archive before starting the write phase. It is clear
+ * that this is one HUGE database. To save memory space, the actual file names
+ * are stored in a scratch file and indexed by an in-memory hash table. The
+ * hash table is indexed by hashing the file path. The nodes in the table store
+ * the length of the filename and the lseek offset within the scratch file
+ * where the actual name is stored. Since there are never any deletions from
+ * this table, fragmentation of the scratch file is never a issue. Lookups
+ * seem to not exhibit any locality at all (files in the database are rarely
+ * looked up more than once...), so caching is just a waste of memory. The
+ * only limitation is the amount of scratch file space available to store the
+ * path names.
+ */
+
+/*
+ * ftime_start()
+ * create the file time hash table and open for read/write the scratch
+ * file. (after created it is unlinked, so when we exit we leave
+ * no witnesses).
+ * Return:
+ * 0 if the table and file was created ok, -1 otherwise
+ */
+
+int
+ftime_start(void)
+{
+
+ if (ftab != NULL)
+ return(0);
+ if ((ftab = calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
+ paxwarn(1, "Cannot allocate memory for file time table");
+ return(-1);
+ }
+
+ /*
+ * get random name and create temporary scratch file, unlink name
+ * so it will get removed on exit
+ */
+ memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
+ if ((ffd = mkstemp(tempfile)) == -1) {
+ syswarn(1, errno, "Unable to create temporary file: %s",
+ tempfile);
+ return(-1);
+ }
+ (void)unlink(tempfile);
+
+ return(0);
+}
+
+/*
+ * chk_ftime()
+ * looks up entry in file time hash table. If not found, the file is
+ * added to the hash table and the file named stored in the scratch file.
+ * If a file with the same name is found, the file times are compared and
+ * the most recent file time is retained. If the new file was younger (or
+ * was not in the database) the new file is selected for storage.
+ * Return:
+ * 0 if file should be added to the archive, 1 if it should be skipped,
+ * -1 on error
+ */
+
+int
+chk_ftime(ARCHD *arcn)
+{
+ FTM *pt;
+ int namelen;
+ u_int indx;
+ char ckname[PAXPATHLEN+1];
+
+ /*
+ * no info, go ahead and add to archive
+ */
+ if (ftab == NULL)
+ return(0);
+
+ /*
+ * hash the pathname and look up in table
+ */
+ namelen = arcn->nlen;
+ indx = st_hash(arcn->name, namelen, F_TAB_SZ);
+ if ((pt = ftab[indx]) != NULL) {
+ /*
+ * the hash chain is not empty, walk down looking for match
+ * only read up the path names if the lengths match, speeds
+ * up the search a lot
+ */
+ while (pt != NULL) {
+ if (pt->namelen == namelen) {
+ /*
+ * potential match, have to read the name
+ * from the scratch file.
+ */
+ if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
+ syswarn(1, errno,
+ "Failed ftime table seek");
+ return(-1);
+ }
+ if (read(ffd, ckname, namelen) != namelen) {
+ syswarn(1, errno,
+ "Failed ftime table read");
+ return(-1);
+ }
+
+ /*
+ * if the names match, we are done
+ */
+ if (!strncmp(ckname, arcn->name, namelen))
+ break;
+ }
+
+ /*
+ * try the next entry on the chain
+ */
+ pt = pt->fow;
+ }
+
+ if (pt != NULL) {
+ /*
+ * found the file, compare the times, save the newer
+ */
+ if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)) {
+ /*
+ * file is newer
+ */
+ pt->mtim = arcn->sb.st_mtim;
+ return(0);
+ }
+ /*
+ * file is older
+ */
+ return(1);
+ }
+ }
+
+ /*
+ * not in table, add it
+ */
+ if ((pt = malloc(sizeof(FTM))) != NULL) {
+ /*
+ * add the name at the end of the scratch file, saving the
+ * offset. add the file to the head of the hash chain
+ */
+ if ((pt->seek = lseek(ffd, 0, SEEK_END)) >= 0) {
+ if (write(ffd, arcn->name, namelen) == namelen) {
+ pt->mtim = arcn->sb.st_mtim;
+ pt->namelen = namelen;
+ pt->fow = ftab[indx];
+ ftab[indx] = pt;
+ return(0);
+ }
+ syswarn(1, errno, "Failed write to file time table");
+ } else
+ syswarn(1, errno, "Failed seek on file time table");
+ } else
+ paxwarn(1, "File time table ran out of memory");
+
+ if (pt != NULL)
+ free(pt);
+ return(-1);
+}
+
+/*
+ * escaping (absolute or w/"..") symlink table routines
+ *
+ * By default, an archive shouldn't be able extract to outside of the
+ * current directory. What should we do if the archive contains a symlink
+ * whose value is either absolute or contains ".." components? What we'll
+ * do is initially create the path as an empty file (to block attempts to
+ * reference _through_ it) and instead record its path and desired
+ * final value and mode. Then once all the other archive
+ * members are created (but before the pass to set timestamps on
+ * directories) we'll process those records, replacing the placeholder with
+ * the correct symlink and setting them to the correct mode, owner, group,
+ * and timestamps.
+ *
+ * Note: we also need to handle hardlinks to symlinks (barf) as well as
+ * hardlinks whose target is replaced by a later entry in the archive (barf^2).
+ *
+ * So we track things by dev+ino of the placeholder file, associating with
+ * that the value and mode of the final symlink and a list of paths that
+ * should all be hardlinks of that. We'll 'store' the symlink's desired
+ * timestamps, owner, and group by setting them on the placeholder file.
+ *
+ * The operations are:
+ * a) create an escaping symlink: create the placeholder file and add an entry
+ * for the new link
+ * b) create a hardlink: do the link. If the target turns out to be a
+ * zero-length file whose dev+ino are in the symlink table, then add this
+ * path to the list of names for that link
+ * c) perform deferred processing: for each entry, check each associated path:
+ * if it's a zero-length file with the correct dev+ino then recreate it as
+ * the specified symlink or hardlink to the first such
+ */
+
+struct slpath {
+ char *sp_path;
+ struct slpath *sp_next;
+};
+struct slinode {
+ ino_t sli_ino;
+ char *sli_value;
+ struct slpath sli_paths;
+ struct slinode *sli_fow; /* hash table chain */
+ dev_t sli_dev;
+ mode_t sli_mode;
+};
+
+static struct slinode **slitab = NULL;
+
+/*
+ * sltab_start()
+ * create the hash table
+ * Return:
+ * 0 if the table and file was created ok, -1 otherwise
+ */
+
+int
+sltab_start(void)
+{
+
+ if ((slitab = calloc(SL_TAB_SZ, sizeof *slitab)) == NULL) {
+ syswarn(1, errno, "symlink table");
+ return(-1);
+ }
+
+ return(0);
+}
+
+/*
+ * sltab_add_sym()
+ * Create the placeholder and tracking info for an escaping symlink.
+ * Return:
+ * 0 on success, -1 otherwise
+ */
+
+int
+sltab_add_sym(const char *path0, const char *value0, mode_t mode)
+{
+ struct stat sb;
+ struct slinode *s;
+ struct slpath *p;
+ char *path, *value;
+ u_int indx;
+ int fd;
+
+ /* create the placeholder */
+ fd = open(path0, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0600);
+ if (fd == -1)
+ return (-1);
+ if (fstat(fd, &sb) == -1) {
+ unlink(path0);
+ close(fd);
+ return (-1);
+ }
+ close(fd);
+
+ if (havechd && *path0 != '/') {
+ if ((path = realpath(path0, NULL)) == NULL) {
+ syswarn(1, errno, "Cannot canonicalize %s", path0);
+ unlink(path0);
+ return (-1);
+ }
+ } else if ((path = strdup(path0)) == NULL) {
+ syswarn(1, errno, "defered symlink path");
+ unlink(path0);
+ return (-1);
+ }
+ if ((value = strdup(value0)) == NULL) {
+ syswarn(1, errno, "defered symlink value");
+ unlink(path);
+ free(path);
+ return (-1);
+ }
+
+ /* now check the hash table for conflicting entry */
+ indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ;
+ for (s = slitab[indx]; s != NULL; s = s->sli_fow) {
+ if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev)
+ continue;
+
+ /*
+ * One of our placeholders got removed behind our back and
+ * we've reused the inode. Weird, but clean up the mess.
+ */
+ free(s->sli_value);
+ free(s->sli_paths.sp_path);
+ p = s->sli_paths.sp_next;
+ while (p != NULL) {
+ struct slpath *next_p = p->sp_next;
+
+ free(p->sp_path);
+ free(p);
+ p = next_p;
+ }
+ goto set_value;
+ }
+
+ /* Normal case: create a new node */
+ if ((s = malloc(sizeof *s)) == NULL) {
+ syswarn(1, errno, "defered symlink");
+ unlink(path);
+ free(path);
+ free(value);
+ return (-1);
+ }
+ s->sli_ino = sb.st_ino;
+ s->sli_dev = sb.st_dev;
+ s->sli_fow = slitab[indx];
+ slitab[indx] = s;
+
+set_value:
+ s->sli_paths.sp_path = path;
+ s->sli_paths.sp_next = NULL;
+ s->sli_value = value;
+ s->sli_mode = mode;
+ return (0);
+}
+
+/*
+ * sltab_add_link()
+ * A hardlink was created; if it looks like a placeholder, handle the
+ * tracking.
+ * Return:
+ * 0 if things are ok, -1 if something went wrong
+ */
+
+int
+sltab_add_link(const char *path, const struct stat *sb)
+{
+ struct slinode *s;
+ struct slpath *p;
+ u_int indx;
+
+ if (!S_ISREG(sb->st_mode) || sb->st_size != 0)
+ return (1);
+
+ /* find the hash table entry for this hardlink */
+ indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ;
+ for (s = slitab[indx]; s != NULL; s = s->sli_fow) {
+ if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev)
+ continue;
+
+ if ((p = malloc(sizeof *p)) == NULL) {
+ syswarn(1, errno, "deferred symlink hardlink");
+ return (-1);
+ }
+ if (havechd && *path != '/') {
+ if ((p->sp_path = realpath(path, NULL)) == NULL) {
+ syswarn(1, errno, "Cannot canonicalize %s",
+ path);
+ free(p);
+ return (-1);
+ }
+ } else if ((p->sp_path = strdup(path)) == NULL) {
+ syswarn(1, errno, "defered symlink hardlink path");
+ free(p);
+ return (-1);
+ }
+
+ /* link it in */
+ p->sp_next = s->sli_paths.sp_next;
+ s->sli_paths.sp_next = p;
+ return (0);
+ }
+
+ /* not found */
+ return (1);
+}
+
+
+static int
+sltab_process_one(struct slinode *s, struct slpath *p, const char *first,
+ int in_sig)
+{
+ struct stat sb;
+ char *path = p->sp_path;
+ mode_t mode;
+ int err;
+
+ /*
+ * is it the expected placeholder? This can fail legimately
+ * if the archive overwrote the link with another, later entry,
+ * so don't warn.
+ */
+ if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode) || sb.st_size != 0 ||
+ sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev)
+ return (0);
+
+ if (unlink(path) && errno != ENOENT) {
+ if (!in_sig)
+ syswarn(1, errno, "deferred symlink removal");
+ return (0);
+ }
+
+ err = 0;
+ if (first != NULL) {
+ /* add another hardlink to the existing symlink */
+ if (linkat(AT_FDCWD, first, AT_FDCWD, path, 0) == 0)
+ return (0);
+
+ /*
+ * Couldn't hardlink the symlink for some reason, so we'll
+ * try creating it as its own symlink, but save the error
+ * for reporting if that fails.
+ */
+ err = errno;
+ }
+
+ if (symlink(s->sli_value, path)) {
+ if (!in_sig) {
+ const char *qualifier = "";
+ if (err)
+ qualifier = " hardlink";
+ else
+ err = errno;
+
+ syswarn(1, err, "deferred symlink%s: %s",
+ qualifier, path);
+ }
+ return (0);
+ }
+
+ /* success, so set the id, mode, and times */
+ mode = s->sli_mode;
+ if (pids) {
+ /* if can't set the ids, force the set[ug]id bits off */
+ if (set_ids(path, sb.st_uid, sb.st_gid))
+ mode &= ~(SETBITS);
+ }
+
+ if (pmode)
+ set_pmode(path, mode);
+
+ if (patime || pmtime)
+ set_ftime(path, &sb.st_mtim, &sb.st_atim, 0);
+
+ /*
+ * If we tried to link to first but failed, then this new symlink
+ * might be a better one to try in the future. Guess from the errno.
+ */
+ if (err == 0 || err == ENOENT || err == EMLINK || err == EOPNOTSUPP)
+ return (1);
+ return (0);
+}
+
+/*
+ * sltab_process()
+ * Do all the delayed process for escape symlinks
+ */
+
+void
+sltab_process(int in_sig)
+{
+ struct slinode *s;
+ struct slpath *p;
+ char *first;
+ u_int indx;
+
+ if (slitab == NULL)
+ return;
+
+ /* walk across the entire hash table */
+ for (indx = 0; indx < SL_TAB_SZ; indx++) {
+ while ((s = slitab[indx]) != NULL) {
+ /* pop this entry */
+ slitab[indx] = s->sli_fow;
+
+ first = NULL;
+ p = &s->sli_paths;
+ while (1) {
+ struct slpath *next_p;
+
+ if (sltab_process_one(s, p, first, in_sig)) {
+ if (!in_sig)
+ free(first);
+ first = p->sp_path;
+ } else if (!in_sig)
+ free(p->sp_path);
+
+ if ((next_p = p->sp_next) == NULL)
+ break;
+ *p = *next_p;
+ if (!in_sig)
+ free(next_p);
+ }
+ if (!in_sig) {
+ free(first);
+ free(s->sli_value);
+ free(s);
+ }
+ }
+ }
+ if (!in_sig)
+ free(slitab);
+ slitab = NULL;
+}
+
+
+/*
+ * Interactive rename table routines
+ *
+ * The interactive rename table keeps track of the new names that the user
+ * assigns to files from tty input. Since this map is unique for each file
+ * we must store it in case there is a reference to the file later in archive
+ * (a link). Otherwise we will be unable to find the file we know was
+ * extracted. The remapping of these files is stored in a memory based hash
+ * table (it is assumed since input must come from /dev/tty, it is unlikely to
+ * be a very large table).
+ */
+
+/*
+ * name_start()
+ * create the interactive rename table
+ * Return:
+ * 0 if successful, -1 otherwise
+ */
+
+int
+name_start(void)
+{
+ if (ntab != NULL)
+ return(0);
+ if ((ntab = calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {
+ paxwarn(1, "Cannot allocate memory for interactive rename table");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * add_name()
+ * add the new name to old name mapping just created by the user.
+ * If an old name mapping is found (there may be duplicate names on an
+ * archive) only the most recent is kept.
+ * Return:
+ * 0 if added, -1 otherwise
+ */
+
+int
+add_name(char *oname, int onamelen, char *nname)
+{
+ NAMT *pt;
+ u_int indx;
+
+ if (ntab == NULL) {
+ /*
+ * should never happen
+ */
+ paxwarn(0, "No interactive rename table, links may fail");
+ return(0);
+ }
+
+ /*
+ * look to see if we have already mapped this file, if so we
+ * will update it
+ */
+ indx = st_hash(oname, onamelen, N_TAB_SZ);
+ if ((pt = ntab[indx]) != NULL) {
+ /*
+ * look down the has chain for the file
+ */
+ while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
+ pt = pt->fow;
+
+ if (pt != NULL) {
+ /*
+ * found an old mapping, replace it with the new one
+ * the user just input (if it is different)
+ */
+ if (strcmp(nname, pt->nname) == 0)
+ return(0);
+
+ free(pt->nname);
+ if ((pt->nname = strdup(nname)) == NULL) {
+ paxwarn(1, "Cannot update rename table");
+ return(-1);
+ }
+ return(0);
+ }
+ }
+
+ /*
+ * this is a new mapping, add it to the table
+ */
+ if ((pt = malloc(sizeof(NAMT))) != NULL) {
+ if ((pt->oname = strdup(oname)) != NULL) {
+ if ((pt->nname = strdup(nname)) != NULL) {
+ pt->fow = ntab[indx];
+ ntab[indx] = pt;
+ return(0);
+ }
+ free(pt->oname);
+ }
+ free(pt);
+ }
+ paxwarn(1, "Interactive rename table out of memory");
+ return(-1);
+}
+
+/*
+ * sub_name()
+ * look up a link name to see if it points at a file that has been
+ * remapped by the user. If found, the link is adjusted to contain the
+ * new name (oname is the link to name)
+ */
+
+void
+sub_name(char *oname, int *onamelen, int onamesize)
+{
+ NAMT *pt;
+ u_int indx;
+
+ if (ntab == NULL)
+ return;
+ /*
+ * look the name up in the hash table
+ */
+ indx = st_hash(oname, *onamelen, N_TAB_SZ);
+ if ((pt = ntab[indx]) == NULL)
+ return;
+
+ while (pt != NULL) {
+ /*
+ * walk down the hash chain looking for a match
+ */
+ if (strcmp(oname, pt->oname) == 0) {
+ /*
+ * found it, replace it with the new name
+ * and return (we know that oname has enough space)
+ */
+ *onamelen = strlcpy(oname, pt->nname, onamesize);
+ if (*onamelen >= onamesize)
+ *onamelen = onamesize - 1; /* XXX truncate? */
+ return;
+ }
+ pt = pt->fow;
+ }
+
+ /*
+ * no match, just return
+ */
+}
+
+#ifndef NOCPIO
+/*
+ * device/inode mapping table routines
+ * (used with formats that store device and inodes fields)
+ *
+ * device/inode mapping tables remap the device field in a archive header. The
+ * device/inode fields are used to determine when files are hard links to each
+ * other. However these values have very little meaning outside of that. This
+ * database is used to solve one of two different problems.
+ *
+ * 1) when files are appended to an archive, while the new files may have hard
+ * links to each other, you cannot determine if they have hard links to any
+ * file already stored on the archive from a prior run of pax. We must assume
+ * that these inode/device pairs are unique only within a SINGLE run of pax
+ * (which adds a set of files to an archive). So we have to make sure the
+ * inode/dev pairs we add each time are always unique. We do this by observing
+ * while the inode field is very dense, the use of the dev field is fairly
+ * sparse. Within each run of pax, we remap any device number of a new archive
+ * member that has a device number used in a prior run and already stored in a
+ * file on the archive. During the read phase of the append, we store the
+ * device numbers used and mark them to not be used by any file during the
+ * write phase. If during write we go to use one of those old device numbers,
+ * we remap it to a new value.
+ *
+ * 2) Often the fields in the archive header used to store these values are
+ * too small to store the entire value. The result is an inode or device value
+ * which can be truncated. This really can foul up an archive. With truncation
+ * we end up creating links between files that are really not links (after
+ * truncation the inodes are the same value). We address that by detecting
+ * truncation and forcing a remap of the device field to split truncated
+ * inodes away from each other. Each truncation creates a pattern of bits that
+ * are removed. We use this pattern of truncated bits to partition the inodes
+ * on a single device to many different devices (each one represented by the
+ * truncated bit pattern). All inodes on the same device that have the same
+ * truncation pattern are mapped to the same new device. Two inodes that
+ * truncate to the same value clearly will always have different truncation
+ * bit patterns, so they will be split from away each other. When we spot
+ * device truncation we remap the device number to a non truncated value.
+ * (for more info see table.h for the data structures involved).
+ */
+
+static DEVT *chk_dev(dev_t, int);
+
+/*
+ * dev_start()
+ * create the device mapping table
+ * Return:
+ * 0 if successful, -1 otherwise
+ */
+
+int
+dev_start(void)
+{
+ if (dtab != NULL)
+ return(0);
+ if ((dtab = calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {
+ paxwarn(1, "Cannot allocate memory for device mapping table");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * add_dev()
+ * add a device number to the table. this will force the device to be
+ * remapped to a new value if it be used during a write phase. This
+ * function is called during the read phase of an append to prohibit the
+ * use of any device number already in the archive.
+ * Return:
+ * 0 if added ok, -1 otherwise
+ */
+
+int
+add_dev(ARCHD *arcn)
+{
+ if (chk_dev(arcn->sb.st_dev, 1) == NULL)
+ return(-1);
+ return(0);
+}
+
+/*
+ * chk_dev()
+ * check for a device value in the device table. If not found and the add
+ * flag is set, it is added. This does NOT assign any mapping values, just
+ * adds the device number as one that need to be remapped. If this device
+ * is already mapped, just return with a pointer to that entry.
+ * Return:
+ * pointer to the entry for this device in the device map table. Null
+ * if the add flag is not set and the device is not in the table (it is
+ * not been seen yet). If add is set and the device cannot be added, null
+ * is returned (indicates an error).
+ */
+
+static DEVT *
+chk_dev(dev_t dev, int add)
+{
+ DEVT *pt;
+ u_int indx;
+
+ if (dtab == NULL)
+ return(NULL);
+ /*
+ * look to see if this device is already in the table
+ */
+ indx = ((unsigned)dev) % D_TAB_SZ;
+ if ((pt = dtab[indx]) != NULL) {
+ while ((pt != NULL) && (pt->dev != dev))
+ pt = pt->fow;
+
+ /*
+ * found it, return a pointer to it
+ */
+ if (pt != NULL)
+ return(pt);
+ }
+
+ /*
+ * not in table, we add it only if told to as this may just be a check
+ * to see if a device number is being used.
+ */
+ if (add == 0)
+ return(NULL);
+
+ /*
+ * allocate a node for this device and add it to the front of the hash
+ * chain. Note we do not assign remaps values here, so the pt->list
+ * list must be NULL.
+ */
+ if ((pt = malloc(sizeof(DEVT))) == NULL) {
+ paxwarn(1, "Device map table out of memory");
+ return(NULL);
+ }
+ pt->dev = dev;
+ pt->list = NULL;
+ pt->fow = dtab[indx];
+ dtab[indx] = pt;
+ return(pt);
+}
+/*
+ * map_dev()
+ * given an inode and device storage mask (the mask has a 1 for each bit
+ * the archive format is able to store in a header), we check for inode
+ * and device truncation and remap the device as required. Device mapping
+ * can also occur when during the read phase of append a device number was
+ * seen (and was marked as do not use during the write phase). WE ASSUME
+ * that unsigned longs are the same size or bigger than the fields used
+ * for ino_t and dev_t. If not the types will have to be changed.
+ * Return:
+ * 0 if all ok, -1 otherwise.
+ */
+
+int
+map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
+{
+ DEVT *pt;
+ DLIST *dpt;
+ static dev_t lastdev = 0; /* next device number to try */
+ int trc_ino = 0;
+ int trc_dev = 0;
+ ino_t trunc_bits = 0;
+ ino_t nino;
+
+ if (dtab == NULL)
+ return(0);
+ /*
+ * check for device and inode truncation, and extract the truncated
+ * bit pattern.
+ */
+ if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
+ ++trc_dev;
+ if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
+ ++trc_ino;
+ trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
+ }
+
+ /*
+ * see if this device is already being mapped, look up the device
+ * then find the truncation bit pattern which applies
+ */
+ if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
+ /*
+ * this device is already marked to be remapped
+ */
+ for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
+ if (dpt->trunc_bits == trunc_bits)
+ break;
+
+ if (dpt != NULL) {
+ /*
+ * we are being remapped for this device and pattern
+ * change the device number to be stored and return
+ */
+ arcn->sb.st_dev = dpt->dev;
+ arcn->sb.st_ino = nino;
+ return(0);
+ }
+ } else {
+ /*
+ * this device is not being remapped YET. if we do not have any
+ * form of truncation, we do not need a remap
+ */
+ if (!trc_ino && !trc_dev)
+ return(0);
+
+ /*
+ * we have truncation, have to add this as a device to remap
+ */
+ if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
+ goto bad;
+
+ /*
+ * if we just have a truncated inode, we have to make sure that
+ * all future inodes that do not truncate (they have the
+ * truncation pattern of all 0's) continue to map to the same
+ * device number. We probably have already written inodes with
+ * this device number to the archive with the truncation
+ * pattern of all 0's. So we add the mapping for all 0's to the
+ * same device number.
+ */
+ if (!trc_dev && (trunc_bits != 0)) {
+ if ((dpt = malloc(sizeof(DLIST))) == NULL)
+ goto bad;
+ dpt->trunc_bits = 0;
+ dpt->dev = arcn->sb.st_dev;
+ dpt->fow = pt->list;
+ pt->list = dpt;
+ }
+ }
+
+ /*
+ * look for a device number not being used. We must watch for wrap
+ * around on lastdev (so we do not get stuck looking forever!)
+ */
+ while (++lastdev > 0) {
+ if (chk_dev(lastdev, 0) != NULL)
+ continue;
+ /*
+ * found an unused value. If we have reached truncation point
+ * for this format we are hosed, so we give up. Otherwise we
+ * mark it as being used.
+ */
+ if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
+ (chk_dev(lastdev, 1) == NULL))
+ goto bad;
+ break;
+ }
+
+ if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL))
+ goto bad;
+
+ /*
+ * got a new device number, store it under this truncation pattern.
+ * change the device number this file is being stored with.
+ */
+ dpt->trunc_bits = trunc_bits;
+ dpt->dev = lastdev;
+ dpt->fow = pt->list;
+ pt->list = dpt;
+ arcn->sb.st_dev = lastdev;
+ arcn->sb.st_ino = nino;
+ return(0);
+
+ bad:
+ paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
+ arcn->name);
+ paxwarn(0, "Archive may create improper hard links when extracted");
+ return(0);
+}
+#endif /* NOCPIO */
+
+/*
+ * directory access/mod time reset table routines (for directories READ by pax)
+ *
+ * The pax -t flag requires that access times of archive files be the same
+ * before being read by pax. For regular files, access time is restored after
+ * the file has been copied. This database provides the same functionality for
+ * directories read during file tree traversal. Restoring directory access time
+ * is more complex than files since directories may be read several times until
+ * all the descendants in their subtree are visited by fts. Directory access
+ * and modification times are stored during the fts pre-order visit (done
+ * before any descendants in the subtree are visited) and restored after the
+ * fts post-order visit (after all the descendants have been visited). In the
+ * case of premature exit from a subtree (like from the effects of -n), any
+ * directory entries left in this database are reset during final cleanup
+ * operations of pax. Entries are hashed by inode number for fast lookup.
+ */
+
+/*
+ * atdir_start()
+ * create the directory access time database for directories READ by pax.
+ * Return:
+ * 0 is created ok, -1 otherwise.
+ */
+
+int
+atdir_start(void)
+{
+ if (atab != NULL)
+ return(0);
+ if ((atab = calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {
+ paxwarn(1,"Cannot allocate space for directory access time table");
+ return(-1);
+ }
+ return(0);
+}
+
+
+/*
+ * atdir_end()
+ * walk through the directory access time table and reset the access time
+ * of any directory who still has an entry left in the database. These
+ * entries are for directories READ by pax
+ */
+
+void
+atdir_end(void)
+{
+ ATDIR *pt;
+ int i;
+
+ if (atab == NULL)
+ return;
+ /*
+ * for each non-empty hash table entry reset all the directories
+ * chained there.
+ */
+ for (i = 0; i < A_TAB_SZ; ++i) {
+ if ((pt = atab[i]) == NULL)
+ continue;
+ /*
+ * remember to force the times, set_ftime() looks at pmtime
+ * and patime, which only applies to things CREATED by pax,
+ * not read by pax. Read time reset is controlled by -t.
+ */
+ for (; pt != NULL; pt = pt->fow)
+ set_attr(&pt->ft, 1, 0, 0, 0);
+ }
+}
+
+/*
+ * add_atdir()
+ * add a directory to the directory access time table. Table is hashed
+ * and chained by inode number. This is for directories READ by pax
+ */
+
+void
+add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp,
+ const struct timespec *atimp)
+{
+ ATDIR *pt;
+ sigset_t allsigs, savedsigs;
+ u_int indx;
+
+ if (atab == NULL)
+ return;
+
+ /*
+ * make sure this directory is not already in the table, if so just
+ * return (the older entry always has the correct time). The only
+ * way this will happen is when the same subtree can be traversed by
+ * different args to pax and the -n option is aborting fts out of a
+ * subtree before all the post-order visits have been made.
+ */
+ indx = ((unsigned)ino) % A_TAB_SZ;
+ if ((pt = atab[indx]) != NULL) {
+ while (pt != NULL) {
+ if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
+ break;
+ pt = pt->fow;
+ }
+
+ /*
+ * oops, already there. Leave it alone.
+ */
+ if (pt != NULL)
+ return;
+ }
+
+ /*
+ * add it to the front of the hash chain
+ */
+ sigfillset(&allsigs);
+ sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
+ if ((pt = malloc(sizeof *pt)) != NULL) {
+ if ((pt->ft.ft_name = strdup(fname)) != NULL) {
+ pt->ft.ft_dev = dev;
+ pt->ft.ft_ino = ino;
+ pt->ft.ft_mtim = *mtimp;
+ pt->ft.ft_atim = *atimp;
+ pt->fow = atab[indx];
+ atab[indx] = pt;
+ sigprocmask(SIG_SETMASK, &savedsigs, NULL);
+ return;
+ }
+ free(pt);
+ }
+
+ sigprocmask(SIG_SETMASK, &savedsigs, NULL);
+ paxwarn(1, "Directory access time reset table ran out of memory");
+}
+
+/*
+ * get_atdir()
+ * look up a directory by inode and device number to obtain the access
+ * and modification time you want to set to. If found, the modification
+ * and access time parameters are set and the entry is removed from the
+ * table (as it is no longer needed). These are for directories READ by
+ * pax
+ * Return:
+ * 0 if found, -1 if not found.
+ */
+
+int
+do_atdir(const char *name, dev_t dev, ino_t ino)
+{
+ ATDIR *pt;
+ ATDIR **ppt;
+ sigset_t allsigs, savedsigs;
+ u_int indx;
+
+ if (atab == NULL)
+ return(-1);
+ /*
+ * hash by inode and search the chain for an inode and device match
+ */
+ indx = ((unsigned)ino) % A_TAB_SZ;
+ if ((pt = atab[indx]) == NULL)
+ return(-1);
+
+ ppt = &(atab[indx]);
+ while (pt != NULL) {
+ if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
+ break;
+ /*
+ * no match, go to next one
+ */
+ ppt = &(pt->fow);
+ pt = pt->fow;
+ }
+
+ /*
+ * return if we did not find it.
+ */
+ if (pt == NULL || pt->ft.ft_name == NULL ||
+ strcmp(name, pt->ft.ft_name) == 0)
+ return(-1);
+
+ /*
+ * found it. set the times and remove the entry from the table.
+ */
+ set_attr(&pt->ft, 1, 0, 0, 0);
+ sigfillset(&allsigs);
+ sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
+ *ppt = pt->fow;
+ sigprocmask(SIG_SETMASK, &savedsigs, NULL);
+ free(pt->ft.ft_name);
+ free(pt);
+ return(0);
+}
+
+/*
+ * directory access mode and time storage routines (for directories CREATED
+ * by pax).
+ *
+ * Pax requires that extracted directories, by default, have their access/mod
+ * times and permissions set to the values specified in the archive. During the
+ * actions of extracting (and creating the destination subtree during -rw copy)
+ * directories extracted may be modified after being created. Even worse is
+ * that these directories may have been created with file permissions which
+ * prohibits any descendants of these directories from being extracted. When
+ * directories are created by pax, access rights may be added to permit the
+ * creation of files in their subtree. Every time pax creates a directory, the
+ * times and file permissions specified by the archive are stored. After all
+ * files have been extracted (or copied), these directories have their times
+ * and file modes reset to the stored values. The directory info is restored in
+ * reverse order as entries were added from root to leaf: to restore atime
+ * properly, we must go backwards.
+ */
+
+/*
+ * dir_start()
+ * set up the directory time and file mode storage for directories CREATED
+ * by pax.
+ * Return:
+ * 0 if ok, -1 otherwise
+ */
+
+int
+dir_start(void)
+{
+ if (dirp != NULL)
+ return(0);
+
+ dirsize = DIRP_SIZE;
+ if ((dirp = reallocarray(NULL, dirsize, sizeof(DIRDATA))) == NULL) {
+ paxwarn(1, "Unable to allocate memory for directory times");
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * add_dir()
+ * add the mode and times for a newly CREATED directory
+ * name is name of the directory, psb the stat buffer with the data in it,
+ * frc_mode is a flag that says whether to force the setting of the mode
+ * (ignoring the user set values for preserving file mode). Frc_mode is
+ * for the case where we created a file and found that the resulting
+ * directory was not writeable and the user asked for file modes to NOT
+ * be preserved. (we have to preserve what was created by default, so we
+ * have to force the setting at the end. this is stated explicitly in the
+ * pax spec)
+ */
+
+void
+add_dir(char *name, struct stat *psb, int frc_mode)
+{
+ DIRDATA *dblk;
+ sigset_t allsigs, savedsigs;
+ char realname[PATH_MAX], *rp;
+
+ if (dirp == NULL)
+ return;
+
+ if (havechd && *name != '/') {
+ if ((rp = realpath(name, realname)) == NULL) {
+ paxwarn(1, "Cannot canonicalize %s", name);
+ return;
+ }
+ name = rp;
+ }
+ if (dircnt == dirsize) {
+ dblk = reallocarray(dirp, dirsize * 2, sizeof(DIRDATA));
+ if (dblk == NULL) {
+ paxwarn(1, "Unable to store mode and times for created"
+ " directory: %s", name);
+ return;
+ }
+ sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
+ dirp = dblk;
+ dirsize *= 2;
+ sigprocmask(SIG_SETMASK, &savedsigs, NULL);
+ }
+ dblk = &dirp[dircnt];
+ if ((dblk->ft.ft_name = strdup(name)) == NULL) {
+ paxwarn(1, "Unable to store mode and times for created"
+ " directory: %s", name);
+ return;
+ }
+ dblk->ft.ft_mtim = psb->st_mtim;
+ dblk->ft.ft_atim = psb->st_atim;
+ dblk->ft.ft_ino = psb->st_ino;
+ dblk->ft.ft_dev = psb->st_dev;
+ dblk->mode = psb->st_mode & ABITS;
+ dblk->frc_mode = frc_mode;
+ sigprocmask(SIG_BLOCK, &allsigs, &savedsigs);
+ ++dircnt;
+ sigprocmask(SIG_SETMASK, &savedsigs, NULL);
+}
+
+/*
+ * delete_dir()
+ * When we rmdir a directory, we may want to make sure we don't
+ * later warn about being unable to set its mode and times.
+ */
+
+void
+delete_dir(dev_t dev, ino_t ino)
+{
+ DIRDATA *dblk;
+ char *name;
+ size_t i;
+
+ if (dirp == NULL)
+ return;
+ for (i = 0; i < dircnt; i++) {
+ dblk = &dirp[i];
+
+ if (dblk->ft.ft_name == NULL)
+ continue;
+ if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) {
+ name = dblk->ft.ft_name;
+ dblk->ft.ft_name = NULL;
+ free(name);
+ break;
+ }
+ }
+}
+
+/*
+ * proc_dir(int in_sig)
+ * process all file modes and times stored for directories CREATED
+ * by pax. If in_sig is set, we're in a signal handler and can't
+ * free stuff.
+ */
+
+void
+proc_dir(int in_sig)
+{
+ DIRDATA *dblk;
+ size_t cnt;
+
+ if (dirp == NULL)
+ return;
+ /*
+ * read backwards through the file and process each directory
+ */
+ cnt = dircnt;
+ while (cnt-- > 0) {
+ dblk = &dirp[cnt];
+ /*
+ * If we remove a directory we created, we replace the
+ * ft_name with NULL. Ignore those.
+ */
+ if (dblk->ft.ft_name == NULL)
+ continue;
+
+ /*
+ * frc_mode set, make sure we set the file modes even if
+ * the user didn't ask for it (see file_subs.c for more info)
+ */
+ set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode,
+ in_sig);
+ if (!in_sig)
+ free(dblk->ft.ft_name);
+ }
+
+ if (!in_sig)
+ free(dirp);
+ dirp = NULL;
+ dircnt = 0;
+}
+
+/*
+ * database independent routines
+ */
+
+/*
+ * st_hash()
+ * hashes filenames to a u_int for hashing into a table. Looks at the tail
+ * end of file, as this provides far better distribution than any other
+ * part of the name. For performance reasons we only care about the last
+ * MAXKEYLEN chars (should be at LEAST large enough to pick off the file
+ * name). Was tested on 500,000 name file tree traversal from the root
+ * and gave almost a perfectly uniform distribution of keys when used with
+ * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
+ * chars at a time and pads with 0 for last addition.
+ * Return:
+ * the hash value of the string MOD (%) the table size.
+ */
+
+u_int
+st_hash(const char *name, int len, int tabsz)
+{
+ const char *pt;
+ char *dest;
+ const char *end;
+ int i;
+ u_int key = 0;
+ int steps;
+ int res;
+ u_int val;
+
+ /*
+ * only look at the tail up to MAXKEYLEN, we do not need to waste
+ * time here (remember these are pathnames, the tail is what will
+ * spread out the keys)
+ */
+ if (len > MAXKEYLEN) {
+ pt = &(name[len - MAXKEYLEN]);
+ len = MAXKEYLEN;
+ } else
+ pt = name;
+
+ /*
+ * calculate the number of u_int size steps in the string and if
+ * there is a runt to deal with
+ */
+ steps = len/sizeof(u_int);
+ res = len % sizeof(u_int);
+
+ /*
+ * add up the value of the string in unsigned integer sized pieces
+ * too bad we cannot have unsigned int aligned strings, then we
+ * could avoid the expensive copy.
+ */
+ for (i = 0; i < steps; ++i) {
+ end = pt + sizeof(u_int);
+ dest = (char *)&val;
+ while (pt < end)
+ *dest++ = *pt++;
+ key += val;
+ }
+
+ /*
+ * add in the runt padded with zero to the right
+ */
+ if (res) {
+ val = 0;
+ end = pt + res;
+ dest = (char *)&val;
+ while (pt < end)
+ *dest++ = *pt++;
+ key += val;
+ }
+
+ /*
+ * return the result mod the table size
+ */
+ return(key % tabsz);
+}
diff --git a/bin/pax/tar.1 b/bin/pax/tar.1
new file mode 100644
index 0000000..bbdef11
--- /dev/null
+++ b/bin/pax/tar.1
@@ -0,0 +1,410 @@
+.\" $OpenBSD: tar.1,v 1.62 2020/01/16 16:46:46 schwarze Exp $
+.\"
+.\" Copyright (c) 1996 SigmaSoft, Th. Lockert
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd $Mdocdate: January 16 2020 $
+.Dt TAR 1
+.Os
+.Sh NAME
+.Nm tar
+.Nd tape archiver
+.Sh SYNOPSIS
+.Nm tar
+.Sm off
+.No { Cm crtux No } Op Cm 014578befHhjLmNOoPpqsvwXZz
+.Sm on
+.Bk -words
+.Op Ar blocking-factor | archive | replstr
+.Op Fl C Ar directory
+.Op Fl I Ar file
+.Op Ar
+.Ek
+.Nm tar
+.No { Ns Fl crtux Ns }
+.Bk -words
+.Op Fl 014578eHhjLmNOoPpqvwXZz
+.Op Fl b Ar blocking-factor
+.Op Fl C Ar directory
+.Op Fl f Ar archive
+.Op Fl I Ar file
+.Op Fl s Ar replstr
+.Op Ar
+.Ek
+.Sh DESCRIPTION
+The
+.Nm
+command creates, adds files to, or extracts files from an
+archive file in
+.Dq tar
+format.
+A tar archive is often stored on a magnetic tape, but can be
+stored equally well on a floppy, CD-ROM, or in a regular disk file.
+.Pp
+In the first (legacy) form, all option flags except for
+.Fl C
+and
+.Fl I
+must be contained within the first argument to
+.Nm
+and must not be prefixed by a hyphen
+.Pq Sq - .
+Option arguments, if any, are processed as subsequent arguments to
+.Nm
+and are processed in the order in which their corresponding option
+flags have been presented on the command line.
+.Pp
+In the second and preferred form, option flags may be given in any order
+and are immediately followed by their corresponding option argument
+values.
+.Pp
+One of the following flags must be present:
+.Bl -tag -width Ds
+.It Fl c
+Create new archive, or overwrite an existing archive,
+adding the specified files to it.
+.It Fl r
+Append the named new files to existing archive.
+Note that this will only work on media on which an end-of-file mark
+can be overwritten.
+.It Fl t
+List contents of archive.
+If any files are named on the
+command line, only those files will be listed.
+The
+.Ar file
+arguments may be specified as glob patterns (see
+.Xr glob 7
+for more information), in which case
+.Nm
+will list all archive members that match each pattern.
+.It Fl u
+Alias for
+.Fl r .
+.It Fl x
+Extract files from archive.
+If any files are named on the
+command line, only those files will be extracted from the
+archive.
+The
+.Ar file
+arguments may be specified as glob patterns (see
+.Xr glob 7
+for more information), in which case
+.Nm
+will extract all archive members that match each pattern.
+.Pp
+If more than one copy of a file exists in the
+archive, later copies will overwrite earlier copies during
+extraction.
+The file mode and modification time are preserved
+if possible.
+The file mode is subject to modification by the
+.Xr umask 2 .
+.El
+.Pp
+In addition to the flags mentioned above, any of the following
+flags may be used:
+.Bl -tag -width Ds
+.It Fl b Ar blocking-factor
+Set blocking factor to use for the archive.
+.Nm
+uses 512-byte blocks.
+The default is 20, the maximum is 126.
+Archives with a blocking factor larger than 63
+violate the POSIX standard and will not be portable to all systems.
+.It Fl C Ar directory
+This is a positional argument which sets the working directory for the
+following files.
+When extracting, files will be extracted into
+the specified directory; when creating, the specified files will be matched
+from the directory.
+.It Fl e
+Stop after the first error.
+.It Fl f Ar archive
+Filename where the archive is stored.
+Defaults to
+.Pa /dev/rst0 .
+If set to hyphen
+.Pq Sq -
+standard output is used.
+See also the
+.Ev TAPE
+environment variable.
+.It Fl H
+Follow symlinks given on the command line only.
+.It Fl h
+Follow symbolic links as if they were normal files
+or directories.
+In extract mode this means that a directory entry in the archive
+will not overwrite an existing symbolic link, but rather what the
+link ultimately points to.
+.It Fl I Ar file
+This is a positional argument which reads the names of files to
+archive or extract from the given file, one per line.
+.It Fl j
+Compress archive using bzip2.
+The bzip2 utility must be installed separately.
+.It Fl L
+Synonym for the
+.Fl h
+option.
+.It Fl m
+Do not preserve modification time.
+.It Fl N
+Use only the numeric UID and GID values when creating or extracting an
+archive.
+.It Fl O
+Write old-style (non-POSIX) archives.
+.It Fl o
+Don't write directory information that the older (V7) style
+.Nm
+is unable to decode.
+This implies the
+.Fl O
+flag.
+.It Fl P
+Do not strip leading slashes
+.Pq Sq /
+from pathnames.
+The default is to strip leading slashes.
+.It Fl p
+Preserve user and group ID as well as file mode regardless of
+the current
+.Xr umask 2 .
+The setuid and setgid bits are only preserved if the user and group ID
+could be preserved.
+Only meaningful in conjunction with the
+.Fl x
+flag.
+.It Fl q
+Select the first archive member that matches each
+.Ar file
+operand.
+No more than one archive member is matched for each
+.Ar file .
+When members of type directory are matched, the file hierarchy rooted at that
+directory is also matched.
+.It Fl s Ar replstr
+Modify the archive member names according to the substitution expression
+.Ar replstr ,
+using the syntax of the
+.Xr ed 1
+utility regular expressions.
+.Ar file
+arguments may be given to restrict the list of archive members to those
+specified.
+.Pp
+The format of these regular expressions is
+.Pp
+.Dl /old/new/[gp]
+.Pp
+As in
+.Xr ed 1 ,
+.Va old
+is a basic regular expression (see
+.Xr re_format 7 )
+and
+.Va new
+can contain an ampersand
+.Pq Ql & ,
+.Ql \e Ns Em n
+(where
+.Em n
+is a digit) back-references,
+or subexpression matching.
+The
+.Va old
+string may also contain newline characters.
+Any non-null character can be used as a delimiter
+.Po
+.Ql /
+is shown here
+.Pc .
+Multiple
+.Fl s
+expressions can be specified.
+The expressions are applied in the order they are specified on the
+command line, terminating with the first successful substitution.
+.Pp
+The optional trailing
+.Cm g
+continues to apply the substitution expression to the pathname substring,
+which starts with the first character following the end of the last successful
+substitution.
+The first unsuccessful substitution stops the operation of the
+.Cm g
+option.
+The optional trailing
+.Cm p
+will cause the final result of a successful substitution to be written to
+standard error in the following format:
+.Pp
+.D1 Em original-pathname No >> Em new-pathname
+.Pp
+File or archive member names that substitute to the empty string
+are not selected and will be skipped.
+.It Fl v
+Verbose operation mode.
+If
+.Fl v
+is specified multiple times or if the
+.Fl t
+option is also specified,
+.Nm
+will use a long format for listing files, similar to
+.Xr ls 1
+.Fl l .
+.It Fl w
+Interactively rename files.
+This option causes
+.Nm
+to prompt the user for the filename to use when storing or
+extracting files in an archive.
+.It Fl X
+Do not cross mount points in the file system.
+.It Fl Z
+Compress archive using
+.Xr compress 1 .
+.It Fl z
+Compress archive using
+.Xr gzip 1 .
+.El
+.Pp
+The options
+.Op Fl 014578
+can be used to select one of the compiled-in backup devices,
+.Pa /dev/rstN .
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev TMPDIR
+Path in which to store temporary files.
+.It Ev TAPE
+Default tape device to use instead of
+.Pa /dev/rst0 .
+If set to hyphen
+.Pq Sq -
+standard output is used.
+.El
+.Sh FILES
+.Bl -tag -width "/dev/rst0"
+.It Pa /dev/rst0
+default archive name
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It 0
+All files were processed successfully.
+.It 1
+An error occurred.
+.El
+.Sh EXAMPLES
+Create an archive on the default tape drive, containing the files named
+.Pa bonvole
+and
+.Pa sekve :
+.Pp
+.Dl $ tar c bonvole sekve
+.Pp
+Output a
+.Xr gzip 1
+compressed archive containing the files
+.Pa bonvole
+and
+.Pa sekve
+to a file called
+.Pa foriru.tar.gz :
+.Pp
+.Dl $ tar zcf foriru.tar.gz bonvole sekve
+.Pp
+Verbosely create an archive, called
+.Pa backup.tar.gz ,
+of all files matching the shell
+.Xr glob 7
+function
+.Pa *.c :
+.Pp
+.Dl $ tar zcvf backup.tar.gz *.c
+.Pp
+Verbosely list, but do not extract, all files ending in
+.Pa .jpeg
+from a compressed archive named
+.Pa backup.tar.gz .
+Note that the glob pattern has been quoted to avoid expansion by the shell:
+.Pp
+.Dl $ tar tvzf backup.tar.gz '*.jpeg'
+.Pp
+For more detailed examples, see
+.Xr pax 1 .
+.Sh DIAGNOSTICS
+Whenever
+.Nm
+cannot create a file or a link when extracting an archive or cannot
+find a file while writing an archive, or cannot preserve the user
+ID, group ID, file mode, or access and modification times when the
+.Fl p
+option is specified, a diagnostic message is written to standard
+error and a non-zero exit value will be returned, but processing
+will continue.
+In the case where
+.Nm
+cannot create a link to a file,
+.Nm
+will not create a second copy of the file.
+.Pp
+If the extraction of a file from an archive is prematurely terminated
+by a signal or error,
+.Nm
+may have only partially extracted the file the user wanted.
+Additionally, the file modes of extracted files and directories may
+have incorrect file bits, and the modification and access times may
+be wrong.
+.Pp
+If the creation of an archive is prematurely terminated by a signal
+or error,
+.Nm
+may have only partially created the archive, which may violate the
+specific archive format specification.
+.Sh SEE ALSO
+.Xr cpio 1 ,
+.Xr pax 1
+.Sh HISTORY
+A
+.Nm
+command first appeared in
+.At v7 .
+.Sh AUTHORS
+.An Keith Muller
+at the University of California, San Diego.
+.Sh CAVEATS
+The
+.Fl j
+and
+.Fl L
+flags are not portable to other versions of
+.Nm
+where they may have a different meaning.
diff --git a/bin/pax/tar.c b/bin/pax/tar.c
new file mode 100644
index 0000000..c62705b
--- /dev/null
+++ b/bin/pax/tar.c
@@ -0,0 +1,1284 @@
+/* $OpenBSD: tar.c,v 1.68 2019/06/24 03:33:09 deraadt Exp $ */
+/* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <errno.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#ifndef major
+#include <sys/sysmacros.h>
+#endif
+
+#include "pax.h"
+#include "extern.h"
+#include "tar.h"
+
+/*
+ * Routines for reading, writing and header identify of various versions of tar
+ */
+
+static size_t expandname(char *, size_t, char **, const char *, size_t);
+static u_long tar_chksm(char *, int);
+static char *name_split(char *, int);
+static int ul_oct(u_long, char *, int, int);
+static int ull_oct(unsigned long long, char *, int, int);
+#ifndef SMALL
+static int rd_xheader(ARCHD *arcn, int, off_t);
+#endif
+
+static uid_t uid_nobody;
+static uid_t uid_warn;
+static gid_t gid_nobody;
+static gid_t gid_warn;
+
+/*
+ * Routines common to all versions of tar
+ */
+
+int tar_nodir; /* do not write dirs under old tar */
+char *gnu_name_string; /* GNU ././@LongLink hackery name */
+char *gnu_link_string; /* GNU ././@LongLink hackery link */
+
+/*
+ * tar_endwr()
+ * add the tar trailer of two null blocks
+ * Return:
+ * 0 if ok, -1 otherwise (what wr_skip returns)
+ */
+
+int
+tar_endwr(void)
+{
+ return wr_skip(NULLCNT * BLKMULT);
+}
+
+/*
+ * tar_endrd()
+ * no cleanup needed here, just return size of trailer (for append)
+ * Return:
+ * size of trailer (2 * BLKMULT)
+ */
+
+off_t
+tar_endrd(void)
+{
+ return NULLCNT * BLKMULT;
+}
+
+/*
+ * tar_trail()
+ * Called to determine if a header block is a valid trailer. We are passed
+ * the block, the in_sync flag (which tells us we are in resync mode;
+ * looking for a valid header), and cnt (which starts at zero) which is
+ * used to count the number of empty blocks we have seen so far.
+ * Return:
+ * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
+ * could never contain a header.
+ */
+
+int
+tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
+{
+ int i;
+
+ /*
+ * look for all zero, trailer is two consecutive blocks of zero
+ */
+ for (i = 0; i < BLKMULT; ++i) {
+ if (buf[i] != '\0')
+ break;
+ }
+
+ /*
+ * if not all zero it is not a trailer, but MIGHT be a header.
+ */
+ if (i != BLKMULT)
+ return(-1);
+
+ /*
+ * When given a zero block, we must be careful!
+ * If we are not in resync mode, check for the trailer. Have to watch
+ * out that we do not mis-identify file data as the trailer, so we do
+ * NOT try to id a trailer during resync mode. During resync mode we
+ * might as well throw this block out since a valid header can NEVER be
+ * a block of all 0 (we must have a valid file name).
+ */
+ if (!in_resync && (++*cnt >= NULLCNT))
+ return(0);
+ return(1);
+}
+
+/*
+ * ul_oct()
+ * convert an unsigned long to an octal string. many oddball field
+ * termination characters are used by the various versions of tar in the
+ * different fields. term selects which kind to use. str is '0' padded
+ * at the front to len. we are unable to use only one format as many old
+ * tar readers are very cranky about this.
+ * Return:
+ * 0 if the number fit into the string, -1 otherwise
+ */
+
+static int
+ul_oct(u_long val, char *str, int len, int term)
+{
+ char *pt;
+
+ /*
+ * term selects the appropriate character(s) for the end of the string
+ */
+ pt = str + len - 1;
+ switch (term) {
+ case 3:
+ *pt-- = '\0';
+ break;
+ case 2:
+ *pt-- = ' ';
+ *pt-- = '\0';
+ break;
+ case 1:
+ *pt-- = ' ';
+ break;
+ case 0:
+ default:
+ *pt-- = '\0';
+ *pt-- = ' ';
+ break;
+ }
+
+ /*
+ * convert and blank pad if there is space
+ */
+ while (pt >= str) {
+ *pt-- = '0' + (char)(val & 0x7);
+ val >>= 3;
+ if (val == 0)
+ break;
+ }
+
+ while (pt >= str)
+ *pt-- = '0';
+ if (val != 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * ull_oct()
+ * Convert an unsigned long long to an octal string. One of many oddball
+ * field termination characters are used by the various versions of tar
+ * in the different fields. term selects which kind to use. str is
+ * '0' padded at the front to len. We are unable to use only one format
+ * as many old tar readers are very cranky about this.
+ * Return:
+ * 0 if the number fit into the string, -1 otherwise
+ */
+
+static int
+ull_oct(unsigned long long val, char *str, int len, int term)
+{
+ char *pt;
+
+ /*
+ * term selects the appropriate character(s) for the end of the string
+ */
+ pt = str + len - 1;
+ switch (term) {
+ case 3:
+ *pt-- = '\0';
+ break;
+ case 2:
+ *pt-- = ' ';
+ *pt-- = '\0';
+ break;
+ case 1:
+ *pt-- = ' ';
+ break;
+ case 0:
+ default:
+ *pt-- = '\0';
+ *pt-- = ' ';
+ break;
+ }
+
+ /*
+ * convert and blank pad if there is space
+ */
+ while (pt >= str) {
+ *pt-- = '0' + (char)(val & 0x7);
+ val >>= 3;
+ if (val == 0)
+ break;
+ }
+
+ while (pt >= str)
+ *pt-- = '0';
+ if (val != 0)
+ return(-1);
+ return(0);
+}
+
+/*
+ * tar_chksm()
+ * calculate the checksum for a tar block counting the checksum field as
+ * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
+ * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
+ * pad headers with 0.
+ * Return:
+ * unsigned long checksum
+ */
+
+static u_long
+tar_chksm(char *blk, int len)
+{
+ char *stop;
+ char *pt;
+ u_long chksm = BLNKSUM; /* initial value is checksum field sum */
+
+ /*
+ * add the part of the block before the checksum field
+ */
+ pt = blk;
+ stop = blk + CHK_OFFSET;
+ while (pt < stop)
+ chksm += (u_long)(*pt++ & 0xff);
+ /*
+ * move past the checksum field and keep going, spec counts the
+ * checksum field as the sum of 8 blanks (which is pre-computed as
+ * BLNKSUM).
+ * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
+ * starts, no point in summing zero's)
+ */
+ pt += CHK_LEN;
+ stop = blk + len;
+ while (pt < stop)
+ chksm += (u_long)(*pt++ & 0xff);
+ return(chksm);
+}
+
+/*
+ * Routines for old BSD style tar (also made portable to sysV tar)
+ */
+
+/*
+ * tar_id()
+ * determine if a block given to us is a valid tar header (and not a USTAR
+ * header). We have to be on the lookout for those pesky blocks of all
+ * zero's.
+ * Return:
+ * 0 if a tar header, -1 otherwise
+ */
+
+int
+tar_id(char *blk, int size)
+{
+ HD_TAR *hd;
+ HD_USTAR *uhd;
+
+ if (size < BLKMULT)
+ return(-1);
+ hd = (HD_TAR *)blk;
+ uhd = (HD_USTAR *)blk;
+
+ /*
+ * check for block of zero's first, a simple and fast test, then make
+ * sure this is not a ustar header by looking for the ustar magic
+ * cookie. We should use TMAGLEN, but some USTAR archive programs are
+ * wrong and create archives missing the \0. Last we check the
+ * checksum. If this is ok we have to assume it is a valid header.
+ */
+ if (hd->name[0] == '\0')
+ return(-1);
+ if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
+ return(-1);
+ if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
+ return(-1);
+ force_one_volume = 1;
+ return(0);
+}
+
+/*
+ * tar_opt()
+ * handle tar format specific -o options
+ * Return:
+ * 0 if ok -1 otherwise
+ */
+
+int
+tar_opt(void)
+{
+ OPLIST *opt;
+
+ while ((opt = opt_next()) != NULL) {
+ if (strcmp(opt->name, TAR_OPTION) ||
+ strcmp(opt->value, TAR_NODIR)) {
+ paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
+ opt->name, opt->value);
+ paxwarn(1,"%s=%s is the only supported tar format option",
+ TAR_OPTION, TAR_NODIR);
+ return(-1);
+ }
+
+ /*
+ * we only support one option, and only when writing
+ */
+ if ((act != APPND) && (act != ARCHIVE)) {
+ paxwarn(1, "%s=%s is only supported when writing.",
+ opt->name, opt->value);
+ return(-1);
+ }
+ tar_nodir = 1;
+ }
+ return(0);
+}
+
+
+/*
+ * tar_rd()
+ * extract the values out of block already determined to be a tar header.
+ * store the values in the ARCHD parameter.
+ * Return:
+ * 0
+ */
+
+int
+tar_rd(ARCHD *arcn, char *buf)
+{
+ HD_TAR *hd;
+ unsigned long long val;
+ char *pt;
+
+ /*
+ * we only get proper sized buffers passed to us
+ */
+ if (tar_id(buf, BLKMULT) < 0)
+ return(-1);
+ memset(arcn, 0, sizeof(*arcn));
+ arcn->org_name = arcn->name;
+ arcn->sb.st_nlink = 1;
+
+ /*
+ * copy out the name and values in the stat buffer
+ */
+ hd = (HD_TAR *)buf;
+ if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
+ arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
+ &gnu_name_string, hd->name, sizeof(hd->name));
+ arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
+ &gnu_link_string, hd->linkname, sizeof(hd->linkname));
+ }
+ arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
+ 0xfff);
+ arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
+ arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
+ arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT);
+ val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT);
+ if (val > MAX_TIME_T)
+ arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */
+ else
+ arcn->sb.st_mtime = val;
+ arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
+
+ /*
+ * have to look at the last character, it may be a '/' and that is used
+ * to encode this as a directory
+ */
+ pt = &(arcn->name[arcn->nlen - 1]);
+ arcn->pad = 0;
+ arcn->skip = 0;
+ switch (hd->linkflag) {
+ case SYMTYPE:
+ /*
+ * symbolic link, need to get the link name and set the type in
+ * the st_mode so -v printing will look correct.
+ */
+ arcn->type = PAX_SLK;
+ arcn->sb.st_mode |= S_IFLNK;
+ break;
+ case LNKTYPE:
+ /*
+ * hard link, need to get the link name, set the type in the
+ * st_mode and st_nlink so -v printing will look better.
+ */
+ arcn->type = PAX_HLK;
+ arcn->sb.st_nlink = 2;
+
+ /*
+ * no idea of what type this thing really points at, but
+ * we set something for printing only.
+ */
+ arcn->sb.st_mode |= S_IFREG;
+ break;
+ case LONGLINKTYPE:
+ case LONGNAMETYPE:
+ /*
+ * GNU long link/file; we tag these here and let the
+ * pax internals deal with it -- too ugly otherwise.
+ */
+ arcn->type =
+ hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ arcn->skip = arcn->sb.st_size;
+ break;
+ case DIRTYPE:
+ /*
+ * It is a directory, set the mode for -v printing
+ */
+ arcn->type = PAX_DIR;
+ arcn->sb.st_mode |= S_IFDIR;
+ arcn->sb.st_nlink = 2;
+ break;
+ case AREGTYPE:
+ case REGTYPE:
+ default:
+ /*
+ * If we have a trailing / this is a directory and NOT a file.
+ */
+ arcn->ln_name[0] = '\0';
+ arcn->ln_nlen = 0;
+ if (*pt == '/') {
+ /*
+ * it is a directory, set the mode for -v printing
+ */
+ arcn->type = PAX_DIR;
+ arcn->sb.st_mode |= S_IFDIR;
+ arcn->sb.st_nlink = 2;
+ } else {
+ /*
+ * have a file that will be followed by data. Set the
+ * skip value to the size field and calculate the size
+ * of the padding.
+ */
+ arcn->type = PAX_REG;
+ arcn->sb.st_mode |= S_IFREG;
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ arcn->skip = arcn->sb.st_size;
+ }
+ break;
+ }
+
+ /*
+ * strip off any trailing slash.
+ */
+ if (*pt == '/') {
+ *pt = '\0';
+ --arcn->nlen;
+ }
+ return(0);
+}
+
+/*
+ * tar_wr()
+ * write a tar header for the file specified in the ARCHD to the archive.
+ * Have to check for file types that cannot be stored and file names that
+ * are too long. Be careful of the term (last arg) to ul_oct, each field
+ * of tar has it own spec for the termination character(s).
+ * ASSUMED: space after header in header block is zero filled
+ * Return:
+ * 0 if file has data to be written after the header, 1 if file has NO
+ * data to write after the header, -1 if archive write failed
+ */
+
+int
+tar_wr(ARCHD *arcn)
+{
+ HD_TAR *hd;
+ int len;
+ char hdblk[sizeof(HD_TAR)];
+
+ /*
+ * check for those file system types which tar cannot store
+ */
+ switch (arcn->type) {
+ case PAX_DIR:
+ /*
+ * user asked that dirs not be written to the archive
+ */
+ if (tar_nodir)
+ return(1);
+ break;
+ case PAX_CHR:
+ paxwarn(1, "Tar cannot archive a character device %s",
+ arcn->org_name);
+ return(1);
+ case PAX_BLK:
+ paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
+ return(1);
+ case PAX_SCK:
+ paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
+ return(1);
+ case PAX_FIF:
+ paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
+ return(1);
+ case PAX_SLK:
+ case PAX_HLK:
+ case PAX_HRG:
+ if ((size_t)arcn->ln_nlen > sizeof(hd->linkname)) {
+ paxwarn(1, "Link name too long for tar %s",
+ arcn->ln_name);
+ return(1);
+ }
+ break;
+ case PAX_REG:
+ case PAX_CTG:
+ default:
+ break;
+ }
+
+ /*
+ * check file name len, remember extra char for dirs (the / at the end)
+ */
+ len = arcn->nlen;
+ if (arcn->type == PAX_DIR)
+ ++len;
+ if ((size_t)len > sizeof(hd->name)) {
+ paxwarn(1, "File name too long for tar %s", arcn->name);
+ return(1);
+ }
+
+ /*
+ * Copy the data out of the ARCHD into the tar header based on the type
+ * of the file. Remember, many tar readers want all fields to be
+ * padded with zero so we zero the header first. We then set the
+ * linkflag field (type), the linkname, the size, and set the padding
+ * (if any) to be added after the file data (0 for all other types,
+ * as they only have a header).
+ */
+ memset(hdblk, 0, sizeof(hdblk));
+ hd = (HD_TAR *)hdblk;
+ fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name));
+ arcn->pad = 0;
+
+ if (arcn->type == PAX_DIR) {
+ /*
+ * directories are the same as files, except have a filename
+ * that ends with a /, we add the slash here. No data follows
+ * dirs, so no pad.
+ */
+ hd->linkflag = AREGTYPE;
+ hd->name[len-1] = '/';
+ if (ul_oct(0, hd->size, sizeof(hd->size), 1))
+ goto out;
+ } else if (arcn->type == PAX_SLK) {
+ /*
+ * no data follows this file, so no pad
+ */
+ hd->linkflag = SYMTYPE;
+ fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
+ sizeof(arcn->ln_name));
+ if (ul_oct(0, hd->size, sizeof(hd->size), 1))
+ goto out;
+ } else if (PAX_IS_HARDLINK(arcn->type)) {
+ /*
+ * no data follows this file, so no pad
+ */
+ hd->linkflag = LNKTYPE;
+ fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
+ sizeof(arcn->ln_name));
+ if (ul_oct(0, hd->size, sizeof(hd->size), 1))
+ goto out;
+ } else {
+ /*
+ * data follows this file, so set the pad
+ */
+ hd->linkflag = AREGTYPE;
+ if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
+ paxwarn(1, "File is too large for tar %s",
+ arcn->org_name);
+ return(1);
+ }
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ }
+
+ /*
+ * copy those fields that are independent of the type
+ */
+ if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
+ ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
+ sizeof(hd->mtime), 1) ||
+ ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
+ ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0))
+ goto out;
+
+ /*
+ * calculate and add the checksum, then write the header. A return of
+ * 0 tells the caller to now write the file data, 1 says no data needs
+ * to be written
+ */
+ if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
+ sizeof(hd->chksum), 3))
+ goto out;
+ if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
+ return(-1);
+ if (wr_skip(BLKMULT - sizeof(HD_TAR)) < 0)
+ return(-1);
+ if (PAX_IS_REG(arcn->type))
+ return(0);
+ return(1);
+
+ out:
+ /*
+ * header field is out of range
+ */
+ paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
+ return(1);
+}
+
+/*
+ * Routines for POSIX ustar
+ */
+
+/*
+ * ustar_id()
+ * determine if a block given to us is a valid ustar header. We have to
+ * be on the lookout for those pesky blocks of all zero's
+ * Return:
+ * 0 if a ustar header, -1 otherwise
+ */
+
+int
+ustar_id(char *blk, int size)
+{
+ HD_USTAR *hd;
+
+ if (size < BLKMULT)
+ return(-1);
+ hd = (HD_USTAR *)blk;
+
+ /*
+ * check for block of zero's first, a simple and fast test then check
+ * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
+ * programs are fouled up and create archives missing the \0. Last we
+ * check the checksum. If ok we have to assume it is a valid header.
+ */
+ if (hd->prefix[0] == '\0' && hd->name[0] == '\0')
+ return(-1);
+ if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
+ return(-1);
+ if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
+ return(-1);
+ return(0);
+}
+
+/*
+ * ustar_rd()
+ * extract the values out of block already determined to be a ustar header.
+ * store the values in the ARCHD parameter.
+ * Return:
+ * 0
+ */
+
+int
+ustar_rd(ARCHD *arcn, char *buf)
+{
+ HD_USTAR *hd = (HD_USTAR *)buf;
+ char *dest;
+ int cnt = 0;
+ dev_t devmajor;
+ dev_t devminor;
+ unsigned long long val;
+
+ /*
+ * we only get proper sized buffers
+ */
+ if (ustar_id(buf, BLKMULT) < 0)
+ return(-1);
+
+#ifndef SMALL
+reset:
+#endif
+ memset(arcn, 0, sizeof(*arcn));
+ arcn->org_name = arcn->name;
+ arcn->sb.st_nlink = 1;
+
+#ifndef SMALL
+ /* Process Extended headers. */
+ if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) {
+ if (rd_xheader(arcn, hd->typeflag == GHDRTYPE,
+ (off_t)asc_ul(hd->size, sizeof(hd->size), OCT)) < 0)
+ return (-1);
+
+ /* Update and check the ustar header. */
+ if (rd_wrbuf(buf, BLKMULT) != BLKMULT)
+ return (-1);
+ if (ustar_id(buf, BLKMULT) < 0)
+ return(-1);
+
+ /* if the next block is another extension, reset the values */
+ if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE)
+ goto reset;
+ }
+#endif
+
+ if (!arcn->nlen) {
+ /*
+ * See if the filename is split into two parts. if, so join
+ * the parts. We copy the prefix first and add a / between
+ * the prefix and name.
+ */
+ dest = arcn->name;
+ if (*(hd->prefix) != '\0') {
+ cnt = fieldcpy(dest, sizeof(arcn->name) - 1,
+ hd->prefix, sizeof(hd->prefix));
+ dest += cnt;
+ *dest++ = '/';
+ cnt++;
+ } else
+ cnt = 0;
+
+ if (hd->typeflag != LONGLINKTYPE &&
+ hd->typeflag != LONGNAMETYPE) {
+ arcn->nlen = cnt + expandname(dest,
+ sizeof(arcn->name) - cnt, &gnu_name_string,
+ hd->name, sizeof(hd->name));
+ }
+ }
+
+ if (!arcn->ln_nlen &&
+ hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
+ arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
+ &gnu_link_string, hd->linkname, sizeof(hd->linkname));
+ }
+
+ /*
+ * follow the spec to the letter. we should only have mode bits, strip
+ * off all other crud we may be passed.
+ */
+ arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
+ 0xfff);
+ arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT);
+ val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT);
+ if (val > MAX_TIME_T)
+ arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */
+ else
+ arcn->sb.st_mtime = val;
+ arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
+
+ /*
+ * If we can find the ascii names for gname and uname in the password
+ * and group files we will use the uid's and gid they bind. Otherwise
+ * we use the uid and gid values stored in the header. (This is what
+ * the posix spec wants).
+ */
+ hd->gname[sizeof(hd->gname) - 1] = '\0';
+ if (Nflag || gid_from_group(hd->gname, &(arcn->sb.st_gid)) == -1)
+ arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
+ hd->uname[sizeof(hd->uname) - 1] = '\0';
+ if (Nflag || uid_from_user(hd->uname, &(arcn->sb.st_uid)) == -1)
+ arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
+
+ /*
+ * set the defaults, these may be changed depending on the file type
+ */
+ arcn->pad = 0;
+ arcn->skip = 0;
+ arcn->sb.st_rdev = (dev_t)0;
+
+ /*
+ * set the mode and PAX type according to the typeflag in the header
+ */
+ switch (hd->typeflag) {
+ case FIFOTYPE:
+ arcn->type = PAX_FIF;
+ arcn->sb.st_mode |= S_IFIFO;
+ break;
+ case DIRTYPE:
+ arcn->type = PAX_DIR;
+ arcn->sb.st_mode |= S_IFDIR;
+ arcn->sb.st_nlink = 2;
+
+ /*
+ * Some programs that create ustar archives append a '/'
+ * to the pathname for directories. This clearly violates
+ * ustar specs, but we will silently strip it off anyway.
+ */
+ if (arcn->name[arcn->nlen - 1] == '/')
+ arcn->name[--arcn->nlen] = '\0';
+ break;
+ case BLKTYPE:
+ case CHRTYPE:
+ /*
+ * this type requires the rdev field to be set.
+ */
+ if (hd->typeflag == BLKTYPE) {
+ arcn->type = PAX_BLK;
+ arcn->sb.st_mode |= S_IFBLK;
+ } else {
+ arcn->type = PAX_CHR;
+ arcn->sb.st_mode |= S_IFCHR;
+ }
+ devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
+ devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
+ arcn->sb.st_rdev = TODEV(devmajor, devminor);
+ break;
+ case SYMTYPE:
+ case LNKTYPE:
+ if (hd->typeflag == SYMTYPE) {
+ arcn->type = PAX_SLK;
+ arcn->sb.st_mode |= S_IFLNK;
+ } else {
+ arcn->type = PAX_HLK;
+ /*
+ * so printing looks better
+ */
+ arcn->sb.st_mode |= S_IFREG;
+ arcn->sb.st_nlink = 2;
+ }
+ break;
+ case LONGLINKTYPE:
+ case LONGNAMETYPE:
+ /*
+ * GNU long link/file; we tag these here and let the
+ * pax internals deal with it -- too ugly otherwise.
+ */
+ arcn->type =
+ hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ arcn->skip = arcn->sb.st_size;
+ break;
+ case CONTTYPE:
+ case AREGTYPE:
+ case REGTYPE:
+ default:
+ /*
+ * these types have file data that follows. Set the skip and
+ * pad fields.
+ */
+ arcn->type = PAX_REG;
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ arcn->skip = arcn->sb.st_size;
+ arcn->sb.st_mode |= S_IFREG;
+ break;
+ }
+ return(0);
+}
+
+/*
+ * ustar_wr()
+ * write a ustar header for the file specified in the ARCHD to the archive
+ * Have to check for file types that cannot be stored and file names that
+ * are too long. Be careful of the term (last arg) to ul_oct, we only use
+ * '\0' for the termination character (this is different than picky tar)
+ * ASSUMED: space after header in header block is zero filled
+ * Return:
+ * 0 if file has data to be written after the header, 1 if file has NO
+ * data to write after the header, -1 if archive write failed
+ */
+
+int
+ustar_wr(ARCHD *arcn)
+{
+ HD_USTAR *hd;
+ const char *name;
+ char *pt, hdblk[sizeof(HD_USTAR)];
+
+ /*
+ * check for those file system types ustar cannot store
+ */
+ if (arcn->type == PAX_SCK) {
+ paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
+ return(1);
+ }
+
+ /*
+ * user asked that dirs not be written to the archive
+ */
+ if (arcn->type == PAX_DIR && tar_nodir)
+ return (1);
+
+ /*
+ * check the length of the linkname
+ */
+ if (PAX_IS_LINK(arcn->type) &&
+ ((size_t)arcn->ln_nlen > sizeof(hd->linkname))) {
+ paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
+ return(1);
+ }
+
+ /*
+ * split the path name into prefix and name fields (if needed). if
+ * pt != arcn->name, the name has to be split
+ */
+ if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
+ paxwarn(1, "File name too long for ustar %s", arcn->name);
+ return(1);
+ }
+
+ /*
+ * zero out the header so we don't have to worry about zero fill below
+ */
+ memset(hdblk, 0, sizeof(hdblk));
+ hd = (HD_USTAR *)hdblk;
+ arcn->pad = 0;
+
+ /*
+ * split the name, or zero out the prefix
+ */
+ if (pt != arcn->name) {
+ /*
+ * name was split, pt points at the / where the split is to
+ * occur, we remove the / and copy the first part to the prefix
+ */
+ *pt = '\0';
+ fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name,
+ sizeof(arcn->name));
+ *pt++ = '/';
+ }
+
+ /*
+ * copy the name part. this may be the whole path or the part after
+ * the prefix
+ */
+ fieldcpy(hd->name, sizeof(hd->name), pt,
+ sizeof(arcn->name) - (pt - arcn->name));
+
+ /*
+ * set the fields in the header that are type dependent
+ */
+ switch (arcn->type) {
+ case PAX_DIR:
+ hd->typeflag = DIRTYPE;
+ if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ break;
+ case PAX_CHR:
+ case PAX_BLK:
+ if (arcn->type == PAX_CHR)
+ hd->typeflag = CHRTYPE;
+ else
+ hd->typeflag = BLKTYPE;
+ if (ul_oct(MAJOR(arcn->sb.st_rdev), hd->devmajor,
+ sizeof(hd->devmajor), 3) ||
+ ul_oct(MINOR(arcn->sb.st_rdev), hd->devminor,
+ sizeof(hd->devminor), 3) ||
+ ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ break;
+ case PAX_FIF:
+ hd->typeflag = FIFOTYPE;
+ if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ break;
+ case PAX_SLK:
+ case PAX_HLK:
+ case PAX_HRG:
+ if (arcn->type == PAX_SLK)
+ hd->typeflag = SYMTYPE;
+ else
+ hd->typeflag = LNKTYPE;
+ fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name,
+ sizeof(arcn->ln_name));
+ if (ul_oct(0, hd->size, sizeof(hd->size), 3))
+ goto out;
+ break;
+ case PAX_REG:
+ case PAX_CTG:
+ default:
+ /*
+ * file data with this type, set the padding
+ */
+ if (arcn->type == PAX_CTG)
+ hd->typeflag = CONTTYPE;
+ else
+ hd->typeflag = REGTYPE;
+ arcn->pad = TAR_PAD(arcn->sb.st_size);
+ if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) {
+ paxwarn(1, "File is too long for ustar %s",
+ arcn->org_name);
+ return(1);
+ }
+ break;
+ }
+
+ memcpy(hd->magic, TMAGIC, TMAGLEN);
+ memcpy(hd->version, TVERSION, TVERSLEN);
+
+ /*
+ * set the remaining fields. Some versions want all 16 bits of mode
+ * we better humor them (they really do not meet spec though)....
+ */
+ if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) {
+ if (uid_nobody == 0) {
+ if (uid_from_user("nobody", &uid_nobody) == -1)
+ goto out;
+ }
+ if (uid_warn != arcn->sb.st_uid) {
+ uid_warn = arcn->sb.st_uid;
+ paxwarn(1,
+ "Ustar header field is too small for uid %lu, "
+ "using nobody", (u_long)arcn->sb.st_uid);
+ }
+ if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3))
+ goto out;
+ }
+ if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) {
+ if (gid_nobody == 0) {
+ if (gid_from_group("nobody", &gid_nobody) == -1)
+ goto out;
+ }
+ if (gid_warn != arcn->sb.st_gid) {
+ gid_warn = arcn->sb.st_gid;
+ paxwarn(1,
+ "Ustar header field is too small for gid %lu, "
+ "using nobody", (u_long)arcn->sb.st_gid);
+ }
+ if (ul_oct(gid_nobody, hd->gid, sizeof(hd->gid), 3))
+ goto out;
+ }
+ if (ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime,
+ sizeof(hd->mtime), 3) ||
+ ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3))
+ goto out;
+ if (!Nflag) {
+ if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL)
+ strncpy(hd->uname, name, sizeof(hd->uname));
+ if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL)
+ strncpy(hd->gname, name, sizeof(hd->gname));
+ }
+
+ /*
+ * calculate and store the checksum write the header to the archive
+ * return 0 tells the caller to now write the file data, 1 says no data
+ * needs to be written
+ */
+ if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
+ sizeof(hd->chksum), 3))
+ goto out;
+ if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
+ return(-1);
+ if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0)
+ return(-1);
+ if (PAX_IS_REG(arcn->type))
+ return(0);
+ return(1);
+
+ out:
+ /*
+ * header field is out of range
+ */
+ paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
+ return(1);
+}
+
+/*
+ * name_split()
+ * see if the name has to be split for storage in a ustar header. We try
+ * to fit the entire name in the name field without splitting if we can.
+ * The split point is always at a /
+ * Return
+ * character pointer to split point (always the / that is to be removed
+ * if the split is not needed, the points is set to the start of the file
+ * name (it would violate the spec to split there). A NULL is returned if
+ * the file name is too long
+ */
+
+static char *
+name_split(char *name, int len)
+{
+ char *start;
+
+ /*
+ * check to see if the file name is small enough to fit in the name
+ * field. if so just return a pointer to the name.
+ * The strings can fill the complete name and prefix fields
+ * without a NUL terminator.
+ */
+ if (len <= TNMSZ)
+ return(name);
+ if (len > (TPFSZ + TNMSZ + 1))
+ return(NULL);
+
+ /*
+ * we start looking at the biggest sized piece that fits in the name
+ * field. We walk forward looking for a slash to split at. The idea is
+ * to find the biggest piece to fit in the name field (or the smallest
+ * prefix we can find) (the -1 is correct the biggest piece would
+ * include the slash between the two parts that gets thrown away)
+ */
+ start = name + len - TNMSZ - 1;
+
+ /*
+ * the prefix may not be empty, so skip the first character when
+ * trying to split a path of exactly TNMSZ+1 characters.
+ * NOTE: This means the ustar format can't store /str if
+ * str contains no slashes and the length of str == TNMSZ
+ */
+ if (start == name)
+ ++start;
+
+ while ((*start != '\0') && (*start != '/'))
+ ++start;
+
+ /*
+ * if we hit the end of the string, this name cannot be split, so we
+ * cannot store this file.
+ */
+ if (*start == '\0')
+ return(NULL);
+
+ /*
+ * the split point isn't valid if it results in a prefix
+ * longer than TPFSZ
+ */
+ if ((start - name) > TPFSZ)
+ return(NULL);
+
+ /*
+ * ok have a split point, return it to the caller
+ */
+ return(start);
+}
+
+static size_t
+expandname(char *buf, size_t len, char **gnu_name, const char *name,
+ size_t limit)
+{
+ size_t nlen;
+
+ if (*gnu_name) {
+ /* *gnu_name is NUL terminated */
+ if ((nlen = strlcpy(buf, *gnu_name, len)) >= len)
+ nlen = len - 1;
+ free(*gnu_name);
+ *gnu_name = NULL;
+ } else
+ nlen = fieldcpy(buf, len, name, limit);
+ return(nlen);
+}
+
+#ifndef SMALL
+
+/* shortest possible extended record: "5 a=\n" */
+#define MINXHDRSZ 5
+
+/* longest record we'll accept */
+#define MAXXHDRSZ BLKMULT
+
+static int
+rd_xheader(ARCHD *arcn, int global, off_t size)
+{
+ char buf[MAXXHDRSZ];
+ long len;
+ char *delim, *keyword;
+ char *nextp, *p, *end;
+ int pad, ret = 0;
+
+ /* before we alter size, make note of how much we have to skip */
+ pad = TAR_PAD((unsigned)size);
+
+ p = end = buf;
+ while (size > 0 || p < end) {
+ if (size > 0) {
+ int rdlen;
+
+ /* shift stuff down */
+ if (p > buf) {
+ memmove(buf, p, end - p);
+ end -= p - buf;
+ p = buf;
+ }
+
+ /* fill starting at end */
+ rdlen = MINIMUM(size, (buf + sizeof buf) - end);
+ if (rd_wrbuf(end, rdlen) != rdlen) {
+ ret = -1;
+ break;
+ }
+ size -= rdlen;
+ end += rdlen;
+ }
+
+ /* [p, end) is good */
+ if (memchr(p, ' ', end - p) == NULL ||
+ !isdigit((unsigned char)*p)) {
+ paxwarn(1, "Invalid extended header record");
+ ret = -1;
+ break;
+ }
+ errno = 0;
+ len = strtol(p, &delim, 10);
+ if (*delim != ' ' || (errno == ERANGE && len == LONG_MAX) ||
+ len < MINXHDRSZ) {
+ paxwarn(1, "Invalid extended header record length");
+ ret = -1;
+ break;
+ }
+ if (len > end - p) {
+ paxwarn(1, "Extended header record length %lu is "
+ "out of range", len);
+ /* if we can just toss this record, do so */
+ len -= end - p;
+ if (len <= size && rd_skip(len) == 0) {
+ size -= len;
+ p = end = buf;
+ continue;
+ }
+ ret = -1;
+ break;
+ }
+ nextp = p + len;
+ keyword = p = delim + 1;
+ p = memchr(p, '=', len);
+ if (!p || nextp[-1] != '\n') {
+ paxwarn(1, "Malformed extended header record");
+ ret = -1;
+ break;
+ }
+ *p++ = nextp[-1] = '\0';
+ if (!global) {
+ if (!strcmp(keyword, "path")) {
+ arcn->nlen = strlcpy(arcn->name, p,
+ sizeof(arcn->name));
+ } else if (!strcmp(keyword, "linkpath")) {
+ arcn->ln_nlen = strlcpy(arcn->ln_name, p,
+ sizeof(arcn->ln_name));
+ }
+ }
+ p = nextp;
+ }
+
+ if (rd_skip(size + pad) < 0)
+ return (-1);
+ return (ret);
+}
+#endif
diff --git a/bin/pax/tar.h b/bin/pax/tar.h
new file mode 100644
index 0000000..318d099
--- /dev/null
+++ b/bin/pax/tar.h
@@ -0,0 +1,159 @@
+/* $OpenBSD: tar.h,v 1.9 2014/01/08 06:43:34 deraadt Exp $ */
+/* $NetBSD: tar.h,v 1.3 1995/03/21 09:07:51 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tar.h 8.2 (Berkeley) 4/18/94
+ */
+
+/*
+ * defines and data structures common to all tar formats
+ */
+#define CHK_LEN 8 /* length of checksum field */
+#define TNMSZ 100 /* size of name field */
+#ifdef _PAX_
+#define NULLCNT 2 /* number of null blocks in trailer */
+#define CHK_OFFSET 148 /* start of chksum field */
+#define BLNKSUM 256L /* sum of checksum field using ' ' */
+#endif /* _PAX_ */
+
+/*
+ * Values used in typeflag field in all tar formats
+ * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers)
+ */
+#define REGTYPE '0' /* Regular File */
+#define AREGTYPE '\0' /* Regular File */
+#define LNKTYPE '1' /* Link */
+#define SYMTYPE '2' /* Symlink */
+#define CHRTYPE '3' /* Character Special File */
+#define BLKTYPE '4' /* Block Special File */
+#define DIRTYPE '5' /* Directory */
+#define FIFOTYPE '6' /* FIFO */
+#define CONTTYPE '7' /* high perf file */
+
+/*
+ * Extended header - POSIX.1-2001
+ */
+#define XHDRTYPE 'x' /* Extended header */
+#define GHDRTYPE 'g' /* Global header*/
+
+/*
+ * GNU tar compatibility;
+ */
+#define LONGLINKTYPE 'K' /* Long Symlink */
+#define LONGNAMETYPE 'L' /* Long File */
+
+/*
+ * Mode field encoding of the different file types - values in octal
+ */
+#define TSUID 04000 /* Set UID on execution */
+#define TSGID 02000 /* Set GID on execution */
+#define TSVTX 01000 /* Reserved */
+#define TUREAD 00400 /* Read by owner */
+#define TUWRITE 00200 /* Write by owner */
+#define TUEXEC 00100 /* Execute/Search by owner */
+#define TGREAD 00040 /* Read by group */
+#define TGWRITE 00020 /* Write by group */
+#define TGEXEC 00010 /* Execute/Search by group */
+#define TOREAD 00004 /* Read by other */
+#define TOWRITE 00002 /* Write by other */
+#define TOEXEC 00001 /* Execute/Search by other */
+
+#ifdef _PAX_
+/*
+ * Pad with a bit mask, much faster than doing a mod but only works on powers
+ * of 2. Macro below is for block of 512 bytes.
+ */
+#define TAR_PAD(x) ((512 - ((x) & 511)) & 511)
+#endif /* _PAX_ */
+
+/*
+ * structure of an old tar header as it appeared in BSD releases
+ */
+typedef struct {
+ char name[TNMSZ]; /* name of entry */
+ char mode[8]; /* mode */
+ char uid[8]; /* uid */
+ char gid[8]; /* gid */
+ char size[12]; /* size */
+ char mtime[12]; /* modification time */
+ char chksum[CHK_LEN]; /* checksum */
+ char linkflag; /* norm, hard, or sym. */
+ char linkname[TNMSZ]; /* linked to name */
+} HD_TAR;
+
+#ifdef _PAX_
+/*
+ * -o options for BSD tar to not write directories to the archive
+ */
+#define TAR_NODIR "nodir"
+#define TAR_OPTION "write_opt"
+
+/*
+ * default device names
+ */
+#define DEV_0 "/dev/rst0"
+#define DEV_1 "/dev/rst1"
+#define DEV_4 "/dev/rst4"
+#define DEV_5 "/dev/rst5"
+#define DEV_7 "/dev/rst7"
+#define DEV_8 "/dev/rst8"
+#endif /* _PAX_ */
+
+/*
+ * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990
+ */
+#define TPFSZ 155
+#define TMAGIC "ustar" /* ustar and a null */
+#define TMAGLEN 6
+#define TVERSION "00" /* 00 and no null */
+#define TVERSLEN 2
+
+typedef struct {
+ char name[TNMSZ]; /* name of entry */
+ char mode[8]; /* mode */
+ char uid[8]; /* uid */
+ char gid[8]; /* gid */
+ char size[12]; /* size */
+ char mtime[12]; /* modification time */
+ char chksum[CHK_LEN]; /* checksum */
+ char typeflag; /* type of file. */
+ char linkname[TNMSZ]; /* linked to name */
+ char magic[TMAGLEN]; /* magic cookie */
+ char version[TVERSLEN]; /* version */
+ char uname[32]; /* ascii owner name */
+ char gname[32]; /* ascii group name */
+ char devmajor[8]; /* major device number */
+ char devminor[8]; /* minor device number */
+ char prefix[TPFSZ]; /* linked to name */
+} HD_USTAR;
diff --git a/bin/pax/tty_subs.c b/bin/pax/tty_subs.c
new file mode 100644
index 0000000..a07264a
--- /dev/null
+++ b/bin/pax/tty_subs.c
@@ -0,0 +1,187 @@
+/* $OpenBSD: tty_subs.c,v 1.17 2016/08/26 04:22:13 guenther Exp $ */
+/* $NetBSD: tty_subs.c,v 1.5 1995/03/21 09:07:52 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1992 Keith Muller.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Keith Muller of the University of California, San Diego.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pax.h"
+#include "extern.h"
+
+/*
+ * routines that deal with I/O to and from the user
+ */
+
+#define DEVTTY "/dev/tty" /* device for interactive i/o */
+static FILE *ttyoutf = NULL; /* output pointing at control tty */
+static FILE *ttyinf = NULL; /* input pointing at control tty */
+
+/*
+ * tty_init()
+ * try to open the controlling terminal (if any) for this process. if the
+ * open fails, future ops that require user input will get an EOF
+ */
+
+int
+tty_init(void)
+{
+ int ttyfd;
+
+ if ((ttyfd = open(DEVTTY, O_RDWR | O_CLOEXEC)) >= 0) {
+ if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) {
+ if ((ttyinf = fdopen(ttyfd, "r")) != NULL)
+ return(0);
+ (void)fclose(ttyoutf);
+ }
+ (void)close(ttyfd);
+ }
+
+ if (iflag) {
+ paxwarn(1, "Fatal error, cannot open %s", DEVTTY);
+ return(-1);
+ }
+ return(0);
+}
+
+/*
+ * tty_prnt()
+ * print a message using the specified format to the controlling tty
+ * if there is no controlling terminal, just return.
+ */
+
+void
+tty_prnt(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (ttyoutf == NULL) {
+ va_end(ap);
+ return;
+ }
+ (void)vfprintf(ttyoutf, fmt, ap);
+ va_end(ap);
+ (void)fflush(ttyoutf);
+}
+
+/*
+ * tty_read()
+ * read a string from the controlling terminal if it is open into the
+ * supplied buffer
+ * Return:
+ * 0 if data was read, -1 otherwise.
+ */
+
+int
+tty_read(char *str, int len)
+{
+ if (ttyinf == NULL || fgets(str, len, ttyinf) == NULL)
+ return(-1);
+
+ /*
+ * strip off that trailing newline
+ */
+ str[strcspn(str, "\n")] = '\0';
+ return(0);
+}
+
+/*
+ * paxwarn()
+ * write a warning message to stderr. if "set" the exit value of pax
+ * will be non-zero.
+ */
+
+void
+paxwarn(int set, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (set)
+ exit_val = 1;
+ /*
+ * when vflag we better ship out an extra \n to get this message on a
+ * line by itself
+ */
+ if (vflag && vfpart) {
+ (void)fflush(listf);
+ (void)fputc('\n', stderr);
+ vfpart = 0;
+ }
+ (void)fprintf(stderr, "%s: ", argv0);
+ (void)vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ (void)fputc('\n', stderr);
+}
+
+/*
+ * syswarn()
+ * write a warning message to stderr. if "set" the exit value of pax
+ * will be non-zero.
+ */
+
+void
+syswarn(int set, int errnum, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (set)
+ exit_val = 1;
+ /*
+ * when vflag we better ship out an extra \n to get this message on a
+ * line by itself
+ */
+ if (vflag && vfpart) {
+ (void)fflush(listf);
+ (void)fputc('\n', stderr);
+ vfpart = 0;
+ }
+ (void)fprintf(stderr, "%s: ", argv0);
+ (void)vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ /*
+ * format and print the errno
+ */
+ if (errnum > 0)
+ (void)fprintf(stderr, ": %s", strerror(errnum));
+ (void)fputc('\n', stderr);
+}
diff --git a/usr.bin/diff/CVS/Entries b/usr.bin/diff/CVS/Entries
new file mode 100644
index 0000000..912da0e
--- /dev/null
+++ b/usr.bin/diff/CVS/Entries
@@ -0,0 +1,9 @@
+/Makefile/1.3/Tue May 29 18:24:56 2007//
+/diff.1/1.49/Sat Feb 8 01:09:58 2020//
+/diff.c/1.67/Fri Jun 28 13:35:00 2019//
+/diff.h/1.33/Mon Oct 5 20:15:00 2015//
+/diffdir.c/1.47/Fri Jan 25 00:19:26 2019//
+/diffreg.c/1.93/Fri Jun 28 13:35:00 2019//
+/xmalloc.c/1.10/Fri Jun 28 05:44:09 2019//
+/xmalloc.h/1.4/Thu Nov 12 16:30:30 2015//
+D
diff --git a/usr.bin/diff/CVS/Repository b/usr.bin/diff/CVS/Repository
new file mode 100644
index 0000000..088ef75
--- /dev/null
+++ b/usr.bin/diff/CVS/Repository
@@ -0,0 +1 @@
+src/usr.bin/diff
diff --git a/usr.bin/diff/CVS/Root b/usr.bin/diff/CVS/Root
new file mode 100644
index 0000000..3811072
--- /dev/null
+++ b/usr.bin/diff/CVS/Root
@@ -0,0 +1 @@
+/cvs
diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile
new file mode 100644
index 0000000..4f1c9d5
--- /dev/null
+++ b/usr.bin/diff/Makefile
@@ -0,0 +1,7 @@
+# $OpenBSD: Makefile,v 1.3 2007/05/29 18:24:56 ray Exp $
+
+PROG= diff
+SRCS= diff.c diffdir.c diffreg.c xmalloc.c
+COPTS+= -Wall
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1
new file mode 100644
index 0000000..353c770
--- /dev/null
+++ b/usr.bin/diff/diff.1
@@ -0,0 +1,474 @@
+.\" $OpenBSD: diff.1,v 1.49 2020/02/08 01:09:58 jsg Exp $
+.\"
+.\" Copyright (c) 1980, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)diff.1 8.1 (Berkeley) 6/30/93
+.\"
+.Dd $Mdocdate: February 8 2020 $
+.Dt DIFF 1
+.Os
+.Sh NAME
+.Nm diff
+.Nd differential file and directory comparator
+.Sh SYNOPSIS
+.Nm diff
+.Op Fl abdipTtw
+.Oo
+.Fl c | e | f |
+.Fl n | q | u
+.Oc
+.Op Fl I Ar pattern
+.Op Fl L Ar label
+.Ar file1 file2
+.Nm diff
+.Op Fl abdilpTtw
+.Op Fl I Ar pattern
+.Op Fl L Ar label
+.Fl C Ar number
+.Ar file1 file2
+.Nm diff
+.Op Fl abdiltw
+.Op Fl I Ar pattern
+.Fl D Ar string
+.Ar file1 file2
+.Nm diff
+.Op Fl abdilpTtw
+.Op Fl I Ar pattern
+.Op Fl L Ar label
+.Fl U Ar number
+.Ar file1 file2
+.Nm diff
+.Op Fl abdilNPprsTtw
+.Oo
+.Fl c | e | f |
+.Fl n | q | u
+.Oc
+.Op Fl I Ar pattern
+.Bk -words
+.Op Fl L Ar label
+.Op Fl S Ar name
+.Op Fl X Ar file
+.Op Fl x Ar pattern
+.Ek
+.Ar dir1 dir2
+.Sh DESCRIPTION
+The
+.Nm
+utility compares the contents of
+.Ar file1
+and
+.Ar file2
+and writes to the standard output the list of changes necessary to
+convert one file into the other.
+No output is produced if the files are identical.
+.Pp
+Output options (mutually exclusive):
+.Bl -tag -width Ds
+.It Fl C Ar number
+Like
+.Fl c
+but produces a diff with
+.Ar number
+lines of context.
+.It Fl c
+Produces a diff with 3 lines of context.
+With
+.Fl c
+the output format is modified slightly:
+the output begins with identification of the files involved and
+their creation dates and then each change is separated
+by a line with fifteen
+.Li * Ns 's .
+The lines removed from
+.Ar file1
+are marked with
+.Sq \-\ \& ;
+those added to
+.Ar file2
+are marked
+.Sq +\ \& .
+Lines which are changed from one file to the other are marked in
+both files with
+.Sq !\ \& .
+Changes which lie within 3 lines of each other are grouped together on
+output.
+.It Fl D Ar string
+Creates a merged version of
+.Ar file1
+and
+.Ar file2
+on the standard output, with C preprocessor controls included so that
+a compilation of the result without defining
+.Ar string
+is equivalent to compiling
+.Ar file1 ,
+while defining
+.Ar string
+will yield
+.Ar file2 .
+.It Fl e
+Produces output in a form suitable as input for the editor utility,
+.Xr ed 1 ,
+which can then be used to convert file1 into file2.
+.Pp
+Extra commands are added to the output when comparing directories with
+.Fl e ,
+so that the result is a
+.Xr sh 1
+script for converting text files which are common to the two directories
+from their state in
+.Ar dir1
+to their state in
+.Ar dir2 .
+.It Fl f
+Identical output to that of the
+.Fl e
+flag, but in reverse order.
+It cannot be digested by
+.Xr ed 1 .
+.It Fl n
+Produces a script similar to that of
+.Fl e ,
+but in the opposite order and with a count of changed lines on each
+insert or delete command.
+This is the form used by
+.Xr rcsdiff 1 .
+.It Fl q
+Just print a line when the files differ.
+Does not output a list of changes.
+.It Fl U Ar number
+Like
+.Fl u
+but produces a diff with
+.Ar number
+lines of context.
+.It Fl u
+Produces a
+.Em unified
+diff with 3 lines of context.
+A unified diff is similar to the context diff produced by the
+.Fl c
+option.
+However, unlike with
+.Fl c ,
+all lines to be changed (added and/or removed) are present in
+a single section.
+.El
+.Pp
+Comparison options:
+.Bl -tag -width Ds
+.It Fl a
+Treat all files as ASCII text.
+Normally
+.Nm
+will simply print
+.Dq Binary files ... differ
+if files contain binary characters.
+Use of this option forces
+.Nm
+to produce a diff.
+.It Fl b
+Causes trailing blanks (spaces and tabs) to be ignored, and other
+strings of blanks to compare equal.
+.It Fl d
+Try very hard to produce a diff as small as possible.
+This may consume a lot of processing power and memory when processing
+large files with many changes.
+.It Fl I Ar pattern
+Ignores changes, insertions, and deletions whose lines match the
+extended regular expression
+.Ar pattern .
+Multiple
+.Fl I
+patterns may be specified.
+All lines in the change must match some pattern for the change to be
+ignored.
+See
+.Xr re_format 7
+for more information on regular expression patterns.
+.It Fl i
+Ignores the case of letters.
+E.g.,
+.Dq A
+will compare equal to
+.Dq a .
+.It Fl L Ar label
+Print
+.Ar label
+instead of the first (and second, if this option is specified twice)
+file name and time in the context or unified diff header.
+.It Fl p
+With unified and context diffs, show with each change
+the first 40 characters of the last line before the context beginning
+with a letter, an underscore or a dollar sign.
+For C source code following standard layout conventions, this will
+show the prototype of the function the change applies to.
+.It Fl T
+Print a tab rather than a space before the rest of the line for the
+normal, context or unified output formats.
+This makes the alignment of tabs in the line consistent.
+.It Fl t
+Will expand tabs in output lines.
+Normal or
+.Fl c
+output adds character(s) to the front of each line which may screw up
+the indentation of the original source lines and make the output listing
+difficult to interpret.
+This option will preserve the original source's indentation.
+.It Fl w
+Is similar to
+.Fl b
+but causes whitespace (blanks and tabs) to be totally ignored.
+E.g.,
+.Dq if (\ \&a == b \&)
+will compare equal to
+.Dq if(a==b) .
+.El
+.Pp
+Directory comparison options:
+.Bl -tag -width Ds
+.It Fl N
+If a file is found in only one directory, act as if it was found in the
+other directory too but was of zero size.
+.It Fl P
+If a file is found only in
+.Ar dir2 ,
+act as if it was found in
+.Ar dir1
+too but was of zero size.
+.It Fl r
+Causes application of
+.Nm
+recursively to common subdirectories encountered.
+.It Fl S Ar name
+Re-starts a directory
+.Nm
+in the middle, beginning with file
+.Ar name .
+.It Fl s
+Causes
+.Nm
+to report files which are the same, which are otherwise not mentioned.
+.It Fl X Ar file
+Exclude files and subdirectories from comparison whose basenames match
+lines in
+.Ar file .
+Multiple
+.Fl X
+options may be specified.
+.It Fl x Ar pattern
+Exclude files and subdirectories from comparison whose basenames match
+.Ar pattern .
+Patterns are matched using shell-style globbing via
+.Xr fnmatch 3 .
+Multiple
+.Fl x
+options may be specified.
+.El
+.Pp
+If both arguments are directories,
+.Nm
+sorts the contents of the directories by name, and then runs the
+regular file
+.Nm
+algorithm, producing a change list,
+on text files which are different.
+Binary files which differ,
+common subdirectories, and files which appear in only one directory
+are described as such.
+In directory mode only regular files and directories are compared.
+If a non-regular file such as a device special file or FIFO
+is encountered, a diagnostic message is printed.
+.Pp
+If only one of
+.Ar file1
+and
+.Ar file2
+is a directory,
+.Nm
+is applied to the non-directory file and the file contained in
+the directory file with a filename that is the same as the
+last component of the non-directory file.
+.Pp
+If either
+.Ar file1
+or
+.Ar file2
+is
+.Sq - ,
+the standard input is
+used in its place.
+.Ss Output Style
+The default (without
+.Fl e ,
+.Fl c ,
+or
+.Fl n
+.\" -C
+options)
+output contains lines of these forms, where
+.Va XX , YY , ZZ , QQ
+are line numbers respective of file order.
+.Pp
+.Bl -tag -width "XX,YYcZZ,QQ" -compact
+.It Li XX Ns Ic a Ns Li YY
+At (the end of) line
+.Va XX
+of
+.Ar file1 ,
+append the contents
+of line
+.Va YY
+of
+.Ar file2
+to make them equal.
+.It Li XX Ns Ic a Ns Li YY,ZZ
+Same as above, but append the range of lines,
+.Va YY
+through
+.Va ZZ
+of
+.Ar file2
+to line
+.Va XX
+of file1.
+.It Li XX Ns Ic d Ns Li YY
+At line
+.Va XX
+delete
+the line.
+The value
+.Va YY
+tells to which line the change would bring
+.Ar file1
+in line with
+.Ar file2 .
+.It Li XX,YY Ns Ic d Ns Li ZZ
+Delete the range of lines
+.Va XX
+through
+.Va YY
+in
+.Ar file1 .
+.It Li XX Ns Ic c Ns Li YY
+Change the line
+.Va XX
+in
+.Ar file1
+to the line
+.Va YY
+in
+.Ar file2 .
+.It Li XX,YY Ns Ic c Ns Li ZZ
+Replace the range of specified lines with the line
+.Va ZZ .
+.It Li XX,YY Ns Ic c Ns Li ZZ,QQ
+Replace the range
+.Va XX , Ns Va YY
+from
+.Ar file1
+with the range
+.Va ZZ , Ns Va QQ
+from
+.Ar file2 .
+.El
+.Pp
+These lines resemble
+.Xr ed 1
+subcommands to convert
+.Ar file1
+into
+.Ar file2 .
+The line numbers before the action letters pertain to
+.Ar file1 ;
+those after pertain to
+.Ar file2 .
+Thus, by exchanging
+.Ic a
+for
+.Ic d
+and reading the line in reverse order, one can also
+determine how to convert
+.Ar file2
+into
+.Ar file1 .
+As in
+.Xr ed 1 ,
+identical
+pairs (where num1 = num2) are abbreviated as a single
+number.
+.Sh FILES
+.Bl -tag -width /tmp/diff.XXXXXXXX -compact
+.It Pa /tmp/diff. Ns Ar XXXXXXXX
+Temporary file used when comparing a device or the standard input.
+Note that the temporary file is unlinked as soon as it is created
+so it will not show up in a directory listing.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It 0
+No differences were found.
+.It 1
+Differences were found.
+.It >1
+An error occurred.
+.El
+.Sh SEE ALSO
+.Xr cmp 1 ,
+.Xr comm 1 ,
+.Xr diff3 1 ,
+.Xr ed 1 ,
+.Xr patch 1 ,
+.Xr sdiff 1
+.Rs
+.%A James W. Hunt
+.%A M. Douglas McIlroy
+.%T "An Algorithm for Differential File Comparison"
+.%J Computing Science Technical Report
+.%Q Bell Laboratories 41
+.%D June 1976
+.Re
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The flags
+.Op Fl aDdIiLlNnPpqSsTtwXx
+are extensions to that specification.
+.Sh HISTORY
+A
+.Nm
+command appeared in
+.At v5 .
diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c
new file mode 100644
index 0000000..64cdd45
--- /dev/null
+++ b/usr.bin/diff/diff.c
@@ -0,0 +1,402 @@
+/* $OpenBSD: diff.c,v 1.67 2019/06/28 13:35:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2003 Todd C. Miller <millert@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <limits.h>
+
+#include "diff.h"
+#include "xmalloc.h"
+
+int Nflag, Pflag, rflag, sflag, Tflag;
+int diff_format, diff_context, status;
+char *start, *ifdefname, *diffargs, *label[2], *ignore_pats;
+struct stat stb1, stb2;
+struct excludes *excludes_list;
+regex_t ignore_re;
+
+#define OPTIONS "0123456789abC:cdD:efhI:iL:lnNPpqrS:sTtU:uwX:x:"
+static struct option longopts[] = {
+ { "text", no_argument, 0, 'a' },
+ { "ignore-space-change", no_argument, 0, 'b' },
+ { "context", optional_argument, 0, 'C' },
+ { "ifdef", required_argument, 0, 'D' },
+ { "minimal", no_argument, 0, 'd' },
+ { "ed", no_argument, 0, 'e' },
+ { "forward-ed", no_argument, 0, 'f' },
+ { "ignore-matching-lines", required_argument, 0, 'I' },
+ { "ignore-case", no_argument, 0, 'i' },
+ { "label", required_argument, 0, 'L' },
+ { "new-file", no_argument, 0, 'N' },
+ { "rcs", no_argument, 0, 'n' },
+ { "unidirectional-new-file", no_argument, 0, 'P' },
+ { "show-c-function", no_argument, 0, 'p' },
+ { "brief", no_argument, 0, 'q' },
+ { "recursive", no_argument, 0, 'r' },
+ { "report-identical-files", no_argument, 0, 's' },
+ { "starting-file", required_argument, 0, 'S' },
+ { "expand-tabs", no_argument, 0, 't' },
+ { "initial-tab", no_argument, 0, 'T' },
+ { "unified", optional_argument, 0, 'U' },
+ { "ignore-all-space", no_argument, 0, 'w' },
+ { "exclude", required_argument, 0, 'x' },
<