diff options
author | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
---|---|---|
committer | Cem Keylan <cem@ckyln.com> | 2020-10-16 17:47:01 +0300 |
commit | 5d69c6a2661bba0a22f3ecfd517e2e9767a38346 (patch) | |
tree | 1f479b2714e127835db7f33a3bfed4c38c52f883 | |
parent | e2abcdca396661cbe0ae2ddb13d5c2b85682c13a (diff) | |
download | otools-5d69c6a2661bba0a22f3ecfd517e2e9767a38346.tar.gz |
add tools
243 files changed, 91034 insertions, 9 deletions
@@ -1,11 +1,11 @@ *.a *.o -diff -doas -m4 -mandoc -md5 -nc -patch -pax -signify +/diff +/doas +/m4 +/mandoc +/md5 +/nc +/patch +/pax +/signify diff --git a/bin/md5/CVS/Entries b/bin/md5/CVS/Entries new file mode 100644 index 0000000..86ca07f --- /dev/null +++ b/bin/md5/CVS/Entries @@ -0,0 +1,7 @@ +/Makefile/1.15/Wed Mar 30 06:38:40 2016// +/cksum.1/1.39/Sat Sep 3 17:01:01 2016// +/crc.c/1.5/Fri Jan 25 00:19:25 2019// +/crc.h/1.4/Fri Jan 25 00:19:25 2019// +/md5.1/1.48/Fri Jan 25 00:19:25 2019// +/md5.c/1.95/Sat May 18 16:53:39 2019// +D diff --git a/bin/md5/CVS/Repository b/bin/md5/CVS/Repository new file mode 100644 index 0000000..0c71c98 --- /dev/null +++ b/bin/md5/CVS/Repository @@ -0,0 +1 @@ +src/bin/md5 diff --git a/bin/md5/CVS/Root b/bin/md5/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/bin/md5/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/bin/md5/Makefile b/bin/md5/Makefile new file mode 100644 index 0000000..c094424 --- /dev/null +++ b/bin/md5/Makefile @@ -0,0 +1,14 @@ +# $OpenBSD: Makefile,v 1.15 2016/03/30 06:38:40 jmc Exp $ + +PROG= md5 +SRCS= crc.c md5.c +MAN= cksum.1 md5.1 +LINKS= ${BINDIR}/md5 ${BINDIR}/sha1 \ + ${BINDIR}/md5 ${BINDIR}/sha256 \ + ${BINDIR}/md5 ${BINDIR}/sha512 \ + ${BINDIR}/md5 ${BINDIR}/cksum + +CPPFLAGS+= -I${.CURDIR} +COPTS+= -Wall -Wconversion -Wmissing-prototypes + +.include <bsd.prog.mk> diff --git a/bin/md5/cksum.1 b/bin/md5/cksum.1 new file mode 100644 index 0000000..af52a4a --- /dev/null +++ b/bin/md5/cksum.1 @@ -0,0 +1,191 @@ +.\" $OpenBSD: cksum.1,v 1.39 2016/09/03 17:01:01 tedu Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)cksum.1 8.2 (Berkeley) 4/28/95 +.\" +.Dd $Mdocdate: September 3 2016 $ +.Dt CKSUM 1 +.Os +.Sh NAME +.Nm cksum +.Nd display file checksums and block counts +.Sh SYNOPSIS +.Nm cksum +.Bk -words +.Op Fl bcpqrtx +.Op Fl a Ar algorithms +.Op Fl C Ar checklist +.Op Fl h Ar hashfile +.Op Fl s Ar string +.Op Ar +.Ek +.Sh DESCRIPTION +The +.Nm cksum +utility writes to the standard output a single line for each input file. 
+The format of this line varies with the algorithm being used as follows: +.Bl -tag -width allxothers +.It cksum +The output line consists of three whitespace separated fields: +a CRC checksum, the number of octets in the input, +and name of the file or string. +If no file name is specified, the standard input is used and no file name +is written. +.It all others +The output line consists of four whitespace separated fields: +the name of the algorithm used, the name of the file or string in +parentheses, an equals sign, and the cryptographic hash of the input. +If no file name is specified, the standard input is used and only +the cryptographic hash is output. +.El +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a Ar algorithms +Use the specified algorithm(s) instead of the default (cksum). +Supported algorithms include cksum, md5, rmd160, sha1, +sha224, sha256, sha384, sha512/256, and sha512. +Multiple algorithms may be specified, separated by a comma or whitespace. +Additionally, multiple +.Fl a +options may be specified on the command line. +Case is ignored when matching algorithms. +The output format may be specified on a per-algorithm basis +by using a single-character suffix, e.g.\& +.Dq sha256b . +If the algorithm has a +.Sq b +suffix, the checksum will be output in base64 format. +If the algorithm has an +.Sq x +suffix, the checksum will be output in hex format. +If an algorithm with the same output format is repeated, +only the first instance is used. +Note that output format suffixes are not supported +for the cksum algorithm. +.It Fl b +Output checksums in base64 notation, not hexadecimal by +default. +A +.Sq b +or +.Sq x +suffix on the algorithm will override this default. +This option is ignored for the cksum algorithm. +.It Fl C Ar checklist +Compare the checksum of each +.Ar file +against the checksums in the +.Ar checklist . +Any specified +.Ar file +that is not listed in the +.Ar checklist +will generate an error. 
+.It Fl c +If this option is specified, the +.Ar file +options become checklists. +Each checklist should contain hash results in the normal format, +which will be verified against the specified paths. +Output consists of the digest used, the file name, +and an OK, FAILED, or MISSING for the result of the comparison. +This will validate any of the supported checksums. +If no file is given, stdin is used. +The +.Fl c +option may not be used in conjunction with more than a single +.Fl a +option. +.It Fl h Ar hashfile +Place the checksum into +.Ar hashfile +instead of stdout. +.It Fl p +Echoes stdin to stdout and appends the +checksum to stdout. +.It Fl q +Only print the checksum (quiet mode) or if used in conjunction with the +.Fl c +flag, only print the failed cases. +.It Fl r +Reverse the format of the hash algorithm output, making +it match the checksum output format. +.It Fl s Ar string +Prints a checksum of the given +.Ar string . +.It Fl t +Runs a built-in time trial. +Specifying +.Fl t +multiple times results in the number of rounds being multiplied +by 10 for each additional flag. +.It Fl x +Runs a built-in test script. +.El +.Pp +The default CRC used is based on the polynomial used for CRC error checking +in the networking standard +ISO/IEC 8802-3:1996. +The other available algorithms are described in their respective +man pages in section 3 of the manual. +.Sh EXIT STATUS +.Ex -std cksum +.Sh SEE ALSO +.Xr md5 1 +.Pp +The default calculation is identical to that given in pseudo-code +in the following ACM article: +.Rs +.%T "Computation of Cyclic Redundancy Checks Via Table Lookup" +.%A Dilip V. Sarwate +.%J "Communications of the ACM" +.%D "August 1988" +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +All the flags are extensions to that specification. +.Sh HISTORY +The +.Nm cksum +utility appeared in +.Bx 4.4 . +.Sh CAVEATS +Do not use the cksum or md5 algorithms to verify file integrity. 
+An attacker can trivially produce modified payload that +has the same checksum as the original version. +Use a cryptographic checksum instead. diff --git a/bin/md5/crc.c b/bin/md5/crc.c new file mode 100644 index 0000000..19ea317 --- /dev/null +++ b/bin/md5/crc.c @@ -0,0 +1,137 @@ +/* $OpenBSD: crc.c,v 1.5 2019/01/25 00:19:25 millert Exp $ */ + +/* + * Copyright (c) 2004 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "crc.h" + +/* + * Table-driven version of the following polynomial from POSIX 1003.2: + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + + * x^7 + x^5 + x^4 + x^2 + x + 1 + */ +static const u_int32_t crc32tab[] = { + 0x00000000U, + 0x04c11db7U, 0x09823b6eU, 0x0d4326d9U, 0x130476dcU, 0x17c56b6bU, + 0x1a864db2U, 0x1e475005U, 0x2608edb8U, 0x22c9f00fU, 0x2f8ad6d6U, + 0x2b4bcb61U, 0x350c9b64U, 0x31cd86d3U, 0x3c8ea00aU, 0x384fbdbdU, + 0x4c11db70U, 0x48d0c6c7U, 0x4593e01eU, 0x4152fda9U, 0x5f15adacU, + 0x5bd4b01bU, 0x569796c2U, 0x52568b75U, 0x6a1936c8U, 0x6ed82b7fU, + 0x639b0da6U, 0x675a1011U, 0x791d4014U, 0x7ddc5da3U, 0x709f7b7aU, + 0x745e66cdU, 0x9823b6e0U, 0x9ce2ab57U, 0x91a18d8eU, 0x95609039U, + 0x8b27c03cU, 0x8fe6dd8bU, 0x82a5fb52U, 0x8664e6e5U, 0xbe2b5b58U, + 0xbaea46efU, 0xb7a96036U, 0xb3687d81U, 0xad2f2d84U, 0xa9ee3033U, + 0xa4ad16eaU, 0xa06c0b5dU, 0xd4326d90U, 0xd0f37027U, 0xddb056feU, + 0xd9714b49U, 0xc7361b4cU, 0xc3f706fbU, 0xceb42022U, 0xca753d95U, + 0xf23a8028U, 0xf6fb9d9fU, 0xfbb8bb46U, 0xff79a6f1U, 0xe13ef6f4U, + 0xe5ffeb43U, 0xe8bccd9aU, 0xec7dd02dU, 0x34867077U, 0x30476dc0U, + 0x3d044b19U, 0x39c556aeU, 0x278206abU, 0x23431b1cU, 0x2e003dc5U, + 0x2ac12072U, 0x128e9dcfU, 0x164f8078U, 0x1b0ca6a1U, 0x1fcdbb16U, + 0x018aeb13U, 0x054bf6a4U, 0x0808d07dU, 0x0cc9cdcaU, 0x7897ab07U, + 0x7c56b6b0U, 0x71159069U, 0x75d48ddeU, 0x6b93dddbU, 0x6f52c06cU, + 0x6211e6b5U, 0x66d0fb02U, 0x5e9f46bfU, 0x5a5e5b08U, 0x571d7dd1U, + 0x53dc6066U, 0x4d9b3063U, 0x495a2dd4U, 0x44190b0dU, 0x40d816baU, + 0xaca5c697U, 0xa864db20U, 0xa527fdf9U, 0xa1e6e04eU, 0xbfa1b04bU, + 0xbb60adfcU, 0xb6238b25U, 0xb2e29692U, 0x8aad2b2fU, 0x8e6c3698U, + 0x832f1041U, 0x87ee0df6U, 0x99a95df3U, 0x9d684044U, 0x902b669dU, + 0x94ea7b2aU, 0xe0b41de7U, 0xe4750050U, 0xe9362689U, 0xedf73b3eU, + 0xf3b06b3bU, 0xf771768cU, 0xfa325055U, 0xfef34de2U, 
0xc6bcf05fU, + 0xc27dede8U, 0xcf3ecb31U, 0xcbffd686U, 0xd5b88683U, 0xd1799b34U, + 0xdc3abdedU, 0xd8fba05aU, 0x690ce0eeU, 0x6dcdfd59U, 0x608edb80U, + 0x644fc637U, 0x7a089632U, 0x7ec98b85U, 0x738aad5cU, 0x774bb0ebU, + 0x4f040d56U, 0x4bc510e1U, 0x46863638U, 0x42472b8fU, 0x5c007b8aU, + 0x58c1663dU, 0x558240e4U, 0x51435d53U, 0x251d3b9eU, 0x21dc2629U, + 0x2c9f00f0U, 0x285e1d47U, 0x36194d42U, 0x32d850f5U, 0x3f9b762cU, + 0x3b5a6b9bU, 0x0315d626U, 0x07d4cb91U, 0x0a97ed48U, 0x0e56f0ffU, + 0x1011a0faU, 0x14d0bd4dU, 0x19939b94U, 0x1d528623U, 0xf12f560eU, + 0xf5ee4bb9U, 0xf8ad6d60U, 0xfc6c70d7U, 0xe22b20d2U, 0xe6ea3d65U, + 0xeba91bbcU, 0xef68060bU, 0xd727bbb6U, 0xd3e6a601U, 0xdea580d8U, + 0xda649d6fU, 0xc423cd6aU, 0xc0e2d0ddU, 0xcda1f604U, 0xc960ebb3U, + 0xbd3e8d7eU, 0xb9ff90c9U, 0xb4bcb610U, 0xb07daba7U, 0xae3afba2U, + 0xaafbe615U, 0xa7b8c0ccU, 0xa379dd7bU, 0x9b3660c6U, 0x9ff77d71U, + 0x92b45ba8U, 0x9675461fU, 0x8832161aU, 0x8cf30badU, 0x81b02d74U, + 0x857130c3U, 0x5d8a9099U, 0x594b8d2eU, 0x5408abf7U, 0x50c9b640U, + 0x4e8ee645U, 0x4a4ffbf2U, 0x470cdd2bU, 0x43cdc09cU, 0x7b827d21U, + 0x7f436096U, 0x7200464fU, 0x76c15bf8U, 0x68860bfdU, 0x6c47164aU, + 0x61043093U, 0x65c52d24U, 0x119b4be9U, 0x155a565eU, 0x18197087U, + 0x1cd86d30U, 0x029f3d35U, 0x065e2082U, 0x0b1d065bU, 0x0fdc1becU, + 0x3793a651U, 0x3352bbe6U, 0x3e119d3fU, 0x3ad08088U, 0x2497d08dU, + 0x2056cd3aU, 0x2d15ebe3U, 0x29d4f654U, 0xc5a92679U, 0xc1683bceU, + 0xcc2b1d17U, 0xc8ea00a0U, 0xd6ad50a5U, 0xd26c4d12U, 0xdf2f6bcbU, + 0xdbee767cU, 0xe3a1cbc1U, 0xe760d676U, 0xea23f0afU, 0xeee2ed18U, + 0xf0a5bd1dU, 0xf464a0aaU, 0xf9278673U, 0xfde69bc4U, 0x89b8fd09U, + 0x8d79e0beU, 0x803ac667U, 0x84fbdbd0U, 0x9abc8bd5U, 0x9e7d9662U, + 0x933eb0bbU, 0x97ffad0cU, 0xafb010b1U, 0xab710d06U, 0xa6322bdfU, + 0xa2f33668U, 0xbcb4666dU, 0xb8757bdaU, 0xb5365d03U, 0xb1f740b4U +}; + +void +CKSUM_Init(CKSUM_CTX *ctx) +{ + ctx->crc = 0; + ctx->len = 0; +} + +#define UPDATE(crc, byte) do \ + (crc) = ((crc) << 8) ^ crc32tab[((crc) >> 24) ^ (byte)]; \ 
+while(0) + +void +CKSUM_Update(CKSUM_CTX *ctx, const unsigned char *buf, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + UPDATE(ctx->crc, buf[i]); + ctx->len += len; +} + +void +CKSUM_Final(CKSUM_CTX *ctx) +{ + off_t len = ctx->len; + + /* add in number of bytes read and finish */ + while (len != 0) { + UPDATE(ctx->crc, len & 0xff); + len >>= 8; + } + ctx->crc = ~ctx->crc; +} + +char * +CKSUM_End(CKSUM_CTX *ctx, char *outstr) +{ + CKSUM_Final(ctx); + + if (outstr == NULL) { + if (asprintf(&outstr, "%u %lld", ctx->crc, ctx->len) == -1) + return (NULL); + } else { + (void)snprintf(outstr, (size_t)CKSUM_DIGEST_STRING_LENGTH, + "%u %lld", ctx->crc, ctx->len); + } + + return (outstr); +} diff --git a/bin/md5/crc.h b/bin/md5/crc.h new file mode 100644 index 0000000..2000c48 --- /dev/null +++ b/bin/md5/crc.h @@ -0,0 +1,31 @@ +/* $OpenBSD: crc.h,v 1.4 2019/01/25 00:19:25 millert Exp $ */ + +/* + * Copyright (c) 2004 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define CKSUM_DIGEST_LENGTH 4 +#define CKSUM_DIGEST_STRING_LENGTH (10 + 1 + 20 + 1) + +typedef struct CKSUMContext { + u_int32_t crc; + off_t len; +} CKSUM_CTX; + +void CKSUM_Init(CKSUM_CTX *); +void CKSUM_Update(CKSUM_CTX *, const u_int8_t *, size_t); +void CKSUM_Final(CKSUM_CTX *); +char *CKSUM_End(CKSUM_CTX *, char *); +char *CKSUM_Data(const u_int8_t *, size_t, char *); diff --git a/bin/md5/md5.1 b/bin/md5/md5.1 new file mode 100644 index 0000000..fa860eb --- /dev/null +++ b/bin/md5/md5.1 @@ -0,0 +1,142 @@ +.\" $OpenBSD: md5.1,v 1.48 2019/01/25 00:19:25 millert Exp $ +.\" +.\" Copyright (c) 2003, 2004, 2006 Todd C. Miller <millert@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.\" Sponsored in part by the Defense Advanced Research Projects +.\" Agency (DARPA) and Air Force Research Laboratory, Air Force +.\" Materiel Command, USAF, under agreement number F39502-99-1-0512. 
+.\" +.Dd $Mdocdate: January 25 2019 $ +.Dt MD5 1 +.Os +.Sh NAME +.Nm md5 , +.Nm sha1 , +.Nm sha256 , +.Nm sha512 +.Nd calculate a message digest (checksum) for a file +.Sh SYNOPSIS +.Nm md5 +.Op Fl bcpqrtx +.Op Fl C Ar checklist +.Op Fl h Ar hashfile +.Op Fl s Ar string +.Op Ar +.Nm sha1 +.Op Fl bcpqrtx +.Op Fl C Ar checklist +.Op Fl h Ar hashfile +.Op Fl s Ar string +.Op Ar +.Nm sha256 +.Op Fl bcpqrtx +.Op Fl C Ar checklist +.Op Fl h Ar hashfile +.Op Fl s Ar string +.Op Ar +.Nm sha512 +.Op Fl bcpqrtx +.Op Fl C Ar checklist +.Op Fl h Ar hashfile +.Op Fl s Ar string +.Op Ar +.Sh DESCRIPTION +These utilities take as input a message of arbitrary length and produce +as output a message digest (checksum) of the input. +.Pp +Two messages having the same message digest (a collision) have been produced +for MD5 and for SHA-1, so their use is deprecated. +.Pp +The options for use with each command are as follows: +.Bl -tag -width Ds +.It Fl b +Output checksums in base64 notation, not hexadecimal. +.It Fl C Ar checklist +Compare the checksum of each +.Ar file +against the checksums in the +.Ar checklist . +Any specified +.Ar file +that is not listed in the +.Ar checklist +will generate an error. +.It Fl c +If this option is specified, the +.Ar file +options become checklists. +Each checklist should contain hash results in the normal format, +which will be verified against the specified paths. +Output consists of the digest used, the file name, +and an OK, FAILED, or MISSING for the result of the comparison. +This will validate any of the supported checksums (see +.Xr cksum 1 ) . +If no file is given, stdin is used. +.It Fl h Ar hashfile +Place the checksum into +.Ar hashfile +instead of stdout. +.It Fl p +Echoes stdin to stdout and appends the +checksum to stdout. +.It Fl q +Only print the checksum (quiet mode) or if used in conjunction with the +.Fl c +flag, only print the failed cases. 
+.It Fl r +Reverse the format of the hash algorithm output, making +it match the output format used by +.Xr cksum 1 . +.It Fl s Ar string +Prints a checksum of the given +.Ar string . +.It Fl t +Runs a built-in time trial. +Specifying +.Fl t +multiple times results in the number of rounds being multiplied +by 10 for each additional flag. +.It Fl x +Runs a built-in test script. +.El +.Sh EXIT STATUS +These utilities exit 0 on success, +and \*(Gt0 if an error occurs. +.Sh SEE ALSO +.Xr cksum 1 +.Sh STANDARDS +.Rs +.%A R. Rivest +.%D April 1992 +.%R RFC 1321 +.%T The MD5 Message-Digest Algorithm +.Re +.Rs +.%A J. Burrows +.%O FIPS PUB 180-1 +.%T The Secure Hash Standard +.Re +.Pp +.Rs +.%A D. Eastlake +.%A P. Jones +.%D September 2001 +.%R RFC 3174 +.%T US Secure Hash Algorithm 1 (SHA1) +.Re +.Rs +.%T Secure Hash Standard +.%O FIPS PUB 180-2 +.Re diff --git a/bin/md5/md5.c b/bin/md5/md5.c new file mode 100644 index 0000000..069b2f5 --- /dev/null +++ b/bin/md5/md5.c @@ -0,0 +1,852 @@ +/* $OpenBSD: md5.c,v 1.95 2019/05/18 16:53:39 otto Exp $ */ + +/* + * Copyright (c) 2001,2003,2005-2007,2010,2013,2014 + * Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/queue.h> +#include <sys/resource.h> +#include <netinet/in.h> +#include <ctype.h> +#include <err.h> +#include <fcntl.h> +#include <resolv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <time.h> +#include <unistd.h> +#include <errno.h> + +#include <md5.h> +#include <rmd160.h> +#include <sha1.h> +#include <sha2.h> +#include "crc.h" + +#define STYLE_MD5 0 +#define STYLE_CKSUM 1 +#define STYLE_TERSE 2 + +#define MAX_DIGEST_LEN 128 + +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) +#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) + +union ANY_CTX { +#if !defined(SHA2_ONLY) + CKSUM_CTX cksum; + MD5_CTX md5; + RMD160_CTX rmd160; + SHA1_CTX sha1; +#endif /* !defined(SHA2_ONLY) */ + SHA2_CTX sha2; +}; + +struct hash_function { + const char *name; + size_t digestlen; + int style; + int base64; + void *ctx; /* XXX - only used by digest_file() */ + void (*init)(void *); + void (*update)(void *, const unsigned char *, size_t); + void (*final)(unsigned char *, void *); + char * (*end)(void *, char *); + TAILQ_ENTRY(hash_function) tailq; +} functions[] = { +#if !defined(SHA2_ONLY) + { + "CKSUM", + CKSUM_DIGEST_LENGTH, + STYLE_CKSUM, + -1, + NULL, + (void (*)(void *))CKSUM_Init, + (void (*)(void *, const unsigned char *, size_t))CKSUM_Update, + (void (*)(unsigned char *, void *))CKSUM_Final, + (char *(*)(void *, char *))CKSUM_End + }, + { + "MD5", + MD5_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))MD5Init, + (void (*)(void *, const unsigned char *, size_t))MD5Update, + (void (*)(unsigned char *, void *))MD5Final, + (char *(*)(void *, char *))MD5End + }, + { + "RMD160", + RMD160_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void 
(*)(void *))RMD160Init, + (void (*)(void *, const unsigned char *, size_t))RMD160Update, + (void (*)(unsigned char *, void *))RMD160Final, + (char *(*)(void *, char *))RMD160End + }, + { + "SHA1", + SHA1_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA1Init, + (void (*)(void *, const unsigned char *, size_t))SHA1Update, + (void (*)(unsigned char *, void *))SHA1Final, + (char *(*)(void *, char *))SHA1End + }, + { + "SHA224", + SHA224_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA224Init, + (void (*)(void *, const unsigned char *, size_t))SHA224Update, + (void (*)(unsigned char *, void *))SHA224Final, + (char *(*)(void *, char *))SHA224End + }, +#endif /* !defined(SHA2_ONLY) */ + { + "SHA256", + SHA256_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA256Init, + (void (*)(void *, const unsigned char *, size_t))SHA256Update, + (void (*)(unsigned char *, void *))SHA256Final, + (char *(*)(void *, char *))SHA256End + }, +#if !defined(SHA2_ONLY) + { + "SHA384", + SHA384_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA384Init, + (void (*)(void *, const unsigned char *, size_t))SHA384Update, + (void (*)(unsigned char *, void *))SHA384Final, + (char *(*)(void *, char *))SHA384End + }, + { + "SHA512/256", + SHA512_256_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA512_256Init, + (void (*)(void *, const unsigned char *, size_t))SHA512_256Update, + (void (*)(unsigned char *, void *))SHA512_256Final, + (char *(*)(void *, char *))SHA512_256End + }, +#endif /* !defined(SHA2_ONLY) */ + { + "SHA512", + SHA512_DIGEST_LENGTH, + STYLE_MD5, + 0, + NULL, + (void (*)(void *))SHA512Init, + (void (*)(void *, const unsigned char *, size_t))SHA512Update, + (void (*)(unsigned char *, void *))SHA512Final, + (char *(*)(void *, char *))SHA512End + }, + { + NULL, + } +}; + +TAILQ_HEAD(hash_list, hash_function); + +void digest_end(const struct hash_function *, void *, char *, size_t, int); +int digest_file(const char 
*, struct hash_list *, int); +void digest_print(const struct hash_function *, const char *, const char *); +#if !defined(SHA2_ONLY) +int digest_filelist(const char *, struct hash_function *, int, char **); +void digest_printstr(const struct hash_function *, const char *, const char *); +void digest_string(char *, struct hash_list *); +void digest_test(struct hash_list *); +void digest_time(struct hash_list *, int); +#endif /* !defined(SHA2_ONLY) */ +void hash_insert(struct hash_list *, struct hash_function *, int); +void usage(void) __attribute__((__noreturn__)); + +extern char *__progname; +int qflag = 0; +FILE *ofile = NULL; + +int +main(int argc, char **argv) +{ + struct hash_function *hf, *hftmp; + struct hash_list hl; + size_t len; + char *cp, *input_string, *selective_checklist; + const char *optstr; + int fl, error, base64; + int bflag, cflag, pflag, rflag, tflag, xflag; + + if (pledge("stdio rpath wpath cpath", NULL) == -1) + err(1, "pledge"); + + TAILQ_INIT(&hl); + input_string = NULL; + selective_checklist = NULL; + error = bflag = cflag = pflag = qflag = rflag = tflag = xflag = 0; + +#if !defined(SHA2_ONLY) + if (strcmp(__progname, "cksum") == 0) + optstr = "a:bC:ch:pqrs:tx"; + else +#endif /* !defined(SHA2_ONLY) */ + optstr = "bC:ch:pqrs:tx"; + + /* Check for -b option early since it changes behavior. 
*/ + while ((fl = getopt(argc, argv, optstr)) != -1) { + switch (fl) { + case 'b': + bflag = 1; + break; + case '?': + usage(); + } + } + optind = 1; + optreset = 1; + while ((fl = getopt(argc, argv, optstr)) != -1) { + switch (fl) { + case 'a': + while ((cp = strsep(&optarg, " \t,")) != NULL) { + if (*cp == '\0') + continue; + base64 = -1; + for (hf = functions; hf->name != NULL; hf++) { + len = strlen(hf->name); + if (strncasecmp(cp, hf->name, len) != 0) + continue; + if (cp[len] == '\0') { + if (hf->base64 != -1) + base64 = bflag; + break; /* exact match */ + } + if (cp[len + 1] == '\0' && + (cp[len] == 'b' || cp[len] == 'x')) { + base64 = + cp[len] == 'b' ? 1 : 0; + break; /* match w/ suffix */ + } + } + if (hf->name == NULL) { + warnx("unknown algorithm \"%s\"", cp); + usage(); + } + if (hf->base64 == -1 && base64 != -1) { + warnx("%s doesn't support %s", + hf->name, + base64 ? "base64" : "hex"); + usage(); + } + /* Check for dupes. */ + TAILQ_FOREACH(hftmp, &hl, tailq) { + if (hftmp->base64 == base64 && + strcmp(hf->name, hftmp->name) == 0) + break; + } + if (hftmp == NULL) + hash_insert(&hl, hf, base64); + } + break; + case 'b': + /* has already been parsed */ + break; + case 'h': + ofile = fopen(optarg, "w"); + if (ofile == NULL) + err(1, "%s", optarg); + break; +#if !defined(SHA2_ONLY) + case 'C': + selective_checklist = optarg; + break; + case 'c': + cflag = 1; + break; +#endif /* !defined(SHA2_ONLY) */ + case 'p': + pflag = 1; + break; + case 'q': + qflag = 1; + break; + case 'r': + rflag = 1; + break; + case 's': + input_string = optarg; + break; + case 't': + tflag++; + break; + case 'x': + xflag = 1; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (ofile == NULL) + ofile = stdout; + + if (pledge("stdio rpath", NULL) == -1) + err(1, "pledge"); + + /* Most arguments are mutually exclusive */ + fl = pflag + (tflag ? 
1 : 0) + xflag + cflag + (input_string != NULL); + if (fl > 1 || (fl && argc && cflag == 0) || (rflag && qflag) || + (selective_checklist != NULL && argc == 0)) + usage(); + if (selective_checklist || cflag) { + if (TAILQ_FIRST(&hl) != TAILQ_LAST(&hl, hash_list)) + errx(1, "only a single algorithm may be specified " + "in -C or -c mode"); + } + + /* No algorithm specified, check the name we were called as. */ + if (TAILQ_EMPTY(&hl)) { + for (hf = functions; hf->name != NULL; hf++) { + if (strcasecmp(hf->name, __progname) == 0) + break; + } + if (hf->name == NULL) + hf = &functions[0]; /* default to cksum */ + hash_insert(&hl, hf, (hf->base64 == -1 ? 0 : bflag)); + } + + if (rflag || qflag) { + const int new_style = rflag ? STYLE_CKSUM : STYLE_TERSE; + TAILQ_FOREACH(hf, &hl, tailq) { + hf->style = new_style; + } + } + +#if !defined(SHA2_ONLY) + if (tflag) + digest_time(&hl, tflag); + else if (xflag) + digest_test(&hl); + else if (input_string) + digest_string(input_string, &hl); + else if (selective_checklist) { + int i; + + error = digest_filelist(selective_checklist, TAILQ_FIRST(&hl), + argc, argv); + for (i = 0; i < argc; i++) { + if (argv[i] != NULL) { + warnx("%s does not exist in %s", argv[i], + selective_checklist); + error++; + } + } + } else if (cflag) { + if (argc == 0) + error = digest_filelist("-", TAILQ_FIRST(&hl), 0, NULL); + else + while (argc--) + error += digest_filelist(*argv++, + TAILQ_FIRST(&hl), 0, NULL); + } else +#endif /* !defined(SHA2_ONLY) */ + if (pflag || argc == 0) + error = digest_file("-", &hl, pflag); + else + while (argc--) + error += digest_file(*argv++, &hl, 0); + + return(error ? 
EXIT_FAILURE : EXIT_SUCCESS); +} + +void +hash_insert(struct hash_list *hl, struct hash_function *hf, int base64) +{ + struct hash_function *hftmp; + + hftmp = malloc(sizeof(*hftmp)); + if (hftmp == NULL) + err(1, NULL); + *hftmp = *hf; + hftmp->base64 = base64; + TAILQ_INSERT_TAIL(hl, hftmp, tailq); +} + +void +digest_end(const struct hash_function *hf, void *ctx, char *buf, size_t bsize, + int base64) +{ + u_char *digest; + + if (base64 == 1) { + if ((digest = malloc(hf->digestlen)) == NULL) + err(1, NULL); + hf->final(digest, ctx); + if (b64_ntop(digest, hf->digestlen, buf, bsize) == -1) + errx(1, "error encoding base64"); + free(digest); + } else { + hf->end(ctx, buf); + } +} + +#if !defined(SHA2_ONLY) +void +digest_string(char *string, struct hash_list *hl) +{ + struct hash_function *hf; + char digest[MAX_DIGEST_LEN + 1]; + union ANY_CTX context; + + TAILQ_FOREACH(hf, hl, tailq) { + hf->init(&context); + hf->update(&context, string, strlen(string)); + digest_end(hf, &context, digest, sizeof(digest), + hf->base64); + digest_printstr(hf, string, digest); + } +} +#endif /* !defined(SHA2_ONLY) */ + +void +digest_print(const struct hash_function *hf, const char *what, + const char *digest) +{ + switch (hf->style) { + case STYLE_MD5: + (void)fprintf(ofile, "%s (%s) = %s\n", hf->name, what, digest); + break; + case STYLE_CKSUM: + (void)fprintf(ofile, "%s %s\n", digest, what); + break; + case STYLE_TERSE: + (void)fprintf(ofile, "%s\n", digest); + break; + } +} + +#if !defined(SHA2_ONLY) +void +digest_printstr(const struct hash_function *hf, const char *what, + const char *digest) +{ + switch (hf->style) { + case STYLE_MD5: + (void)fprintf(ofile, "%s (\"%s\") = %s\n", hf->name, what, digest); + break; + case STYLE_CKSUM: + (void)fprintf(ofile, "%s %s\n", digest, what); + break; + case STYLE_TERSE: + (void)fprintf(ofile, "%s\n", digest); + break; + } +} +#endif /* !defined(SHA2_ONLY) */ + +int +digest_file(const char *file, struct hash_list *hl, int echo) +{ + struct 
hash_function *hf; + FILE *fp; + size_t nread; + u_char data[32 * 1024]; + char digest[MAX_DIGEST_LEN + 1]; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + warn("cannot open %s", file); + return(1); + } + + TAILQ_FOREACH(hf, hl, tailq) { + if ((hf->ctx = malloc(sizeof(union ANY_CTX))) == NULL) + err(1, NULL); + hf->init(hf->ctx); + } + while ((nread = fread(data, 1UL, sizeof(data), fp)) != 0) { + if (echo) { + (void)fwrite(data, nread, 1UL, stdout); + if (fflush(stdout) != 0) + err(1, "stdout: write error"); + } + TAILQ_FOREACH(hf, hl, tailq) + hf->update(hf->ctx, data, nread); + } + if (ferror(fp)) { + warn("%s: read error", file); + if (fp != stdin) + fclose(fp); + TAILQ_FOREACH(hf, hl, tailq) { + free(hf->ctx); + hf->ctx = NULL; + } + return(1); + } + if (fp != stdin) + fclose(fp); + TAILQ_FOREACH(hf, hl, tailq) { + digest_end(hf, hf->ctx, digest, sizeof(digest), hf->base64); + free(hf->ctx); + hf->ctx = NULL; + if (fp == stdin) + fprintf(ofile, "%s\n", digest); + else + digest_print(hf, file, digest); + } + return(0); +} + +#if !defined(SHA2_ONLY) +/* + * Parse through the input file looking for valid lines. + * If one is found, use this checksum and file as a reference and + * generate a new checksum against the file on the filesystem. + * Print out the result of each comparison. 
+ */ +int +digest_filelist(const char *file, struct hash_function *defhash, int selcount, + char **sel) +{ + int found, base64, error, cmp, i; + size_t algorithm_max, algorithm_min; + const char *algorithm; + char *filename, *checksum, *line, *p, *tmpline; + char digest[MAX_DIGEST_LEN + 1]; + ssize_t linelen; + FILE *listfp, *fp; + size_t len, linesize, nread; + int *sel_found = NULL; + u_char data[32 * 1024]; + union ANY_CTX context; + struct hash_function *hf; + + if (strcmp(file, "-") == 0) { + listfp = stdin; + } else if ((listfp = fopen(file, "r")) == NULL) { + warn("cannot open %s", file); + return(1); + } + + if (sel != NULL) { + sel_found = calloc((size_t)selcount, sizeof(*sel_found)); + if (sel_found == NULL) + err(1, NULL); + } + + algorithm_max = algorithm_min = strlen(functions[0].name); + for (hf = &functions[1]; hf->name != NULL; hf++) { + len = strlen(hf->name); + algorithm_max = MAXIMUM(algorithm_max, len); + algorithm_min = MINIMUM(algorithm_min, len); + } + + error = found = 0; + line = NULL; + linesize = 0; + while ((linelen = getline(&line, &linesize, listfp)) != -1) { + tmpline = line; + base64 = 0; + if (line[linelen - 1] == '\n') + line[linelen - 1] = '\0'; + while (isspace((unsigned char)*tmpline)) + tmpline++; + + /* + * Crack the line into an algorithm, filename, and checksum. + * Lines are of the form: + * ALGORITHM (FILENAME) = CHECKSUM + * + * Fallback on GNU form: + * CHECKSUM FILENAME + */ + p = strchr(tmpline, ' '); + if (p != NULL && *(p + 1) == '(') { + /* BSD form */ + *p = '\0'; + algorithm = tmpline; + len = strlen(algorithm); + if (len > algorithm_max || len < algorithm_min) + continue; + + filename = p + 2; + p = strrchr(filename, ')'); + if (p == NULL || strncmp(p + 1, " = ", (size_t)3) != 0) + continue; + *p = '\0'; + + checksum = p + 4; + p = strpbrk(checksum, " \t\r"); + if (p != NULL) + *p = '\0'; + + /* + * Check that the algorithm is one we recognize. 
+ */ + for (hf = functions; hf->name != NULL; hf++) { + if (strcasecmp(algorithm, hf->name) == 0) + break; + } + if (hf->name == NULL || *checksum == '\0') + continue; + /* + * Check the length to see if this could be + * a valid checksum. If hex, it will be 2x the + * size of the binary data. For base64, we have + * to check both with and without the '=' padding. + */ + len = strlen(checksum); + if (len != hf->digestlen * 2) { + size_t len2; + + if (checksum[len - 1] == '=') { + /* use padding */ + len2 = 4 * ((hf->digestlen + 2) / 3); + } else { + /* no padding */ + len2 = (4 * hf->digestlen + 2) / 3; + } + if (len != len2) + continue; + base64 = 1; + } + } else { + /* could be GNU form */ + if ((hf = defhash) == NULL) + continue; + algorithm = hf->name; + checksum = tmpline; + if ((p = strchr(checksum, ' ')) == NULL) + continue; + if (hf->style == STYLE_CKSUM) { + if ((p = strchr(p + 1, ' ')) == NULL) + continue; + } + *p++ = '\0'; + while (isspace((unsigned char)*p)) + p++; + if (*p == '\0') + continue; + filename = p; + p = strpbrk(filename, "\t\r"); + if (p != NULL) + *p = '\0'; + } + found = 1; + + /* + * If only a selection of files is wanted, proceed only + * if the filename matches one of those in the selection. + */ + if (sel != NULL) { + for (i = 0; i < selcount; i++) { + if (strcmp(sel[i], filename) == 0) { + sel_found[i] = 1; + break; + } + } + if (i == selcount) + continue; + } + + if ((fp = fopen(filename, "r")) == NULL) { + warn("cannot open %s", filename); + (void)printf("(%s) %s: %s\n", algorithm, filename, + (errno == ENOENT ? 
"MISSING" : "FAILED")); + error = 1; + continue; + } + + hf->init(&context); + while ((nread = fread(data, 1UL, sizeof(data), fp)) > 0) + hf->update(&context, data, nread); + if (ferror(fp)) { + warn("%s: read error", file); + error = 1; + fclose(fp); + continue; + } + fclose(fp); + digest_end(hf, &context, digest, sizeof(digest), base64); + + if (base64) + cmp = strncmp(checksum, digest, len); + else + cmp = strcasecmp(checksum, digest); + if (cmp == 0) { + if (qflag == 0) + (void)printf("(%s) %s: OK\n", algorithm, + filename); + } else { + (void)printf("(%s) %s: FAILED\n", algorithm, filename); + error = 1; + } + } + free(line); + if (ferror(listfp)) { + warn("%s: getline", file); + error = 1; + } + if (listfp != stdin) + fclose(listfp); + if (!found) + warnx("%s: no properly formatted checksum lines found", file); + if (sel_found != NULL) { + /* + * Mark found files by setting them to NULL so that we can + * detect files that are missing from the checklist later. + */ + for (i = 0; i < selcount; i++) { + if (sel_found[i]) + sel[i] = NULL; + } + free(sel_found); + } + return(error || !found); +} + +#define TEST_BLOCK_LEN 10000 +#define TEST_BLOCK_COUNT 10000 + +void +digest_time(struct hash_list *hl, int times) +{ + struct hash_function *hf; + struct rusage start, stop; + struct timeval res; + union ANY_CTX context; + u_int i; + u_char data[TEST_BLOCK_LEN]; + char digest[MAX_DIGEST_LEN + 1]; + double elapsed; + int count = TEST_BLOCK_COUNT; + while (--times > 0 && count < INT_MAX / 10) + count *= 10; + + TAILQ_FOREACH(hf, hl, tailq) { + (void)printf("%s time trial. Processing %d %d-byte blocks...", + hf->name, count, TEST_BLOCK_LEN); + fflush(stdout); + + /* Initialize data based on block number. 
*/ + for (i = 0; i < TEST_BLOCK_LEN; i++) + data[i] = (u_char)(i & 0xff); + + getrusage(RUSAGE_SELF, &start); + hf->init(&context); + for (i = 0; i < count; i++) + hf->update(&context, data, (size_t)TEST_BLOCK_LEN); + digest_end(hf, &context, digest, sizeof(digest), hf->base64); + getrusage(RUSAGE_SELF, &stop); + timersub(&stop.ru_utime, &start.ru_utime, &res); + elapsed = res.tv_sec + res.tv_usec / 1000000.0; + + (void)printf("\nDigest = %s\n", digest); + (void)printf("Time = %f seconds\n", elapsed); + (void)printf("Speed = %f bytes/second\n", + (double)TEST_BLOCK_LEN * count / elapsed); + } +} + +void +digest_test(struct hash_list *hl) +{ + struct hash_function *hf; + union ANY_CTX context; + int i; + char digest[MAX_DIGEST_LEN + 1]; + unsigned char buf[1000]; + unsigned const char *test_strings[] = { + "", + "a", + "abc", + "message digest", + "abcdefghijklmnopqrstuvwxyz", + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789", + "12345678901234567890123456789012345678901234567890123456789" + "012345678901234567890", + }; + + TAILQ_FOREACH(hf, hl, tailq) { + (void)printf("%s test suite:\n", hf->name); + + for (i = 0; i < 8; i++) { + hf->init(&context); + hf->update(&context, test_strings[i], + strlen(test_strings[i])); + digest_end(hf, &context, digest, sizeof(digest), + hf->base64); + digest_printstr(hf, test_strings[i], digest); + } + + /* Now simulate a string of a million 'a' characters. 
*/ + memset(buf, 'a', sizeof(buf)); + hf->init(&context); + for (i = 0; i < 1000; i++) + hf->update(&context, buf, sizeof(buf)); + digest_end(hf, &context, digest, sizeof(digest), hf->base64); + digest_print(hf, "one million 'a' characters", + digest); + } +} +#endif /* !defined(SHA2_ONLY) */ + +void +usage(void) +{ +#if !defined(SHA2_ONLY) + if (strcmp(__progname, "cksum") == 0) + fprintf(stderr, "usage: %s [-bcpqrtx] [-a algorithms] [-C checklist] " + "[-h hashfile]\n" + " [-s string] [file ...]\n", + __progname); + else +#endif /* !defined(SHA2_ONLY) */ + fprintf(stderr, "usage:" + "\t%s [-bcpqrtx] [-C checklist] [-h hashfile] [-s string] " + "[file ...]\n", + __progname); + + exit(EXIT_FAILURE); +} diff --git a/bin/pax/CVS/Entries b/bin/pax/CVS/Entries new file mode 100644 index 0000000..0ba2835 --- /dev/null +++ b/bin/pax/CVS/Entries @@ -0,0 +1,24 @@ +/Makefile/1.13/Thu Sep 13 12:33:43 2018// +/ar_io.c/1.63/Fri Jun 28 13:34:59 2019// +/ar_subs.c/1.49/Fri Jun 28 13:34:59 2019// +/buf_subs.c/1.31/Fri Jun 28 13:34:59 2019// +/cpio.1/1.36/Thu Jan 16 16:46:46 2020// +/cpio.c/1.33/Sat Sep 16 07:42:34 2017// +/cpio.h/1.4/Mon Jun 2 23:32:08 2003// +/extern.h/1.60/Mon Mar 23 20:04:19 2020// +/file_subs.c/1.55/Mon Mar 23 20:04:19 2020// +/ftree.c/1.42/Fri Jun 28 13:34:59 2019// +/gen_subs.c/1.32/Fri Aug 26 05:06:14 2016// +/getoldopt.c/1.9/Tue Oct 27 23:59:22 2009// +/options.c/1.103/Fri Nov 15 20:34:17 2019// +/pat_rep.c/1.43/Sat Sep 16 07:42:34 2017// +/pax.1/1.75/Thu Jan 16 16:46:46 2020// +/pax.c/1.53/Fri Jun 28 13:34:59 2019// +/pax.h/1.29/Tue Sep 12 17:11:11 2017// +/sel_subs.c/1.28/Mon Jun 24 03:33:09 2019// +/tables.c/1.54/Fri Jun 28 05:35:34 2019// +/tar.1/1.62/Thu Jan 16 16:46:46 2020// +/tar.c/1.68/Mon Jun 24 03:33:09 2019// +/tar.h/1.9/Wed Jan 8 06:43:34 2014// +/tty_subs.c/1.17/Fri Aug 26 04:22:13 2016// +D diff --git a/bin/pax/CVS/Repository b/bin/pax/CVS/Repository new file mode 100644 index 0000000..19b1a65 --- /dev/null +++ b/bin/pax/CVS/Repository @@ 
-0,0 +1 @@ +src/bin/pax diff --git a/bin/pax/CVS/Root b/bin/pax/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/bin/pax/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/bin/pax/Makefile b/bin/pax/Makefile new file mode 100644 index 0000000..5dd36e2 --- /dev/null +++ b/bin/pax/Makefile @@ -0,0 +1,11 @@ +# $OpenBSD: Makefile,v 1.13 2018/09/13 12:33:43 millert Exp $ + +WARNINGS=Yes +PROG= pax +SRCS= ar_io.c ar_subs.c buf_subs.c cpio.c file_subs.c ftree.c\ + gen_subs.c getoldopt.c options.c pat_rep.c pax.c sel_subs.c tables.c\ + tar.c tty_subs.c +MAN= pax.1 tar.1 cpio.1 +LINKS= ${BINDIR}/pax ${BINDIR}/tar ${BINDIR}/pax ${BINDIR}/cpio + +.include <bsd.prog.mk> diff --git a/bin/pax/ar_io.c b/bin/pax/ar_io.c new file mode 100644 index 0000000..ddbd36e --- /dev/null +++ b/bin/pax/ar_io.c @@ -0,0 +1,1288 @@ +/* $OpenBSD: ar_io.c,v 1.63 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ar_io.c,v 1.5 1996/03/26 23:54:13 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mtio.h> +#include <sys/wait.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Routines which deal directly with the archive I/O device/file. + */ + +#define DMOD 0666 /* default mode of created archives */ +#define EXT_MODE O_RDONLY /* open mode for list/extract */ +#define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ +#define APP_MODE O_RDWR /* mode for append */ +#define STDO "<STDOUT>" /* pseudo name for stdout */ +#define STDN "<STDIN>" /* pseudo name for stdin */ +static int arfd = -1; /* archive file descriptor */ +static int artyp = ISREG; /* archive type: file/FIFO/tape */ +static int arvol = 1; /* archive volume number */ +static int lstrval = -1; /* return value from last i/o */ +static int io_ok; /* i/o worked on volume after resync */ +static int did_io; /* did i/o ever occur on volume? 
*/ +static int done; /* set via tty termination */ +static struct stat arsb; /* stat of archive device at open */ +static int invld_rec; /* tape has out of spec record size */ +static int wr_trail = 1; /* trailer was rewritten in append */ +static int can_unlnk = 0; /* do we unlink null archives? */ +const char *arcname; /* printable name of archive */ +const char *gzip_program; /* name of gzip program */ +static pid_t zpid = -1; /* pid of child process */ +int force_one_volume; /* 1 if we ignore volume changes */ + +static int get_phys(void); +extern sigset_t s_mask; +static void ar_start_gzip(int, const char *, int); + +/* + * ar_open() + * Opens the next archive volume. Determines the type of the device and + * sets up block sizes as required by the archive device and the format. + * Note: we may be called with name == NULL on the first open only. + * Return: + * -1 on failure, 0 otherwise + */ + +int +ar_open(const char *name) +{ + struct mtget mb; + + if (arfd != -1) + (void)close(arfd); + arfd = -1; + can_unlnk = did_io = io_ok = invld_rec = 0; + artyp = ISREG; + flcnt = 0; + + /* + * open based on overall operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + if (name == NULL) { + arfd = STDIN_FILENO; + arcname = STDN; + } else if ((arfd = open(name, EXT_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to read on %s", name); + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 0); + break; + case ARCHIVE: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, AR_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to write on %s", name); + else + can_unlnk = 1; + if (arfd != -1 && gzip_program != NULL) + ar_start_gzip(arfd, gzip_program, 1); + break; + case APPND: + if (name == NULL) { + arfd = STDOUT_FILENO; + arcname = STDO; + } else if ((arfd = open(name, APP_MODE, DMOD)) == -1) + syswarn(1, errno, "Failed open to read/write on %s", + name); + break; + case COPY: + /* + * 
arfd not used in COPY mode + */ + arcname = "<NONE>"; + lstrval = 1; + return(0); + } + if (arfd < 0) + return(-1); + + if (chdname != NULL) + if (chdir(chdname) == -1) { + syswarn(1, errno, "Failed chdir to %s", chdname); + return(-1); + } + /* + * set up is based on device type + */ + if (fstat(arfd, &arsb) == -1) { + syswarn(1, errno, "Failed stat on %s", arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + if (S_ISDIR(arsb.st_mode)) { + paxwarn(0, "Cannot write an archive on top of a directory %s", + arcname); + (void)close(arfd); + arfd = -1; + can_unlnk = 0; + return(-1); + } + + if (S_ISCHR(arsb.st_mode)) + artyp = ioctl(arfd, MTIOCGET, &mb) ? ISCHR : ISTAPE; + else if (S_ISBLK(arsb.st_mode)) + artyp = ISBLK; + else if ((lseek(arfd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)) + artyp = ISPIPE; + else + artyp = ISREG; + + /* + * make sure beyond any doubt that we can unlink only regular files + * we created + */ + if (artyp != ISREG) + can_unlnk = 0; + /* + * if we are writing, we are done + */ + if (act == ARCHIVE) { + blksz = rdblksz = wrblksz; + lstrval = 1; + return(0); + } + + /* + * set default blksz on read. APPNDs writes rdblksz on the last volume + * On all new archive volumes, we shift to wrblksz (if the user + * specified one, otherwise we will continue to use rdblksz). We + * must set blocksize based on what kind of device the archive is + * stored. + */ + switch (artyp) { + case ISTAPE: + /* + * Tape drives come in at least two flavors. Those that support + * variable sized records and those that have fixed sized + * records. They must be treated differently. For tape drives + * that support variable sized records, we must make large + * reads to make sure we get the entire record, otherwise we + * will just get the first part of the record (up to size we + * asked). Tapes with fixed sized records may or may not return + * multiple records in a single read. 
We really do not care + * what the physical record size is UNLESS we are going to + * append. (We will need the physical block size to rewrite + * the trailer). Only when we are appending do we go to the + * effort to figure out the true PHYSICAL record size. + */ + blksz = rdblksz = MAXBLK; + break; + case ISPIPE: + case ISBLK: + case ISCHR: + /* + * Blocksize is not a major issue with these devices (but must + * be kept a multiple of 512). If the user specified a write + * block size, we use that to read. Under append, we must + * always keep blksz == rdblksz. Otherwise we go ahead and use + * the device optimal blocksize as (and if) returned by stat + * and if it is within pax specs. + */ + if ((act == APPND) && wrblksz) { + blksz = rdblksz = wrblksz; + break; + } + + if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && + ((arsb.st_blksize % BLKMULT) == 0)) + rdblksz = arsb.st_blksize; + else + rdblksz = DEVBLK; + /* + * For performance go for large reads when we can without harm + */ + if ((act == APPND) || (artyp == ISCHR)) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + case ISREG: + /* + * if the user specified wrblksz works, use it. Under appends + * we must always keep blksz == rdblksz + */ + if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ + blksz = rdblksz = wrblksz; + break; + } + /* + * See if we can find the blocking factor from the file size + */ + for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) + if ((arsb.st_size % rdblksz) == 0) + break; + /* + * When we cannot find a match, we may have a flawed archive. + */ + if (rdblksz <= 0) + rdblksz = FILEBLK; + /* + * for performance go for large reads when we can + */ + if (act == APPND) + blksz = rdblksz; + else + blksz = MAXBLK; + break; + default: + /* + * should never happen, worst case, slow... 
+ */ + blksz = rdblksz = BLKMULT; + break; + } + lstrval = 1; + return(0); +} + +/* + * ar_close(int int_sig) + * closes archive device, increments volume number, and prints i/o summary + * If in_sig is set we're in a signal handler and can't flush stdio. + */ +void +ar_close(int in_sig) +{ + int status; + + if (arfd < 0) { + did_io = io_ok = flcnt = 0; + return; + } + if (!in_sig) + fflush(listf); + + /* + * Close archive file. This may take a LONG while on tapes (we may be + * forced to wait for the rewind to complete) so tell the user what is + * going on (this avoids the user hitting control-c thinking pax is + * broken). + */ + if (vflag && (artyp == ISTAPE)) { + (void)dprintf(listfd, + "%s%s: Waiting for tape drive close to complete...", + vfpart ? "\n" : "", argv0); + } + + /* + * if nothing was written to the archive (and we created it), we remove + * it + */ + if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && + (arsb.st_size == 0)) { + (void)unlink(arcname); + can_unlnk = 0; + } + + /* + * for a quick extract/list, pax frequently exits before the child + * process is done + */ + if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) { + kill(zpid, SIGINT); + zpid = -1; + } + + (void)close(arfd); + + /* Do not exit before child to ensure data integrity */ + if (zpid > 0) { + waitpid(zpid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + exit_val = 1; + } + + + if (vflag && (artyp == ISTAPE)) { + (void)write(listfd, "done.\n", sizeof("done.\n")-1); + vfpart = 0; + } + arfd = -1; + + if (!io_ok && !did_io) { + flcnt = 0; + return; + } + did_io = io_ok = 0; + + /* + * The volume number is only increased when the last device has data + * and we have already determined the archive format. + */ + if (frmt != NULL) + ++arvol; + + if (!vflag) { + flcnt = 0; + return; + } + + /* + * Print out a summary of I/O for this archive volume. 
+ */ + if (vfpart) { + (void)write(listfd, "\n", 1); + vfpart = 0; + } + + /* + * If we have not determined the format yet, we just say how many bytes + * we have skipped over looking for a header to id. there is no way we + * could have written anything yet. + */ + if (frmt == NULL) { + (void)dprintf(listfd, + "%s: unknown format, %llu bytes skipped.\n", argv0, rdcnt); + flcnt = 0; + return; + } + + if (op_mode == OP_PAX) + (void)dprintf(listfd, "%s: %s vol %d, %lu files," + " %llu bytes read, %llu bytes written.\n", + argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt); +#ifndef NOCPIO + else if (op_mode == OP_CPIO) + (void)dprintf(listfd, "%llu blocks\n", + (rdcnt ? rdcnt : wrcnt) / 5120); +#endif /* !NOCPIO */ + flcnt = 0; +} + +/* + * ar_drain() + * drain any archive format independent padding from an archive read + * from a socket or a pipe. This is to prevent the process on the + * other side of the pipe from getting a SIGPIPE (pax will stop + * reading an archive once a format dependent trailer is detected). + */ +void +ar_drain(void) +{ + int res; + char drbuf[MAXBLK]; + + /* + * we only drain from a pipe/socket. Other devices can be closed + * without reading up to end of file. We sure hope that pipe is closed + * on the other side so we will get an EOF. + */ + if ((artyp != ISPIPE) || (lstrval <= 0)) + return; + + /* + * keep reading until pipe is drained + */ + while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0) + continue; + lstrval = res; +} + +/* + * ar_set_wr() + * Set up device right before switching from read to write in an append. + * device dependent code (if required) to do this should be added here. + * For all archive devices we are already positioned at the place we want + * to start writing when this routine is called. 
+ * Return: + * 0 if all ready to write, -1 otherwise + */ + +int +ar_set_wr(void) +{ + off_t cpos; + + /* + * we must make sure the trailer is rewritten on append, ar_next() + * will stop us if the archive containing the trailer was not written + */ + wr_trail = 0; + + /* + * Add any device dependent code as required here + */ + if (artyp != ISREG) + return(0); + /* + * Ok we have an archive in a regular file. If we were rewriting a + * file, we must get rid of all the stuff after the current offset + * (it was not written by pax). + */ + if (((cpos = lseek(arfd, 0, SEEK_CUR)) < 0) || + (ftruncate(arfd, cpos) == -1)) { + syswarn(1, errno, "Unable to truncate archive file"); + return(-1); + } + return(0); +} + +/* + * ar_app_ok() + * check if the last volume in the archive allows appends. We cannot check + * this until we are ready to write since there is no spec that says all + * volumes in a single archive have to be of the same type... + * Return: + * 0 if we can append, -1 otherwise. + */ + +int +ar_app_ok(void) +{ + if (artyp == ISPIPE) { + paxwarn(1, "Cannot append to an archive obtained from a pipe."); + return(-1); + } + + if (!invld_rec) + return(0); + paxwarn(1,"Cannot append, device record size %d does not support %s spec", + rdblksz, argv0); + return(-1); +} + +/* + * ar_read() + * read up to a specified number of bytes from the archive into the + * supplied buffer. When dealing with tapes we may not always be able to + * read what we want. + * Return: + * Number of bytes in buffer. 0 for end of file, -1 for a read error. + */ + +int +ar_read(char *buf, int cnt) +{ + int res = 0; + + /* + * if last i/o was in error, no more reads until reset or new volume + */ + if (lstrval <= 0) + return(lstrval); + + /* + * how we read must be based on device type + */ + switch (artyp) { + case ISTAPE: + if ((res = read(arfd, buf, cnt)) > 0) { + /* + * CAUTION: tape systems may not always return the same + * sized records so we leave blksz == MAXBLK. 
The + * physical record size that a tape drive supports is + * very hard to determine in a uniform and portable + * manner. + */ + io_ok = 1; + if (res != rdblksz) { + /* + * Record size changed. If this happens on + * any record after the first, we probably have + * a tape drive which has a fixed record size + * (we are getting multiple records in a single + * read). Watch out for record blocking that + * violates pax spec (must be a multiple of + * BLKMULT). + */ + rdblksz = res; + if (rdblksz % BLKMULT) + invld_rec = 1; + } + return(res); + } + break; + case ISREG: + case ISBLK: + case ISCHR: + case ISPIPE: + default: + /* + * Files are so easy to deal with. These other things cannot + * be trusted at all. So when we are dealing with character + * devices and pipes we just take what they have ready for us + * and return. Trying to do anything else with them runs the + * risk of failure. + */ + if ((res = read(arfd, buf, cnt)) > 0) { + io_ok = 1; + return(res); + } + break; + } + + /* + * We are in trouble at this point, something is broken... + */ + lstrval = res; + if (res < 0) + syswarn(1, errno, "Failed read on archive volume %d", arvol); + else + paxwarn(0, "End of archive volume %d reached", arvol); + return(res); +} + +/* + * ar_write() + * Write a specified number of bytes in supplied buffer to the archive + * device so it appears as a single "block". Deals with errors and tries + * to recover when faced with short writes. + * Return: + * Number of bytes written. 0 indicates end of volume reached and with no + * flaws (as best that can be detected). A -1 indicates an unrecoverable + * error in the archive occurred. + */ + +int +ar_write(char *buf, int bsz) +{ + ssize_t res; + off_t cpos; + + /* + * do not allow pax to create a "bad" archive. Once a write fails on + * an archive volume prevent further writes to it. 
+ */ + if (lstrval <= 0) + return(lstrval); + + if ((res = write(arfd, buf, bsz)) == bsz) { + wr_trail = 1; + io_ok = 1; + return(bsz); + } + /* + * write broke, see what we can do with it. We try to send any partial + * writes that may violate pax spec to the next archive volume. + */ + if (res == -1) + lstrval = res; + else + lstrval = 0; + + switch (artyp) { + case ISREG: + if ((res > 0) && (res % BLKMULT)) { + /* + * try to fix up partial writes which are not BLKMULT + * in size by forcing the runt record to next archive + * volume + */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) + break; + cpos -= res; + if (ftruncate(arfd, cpos) == -1) + break; + res = lstrval = 0; + break; + } + if (res >= 0) + break; + /* + * if file is out of space, handle it like a return of 0 + */ + if ((errno == ENOSPC) || (errno == EFBIG) || (errno == EDQUOT)) + res = lstrval = 0; + break; + case ISTAPE: + case ISCHR: + case ISBLK: + if (res >= 0) + break; + if (errno == EACCES) { + paxwarn(0, "Write failed, archive is write protected."); + res = lstrval = 0; + return(0); + } + /* + * see if we reached the end of media, if so force a change to + * the next volume + */ + if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) + res = lstrval = 0; + break; + case ISPIPE: + default: + /* + * we cannot fix errors to these devices + */ + break; + } + + /* + * Better tell the user the bad news... + * if this is a block aligned archive format, we may have a bad archive + * if the format wants the header to start at a BLKMULT boundary. While + * we can deal with the mis-aligned data, it violates spec and other + * archive readers will likely fail. if the format is not block + * aligned, the user may be lucky (and the archive is ok). + */ + if (res >= 0) { + if (res > 0) + wr_trail = 1; + io_ok = 1; + } + + /* + * If we were trying to rewrite the trailer and it didn't work, we + * must quit right away. 
+ */ + if (!wr_trail && (res <= 0)) { + paxwarn(1,"Unable to append, trailer re-write failed. Quitting."); + return(res); + } + + if (res == 0) + paxwarn(0, "End of archive volume %d reached", arvol); + else if (res < 0) + syswarn(1, errno, "Failed write to archive volume: %d", arvol); + else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) + paxwarn(0,"WARNING: partial archive write. Archive MAY BE FLAWED"); + else + paxwarn(1,"WARNING: partial archive write. Archive IS FLAWED"); + return(res); +} + +/* + * ar_rdsync() + * Try to move past a bad spot on a flawed archive as needed to continue + * I/O. Clears error flags to allow I/O to continue. + * Return: + * 0 when ok to try i/o again, -1 otherwise. + */ + +int +ar_rdsync(void) +{ + long fsbz; + off_t cpos; + off_t mpos; + struct mtop mb; + + /* + * Fail resync attempts at user request (done) or if this is going to be + * an update/append to a existing archive. if last i/o hit media end, + * we need to go to the next volume not try a resync + */ + if ((done > 0) || (lstrval == 0)) + return(-1); + + if ((act == APPND) || (act == ARCHIVE)) { + paxwarn(1, "Cannot allow updates to an archive with flaws."); + return(-1); + } + if (io_ok) + did_io = 1; + + switch (artyp) { + case ISTAPE: + /* + * if the last i/o was a successful data transfer, we assume + * the fault is just a bad record on the tape that we are now + * past. If we did not get any data since the last resync try + * to move the tape forward one PHYSICAL record past any + * damaged tape section. Some tape drives are stubborn and need + * to be pushed. + */ + if (io_ok) { + io_ok = 0; + lstrval = 1; + break; + } + mb.mt_op = MTFSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) + break; + lstrval = 1; + break; + case ISREG: + case ISCHR: + case ISBLK: + /* + * try to step over the bad part of the device. 
+ */ + io_ok = 0; + if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) + fsbz = BLKMULT; + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) + break; + mpos = fsbz - (cpos % fsbz); + if (lseek(arfd, mpos, SEEK_CUR) == -1) + break; + lstrval = 1; + break; + case ISPIPE: + default: + /* + * cannot recover on these archive device types + */ + io_ok = 0; + break; + } + if (lstrval <= 0) { + paxwarn(1, "Unable to recover from an archive read failure."); + return(-1); + } + paxwarn(0, "Attempting to recover from an archive read failure."); + return(0); +} + +/* + * ar_fow() + * Move the I/O position within the archive forward the specified number of + * bytes as supported by the device. If we cannot move the requested + * number of bytes, return the actual number of bytes moved in skipped. + * Return: + * 0 if moved the requested distance, -1 on complete failure, 1 on + * partial move (the amount moved is in skipped) + */ + +int +ar_fow(off_t sksz, off_t *skipped) +{ + off_t cpos; + off_t mpos; + + *skipped = 0; + if (sksz <= 0) + return(0); + + /* + * we cannot move forward at EOF or error + */ + if (lstrval <= 0) + return(lstrval); + + /* + * Safer to read forward on devices where it is hard to find the end of + * the media without reading to it. With tapes we cannot be sure of the + * number of physical blocks to skip (we do not know physical block + * size at this point), so we must only read forward on tapes! 
+ */ + if (artyp != ISREG) + return(0); + + /* + * figure out where we are in the archive + */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) >= 0) { + /* + * we can be asked to move farther than there are bytes in this + * volume, if so, just go to file end and let normal buf_fill() + * deal with the end of file (it will go to next volume by + * itself) + */ + if ((mpos = cpos + sksz) > arsb.st_size) { + *skipped = arsb.st_size - cpos; + mpos = arsb.st_size; + } else + *skipped = sksz; + if (lseek(arfd, mpos, SEEK_SET) >= 0) + return(0); + } + syswarn(1, errno, "Forward positioning operation on archive failed"); + lstrval = -1; + return(-1); +} + +/* + * ar_rev() + * move the i/o position within the archive backwards the specified byte + * count as supported by the device. With tapes drives we RESET rdblksz to + * the PHYSICAL blocksize. + * NOTE: We should only be called to move backwards so we can rewrite the + * last records (the trailer) of an archive (APPEND). + * Return: + * 0 if moved the requested distance, -1 on complete failure + */ + +int +ar_rev(off_t sksz) +{ + off_t cpos; + struct mtop mb; + int phyblk; + + /* + * make sure we do not have try to reverse on a flawed archive + */ + if (lstrval < 0) + return(lstrval); + + switch (artyp) { + case ISPIPE: + if (sksz <= 0) + break; + /* + * cannot go backwards on these critters + */ + paxwarn(1, "Reverse positioning on pipes is not supported."); + lstrval = -1; + return(-1); + case ISREG: + case ISBLK: + case ISCHR: + default: + if (sksz <= 0) + break; + + /* + * For things other than files, backwards movement has a very + * high probability of failure as we really do not know the + * true attributes of the device we are talking to (the device + * may not even have the ability to lseek() in any direction). + * First we figure out where we are in the archive. 
+ */ + if ((cpos = lseek(arfd, 0, SEEK_CUR)) == -1) { + syswarn(1, errno, + "Unable to obtain current archive byte offset"); + lstrval = -1; + return(-1); + } + + /* + * we may try to go backwards past the start when the archive + * is only a single record. If this happens and we are on a + * multi-volume archive, we need to go to the end of the + * previous volume and continue our movement backwards from + * there. + */ + if ((cpos -= sksz) < 0) { + if (arvol > 1) { + /* + * this should never happen + */ + paxwarn(1,"Reverse position on previous volume."); + lstrval = -1; + return(-1); + } + cpos = 0; + } + if (lseek(arfd, cpos, SEEK_SET) == -1) { + syswarn(1, errno, "Unable to seek archive backwards"); + lstrval = -1; + return(-1); + } + break; + case ISTAPE: + /* + * Calculate and move the proper number of PHYSICAL tape + * blocks. If the sksz is not an even multiple of the physical + * tape size, we cannot do the move (this should never happen). + * (We also cannot handle trailers spread over two vols.) + * get_phys() also makes sure we are in front of the filemark. + */ + if ((phyblk = get_phys()) <= 0) { + lstrval = -1; + return(-1); + } + + /* + * make sure future tape reads only go by physical tape block + * size (set rdblksz to the real size). + */ + rdblksz = phyblk; + + /* + * if no movement is required, just return (we must be after + * get_phys() so the physical blocksize is properly set) + */ + if (sksz <= 0) + break; + + /* + * ok we have to move. Make sure the tape drive can do it. 
+ */ + if (sksz % phyblk) { + paxwarn(1, + "Tape drive unable to backspace requested amount"); + lstrval = -1; + return(-1); + } + + /* + * move backwards the requested number of bytes + */ + mb.mt_op = MTBSR; + mb.mt_count = sksz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1,errno, "Unable to backspace tape %d blocks.", + mb.mt_count); + lstrval = -1; + return(-1); + } + break; + } + lstrval = 1; + return(0); +} + +/* + * get_phys() + * Determine the physical block size on a tape drive. We need the physical + * block size so we know how many bytes we skip over when we move with + * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when + * return. + * This is one really SLOW routine... + * Return: + * physical block size if ok (ok > 0), -1 otherwise + */ + +static int +get_phys(void) +{ + int padsz = 0; + int res; + int phyblk; + struct mtop mb; + char scbuf[MAXBLK]; + + /* + * move to the file mark, and then back up one record and read it. + * this should tell us the physical record size the tape is using. + */ + if (lstrval == 1) { + /* + * we know we are at file mark when we get back a 0 from + * read() + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + padsz += res; + if (res == -1) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + } + + /* + * move backwards over the file mark so we are at the end of the + * last record. + */ + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * move backwards so we are in front of the last record and read it to + * get physical tape blocksize. 
+ */ + mb.mt_op = MTBSR; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over last tape block."); + return(-1); + } + if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) { + syswarn(1, errno, "Cannot determine archive tape blocksize."); + return(-1); + } + + /* + * read forward to the file mark, then back up in front of the filemark + * (this is a bit paranoid, but should be safe to do). + */ + while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) + continue; + if (res == -1) { + syswarn(1, errno, "Unable to locate tape filemark."); + return(-1); + } + mb.mt_op = MTBSF; + mb.mt_count = 1; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1, errno, "Unable to backspace over tape filemark."); + return(-1); + } + + /* + * set lstrval so we know that the filemark has not been seen + */ + lstrval = 1; + + /* + * return if there was no padding + */ + if (padsz == 0) + return(phyblk); + + /* + * make sure we can move backwards over the padding. (this should + * never fail). + */ + if (padsz % phyblk) { + paxwarn(1, "Tape drive unable to backspace requested amount"); + return(-1); + } + + /* + * move backwards over the padding so the head is where it was when + * we were first called (if required). + */ + mb.mt_op = MTBSR; + mb.mt_count = padsz/phyblk; + if (ioctl(arfd, MTIOCTOP, &mb) == -1) { + syswarn(1,errno,"Unable to backspace tape over %d pad blocks", + mb.mt_count); + return(-1); + } + return(phyblk); +} + +/* + * ar_next() + * prompts the user for the next volume in this archive. For some devices + * we may allow the media to be changed. Otherwise a new archive is + * prompted for. By pax spec, if there is no controlling tty or an eof is + * read on tty input, we must quit pax. + * Return: + * 0 when ready to continue, -1 when all done + */ + +int +ar_next(void) +{ + char buf[PAXPATHLEN+2]; + static int freeit = 0; + sigset_t o_mask; + + /* + * WE MUST CLOSE THE DEVICE. 
A lot of devices must see last close, (so + * things like writing EOF etc will be done) (Watch out ar_close() can + * also be called via a signal handler, so we must prevent a race. + */ + if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) == -1) + syswarn(0, errno, "Unable to set signal mask"); + ar_close(0); + if (sigprocmask(SIG_SETMASK, &o_mask, NULL) == -1) + syswarn(0, errno, "Unable to restore signal mask"); + + if (done || !wr_trail || force_one_volume || op_mode == OP_TAR) + return(-1); + + tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0); + + /* + * if i/o is on stdin or stdout, we cannot reopen it (we do not know + * the name), the user will be forced to type it in. + */ + if (strcmp(arcname, STDO) && strcmp(arcname, STDN) && (artyp != ISREG) + && (artyp != ISPIPE)) { + if (artyp == ISTAPE) { + tty_prnt("%s ready for archive tape volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT TAPE on the tape drive"); + } else { + tty_prnt("%s ready for archive volume: %d\n", + arcname, arvol); + tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); + } + + if ((act == ARCHIVE) || (act == APPND)) + tty_prnt(" and make sure it is WRITE ENABLED.\n"); + else + tty_prnt("\n"); + + for (;;) { + tty_prnt("Type \"y\" to continue, \".\" to quit %s,", + argv0); + tty_prnt(" or \"s\" to switch to new device.\nIf you"); + tty_prnt(" cannot change storage media, type \"s\"\n"); + tty_prnt("Is the device ready and online? 
> "); + + if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + + if ((buf[0] == '\0') || (buf[1] != '\0')) { + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + + switch (buf[0]) { + case 'y': + case 'Y': + /* + * we are to continue with the same device + */ + if (ar_open(arcname) >= 0) + return(0); + tty_prnt("Cannot re-open %s, try again\n", + arcname); + continue; + case 's': + case 'S': + /* + * user wants to open a different device + */ + tty_prnt("Switching to a different archive\n"); + break; + default: + tty_prnt("%s unknown command, try again\n",buf); + continue; + } + break; + } + } else + tty_prnt("Ready for archive volume: %d\n", arvol); + + /* + * have to go to a different archive + */ + for (;;) { + tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); + tty_prnt("Archive name > "); + + if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { + done = 1; + lstrval = -1; + tty_prnt("Quitting %s!\n", argv0); + vfpart = 0; + return(-1); + } + if (buf[0] == '\0') { + tty_prnt("Empty file name, try again\n"); + continue; + } + if (!strcmp(buf, "..")) { + tty_prnt("Illegal file name: .. try again\n"); + continue; + } + if (strlen(buf) > PAXPATHLEN) { + tty_prnt("File name too long, try again\n"); + continue; + } + + /* + * try to open new archive + */ + if (ar_open(buf) >= 0) { + if (freeit) { + free((char *)arcname); + freeit = 0; + } + if ((arcname = strdup(buf)) == NULL) { + done = 1; + lstrval = -1; + paxwarn(0, "Cannot save archive name."); + return(-1); + } + freeit = 1; + break; + } + tty_prnt("Cannot open %s, try again\n", buf); + continue; + } + return(0); +} + +/* + * ar_start_gzip() + * starts the gzip compression/decompression process as a child, using magic + * to keep the fd the same in the calling function (parent). 
+ */ +void +ar_start_gzip(int fd, const char *path, int wr) +{ + int fds[2]; + const char *gzip_flags; + + if (pipe(fds) == -1) + err(1, "could not pipe"); + zpid = fork(); + if (zpid == -1) + err(1, "could not fork"); + + /* parent */ + if (zpid) { + if (wr) + dup2(fds[1], fd); + else + dup2(fds[0], fd); + close(fds[0]); + close(fds[1]); + + if (pmode == 0 || (act != EXTRACT && act != COPY)) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw proc tape", + NULL) == -1) + err(1, "pledge"); + } + } else { + if (wr) { + dup2(fds[0], STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + gzip_flags = "-c"; + } else { + dup2(fds[1], STDOUT_FILENO); + dup2(fd, STDIN_FILENO); + gzip_flags = "-dc"; + } + close(fds[0]); + close(fds[1]); + + /* System compressors are more likely to use pledge(2) */ + putenv("PATH=/usr/bin:/usr/local/bin"); + + if (execlp(path, path, gzip_flags, (char *)NULL) == -1) + err(1, "could not exec %s", path); + /* NOTREACHED */ + } +} diff --git a/bin/pax/ar_subs.c b/bin/pax/ar_subs.c new file mode 100644 index 0000000..f0a55ab --- /dev/null +++ b/bin/pax/ar_subs.c @@ -0,0 +1,1277 @@ +/* $OpenBSD: ar_subs.c,v 1.49 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +static void wr_archive(ARCHD *, int is_app); +static int get_arc(void); +static int next_head(ARCHD *); +static int rd_gnu_string(ARCHD *); +extern sigset_t s_mask; + +/* + * Routines which control the overall operation modes of pax as specified by + * the user: list, append, read ... 
+ */ + +static char hdbuf[BLKMULT]; /* space for archive header on read */ +u_long flcnt; /* number of files processed */ + +/* + * list() + * list the contents of an archive which match user supplied pattern(s) + * (no pattern matches all). + */ + +void +list(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + time_t now; + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine. We + * also save current time for ls_list() so we do not make a system + * call for each file we need to print. If verbose (vflag) start up + * the name and group caches. + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0)) + return; + + now = time(NULL); + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + if (rd_gnu_string(arcn)) + continue; + + /* + * check for pattern, and user specified options match. + * When all patterns are matched we are done. 
+ */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res == 0) && (sel_chk(arcn) == 0)) { + /* + * pattern resulted in a selected file + */ + if (pat_sel(arcn) < 0) + break; + + /* + * modify the name as requested by the user if name + * survives modification, do a listing of the file + */ + if ((res = mod_name(arcn)) < 0) + break; + if (res == 0) + ls_list(arcn, now, stdout); + } + + /* + * skip to next archive format header using values calculated + * by the format header read routine + */ + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + } + + /* + * all done, let format have a chance to cleanup, and make sure that + * the patterns supplied by the user were all matched + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + pat_chk(); +} + +static int +cmp_file_times(int mtime_flag, int ctime_flag, ARCHD *arcn, struct stat *sbp) +{ + struct stat sb; + + if (sbp == NULL) { + if (lstat(arcn->name, &sb) != 0) + return (0); + sbp = &sb; + } + + if (ctime_flag && mtime_flag) + return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=) && + timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=)); + else if (ctime_flag) + return (timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=)); + else + return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=)); +} + +/* + * extract() + * extract the member(s) of an archive as specified by user supplied + * pattern(s) (no patterns extracts all members) + */ + +void +extract(void) +{ + ARCHD *arcn; + int res; + off_t cnt; + ARCHD archd; + int fd; + time_t now; + + sltab_start(); + + arcn = &archd; + /* + * figure out archive type; pass any format specific options to the + * archive option processing routine; call the format init routine; + * start up the directory modification time and access mode database + */ + if ((get_arc() < 0) || ((*frmt->options)() < 0) || + ((*frmt->st_rd)() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of 
names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + now = time(NULL); + + /* + * step through each entry on the archive until the format read routine + * says it is done + */ + while (next_head(arcn) == 0) { + if (rd_gnu_string(arcn)) + continue; + + /* + * check for pattern, and user specified options match. When + * all the patterns are matched we are done + */ + if ((res = pat_match(arcn)) < 0) + break; + + if ((res > 0) || (sel_chk(arcn) != 0)) { + /* + * file is not selected. skip past any file data and + * padding and go back for the next archive member + */ + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * with -u or -D only extract when the archive member is newer + * than the file with the same name in the file system (no + * test of being the same type is required). + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this operation can be confusing to the + * user who might expect the test to be done on an existing + * file AFTER the name mod. In honesty the pax spec is probably + * flawed in this respect. + */ + if ((uflag || Dflag) && + cmp_file_times(uflag, Dflag, arcn, NULL)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * this archive member is now been selected. modify the name. + */ + if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if (res > 0) { + /* + * a bad name mod, skip and purge name from link table + */ + purg_lnk(arcn); + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip + */ + if ((Yflag || Zflag) && + cmp_file_times(Yflag, Zflag, arcn, NULL)) { + (void)rd_skip(arcn->skip + arcn->pad); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + + /* + * if required, chdir around. 
+ */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (chdir(arcn->pat->chdname) != 0) + syswarn(1, errno, "Cannot chdir to %s", + arcn->pat->chdname); + + /* + * all ok, extract this member based on type + */ + if (!PAX_IS_REG(arcn->type)) { + /* + * process archive members that are not regular files. + * throw out padding and any data that might follow the + * header (as determined by the format). + */ + if (PAX_IS_HARDLINK(arcn->type)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + + (void)rd_skip(arcn->skip + arcn->pad); + if (res < 0) + purg_lnk(arcn); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + goto popd; + } + /* + * we have a file with data here. If we can not create it, skip + * over the data and purge the name from hard link table + */ + if ((fd = file_creat(arcn)) < 0) { + (void)rd_skip(arcn->skip + arcn->pad); + purg_lnk(arcn); + goto popd; + } + /* + * extract the file from the archive and skip over padding and + * any unprocessed data + */ + res = rd_wrfile(arcn, fd, &cnt); + file_close(arcn, fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (!res) + (void)rd_skip(cnt + arcn->pad); + +popd: + /* + * if required, chdir around. + */ + if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) + if (fchdir(cwdfd) != 0) + syswarn(1, errno, + "Can't fchdir to starting directory"); + } + + /* + * all done, restore directory modes and times as required; make sure + * all patterns supplied by the user were matched; block off signals + * to avoid chance for multiple entry into the cleanup code. + */ + (void)(*frmt->end_rd)(); + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + sltab_process(0); + proc_dir(0); + pat_chk(); +} + +/* + * wr_archive() + * Write an archive. used in both creating a new archive and appends on + * previously written archive. 
+ */ + +static void +wr_archive(ARCHD *arcn, int is_app) +{ + int res; + int hlk; + int wr_one; + off_t cnt; + int (*wrf)(ARCHD *); + int fd = -1; + time_t now; + + /* + * if this format supports hard link storage, start up the database + * that detects them. + */ + if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) + return; + + /* + * if this is not append, and there are no files, we do not write a + * trailer + */ + wr_one = is_app; + + /* + * start up the file traversal code and format specific write + */ + if (ftree_start() < 0) { + if (is_app) + goto trailer; + return; + } else if (((*frmt->st_wr)() < 0)) + return; + + wrf = frmt->wr; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + now = time(NULL); + + /* + * while there are files to archive, process them one at at time + */ + while (next_file(arcn) == 0) { + /* + * check if this file meets user specified options match. + */ + if (sel_chk(arcn) != 0) + continue; + fd = -1; + if (uflag) { + /* + * only archive if this file is newer than a file with + * the same name that is already stored on the archive + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + ftree_skipped_newer(arcn); + continue; + } + } + + /* + * this file is considered selected now. see if this is a hard + * link to a file already stored + */ + ftree_sel(arcn); + if (hlk && (chk_lnk(arcn) < 0)) + break; + + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) { + /* + * we will have to read this file. by opening it now we + * can avoid writing a header to the archive for a file + * we were later unable to read (we also purge it from + * the link table). 
+ */ + if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1,errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + } + + /* + * Now modify the name as requested by the user + */ + if ((res = mod_name(arcn)) < 0) { + /* + * name modification says to skip this file, close the + * file and purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + break; + } + + if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { + /* + * unable to obtain the crc we need, close the file, + * purge link table entry + */ + rdfile_close(arcn, &fd); + purg_lnk(arcn); + continue; + } + + if (vflag) { + if (vflag > 1) + ls_list(arcn, now, listf); + else { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + } + ++flcnt; + + /* + * looks safe to store the file, have the format specific + * routine write routine store the file header on the archive + */ + if ((res = (*wrf)(arcn)) < 0) { + rdfile_close(arcn, &fd); + break; + } + wr_one = 1; + if (res > 0) { + /* + * format write says no file data needs to be stored + * so we are done messing with this file + */ + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + rdfile_close(arcn, &fd); + continue; + } + + /* + * Add file data to the archive, quit on write error. if we + * cannot write the entire file contents to the archive we + * must pad the archive to replace the missing file data + * (otherwise during an extract the file header for the file + * which FOLLOWS this one will not be where we expect it to + * be). 
+ */ + res = wr_rdfile(arcn, fd, &cnt); + rdfile_close(arcn, &fd); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + if (res < 0) + break; + + /* + * pad as required, cnt is number of bytes not written + */ + if (((cnt > 0) && (wr_skip(cnt) < 0)) || + ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) + break; + } + +trailer: + /* + * tell format to write trailer; pad to block boundary; reset directory + * mode/access times, and check if all patterns supplied by the user + * were matched. block off signals to avoid chance for multiple entry + * into the cleanup code + */ + if (wr_one) { + (*frmt->end_wr)(); + wr_fin(); + } + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + if (tflag) + proc_dir(0); + ftree_chk(); +} + +/* + * append() + * Add file to previously written archive. Archive format specified by the + * user must agree with archive. The archive is read first to collect + * modification times (if -u) and locate the archive trailer. The archive + * is positioned in front of the record with the trailer and wr_archive() + * is called to add the new members. + * PAX IMPLEMENTATION DETAIL NOTE: + * -u is implemented by adding the new members to the end of the archive. + * Care is taken so that these do not end up as links to the older + * version of the same file already stored in the archive. It is expected + * when extraction occurs these newer versions will over-write the older + * ones stored "earlier" in the archive (this may be a bad assumption as + * it depends on the implementation of the program doing the extraction). + * It is really difficult to splice in members without either re-writing + * the entire archive (from the point were the old version was), or having + * assistance of the format specification in terms of a special update + * header that invalidates a previous archive record. The posix spec left + * the method used to implement -u unspecified. 
This pax is able to + * over write existing files that it creates. + */ + +void +append(void) +{ + ARCHD *arcn; + int res; + ARCHD archd; + FSUB *orgfrmt; + int udev; + off_t tlen; + + arcn = &archd; + orgfrmt = frmt; + + /* + * Do not allow an append operation if the actual archive is of a + * different format than the user specified format. + */ + if (get_arc() < 0) + return; + if ((orgfrmt != NULL) && (orgfrmt != frmt)) { + paxwarn(1, "Cannot mix current archive format %s with %s", + frmt->name, orgfrmt->name); + return; + } + + /* + * pass the format any options and start up format + */ + if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) + return; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files we see. + */ + if (uflag && (ftime_start() < 0)) + return; + + /* + * some archive formats encode hard links by recording the device and + * file serial number (inode) but copy the file anyway (multiple times) + * to the archive. When we append, we run the risk that newly added + * files may have the same device and inode numbers as those recorded + * on the archive but during a previous run. If this happens, when the + * archive is extracted we get INCORRECT hard links. We avoid this by + * remapping the device numbers so that newly added files will never + * use the same device number as one found on the archive. remapping + * allows new members to safely have links among themselves. remapping + * also avoids problems with file inode (serial number) truncations + * when the inode number is larger than storage space in the archive + * header. See the remap routines for more details. + */ + if ((udev = frmt->udev) && (dev_start() < 0)) + return; + + /* + * reading the archive may take a long time. 
If verbose tell the user + */ + if (vflag) { + (void)fprintf(listf, + "%s: Reading archive to position at the end...", argv0); + vfpart = 1; + } + + /* + * step through the archive until the format says it is done + */ + while (next_head(arcn) == 0) { + /* + * check if this file meets user specified options. + */ + if (sel_chk(arcn) != 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + + if (uflag) { + /* + * see if this is the newest version of this file has + * already been seen, if so skip. + */ + if ((res = chk_ftime(arcn)) < 0) + break; + if (res > 0) { + if (rd_skip(arcn->skip + arcn->pad) == 1) + break; + continue; + } + } + + /* + * Store this device number. Device numbers seen during the + * read phase of append will cause newly appended files with a + * device number seen in the old part of the archive to be + * remapped to an unused device number. + */ + if ((udev && (add_dev(arcn) < 0)) || + (rd_skip(arcn->skip + arcn->pad) == 1)) + break; + } + + /* + * done, finish up read and get the number of bytes to back up so we + * can add new members. The format might have used the hard link table, + * purge it. + */ + tlen = (*frmt->end_rd)(); + lnk_end(); + + /* + * try to position for write, if this fails quit. if any error occurs, + * we will refuse to write + */ + if (appnd_start(tlen) < 0) + return; + + /* + * tell the user we are done reading. 
+ */ + if (vflag && vfpart) { + (void)fputs("done.\n", listf); + vfpart = 0; + } + + /* + * go to the writing phase to add the new members + */ + wr_archive(arcn, 1); +} + +/* + * archive() + * write a new archive + */ + +void +archive(void) +{ + ARCHD archd; + + /* + * if we only are adding members that are newer, we need to save the + * mod times for all files; set up for writing; pass the format any + * options write the archive + */ + if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) + return; + if ((*frmt->options)() < 0) + return; + + wr_archive(&archd, 0); +} + +/* + * copy() + * copy files from one part of the file system to another. this does not + * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an + * archive was written and then extracted in the destination directory + * (except the files are forced to be under the destination directory). + */ + +void +copy(void) +{ + ARCHD *arcn; + int res; + int fddest; + char *dest_pt; + size_t dlen; + size_t drem; + int fdsrc = -1; + struct stat sb; + ARCHD archd; + char dirbuf[PAXPATHLEN+1]; + + sltab_start(); + + arcn = &archd; + /* + * set up the destination dir path and make sure it is a directory. 
We + * make sure we have a trailing / on the destination + */ + dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf)); + if (dlen >= sizeof(dirbuf) || + (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) { + paxwarn(1, "directory name is too long %s", dirptr); + return; + } + dest_pt = dirbuf + dlen; + if (*(dest_pt-1) != '/') { + *dest_pt++ = '/'; + *dest_pt = '\0'; + ++dlen; + } + drem = PAXPATHLEN - dlen; + + if (stat(dirptr, &sb) == -1) { + syswarn(1, errno, "Cannot access destination directory %s", + dirptr); + return; + } + if (!S_ISDIR(sb.st_mode)) { + paxwarn(1, "Destination is not a directory %s", dirptr); + return; + } + + /* + * start up the hard link table; file traversal routines and the + * modification time and access mode database + */ + if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) + return; + + /* + * When we are doing interactive rename, we store the mapping of names + * so we can fix up hard links files later in the archive. + */ + if (iflag && (name_start() < 0)) + return; + + /* + * set up to cp file trees + */ + cp_start(); + + /* + * while there are files to archive, process them + */ + while (next_file(arcn) == 0) { + fdsrc = -1; + + /* + * check if this file meets user specified options + */ + if (sel_chk(arcn) != 0) + continue; + + /* + * if there is already a file in the destination directory with + * the same name and it is newer, skip the one stored on the + * archive. + * NOTE: this test is done BEFORE name modifications as + * specified by pax. this can be confusing to the user who + * might expect the test to be done on an existing file AFTER + * the name mod. 
In honesty the pax spec is probably flawed in + * this respect + */ + if (uflag || Dflag) { + /* + * create the destination name + */ + if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'), + drem + 1) > drem) { + paxwarn(1, "Destination pathname too long %s", + arcn->name); + continue; + } + + /* + * if existing file is same age or newer skip + */ + res = lstat(dirbuf, &sb); + *dest_pt = '\0'; + + if (res == 0) { + ftree_skipped_newer(arcn); + if (cmp_file_times(uflag, Dflag, arcn, &sb)) + continue; + } + } + + /* + * this file is considered selected. See if this is a hard link + * to a previous file; modify the name as requested by the + * user; set the final destination. + */ + ftree_sel(arcn); + if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0)) + break; + if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { + /* + * skip file, purge from link table + */ + purg_lnk(arcn); + continue; + } + + /* + * Non standard -Y and -Z flag. When the existing file is + * same age or newer skip + */ + if ((Yflag || Zflag) && + cmp_file_times(Yflag, Zflag, arcn, NULL)) + continue; + + if (vflag) { + (void)safe_print(arcn->name, listf); + vfpart = 1; + } + ++flcnt; + + /* + * try to create a hard link to the src file if requested + * but make sure we are not trying to overwrite ourselves. + */ + if (lflag) + res = cross_lnk(arcn); + else + res = chk_same(arcn); + if (res <= 0) { + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to create a new file + */ + if (!PAX_IS_REG(arcn->type)) { + /* + * create a link or special file + */ + if (PAX_IS_HARDLINK(arcn->type)) + res = lnk_creat(arcn); + else + res = node_creat(arcn); + if (res < 0) + purg_lnk(arcn); + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + continue; + } + + /* + * have to copy a regular file to the destination directory. 
+ * first open source file and then create the destination file + */ + if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { + syswarn(1, errno, "Unable to open %s to read", + arcn->org_name); + purg_lnk(arcn); + continue; + } + if ((fddest = file_creat(arcn)) < 0) { + rdfile_close(arcn, &fdsrc); + purg_lnk(arcn); + continue; + } + + /* + * copy source file data to the destination file + */ + cp_file(arcn, fdsrc, fddest); + file_close(arcn, fddest); + rdfile_close(arcn, &fdsrc); + + if (vflag && vfpart) { + (void)putc('\n', listf); + vfpart = 0; + } + } + + /* + * restore directory modes and times as required; make sure all + * patterns were selected block off signals to avoid chance for + * multiple entry into the cleanup code. + */ + (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); + ar_close(0); + sltab_process(0); + proc_dir(0); + ftree_chk(); +} + +/* + * next_head() + * try to find a valid header in the archive. Uses format specific + * routines to extract the header and id the trailer. Trailers may be + * located within a valid header or in an invalid header (the location + * is format specific. The inhead field from the option table tells us + * where to look for the trailer). + * We keep reading (and resyncing) until we get enough contiguous data + * to check for a header. If we cannot find one, we shift by a byte, + * add a new byte from the archive to the end of the buffer and try again. + * If we get a read error, we throw out what we have (as we must have + * contiguous data) and start over again. + * ASSUMED: headers fit within a BLKMULT header. + * Return: + * 0 if we got a header, -1 if we are unable to ever find another one + * (we reached the end of input, or we reached the limit on retries.
see + * the specs for rd_wrbuf() for more details) + */ + +static int +next_head(ARCHD *arcn) +{ + int ret; + char *hdend; /* where the next read lands in hdbuf */ + int res; /* bytes to request from the archive */ + int shftsz; /* bytes kept when sliding the buffer by one */ + int hsz; /* header size of the current format */ + int in_resync = 0; /* set when we are in resync mode */ + int cnt = 0; /* counter for trailer function */ + int first = 1; /* on 1st read, EOF isn't premature. */ + + /* + * set up initial conditions, we want a whole frmt->hsz block as we + * have no data yet. + */ + res = hsz = frmt->hsz; + hdend = hdbuf; + shftsz = hsz - 1; + for (;;) { + /* + * keep looping until we get a contiguous FULL buffer + * (frmt->hsz is the proper size) + */ + for (;;) { + if ((ret = rd_wrbuf(hdend, res)) == res) + break; + + /* + * If we read 0 bytes (EOF) from an archive when we + * expect to find a header, we have stepped upon + * an archive without the customary block of zeroes + * end marker. It's just stupid to error out on + * them, so exit gracefully. + */ + if (first && ret == 0) + return(-1); + first = 0; + + /* + * some kind of archive read problem, try to resync the + * storage device, better give the user the bad news. + */ + if ((ret == 0) || (rd_sync() < 0)) { + paxwarn(1,"Premature end of file on archive read"); + return(-1); + } + if (!in_resync) { + if (act == APPND) { + paxwarn(1, + "Archive I/O error, cannot continue"); + return(-1); + } + paxwarn(1,"Archive I/O error. Trying to recover."); + ++in_resync; + } + + /* + * oh well, throw it all out and start over + */ + res = hsz; + hdend = hdbuf; + } + + /* + * ok we have a contiguous buffer of the right size. Call the + * format read routine. If this was not a valid header and this + * format stores trailers outside of the header, call the + * format specific trailer routine to check for a trailer. We + * have to watch out that we do not mis-identify file data or + * block padding as a header or trailer. Format specific + * trailer functions must NOT check for the trailer while we + * are running in resync mode.
Some trailer functions may tell + * us that this block cannot contain a valid header either, so + * we then throw out the entire block and start over. + */ + if ((*frmt->rd)(arcn, hdbuf) == 0) + break; + + if (!frmt->inhead) { + /* + * this format has trailers outside of valid headers + */ + if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){ + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + if (ret == 1) { + /* + * we are in resync and we were told to throw + * the whole block out because none of the + * bytes in this block can be used to form a + * valid header + */ + res = hsz; + hdend = hdbuf; + continue; + } + } + + /* + * Brute force section. + * not a valid header. We may be able to find a header yet. So + * we shift over by one byte, and set up to read one byte at a + * time from the archive and place it at the end of the buffer. + * We will keep moving byte at a time until we find a header or + * get a read error and have to start over. + */ + if (!in_resync) { + if (act == APPND) { + paxwarn(1,"Unable to append, archive header flaw"); + return(-1); + } + paxwarn(1,"Invalid header, starting valid header search."); + ++in_resync; + } + memmove(hdbuf, hdbuf+1, shftsz); + res = 1; + hdend = hdbuf + shftsz; + } + + /* + * ok got a valid header, check for trailer if format encodes it in + * the header. NOTE: the parameters are different than trailer routines + * which encode trailers outside of the header! + */ + if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) { + /* + * valid trailer found, drain input as required + */ + ar_drain(); + return(-1); + } + + ++flcnt; + return(0); +} + +/* + * get_arc() + * Figure out what format an archive is. Handles archive with flaws by + * brute force searches for a legal header in any supported format. The + * format id routines have to be careful to NOT mis-identify a format. + * ASSUMED: headers fit within a BLKMULT header.
+ * Return: + * 0 if archive found -1 otherwise + */ + +static int +get_arc(void) +{ + int i; + int hdsz = 0; /* bytes currently held in hdbuf */ + int res; + int minhd = BLKMULT; /* smallest hsz of the named formats */ + char *hdend; + int notice = 0; /* set once the search warning was printed */ + + /* + * find the smallest header size in all archive formats and then set up + * to read the archive. + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].name != NULL && fsub[ford[i]].hsz < minhd) + minhd = fsub[ford[i]].hsz; + } + if (rd_start() < 0) + return(-1); + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + for (;;) { + for (;;) { + /* + * fill the buffer with at least the smallest header + */ + i = rd_wrbuf(hdend, res); + if (i > 0) + hdsz += i; + if (hdsz >= minhd) + break; + + /* + * if we cannot recover from a read error quit + */ + if ((i == 0) || (rd_sync() < 0)) + goto out; + + /* + * when we get an error none of the data we already + * have can be used to create a legal header (we just + * got an error in the middle), so we throw it all out + * and refill the buffer with fresh data. + */ + res = BLKMULT; + hdsz = 0; + hdend = hdbuf; + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1,"Cannot identify format. Searching..."); + ++notice; + } + } + + /* + * we have at least the size of the smallest header in any + * archive format. Look to see if we have a match. The array + * ford[] is used to specify the header id order to reduce the + * chance of incorrectly id'ing a valid header (some formats + * may be subsets of each other and the order would then be + * important). + */ + for (i = 0; ford[i] >= 0; ++i) { + if (fsub[ford[i]].id == NULL || + (*fsub[ford[i]].id)(hdbuf, hdsz) < 0) + continue; + frmt = &(fsub[ford[i]]); + /* + * yuck, to avoid slow special case code in the extract + * routines, just push this header back as if it was + * not seen. We have left extra space at start of the + * buffer for this purpose. This is a bit ugly, but + * adding all the special case code is far worse.
+ */ + pback(hdbuf, hdsz); + return(0); + } + + /* + * We have a flawed archive, no match. we start searching, but + * we never allow additions to flawed archives + */ + if (!notice) { + if (act == APPND) + return(-1); + paxwarn(1, "Cannot identify format. Searching..."); + ++notice; + } + + /* + * brute force search for a header that we can id. + * we shift through byte at a time. this is slow, but we cannot + * determine the nature of the flaw in the archive in a + * portable manner + */ + if (--hdsz > 0) { + memmove(hdbuf, hdbuf+1, hdsz); + res = BLKMULT - hdsz; + hdend = hdbuf + hdsz; + } else { + res = BLKMULT; + hdend = hdbuf; + hdsz = 0; + } + } + + out: + /* + * we cannot find a header, bow, apologize and quit + */ + paxwarn(1, "Sorry, unable to determine archive format."); + return(-1); +} + +/* + * rd_gnu_string() + * Read the file contents into an allocated string if it is a GNU tar + * long link/file. + * Return: + * 1 if the entry is a GNU long link/file entry (the string is left + * NULL when allocating or reading it failed), 0 otherwise + */ + +static int +rd_gnu_string(ARCHD *arcn) +{ + char **strp; + + switch (arcn->type) { + case PAX_GLF: + strp = &gnu_name_string; + break; + case PAX_GLL: + strp = &gnu_link_string; + break; + default: + strp = NULL; + break; + } + if (!strp) + return 0; + /* + * we need to read, to get the real filename + */ + if (*strp) + err(1, "WARNING! Major Internal Error!
GNU hack Failing!"); + /* + * room for the whole entry plus a terminating NUL + */ + *strp = malloc(arcn->sb.st_size + 1); + if (*strp == NULL) { + paxwarn(1, "Out of memory"); + (void)rd_skip(arcn->skip + arcn->pad); + } else if (rd_wrbuf(*strp, arcn->sb.st_size) < arcn->sb.st_size) { + free(*strp); + *strp = NULL; + } else { + (*strp)[arcn->sb.st_size] = '\0'; + (void)rd_skip(arcn->pad); + } + /* + * the string pointer is NULL here if the allocation or the read failed + */ + return 1; +} diff --git a/bin/pax/buf_subs.c b/bin/pax/buf_subs.c new file mode 100644 index 0000000..e84f9e0 --- /dev/null +++ b/bin/pax/buf_subs.c @@ -0,0 +1,983 @@ +/* $OpenBSD: buf_subs.c,v 1.31 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: buf_subs.c,v 1.5 1995/03/21 09:07:08 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include "pax.h" +#include "extern.h" + +/* + * routines which implement archive and file buffering + */ + +#define MINFBSZ 512 /* default block size for hole detect */ +#define MAXFLT 10 /* default media read error limit */ + +/* + * Need to change bufmem to dynamic allocation when the upper + * limit on blocking size is removed (though that will violate pax spec) + * MAXBLK define and tests will also need to be updated. 
+ */ +static char bufmem[MAXBLK+BLKMULT]; /* i/o buffer + pushback id space */ +static char *buf; /* normal start of i/o buffer */ +static char *bufend; /* end or last char in i/o buffer */ +static char *bufpt; /* read/write point in i/o buffer */ +int blksz = MAXBLK; /* block input/output size in bytes */ +int wrblksz; /* user spec output size in bytes */ +int maxflt = MAXFLT; /* MAX consecutive media errors */ +int rdblksz; /* first read blksize (tapes only) */ +off_t wrlimit; /* # of bytes written per archive vol */ +off_t wrcnt; /* # of bytes written on current vol */ +off_t rdcnt; /* # of bytes read on current vol */ + +/* + * wr_start() + * set up the buffering system to operate in a write mode + * Return: + * 0 if ok, -1 if the user specified write block size violates pax spec + * or the archive cannot be opened + */ + +int +wr_start(void) +{ + buf = &(bufmem[BLKMULT]); + /* + * Check to make sure the write block size meets pax specs. If the user + * does not specify a blocksize, we use the format default blocksize. + * We must be picky on writes, so we do not allow the user to create an + * archive that might be hard to read elsewhere.
If all ok, we then + * open the first archive volume + */ + if (!wrblksz) + wrblksz = frmt->bsz; + if (wrblksz > MAXBLK) { + paxwarn(1, "Write block size of %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size of %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + /* + * NOTE(review): the message only says the archive may not be + * portable, yet the start is still rejected with -1 - confirm that + * refusing sizes above MAXBLK_POSIX is intended. + */ + if (wrblksz > MAXBLK_POSIX) { + paxwarn(0, "Write block size of %d larger than POSIX max %d, archive may not be portable", + wrblksz, MAXBLK_POSIX); + return(-1); + } + + /* + * we only allow wrblksz to be used with all archive operations + */ + blksz = rdblksz = wrblksz; + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + wrcnt = 0; + bufend = buf + wrblksz; + bufpt = buf; + return(0); +} + +/* + * rd_start() + * set up buffering system to read an archive + * Return: + * 0 if ok, -1 otherwise + */ + +int +rd_start(void) +{ + /* + * leave space for the header pushback (see get_arc()). If we are + * going to append and user specified a write block size, check it + * right away + */ + buf = &(bufmem[BLKMULT]); + if ((act == APPND) && wrblksz) { + if (wrblksz > MAXBLK) { + paxwarn(1,"Write block size %d too large, maximium is: %d", + wrblksz, MAXBLK); + return(-1); + } + if (wrblksz % BLKMULT) { + paxwarn(1, "Write block size %d is not a %d byte multiple", + wrblksz, BLKMULT); + return(-1); + } + } + + /* + * open the archive + */ + if ((ar_open(arcname) < 0) && (ar_next() < 0)) + return(-1); + bufend = buf + rdblksz; + bufpt = bufend; /* buffer starts out empty, the first read refills it */ + rdcnt = 0; + return(0); +} + +/* + * cp_start() + * set up buffer system for copying within the file system + */ + +void +cp_start(void) +{ + buf = &(bufmem[BLKMULT]); + rdblksz = blksz = MAXBLK; +} + +/* + * appnd_start() + * Set up the buffering system to append new members to an archive that + * was just read. The last block(s) of an archive may contain a format + * specific trailer. To append a new member, this trailer has to be + * removed from the archive.
The first byte of the trailer is replaced by + * the start of the header of the first file added to the archive. The + * format specific end read function tells us how many bytes to move + * backwards in the archive to be positioned BEFORE the trailer. Two + * different position have to be adjusted, the O.S. file offset (e.g. the + * position of the tape head) and the write point within the data we have + * stored in the read (soon to become write) buffer. We may have to move + * back several records (the number depends on the size of the archive + * record and the size of the format trailer) to read up the record where + * the first byte of the trailer is recorded. Trailers may span (and + * overlap) record boundaries. + * We first calculate which record has the first byte of the trailer. We + * move the OS file offset back to the start of this record and read it + * up. We set the buffer write pointer to be at this byte (the byte where + * the trailer starts). We then move the OS file pointer back to the + * start of this record so a flush of this buffer will replace the record + * in the archive. + * A major problem is rewriting this last record. For archives stored + * on disk files, this is trivial. However, many devices are really picky + * about the conditions under which they will allow a write to occur. + * Often devices restrict the conditions where writes can be made, + * so it may not be feasible to append archives stored on all types of + * devices. + * Return: + * 0 for success, -1 for failure + */ + +int +appnd_start(off_t skcnt) +{ + int res; /* bytes returned by ar_read() */ + off_t cnt; /* rewind distance, later the valid bytes kept in the block */ + + if (exit_val != 0) { + paxwarn(0, "Cannot append to an archive that may have flaws."); + return(-1); + } + /* + * if the user did not specify a write blocksize, inherit the size used + * in the last archive volume read. (If a is set we still use rdblksz + * until next volume, cannot shift sizes within a single volume).
+ */ + if (!wrblksz) + wrblksz = blksz = rdblksz; + else + blksz = rdblksz; + + /* + * make sure that this volume allows appends + */ + if (ar_app_ok() < 0) + return(-1); + + /* + * Calculate bytes to move back and move in front of record where we + * need to start writing from. Remember we have to add in any padding + * that might be in the buffer after the trailer in the last block. We + * travel skcnt + padding ROUNDED UP to blksize. + */ + skcnt += bufend - bufpt; + if ((cnt = (skcnt/blksz) * blksz) < skcnt) + cnt += blksz; + if (ar_rev(cnt) < 0) + goto out; + + /* + * We may have gone too far if there is valid data in the block we are + * now in front of, read up the block and position the pointer after + * the valid data. + */ + if ((cnt -= skcnt) > 0) { + /* + * watch out for stupid tape drives. ar_rev() will set rdblksz + * to be real physical blocksize so we must loop until we get + * the old rdblksz (now in blksz). If ar_rev() fouls up the + * determination of the physical block size, we will fail. + */ + bufpt = buf; + bufend = buf + blksz; + while (bufpt < bufend) { + if ((res = ar_read(bufpt, rdblksz)) <= 0) + goto out; + bufpt += res; + } + if (ar_rev(bufpt - buf) < 0) + goto out; + bufpt = buf + cnt; + bufend = buf + blksz; + } else { + /* + * buffer is empty + */ + bufend = buf + blksz; + bufpt = buf; + } + rdblksz = blksz; + rdcnt -= skcnt; + wrcnt = 0; + + /* + * At this point we are ready to write. If the device requires special + * handling to write at a point where previously recorded data resides, + * that is handled in ar_set_wr(). From now on we operate under normal + * ARCHIVE mode (write) conditions + */ + if (ar_set_wr() < 0) + return(-1); + act = ARCHIVE; + return(0); + + out: + paxwarn(1, "Unable to rewrite archive trailer, cannot append."); + return(-1); +} + +/* + * rd_sync() + * A read error occurred on this archive volume. Resync the buffer and + * try to reset the device (if possible) so we can continue to read.
Keep + * trying to do this until we get a valid read, or we reach the limit on + * consecutive read faults (at which point we give up). The user can + * adjust the read error limit through a command line option. + * Returns: + * 0 on success, and -1 on failure + */ + +int +rd_sync(void) +{ + int errcnt = 0; /* failed reads since the last volume change */ + int res; + + /* + * if the user says bail out on first fault, we are out of here... + */ + if (maxflt == 0) + return(-1); + if (act == APPND) { + paxwarn(1, "Unable to append when there are archive read errors."); + return(-1); + } + + /* + * poke at device and try to get past media error + */ + if (ar_rdsync() < 0) { + if (ar_next() < 0) + return(-1); + else + rdcnt = 0; + } + + for (;;) { + if ((res = ar_read(buf, blksz)) > 0) { + /* + * All right! got some data, fill that buffer + */ + bufpt = buf; + bufend = buf + res; + rdcnt += res; + return(0); + } + + /* + * Oh well, yet another failed read... + * if error limit reached, ditch. otherwise poke device to move + * past bad media and try again. if media is badly damaged, we ask + * the poor (and upset user at this point) for the next archive + * volume. remember the goal on reads is to get the most we + * can extract out of the archive. + */ + if ((maxflt > 0) && (++errcnt > maxflt)) + paxwarn(0,"Archive read error limit (%d) reached",maxflt); + else if (ar_rdsync() == 0) + continue; + if (ar_next() < 0) + break; + rdcnt = 0; + errcnt = 0; + } + return(-1); +} + +/* + * pback() + * push the data used during the archive id phase back into the I/O + * buffer. This is required as we cannot be sure that the header does NOT + * overlap a block boundary (as in the case we are trying to recover a + * flawed archive). This was not designed to be used for any other + * purpose. (What software engineering, HA!) + * WARNING: do not even THINK of pback greater than BLKMULT, unless the + * pback space is increased.
+ */ + +void +pback(char *pt, int cnt) +{ + bufpt -= cnt; /* may back up into the BLKMULT bytes reserved ahead of buf */ + memcpy(bufpt, pt, cnt); +} + +/* + * rd_skip() + * skip forward in the archive during an archive read. Used to get quickly + * past file data and padding for files the user did NOT select. + * Return: + * 0 if ok, -1 failure, and 1 when EOF on the archive volume was detected. + */ + +int +rd_skip(off_t skcnt) +{ + off_t res; + off_t cnt; + off_t skipped = 0; + + /* + * consume what data we have in the buffer. If we have to move forward + * whole records, we call the low level skip function to see if we can + * move within the archive without doing the expensive reads on data we + * do not want. + */ + if (skcnt == 0) + return(0); + res = MINIMUM((bufend - bufpt), skcnt); + bufpt += res; + skcnt -= res; + + /* + * if skcnt is now 0, then no additional i/o is needed + */ + if (skcnt == 0) + return(0); + + /* + * We have to read more, calculate complete and partial record reads + * based on rdblksz. we skip over "cnt" complete records + */ + res = skcnt%rdblksz; + cnt = (skcnt/rdblksz) * rdblksz; + + /* + * if the skip fails, we will have to resync. ar_fow will tell us + * how much it can skip over. We will have to read the rest. + */ + if (ar_fow(cnt, &skipped) < 0) + return(-1); + res += cnt - skipped; + rdcnt += skipped; + + /* + * what is left we have to read (which may be the whole thing if + * ar_fow() told us the device can only read to skip records); + */ + while (res > 0) { + cnt = bufend - bufpt; + /* + * if the read fails, we will have to resync + */ + if ((cnt <= 0) && ((cnt = buf_fill()) < 0)) + return(-1); + if (cnt == 0) + return(1); + cnt = MINIMUM(cnt, res); + bufpt += cnt; + res -= cnt; + } + return(0); +} + +/* + * wr_fin() + * flush out any data (and pad if required) the last block. We always pad + * with zero (even though we do not have to). Padding with 0 makes it a + * lot easier to recover if the archive is damaged. zero padding SHOULD + * BE a requirement....
+ */ + +void +wr_fin(void) +{ + if (bufpt > buf) { + /* + * zero the unused tail of the last block before flushing it + */ + memset(bufpt, 0, bufend - bufpt); + bufpt = bufend; + (void)buf_flush(blksz); + } +} + +/* + * wr_rdbuf() + * fill the write buffer from data passed to it in a buffer (usually used + * by format specific write routines to pass a file header). On failure we + * punt. We do not allow the user to continue to write flawed archives. + * We assume these headers are not very large (the memory copy we use is + * a bit expensive). + * Return: + * 0 if buffer was filled ok, -1 otherwise (buffer flush failure) + */ + +int +wr_rdbuf(char *out, int outcnt) +{ + int cnt; + + /* + * while there is data to copy, copy it into the write buffer. when the + * write buffer fills, flush it to the archive and continue + */ + while (outcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + /* + * only move what we have space for + */ + cnt = MINIMUM(cnt, outcnt); + memcpy(bufpt, out, cnt); + bufpt += cnt; + out += cnt; + outcnt -= cnt; + } + return(0); +} + +/* + * rd_wrbuf() + * copy from the read buffer into a supplied buffer a specified number of + * bytes. If the read buffer is empty fill it and continue to copy. + * usually used to obtain a file header for processing by a format + * specific read routine. + * Return: + * number of bytes copied to the buffer, 0 indicates EOF on archive volume, + * -1 is a read error + */ + +int +rd_wrbuf(char *in, int cpcnt) +{ + int res; + int cnt; + int incnt = cpcnt; /* bytes still to be copied */ + + /* + * loop until we fill the buffer with the requested number of bytes + */ + while (incnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) { + /* + * read error, return what we got (or the error if + * no data was copied).
The caller must know that an + * error occurred and has the best knowledge what to + * do with it + */ + if ((res = cpcnt - incnt) > 0) + return(res); + return(cnt); + } + + /* + * calculate how much data to copy based on what is left and + * state of buffer + */ + cnt = MINIMUM(cnt, incnt); + memcpy(in, bufpt, cnt); + bufpt += cnt; + incnt -= cnt; + in += cnt; + } + return(cpcnt); +} + +/* + * wr_skip() + * skip forward during a write. In other words add padding to the file. + * we add zero filled padding as it makes flawed archives much easier to + * recover from. the caller tells us how many bytes of padding to add. + * This routine was not designed to add HUGE amount of padding, just small + * amounts (a few 512 byte blocks at most) + * Return: + * 0 if ok, -1 if there was a buf_flush failure + */ + +int +wr_skip(off_t skcnt) +{ + int cnt; + + /* + * loop while there is more padding to add + */ + while (skcnt > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) + return(-1); + cnt = MINIMUM(cnt, skcnt); + memset(bufpt, 0, cnt); + bufpt += cnt; + skcnt -= cnt; + } + return(0); +} + +/* + * wr_rdfile() + * fill write buffer with the contents of a file. We are passed an open + * file descriptor to the file and the archive structure that describes the + * file we are storing. The variable "left" is modified to contain the + * number of bytes of the file we were NOT able to write to the archive. + * it is important that we always write EXACTLY the number of bytes that + * the format specific write routine told us to. The file can also get + * bigger, so reading to the end of file would create an improper archive, + * we just detect this case and warn the user. We never create a bad + * archive if we can avoid it. Of course trying to archive files that are + * active is asking for trouble. If we fail, we pass back how much we + * could NOT copy and let the caller deal with it. + * Return: + * 0 ok, -1 if archive write failure.
a short read of the file returns a + * 0, but "left" is set to be greater than zero. + */ + +int +wr_rdfile(ARCHD *arcn, int ifd, off_t *left) +{ + int cnt; + int res = 0; + off_t size = arcn->sb.st_size; /* bytes still to be read from ifd */ + struct stat sb; + + /* + * while there are more bytes to write + */ + while (size > 0) { + cnt = bufend - bufpt; + if ((cnt <= 0) && ((cnt = buf_flush(blksz)) < 0)) { + *left = size; + return(-1); + } + cnt = MINIMUM(cnt, size); + if ((res = read(ifd, bufpt, cnt)) <= 0) + break; + size -= res; + bufpt += res; + } + + /* + * better check the file did not change during this operation + * or the file read failed. + */ + if (res < 0) + syswarn(1, errno, "Read fault on %s", arcn->org_name); + else if (size != 0) + paxwarn(1, "File changed size during read %s", arcn->org_name); + else if (fstat(ifd, &sb) == -1) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during copy to archive", + arcn->org_name); + *left = size; + return(0); +} + +/* + * rd_wrfile() + * extract the contents of a file from the archive. If we are unable to + * extract the entire file (due to failure to write the file) we return + * the numbers of bytes we did NOT process. This way the caller knows how + * many bytes to skip past to find the next archive header. If the failure + * was due to an archive read, we will catch that when we try to skip. If + * the format supplies a file data crc value, we calculate the actual crc + * so that it can be compared to the value stored in the header + * NOTE: + * We call a special function to write the file. This function attempts to + * restore file holes (blocks of zeros) into the file. When files are + * sparse this saves space, and is a LOT faster. For non sparse files + * the performance hit is small. As of this writing, no archive supports + * information on where the file holes are. + * Return: + * 0 ok, -1 if archive read failure.
if we cannot write the entire file, + * we return a 0 but "left" is set to be the amount unwritten + */ + +int +rd_wrfile(ARCHD *arcn, int ofd, off_t *left) +{ + int cnt = 0; + off_t size = arcn->sb.st_size; /* bytes still to be extracted */ + int res = 0; + char *fnm = arcn->name; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + u_int32_t crc = 0; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(ofd, &sb) == 0) { + if (sb.st_blksize > 0) + sz = (int)sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + *left = 0; + + /* + * Copy the archive to the file the number of bytes specified. We have + * to assume that we want to recover file holes as none of the archive + * formats can record the location of file holes. + */ + while (size > 0) { + cnt = bufend - bufpt; + /* + * if we get a read error, we do not want to skip, as we may + * miss a header, so we do not set left, but if we get a write + * error, we do want to skip over the unprocessed data. + */ + if ((cnt <= 0) && ((cnt = buf_fill()) <= 0)) + break; + cnt = MINIMUM(cnt, size); + if ((res = file_write(ofd,bufpt,cnt,&rem,&isem,sz,fnm)) <= 0) { + *left = size; + break; + } + + if (docrc) { + /* + * update the actual crc value (a running sum of the + * data bytes just written) + */ + cnt = res; + while (--cnt >= 0) + crc += *bufpt++ & 0xff; + } else + bufpt += res; + size -= res; + } + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (isem && (arcn->sb.st_size > 0)) + file_flush(ofd, fnm, isem); + + /* + * if we failed from archive read, we do not want to skip + */ + if ((size > 0) && (*left == 0)) + return(-1); + + /* + * some formats record a crc on file data.
If so, then we compare the + * calculated crc to the crc stored in the archive + */ + if (docrc && (size == 0) && (arcn->crc != crc)) + paxwarn(1,"Actual crc does not match expected crc %s",arcn->name); + return(0); +} + +/* + * cp_file() + * copy the contents of one file to another. used during -rw phase of pax + * just as in rd_wrfile() we use a special write function to write the + * destination file so we can properly copy files with holes. + * fd1 is the open source file, fd2 the open destination file. + */ + +void +cp_file(ARCHD *arcn, int fd1, int fd2) +{ + int cnt; + off_t cpcnt = 0; /* bytes copied so far */ + int res = 0; + char *fnm = arcn->name; + int no_hole = 0; + int isem = 1; + int rem; + int sz = MINFBSZ; + struct stat sb; + + /* + * check for holes in the source file. If none, we will use regular + * write instead of file write. + */ + if (((off_t)(arcn->sb.st_blocks * BLKMULT)) >= arcn->sb.st_size) + ++no_hole; + + /* + * pass the blocksize of the file being written to the write routine, + * if the size is zero, use the default MINFBSZ + */ + if (fstat(fd2, &sb) == 0) { + if (sb.st_blksize > 0) + sz = sb.st_blksize; + } else + syswarn(0,errno,"Unable to obtain block size for file %s",fnm); + rem = sz; + + /* + * read the source file and copy to destination file until EOF + */ + for (;;) { + if ((cnt = read(fd1, buf, blksz)) <= 0) + break; + if (no_hole) + res = write(fd2, buf, cnt); + else + res = file_write(fd2, buf, cnt, &rem, &isem, sz, fnm); + /* + * a failed or short write ends the copy + */ + if (res != cnt) + break; + cpcnt += cnt; + } + + /* + * check to make sure the copy is valid.
+ */ + if (res < 0) + syswarn(1, errno, "Failed write during copy of %s to %s", + arcn->org_name, arcn->name); + else if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File %s changed size during copy to %s", + arcn->org_name, arcn->name); + else if (fstat(fd1, &sb) == -1) + syswarn(1, errno, "Failed stat of %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during copy to %s", + arcn->org_name, arcn->name); + + /* + * if the last block has a file hole (all zero), we must make sure this + * gets updated in the file. We force the last block of zeros to be + * written. just closing with the file offset moved forward may not put + * a hole at the end of the file. + */ + if (!no_hole && isem && (arcn->sb.st_size > 0)) + file_flush(fd2, fnm, isem); +} + +/* + * buf_fill() + * fill the read buffer with the next record (or what we can get) from + * the archive volume. + * Return: + * Number of bytes of data in the read buffer, -1 for read error, and + * 0 when finished (user specified termination in ar_next()). + */ + +int +buf_fill(void) +{ + int cnt; + static int fini = 0; /* sticky: once set, every later call returns 0 */ + + if (fini) + return(0); + + for (;;) { + /* + * try to fill the buffer. on EOF the next archive volume is + * opened and we try again. + */ + if ((cnt = ar_read(buf, blksz)) > 0) { + bufpt = buf; + bufend = buf + cnt; + rdcnt += cnt; + return(cnt); + } + + /* + * errors require resync, EOF goes to next archive + * but in case we have not determined yet the format, + * this means that we have a very short file, so we + * are done again. + */ + if (cnt < 0) + break; + if (frmt == NULL || ar_next() < 0) { + fini = 1; + return(0); + } + rdcnt = 0; + } + exit_val = 1; + return(-1); +} + +/* + * buf_flush() + * force the write buffer to the archive. We are passed the number of + * bytes in the buffer at the point of the flush. When we change archives + * the record size might change. (either larger or smaller).
+ * Return: + * number of bytes written, 0 if a volume change left free space in + * the buffer, -1 when a write error occurs. + */ + +int +buf_flush(int bufcnt) +{ + int cnt; + int push = 0; /* bytes beyond one block after the block size shrank */ + int totcnt = 0; /* bytes written by this call */ + + /* + * if we have reached the user specified byte count for each archive + * volume, prompt for the next volume. (The non-standard -R flag). + * NOTE: If the wrlimit is smaller than wrcnt, we will always write + * at least one record. We always round limit UP to next blocksize. + */ + if ((wrlimit > 0) && (wrcnt > wrlimit)) { + paxwarn(0, "User specified archive volume byte limit reached."); + if (ar_next() < 0) { + wrcnt = 0; + exit_val = 1; + return(-1); + } + wrcnt = 0; + + /* + * The new archive volume might have changed the size of the + * write blocksize. if so we figure out if we need to write + * (one or more times), or if there is now free space left in + * the buffer (it is no longer full). bufcnt has the number of + * bytes in the buffer, (the blocksize, at the point we were + * CALLED). Push has the amount of "extra" data in the buffer + * if the block size has shrunk from a volume change. + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * We have enough data to write at least one archive block + */ + for (;;) { + /* + * write a block and check if it all went out ok + */ + cnt = ar_write(buf, blksz); + if (cnt == blksz) { + /* + * the write went ok + */ + wrcnt += cnt; + totcnt += cnt; + if (push > 0) { + /* we have extra data to push to the front. + * check for more than 1 block of push, and if + * so we loop back to write again + * NOTE(review): the memcpy() ranges overlap if push + * is larger than blksz - memmove() looks safer; confirm + */ + memcpy(buf, bufend, push); + bufpt = buf + push; + if (push >= blksz) { + push -= blksz; + continue; + } + } else + bufpt = buf; + return(totcnt); + } else if (cnt > 0) { + /* + * Oh drat we got a partial write! + * if format does not care about alignment let it go, + * we warned the user in ar_write().... but this means + * the last record on this volume violates pax spec....
+ * NOTE(review): the memcpy() below moves the unwritten tail to + * the front of buf; the ranges overlap when more is left than + * was written, so memmove() looks safer - confirm. + */ + totcnt += cnt; + wrcnt += cnt; + bufpt = buf + cnt; + cnt = bufcnt - cnt; + memcpy(buf, bufpt, cnt); + bufpt = buf + cnt; + if (!frmt->blkalgn || ((cnt % frmt->blkalgn) == 0)) + return(totcnt); + break; + } + + /* + * All done, go to next archive + */ + wrcnt = 0; + if (ar_next() < 0) + break; + + /* + * The new archive volume might also have changed the block + * size. if so, figure out if we have too much or too little + * data for using the new block size + */ + bufend = buf + blksz; + if (blksz > bufcnt) + return(0); + if (blksz < bufcnt) + push = bufcnt - blksz; + } + + /* + * write failed, stop pax. we must not create a bad archive! + */ + exit_val = 1; + return(-1); +} diff --git a/bin/pax/cpio.1 b/bin/pax/cpio.1 new file mode 100644 index 0000000..89a6e36 --- /dev/null +++ b/bin/pax/cpio.1 @@ -0,0 +1,309 @@ +.\" $OpenBSD: cpio.1,v 1.36 2020/01/16 16:46:46 schwarze Exp $ +.\" +.\" Copyright (c) 1997 SigmaSoft, Th. Lockert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt CPIO 1 +.Os +.Sh NAME +.Nm cpio +.Nd copy file archives in and out +.Sh SYNOPSIS +.Nm cpio +.Fl o +.Op Fl AaBcjLvZz +.Op Fl C Ar bytes +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl O Ar archive +.No \*(Lt Ar name-list +.Op \*(Gt Ar archive +.Nm cpio +.Fl i +.Op Fl 6BbcdfjmrSstuvZz +.Op Fl C Ar bytes +.Op Fl E Ar file +.Op Fl F Ar archive +.Op Fl H Ar format +.Op Fl I Ar archive +.Op Ar pattern ... +.Op \*(Lt Ar archive +.Nm cpio +.Fl p +.Op Fl adLlmuv +.Ar destination-directory +.No \*(Lt Ar name-list +.Sh DESCRIPTION +The +.Nm +command copies files to and from a +.Nm +archive. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl o +Create an archive. +Reads the list of files to store in the +archive from standard input, and writes the archive on standard +output. +.Bl -tag -width Ds +.It Fl A +Append to the specified archive. +.It Fl a +Reset the access times on files that have been copied to the +archive. +.It Fl B +Set block size of output to 5120 bytes. +.It Fl C Ar bytes +Set the block size of output to +.Ar bytes . +.It Fl c +Use ASCII format for +.Nm +header for portability. +.It Fl F Ar archive +Use the specified file as the input for the archive. +.It Fl H Ar format +Write the archive in the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. 
+.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl j +Compress archive using the bzip2 format. +The bzip2 utility must be installed separately. +.It Fl L +Follow symbolic links. +.It Fl O Ar archive +Use the specified file name as the archive to write to. +.It Fl v +Be verbose about operations. +List filenames as they are written to the archive. +.It Fl Z +Compress archive using +.Xr compress 1 +format. +.It Fl z +Compress archive using +.Xr gzip 1 +format. +.El +.It Fl i +Restore files from an archive. +Reads the archive file from +standard input and extracts files matching the +.Ar patterns +that were specified on the command line. +.Bl -tag -width Ds +.It Fl 6 +Process old-style +.Nm +format archives. +.It Fl B +Set the block size of the archive being read to 5120 bytes. +.It Fl b +Do byte and word swapping after reading in data from the +archive, for restoring archives created on systems with +a different byte order. +.It Fl C Ar bytes +Read archive written with a block size of +.Ar bytes . +.It Fl c +Expect the archive headers to be in ASCII format. +.It Fl d +Create any intermediate directories as needed during +restore. +.It Fl E Ar file +Read list of file name patterns to extract or list from +.Ar file . +.It Fl F Ar archive , Fl I Ar archive +Use the specified file as the input for the archive. +.It Fl f +Restore all files except those matching the +.Ar patterns +given on the command line. +.It Fl H Ar format +Read an archive of the specified format. +Recognized formats are: +.Pp +.Bl -tag -width sv4cpio -compact +.It Ar bcpio +Old binary +.Nm +format. +.It Ar cpio +Old octal character +.Nm +format. +.It Ar sv4cpio +SVR4 hex +.Nm +format. +.It Ar tar +Old tar format. +.It Ar ustar +POSIX ustar format. +.El +.It Fl j +Uncompress archive using the bzip2 format. +The bzip2 utility must be installed separately. +.It Fl m +Restore modification times on files. 
+.It Fl r +Rename restored files interactively. +.It Fl S +Swap words after reading data from the archive. +.It Fl s +Swap bytes after reading data from the archive. +.It Fl t +Only list the contents of the archive, no files or +directories will be created. +.It Fl u +Overwrite files even when the file in the archive is +older than the one that will be overwritten. +.It Fl v +Be verbose about operations. +List filenames as they are copied in from the archive. +.It Fl Z +Uncompress archive using +.Xr compress 1 +format. +.It Fl z +Uncompress archive using +.Xr gzip 1 +format. +.El +.It Fl p +Copy files from one location to another in a single pass. +The list of files to copy are read from standard input and +written out to a directory relative to the specified +.Ar directory +argument. +.Bl -tag -width Ds +.It Fl a +Reset the access times on files that have been copied. +.It Fl d +Create any intermediate directories as needed to write +the files at the new location. +.It Fl L +Follow symbolic links. +.It Fl l +When possible, link files rather than creating an +extra copy. +.It Fl m +Restore modification times on files. +.It Fl u +Overwrite files even when the original file being copied is +older than the one that will be overwritten. +.It Fl v +Be verbose about operations. +List filenames as they are copied. +.El +.El +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +All files were processed successfully. +.It 1 +An error occurred. 
+.El +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive, which may violate the +specific archive format specification. +.Sh SEE ALSO +.Xr pax 1 , +.Xr tar 1 +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. +.Sh CAVEATS +Different file formats have different maximum file sizes. +It is recommended that a format such as cpio or ustar +be used for larger files. +.Bl -column "File format" "Maximum file size" -offset indent +.It Sy "File format" Ta Sy "Maximum file size" +.It bcpio Ta "4 Gigabytes" +.It sv4cpio Ta "4 Gigabytes" +.It cpio Ta "8 Gigabytes" +.It tar Ta "8 Gigabytes" +.It ustar Ta "8 Gigabytes" +.El +.Sh BUGS +The +.Fl s +and +.Fl S +options are currently not implemented. diff --git a/bin/pax/cpio.c b/bin/pax/cpio.c new file mode 100644 index 0000000..769a9df --- /dev/null +++ b/bin/pax/cpio.c @@ -0,0 +1,1106 @@ +/* $OpenBSD: cpio.c,v 1.33 2017/09/16 07:42:34 otto Exp $ */ +/* $NetBSD: cpio.c,v 1.5 1995/03/21 09:07:13 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "cpio.h" +#include "extern.h" + +static int rd_nm(ARCHD *, int); +static int rd_ln_nm(ARCHD *); +static int com_rd(ARCHD *); + +/* + * Routines which support the different cpio versions + */ + +static int swp_head; /* binary cpio header byte swap */ + +/* + * Routines common to all versions of cpio + */ + +/* + * cpio_strd() + * Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +cpio_strd(void) +{ + return(lnk_start()); +} + +/* + * cpio_trail() + * Called to determine if a header block is a valid trailer. We are + * passed the block, the in_sync flag (which tells us we are in resync + * mode; looking for a valid header), and cnt (which starts at zero) + * which is used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, + */ + +int +cpio_trail(ARCHD *arcn, char *notused, int notused2, int *notused3) +{ + /* + * look for trailer id in file we are about to process + */ + if ((strcmp(arcn->name, TRAILER) == 0) && (arcn->sb.st_size == 0)) + return(0); + return(-1); +} + +/* + * com_rd() + * operations common to all cpio read functions. 
+ * Return: + * 0 + */ + +static int +com_rd(ARCHD *arcn) +{ + arcn->skip = 0; + arcn->pat = NULL; + arcn->org_name = arcn->name; + switch (arcn->sb.st_mode & C_IFMT) { + case C_ISFIFO: + arcn->type = PAX_FIF; + break; + case C_ISDIR: + arcn->type = PAX_DIR; + break; + case C_ISBLK: + arcn->type = PAX_BLK; + break; + case C_ISCHR: + arcn->type = PAX_CHR; + break; + case C_ISLNK: + arcn->type = PAX_SLK; + break; + case C_ISOCK: + arcn->type = PAX_SCK; + break; + case C_ISCTG: + case C_ISREG: + default: + /* + * we have file data, set up skip (pad is set in the format + * specific sections) + */ + arcn->sb.st_mode = (arcn->sb.st_mode & 0xfff) | C_ISREG; + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + } + if (chk_lnk(arcn) < 0) + return(-1); + return(0); +} + +/* + * cpio_endwr() + * write the special file with the name trailer in the proper format + * Return: + * result of the write of the trailer from the cpio specific write func + */ + +int +cpio_endwr(void) +{ + ARCHD last; + + /* + * create a trailer request and call the proper format write function + */ + memset(&last, 0, sizeof(last)); + last.nlen = sizeof(TRAILER) - 1; + last.type = PAX_REG; + last.sb.st_nlink = 1; + (void)strlcpy(last.name, TRAILER, sizeof(last.name)); + return((*frmt->wr)(&last)); +} + +/* + * rd_nm() + * read in the file name which follows the cpio header + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_nm(ARCHD *arcn, int nsz) +{ + /* + * do not even try bogus values + */ + if ((nsz == 0) || ((size_t)nsz > sizeof(arcn->name))) { + paxwarn(1, "Cpio file name length %d is out of range", nsz); + return(-1); + } + + /* + * read the name and make sure it is not empty and is \0 terminated + */ + if ((rd_wrbuf(arcn->name,nsz) != nsz) || (arcn->name[nsz-1] != '\0') || + (arcn->name[0] == '\0')) { + paxwarn(1, "Cpio file name in header is corrupted"); + return(-1); + } + return(0); +} + +/* + * rd_ln_nm() + * read in the link name for a file with links. 
The link name is stored + * like file data (and is NOT \0 terminated!) + * Return: + * 0 if ok, -1 otherwise + */ + +static int +rd_ln_nm(ARCHD *arcn) +{ + /* + * check the length specified for bogus values + */ + if ((arcn->sb.st_size <= 0) || + (arcn->sb.st_size >= (off_t)sizeof(arcn->ln_name))) { + paxwarn(1, "Cpio link name length is invalid: %zu", + arcn->sb.st_size); + return(-1); + } + + /* + * read in the link name and \0 terminate it + */ + if (rd_wrbuf(arcn->ln_name, (int)arcn->sb.st_size) != + (int)arcn->sb.st_size) { + paxwarn(1, "Cpio link name read error"); + return(-1); + } + arcn->ln_nlen = arcn->sb.st_size; + arcn->ln_name[arcn->ln_nlen] = '\0'; + + /* + * watch out for those empty link names + */ + if (arcn->ln_name[0] == '\0') { + paxwarn(1, "Cpio link name is corrupt"); + return(-1); + } + return(0); +} + +/* + * Routines common to the extended byte oriented cpio format + */ + +/* + * cpio_id() + * determine if a block given to us is a valid extended byte oriented + * cpio header + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +cpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_CPIO)) || + (strncmp(blk, AMAGIC, sizeof(AMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * cpio_rd() + * determine if a buffer is a byte oriented extended cpio archive entry. + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +cpio_rd(ARCHD *arcn, char *buf) +{ + int nsz; + unsigned long long val; + HD_CPIO *hd; + + /* + * check that this is a valid header, if not return -1 + */ + if (cpio_id(buf, sizeof(HD_CPIO)) < 0) + return(-1); + hd = (HD_CPIO *)buf; + + /* + * byte oriented cpio (posix) does not have padding! 
extract the octal + * ascii fields from the header + */ + arcn->pad = 0; + arcn->sb.st_dev = (dev_t)asc_ul(hd->c_dev, sizeof(hd->c_dev), OCT); + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), OCT); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), OCT); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), OCT); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + OCT); + arcn->sb.st_rdev = (dev_t)asc_ul(hd->c_rdev, sizeof(hd->c_rdev), OCT); + val = asc_ull(hd->c_mtime, sizeof(hd->c_mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize,sizeof(hd->c_filesize), + OCT); + + /* + * check name size and if valid, read in the name of this entry (name + * follows header in the archive) + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),OCT)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * no link name to read for this file + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + return(com_rd(arcn)); + } + + /* + * check link name size and read in the link name. Link names are + * stored like file data. 
+ */ + if (rd_ln_nm(arcn) < 0) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * cpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +cpio_endrd(void) +{ + return sizeof(HD_CPIO) + sizeof(TRAILER); +} + +/* + * cpio_stwr() + * start up the device mapping table + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +cpio_stwr(void) +{ + return(dev_start()); +} + +/* + * cpio_wr() + * copy the data in the ARCHD to buffer in extended byte oriented cpio + * format. + * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +cpio_wr(ARCHD *arcn) +{ + HD_CPIO *hd; + int nsz; + char hdblk[sizeof(HD_CPIO)]; + + /* + * check and repair truncated device and inode fields in the header + */ + if (map_dev(arcn, CPIO_MASK, CPIO_MASK) < 0) + return(-1); + + arcn->pad = 0; + nsz = arcn->nlen + 1; + hd = (HD_CPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * set data size for file data + */ + if (ull_asc(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) { + paxwarn(1,"File is too large for cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * set data size to hold link name + */ + if (ul_asc(arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), OCT)) + goto out; + break; + default: + /* + * all other file types have no file data + */ + if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), OCT)) + goto out; + break; + } + + /* + * copy the values to the header using octal ascii + */ + if (ul_asc(MAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(arcn->sb.st_dev, hd->c_dev, sizeof(hd->c_dev), OCT) || + ul_asc(arcn->sb.st_ino, 
hd->c_ino, sizeof(hd->c_ino), OCT) || + ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), OCT) || + ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), OCT) || + ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), OCT) || + ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), OCT) || + ul_asc(arcn->sb.st_rdev, hd->c_rdev, sizeof(hd->c_rdev), OCT) || + ull_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime, + sizeof(hd->c_mtime), OCT) || + ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), OCT)) + goto out; + + /* + * write the file name to the archive + */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_CPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0)) { + paxwarn(1, "Unable to write cpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if this file has data, we are done. The caller will write the file + * data, if we are link tell caller we are done, go to next file + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name to the archive, tell the caller to go to the + * next file as we are done. + */ + if (wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) { + paxwarn(1,"Unable to write cpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Cpio header field is too small to store file %s", + arcn->org_name); + return(1); +} + +/* + * Routines common to the system VR4 version of cpio (with/without file CRC) + */ + +/* + * vcpio_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITHOUT crc. 
WATCH it the magic cookies are in OCTAL, the header + * uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +vcpio_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVMAGIC, sizeof(AVMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_id() + * determine if a block given to us is a valid system VR4 cpio header + * WITH crc. WATCH it the magic cookies are in OCTAL the header uses HEX + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +crc_id(char *blk, int size) +{ + if ((size < (int)sizeof(HD_VCPIO)) || + (strncmp(blk, AVCMAGIC, sizeof(AVCMAGIC) - 1) != 0)) + return(-1); + return(0); +} + +/* + * crc_strd() + w set file data CRC calculations. Fire up the hard link detection code + * Return: + * 0 if ok -1 otherwise (the return values of lnk_start()) + */ + +int +crc_strd(void) +{ + docrc = 1; + return(lnk_start()); +} + +/* + * vcpio_rd() + * determine if a buffer is a system VR4 archive entry. (with/without CRC) + * convert and store the values in the ARCHD parameter. + * Return: + * 0 if a valid header, -1 otherwise. + */ + +int +vcpio_rd(ARCHD *arcn, char *buf) +{ + HD_VCPIO *hd; + dev_t devminor; + dev_t devmajor; + int nsz; + + /* + * during the id phase it was determined if we were using CRC, use the + * proper id routine. 
+ */ + if (docrc) { + if (crc_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } else { + if (vcpio_id(buf, sizeof(HD_VCPIO)) < 0) + return(-1); + } + + hd = (HD_VCPIO *)buf; + arcn->pad = 0; + + /* + * extract the hex ascii fields from the header + */ + arcn->sb.st_ino = (ino_t)asc_ul(hd->c_ino, sizeof(hd->c_ino), HEX); + arcn->sb.st_mode = (mode_t)asc_ul(hd->c_mode, sizeof(hd->c_mode), HEX); + arcn->sb.st_uid = (uid_t)asc_ul(hd->c_uid, sizeof(hd->c_uid), HEX); + arcn->sb.st_gid = (gid_t)asc_ul(hd->c_gid, sizeof(hd->c_gid), HEX); + arcn->sb.st_mtime = (time_t)asc_ul(hd->c_mtime,sizeof(hd->c_mtime),HEX); + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + arcn->sb.st_size = (off_t)asc_ull(hd->c_filesize, + sizeof(hd->c_filesize), HEX); + arcn->sb.st_nlink = (nlink_t)asc_ul(hd->c_nlink, sizeof(hd->c_nlink), + HEX); + devmajor = (dev_t)asc_ul(hd->c_maj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_min, sizeof(hd->c_min), HEX); + arcn->sb.st_dev = TODEV(devmajor, devminor); + devmajor = (dev_t)asc_ul(hd->c_rmaj, sizeof(hd->c_maj), HEX); + devminor = (dev_t)asc_ul(hd->c_rmin, sizeof(hd->c_min), HEX); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + arcn->crc = asc_ul(hd->c_chksum, sizeof(hd->c_chksum), HEX); + + /* + * check the length of the file name, if ok read it in, return -1 if + * bogus + */ + if ((nsz = (int)asc_ul(hd->c_namesize,sizeof(hd->c_namesize),HEX)) < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * skip padding. 
header + filename is aligned to 4 byte boundaries + */ + if (rd_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode&C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)) { + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + /* + * read in the link name and skip over the padding + */ + if ((rd_ln_nm(arcn) < 0) || + (rd_skip(VCPIO_PAD(arcn->sb.st_size)) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * vcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +vcpio_endrd(void) +{ + return sizeof(HD_VCPIO) + sizeof(TRAILER) + + (VCPIO_PAD(sizeof(HD_VCPIO) + sizeof(TRAILER))); +} + +/* + * crc_stwr() + * start up the device mapping table, enable crc file calculation + * Return: + * 0 if ok, -1 otherwise (what dev_start() returns) + */ + +int +crc_stwr(void) +{ + docrc = 1; + return(dev_start()); +} + +/* + * vcpio_wr() + * copy the data in the ARCHD to buffer in system VR4 cpio + * (with/without crc) format. 
+ * Return + * 0 if file has data to be written after the header, 1 if file has + * NO data to write after the header, -1 if archive write failed + */ + +int +vcpio_wr(ARCHD *arcn) +{ + HD_VCPIO *hd; + unsigned int nsz; + char hdblk[sizeof(HD_VCPIO)]; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, VCPIO_MASK, VCPIO_MASK) < 0) + return(-1); + nsz = arcn->nlen + 1; + hd = (HD_VCPIO *)hdblk; + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + + /* + * add the proper magic value depending whether we were asked for + * file data crc's, and the crc if needed. + */ + if (docrc) { + if (ul_asc(VCMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(arcn->crc,hd->c_chksum,sizeof(hd->c_chksum), HEX)) + goto out; + } else { + if (ul_asc(VMAGIC, hd->c_magic, sizeof(hd->c_magic), OCT) || + ul_asc(0, hd->c_chksum, sizeof(hd->c_chksum),HEX)) + goto out; + } + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. 
+ */ + arcn->pad = VCPIO_PAD(arcn->sb.st_size); + if (ull_asc(arcn->sb.st_size, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) { + paxwarn(1,"File is too large for sv4cpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0; + if (ul_asc(arcn->ln_nlen, hd->c_filesize, + sizeof(hd->c_filesize), HEX)) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0; + if (ul_asc(0, hd->c_filesize, sizeof(hd->c_filesize), HEX)) + goto out; + break; + } + + /* + * set the other fields in the header + */ + if (ul_asc(arcn->sb.st_ino, hd->c_ino, sizeof(hd->c_ino), HEX) || + ul_asc(arcn->sb.st_mode, hd->c_mode, sizeof(hd->c_mode), HEX) || + ul_asc(arcn->sb.st_uid, hd->c_uid, sizeof(hd->c_uid), HEX) || + ul_asc(arcn->sb.st_gid, hd->c_gid, sizeof(hd->c_gid), HEX) || + ul_asc(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->c_mtime, + sizeof(hd->c_mtime), HEX) || + ul_asc(arcn->sb.st_nlink, hd->c_nlink, sizeof(hd->c_nlink), HEX) || + ul_asc(MAJOR(arcn->sb.st_dev),hd->c_maj, sizeof(hd->c_maj), HEX) || + ul_asc(MINOR(arcn->sb.st_dev),hd->c_min, sizeof(hd->c_min), HEX) || + ul_asc(MAJOR(arcn->sb.st_rdev),hd->c_rmaj,sizeof(hd->c_maj), HEX) || + ul_asc(MINOR(arcn->sb.st_rdev),hd->c_rmin,sizeof(hd->c_min), HEX) || + ul_asc(nsz, hd->c_namesize, sizeof(hd->c_namesize), HEX)) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_VCPIO)) < 0) || + (wr_rdbuf(arcn->name, (int)nsz) < 0) || + (wr_skip(VCPIO_PAD(sizeof(HD_VCPIO) + nsz)) < 0)) { + paxwarn(1,"Could not write sv4cpio header for %s",arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done, copy the file + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip(VCPIO_PAD(arcn->ln_nlen)) < 0)) { + paxwarn(1,"Could not write sv4cpio link name for %s", + arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Sv4cpio header field is too small for file %s",arcn->org_name); + return(1); +} + +/* + * Routines common to the old binary header cpio + */ + +/* + * bcpio_id() + * determine if a block given to us is a old binary cpio header + * (with/without header byte swapping) + * Return: + * 0 if a valid header, -1 otherwise + */ + +int +bcpio_id(char *blk, int size) +{ + if (size < (int)sizeof(HD_BCPIO)) + return(-1); + + /* + * check both normal and byte swapped magic cookies + */ + if (((u_short)SHRT_EXT(blk)) == MAGIC) + return(0); + if (((u_short)RSHRT_EXT(blk)) == MAGIC) { + if (!swp_head) + ++swp_head; + return(0); + } + return(-1); +} + +/* + * bcpio_rd() + * determine if a buffer is a old binary archive entry. (it may have byte + * swapped header) convert and store the values in the ARCHD parameter. + * This is a very old header format and should not really be used. + * Return: + * 0 if a valid header, -1 otherwise. 
+ */ + +int +bcpio_rd(ARCHD *arcn, char *buf) +{ + HD_BCPIO *hd; + int nsz; + + /* + * check the header + */ + if (bcpio_id(buf, sizeof(HD_BCPIO)) < 0) + return(-1); + + arcn->pad = 0; + hd = (HD_BCPIO *)buf; + if (swp_head) { + /* + * header has swapped bytes on 16 bit boundaries + */ + arcn->sb.st_dev = (dev_t)(RSHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(RSHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(RSHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(RSHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(RSHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(RSHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(RSHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(RSHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(RSHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(RSHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(RSHRT_EXT(hd->h_filesize_2))); + nsz = (int)(RSHRT_EXT(hd->h_namesize)); + } else { + arcn->sb.st_dev = (dev_t)(SHRT_EXT(hd->h_dev)); + arcn->sb.st_ino = (ino_t)(SHRT_EXT(hd->h_ino)); + arcn->sb.st_mode = (mode_t)(SHRT_EXT(hd->h_mode)); + arcn->sb.st_uid = (uid_t)(SHRT_EXT(hd->h_uid)); + arcn->sb.st_gid = (gid_t)(SHRT_EXT(hd->h_gid)); + arcn->sb.st_nlink = (nlink_t)(SHRT_EXT(hd->h_nlink)); + arcn->sb.st_rdev = (dev_t)(SHRT_EXT(hd->h_rdev)); + arcn->sb.st_mtime = (time_t)(SHRT_EXT(hd->h_mtime_1)); + arcn->sb.st_mtime = (arcn->sb.st_mtime << 16) | + ((time_t)(SHRT_EXT(hd->h_mtime_2))); + arcn->sb.st_size = (off_t)(SHRT_EXT(hd->h_filesize_1)); + arcn->sb.st_size = (arcn->sb.st_size << 16) | + ((off_t)(SHRT_EXT(hd->h_filesize_2))); + nsz = (int)(SHRT_EXT(hd->h_namesize)); + } + arcn->sb.st_mtim.tv_nsec = 0; + arcn->sb.st_ctim = arcn->sb.st_atim = arcn->sb.st_mtim; + + /* + * check the file name size, if bogus give up. 
otherwise read the file + * name + */ + if (nsz < 2) + return(-1); + arcn->nlen = nsz - 1; + if (rd_nm(arcn, nsz) < 0) + return(-1); + + /* + * header + file name are aligned to 2 byte boundaries, skip if needed + */ + if (rd_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0) + return(-1); + + /* + * if not a link (or a file with no data), calculate pad size (for + * padding which follows the file data), clear the link name and return + */ + if (((arcn->sb.st_mode & C_IFMT) != C_ISLNK)||(arcn->sb.st_size == 0)){ + /* + * we have a valid header (not a link) + */ + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + return(com_rd(arcn)); + } + + if ((rd_ln_nm(arcn) < 0) || + (rd_skip(BCPIO_PAD(arcn->sb.st_size)) < 0)) + return(-1); + + /* + * we have a valid header (with a link) + */ + return(com_rd(arcn)); +} + +/* + * bcpio_endrd() + * no cleanup needed here, just return size of the trailer (for append) + * Return: + * size of trailer header in this format + */ + +off_t +bcpio_endrd(void) +{ + return sizeof(HD_BCPIO) + sizeof(TRAILER) + + (BCPIO_PAD(sizeof(HD_BCPIO) + sizeof(TRAILER))); +} + +/* + * bcpio_wr() + * copy the data in the ARCHD to buffer in old binary cpio format + * There is a real chance of field overflow with this critter. So we + * always check the conversion is ok. nobody in their right mind + * should write an archive in this format... 
+ * Return + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +bcpio_wr(ARCHD *arcn) +{ + HD_BCPIO *hd; + int nsz; + char hdblk[sizeof(HD_BCPIO)]; + off_t t_offt; + int t_int; + time_t t_timet; + + /* + * check and repair truncated device and inode fields in the cpio + * header + */ + if (map_dev(arcn, BCPIO_MASK, BCPIO_MASK) < 0) + return(-1); + + if ((arcn->type != PAX_BLK) && (arcn->type != PAX_CHR)) + arcn->sb.st_rdev = 0; + hd = (HD_BCPIO *)hdblk; + + switch (arcn->type) { + case PAX_CTG: + case PAX_REG: + case PAX_HRG: + /* + * caller will copy file data to the archive. tell him how + * much to pad. + */ + arcn->pad = BCPIO_PAD(arcn->sb.st_size); + hd->h_filesize_1[0] = CHR_WR_0(arcn->sb.st_size); + hd->h_filesize_1[1] = CHR_WR_1(arcn->sb.st_size); + hd->h_filesize_2[0] = CHR_WR_2(arcn->sb.st_size); + hd->h_filesize_2[1] = CHR_WR_3(arcn->sb.st_size); + t_offt = (off_t)(SHRT_EXT(hd->h_filesize_1)); + t_offt = (t_offt<<16) | ((off_t)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->sb.st_size != t_offt) { + paxwarn(1,"File is too large for bcpio format %s", + arcn->org_name); + return(1); + } + break; + case PAX_SLK: + /* + * no file data for the caller to process, the file data has + * the size of the link + */ + arcn->pad = 0; + hd->h_filesize_1[0] = CHR_WR_0(arcn->ln_nlen); + hd->h_filesize_1[1] = CHR_WR_1(arcn->ln_nlen); + hd->h_filesize_2[0] = CHR_WR_2(arcn->ln_nlen); + hd->h_filesize_2[1] = CHR_WR_3(arcn->ln_nlen); + t_int = (int)(SHRT_EXT(hd->h_filesize_1)); + t_int = (t_int << 16) | ((int)(SHRT_EXT(hd->h_filesize_2))); + if (arcn->ln_nlen != t_int) + goto out; + break; + default: + /* + * no file data for the caller to process + */ + arcn->pad = 0; + hd->h_filesize_1[0] = (char)0; + hd->h_filesize_1[1] = (char)0; + hd->h_filesize_2[0] = (char)0; + hd->h_filesize_2[1] = (char)0; + break; + } + + /* + * build up the rest of the fields + */ + hd->h_magic[0] = 
CHR_WR_2(MAGIC); + hd->h_magic[1] = CHR_WR_3(MAGIC); + hd->h_dev[0] = CHR_WR_2(arcn->sb.st_dev); + hd->h_dev[1] = CHR_WR_3(arcn->sb.st_dev); + if (arcn->sb.st_dev != (dev_t)(SHRT_EXT(hd->h_dev))) + goto out; + hd->h_ino[0] = CHR_WR_2(arcn->sb.st_ino); + hd->h_ino[1] = CHR_WR_3(arcn->sb.st_ino); + if (arcn->sb.st_ino != (ino_t)(SHRT_EXT(hd->h_ino))) + goto out; + hd->h_mode[0] = CHR_WR_2(arcn->sb.st_mode); + hd->h_mode[1] = CHR_WR_3(arcn->sb.st_mode); + if (arcn->sb.st_mode != (mode_t)(SHRT_EXT(hd->h_mode))) + goto out; + hd->h_uid[0] = CHR_WR_2(arcn->sb.st_uid); + hd->h_uid[1] = CHR_WR_3(arcn->sb.st_uid); + if (arcn->sb.st_uid != (uid_t)(SHRT_EXT(hd->h_uid))) + goto out; + hd->h_gid[0] = CHR_WR_2(arcn->sb.st_gid); + hd->h_gid[1] = CHR_WR_3(arcn->sb.st_gid); + if (arcn->sb.st_gid != (gid_t)(SHRT_EXT(hd->h_gid))) + goto out; + hd->h_nlink[0] = CHR_WR_2(arcn->sb.st_nlink); + hd->h_nlink[1] = CHR_WR_3(arcn->sb.st_nlink); + if (arcn->sb.st_nlink != (nlink_t)(SHRT_EXT(hd->h_nlink))) + goto out; + hd->h_rdev[0] = CHR_WR_2(arcn->sb.st_rdev); + hd->h_rdev[1] = CHR_WR_3(arcn->sb.st_rdev); + if (arcn->sb.st_rdev != (dev_t)(SHRT_EXT(hd->h_rdev))) + goto out; + if (arcn->sb.st_mtime > 0) { + hd->h_mtime_1[0] = CHR_WR_0(arcn->sb.st_mtime); + hd->h_mtime_1[1] = CHR_WR_1(arcn->sb.st_mtime); + hd->h_mtime_2[0] = CHR_WR_2(arcn->sb.st_mtime); + hd->h_mtime_2[1] = CHR_WR_3(arcn->sb.st_mtime); + t_timet = (time_t)SHRT_EXT(hd->h_mtime_1); + t_timet = t_timet << 16 | (time_t)SHRT_EXT(hd->h_mtime_2); + if (arcn->sb.st_mtime != t_timet) + goto out; + } else { + hd->h_mtime_1[0] = hd->h_mtime_1[1] = 0; + hd->h_mtime_2[0] = hd->h_mtime_2[1] = 0; + } + nsz = arcn->nlen + 1; + hd->h_namesize[0] = CHR_WR_2(nsz); + hd->h_namesize[1] = CHR_WR_3(nsz); + if (nsz != (int)(SHRT_EXT(hd->h_namesize))) + goto out; + + /* + * write the header, the file name and padding as required. 
+ */ + if ((wr_rdbuf(hdblk, (int)sizeof(HD_BCPIO)) < 0) || + (wr_rdbuf(arcn->name, nsz) < 0) || + (wr_skip(BCPIO_PAD(sizeof(HD_BCPIO) + nsz)) < 0)) { + paxwarn(1, "Could not write bcpio header for %s", arcn->org_name); + return(-1); + } + + /* + * if we have file data, tell the caller we are done + */ + if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) + return(0); + + /* + * if we are not a link, tell the caller we are done, go to next file + */ + if (arcn->type != PAX_SLK) + return(1); + + /* + * write the link name, tell the caller we are done. + */ + if ((wr_rdbuf(arcn->ln_name, arcn->ln_nlen) < 0) || + (wr_skip(BCPIO_PAD(arcn->ln_nlen)) < 0)) { + paxwarn(1,"Could not write bcpio link name for %s",arcn->org_name); + return(-1); + } + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1,"Bcpio header field is too small for file %s", arcn->org_name); + return(1); +} diff --git a/bin/pax/cpio.h b/bin/pax/cpio.h new file mode 100644 index 0000000..dfbd03f --- /dev/null +++ b/bin/pax/cpio.h @@ -0,0 +1,150 @@ +/* $OpenBSD: cpio.h,v 1.4 2003/06/02 23:32:08 millert Exp $ */ +/* $NetBSD: cpio.h,v 1.3 1995/03/21 09:07:15 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cpio.h 8.1 (Berkeley) 5/31/93 + */ + +/* + * Defines common to all versions of cpio + */ +#define TRAILER "TRAILER!!!" 
/* name in last archive record */ + +/* + * Header encoding of the different file types + */ +#define C_ISDIR 040000 /* Directory */ +#define C_ISFIFO 010000 /* FIFO */ +#define C_ISREG 0100000 /* Regular file */ +#define C_ISBLK 060000 /* Block special file */ +#define C_ISCHR 020000 /* Character special file */ +#define C_ISCTG 0110000 /* Reserved for contiguous files */ +#define C_ISLNK 0120000 /* Reserved for symbolic links */ +#define C_ISOCK 0140000 /* Reserved for sockets */ +#define C_IFMT 0170000 /* type of file */ + +/* + * Data Interchange Format - Extended cpio header format - POSIX 1003.1-1990 + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_dev[6]; /* device number */ + char c_ino[6]; /* inode number */ + char c_mode[6]; /* file type/access */ + char c_uid[6]; /* owners uid */ + char c_gid[6]; /* owners gid */ + char c_nlink[6]; /* # of links at archive creation */ + char c_rdev[6]; /* block/char major/minor # */ + char c_mtime[11]; /* modification time */ + char c_namesize[6]; /* length of pathname */ + char c_filesize[11]; /* length of file in bytes */ +} HD_CPIO; + +#define MAGIC 070707 /* transportable archive id */ + +#ifdef _PAX_ +#define AMAGIC "070707" /* ascii equivalent string of MAGIC */ +#define CPIO_MASK 0x3ffff /* bits valid in the dev/ino fields */ + /* used for dev/inode remaps */ +#endif /* _PAX_ */ + +/* + * Binary cpio header structure + * + * CAUTION! CAUTION! CAUTION! + * Each field really represents a 16 bit short (NOT ASCII). Described as + * an array of chars in an attempt to improve portability!! 
+ */ +typedef struct { + u_char h_magic[2]; + u_char h_dev[2]; + u_char h_ino[2]; + u_char h_mode[2]; + u_char h_uid[2]; + u_char h_gid[2]; + u_char h_nlink[2]; + u_char h_rdev[2]; + u_char h_mtime_1[2]; + u_char h_mtime_2[2]; + u_char h_namesize[2]; + u_char h_filesize_1[2]; + u_char h_filesize_2[2]; +} HD_BCPIO; + +#ifdef _PAX_ +/* + * extraction and creation macros for binary cpio + */ +#define SHRT_EXT(ch) ((((unsigned)(ch)[0])<<8) | (((unsigned)(ch)[1])&0xff)) +#define RSHRT_EXT(ch) ((((unsigned)(ch)[1])<<8) | (((unsigned)(ch)[0])&0xff)) +#define CHR_WR_0(val) ((char)(((val) >> 24) & 0xff)) +#define CHR_WR_1(val) ((char)(((val) >> 16) & 0xff)) +#define CHR_WR_2(val) ((char)(((val) >> 8) & 0xff)) +#define CHR_WR_3(val) ((char)((val) & 0xff)) + +/* + * binary cpio masks and pads + */ +#define BCPIO_PAD(x) ((2 - ((x) & 1)) & 1) /* pad to next 2 byte word */ +#define BCPIO_MASK 0xffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ + +/* + * System VR4 cpio header structure (with/without file data crc) + */ +typedef struct { + char c_magic[6]; /* magic cookie */ + char c_ino[8]; /* inode number */ + char c_mode[8]; /* file type/access */ + char c_uid[8]; /* owners uid */ + char c_gid[8]; /* owners gid */ + char c_nlink[8]; /* # of links at archive creation */ + char c_mtime[8]; /* modification time */ + char c_filesize[8]; /* length of file in bytes */ + char c_maj[8]; /* block/char major # */ + char c_min[8]; /* block/char minor # */ + char c_rmaj[8]; /* special file major # */ + char c_rmin[8]; /* special file minor # */ + char c_namesize[8]; /* length of pathname */ + char c_chksum[8]; /* 0 OR CRC of bytes of FILE data */ +} HD_VCPIO; + +#define VMAGIC 070701 /* sVr4 new portable archive id */ +#define VCMAGIC 070702 /* sVr4 new portable archive id CRC */ +#ifdef _PAX_ +#define AVMAGIC "070701" /* ascii string of above */ +#define AVCMAGIC "070702" /* ascii string of above */ +#define VCPIO_PAD(x) ((4 - ((x) & 3)) & 3) /* pad to next 4 byte word */ +#define 
VCPIO_MASK 0xffffffff /* mask for dev/ino fields */ +#endif /* _PAX_ */ diff --git a/bin/pax/extern.h b/bin/pax/extern.h new file mode 100644 index 0000000..67a21d8 --- /dev/null +++ b/bin/pax/extern.h @@ -0,0 +1,310 @@ +/* $OpenBSD: extern.h,v 1.60 2020/03/23 20:04:19 espie Exp $ */ +/* $NetBSD: extern.h,v 1.5 1996/03/26 23:54:16 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * External references from each source file + */ + +/* + * ar_io.c + */ +extern const char *arcname; +extern const char *gzip_program; +extern int force_one_volume; +int ar_open(const char *); +void ar_close(int _in_sig); +void ar_drain(void); +int ar_set_wr(void); +int ar_app_ok(void); +int ar_read(char *, int); +int ar_write(char *, int); +int ar_rdsync(void); +int ar_fow(off_t, off_t *); +int ar_rev(off_t ); +int ar_next(void); + +/* + * ar_subs.c + */ +extern u_long flcnt; +void list(void); +void extract(void); +void append(void); +void archive(void); +void copy(void); + +/* + * buf_subs.c + */ +extern int blksz; +extern int wrblksz; +extern int maxflt; +extern int rdblksz; +extern off_t wrlimit; +extern off_t rdcnt; +extern off_t wrcnt; +int wr_start(void); +int rd_start(void); +void cp_start(void); +int appnd_start(off_t); +int rd_sync(void); +void pback(char *, int); +int rd_skip(off_t); +void wr_fin(void); +int wr_rdbuf(char *, int); +int rd_wrbuf(char *, int); +int wr_skip(off_t); +int wr_rdfile(ARCHD *, int, off_t *); +int rd_wrfile(ARCHD *, int, off_t *); +void cp_file(ARCHD *, int, int); +int buf_fill(void); +int buf_flush(int); + +/* + * cpio.c + */ +int cpio_strd(void); +int cpio_trail(ARCHD *, char *, int, int *); +int cpio_endwr(void); +int cpio_id(char *, int); +int cpio_rd(ARCHD *, char *); +off_t cpio_endrd(void); +int cpio_stwr(void); +int cpio_wr(ARCHD *); +int 
vcpio_id(char *, int); +int crc_id(char *, int); +int crc_strd(void); +int vcpio_rd(ARCHD *, char *); +off_t vcpio_endrd(void); +int crc_stwr(void); +int vcpio_wr(ARCHD *); +int bcpio_id(char *, int); +int bcpio_rd(ARCHD *, char *); +off_t bcpio_endrd(void); +int bcpio_wr(ARCHD *); + +/* + * file_subs.c + */ +int file_creat(ARCHD *); +void file_close(ARCHD *, int); +int lnk_creat(ARCHD *); +int cross_lnk(ARCHD *); +int chk_same(ARCHD *); +int node_creat(ARCHD *); +int unlnk_exist(char *, int); +int chk_path(char *, uid_t, gid_t, int); +void set_ftime(const char *, const struct timespec *, + const struct timespec *, int); +void fset_ftime(const char *, int, const struct timespec *, + const struct timespec *, int); +int set_ids(char *, uid_t, gid_t); +int fset_ids(char *, int, uid_t, gid_t); +void set_pmode(char *, mode_t); +void fset_pmode(char *, int, mode_t); +int set_attr(const struct file_times *, int _force_times, mode_t, int _do_mode, + int _in_sig); +int file_write(int, char *, int, int *, int *, int, char *); +void file_flush(int, char *, int); +void rdfile_close(ARCHD *, int *); +int set_crc(ARCHD *, int); + +/* + * ftree.c + */ +int ftree_start(void); +int ftree_add(char *, int); +void ftree_sel(ARCHD *); +void ftree_skipped_newer(ARCHD *); +void ftree_chk(void); +int next_file(ARCHD *); + +/* + * gen_subs.c + */ +void ls_list(ARCHD *, time_t, FILE *); +void ls_tty(ARCHD *); +void safe_print(const char *, FILE *); +u_long asc_ul(char *, int, int); +int ul_asc(u_long, char *, int, int); +unsigned long long asc_ull(char *, int, int); +int ull_asc(unsigned long long, char *, int, int); +size_t fieldcpy(char *, size_t, const char *, size_t); + +/* + * getoldopt.c + */ +int getoldopt(int, char **, const char *); + +/* + * options.c + */ +extern FSUB fsub[]; +extern int ford[]; +void options(int, char **); +OPLIST * opt_next(void); +int opt_add(const char *); +int bad_opt(void); +extern char *chdname; + +/* + * pat_rep.c + */ +int rep_add(char *); +int 
pat_add(char *, char *); +void pat_chk(void); +int pat_sel(ARCHD *); +int pat_match(ARCHD *); +int mod_name(ARCHD *); +int set_dest(ARCHD *, char *, int); +int has_dotdot(const char *); + +/* + * pax.c + */ +extern int act; +extern FSUB *frmt; +extern int cflag; +extern int cwdfd; +extern int dflag; +extern int iflag; +extern int kflag; +extern int lflag; +extern int nflag; +extern int tflag; +extern int uflag; +extern int vflag; +extern int Dflag; +extern int Hflag; +extern int Lflag; +extern int Nflag; +extern int Xflag; +extern int Yflag; +extern int Zflag; +extern int zeroflag; +extern int vfpart; +extern int patime; +extern int pmtime; +extern int nodirs; +extern int pmode; +extern int pids; +extern int rmleadslash; +extern int exit_val; +extern int docrc; +extern char *dirptr; +extern char *argv0; +extern enum op_mode { OP_PAX, OP_TAR, OP_CPIO } op_mode; +extern FILE *listf; +extern int listfd; +extern char *tempfile; +extern char *tempbase; +extern int havechd; + +void sig_cleanup(int); + +/* + * sel_subs.c + */ +int sel_chk(ARCHD *); +int grp_add(char *); +int usr_add(char *); +int trng_add(char *); + +/* + * tables.c + */ +int lnk_start(void); +int chk_lnk(ARCHD *); +void purg_lnk(ARCHD *); +void lnk_end(void); +int ftime_start(void); +int chk_ftime(ARCHD *); +int sltab_start(void); +int sltab_add_sym(const char *_path, const char *_value, mode_t _mode); +int sltab_add_link(const char *, const struct stat *); +void sltab_process(int _in_sig); +int name_start(void); +int add_name(char *, int, char *); +void sub_name(char *, int *, int); +#ifndef NOCPIO +int dev_start(void); +int add_dev(ARCHD *); +int map_dev(ARCHD *, u_long, u_long); +#else +# define dev_start() 0 +# define add_dev(x) 0 +# define map_dev(x,y,z) 0 +#endif /* NOCPIO */ +int atdir_start(void); +void atdir_end(void); +void add_atdir(char *, dev_t, ino_t, const struct timespec *, + const struct timespec *); +int do_atdir(const char *, dev_t, ino_t); +int dir_start(void); +void add_dir(char *, 
struct stat *, int); +void delete_dir(dev_t, ino_t); +void proc_dir(int _in_sig); +u_int st_hash(const char *, int, int); + +/* + * tar.c + */ +extern int tar_nodir; +extern char *gnu_name_string, *gnu_link_string; +int tar_endwr(void); +off_t tar_endrd(void); +int tar_trail(ARCHD *, char *, int, int *); +int tar_id(char *, int); +int tar_opt(void); +int tar_rd(ARCHD *, char *); +int tar_wr(ARCHD *); +int ustar_id(char *, int); +int ustar_rd(ARCHD *, char *); +int ustar_wr(ARCHD *); + +/* + * tty_subs.c + */ +int tty_init(void); +void tty_prnt(const char *, ...) + __attribute__((nonnull(1), format(printf, 1, 2))); +int tty_read(char *, int); +void paxwarn(int, const char *, ...) + __attribute__((nonnull(2), format(printf, 2, 3))); +void syswarn(int, int, const char *, ...) + __attribute__((nonnull(3), format(printf, 3, 4))); diff --git a/bin/pax/file_subs.c b/bin/pax/file_subs.c new file mode 100644 index 0000000..2c0994f --- /dev/null +++ b/bin/pax/file_subs.c @@ -0,0 +1,1106 @@ +/* $OpenBSD: file_subs.c,v 1.55 2020/03/23 20:04:19 espie Exp $ */ +/* $NetBSD: file_subs.c,v 1.4 1995/03/21 09:07:18 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +static int +mk_link(char *, struct stat *, char *, int); + +/* + * routines that deal with file operations such as: creating, removing; + * and setting access modes, uid/gid and times of files + */ + +/* + * file_creat() + * Create and open a file. + * Return: + * file descriptor or -1 for failure + */ + +int +file_creat(ARCHD *arcn) +{ + int fd = -1; + mode_t file_mode; + int oerrno; + + /* + * Assume file doesn't exist, so just try to create it, most times this + * works. We have to take special handling when the file does exist. To + * detect this, we use O_EXCL. 
For example when trying to create a + * file and a character device or fifo exists with the same name, we + * can accidently open the device by mistake (or block waiting to open). + * If we find that the open has failed, then spend the effort to + * figure out why. This strategy was found to have better average + * performance in common use than checking the file (and the path) + * first with lstat. + */ + file_mode = arcn->sb.st_mode & FILEBITS; + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_EXCL, + file_mode)) >= 0) + return(fd); + + /* + * the file seems to exist. First we try to get rid of it (found to be + * the second most common failure when traced). If this fails, only + * then we go to the expense to check and create the path to the file + */ + if (unlnk_exist(arcn->name, arcn->type) != 0) + return(-1); + + for (;;) { + /* + * try to open it again, if this fails, check all the nodes in + * the path and give it a final try. if chk_path() finds that + * it cannot fix anything, we will skip the last attempt + */ + if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC, + file_mode)) >= 0) + break; + oerrno = errno; + if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) { + syswarn(1, oerrno, "Unable to create %s", arcn->name); + return(-1); + } + } + return(fd); +} + +/* + * file_close() + * Close file descriptor to a file just created by pax. Sets modes, + * ownership and times as required. + * Return: + * 0 for success, -1 for failure + */ + +void +file_close(ARCHD *arcn, int fd) +{ + int res = 0; + + if (fd < 0) + return; + + /* + * set owner/groups first as this may strip off mode bits we want + * then set file permission modes. Then set file access and + * modification times. 
+ */ + if (pids) + res = fset_ids(arcn->name, fd, arcn->sb.st_uid, + arcn->sb.st_gid); + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode) + fset_pmode(arcn->name, fd, arcn->sb.st_mode); + if (patime || pmtime) + fset_ftime(arcn->name, fd, &arcn->sb.st_mtim, + &arcn->sb.st_atim, 0); + if (close(fd) == -1) + syswarn(0, errno, "Unable to close file descriptor on %s", + arcn->name); +} + +/* + * lnk_creat() + * Create a hard link to arcn->ln_name from arcn->name. arcn->ln_name + * must exist; + * Return: + * 0 if ok, -1 otherwise + */ + +int +lnk_creat(ARCHD *arcn) +{ + struct stat sb; + int res; + + /* + * we may be running as root, so we have to be sure that link target + * is not a directory, so we lstat and check + */ + if (lstat(arcn->ln_name, &sb) == -1) { + syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name, + arcn->name); + return(-1); + } + + if (S_ISDIR(sb.st_mode)) { + paxwarn(1, "A hard link to the directory %s is not allowed", + arcn->ln_name); + return(-1); + } + + res = mk_link(arcn->ln_name, &sb, arcn->name, 0); + if (res == 0) { + /* check for a hardlink to a placeholder symlink */ + res = sltab_add_link(arcn->name, &sb); + + if (res < 0) { + /* arrgh, it failed, clean up */ + unlink(arcn->name); + } + } + + return (res); +} + +/* + * cross_lnk() + * Create a hard link to arcn->org_name from arcn->name. Only used in copy + * with the -l flag. No warning or error if this does not succeed (we will + * then just create the file) + * Return: + * 1 if copy() should try to create this file node + * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). + */ + +int +cross_lnk(ARCHD *arcn) +{ + /* + * try to make a link to original file (-l flag in copy mode). make + * sure we do not try to link to directories in case we are running as + * root (and it might succeed). 
+ */ + if (arcn->type == PAX_DIR) + return(1); + return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1)); +} + +/* + * chk_same() + * In copy mode if we are not trying to make hard links between the src + * and destinations, make sure we are not going to overwrite ourselves by + * accident. This slows things down a little, but we have to protect all + * those people who make typing errors. + * Return: + * 1 the target does not exist, go ahead and copy + * 0 skip it file exists (-k) or may be the same as source file + */ + +int +chk_same(ARCHD *arcn) +{ + struct stat sb; + + /* + * if file does not exist, return. if file exists and -k, skip it + * quietly + */ + if (lstat(arcn->name, &sb) == -1) + return(1); + if (kflag) + return(0); + + /* + * better make sure the user does not have src == dest by mistake + */ + if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { + paxwarn(1, "Unable to copy %s, file would overwrite itself", + arcn->name); + return(0); + } + return(1); +} + +/* + * mk_link() + * try to make a hard link between two files. if ign set, we do not + * complain. + * Return: + * 0 if successful (or we are done with this file but no error, such as + * finding the from file exists and the user has set -k). + * 1 when ign was set to indicates we could not make the link but we + * should try to copy/extract the file as that might work (and is an + * allowed option). -1 an error occurred. + */ + +static int +mk_link(char *to, struct stat *to_sb, char *from, int ign) +{ + struct stat sb; + int oerrno; + + /* + * if from file exists, it has to be unlinked to make the link. 
If the + * file exists and -k is set, skip it quietly + */ + if (lstat(from, &sb) == 0) { + if (kflag) + return(0); + + /* + * make sure it is not the same file, protect the user + */ + if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { + paxwarn(1, "Unable to link file %s to itself", to); + return(-1); + } + + /* + * try to get rid of the file, based on the type + */ + if (S_ISDIR(sb.st_mode)) { + if (rmdir(from) == -1) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + delete_dir(sb.st_dev, sb.st_ino); + } else if (unlink(from) == -1) { + if (!ign) { + syswarn(1, errno, "Unable to remove %s", from); + return(-1); + } + return(1); + } + } + + /* + * from file is gone (or did not exist), try to make the hard link. + * if it fails, check the path and try it again (if chk_path() says to + * try again) + */ + for (;;) { + if (linkat(AT_FDCWD, to, AT_FDCWD, from, 0) == 0) + break; + oerrno = errno; + if (!nodirs && chk_path(from, to_sb->st_uid, to_sb->st_gid, ign) == 0) + continue; + if (!ign) { + syswarn(1, oerrno, "Could not link to %s from %s", to, + from); + return(-1); + } + return(1); + } + + /* + * all right the link was made + */ + return(0); +} + +/* + * node_creat() + * create an entry in the file system (other than a file or hard link). + * If successful, sets uid/gid modes and times as required. + * Return: + * 0 if ok, -1 otherwise + */ + +int +node_creat(ARCHD *arcn) +{ + int res; + int ign = 0; + int oerrno; + int pass = 0; + mode_t file_mode; + struct stat sb; + char target[PATH_MAX]; + char *nm = arcn->name; + int len, defer_pmode = 0; + + /* + * create node based on type, if that fails try to unlink the node and + * try again. finally check the path and try again. As noted in the + * file and link creation routines, this method seems to exhibit the + * best performance in general use workloads. 
+ */ + file_mode = arcn->sb.st_mode & FILEBITS; + + for (;;) { + switch (arcn->type) { + case PAX_DIR: + /* + * If -h (or -L) was given in tar-mode, follow the + * potential symlink chain before trying to create the + * directory. + */ + if (op_mode == OP_TAR && Lflag) { + while (lstat(nm, &sb) == 0 && + S_ISLNK(sb.st_mode)) { + len = readlink(nm, target, + sizeof target - 1); + if (len == -1) { + syswarn(0, errno, + "cannot follow symlink %s in chain for %s", + nm, arcn->name); + res = -1; + goto badlink; + } + target[len] = '\0'; + nm = target; + } + } + res = mkdir(nm, file_mode); + +badlink: + if (ign) + res = 0; + break; + case PAX_CHR: + file_mode |= S_IFCHR; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_BLK: + file_mode |= S_IFBLK; + res = mknod(nm, file_mode, arcn->sb.st_rdev); + break; + case PAX_FIF: + res = mkfifo(nm, file_mode); + break; + case PAX_SCK: + /* + * Skip sockets, operation has no meaning under BSD + */ + paxwarn(0, + "%s skipped. Sockets cannot be copied or extracted", + nm); + return(-1); + case PAX_SLK: + if (arcn->ln_name[0] != '/' && + !has_dotdot(arcn->ln_name)) + res = symlink(arcn->ln_name, nm); + else { + /* + * absolute symlinks and symlinks with ".." + * have to be deferred to prevent the archive + * from bootstrapping itself to outside the + * working directory. + */ + res = sltab_add_sym(nm, arcn->ln_name, + arcn->sb.st_mode); + if (res == 0) + defer_pmode = 1; + } + break; + case PAX_CTG: + case PAX_HLK: + case PAX_HRG: + case PAX_REG: + default: + /* + * we should never get here + */ + paxwarn(0, "%s has an unknown file type, skipping", + nm); + return(-1); + } + + /* + * if we were able to create the node break out of the loop, + * otherwise try to unlink the node and try again. if that + * fails check the full path and try a final time. 
+ */ + if (res == 0) + break; + + /* + * we failed to make the node + */ + oerrno = errno; + if ((ign = unlnk_exist(nm, arcn->type)) < 0) + return(-1); + + if (++pass <= 1) + continue; + + if (nodirs || chk_path(nm,arcn->sb.st_uid,arcn->sb.st_gid, 0) < 0) { + syswarn(1, oerrno, "Could not create: %s", nm); + return(-1); + } + } + + /* + * we were able to create the node. set uid/gid, modes and times + */ + if (pids) + res = set_ids(nm, arcn->sb.st_uid, arcn->sb.st_gid); + else + res = 0; + + /* + * IMPORTANT SECURITY NOTE: + * if not preserving mode or we cannot set uid/gid, then PROHIBIT any + * set uid/gid bits + */ + if (!pmode || res) + arcn->sb.st_mode &= ~(SETBITS); + if (pmode && !defer_pmode) + set_pmode(nm, arcn->sb.st_mode); + + if (arcn->type == PAX_DIR && op_mode != OP_CPIO) { + /* + * Dirs must be processed again at end of extract to set times + * and modes to agree with those stored in the archive. However + * to allow extract to continue, we may have to also set owner + * rights. This allows nodes in the archive that are children + * of this directory to be extracted without failure. Both time + * and modes will be fixed after the entire archive is read and + * before pax exits. To do that safely, we want the dev+ino + * of the directory we created. + */ + if (lstat(nm, &sb) == -1) { + syswarn(0, errno,"Could not access %s (stat)", nm); + } else if (access(nm, R_OK | W_OK | X_OK) == -1) { + /* + * We have to add rights to the dir, so we make + * sure to restore the mode. The mode must be + * restored AS CREATED and not as stored if + * pmode is not set. + */ + set_pmode(nm, + ((sb.st_mode & FILEBITS) | S_IRWXU)); + if (!pmode) + arcn->sb.st_mode = sb.st_mode; + + /* + * we have to force the mode to what was set + * here, since we changed it from the default + * as created. 
+ */ + arcn->sb.st_dev = sb.st_dev; + arcn->sb.st_ino = sb.st_ino; + add_dir(nm, &(arcn->sb), 1); + } else if (pmode || patime || pmtime) { + arcn->sb.st_dev = sb.st_dev; + arcn->sb.st_ino = sb.st_ino; + add_dir(nm, &(arcn->sb), 0); + } + } else if (patime || pmtime) + set_ftime(nm, &arcn->sb.st_mtim, &arcn->sb.st_atim, 0); + return(0); +} + +/* + * unlnk_exist() + * Remove node from file system with the specified name. We pass the type + * of the node that is going to replace it. When we try to create a + * directory and find that it already exists, we allow processing to + * continue as proper modes etc will always be set for it later on. + * Return: + * 0 is ok to proceed, no file with the specified name exists + * -1 we were unable to remove the node, or we should not remove it (-k) + * 1 we found a directory and we were going to create a directory. + */ + +int +unlnk_exist(char *name, int type) +{ + struct stat sb; + + /* + * the file does not exist, or -k we are done + */ + if (lstat(name, &sb) == -1) + return(0); + if (kflag) + return(-1); + + if (S_ISDIR(sb.st_mode)) { + /* + * try to remove a directory, if it fails and we were going to + * create a directory anyway, tell the caller (return a 1) + */ + if (rmdir(name) == -1) { + if (type == PAX_DIR) + return(1); + syswarn(1,errno,"Unable to remove directory %s", name); + return(-1); + } + delete_dir(sb.st_dev, sb.st_ino); + return(0); + } + + /* + * try to get rid of all non-directory type nodes + */ + if (unlink(name) == -1) { + syswarn(1, errno, "Could not unlink %s", name); + return(-1); + } + return(0); +} + +/* + * chk_path() + * We were trying to create some kind of node in the file system and it + * failed. chk_path() makes sure the path up to the node exists and is + * writeable. When we have to create a directory that is missing along the + * path somewhere, the directory we create will be set to the same + * uid/gid as the file has (when uid and gid are being preserved). 
+ * NOTE: this routine is a real performance loss. It is only used as a + * last resort when trying to create entries in the file system. + * Return: + * -1 when it could find nothing it is allowed to fix. + * 0 otherwise + */ + +int +chk_path(char *name, uid_t st_uid, gid_t st_gid, int ign) +{ + char *spt = name; + char *next; + struct stat sb; + int retval = -1; + + /* + * watch out for paths with nodes stored directly in / (e.g. /bozo) + */ + while (*spt == '/') + ++spt; + + for (;;) { + /* + * work forward from the first / and check each part of the path + */ + spt = strchr(spt, '/'); + if (spt == NULL) + break; + + /* + * skip over duplicate slashes; stop if there're only + * trailing slashes left + */ + next = spt + 1; + while (*next == '/') + next++; + if (*next == '\0') + break; + + *spt = '\0'; + + /* + * if it exists we assume it is a directory, it is not within + * the spec (at least it seems to read that way) to alter the + * file system for nodes NOT EXPLICITLY stored on the archive. + * If that assumption is changed, you would test the node here + * and figure out how to get rid of it (probably like some + * recursive unlink()) or fix up the directory permissions if + * required (do an access()). + */ + if (lstat(name, &sb) == 0) { + *spt = '/'; + spt = next; + continue; + } + + /* + * the path fails at this point, see if we can create the + * needed directory and continue on + */ + if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) == -1) { + if (!ign) + syswarn(1, errno, "Unable to mkdir %s", name); + *spt = '/'; + retval = -1; + break; + } + + /* + * we were able to create the directory. We will tell the + * caller that we found something to fix, and it is ok to try + * and create the node again. + */ + retval = 0; + if (pids) + (void)set_ids(name, st_uid, st_gid); + + /* + * make sure the user doesn't have some strange umask that + * causes this newly created directory to be unusable. 
We fix + * the modes and restore them back to the creation default at + * the end of pax + */ + if ((access(name, R_OK | W_OK | X_OK) == -1) && + (lstat(name, &sb) == 0)) { + set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU)); + add_dir(name, &sb, 1); + } + *spt = '/'; + spt = next; + continue; + } + return(retval); +} + +/* + * set_ftime() + * Set the access time and modification time for a named file. If frc + * is non-zero we force these times to be set even if the user did not + * request access and/or modification time preservation (this is also + * used by -t to reset access times). + * When ign is zero, only those times the user has asked for are set, the + * other ones are left alone. + */ + +void +set_ftime(const char *fnm, const struct timespec *mtimp, + const struct timespec *atimp, int frc) +{ + struct timespec tv[2]; + + tv[0] = *atimp; + tv[1] = *mtimp; + + if (!frc) { + /* + * if we are not forcing, only set those times the user wants + * set. + */ + if (!patime) + tv[0].tv_nsec = UTIME_OMIT; + if (!pmtime) + tv[1].tv_nsec = UTIME_OMIT; + } + + /* + * set the times + */ + if (utimensat(AT_FDCWD, fnm, tv, AT_SYMLINK_NOFOLLOW) < 0) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); +} + +void +fset_ftime(const char *fnm, int fd, const struct timespec *mtimp, + const struct timespec *atimp, int frc) +{ + struct timespec tv[2]; + + + tv[0] = *atimp; + tv[1] = *mtimp; + + if (!frc) { + /* + * if we are not forcing, only set those times the user wants + * set. 
+ */ + if (!patime) + tv[0].tv_nsec = UTIME_OMIT; + if (!pmtime) + tv[1].tv_nsec = UTIME_OMIT; + } + /* + * set the times + */ + if (futimens(fd, tv) == -1) + syswarn(1, errno, "Access/modification time set failed on: %s", + fnm); +} + +/* + * set_ids() + * set the uid and gid of a file system node + * Return: + * 0 when set, -1 on failure + */ + +int +set_ids(char *fnm, uid_t uid, gid_t gid) +{ + if (fchownat(AT_FDCWD, fnm, uid, gid, AT_SYMLINK_NOFOLLOW) == -1) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (op_mode == OP_PAX || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +int +fset_ids(char *fnm, int fd, uid_t uid, gid_t gid) +{ + if (fchown(fd, uid, gid) == -1) { + /* + * ignore EPERM unless in verbose mode or being run by root. + * if running as pax, POSIX requires a warning. + */ + if (op_mode == OP_PAX || errno != EPERM || vflag || + geteuid() == 0) + syswarn(1, errno, "Unable to set file uid/gid of %s", + fnm); + return(-1); + } + return(0); +} + +/* + * set_pmode() + * Set file access mode + */ + +void +set_pmode(char *fnm, mode_t mode) +{ + mode &= ABITS; + if (fchmodat(AT_FDCWD, fnm, mode, AT_SYMLINK_NOFOLLOW) == -1 && errno != EOPNOTSUPP) + syswarn(1, errno, "Could not set permissions on %s", fnm); +} + +void +fset_pmode(char *fnm, int fd, mode_t mode) +{ + mode &= ABITS; + if (fchmod(fd, mode) == -1) + syswarn(1, errno, "Could not set permissions on %s", fnm); +} + +/* + * set_attr() + * Given a DIRDATA, restore the mode and times as indicated, but + * only after verifying that it's the directory that we wanted. 
+ */ +int +set_attr(const struct file_times *ft, int force_times, mode_t mode, + int do_mode, int in_sig) +{ + struct stat sb; + int fd, r; + + if (!do_mode && !force_times && !patime && !pmtime) + return (0); + + /* + * We could legitimately go through a symlink here, + * so do *not* use O_NOFOLLOW. The dev+ino check will + * protect us from evil. + */ + fd = open(ft->ft_name, O_RDONLY | O_DIRECTORY); + if (fd == -1) { + if (!in_sig) + syswarn(1, errno, "Unable to restore mode and times" + " for directory: %s", ft->ft_name); + return (-1); + } + + if (fstat(fd, &sb) == -1) { + if (!in_sig) + syswarn(1, errno, "Unable to stat directory: %s", + ft->ft_name); + r = -1; + } else if (ft->ft_ino != sb.st_ino || ft->ft_dev != sb.st_dev) { + if (!in_sig) + paxwarn(1, "Directory vanished before restoring" + " mode and times: %s", ft->ft_name); + r = -1; + } else { + /* Whew, it's a match! Is there anything to change? */ + if (do_mode && (mode & ABITS) != (sb.st_mode & ABITS)) + fset_pmode(ft->ft_name, fd, mode); + if (((force_times || patime) && + timespeccmp(&ft->ft_atim, &sb.st_atim, !=)) || + ((force_times || pmtime) && + timespeccmp(&ft->ft_mtim, &sb.st_mtim, !=))) + fset_ftime(ft->ft_name, fd, &ft->ft_mtim, + &ft->ft_atim, force_times); + r = 0; + } + close(fd); + + return (r); +} + + +/* + * file_write() + * Write/copy a file (during copy or archive extract). This routine knows + * how to copy files with lseek holes in it. (Which are read as file + * blocks containing all 0's but do not have any file blocks associated + * with the data). Typical examples of these are files created by dbm + * variants (.pag files). While the file size of these files are huge, the + * actual storage is quite small (the files are sparse). The problem is + * the holes read as all zeros so are probably stored on the archive that + * way (there is no way to determine if the file block is really a hole, + * we only know that a file block of all zero's can be a hole). 
+ * At this writing, no major archive format knows how to archive files + * with holes. However, on extraction (or during copy, -rw) we have to + * deal with these files. Without detecting the holes, the files can + * consume a lot of file space if just written to disk. This replacement + * for write when passed the basic allocation size of a file system block, + * uses lseek whenever it detects the input data is all 0 within that + * file block. In more detail, the strategy is as follows: + * While the input is all zero keep doing an lseek. Keep track of when we + * pass over file block boundaries. Only write when we hit a non zero + * input. once we have written a file block, we continue to write it to + * the end (we stop looking at the input). When we reach the start of the + * next file block, start checking for zero blocks again. Working on file + * block boundaries significantly reduces the overhead when copying files + * that are NOT very sparse. This overhead (when compared to a write) is + * almost below the measurement resolution on many systems. Without it, + * files with holes cannot be safely copied. It does has a side effect as + * it can put holes into files that did not have them before, but that is + * not a problem since the file contents are unchanged (in fact it saves + * file space). (Except on paging files for diskless clients. But since we + * cannot determine one of those file from here, we ignore them). If this + * ever ends up on a system where CTG files are supported and the holes + * are not desired, just do a conditional test in those routines that + * call file_write() and have it call write() instead. BEFORE CLOSING THE + * FILE, make sure to call file_flush() when the last write finishes with + * an empty block. A lot of file systems will not create an lseek hole at + * the end. In this case we drop a single 0 at the end to force the + * trailing 0's in the file. 
+ * ---Parameters--- + * rem: how many bytes left in this file system block + * isempt: have we written to the file block yet (is it empty) + * sz: basic file block allocation size + * cnt: number of bytes on this write + * str: buffer to write + * Return: + * number of bytes written, -1 on write (or lseek) error. + */ + +int +file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz, + char *name) +{ + char *pt; + char *end; + int wcnt; + char *st = str; + + /* + * while we have data to process + */ + while (cnt) { + if (!*rem) { + /* + * We are now at the start of file system block again + * (or what we think one is...). start looking for + * empty blocks again + */ + *isempt = 1; + *rem = sz; + } + + /* + * only examine up to the end of the current file block or + * remaining characters to write, whatever is smaller + */ + wcnt = MINIMUM(cnt, *rem); + cnt -= wcnt; + *rem -= wcnt; + if (*isempt) { + /* + * have not written to this block yet, so we keep + * looking for zero's + */ + pt = st; + end = st + wcnt; + + /* + * look for a zero filled buffer + */ + while ((pt < end) && (*pt == '\0')) + ++pt; + + if (pt == end) { + /* + * skip, buf is empty so far + */ + if (fd > -1 && + lseek(fd, wcnt, SEEK_CUR) < 0) { + syswarn(1,errno,"File seek on %s", + name); + return(-1); + } + st = pt; + continue; + } + /* + * drat, the buf is not zero filled + */ + *isempt = 0; + } + + /* + * have non-zero data in this file system block, have to write + */ + if (write(fd, st, wcnt) != wcnt) { + syswarn(1, errno, "Failed write to file %s", name); + return(-1); + } + st += wcnt; + } + return(st - str); +} + +/* + * file_flush() + * when the last file block in a file is zero, many file systems will not + * let us create a hole at the end. To get the last block with zeros, we + * write the last BYTE with a zero (back up one byte and write a zero). 
+ */ + +void +file_flush(int fd, char *fname, int isempt) +{ + static char blnk[] = "\0"; + + /* + * silly test, but make sure we are only called when the last block is + * filled with all zeros. + */ + if (!isempt) + return; + + /* + * move back one byte and write a zero + */ + if (lseek(fd, -1, SEEK_CUR) < 0) { + syswarn(1, errno, "Failed seek on file %s", fname); + return; + } + + if (write(fd, blnk, 1) == -1) + syswarn(1, errno, "Failed write to file %s", fname); +} + +/* + * rdfile_close() + * close a file we have been reading (to copy or archive). If we have to + * reset access time (tflag) do so (the times are stored in arcn). + */ + +void +rdfile_close(ARCHD *arcn, int *fd) +{ + /* + * make sure the file is open + */ + if (*fd < 0) + return; + + /* + * user wants last access time reset + */ + if (tflag) + fset_ftime(arcn->org_name, *fd, &arcn->sb.st_mtim, + &arcn->sb.st_atim, 1); + + (void)close(*fd); + *fd = -1; +} + +/* + * set_crc() + * read a file to calculate its crc. This is a real drag. Archive formats + * that have this, end up reading the file twice (we have to write the + * header WITH the crc before writing the file contents. Oh well... + * Return: + * 0 if was able to calculate the crc, -1 otherwise + */ + +int +set_crc(ARCHD *arcn, int fd) +{ + int i; + int res; + off_t cpcnt = 0; + size_t size; + u_int32_t crc = 0; + char tbuf[FILEBLK]; + struct stat sb; + + if (fd < 0) { + /* + * hmm, no fd, should never happen. well no crc then. + */ + arcn->crc = 0; + return(0); + } + + if ((size = arcn->sb.st_blksize) > sizeof(tbuf)) + size = sizeof(tbuf); + + /* + * read all the bytes we think that there are in the file. If the user + * is trying to archive an active file, forget this file. + */ + for (;;) { + if ((res = read(fd, tbuf, size)) <= 0) + break; + cpcnt += res; + for (i = 0; i < res; ++i) + crc += (tbuf[i] & 0xff); + } + + /* + * safety check. we want to avoid archiving files that are active as + * they can create inconsistent archive copies. 
+ */ + if (cpcnt != arcn->sb.st_size) + paxwarn(1, "File changed size %s", arcn->org_name); + else if (fstat(fd, &sb) == -1) + syswarn(1, errno, "Failed stat on %s", arcn->org_name); + else if (timespeccmp(&arcn->sb.st_mtim, &sb.st_mtim, !=)) + paxwarn(1, "File %s was modified during read", arcn->org_name); + else if (lseek(fd, 0, SEEK_SET) < 0) + syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); + else { + arcn->crc = crc; + return(0); + } + return(-1); +} diff --git a/bin/pax/ftree.c b/bin/pax/ftree.c new file mode 100644 index 0000000..b780dbb --- /dev/null +++ b/bin/pax/ftree.c @@ -0,0 +1,566 @@ +/* $OpenBSD: ftree.c,v 1.42 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: ftree.c,v 1.4 1995/03/21 09:07:21 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fts.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Data structure used to store the file args to be handed to fts(). + * It keeps track of which args generated a "selected" member. + */ +typedef struct ftree { + char *fname; /* file tree name */ + int refcnt; /* has tree had a selected file? */ + int newercnt; /* skipped due to -u/-D */ + int chflg; /* change directory flag */ + struct ftree *fow; /* pointer to next entry on list */ +} FTREE; + + +/* + * routines to interface with the fts library function. + * + * file args supplied to pax are stored on a single linked list (of type FTREE) + * and given to fts to be processed one at a time. pax "selects" files from + * the expansion of each arg into the corresponding file tree (if the arg is a + * directory, otherwise the node itself is just passed to pax). The selection + * is modified by the -n and -u flags. The user is informed when a specific + * file arg does not generate any selected files. -n keeps expanding the file + * tree arg until one of its files is selected, then skips to the next file + * arg. 
when the user does not supply the file trees as command line args to + * pax, they are read from stdin + */ + +static FTS *ftsp = NULL; /* current FTS handle */ +static int ftsopts; /* options to be used on fts_open */ +static char *farray[2]; /* array for passing each arg to fts */ +static FTREE *fthead = NULL; /* head of linked list of file args */ +static FTREE *fttail = NULL; /* tail of linked list of file args */ +static FTREE *ftcur = NULL; /* current file arg being processed */ +static FTSENT *ftent = NULL; /* current file tree entry */ +static int ftree_skip; /* when set skip to next file arg */ + +static int ftree_arg(void); +static char *getpathname(char *, int); + +/* + * ftree_start() + * initialize the options passed to fts_open() during this run of pax + * options are based on the selection of pax options by the user + * fts_start() also calls fts_arg() to open the first valid file arg. We + * also attempt to reset directory access times when -t (tflag) is set. + * Return: + * 0 if there is at least one valid file arg to process, -1 otherwise + */ + +int +ftree_start(void) +{ + /* + * set up the operation mode of fts, open the first file arg. We must + * use FTS_NOCHDIR, as the user may have to open multiple archives and + * if fts did a chdir off into the boondocks, we may create an archive + * volume in an place where the user did not expect to. + */ + ftsopts = FTS_NOCHDIR; + + /* + * optional user flags that effect file traversal + * -H command line symlink follow only (half follow) + * -L follow sylinks (logical) + * -P do not follow sylinks (physical). This is the default. + * -X do not cross over mount points + * -t preserve access times on files read. + * -n select only the first member of a file tree when a match is found + * -d do not extract subtrees rooted at a directory arg. 
+ */ + if (Lflag) + ftsopts |= FTS_LOGICAL; + else + ftsopts |= FTS_PHYSICAL; + if (Hflag) + ftsopts |= FTS_COMFOLLOW; + if (Xflag) + ftsopts |= FTS_XDEV; + + if ((fthead == NULL) && ((farray[0] = malloc(PAXPATHLEN+2)) == NULL)) { + paxwarn(1, "Unable to allocate memory for file name buffer"); + return(-1); + } + + if (ftree_arg() < 0) + return(-1); + if (tflag && (atdir_start() < 0)) + return(-1); + return(0); +} + +/* + * ftree_add() + * add the arg to the linked list of files to process. Each will be + * processed by fts one at a time + * Return: + * 0 if added to the linked list, -1 if failed + */ + +int +ftree_add(char *str, int chflg) +{ + FTREE *ft; + int len; + + /* + * simple check for bad args + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(0, "Invalid file name argument"); + return(-1); + } + + /* + * allocate FTREE node and add to the end of the linked list (args are + * processed in the same order they were passed to pax). Get rid of any + * trailing / the user may pass us. (watch out for / by itself). + */ + if ((ft = malloc(sizeof(FTREE))) == NULL) { + paxwarn(0, "Unable to allocate memory for filename"); + return(-1); + } + + if (((len = strlen(str) - 1) > 0) && (str[len] == '/')) + str[len] = '\0'; + ft->fname = str; + ft->refcnt = 0; + ft->newercnt = 0; + ft->chflg = chflg; + ft->fow = NULL; + if (fthead == NULL) { + fttail = fthead = ft; + return(0); + } + fttail->fow = ft; + fttail = ft; + return(0); +} + +/* + * ftree_sel() + * this entry has been selected by pax. bump up reference count and handle + * -n and -d processing. + */ + +void +ftree_sel(ARCHD *arcn) +{ + /* + * set reference bit for this pattern. This linked list is only used + * when file trees are supplied pax as args. The list is not used when + * the trees are read from stdin. + */ + if (ftcur != NULL) + ftcur->refcnt = 1; + + /* + * if -n we are done with this arg, force a skip to the next arg when + * pax asks for the next file in next_file(). 
+ * if -d we tell fts only to match the directory (if the arg is a dir) + * and not the entire file tree rooted at that point. + */ + if (nflag) + ftree_skip = 1; + + if (!dflag || (arcn->type != PAX_DIR)) + return; + + if (ftent != NULL) + (void)fts_set(ftsp, ftent, FTS_SKIP); +} + +/* + * ftree_skipped_newer() + * file has been skipped because a newer file exists and -u/-D given + */ + +void +ftree_skipped_newer(ARCHD *arcn) +{ + /* skipped due to -u/-D, mark accordingly */ + if (ftcur != NULL) + ftcur->newercnt = 1; +} + +/* + * ftree_chk() + * called at end on pax execution. Prints all those file args that did not + * have a selected member (reference count still 0) + */ + +void +ftree_chk(void) +{ + FTREE *ft; + int wban = 0; + + /* + * make sure all dir access times were reset. + */ + if (tflag) + atdir_end(); + + /* + * walk down list and check reference count. Print out those members + * that never had a match + */ + for (ft = fthead; ft != NULL; ft = ft->fow) { + if ((ft->refcnt > 0) || ft->newercnt > 0 || ft->chflg) + continue; + if (wban == 0) { + paxwarn(1,"WARNING! These file names were not selected:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", ft->fname); + } +} + +/* + * ftree_arg() + * Get the next file arg for fts to process. Can be from either the linked + * list or read from stdin when the user did not them as args to pax. Each + * arg is processed until the first successful fts_open(). + * Return: + * 0 when the next arg is ready to go, -1 if out of file args (or EOF on + * stdin). + */ + +static int +ftree_arg(void) +{ + + /* + * close off the current file tree + */ + if (ftsp != NULL) { + (void)fts_close(ftsp); + ftsp = NULL; + } + + /* + * keep looping until we get a valid file tree to process. 
Stop when we + * reach the end of the list (or get an eof on stdin) + */ + for (;;) { + if (fthead == NULL) { + /* + * the user didn't supply any args, get the file trees + * to process from stdin; + */ + if (getpathname(farray[0], PAXPATHLEN+1) == NULL) + return(-1); + } else { + /* + * the user supplied the file args as arguments to pax + */ + if (ftcur == NULL) + ftcur = fthead; + else if ((ftcur = ftcur->fow) == NULL) + return(-1); + if (ftcur->chflg) { + /* First fchdir() back... */ + if (fchdir(cwdfd) == -1) { + syswarn(1, errno, + "Can't fchdir to starting directory"); + return(-1); + } + if (chdir(ftcur->fname) == -1) { + syswarn(1, errno, "Can't chdir to %s", + ftcur->fname); + return(-1); + } + continue; + } else + farray[0] = ftcur->fname; + } + + /* + * watch it, fts wants the file arg stored in a array of char + * ptrs, with the last one a null. we use a two element array + * and set farray[0] to point at the buffer with the file name + * in it. We cannot pass all the file args to fts at one shot + * as we need to keep a handle on which file arg generates what + * files (the -n and -d flags need this). If the open is + * successful, return a 0. + */ + if ((ftsp = fts_open(farray, ftsopts, NULL)) != NULL) + break; + } + return(0); +} + +/* + * next_file() + * supplies the next file to process in the supplied archd structure. + * Return: + * 0 when contents of arcn have been set with the next file, -1 when done. + */ + +int +next_file(ARCHD *arcn) +{ + int cnt; + + /* + * ftree_sel() might have set the ftree_skip flag if the user has the + * -n option and a file was selected from this file arg tree. (-n says + * only one member is matched for each pattern) ftree_skip being 1 + * forces us to go to the next arg now. 
+ */ + if (ftree_skip) { + /* + * clear and go to next arg + */ + ftree_skip = 0; + if (ftree_arg() < 0) + return(-1); + } + + /* + * loop until we get a valid file to process + */ + for (;;) { + if ((ftent = fts_read(ftsp)) == NULL) { + if (errno) + syswarn(1, errno, "next_file"); + /* + * out of files in this tree, go to next arg, if none + * we are done + */ + if (ftree_arg() < 0) + return(-1); + continue; + } + + /* + * handle each type of fts_read() flag + */ + switch (ftent->fts_info) { + case FTS_D: + case FTS_DEFAULT: + case FTS_F: + case FTS_SL: + case FTS_SLNONE: + /* + * these are all ok + */ + break; + case FTS_DP: + /* + * already saw this directory. If the user wants file + * access times reset, we use this to restore the + * access time for this directory since this is the + * last time we will see it in this file subtree + * remember to force the time (this is -t on a read + * directory, not a created directory). + */ + if (!tflag) + continue; + do_atdir(ftent->fts_path, ftent->fts_statp->st_dev, + ftent->fts_statp->st_ino); + continue; + case FTS_DC: + /* + * fts claims a file system cycle + */ + paxwarn(1,"File system cycle found at %s",ftent->fts_path); + continue; + case FTS_DNR: + syswarn(1, ftent->fts_errno, + "Unable to read directory %s", ftent->fts_path); + continue; + case FTS_ERR: + syswarn(1, ftent->fts_errno, + "File system traversal error"); + continue; + case FTS_NS: + case FTS_NSOK: + syswarn(1, ftent->fts_errno, + "Unable to access %s", ftent->fts_path); + continue; + } + + /* + * ok got a file tree node to process. copy info into arcn + * structure (initialize as required) + */ + arcn->skip = 0; + arcn->pad = 0; + arcn->ln_nlen = 0; + arcn->ln_name[0] = '\0'; + memcpy(&arcn->sb, ftent->fts_statp, sizeof(arcn->sb)); + + /* + * file type based set up and copy into the arcn struct + * SIDE NOTE: + * we try to reset the access time on all files and directories + * we may read when the -t flag is specified. 
files are reset + * when we close them after copying. we reset the directories + * when we are done with their file tree (we also clean up at + * end in case we cut short a file tree traversal). However + * there is no way to reset access times on symlinks. + */ + switch (S_IFMT & arcn->sb.st_mode) { + case S_IFDIR: + arcn->type = PAX_DIR; + if (!tflag) + break; + add_atdir(ftent->fts_path, arcn->sb.st_dev, + arcn->sb.st_ino, &arcn->sb.st_mtim, + &arcn->sb.st_atim); + break; + case S_IFCHR: + arcn->type = PAX_CHR; + break; + case S_IFBLK: + arcn->type = PAX_BLK; + break; + case S_IFREG: + /* + * only regular files with have data to store on the + * archive. all others will store a zero length skip. + * the skip field is used by pax for actual data it has + * to read (or skip over). + */ + arcn->type = PAX_REG; + arcn->skip = arcn->sb.st_size; + break; + case S_IFLNK: + arcn->type = PAX_SLK; + /* + * have to read the symlink path from the file + */ + if ((cnt = readlink(ftent->fts_path, arcn->ln_name, + PAXPATHLEN)) == -1) { + syswarn(1, errno, "Unable to read symlink %s", + ftent->fts_path); + continue; + } + /* + * set link name length, watch out readlink does not + * NUL terminate the link path + */ + arcn->ln_name[cnt] = '\0'; + arcn->ln_nlen = cnt; + break; + case S_IFSOCK: + /* + * under BSD storing a socket is senseless but we will + * let the format specific write function make the + * decision of what to do with it. + */ + arcn->type = PAX_SCK; + break; + case S_IFIFO: + arcn->type = PAX_FIF; + break; + } + break; + } + + /* + * copy file name, set file name length + */ + arcn->nlen = strlcpy(arcn->name, ftent->fts_path, sizeof(arcn->name)); + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ + arcn->org_name = ftent->fts_path; + return(0); +} + +/* + * getpathname() + * Reads a pathname from stdin, handling NUL- or newline-termination. 
+ * Return: + * NULL at end of file, otherwise the NUL-terminated buffer. + */ + +static char * +getpathname(char *buf, int buflen) +{ + char *bp, *ep; + int ch, term; + + if (zeroflag) { + /* + * Read a NUL-terminated pathname, being especially + * paranoid about proper termination and pathname length. + */ + for (bp = buf, ep = buf + buflen; bp < ep; bp++) { + if ((ch = getchar()) == EOF) { + if (bp != buf) + paxwarn(1, "Ignoring unterminated " + "pathname at EOF"); + return(NULL); + } + if ((*bp = ch) == '\0') + return(buf); + } + /* Too long - skip this path */ + *--bp = '\0'; + term = '\0'; + } else { + if (fgets(buf, buflen, stdin) == NULL) + return(NULL); + if ((bp = strchr(buf, '\n')) != NULL || feof(stdin)) { + if (bp != NULL) + *bp = '\0'; + return(buf); + } + /* Too long - skip this path */ + term = '\n'; + } + while ((ch = getchar()) != term && ch != EOF) + continue; + paxwarn(1, "Ignoring too-long pathname: %s", buf); + return(NULL); +} diff --git a/bin/pax/gen_subs.c b/bin/pax/gen_subs.c new file mode 100644 index 0000000..7eb8200 --- /dev/null +++ b/bin/pax/gen_subs.c @@ -0,0 +1,401 @@ +/* $OpenBSD: gen_subs.c,v 1.32 2016/08/26 05:06:14 guenther Exp $ */ +/* $NetBSD: gen_subs.c,v 1.5 1995/03/21 09:07:26 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <utmp.h> +#include <vis.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "extern.h" + +/* + * a collection of general purpose subroutines used by pax + */ + +/* + * constants used by ls_list() when printing out archive members + */ +#define MODELEN 20 +#define DATELEN 64 +#define SECSPERDAY (24 * 60 * 60) +#define SIXMONTHS (SECSPERDAY * 365 / 2) +#define CURFRMT "%b %e %H:%M" +#define OLDFRMT "%b %e %Y" +#define NAME_WIDTH 8 +#define TIMEFMT(t, now) \ + (((t) + SIXMONTHS <= (now) || (t) > (now)) ? 
OLDFRMT : CURFRMT) + +/* + * ls_list() + * list the members of an archive in ls format + */ + +void +ls_list(ARCHD *arcn, time_t now, FILE *fp) +{ + struct stat *sbp; + char f_mode[MODELEN]; + char f_date[DATELEN]; + int term; + + term = zeroflag ? '\0' : '\n'; /* path termination character */ + + /* + * if not verbose, just print the file name + */ + if (!vflag) { + if (zeroflag) + (void)fputs(arcn->name, fp); + else + safe_print(arcn->name, fp); + (void)putc(term, fp); + (void)fflush(fp); + return; + } + + /* + * user wants long mode + */ + sbp = &(arcn->sb); + strmode(sbp->st_mode, f_mode); + + /* + * print file mode, link count, uid, gid and time + */ + if (strftime(f_date, sizeof(f_date), TIMEFMT(sbp->st_mtime, now), + localtime(&(sbp->st_mtime))) == 0) + f_date[0] = '\0'; + (void)fprintf(fp, "%s%2u %-*.*s %-*.*s ", f_mode, (unsigned)sbp->st_nlink, + NAME_WIDTH, UT_NAMESIZE, user_from_uid(sbp->st_uid, 0), + NAME_WIDTH, UT_NAMESIZE, group_from_gid(sbp->st_gid, 0)); + + /* + * print device id's for devices, or sizes for other nodes + */ + if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK)) + (void)fprintf(fp, "%4lu, %4lu ", + (unsigned long)MAJOR(sbp->st_rdev), + (unsigned long)MINOR(sbp->st_rdev)); + else { + (void)fprintf(fp, "%9zu ", sbp->st_size); + } + + /* + * print name and link info for hard and soft links + */ + (void)fputs(f_date, fp); + (void)putc(' ', fp); + safe_print(arcn->name, fp); + if (PAX_IS_HARDLINK(arcn->type)) { + fputs(" == ", fp); + safe_print(arcn->ln_name, fp); + } else if (arcn->type == PAX_SLK) { + fputs(" -> ", fp); + safe_print(arcn->ln_name, fp); + } + (void)putc(term, fp); + (void)fflush(fp); +} + +/* + * tty_ls() + * print a short summary of file to tty. 
+ */ + +void +ls_tty(ARCHD *arcn) +{ + char f_date[DATELEN]; + char f_mode[MODELEN]; + time_t now = time(NULL); + + /* + * convert time to string, and print + */ + if (strftime(f_date, DATELEN, TIMEFMT(arcn->sb.st_mtime, now), + localtime(&(arcn->sb.st_mtime))) == 0) + f_date[0] = '\0'; + strmode(arcn->sb.st_mode, f_mode); + tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name); +} + +void +safe_print(const char *str, FILE *fp) +{ + char visbuf[5]; + const char *cp; + + /* + * if printing to a tty, use vis(3) to print special characters. + */ + if (isatty(fileno(fp))) { + for (cp = str; *cp; cp++) { + (void)vis(visbuf, cp[0], VIS_CSTYLE, cp[1]); + (void)fputs(visbuf, fp); + } + } else { + (void)fputs(str, fp); + } +} + +/* + * asc_ul() + * convert hex/octal character string into a u_long. We do not have to + * check for overflow! (the headers in all supported formats are not large + * enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. + * Return: + * unsigned long value + */ + +u_long +asc_ul(char *str, int len, int base) +{ + char *stop; + u_long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ul_asc() + * convert an unsigned long into an hex/oct ascii string. pads with LEADING + * ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. 
+ */ + +int +ul_asc(u_long val, char *str, int len, int base) +{ + char *pt; + u_long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + val >>= 4; + if (val == 0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. + */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * asc_ull() + * Convert hex/octal character string into a unsigned long long. + * We do not have to check for overflow! (The headers in all + * supported formats are not large enough to create an overflow). + * NOTE: strings passed to us are NOT TERMINATED. 
+ * Return: + * unsigned long long value + */ + +unsigned long long +asc_ull(char *str, int len, int base) +{ + char *stop; + unsigned long long tval = 0; + + stop = str + len; + + /* + * skip over leading blanks and zeros + */ + while ((str < stop) && ((*str == ' ') || (*str == '0'))) + ++str; + + /* + * for each valid digit, shift running value (tval) over to next digit + * and add next digit + */ + if (base == HEX) { + while (str < stop) { + if ((*str >= '0') && (*str <= '9')) + tval = (tval << 4) + (*str++ - '0'); + else if ((*str >= 'A') && (*str <= 'F')) + tval = (tval << 4) + 10 + (*str++ - 'A'); + else if ((*str >= 'a') && (*str <= 'f')) + tval = (tval << 4) + 10 + (*str++ - 'a'); + else + break; + } + } else { + while ((str < stop) && (*str >= '0') && (*str <= '7')) + tval = (tval << 3) + (*str++ - '0'); + } + return(tval); +} + +/* + * ull_asc() + * Convert an unsigned long long into a hex/oct ascii string. + * Pads with LEADING ascii 0's to fill string completely + * NOTE: the string created is NOT TERMINATED. + */ + +int +ull_asc(unsigned long long val, char *str, int len, int base) +{ + char *pt; + unsigned long long digit; + + /* + * WARNING str is not '\0' terminated by this routine + */ + pt = str + len - 1; + + /* + * do a tailwise conversion (start at right most end of string to place + * least significant digit). Keep shifting until conversion value goes + * to zero (all digits were converted) + */ + if (base == HEX) { + while (pt >= str) { + if ((digit = (val & 0xf)) < 10) + *pt-- = '0' + (char)digit; + else + *pt-- = 'a' + (char)(digit - 10); + val >>= 4; + if (val == 0) + break; + } + } else { + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + } + + /* + * pad with leading ascii ZEROS. We return -1 if we ran out of space. 
+ */ + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * Copy at max min(bufz, fieldsz) chars from field to buf, stopping + * at the first NUL char. NUL terminate buf if there is room left. + */ +size_t +fieldcpy(char *buf, size_t bufsz, const char *field, size_t fieldsz) +{ + char *p = buf; + const char *q = field; + size_t i = 0; + + if (fieldsz > bufsz) + fieldsz = bufsz; + while (i < fieldsz && *q != '\0') { + *p++ = *q++; + i++; + } + if (i < bufsz) + *p = '\0'; + return(i); +} diff --git a/bin/pax/getoldopt.c b/bin/pax/getoldopt.c new file mode 100644 index 0000000..8ceb189 --- /dev/null +++ b/bin/pax/getoldopt.c @@ -0,0 +1,69 @@ +/* $OpenBSD: getoldopt.c,v 1.9 2009/10/27 23:59:22 deraadt Exp $ */ +/* $NetBSD: getoldopt.c,v 1.3 1995/03/21 09:07:28 cgd Exp $ */ + +/* + * Plug-compatible replacement for getopt() for parsing tar-like + * arguments. If the first argument begins with "-", it uses getopt; + * otherwise, it uses the old rules used by tar, dump, and ps. + * + * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu) and placed + * in the Public Domain for your edification and enjoyment. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include "pax.h" +#include "extern.h" + +int +getoldopt(int argc, char **argv, const char *optstring) +{ + static char *key; /* Points to next keyletter */ + static char use_getopt; /* !=0 if argv[1][0] was '-' */ + char c; + char *place; + + optarg = NULL; + + if (key == NULL) { /* First time */ + if (argc < 2) + return (-1); + key = argv[1]; + if (*key == '-') + use_getopt++; + else + optind = 2; + } + + if (use_getopt) + return (getopt(argc, argv, optstring)); + + c = *key++; + if (c == '\0') { + key--; + return (-1); + } + place = strchr(optstring, c); + + if (place == NULL || c == ':') { + fprintf(stderr, "%s: unknown option %c\n", argv[0], c); + return ('?'); + } + + place++; + if (*place == ':') { + if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + fprintf(stderr, "%s: %c argument missing\n", + argv[0], c); + return ('?'); + } + } + + return (c); +} diff --git a/bin/pax/options.c b/bin/pax/options.c new file mode 100644 index 0000000..917414c --- /dev/null +++ b/bin/pax/options.c @@ -0,0 +1,1788 @@ +/* $OpenBSD: options.c,v 1.103 2019/11/15 20:34:17 naddy Exp $ */ +/* $NetBSD: options.c,v 1.6 1996/03/26 23:54:18 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <limits.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "cpio.h" +#include "tar.h" +#include "extern.h" + +/* + * argv[0] names. Used for tar and cpio emulation + */ + +#define NM_TAR "tar" +#define NM_CPIO "cpio" +#define NM_PAX "pax" + +/* + * Constants used to specify the legal sets of flags in pax. For each major + * operation mode of pax, a set of illegal flags is defined. If any one of + * those illegal flags are found set, we scream and exit + */ + +/* + * flags (one for each option). 
+ */ +#define AF 0x00000001 +#define BF 0x00000002 +#define CF 0x00000004 +#define DF 0x00000008 +#define FF 0x00000010 +#define IF 0x00000020 +#define KF 0x00000040 +#define LF 0x00000080 +#define NF 0x00000100 +#define OF 0x00000200 +#define PF 0x00000400 +#define RF 0x00000800 +#define SF 0x00001000 +#define TF 0x00002000 +#define UF 0x00004000 +#define VF 0x00008000 +#define WF 0x00010000 +#define XF 0x00020000 +#define CBF 0x00040000 /* nonstandard extension */ +#define CDF 0x00080000 /* nonstandard extension */ +#define CEF 0x00100000 /* nonstandard extension */ +#define CGF 0x00200000 /* nonstandard extension */ +#define CHF 0x00400000 /* nonstandard extension */ +#define CLF 0x00800000 /* nonstandard extension */ +#define CPF 0x01000000 /* nonstandard extension */ +#define CTF 0x02000000 /* nonstandard extension */ +#define CUF 0x04000000 /* nonstandard extension */ +#define CXF 0x08000000 +#define CYF 0x10000000 /* nonstandard extension */ +#define CZF 0x20000000 /* nonstandard extension */ +#define C0F 0x40000000 /* nonstandard extension */ + +/* + * ascii string indexed by bit position above (alter the above and you must + * alter this string) used to tell the user what flags caused us to complain + */ +#define FLGCH "abcdfiklnoprstuvwxBDEGHLPTUXYZ0" + +/* + * legal pax operation bit patterns + */ + +#define ISLIST(x) (((x) & (RF|WF)) == 0) +#define ISEXTRACT(x) (((x) & (RF|WF)) == RF) +#define ISARCHIVE(x) (((x) & (AF|RF|WF)) == WF) +#define ISAPPND(x) (((x) & (AF|RF|WF)) == (AF|WF)) +#define ISCOPY(x) (((x) & (RF|WF)) == (RF|WF)) +#define ISWRITE(x) (((x) & (RF|WF)) == WF) + +/* + * Illegal option flag subsets based on pax operation + */ + +#define BDEXTR (AF|BF|LF|TF|WF|XF|CBF|CHF|CLF|CPF|CXF) +#define BDARCH (CF|KF|LF|NF|PF|RF|CDF|CEF|CYF|CZF) +#define BDCOPY (AF|BF|FF|OF|XF|CBF|CEF) +#define BDLIST (AF|BF|IF|KF|LF|OF|PF|RF|TF|UF|WF|XF|CBF|CDF|CHF|CLF|CPF|CXF|CYF|CZF) + + +/* + * Routines which handle command line options + */ + +static char flgch[] 
= FLGCH; /* list of all possible flags */ +static OPLIST *ophead = NULL; /* head for format specific options -x */ +static OPLIST *optail = NULL; /* option tail */ + +static int no_op(void); +static void printflg(unsigned int); +static off_t str_offt(char *); +static char *get_line(FILE *fp); +static void pax_options(int, char **); +static void pax_usage(void); +static void tar_options(int, char **); +static void tar_usage(void); +#ifndef NOCPIO +static void cpio_options(int, char **); +static void cpio_usage(void); +#endif + +static int compress_id(char *_blk, int _size); +static int gzip_id(char *_blk, int _size); +static int bzip2_id(char *_blk, int _size); +static int xz_id(char *_blk, int _size); + +#define GZIP_CMD "gzip" /* command to run as gzip */ +#define COMPRESS_CMD "compress" /* command to run as compress */ +#define BZIP2_CMD "bzip2" /* command to run as bzip2 */ +#define XZ_CMD "xz" /* command to run as xz */ + +/* + * Format specific routine table + * (see pax.h for description of each function) + * + * name, blksz, hdsz, udev, hlk, blkagn, inhead, id, st_read, + * read, end_read, st_write, write, end_write, trail, + * rd_data, wr_data, options + */ + +FSUB fsub[] = { +#ifdef NOCPIO +/* 0: OLD BINARY CPIO */ + { }, +/* 1: OLD OCTAL CHARACTER CPIO */ + { }, +/* 2: SVR4 HEX CPIO */ + { }, +/* 3: SVR4 HEX CPIO WITH CRC */ + { }, +#else +/* 0: OLD BINARY CPIO */ + {"bcpio", 5120, sizeof(HD_BCPIO), 1, 0, 0, 1, bcpio_id, cpio_strd, + bcpio_rd, bcpio_endrd, cpio_stwr, bcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 1: OLD OCTAL CHARACTER CPIO */ + {"cpio", 5120, sizeof(HD_CPIO), 1, 0, 0, 1, cpio_id, cpio_strd, + cpio_rd, cpio_endrd, cpio_stwr, cpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 2: SVR4 HEX CPIO */ + {"sv4cpio", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, vcpio_id, cpio_strd, + vcpio_rd, vcpio_endrd, cpio_stwr, vcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, + +/* 3: SVR4 HEX CPIO WITH CRC */ + {"sv4crc", 5120, sizeof(HD_VCPIO), 1, 0, 0, 1, 
crc_id, crc_strd, + vcpio_rd, vcpio_endrd, crc_stwr, vcpio_wr, cpio_endwr, cpio_trail, + bad_opt}, +#endif +/* 4: OLD TAR */ + {"tar", 10240, BLKMULT, 0, 1, BLKMULT, 0, tar_id, no_op, + tar_rd, tar_endrd, no_op, tar_wr, tar_endwr, tar_trail, + tar_opt}, + +/* 5: POSIX USTAR */ + {"ustar", 10240, BLKMULT, 0, 1, BLKMULT, 0, ustar_id, no_op, + ustar_rd, tar_endrd, no_op, ustar_wr, tar_endwr, tar_trail, + tar_opt}, + +#ifdef SMALL +/* 6: compress, to detect failure to use -Z */ + { }, +/* 7: xz, to detect failure to decompress it */ + { }, +/* 8: bzip2, to detect failure to use -j */ + { }, +/* 9: gzip, to detect failure to use -z */ + { }, +#else +/* 6: compress, to detect failure to use -Z */ + {NULL, 0, 4, 0, 0, 0, 0, compress_id}, +/* 7: xz, to detect failure to decompress it */ + {NULL, 0, 4, 0, 0, 0, 0, xz_id}, +/* 8: bzip2, to detect failure to use -j */ + {NULL, 0, 4, 0, 0, 0, 0, bzip2_id}, +/* 9: gzip, to detect failure to use -z */ + {NULL, 0, 4, 0, 0, 0, 0, gzip_id}, +#endif +}; +#define F_OCPIO 0 /* format when called as cpio -6 */ +#define F_ACPIO 1 /* format when called as cpio -c */ +#define F_CPIO 3 /* format when called as cpio */ +#define F_OTAR 4 /* format when called as tar -o */ +#define F_TAR 5 /* format when called as tar */ +#define DEFLT 5 /* default write format from list above */ + +/* + * ford is the archive search order used by get_arc() to determine what kind + * of archive we are dealing with. This helps to properly id archive formats + * some formats may be subsets of others.... + */ +int ford[] = {5, 4, 9, 8, 7, 6, 3, 2, 1, 0, -1}; + +/* + * Do we have -C anywhere and what is it? + */ +int havechd = 0; +char *chdname = NULL; + +/* + * options() + * figure out if we are pax, tar or cpio. 
Call the appropriate options + * parser + */ + +void +options(int argc, char **argv) +{ + extern char *__progname; + + /* + * Are we acting like pax, tar or cpio (based on argv[0]) + */ + argv0 = __progname; + + if (strcmp(NM_TAR, argv0) == 0) { + op_mode = OP_TAR; + tar_options(argc, argv); + return; + } +#ifndef NOCPIO + else if (strcmp(NM_CPIO, argv0) == 0) { + op_mode = OP_CPIO; + cpio_options(argc, argv); + return; + } +#endif /* !NOCPIO */ + /* + * assume pax as the default + */ + argv0 = NM_PAX; + op_mode = OP_PAX; + pax_options(argc, argv); +} + +/* + * pax_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +pax_options(int argc, char **argv) +{ + int c; + unsigned i; + unsigned int flg = 0; + unsigned int bflg = 0; + const char *errstr; + char *pt; + + /* + * process option flags + */ + while ((c=getopt(argc,argv,"ab:cdf:ijklno:p:rs:tuvwx:zB:DE:G:HJLOPT:U:XYZ0")) + != -1) { + switch (c) { + case 'a': + /* + * append + */ + flg |= AF; + break; + case 'b': + /* + * specify blocksize + */ + flg |= BF; + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + pax_usage(); + } + break; + case 'c': + /* + * inverse match on patterns + */ + cflag = 1; + flg |= CF; + break; + case 'd': + /* + * match only dir on extract, not the subtree at dir + */ + dflag = 1; + flg |= DF; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + flg |= FF; + break; + case 'i': + /* + * interactive file rename + */ + iflag = 1; + flg |= IF; + break; + case 'j': + /* + * use bzip2. Non standard option. 
+ */ + gzip_program = BZIP2_CMD; + break; + case 'k': + /* + * do not clobber files that exist + */ + kflag = 1; + flg |= KF; + break; + case 'l': + /* + * try to link src to dest with copy (-rw) + */ + lflag = 1; + flg |= LF; + break; + case 'n': + /* + * select first match for a pattern only + */ + nflag = 1; + flg |= NF; + break; + case 'o': + /* + * pass format specific options + */ + flg |= OF; + if (opt_add(optarg) < 0) + pax_usage(); + break; + case 'p': + /* + * specify file characteristic options + */ + for (pt = optarg; *pt != '\0'; ++pt) { + switch (*pt) { + case 'a': + /* + * do not preserve access time + */ + patime = 0; + break; + case 'e': + /* + * preserve user id, group id, file + * mode, access/modification times + */ + pids = 1; + pmode = 1; + patime = 1; + pmtime = 1; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'o': + /* + * preserve uid/gid + */ + pids = 1; + break; + case 'p': + /* + * preserve file mode bits + */ + pmode = 1; + break; + default: + paxwarn(1, "Invalid -p string: %c", *pt); + pax_usage(); + break; + } + } + flg |= PF; + break; + case 'r': + /* + * read the archive + */ + flg |= RF; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= SF; + break; + case 't': + /* + * preserve access time on filesystem nodes we read + */ + tflag = 1; + flg |= TF; + break; + case 'u': + /* + * ignore those older files + */ + uflag = 1; + flg |= UF; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + flg |= VF; + break; + case 'w': + /* + * write an archive + */ + flg |= WF; + break; + case 'x': + /* + * specify an archive format on write + */ + for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i) + if (fsub[i].name != NULL && + strcmp(fsub[i].name, optarg) == 0) + break; + if (i < sizeof(fsub)/sizeof(FSUB)) { + frmt = &fsub[i]; + flg |= XF; + break; + } + paxwarn(1, "Unknown -x format: %s", 
optarg); + (void)fputs("pax: Known -x formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + if (fsub[i].name != NULL) + (void)fprintf(stderr, " %s", + fsub[i].name); + (void)fputs("\n\n", stderr); + pax_usage(); + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * non-standard option on number of bytes written on a + * single archive volume. + */ + if ((wrlimit = str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid write limit %s", optarg); + pax_usage(); + } + if (wrlimit % BLKMULT) { + paxwarn(1, "Write limit is not a %d byte multiple", + BLKMULT); + pax_usage(); + } + flg |= CBF; + break; + case 'D': + /* + * On extraction check file inode change time before the + * modification of the file name. Non standard option. + */ + Dflag = 1; + flg |= CDF; + break; + case 'E': + /* + * non-standard limit on read faults + * 0 indicates stop after first error, values + * indicate a limit + */ + flg |= CEF; + maxflt = strtonum(optarg, 0, INT_MAX, &errstr); + if (errstr) { + paxwarn(1, "Error count value: %s", errstr); + pax_usage(); + } + break; + case 'G': + /* + * non-standard option for selecting files within an + * archive by group (gid or name) + */ + if (grp_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CGF; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + flg |= CHF; + break; + case 'J': + /* + * use xz. Non standard option. + */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + flg |= CLF; + break; + case 'O': + /* + * Force one volume. Non standard option. 
+ */ + force_one_volume = 1; + break; + case 'P': + /* + * do NOT follow symlinks (default) + */ + Lflag = 0; + flg |= CPF; + break; + case 'T': + /* + * non-standard option for selecting files within an + * archive by modification time range (lower,upper) + */ + if (trng_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CTF; + break; + case 'U': + /* + * non-standard option for selecting files within an + * archive by user (uid or name) + */ + if (usr_add(optarg) < 0) { + pax_usage(); + break; + } + flg |= CUF; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + flg |= CXF; + break; + case 'Y': + /* + * On extraction check file inode change time after the + * modification of the file name. Non standard option. + */ + Yflag = 1; + flg |= CYF; + break; + case 'Z': + /* + * On extraction check modification time after the + * modification of the file name. Non standard option. + */ + Zflag = 1; + flg |= CZF; + break; + case '0': + /* + * Use \0 as pathname terminator. + * (For use with the -print0 option of find(1).) + */ + zeroflag = 1; + flg |= C0F; + break; + default: + pax_usage(); + break; + } + } + + /* + * figure out the operation mode of pax read,write,extract,copy,append + * or list. check that we have not been given a bogus set of flags + * for the operation mode. + */ + if (ISLIST(flg)) { + act = LIST; + listf = stdout; + bflg = flg & BDLIST; + } else if (ISEXTRACT(flg)) { + act = EXTRACT; + bflg = flg & BDEXTR; + } else if (ISARCHIVE(flg)) { + act = ARCHIVE; + bflg = flg & BDARCH; + } else if (ISAPPND(flg)) { + act = APPND; + bflg = flg & BDARCH; + } else if (ISCOPY(flg)) { + act = COPY; + bflg = flg & BDCOPY; + } else + pax_usage(); + if (bflg) { + printflg(flg); + pax_usage(); + } + + /* + * if we are writing (ARCHIVE) we use the default format if the user + * did not specify a format. when we write during an APPEND, we will + * adopt the format of the existing archive if none was supplied. 
+ */ + if (!(flg & XF) && (act == ARCHIVE)) + frmt = &(fsub[DEFLT]); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + for (; optind < argc; optind++) + if (pat_add(argv[optind], NULL) < 0) + pax_usage(); + break; + case COPY: + if (optind >= argc) { + paxwarn(0, "Destination directory was not supplied"); + pax_usage(); + } + --argc; + dirptr = argv[argc]; + /* FALL THROUGH */ + case ARCHIVE: + case APPND: + for (; optind < argc; optind++) + if (ftree_add(argv[optind], 0) < 0) + pax_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + + +/* + * tar_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. If not, complain and exit + */ + +static void +tar_options(int argc, char **argv) +{ + int c; + int Oflag = 0; + int nincfiles = 0; + int incfiles_max = 0; + struct incfile { + char *file; + char *dir; + }; + struct incfile *incfiles = NULL; + + /* + * Set default values. + */ + rmleadslash = 1; + + /* + * process option flags + */ + while ((c = getoldopt(argc, argv, + "b:cef:hjmopqruts:vwxzBC:HI:JLNOPXZ014578")) != -1) { + switch (c) { + case 'b': + /* + * specify blocksize in 512-byte blocks + */ + if ((wrblksz = (int)str_offt(optarg)) <= 0) { + paxwarn(1, "Invalid block size %s", optarg); + tar_usage(); + } + wrblksz *= 512; /* XXX - check for int oflow */ + break; + case 'c': + /* + * create an archive + */ + act = ARCHIVE; + break; + case 'e': + /* + * stop after first error + */ + maxflt = 0; + break; + case 'f': + /* + * filename where the archive is stored + */ + arcname = optarg; + break; + case 'h': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'j': + /* + * use bzip2. Non standard option. 
+ */ + gzip_program = BZIP2_CMD; + break; + case 'm': + /* + * do not preserve modification time + */ + pmtime = 0; + break; + case 'O': + Oflag = 1; + break; + case 'o': + Oflag = 2; + tar_nodir = 1; + break; + case 'p': + /* + * preserve uid/gid and file mode, regardless of umask + */ + pmode = 1; + pids = 1; + break; + case 'q': + /* + * select first match for a pattern only + */ + nflag = 1; + break; + case 'r': + case 'u': + /* + * append to the archive + */ + act = APPND; + break; + case 's': + /* + * file name substitution name pattern + */ + if (rep_add(optarg) < 0) { + tar_usage(); + break; + } + break; + case 't': + /* + * list contents of the tape + */ + act = LIST; + break; + case 'v': + /* + * verbose operation mode + */ + vflag++; + break; + case 'w': + /* + * interactive file rename + */ + iflag = 1; + break; + case 'x': + /* + * extract an archive, preserving mode, + * and mtime if possible. + */ + act = EXTRACT; + pmtime = 1; + break; + case 'z': + /* + * use gzip. Non standard option. + */ + gzip_program = GZIP_CMD; + break; + case 'B': + /* + * Nothing to do here, this is pax default + */ + break; + case 'C': + havechd++; + chdname = optarg; + break; + case 'H': + /* + * follow command line symlinks only + */ + Hflag = 1; + break; + case 'I': + if (++nincfiles > incfiles_max) { + size_t n = nincfiles + 3; + struct incfile *p; + + p = reallocarray(incfiles, n, + sizeof(*incfiles)); + if (p == NULL) { + paxwarn(0, "Unable to allocate space " + "for option list"); + exit(1); + } + incfiles = p; + incfiles_max = n; + } + incfiles[nincfiles - 1].file = optarg; + incfiles[nincfiles - 1].dir = chdname; + break; + case 'J': + /* + * use xz. Non standard option. 
+ */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symlinks + */ + Lflag = 1; + break; + case 'N': + /* numeric uid and gid only */ + Nflag = 1; + break; + case 'P': + /* + * do not remove leading '/' from pathnames + */ + rmleadslash = 0; + break; + case 'X': + /* + * do not pass over mount points in the file system + */ + Xflag = 1; + break; + case 'Z': + /* + * use compress. + */ + gzip_program = COMPRESS_CMD; + break; + case '0': + arcname = DEV_0; + break; + case '1': + arcname = DEV_1; + break; + case '4': + arcname = DEV_4; + break; + case '5': + arcname = DEV_5; + break; + case '7': + arcname = DEV_7; + break; + case '8': + arcname = DEV_8; + break; + default: + tar_usage(); + break; + } + } + argc -= optind; + argv += optind; + + if ((arcname == NULL) || (*arcname == '\0')) { + arcname = getenv("TAPE"); + if ((arcname == NULL) || (*arcname == '\0')) + arcname = "-"; + } + if ((arcname[0] == '-') && (arcname[1]== '\0')) + arcname = NULL; + + /* + * Traditional tar behaviour: list-like output goes to stdout unless + * writing the archive there. (pax uses stderr unless in list mode) + */ + if (act == LIST || act == EXTRACT || arcname != NULL) + listf = stdout; + + /* Traditional tar behaviour (pax wants to read file list from stdin) */ + if ((act == ARCHIVE || act == APPND) && argc == 0 && nincfiles == 0) + exit(0); + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case LIST: + case EXTRACT: + default: + { + int sawpat = 0; + char *file, *dir; + + while (nincfiles || *argv != NULL) { + /* + * If we queued up any include files, + * pull them in now. Otherwise, check + * for -I and -C positional flags. + * Anything else must be a file to + * extract. 
+ */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = chdname; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + syswarn(1, errno, + "Unable to open %s", file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (pat_add(str, dir) < 0) + tar_usage(); + sawpat = 1; + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", + strcmp(file, "-") ? file : + "stdin"); + tar_usage(); + } + if (strcmp(file, "-") != 0) + fclose(fp); + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + chdname = *argv++; + havechd++; + } else if (pat_add(*argv++, chdname) < 0) + tar_usage(); + else + sawpat = 1; + } + /* + * if patterns were added, we are doing chdir() + * on a file-by-file basis, else, just one + * global chdir (if any) after opening input. + */ + if (sawpat > 0) + chdname = NULL; + } + break; + case ARCHIVE: + case APPND: + frmt = &(fsub[Oflag ? F_OTAR : F_TAR]); + + if (chdname != NULL) { /* initial chdir() */ + if (ftree_add(chdname, 1) < 0) + tar_usage(); + } + + while (nincfiles || *argv != NULL) { + char *file, *dir; + + /* + * If we queued up any include files, pull them in + * now. Otherwise, check for -I and -C positional + * flags. Anything else must be a file to include + * in the archive. 
+ */ + if (nincfiles) { + file = incfiles->file; + dir = incfiles->dir; + incfiles++; + nincfiles--; + } else if (strcmp(*argv, "-I") == 0) { + if (*++argv == NULL) + break; + file = *argv++; + dir = NULL; + } else + file = NULL; + if (file != NULL) { + FILE *fp; + char *str; + + /* Set directory if needed */ + if (dir) { + if (ftree_add(dir, 1) < 0) + tar_usage(); + } + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) { + syswarn(1, errno, "Unable to open %s", + file); + tar_usage(); + } + while ((str = get_line(fp)) != NULL) { + if (ftree_add(str, 0) < 0) + tar_usage(); + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", + strcmp(file, "-") ? file : "stdin"); + tar_usage(); + } + if (strcmp(file, "-") != 0) + fclose(fp); + } else if (strcmp(*argv, "-C") == 0) { + if (*++argv == NULL) + break; + if (ftree_add(*argv++, 1) < 0) + tar_usage(); + havechd++; + } else if (ftree_add(*argv++, 0) < 0) + tar_usage(); + } + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + break; + } +} + +int mkpath(char *); + +int +mkpath(path) + char *path; +{ + struct stat sb; + char *slash; + int done = 0; + + slash = path; + + while (!done) { + slash += strspn(slash, "/"); + slash += strcspn(slash, "/"); + + done = (*slash == '\0'); + *slash = '\0'; + + if (stat(path, &sb)) { + if (errno != ENOENT || mkdir(path, 0777)) { + paxwarn(1, "%s", path); + return (-1); + } + } else if (!S_ISDIR(sb.st_mode)) { + syswarn(1, ENOTDIR, "%s", path); + return (-1); + } + + if (!done) + *slash = '/'; + } + + return (0); +} + +#ifndef NOCPIO +/* + * cpio_options() + * look at the user specified flags. set globals as required and check if + * the user specified a legal set of flags. 
If not, complain and exit + */ + +static void +cpio_options(int argc, char **argv) +{ + const char *errstr; + int c, list_only = 0; + unsigned i; + char *str; + FILE *fp; + + kflag = 1; + pids = 1; + pmode = 1; + pmtime = 0; + arcname = NULL; + dflag = 1; + act = -1; + nodirs = 1; + while ((c=getopt(argc,argv,"abcdfijklmoprstuvzABC:E:F:H:I:JLO:SZ6")) != -1) + switch (c) { + case 'a': + /* + * preserve access time on files read + */ + tflag = 1; + break; + case 'b': + /* + * swap bytes and half-words when reading data + */ + break; + case 'c': + /* + * ASCII cpio header + */ + frmt = &(fsub[F_ACPIO]); + break; + case 'd': + /* + * create directories as needed + */ + nodirs = 0; + break; + case 'f': + /* + * invert meaning of pattern list + */ + cflag = 1; + break; + case 'i': + /* + * restore an archive + */ + act = EXTRACT; + break; + case 'j': + /* + * use bzip2. Non standard option. + */ + gzip_program = BZIP2_CMD; + break; + case 'k': + break; + case 'l': + /* + * use links instead of copies when possible + */ + lflag = 1; + break; + case 'm': + /* + * preserve modification time + */ + pmtime = 1; + break; + case 'o': + /* + * create an archive + */ + act = ARCHIVE; + if (frmt == NULL) + frmt = &(fsub[F_CPIO]); + break; + case 'p': + /* + * copy-pass mode + */ + act = COPY; + break; + case 'r': + /* + * interactively rename files + */ + iflag = 1; + break; + case 's': + /* + * swap bytes after reading data + */ + break; + case 't': + /* + * list contents of archive + */ + list_only = 1; + break; + case 'u': + /* + * replace newer files + */ + kflag = 0; + break; + case 'v': + /* + * verbose operation mode + */ + vflag = 1; + break; + case 'z': + /* + * use gzip. Non standard option. 
+ */ + gzip_program = GZIP_CMD; + break; + case 'A': + /* + * append mode + */ + act = APPND; + break; + case 'B': + /* + * Use 5120 byte block size + */ + wrblksz = 5120; + break; + case 'C': + /* + * set block size in bytes + */ + wrblksz = strtonum(optarg, 0, INT_MAX, &errstr); + if (errstr) { + paxwarn(1, "Invalid block size %s: %s", + optarg, errstr); + pax_usage(); + } + break; + case 'E': + /* + * file with patterns to extract or list + */ + if ((fp = fopen(optarg, "r")) == NULL) { + syswarn(1, errno, "Unable to open %s", + optarg); + cpio_usage(); + } + while ((str = get_line(fp)) != NULL) { + pat_add(str, NULL); + } + if (ferror(fp)) { + syswarn(1, errno, + "Unable to read from %s", optarg); + cpio_usage(); + } + fclose(fp); + break; + case 'F': + case 'I': + case 'O': + /* + * filename where the archive is stored + */ + if ((optarg[0] == '-') && (optarg[1]== '\0')) { + /* + * treat a - as stdin + */ + arcname = NULL; + break; + } + arcname = optarg; + break; + case 'H': + /* + * specify an archive format on write + */ + for (i = 0; i < sizeof(fsub)/sizeof(FSUB); ++i) + if (fsub[i].name != NULL && + strcmp(fsub[i].name, optarg) == 0) + break; + if (i < sizeof(fsub)/sizeof(FSUB)) { + frmt = &fsub[i]; + break; + } + paxwarn(1, "Unknown -H format: %s", optarg); + (void)fputs("cpio: Known -H formats are:", stderr); + for (i = 0; i < (sizeof(fsub)/sizeof(FSUB)); ++i) + if (fsub[i].name != NULL) + (void)fprintf(stderr, " %s", + fsub[i].name); + (void)fputs("\n\n", stderr); + cpio_usage(); + break; + case 'J': + /* + * use xz. Non standard option. + */ + gzip_program = XZ_CMD; + break; + case 'L': + /* + * follow symbolic links + */ + Lflag = 1; + break; + case 'S': + /* + * swap halfwords after reading data + */ + break; + case 'Z': + /* + * use compress. Non standard option. 
+ */ + gzip_program = COMPRESS_CMD; + break; + case '6': + /* + * process Version 6 cpio format + */ + frmt = &(fsub[F_OCPIO]); + break; + case '?': + default: + cpio_usage(); + break; + } + argc -= optind; + argv += optind; + + /* + * process the args as they are interpreted by the operation mode + */ + switch (act) { + case EXTRACT: + if (list_only) { + act = LIST; + + /* + * cpio is like pax: list to stderr + * unless in list mode + */ + listf = stdout; + } + while (*argv != NULL) + if (pat_add(*argv++, NULL) < 0) + cpio_usage(); + break; + case COPY: + if (*argv == NULL) { + paxwarn(0, "Destination directory was not supplied"); + cpio_usage(); + } + dirptr = *argv; + if (mkpath(dirptr) < 0) + cpio_usage(); + --argc; + ++argv; + /* FALL THROUGH */ + case ARCHIVE: + case APPND: + if (*argv != NULL) + cpio_usage(); + /* + * no read errors allowed on updates/append operation! + */ + maxflt = 0; + while ((str = get_line(stdin)) != NULL) { + ftree_add(str, 0); + } + if (ferror(stdin)) { + syswarn(1, errno, "Unable to read from %s", + "stdin"); + cpio_usage(); + } + break; + default: + cpio_usage(); + break; + } +} +#endif /* !NOCPIO */ + +/* + * printflg() + * print out those invalid flag sets found to the user + */ + +static void +printflg(unsigned int flg) +{ + int nxt; + int pos = 0; + + (void)fprintf(stderr,"%s: Invalid combination of options:", argv0); + while ((nxt = ffs(flg)) != 0) { + flg >>= nxt; + pos += nxt; + (void)fprintf(stderr, " -%c", flgch[pos-1]); + } + (void)putc('\n', stderr); +} + +/* + * opt_next() + * called by format specific options routines to get each format specific + * flag and value specified with -o + * Return: + * pointer to next OPLIST entry or NULL (end of list). + */ + +OPLIST * +opt_next(void) +{ + OPLIST *opt; + + if ((opt = ophead) != NULL) + ophead = ophead->fow; + return(opt); +} + +/* + * bad_opt() + * generic routine used to complain about a format specific options + * when the format does not support options. 
+ */ + +int +bad_opt(void) +{ + OPLIST *opt; + + if (ophead == NULL) + return(0); + /* + * print all we were given + */ + paxwarn(1,"These format options are not supported"); + while ((opt = opt_next()) != NULL) + (void)fprintf(stderr, "\t%s = %s\n", opt->name, opt->value); + pax_usage(); + return(0); +} + +/* + * opt_add() + * breaks the value supplied to -o into a option name and value. options + * are given to -o in the form -o name-value,name=value + * multiple -o may be specified. + * Return: + * 0 if format in name=value format, -1 if -o is passed junk + */ + +int +opt_add(const char *str) +{ + OPLIST *opt; + char *frpt; + char *pt; + char *endpt; + char *dstr; + + if ((str == NULL) || (*str == '\0')) { + paxwarn(0, "Invalid option name"); + return(-1); + } + if ((dstr = strdup(str)) == NULL) { + paxwarn(0, "Unable to allocate space for option list"); + return(-1); + } + frpt = endpt = dstr; + + /* + * break into name and values pieces and stuff each one into a + * OPLIST structure. When we know the format, the format specific + * option function will go through this list + */ + while ((frpt != NULL) && (*frpt != '\0')) { + if ((endpt = strchr(frpt, ',')) != NULL) + *endpt = '\0'; + if ((pt = strchr(frpt, '=')) == NULL) { + paxwarn(0, "Invalid options format"); + free(dstr); + return(-1); + } + if ((opt = malloc(sizeof(OPLIST))) == NULL) { + paxwarn(0, "Unable to allocate space for option list"); + free(dstr); + return(-1); + } + dstr = NULL; /* parts of string going onto the OPLIST */ + *pt++ = '\0'; + opt->name = frpt; + opt->value = pt; + opt->fow = NULL; + if (endpt != NULL) + frpt = endpt + 1; + else + frpt = NULL; + if (ophead == NULL) { + optail = ophead = opt; + continue; + } + optail->fow = opt; + optail = opt; + } + free(dstr); + return(0); +} + +/* + * str_offt() + * Convert an expression of the following forms to an off_t > 0. + * 1) A positive decimal number. + * 2) A positive decimal number followed by a b (mult by 512). 
+ * 3) A positive decimal number followed by a k (mult by 1024). + * 4) A positive decimal number followed by a m (mult by 512). + * 5) A positive decimal number followed by a w (mult by sizeof int) + * 6) Two or more positive decimal numbers (with/without k,b or w). + * separated by x (also * for backwards compatibility), specifying + * the product of the indicated values. + * Return: + * 0 for an error, a positive value o.w. + */ + +static off_t +str_offt(char *val) +{ + char *expr; + off_t num, t; + + num = strtoll(val, &expr, 0); + if ((num == LLONG_MAX) || (num <= 0) || (expr == val)) + return(0); + + switch (*expr) { + case 'b': + t = num; + num *= 512; + if (t > num) + return(0); + ++expr; + break; + case 'k': + t = num; + num *= 1024; + if (t > num) + return(0); + ++expr; + break; + case 'm': + t = num; + num *= 1048576; + if (t > num) + return(0); + ++expr; + break; + case 'w': + t = num; + num *= sizeof(int); + if (t > num) + return(0); + ++expr; + break; + } + + switch (*expr) { + case '\0': + break; + case '*': + case 'x': + t = num; + num *= str_offt(expr + 1); + if (t > num) + return(0); + break; + default: + return(0); + } + return(num); +} + +char * +get_line(FILE *f) +{ + char *str = NULL; + size_t size = 0; + ssize_t len; + + do { + len = getline(&str, &size, f); + if (len == -1) { + free(str); + return NULL; + } + if (str[len - 1] == '\n') + str[len - 1] = '\0'; + } while (str[0] == '\0'); + return str; +} + +/* + * no_op() + * for those option functions where the archive format has nothing to do. 
+ * Return: + * 0 + */ + +static int +no_op(void) +{ + return(0); +} + +/* + * pax_usage() + * print the usage summary to the user + */ + +void +pax_usage(void) +{ + (void)fputs( + "usage: pax [-0cdjnOvz] [-E limit] [-f archive] [-G group] [-s replstr]\n" + " [-T range] [-U user] [pattern ...]\n" + " pax -r [-0cDdijknOuvYZz] [-E limit] [-f archive] [-G group] [-o options]\n" + " [-p string] [-s replstr] [-T range] [-U user] [pattern ...]\n" + " pax -w [-0adHijLOPtuvXz] [-B bytes] [-b blocksize] [-f archive]\n" + " [-G group] [-o options] [-s replstr] [-T range] [-U user]\n" + " [-x format] [file ...]\n" + " pax -rw [-0DdHikLlnOPtuvXYZ] [-G group] [-p string] [-s replstr]\n" + " [-T range] [-U user] [file ...] directory\n", + stderr); + exit(1); +} + +/* + * tar_usage() + * print the usage summary to the user + */ + +void +tar_usage(void) +{ + (void)fputs( + "usage: tar {crtux}[014578befHhjLmNOoPpqsvwXZz]\n" + " [blocking-factor | archive | replstr] [-C directory] [-I file]\n" + " [file ...]\n" + " tar {-crtux} [-014578eHhjLmNOoPpqvwXZz] [-b blocking-factor]\n" + " [-C directory] [-f archive] [-I file] [-s replstr] [file ...]\n", + stderr); + exit(1); +} + +#ifndef NOCPIO +/* + * cpio_usage() + * print the usage summary to the user + */ + +void +cpio_usage(void) +{ + (void)fputs( + "usage: cpio -o [-AaBcjLvZz] [-C bytes] [-F archive] [-H format]\n" + " [-O archive] < name-list [> archive]\n" + " cpio -i [-6BbcdfjmrSstuvZz] [-C bytes] [-E file] [-F archive] [-H format]\n" + " [-I archive] [pattern ...] 
[< archive]\n" + " cpio -p [-adLlmuv] destination-directory < name-list\n", + stderr); + exit(1); +} +#endif /* !NOCPIO */ + +#ifndef SMALL +static int +compress_id(char *blk, int size) +{ + if (size >= 2 && blk[0] == '\037' && blk[1] == '\235') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "compress", 'Z'); + exit(1); + } + return (-1); +} + +static int +gzip_id(char *blk, int size) +{ + if (size >= 2 && blk[0] == '\037' && blk[1] == '\213') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "gzip", 'z'); + exit(1); + } + return (-1); +} + +static int +bzip2_id(char *blk, int size) +{ + if (size >= 3 && blk[0] == 'B' && blk[1] == 'Z' && blk[2] == 'h') { + paxwarn(0, "input compressed with %s; use the -%c option" + " to decompress it", "bzip2", 'j'); + exit(1); + } + return (-1); +} + +static int +xz_id(char *blk, int size) +{ + if (size >= 6 && memcmp(blk, "\xFD\x37\x7A\x58\x5A", 6) == 0) { + paxwarn(0, "input compressed with xz"); + exit(1); + } + return (-1); +} +#endif /* !SMALL */ diff --git a/bin/pax/pat_rep.c b/bin/pax/pat_rep.c new file mode 100644 index 0000000..deddca0 --- /dev/null +++ b/bin/pax/pat_rep.c @@ -0,0 +1,1108 @@ +/* $OpenBSD: pat_rep.c,v 1.43 2017/09/16 07:42:34 otto Exp $ */ +/* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "pax.h" +#include "extern.h" + +/* + * data structure for storing user supplied replacement strings (-s) + */ +typedef struct replace { + char *nstr; /* the new string we will substitute with */ + regex_t rcmp; /* compiled regular expression used to match */ + int flgs; /* print conversions? global in operation? */ +#define PRNT 0x1 +#define GLOB 0x2 + struct replace *fow; /* pointer to next pattern */ +} REPLACE; + +/* + * routines to handle pattern matching, name modification (regular expression + * substitution and interactive renames), and destination name modification for + * copy (-rw). 
Both file name and link names are adjusted as required in these + * routines. + */ + +#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ +static PATTERN *pathead = NULL; /* file pattern match list head */ +static PATTERN *pattail = NULL; /* file pattern match list tail */ +static REPLACE *rephead = NULL; /* replacement string list head */ +static REPLACE *reptail = NULL; /* replacement string list tail */ + +static int rep_name(char *, size_t, int *, int); +static int tty_rename(ARCHD *); +static int fix_path(char *, int *, char *, int); +static int fn_match(char *, char *, char **); +static char * range_match(char *, int); +static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); + +/* + * rep_add() + * parses the -s replacement string; compiles the regular expression + * and stores the compiled value and it's replacement string together in + * replacement string list. Input to this function is of the form: + * /old/new/pg + * The first char in the string specifies the delimiter used by this + * replacement string. "Old" is a regular expression in "ed" format which + * is compiled by regcomp() and is applied to filenames. "new" is the + * substitution string; p and g are options flags for printing and global + * replacement (over the single filename) + * Return: + * 0 if a proper replacement string and regular expression was added to + * the list of replacement patterns; -1 otherwise. 
+ */ + +int +rep_add(char *str) +{ + char *pt1; + char *pt2; + REPLACE *rep; + int res; + char rebuf[BUFSIZ]; + + /* + * throw out the bad parameters + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty replacement string"); + return(-1); + } + + /* + * first character in the string specifies what the delimiter is for + * this expression + */ + for (pt1 = str+1; *pt1; pt1++) { + if (*pt1 == '\\') { + pt1++; + continue; + } + if (*pt1 == *str) + break; + } + if (*pt1 == '\0') { + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + /* + * allocate space for the node that handles this replacement pattern + * and split out the regular expression and try to compile it + */ + if ((rep = malloc(sizeof(REPLACE))) == NULL) { + paxwarn(1, "Unable to allocate memory for replacement string"); + return(-1); + } + + *pt1 = '\0'; + if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { + regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); + paxwarn(1, "%s while compiling regular expression %s", rebuf, str); + free(rep); + return(-1); + } + + /* + * put the delimiter back in case we need an error message and + * locate the delimiter at the end of the replacement string + * we then point the node at the new substitution string + */ + *pt1++ = *str; + for (pt2 = pt1; *pt2; pt2++) { + if (*pt2 == '\\') { + pt2++; + continue; + } + if (*pt2 == *str) + break; + } + if (*pt2 == '\0') { + regfree(&(rep->rcmp)); + free(rep); + paxwarn(1, "Invalid replacement string %s", str); + return(-1); + } + + *pt2 = '\0'; + rep->nstr = pt1; + pt1 = pt2++; + rep->flgs = 0; + + /* + * set the options if any + */ + while (*pt2 != '\0') { + switch (*pt2) { + case 'g': + case 'G': + rep->flgs |= GLOB; + break; + case 'p': + case 'P': + rep->flgs |= PRNT; + break; + default: + regfree(&(rep->rcmp)); + free(rep); + *pt1 = *str; + paxwarn(1, "Invalid replacement string option %s", str); + return(-1); + } + ++pt2; + } + + /* + * all done, link it in at the end + */ + rep->fow = 
NULL; + if (rephead == NULL) { + reptail = rephead = rep; + return(0); + } + reptail->fow = rep; + reptail = rep; + return(0); +} + +/* + * pat_add() + * add a pattern match to the pattern match list. Pattern matches are used + * to select which archive members are extracted. (They appear as + * arguments to pax in the list and read modes). If no patterns are + * supplied to pax, all members in the archive will be selected (and the + * pattern match list is empty). + * Return: + * 0 if the pattern was added to the list, -1 otherwise + */ + +int +pat_add(char *str, char *chdirname) +{ + PATTERN *pt; + + /* + * throw out the junk + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty pattern string"); + return(-1); + } + + /* + * allocate space for the pattern and store the pattern. the pattern is + * part of argv so do not bother to copy it, just point at it. Add the + * node to the end of the pattern list + */ + if ((pt = malloc(sizeof(PATTERN))) == NULL) { + paxwarn(1, "Unable to allocate memory for pattern string"); + return(-1); + } + + pt->pstr = str; + pt->pend = NULL; + pt->plen = strlen(str); + pt->fow = NULL; + pt->flgs = 0; + pt->chdname = chdirname; + + if (pathead == NULL) { + pattail = pathead = pt; + return(0); + } + pattail->fow = pt; + pattail = pt; + return(0); +} + +/* + * pat_chk() + * complain if any the user supplied pattern did not result in a match to + * a selected archive member. + */ + +void +pat_chk(void) +{ + PATTERN *pt; + int wban = 0; + + /* + * walk down the list checking the flags to make sure MTCH was set, + * if not complain + */ + for (pt = pathead; pt != NULL; pt = pt->fow) { + if (pt->flgs & MTCH) + continue; + if (!wban) { + paxwarn(1, "WARNING! These patterns were not matched:"); + ++wban; + } + (void)fprintf(stderr, "%s\n", pt->pstr); + } +} + +/* + * pat_sel() + * the archive member which matches a pattern was selected. Mark the + * pattern as having selected an archive member. 
arcn->pat points at the + * pattern that was matched. arcn->pat is set in pat_match() + * + * NOTE: When the -c option is used, we are called when there was no match + * by pat_match() (that means we did match before the inverted sense of + * the logic). Now this seems really strange at first, but with -c we + * need to keep track of those patterns that cause an archive member to NOT + * be selected (it found an archive member with a specified pattern) + * Return: + * 0 if the pattern pointed at by arcn->pat was tagged as creating a + * match, -1 otherwise. + */ + +int +pat_sel(ARCHD *arcn) +{ + PATTERN *pt; + PATTERN **ppt; + size_t len; + + /* + * if no patterns just return + */ + if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) + return(0); + + /* + * when we are NOT limited to a single match per pattern mark the + * pattern and return + */ + if (!nflag) { + pt->flgs |= MTCH; + return(0); + } + + /* + * we reach this point only when we allow a single selected match per + * pattern, if the pattern matches a directory and we do not have -d + * (dflag) we are done with this pattern. We may also be handed a file + * in the subtree of a directory. in that case when we are operating + * with -d, this pattern was already selected and we are done + */ + if (pt->flgs & DIR_MTCH) + return(0); + + if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { + /* + * ok we matched a directory and we are allowing + * subtree matches but because of the -n only its children will + * match. This is tagged as a DIR_MTCH type. + * WATCH IT, the code assumes that pt->pend points + * into arcn->name and arcn->name has not been modified. + * If not we will have a big mess. Yup this is another kludge + */ + + /* + * if this was a prefix match, remove trailing part of path + * so we can copy it. 
Future matches will be exact prefix match + */ + if (pt->pend != NULL) + *pt->pend = '\0'; + + if ((pt->pstr = strdup(arcn->name)) == NULL) { + paxwarn(1, "Pattern select out of memory"); + if (pt->pend != NULL) + *pt->pend = '/'; + pt->pend = NULL; + return(-1); + } + + /* + * put the trailing / back in the source string + */ + if (pt->pend != NULL) { + *pt->pend = '/'; + pt->pend = NULL; + } + pt->plen = strlen(pt->pstr); + + /* + * strip off any trailing /, this should really never happen + */ + len = pt->plen - 1; + if (*(pt->pstr + len) == '/') { + *(pt->pstr + len) = '\0'; + pt->plen = len; + } + pt->flgs = DIR_MTCH | MTCH; + arcn->pat = pt; + return(0); + } + + /* + * we are then done with this pattern, so we delete it from the list + * because it can never be used for another match. + * Seems kind of strange to do for a -c, but the pax spec is really + * vague on the interaction of -c, -n and -d. We assume that when -c + * and the pattern rejects a member (i.e. it matched it) it is done. + * In effect we place the order of the flags as having -c last. + */ + pt = pathead; + ppt = &pathead; + while ((pt != NULL) && (pt != arcn->pat)) { + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt == NULL) { + /* + * should never happen.... + */ + paxwarn(1, "Pattern list inconsistent"); + return(-1); + } + *ppt = pt->fow; + free(pt); + arcn->pat = NULL; + return(0); +} + +/* + * pat_match() + * see if this archive member matches any supplied pattern, if a match + * is found, arcn->pat is set to point at the potential pattern. 
Later if + * this archive member is "selected" we process and mark the pattern as + * one which matched a selected archive member (see pat_sel()) + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + */ + +int +pat_match(ARCHD *arcn) +{ + PATTERN *pt; + + arcn->pat = NULL; + + /* + * if there are no more patterns and we have -n (and not -c) we are + * done. otherwise with no patterns to match, matches all + */ + if (pathead == NULL) { + if (nflag && !cflag) + return(-1); + return(0); + } + + /* + * have to search down the list one at a time looking for a match. + */ + pt = pathead; + while (pt != NULL) { + /* + * check for a file name match unless we have DIR_MTCH set in + * this pattern then we want a prefix match + */ + if (pt->flgs & DIR_MTCH) { + /* + * this pattern was matched before to a directory + * as we must have -n set for this (but not -d). We can + * only match CHILDREN of that directory so we must use + * an exact prefix match (no wildcards). + */ + if ((arcn->name[pt->plen] == '/') && + (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) + break; + } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) + break; + pt = pt->fow; + } + + /* + * return the result, remember that cflag (-c) inverts the sense of a + * match + */ + if (pt == NULL) + return(cflag ? 0 : 1); + + /* + * we had a match, now when we invert the sense (-c) we reject this + * member. However we have to tag the pattern a being successful, (in a + * match, not in selecting a archive member) so we call pat_sel() here. 
+ */ + arcn->pat = pt; + if (!cflag) + return(0); + + if (pat_sel(arcn) < 0) + return(-1); + arcn->pat = NULL; + return(1); +} + +/* + * fn_match() + * Return: + * 0 if this archive member should be processed, 1 if it should be + * skipped and -1 if we are done with all patterns (and pax should quit + * looking for more members) + * Note: *pend may be changed to show where the prefix ends. + */ + +static int +fn_match(char *pattern, char *string, char **pend) +{ + char c; + char test; + + *pend = NULL; + for (;;) { + switch (c = *pattern++) { + case '\0': + /* + * Ok we found an exact match + */ + if (*string == '\0') + return(0); + + /* + * Check if it is a prefix match + */ + if ((dflag == 1) || (*string != '/')) + return(-1); + + /* + * It is a prefix match, remember where the trailing + * / is located + */ + *pend = string; + return(0); + case '?': + if ((test = *string++) == '\0') + return (-1); + break; + case '*': + c = *pattern; + /* + * Collapse multiple *'s. + */ + while (c == '*') + c = *++pattern; + + /* + * Optimized hack for pattern with a * at the end + */ + if (c == '\0') + return (0); + + /* + * General case, use recursion. 
+ */ + while ((test = *string) != '\0') { + if (!fn_match(pattern, string, pend)) + return (0); + ++string; + } + return (-1); + case '[': + /* + * range match + */ + if (((test = *string++) == '\0') || + ((pattern = range_match(pattern, test)) == NULL)) + return (-1); + break; + case '\\': + if ((c = *pattern++) == '\0') + return (-1); + /* FALLTHROUGH */ + default: + if (c != *string++) + return (-1); + break; + } + } + /* NOTREACHED */ +} + +static char * +range_match(char *pattern, int test) +{ + char c; + char c2; + int negate; + int ok = 0; + + if ((negate = (*pattern == '!')) != 0) + ++pattern; + + while ((c = *pattern++) != ']') { + /* + * Illegal pattern + */ + if (c == '\0') + return (NULL); + + if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && + (c2 != ']')) { + if ((c <= test) && (test <= c2)) + ok = 1; + pattern += 2; + } else if (c == test) + ok = 1; + } + return (ok == negate ? NULL : pattern); +} + +/* + * has_dotdot() + * Returns true iff the supplied path contains a ".." component. + */ + +int +has_dotdot(const char *path) +{ + const char *p = path; + + while ((p = strstr(p, "..")) != NULL) { + if ((p == path || p[-1] == '/') && + (p[2] == '/' || p[2] == '\0')) + return (1); + p += 2; + } + return (0); +} + +/* + * mod_name() + * modify a selected file name. first attempt to apply replacement string + * expressions, then apply interactive file rename. We apply replacement + * string expressions to both filenames and file links (if we didn't the + * links would point to the wrong place, and we could never be able to + * move an archive that has a file link in it). When we rename files + * interactively, we store that mapping (old name to user input name) so + * if we spot any file links to the old file name in the future, we will + * know exactly how to fix the file link. 
+ * Return: + * 0 continue to process file, 1 skip this file, -1 pax is finished + */ + +int +mod_name(ARCHD *arcn) +{ + int res = 0; + + /* + * Strip off leading '/' if appropriate. + * Currently, this option is only set for the tar format. + */ + while (rmleadslash && arcn->name[0] == '/') { + if (arcn->name[1] == '\0') { + arcn->name[0] = '.'; + } else { + (void)memmove(arcn->name, &arcn->name[1], + strlen(arcn->name)); + arcn->nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + while (rmleadslash && arcn->ln_name[0] == '/' && + PAX_IS_HARDLINK(arcn->type)) { + if (arcn->ln_name[1] == '\0') { + arcn->ln_name[0] = '.'; + } else { + (void)memmove(arcn->ln_name, &arcn->ln_name[1], + strlen(arcn->ln_name)); + arcn->ln_nlen--; + } + if (rmleadslash < 2) { + rmleadslash = 2; + paxwarn(0, "Removing leading / from absolute path names in the archive"); + } + } + if (rmleadslash) { + const char *last = NULL; + const char *p = arcn->name; + + while ((p = strstr(p, "..")) != NULL) { + if ((p == arcn->name || p[-1] == '/') && + (p[2] == '/' || p[2] == '\0')) + last = p + 2; + p += 2; + } + if (last != NULL) { + last++; + paxwarn(1, "Removing leading \"%.*s\"", + (int)(last - arcn->name), arcn->name); + arcn->nlen = strlen(last); + if (arcn->nlen > 0) + memmove(arcn->name, last, arcn->nlen + 1); + else { + arcn->name[0] = '.'; + arcn->name[1] = '\0'; + arcn->nlen = 1; + } + } + } + + /* + * IMPORTANT: We have a problem. what do we do with symlinks? + * Modifying a hard link name makes sense, as we know the file it + * points at should have been seen already in the archive (and if it + * wasn't seen because of a read error or a bad archive, we lose + * anyway). But there are no such requirements for symlinks. On one + * hand the symlink that refers to a file in the archive will have to + * be modified to so it will still work at its new location in the + * file system. 
On the other hand a symlink that points elsewhere (and + * should continue to do so) should not be modified. There is clearly + * no perfect solution here. So we handle them like hardlinks. Clearly + * a replacement made by the interactive rename mapping is very likely + * to be correct since it applies to a single file and is an exact + * match. The regular expression replacements are a little harder to + * justify though. We claim that the symlink name is only likely + * to be replaced when it points within the file tree being moved and + * in that case it should be modified. what we really need to do is to + * call an oracle here. :) + */ + if (rephead != NULL) { + /* + * we have replacement strings, modify the name and the link + * name if any. + */ + if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0) + return(res); + + if (PAX_IS_LINK(arcn->type)) { + if ((res = rep_name(arcn->ln_name, + sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0) + return(res); + } + } + + if (iflag) { + /* + * perform interactive file rename, then map the link if any + */ + if ((res = tty_rename(arcn)) != 0) + return(res); + if (PAX_IS_LINK(arcn->type)) + sub_name(arcn->ln_name, &(arcn->ln_nlen), + sizeof(arcn->ln_name)); + } + return(res); +} + +/* + * tty_rename() + * Prompt the user for a replacement file name. A "." keeps the old name, + * a empty line skips the file, and an EOF on reading the tty, will cause + * pax to stop processing and exit. Otherwise the file name input, replaces + * the old one. + * Return: + * 0 process this file, 1 skip this file, -1 we need to exit pax + */ + +static int +tty_rename(ARCHD *arcn) +{ + char tmpname[PAXPATHLEN+2]; + int res; + + /* + * prompt user for the replacement name for a file, keep trying until + * we get some reasonable input. Archives may have more than one file + * on them with the same name (from updates etc). We print verbose info + * on the file so the user knows what is up. 
+ */ + tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); + + for (;;) { + ls_tty(arcn); + tty_prnt("Input new name, or a \".\" to keep the old name, "); + tty_prnt("or a \"return\" to skip this file.\n"); + tty_prnt("Input > "); + if (tty_read(tmpname, sizeof(tmpname)) < 0) + return(-1); + if (strcmp(tmpname, "..") == 0) { + tty_prnt("Try again, illegal file name: ..\n"); + continue; + } + if (strlen(tmpname) > PAXPATHLEN) { + tty_prnt("Try again, file name too long\n"); + continue; + } + break; + } + + /* + * empty file name, skips this file. a "." leaves it alone + */ + if (tmpname[0] == '\0') { + tty_prnt("Skipping file.\n"); + return(1); + } + if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { + tty_prnt("Processing continues, name unchanged.\n"); + return(0); + } + + /* + * ok the name changed. We may run into links that point at this + * file later. we have to remember where the user sent the file + * in order to repair any links. + */ + tty_prnt("Processing continues, name changed to: %s\n", tmpname); + res = add_name(arcn->name, arcn->nlen, tmpname); + arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ + if (res < 0) + return(-1); + return(0); +} + +/* + * set_dest() + * fix up the file name and the link name (if any) so this file will land + * in the destination directory (used during copy() -rw). + * Return: + * 0 if ok, -1 if failure (name too long) + */ + +int +set_dest(ARCHD *arcn, char *dest_dir, int dir_len) +{ + if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) + return(-1); + + /* + * It is really hard to deal with symlinks here, we cannot be sure + * if the name they point was moved (or will be moved). It is best to + * leave them alone. 
+ */ + if (!PAX_IS_HARDLINK(arcn->type)) + return(0); + + if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) + return(-1); + return(0); +} + +/* + * fix_path + * concatenate dir_name and or_name and store the result in or_name (if + * it fits). This is one ugly function. + * Return: + * 0 if ok, -1 if the final name is too long + */ + +static int +fix_path(char *or_name, int *or_len, char *dir_name, int dir_len) +{ + char *src; + char *dest; + char *start; + int len; + + /* + * we shift the or_name to the right enough to tack in the dir_name + * at the front. We make sure we have enough space for it all before + * we start. Since dest always ends in a slash, we skip the first + * character of or_name if it also starts with one. + */ + start = or_name; + src = start + *or_len; + dest = src + dir_len; + if (*start == '/') { + ++start; + --dest; + } + if ((len = dest - or_name) > PAXPATHLEN) { + paxwarn(1, "File name %s/%s, too long", dir_name, start); + return(-1); + } + *or_len = len; + + /* + * enough space, shift + */ + while (src >= start) + *dest-- = *src--; + src = dir_name + dir_len - 1; + + /* + * splice in the destination directory name + */ + while (src >= dir_name) + *dest-- = *src--; + + *(or_name + len) = '\0'; + return(0); +} + +/* + * rep_name() + * walk down the list of replacement strings applying each one in order. + * when we find one with a successful substitution, we modify the name + * as specified. if required, we print the results. if the resulting name + * is empty, we will skip this archive member. We use the regexp(3) + * routines (regexp() ought to win a prize as having the most cryptic + * library function manual page). + * --Parameters-- + * name is the file name we are going to apply the regular expressions to + * (and may be modified) + * nsize is the size of the name buffer. + * nlen is the length of this name (and is modified to hold the length of + * the final string). + * prnt is a flag that says whether to print the final result.
+ * Return: + * 0 if the name is to be used (whether or not a substitution was made), + * 1 if we are to skip the file (the name ended up empty or too long, or + * the replacement could not be applied) + */ + +static int +rep_name(char *name, size_t nsize, int *nlen, int prnt) +{ + REPLACE *pt; + char *inpt; + char *outpt; + char *endpt; + char *rpt; + int found = 0; + int res; + regmatch_t pm[MAXSUBEXP]; + char nname[PAXPATHLEN+1]; /* final result of all replacements */ + char buf1[PAXPATHLEN+1]; /* where we work on the name */ + + /* + * copy the name into buf1, where we will work on it. We need to keep + * the orig string around so we can print out the result of the final + * replacement. We build up the final result in nname. inpt points at + * the string we apply the regular expression to. prnt is used to + * suppress printing when we handle replacements on the link field + * (the user already saw that substitution go by) + */ + pt = rephead; + (void)strlcpy(buf1, name, sizeof(buf1)); + inpt = buf1; + outpt = nname; + endpt = outpt + PAXPATHLEN; + + /* + * try each replacement string in order + */ + while (pt != NULL) { + do { + char *oinpt = inpt; + /* + * check for a successful substitution, if not go to + * the next pattern, or cleanup if we were global + */ + if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) + break; + + /* + * ok we found one. We have three parts, the prefix + * which did not match, the section that did and the + * tail (that also did not match). Copy the prefix to + * the final output buffer (watching to make sure we + * do not create a string too long). + */ + found = 1; + rpt = inpt + pm[0].rm_so; + + while ((inpt < rpt) && (outpt < endpt)) + *outpt++ = *inpt++; + if (outpt == endpt) + break; + + /* + * for the second part (which matched the regular + * expression) apply the substitution using the + * replacement string and place it after the prefix in + * the final output. If we have problems, skip it.
+ */ + if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt)) + < 0) { + if (prnt) + paxwarn(1, "Replacement name error %s", + name); + return(1); + } + outpt += res; + + /* + * we set up to look again starting at the first + * character in the tail (of the input string right + * after the last character matched by the regular + * expression (inpt always points at the first char in + * the string to process). If we are not doing a global + * substitution, we will use inpt to copy the tail to + * the final result. Make sure we do not overrun the + * output buffer + */ + inpt += pm[0].rm_eo - pm[0].rm_so; + + if ((outpt == endpt) || (*inpt == '\0')) + break; + + /* + * if the user wants global we keep trying to + * substitute until it fails, then we are done. + */ + } while (pt->flgs & GLOB); + + if (found) + break; + + /* + * a successful substitution did NOT occur, try the next one + */ + pt = pt->fow; + } + + if (found) { + /* + * we had a substitution, copy the last tail piece (if there is + * room) to the final result + */ + while ((outpt < endpt) && (*inpt != '\0')) + *outpt++ = *inpt++; + + *outpt = '\0'; + if ((outpt == endpt) && (*inpt != '\0')) { + if (prnt) + paxwarn(1,"Replacement name too long %s >> %s", + name, nname); + return(1); + } + + /* + * inform the user of the result if wanted + */ + if (prnt && (pt->flgs & PRNT)) { + if (*nname == '\0') + (void)fprintf(stderr,"%s >> <empty string>\n", + name); + else + (void)fprintf(stderr,"%s >> %s\n", name, nname); + } + + /* + * if empty inform the caller this file is to be skipped + * otherwise copy the new name over the orig name and return + */ + if (*nname == '\0') + return(1); + *nlen = strlcpy(name, nname, nsize); + } + return(0); +} + +/* + * resub() + * apply the replacement to the matched expression. expand out the old + * style ed(1) subexpression expansion. + * Return: + * -1 if error, or the number of characters added to the destination. 
+ */ + +static int +resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest, + char *destend) +{ + char *spt; + char *dpt; + char c; + regmatch_t *pmpt; + int len; + int subexcnt; + + spt = src; + dpt = dest; + subexcnt = rp->re_nsub; + while ((dpt < destend) && ((c = *spt++) != '\0')) { + /* + * see if we just have an ordinary replacement character + * or we refer to a subexpression. + */ + if (c == '&') { + pmpt = pm; + } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { + /* + * make sure there is a subexpression as specified + */ + if ((len = *spt++ - '0') > subexcnt) + return(-1); + pmpt = pm + len; + } else { + /* + * Ordinary character, just copy it + */ + if ((c == '\\') && (*spt != '\0')) + c = *spt++; + *dpt++ = c; + continue; + } + + /* + * continue if the subexpression is bogus + */ + if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || + ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) + continue; + + /* + * copy the subexpression to the destination. + * fail if we run out of space or the match string is damaged + */ + if (len > (destend - dpt)) + return (-1); + strncpy(dpt, inpt + pmpt->rm_so, len); + dpt += len; + } + return(dpt - dest); +} diff --git a/bin/pax/pax.1 b/bin/pax/pax.1 new file mode 100644 index 0000000..d146a96 --- /dev/null +++ b/bin/pax/pax.1 @@ -0,0 +1,1112 @@ +.\" $OpenBSD: pax.1,v 1.75 2020/01/16 16:46:46 schwarze Exp $ +.\" $NetBSD: pax.1,v 1.3 1995/03/21 09:07:37 cgd Exp $ +.\" +.\" Copyright (c) 1992 Keith Muller. +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Keith Muller of the University of California, San Diego. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)pax.1 8.4 (Berkeley) 4/18/94 +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt PAX 1 +.Os +.Sh NAME +.Nm pax +.Nd read and write file archives and copy directory hierarchies +.Sh SYNOPSIS +.Nm pax +.Op Fl 0cdjnOvz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar pattern ... +.Nm pax +.Fl r +.Op Fl 0cDdijknOuvYZz +.Op Fl E Ar limit +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl o Ar options +.Op Fl p Ar string +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar pattern ... 
+.Nm pax +.Fl w +.Op Fl 0adHijLOPtuvXz +.Op Fl B Ar bytes +.Op Fl b Ar blocksize +.Op Fl f Ar archive +.Op Fl G Ar group +.Op Fl o Ar options +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Fl x Ar format +.Op Ar +.Nm pax +.Fl rw +.Op Fl 0DdHijkLlnOPtuvXYZ +.Op Fl G Ar group +.Op Fl p Ar string +.Op Fl s Ar replstr +.Op Fl T Ar range +.Op Fl U Ar user +.Op Ar +.Ar directory +.Sh DESCRIPTION +.Nm +will read, write, and list the members of an archive file +and will copy directory hierarchies. +.Nm +operation is independent of the specific archive format +and supports a wide variety of different archive formats. +A list of supported archive formats can be found under the description of the +.Fl x +option. +.Pp +The presence of the +.Fl r +and the +.Fl w +options specifies which of the following functional modes +.Nm +will operate under: +.Em list , read , write , +and +.Em copy . +.Bl -tag -width 6n +.It Aq none +.Em List . +.Nm +will write to standard output +a table of contents of the members of the archive file read from +standard input, whose pathnames match the specified +.Ar pattern +arguments. +The table of contents contains one filename per line +and is written using single line buffering. +.It Fl r +.Em Read . +.Nm +extracts the members of the archive file read from the standard input, +with pathnames matching the specified +.Ar pattern +arguments. +The archive format and blocking is automatically determined on input. +When an extracted file is a directory, the entire file hierarchy +rooted at that directory is extracted. +All extracted files are created relative to the current file hierarchy. +The setting of ownership, access and modification times, and file mode of +the extracted files are discussed in more detail under the +.Fl p +option. +.It Fl w +.Em Write . +.Nm +writes an archive containing the +.Ar file +operands to standard output +using the specified archive format. 
+When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +standard input. +When a +.Ar file +operand is also a directory, the entire file hierarchy rooted +at that directory will be included. +.It Fl rw +.Em Copy . +.Nm +copies the +.Ar file +operands to the destination +.Ar directory . +When no +.Ar file +operands are specified, a list of files to copy with one per line is read from +the standard input. +When a +.Ar file +operand is also a directory the entire file +hierarchy rooted at that directory will be included. +The effect of the +.Em copy +is as if the copied files were written to an archive file and then +subsequently extracted, except that there may be hard links between +the original and the copied files (see the +.Fl l +option below). +.Pp +.Sy Warning : +The destination +.Ar directory +must not be one of the +.Ar file +operands or a member of a file hierarchy rooted at one of the +.Ar file +operands. +The result of a +.Em copy +under these conditions is unpredictable. +.El +.Pp +While processing a damaged archive during a read or list operation, +.Nm +will attempt to recover from media defects and will search through the archive +to locate and process the largest number of archive members possible (see the +.Fl E +option for more details on error handling). +.Pp +The +.Ar directory +operand specifies a destination directory pathname. +If the +.Ar directory +operand does not exist, or it is not writable by the user, +or it is not of type directory, +.Nm +will exit with a non-zero exit status. +.Pp +The +.Ar pattern +operand is used to select one or more pathnames of archive members. +Archive members are selected using the pattern matching notation described +by +.Xr glob 7 . +When the +.Ar pattern +operand is not supplied, all members of the archive will be selected. +When a +.Ar pattern +matches a directory, the entire file hierarchy rooted at that directory will +be selected. 
+When a +.Ar pattern +operand does not select at least one archive member, +.Nm +will write these +.Ar pattern +operands in a diagnostic message to standard error +and then exit with a non-zero exit status. +.Pp +The +.Ar file +operand specifies the pathname of a file to be copied or archived. +When a +.Ar file +operand does not select at least one archive member, +.Nm +will write these +.Ar file +operand pathnames in a diagnostic message to standard error +and then exit with a non-zero exit status. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl 0 +Use the NUL +.Pq Ql \e0 +character as a pathname terminator, instead of newline +.Pq Ql \en . +This applies only to the pathnames read from standard input in +the write and copy modes, +and to the pathnames written to standard output in list mode. +This option is expected to be used in concert with the +.Fl print0 +function in +.Xr find 1 +or the +.Fl 0 +flag in +.Xr xargs 1 . +.It Fl a +Append the given +.Ar file +operands +to the end of an archive that was previously written. +If an archive format is not specified with a +.Fl x +option, the format currently being used in the archive will be selected. +Any attempt to append to an archive in a format different from the +format already used in the archive will cause +.Nm +to exit immediately +with a non-zero exit status. +The blocking size used in the archive volume where writing starts +will continue to be used for the remainder of that archive volume. +.Pp +.Sy Warning : +Many storage devices are not able to support the operations necessary +to perform an append operation. +Any attempt to append to an archive stored on such a device may damage the +archive or have other unpredictable results. +Tape drives in particular are more likely to not support an append operation. +An archive stored in a regular file system file or on a disk device will +usually support an append operation. 
+.It Fl B Ar bytes +Limit the number of bytes written to a single archive volume to +.Ar bytes . +The +.Ar bytes +limit can end with +.Sq Li m , +.Sq Li k , +or +.Sq Li b +to specify multiplication by 1048576 (1M), 1024 (1K) or 512, respectively. +A pair of +.Ar bytes +limits can be separated by +.Sq Li x +to indicate a product. +.Pp +.Sy Warning : +Only use this option when writing an archive to a device which supports +an end of file read condition based on last (or largest) write offset +(such as a regular file or a tape drive). +The use of this option with a floppy or hard disk is not recommended. +.It Fl b Ar blocksize +When writing an archive, +block the output at a positive decimal integer number of +bytes per write to the archive file. +The +.Ar blocksize +must be a multiple of 512 bytes with a maximum of 64512 bytes. +Archive block sizes larger than 32256 bytes violate the POSIX +standard and will not be portable to all systems. +A +.Ar blocksize +can end with +.Sq Li k +or +.Sq Li b +to specify multiplication by 1024 (1K) or 512, respectively. +A pair of blocksizes can be separated by +.Sq Li x +to indicate a product. +A specific archive device may impose additional restrictions on the size +of blocking it will support. +When blocking is not specified, the default +.Ar blocksize +is dependent on the specific archive format being used (see the +.Fl x +option). +.It Fl c +Match all file or archive members +.Em except +those specified by the +.Ar pattern +and +.Ar file +operands. +.It Fl D +This option is the same as the +.Fl u +option, except that the file inode change time is checked instead of the +file modification time. +The file inode change time can be used to select files whose inode information +(e.g., UID, GID, etc.) is newer than a copy of the file in the destination +.Ar directory . 
+.It Fl d +Cause files of type directory being copied or archived, or archive members of +type directory being extracted, to match only the directory file or archive +member and not the file hierarchy rooted at the directory. +.It Fl E Ar limit +Limit the number of consecutive read faults while trying to read a flawed +archive to +.Ar limit . +With a positive +.Ar limit , +.Nm +will attempt to recover from an archive read error and will +continue processing starting with the next file stored in the archive. +A +.Ar limit +of 0 will cause +.Nm +to stop operation after the first read error is detected on an archive volume. +The default +.Ar limit +is a small positive number of retries. +.It Fl f Ar archive +Specify +.Ar archive +as the pathname of the input or output archive, overriding the default +standard input (for list and read) +or standard output +(for write). +A single archive may span multiple files and different archive devices. +When required, +.Nm +will prompt for the pathname of the file or device of the next volume in the +archive. +.It Fl G Ar group +Select a file based on its +.Ar group +name, or when starting with a +.Cm # , +a numeric GID. +A +.Ql \e +can be used to escape the +.Cm # . +Multiple +.Fl G +options may be supplied and checking stops with the first match. +.It Fl H +Follow only command-line symbolic links while performing a physical file +system traversal. +.It Fl i +Interactively rename files or archive members. +For each archive member matching a +.Ar pattern +operand or each file matching a +.Ar file +operand, +.Nm +will prompt to +.Pa /dev/tty +giving the name of the file, its file mode, and its modification time. +.Nm +will then read a line from +.Pa /dev/tty . +If this line is blank, the file or archive member is skipped. +If this line consists of a single period, the +file or archive member is processed with no modification to its name. +Otherwise, its name is replaced with the contents of the line. 
+.Nm +will immediately exit with a non-zero exit status if +.Dv EOF +is encountered when reading a response or if +.Pa /dev/tty +cannot be opened for reading and writing. +.It Fl j +Use bzip2 to compress (decompress) the archive while writing (reading). +The bzip2 utility must be installed separately. +Incompatible with +.Fl a . +.It Fl k +Do not overwrite existing files. +.It Fl L +Follow all symbolic links to perform a logical file system traversal. +.It Fl l +(The lowercase letter +.Dq ell . ) +Link files. +In copy mode +.Pq Fl r Fl w , +hard links are made between the source and destination file hierarchies +whenever possible. +.It Fl n +Select the first archive member that matches each +.Ar pattern +operand. +No more than one archive member is matched for each +.Ar pattern . +When members of type directory are matched, the file hierarchy rooted at that +directory is also matched (unless +.Fl d +is also specified). +.It Fl O +Force the archive to be one volume. +If a volume ends prematurely, +.Nm +will not prompt for a new volume. +This option can be useful for +automated tasks where error recovery cannot be performed by a human. +.It Fl o Ar options +Information to modify the algorithm for extracting or writing archive files +which is specific to the archive format specified by +.Fl x . +In general, +.Ar options +take the form: +.Ar name Ns = Ns Ar value . +.Pp +The following options are available for the +.Cm ustar +and old +.Bx +.Cm tar +formats: +.Pp +.Bl -tag -width Ds -compact +.It Cm write_opt=nodir +When writing archives, omit the storage of directories. +.El +.It Fl P +Do not follow symbolic links, perform a physical file system traversal. +This is the default mode. +.It Fl p Ar string +Specify one or more file characteristic options (privileges). +The +.Ar string +option-argument is a string specifying file characteristics to be retained or +discarded on extraction. 
+The string consists of the specification characters +.Cm a , e , m , o , +and +.Cm p . +Multiple characteristics can be concatenated within the same string +and multiple +.Fl p +options can be specified. +The meanings of the specification characters are as follows: +.Bl -tag -width 2n +.It Cm a +Do not preserve file access times. +By default, file access times are preserved whenever possible. +.It Cm e +.Dq Preserve everything , +the user ID, group ID, file mode bits, +file access time, and file modification time. +This is intended to be used by root, +someone with all the appropriate privileges, in order to preserve all +aspects of the files as they are recorded in the archive. +The +.Cm e +flag is the sum of the +.Cm o +and +.Cm p +flags. +.It Cm m +Do not preserve file modification times. +By default, file modification times are preserved whenever possible. +.It Cm o +Preserve the user ID and group ID. +.It Cm p +.Dq Preserve +the file mode bits. +This is intended to be used by a user with regular privileges +who wants to preserve all aspects of the file other than the ownership. +The file times are preserved by default, but two other flags are offered to +disable this and use the time of extraction instead. +.El +.Pp +In the preceding list, +.Sq preserve +indicates that an attribute stored in the archive is given to the +extracted file, subject to the permissions of the invoking +process. +Otherwise the attribute of the extracted file is determined as +part of the normal file creation action. +If neither the +.Cm e +nor the +.Cm o +specification character is specified, or the user ID and group ID are not +preserved for any reason, +.Nm +will not set the +.Dv S_ISUID +(setuid) and +.Dv S_ISGID +(setgid) bits of the file mode. +If the preservation of any of these items fails for any reason, +.Nm +will write a diagnostic message to standard error. 
+Failure to preserve these items will affect the final exit status, +but will not cause the extracted file to be deleted. +If the file characteristic letters in any of the string option-arguments are +duplicated or conflict with each other, the one(s) given last will take +precedence. +For example, if +.Fl p Ar eme +is specified, file modification times are still preserved. +.It Fl r +Read an archive file from standard input +and extract the specified +.Ar file +operands. +If any intermediate directories are needed in order to extract an archive +member, these directories will be created as if +.Xr mkdir 2 +was called with the bitwise OR of +.Dv S_IRWXU , S_IRWXG , +and +.Dv S_IRWXO +as the mode argument. +When the selected archive format supports the specification of linked +files and these files cannot be linked while the archive is being extracted, +.Nm +will write a diagnostic message to standard error +and exit with a non-zero exit status at the completion of operation. +.It Fl s Ar replstr +Modify the archive member names according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +.Ar file +or +.Ar pattern +arguments may be given to restrict the list of archive members to those +specified. +.Pp +The format of these regular expressions is: +.Pp +.Dl /old/new/[gp] +.Pp +As in +.Xr ed 1 , +.Ar old +is a basic regular expression (see +.Xr re_format 7 ) +and +.Ar new +can contain an ampersand +.Pq Ql & , +.Ql \e Ns Em n +(where +.Em n +is a digit) back-references, +or subexpression matching. +The +.Ar old +string may also contain newline characters. +Any non-null character can be used as a delimiter +.Po +.Ql / +is shown here +.Pc . +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. 
+.Pp +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring, +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +standard error in the following format: +.Pp +.D1 Em original-pathname No >> Em new-pathname +.Pp +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl T Ar range +Allow files to be selected based on a file modification or inode change +time falling within the specified time range. +The range has the format: +.Sm off +.Bd -filled -offset indent +.Op Ar from_date +.Op \&, Ar to_date +.Op / Oo Cm c Oc Op Cm m +.Ed +.Sm on +.Pp +The dates specified by +.Ar from_date +to +.Ar to_date +are inclusive. +If only a +.Ar from_date +is supplied, all files with a modification or inode change time +equal to or younger are selected. +If only a +.Ar to_date +is supplied, all files with a modification or inode change time +equal to or older will be selected. +When the +.Ar from_date +is equal to the +.Ar to_date , +only files with a modification or inode change time of exactly that +time will be selected. +.Pp +When +.Nm +is in write or copy mode, the optional trailing field +.Oo Cm c Oc Ns Op Cm m +can be used to determine which file time (inode change, file modification or +both) are used in the comparison. +If neither is specified, the default is to use file modification time only. +The +.Cm m +specifies the comparison of file modification time (the time when +the file was last written). +The +.Cm c +specifies the comparison of inode change time (the time when the file +inode was last changed; e.g., a change of owner, group, mode, etc). 
+When +.Cm c +and +.Cm m +are both specified, then the modification and inode change times are +both compared. +.Pp +The inode change time comparison is useful in selecting files whose +attributes were recently changed or selecting files which were recently +created and had their modification time reset to an older time (as +happens when a file is extracted from an archive and the modification time +is preserved). +Time comparisons using both file times are useful when +.Nm +is used to create a time-based incremental archive (only files that were +changed during a specified time range will be archived). +.Pp +A time range is made up of up to seven different fields and each field must contain two +digits. +The format is: +.Pp +.Dl [[[[[cc]yy]mm]dd]HH]MM[.SS] +.Pp +Where +.Ar cc +is the first two digits of the year (the century), +.Ar yy +is the last two digits of the year, +the first +.Ar mm +is the month (from 01 to 12), +.Ar dd +is the day of the month (from 01 to 31), +.Ar HH +is the hour of the day (from 00 to 23), +.Ar MM +is the minute (from 00 to 59), +and +.Ar SS +is the seconds (from 00 to 59). +The minute field +.Ar MM +is required, while the other fields are optional and must be added in the +following order: +.Ar HH , dd , mm , +.Ar yy , cc . +.Pp +The +.Ar SS +field may be added independently of the other fields. +Time ranges are relative to the current time, so +.Ic -T 1234/cm +would select all files with a modification or inode change time +of 12:34 PM today or later. +Multiple +.Fl T +time ranges can be supplied and checking stops with the first match. +.It Fl t +Reset the access times of any file or directory read or accessed by +.Nm +to be the same as they were before being read or accessed by +.Nm pax . +.It Fl U Ar user +Select a file based on its +.Ar user +name, or when starting with a +.Cm # , +a numeric UID. +A +.Ql \e +can be used to escape the +.Cm # . +Multiple +.Fl U +options may be supplied and checking stops with the first match.
+.It Fl u +Ignore files that are older (having a less recent file modification time) +than a pre-existing file or archive member with the same name. +During read, +an archive member with the same name as a file in the file system will be +extracted if the archive member is newer than the file. +During write, +a file system member with the same name as an archive member will be +written to the archive if it is newer than the archive member. +During copy, +the file in the destination hierarchy is replaced by the file in the source +hierarchy or by a link to the file in the source hierarchy if the file in +the source hierarchy is newer. +.It Fl v +During a list operation, produce a verbose table of contents using the format of the +.Xr ls 1 +utility with the +.Fl l +option. +For pathnames representing a hard link to a previous member of the archive, +the output has the format: +.Pp +.Dl Em ls -l listing No == Em link-name +.Pp +For pathnames representing a symbolic link, the output has the format: +.Pp +.Dl Em ls -l listing No -> Em link-name +.Pp +Where +.Em ls -l listing +is the output format specified by the +.Xr ls 1 +utility when used with the +.Fl l +option. +Otherwise for all the other operational modes +(read, write, and copy), +pathnames are written and flushed to standard error +without a trailing newline +as soon as processing begins on that file or +archive member. +The trailing newline +is not buffered and is written only after the file has been read or written. +.It Fl w +Write files to the standard output +in the specified archive format. +When no +.Ar file +operands are specified, standard input +is read for a list of pathnames with one per line without any leading or +trailing +.Aq blanks . +.It Fl X +When traversing the file hierarchy specified by a pathname, +do not descend into directories that have a different device ID. +See the +.Li st_dev +field as described in +.Xr stat 2 +for more information about device IDs. 
+.It Fl x Ar format +Specify the output archive format, with the default format being +.Cm ustar . +.Nm +currently supports the following formats: +.Bl -tag -width "sv4cpio" +.It Cm bcpio +The old binary cpio format. +The default blocksize for this format is 5120 bytes. +This format is not very portable and should not be used when other formats +are available. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm cpio +The extended cpio interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm sv4cpio +The System V release 4 cpio. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm sv4crc +The System V release 4 cpio with file CRC checksums. +The default blocksize for this format is 5120 bytes. +Inode and device information about a file (used for detecting file hard links +by this format), which may be truncated by this format, is detected by +.Nm +and is repaired. +.It Cm tar +The old +.Bx +tar format as found in +.Bx 4.3 . +The default blocksize for this format is 10240 bytes. +Pathnames stored by this format must be 100 characters or less in length. +Only regular files, hard links, soft links, and directories +will be archived (other file system types are not supported). +For backwards compatibility with even older tar formats, a +.Fl o +option can be used when writing an archive to omit the storage of directories. 
+This option takes the form: +.Pp +.Dl Fl o Cm write_opt=nodir +.It Cm ustar +The extended tar interchange format specified in the +.St -p1003.2 +standard. +The default blocksize for this format is 10240 bytes. +Filenames stored by this format must be 100 characters or less in length; +the total pathname must be 256 characters or less. +.El +.Pp +.Nm +will detect and report any file that it is unable to store or extract +as the result of any specific archive format restrictions. +The individual archive formats may impose additional restrictions on use. +Typical archive format restrictions include (but are not limited to): +file pathname length, file size, link pathname length, and the type of the +file. +.It Fl Y +This option is the same as the +.Fl D +option, except that the inode change time is checked using the +pathname created after all the file name modifications have completed. +.It Fl Z +This option is the same as the +.Fl u +option, except that the modification time is checked using the +pathname created after all the file name modifications have completed. +.It Fl z +Use +.Xr gzip 1 +to compress (decompress) the archive while writing (reading). +Incompatible with +.Fl a . +.El +.Pp +The options that operate on the names of files or archive members +.Po Fl c , +.Fl i , +.Fl j , +.Fl n , +.Fl s , +.Fl u , +.Fl v , +.Fl D , +.Fl G , +.Fl T , +.Fl U , +.Fl Y , +and +.Fl Z +.Pc +interact as follows. +.Pp +When extracting files during a read operation, archive members are +.Sq selected , +based only on the user specified pattern operands as modified by the +.Fl c , +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +.Fl U +options. +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then the +.Fl Y +and +.Fl Z +options will be applied based on the final pathname. +Finally, the +.Fl v +option will write the names resulting from these modifications. 
+.Pp +When archiving files during a write operation, +or copying files during a copy operation, +archive members are +.Sq selected , +based only on the user specified pathnames as modified by the +.Fl n , +.Fl u , +.Fl D , +.Fl G , +.Fl T , +and +.Fl U +options (the +.Fl D +option only applies during a copy operation). +Then any +.Fl s +and +.Fl i +options will modify in that order, the names of these selected files. +Then during a copy operation the +.Fl Y +and the +.Fl Z +options will be applied based on the final pathname. +Finally, the +.Fl v +option will write the names resulting from these modifications. +.Pp +When one or both of the +.Fl u +or +.Fl D +options are specified along with the +.Fl n +option, a file is not considered selected unless it is newer +than the file to which it is compared. +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.El +.Sh EXIT STATUS +.Ex -std pax +.Sh EXAMPLES +Copy the contents of the current directory to the device +.Pa /dev/rst0 : +.Pp +.Dl $ pax -w -f /dev/rst0 \&. +.Pp +Give the verbose table of contents for an archive stored in +.Pa filename : +.Pp +.Dl $ pax -v -f filename +.Pp +This sequence of commands will copy the entire +.Pa olddir +directory hierarchy to +.Pa newdir : +.Bd -literal -offset indent +$ mkdir newdir +$ cd olddir +$ pax -rw . ../newdir +.Ed +.Pp +Extract files from the archive +.Pa a.pax . +Files rooted in +.Pa /usr +are extracted relative to the current working directory; +all other files are extracted to their unmodified path. +.Pp +.Dl $ pax -r -s ',^/usr/,,' -f a.pax +.Pp +This can be used to interactively select the files to copy from the +current directory to +.Pa dest_dir : +.Pp +.Dl $ pax -rw -i \&. 
dest_dir +.Pp +Extract all files from the archive +.Pa a.pax +which are owned by +.Em root +with group +.Em bin +and preserve all file permissions: +.Pp +.Dl $ pax -r -pe -U root -G bin -f a.pax +.Pp +Update (and list) only those files in the destination directory +.Pa /backup +which are older (less recent inode change or file modification times) than +files with the same name found in the source file tree +.Pa home : +.Pp +.Dl $ pax -r -w -v -Y -Z home /backup +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when reading an archive or cannot +find a file when writing an archive, or cannot preserve the user ID, +group ID, or file mode when the +.Fl p +option is specified, a diagnostic message is written to standard error +and a non-zero exit status will be returned, but processing will continue. +In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated by +a signal or error, +.Nm +may have only partially extracted a file the user wanted. +Additionally, the file modes of extracted files and directories +may have incorrect file bits, and the modification and access times may be +wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal or error, +.Nm +may have only partially created the archive, which may violate the specific +archive format specification. +.Pp +If while doing a copy, +.Nm +detects a file is about to overwrite itself, the file is not copied, +a diagnostic message is written to standard error +and when +.Nm +completes it will exit with a non-zero exit status. +.Sh SEE ALSO +.Xr cpio 1 , +.Xr tar 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification, +except that the +.Cm pax +archive format and the +.Cm listopt +keyword are unsupported. 
+.Pp +The flags +.Op Fl 0BDEGjOPTUYZz , +the archive formats +.Cm bcpio , +.Cm sv4cpio , +.Cm sv4crc , +and +.Cm tar , +the +.Cm b , k , +and +.Cm x +additions to the +.Fl b +flag, +and the flawed archive handling during list and read operations +are extensions to that specification. +.Sh HISTORY +A +.Nm +utility appeared in +.Bx 4.4 . +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. diff --git a/bin/pax/pax.c b/bin/pax/pax.c new file mode 100644 index 0000000..4d0fc68 --- /dev/null +++ b/bin/pax/pax.c @@ -0,0 +1,446 @@ +/* $OpenBSD: pax.c,v 1.53 2019/06/28 13:34:59 deraadt Exp $ */ +/* $NetBSD: pax.c,v 1.5 1996/03/26 23:54:20 mrg Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <signal.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <err.h> +#include <fcntl.h> +#include <grp.h> +#include <paths.h> +#include <pwd.h> +#include <stdio.h> + +#include "pax.h" +#include "extern.h" +static int gen_init(void); + +/* + * PAX main routines, general globals and some simple start up routines + */ + +/* + * Variables that can be accessed by any routine within pax + */ +int act = DEFOP; /* read/write/append/copy */ +FSUB *frmt = NULL; /* archive format type */ +int cflag; /* match all EXCEPT pattern/file */ +int cwdfd; /* starting cwd */ +int dflag; /* directory member match only */ +int iflag; /* interactive file/archive rename */ +int kflag; /* do not overwrite existing files */ +int lflag; /* use hard links when possible */ +int nflag; /* select first archive member match */ +int tflag; /* restore access time after read */ +int uflag; /* ignore older modification time files */ +int vflag; /* produce verbose output */ +int Dflag; /* same as uflag except inode change time */ +int Hflag; /* follow command line symlinks (write only) */ +int Lflag; /* follow symlinks when writing */ +int Nflag; /* only use numeric uid and gid */ +int Xflag; /* archive files with same device id only */ +int Yflag; /* same as Dflag except after name mode */ +int Zflag; /* same as uflag except after name mode */ 
+int zeroflag; /* use \0 as pathname terminator */ +int vfpart; /* is partial verbose output in progress */ +int patime = 1; /* preserve file access time */ +int pmtime = 1; /* preserve file modification times */ +int nodirs; /* do not create directories as needed */ +int pmode; /* preserve file mode bits */ +int pids; /* preserve file uid/gid */ +int rmleadslash = 0; /* remove leading '/' from pathnames */ +int exit_val; /* exit value */ +int docrc; /* check/create file crc */ +char *dirptr; /* destination dir in a copy */ +char *argv0; /* root of argv[0] */ +enum op_mode op_mode; /* what program are we acting as? */ +sigset_t s_mask; /* signal mask for cleanup critical sect */ +FILE *listf; /* file pointer to print file list to */ +int listfd = STDERR_FILENO; /* fd matching listf, for sighandler output */ +char *tempfile; /* tempfile to use for mkstemp(3) */ +char *tempbase; /* basename of tempfile to use for mkstemp(3) */ + +/* + * PAX - Portable Archive Interchange + * + * A utility to read, write, and write lists of the members of archive + * files and copy directory hierarchies. A variety of archive formats + * are supported (some are described in POSIX 1003.1 10.1): + * + * ustar - 10.1.1 extended tar interchange format + * cpio - 10.1.2 extended cpio interchange format + * tar - old BSD 4.3 tar format + * binary cpio - old cpio with binary header format + * sysVR4 cpio - with and without CRC + * + * This version is a superset of IEEE Std 1003.2b-d3 + * + * Summary of Extensions to the IEEE Standard: + * + * 1 READ ENHANCEMENTS + * 1.1 Operations which read archives will continue to operate even when + * processing archives which may be damaged, truncated, or fail to meet + * format specs in several different ways. Damaged sections of archives + * are detected and avoided if possible. Attempts will be made to resync + * archive read operations even with badly damaged media. + * 1.2 Blocksize requirements are not strictly enforced on archive read. 
+ * Tapes which have variable sized records can be read without errors. + * 1.3 The user can specify via the non-standard option flag -E if error + * resync operation should stop on a media error, try a specified number + * of times to correct, or try to correct forever. + * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks + * of all zeros) will be restored with holes appropriate for the target + * filesystem + * 1.5 The user is notified whenever something is found during archive + * read operations which violates spec (but the read will continue). + * 1.6 Multiple archive volumes can be read and may span over different + * archive devices + * 1.7 Rigidly restores all file attributes exactly as they are stored on the + * archive. + * 1.8 Modification change time ranges can be specified via multiple -T + * options. These allow a user to select files whose modification time + * lies within a specific time range. + * 1.9 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 1.10 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 1.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 2 WRITE ENHANCEMENTS + * 2.1 Write operation will stop instead of allowing a user to create a flawed + * archive (due to any problem). + * 2.2 Archives written by pax are forced to strictly conform to both the + * pax and the specific format specifications. + * 2.3 Blocking size and format is rigidly enforced on writes. + * 2.4 Formats which may exhibit header overflow problems (they have fields + * too small for large file systems, such as inode number storage), use + * routines designed to repair this problem. These techniques still + * conform to both pax and format specifications, but no longer truncate + * these fields. This removes any restrictions on using these archive + * formats on large file systems. 
+ * 2.5 Multiple archive volumes can be written and may span over different + * archive devices + * 2.6 An archive volume record limit allows the user to specify the number + * of bytes stored on an archive volume. When reached the user is + * prompted for the next archive volume. This is specified with the + * non-standard -B flag. The limit is rounded up to the next blocksize. + * 2.7 All archive padding during write uses zero filled sections. This makes + * it much easier to pull data out of a flawed archive during read + * operations. + * 2.8 Access time reset with the -t applies to all file nodes (including + * directories). + * 2.9 Symbolic links can be followed with -L (optional in the spec). + * 2.10 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. + * 2.11 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 2.12 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 2.13 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * + * 3 COPY ENHANCEMENTS + * 3.1 Sparse files (lseek holes) can be copied without expanding the holes + * into zero filled blocks. The file copy is created with holes which are + * appropriate for the target filesystem + * 3.2 Access time as well as modification time on copied file trees can be + * preserved with the appropriate -p options. + * 3.3 Access time reset with the -t applies to all file nodes (including + * directories). + * 3.4 Symbolic links can be followed with -L (optional in the spec). + * 3.5 Modification or inode change time ranges can be specified via + * multiple -T options. These allow a user to select files whose + * modification or inode change time lies within a specific time range. 
+ * 3.6 Files can be selected based on owner (user name or uid) via one or more + * -U options. + * 3.7 Files can be selected based on group (group name or gid) via one or + * more -G options. + * 3.8 Symlinks which appear on the command line can be followed (without + * following other symlinks; -H flag) + * 3.9 File inode change time can be checked against existing file before + * name modification (-D) + * 3.10 File inode change time can be checked against existing file after + * name modification (-Y) + * 3.11 File modification time can be checked against existing file after + * name modification (-Z) + * + * 4 GENERAL ENHANCEMENTS + * 4.1 Internal structure is designed to isolate format dependent and + * independent functions. Formats are selected via a format driver table. + * This encourages the addition of new archive formats by only having to + * write those routines which id, read and write the archive header. + */ + +/* + * main() + * parse options, set up and operate as specified by the user. + * any operational flaw will set exit_val to non-zero + * Return: 0 if ok, 1 otherwise + */ + +int +main(int argc, char **argv) +{ + char *tmpdir; + size_t tdlen; + + listf = stderr; + + /* + * Keep a reference to cwd, so we can always come back home. + */ + cwdfd = open(".", O_RDONLY | O_CLOEXEC); + if (cwdfd == -1) { + syswarn(1, errno, "Can't open current working directory."); + return(exit_val); + } + + /* + * Where should we put temporary files? + */ + if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') + tmpdir = _PATH_TMP; + tdlen = strlen(tmpdir); + while (tdlen > 0 && tmpdir[tdlen - 1] == '/') + tdlen--; + tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); + if (tempfile == NULL) { + paxwarn(1, "Cannot allocate memory for temp file name."); + return(exit_val); + } + if (tdlen) + memcpy(tempfile, tmpdir, tdlen); + tempbase = tempfile + tdlen; + *tempbase++ = '/'; + + /* + * keep passwd and group files open for faster lookups. 
+ */ + setpassent(1); + setgroupent(1); + + /* + * parse options, determine operational mode, general init + */ + options(argc, argv); + if ((gen_init() < 0) || (tty_init() < 0)) + return(exit_val); + + /* + * pmode needs to restore setugid bits when extracting or copying, + * so can't pledge at all then. + */ + if (pmode == 0 || (act != EXTRACT && act != COPY)) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw proc exec tape", + NULL) == -1) + err(1, "pledge"); + + /* Copy mode, or no gzip -- don't need to fork/exec. */ + if (gzip_program == NULL || act == COPY) { + if (pledge("stdio rpath wpath cpath fattr dpath getpw tape", + NULL) == -1) + err(1, "pledge"); + } + } + + /* + * select a primary operation mode + */ + switch (act) { + case EXTRACT: + extract(); + break; + case ARCHIVE: + archive(); + break; + case APPND: + if (gzip_program != NULL) + errx(1, "can not gzip while appending"); + append(); + break; + case COPY: + copy(); + break; + default: + case LIST: + list(); + break; + } + return(exit_val); +} + +/* + * sig_cleanup() + * when interrupted we try to do whatever delayed processing we can. + * This is not critical, but we really ought to limit our damage when we + * are aborted by the user. + * Return: + * never.... + */ + +void +sig_cleanup(int which_sig) +{ + /* + * restore modes and times for any dirs we may have created + * or any dirs we may have read. 
+ */ + + /* paxwarn() uses stdio; fake it as well as we can */ + if (which_sig == SIGXCPU) + dprintf(STDERR_FILENO, "\nCPU time limit reached, cleaning up.\n"); + else + dprintf(STDERR_FILENO, "\nSignal caught, cleaning up.\n"); + + ar_close(1); + sltab_process(1); + proc_dir(1); + if (tflag) + atdir_end(); + _exit(1); +} + +/* + * setup_sig() + * set a signal to be caught, but only if it isn't being ignored already + */ + +static int +setup_sig(int sig, const struct sigaction *n_hand) +{ + struct sigaction o_hand; + + if (sigaction(sig, NULL, &o_hand) == -1) + return (-1); + + if (o_hand.sa_handler == SIG_IGN) + return (0); + + return (sigaction(sig, n_hand, NULL)); +} + +/* + * gen_init() + * general setup routines. Not all are required, but they really help + * when dealing with a medium to large sized archives. + */ + +static int +gen_init(void) +{ + struct rlimit reslimit; + struct sigaction n_hand; + + /* + * Really needed to handle large archives. We can run out of memory for + * internal tables really fast when we have a whole lot of files... + */ + if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_DATA , &reslimit); + } + + /* + * should file size limits be waived? if the os limits us, this is + * needed if we want to write a large archive + */ + if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_FSIZE , &reslimit); + } + + /* + * increase the size the stack can grow to + */ + if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_STACK , &reslimit); + } + + /* + * not really needed, but doesn't hurt + */ + if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ + reslimit.rlim_cur = reslimit.rlim_max; + (void)setrlimit(RLIMIT_RSS , &reslimit); + } + + /* + * signal handling to reset stored directory times and modes. Since + * we deal with broken pipes via failed writes we ignore it. 
We also + * deal with any file size limit through failed writes. Cpu time + * limits are caught and a cleanup is forced. + */ + if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || + (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || + (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || + (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { + paxwarn(1, "Unable to set up signal mask"); + return(-1); + } + + /* snag the fd to be used from the signal handler */ + listfd = fileno(listf); + + memset(&n_hand, 0, sizeof n_hand); + n_hand.sa_mask = s_mask; + n_hand.sa_flags = 0; + n_hand.sa_handler = sig_cleanup; + + if (setup_sig(SIGHUP, &n_hand) || + setup_sig(SIGTERM, &n_hand) || + setup_sig(SIGINT, &n_hand) || + setup_sig(SIGQUIT, &n_hand) || + setup_sig(SIGXCPU, &n_hand)) + goto out; + + n_hand.sa_handler = SIG_IGN; + if ((sigaction(SIGPIPE, &n_hand, NULL) == -1) || + (sigaction(SIGXFSZ, &n_hand, NULL) == -1)) + goto out; + return(0); + + out: + syswarn(1, errno, "Unable to set up signal handler"); + return(-1); +} diff --git a/bin/pax/pax.h b/bin/pax/pax.h new file mode 100644 index 0000000..65d445a --- /dev/null +++ b/bin/pax/pax.h @@ -0,0 +1,262 @@ +/* $OpenBSD: pax.h,v 1.29 2017/09/12 17:11:11 otto Exp $ */ +/* $NetBSD: pax.h,v 1.3 1995/03/21 09:07:41 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pax.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * BSD PAX global data structures and constants. + */ + +#define MAXBLK 64512 /* MAX blocksize supported (posix SPEC) */ + /* WARNING: increasing MAXBLK past 32256 */ + /* will violate posix spec. */ +#define MAXBLK_POSIX 32256 /* MAX blocksize supported as per POSIX */ +#define BLKMULT 512 /* blocksize must be even mult of 512 bytes */ + /* Don't even think of changing this */ +#define DEVBLK 8192 /* default read blksize for devices */ +#define FILEBLK 10240 /* default read blksize for files */ +#define PAXPATHLEN 3072 /* maximum path length for pax. 
MUST be */ + /* longer than the system PATH_MAX */ + +/* + * Pax modes of operation + */ +#define LIST 0 /* List the file in an archive */ +#define EXTRACT 1 /* extract the files in an archive */ +#define ARCHIVE 2 /* write a new archive */ +#define APPND 3 /* append to the end of an archive */ +#define COPY 4 /* copy files to destination dir */ +#define DEFOP LIST /* if no flags default is to LIST */ + +/* + * Device type of the current archive volume + */ +#define ISREG 0 /* regular file */ +#define ISCHR 1 /* character device */ +#define ISBLK 2 /* block device */ +#define ISTAPE 3 /* tape drive */ +#define ISPIPE 4 /* pipe/socket */ + +/* + * Pattern matching structure + * + * Used to store command line patterns + */ +typedef struct pattern { + char *pstr; /* pattern to match, user supplied */ + char *pend; /* end of a prefix match */ + char *chdname; /* the dir to change to if not NULL. */ + size_t plen; /* length of pstr */ + int flgs; /* processing/state flags */ +#define MTCH 0x1 /* pattern has been matched */ +#define DIR_MTCH 0x2 /* pattern matched a directory */ + struct pattern *fow; /* next pattern */ +} PATTERN; + +/* + * General Archive Structure (used internal to pax) + * + * This structure is used to pass information about archive members between + * the format independent routines and the format specific routines. When + * new archive formats are added, they must accept requests and supply info + * encoded in a structure of this type. The name fields are declared statically + * here, as there is only ONE of these floating around, size is not a major + * consideration. Eventually converting the name fields to a dynamic length + * may be required if and when the supporting operating system removes all + * restrictions on the length of pathnames it will resolve. 
+ */ +typedef struct { + int nlen; /* file name length */ + char name[PAXPATHLEN+1]; /* file name */ + int ln_nlen; /* link name length */ + char ln_name[PAXPATHLEN+1]; /* name to link to (if any) */ + char *org_name; /* orig name in file system */ + PATTERN *pat; /* ptr to pattern match (if any) */ + struct stat sb; /* stat buffer see stat(2) */ + off_t pad; /* bytes of padding after file xfer */ + off_t skip; /* bytes of real data after header */ + /* IMPORTANT. The st_size field does */ + /* not always indicate the amount of */ + /* data following the header. */ + u_int32_t crc; /* file crc */ + int type; /* type of file node */ +#define PAX_DIR 1 /* directory */ +#define PAX_CHR 2 /* character device */ +#define PAX_BLK 3 /* block device */ +#define PAX_REG 4 /* regular file */ +#define PAX_SLK 5 /* symbolic link */ +#define PAX_SCK 6 /* socket */ +#define PAX_FIF 7 /* fifo */ +#define PAX_HLK 8 /* hard link */ +#define PAX_HRG 9 /* hard link to a regular file */ +#define PAX_CTG 10 /* high performance file */ +#define PAX_GLL 11 /* GNU long symlink */ +#define PAX_GLF 12 /* GNU long file */ +} ARCHD; + +#define PAX_IS_REG(type) ((type) == PAX_REG || (type) == PAX_CTG) +#define PAX_IS_HARDLINK(type) ((type) == PAX_HLK || (type) == PAX_HRG) +#define PAX_IS_LINK(type) ((type) == PAX_SLK || PAX_IS_HARDLINK(type)) + +/* + * Format Specific Routine Table + * + * The format specific routine table allows new archive formats to be quickly + * added. Overall pax operation is independent of the actual format used to + * form the archive. Only those routines which deal directly with the archive + * are tailored to the oddities of the specific format. All other routines are + * independent of the archive format. Data flow in and out of the format + * dependent routines pass pointers to ARCHD structure (described below). + */ +typedef struct { + char *name; /* name of format, this is the name the user */ + /* gives to -x option to select it. 
*/ + int bsz; /* default block size. used when the user */ + /* does not specify a blocksize for writing */ + /* Appends continue with the blocksize */ + /* the archive is currently using. */ + int hsz; /* Header size in bytes. this is the size of */ + /* the smallest header this format supports. */ + /* Headers are assumed to fit in a BLKMULT. */ + /* If they are bigger, get_head() and */ + /* get_arc() must be adjusted */ + int udev; /* does append require unique dev/ino? some */ + /* formats use the device and inode fields */ + /* to specify hard links. when members in */ + /* the archive have the same inode/dev they */ + /* are assumed to be hard links. During */ + /* append we may have to generate unique ids */ + /* to avoid creating incorrect hard links */ + int hlk; /* does archive store hard links info? if */ + /* not, we do not bother to look for them */ + /* during archive write operations */ + int blkalgn; /* writes must be aligned to blkalgn boundary */ + int inhead; /* is the trailer encoded in a valid header? */ + /* if not, trailers are assumed to be found */ + /* in invalid headers (i.e. like tar) */ + int (*id)(char *, /* checks if a buffer is a valid header */ + int); /* returns 1 if it is, o.w. returns a 0 */ + int (*st_rd)(void); /* initialize routine for read. so format */ + /* can set up tables etc before it starts */ + /* reading an archive */ + int (*rd)(ARCHD *, /* read header routine. passed a pointer to */ + char *); /* ARCHD. It must extract the info from the */ + /* format and store it in the ARCHD struct. */ + /* This routine is expected to fill all the */ + /* fields in the ARCHD (including stat buf) */ + /* 0 is returned when a valid header is */ + /* found. -1 when not valid. This routine */ + /* sets the skip and pad fields so the format */ + /* independent routines know the amount of */ + /* padding and the number of bytes of data */ + /* which follow the header. 
This info is */ + /* used skip to the next file header */ + off_t (*end_rd)(void); /* read cleanup. Allows format to clean up */ + /* and MUST RETURN THE LENGTH OF THE TRAILER */ + /* RECORD (so append knows how many bytes */ + /* to move back to rewrite the trailer) */ + int (*st_wr)(void); /* initialize routine for write operations */ + int (*wr)(ARCHD *); /* write archive header. Passed an ARCHD */ + /* filled with the specs on the next file to */ + /* archived. Returns a 1 if no file data is */ + /* is to be stored; 0 if file data is to be */ + /* added. A -1 is returned if a write */ + /* operation to the archive failed. this */ + /* function sets the skip and pad fields so */ + /* the proper padding can be added after */ + /* file data. This routine must NEVER write */ + /* a flawed archive header. */ + int (*end_wr)(void); /* end write. write the trailer and do any */ + /* other format specific functions needed */ + /* at the end of an archive write */ + int (*trail)(ARCHD *, /* returns 0 if a valid trailer, -1 if not */ + char *, int, /* For formats which encode the trailer */ + int *); /* outside of a valid header, a return value */ + /* of 1 indicates that the block passed to */ + /* it can never contain a valid header (skip */ + /* this block, no point in looking at it) */ + /* CAUTION: parameters to this function are */ + /* different for trailers inside or outside */ + /* of headers. See get_head() for details */ + int (*options)(void); /* process format specific options (-o) */ +} FSUB; + +/* + * Time data for a given file. This is usually embedded in a structure + * indexed by dev+ino, by name, by order in the archive, etc. set_attr() + * takes one of these and will only change the times or mode if the file + * at the given name has the indicated dev+ino. 
+ */ +struct file_times { + ino_t ft_ino; /* inode number to verify */ + struct timespec ft_mtim; /* times to set */ + struct timespec ft_atim; + char *ft_name; /* name of file to set the times on */ + dev_t ft_dev; /* device number to verify */ +}; + +/* + * Format Specific Options List + * + * Used to pass format options to the format options handler + */ +typedef struct oplist { + char *name; /* option variable name e.g. name= */ + char *value; /* value for option variable */ + struct oplist *fow; /* next option */ +} OPLIST; + +/* + * General Macros + * + * MINIMUM() evaluates each of its arguments more than once, so do not + * pass it expressions that have side effects. + */ +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) +#define MAJOR(x) major(x) +#define MINOR(x) minor(x) +#define TODEV(x, y) makedev((x), (y)) + +#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) +#define SETBITS (S_ISUID | S_ISGID) +#define ABITS (FILEBITS | SETBITS) + +/* + * General Defines + * + * MAX_TIME_T is LLONG_MAX when time_t is as wide as long long and + * INT_MAX otherwise. + */ +#define HEX 16 +#define OCT 8 +#define _PAX_ 1 +#define _TFILE_BASE "paxXXXXXXXXXX" +#define MAX_TIME_T (sizeof(time_t) == sizeof(long long) ? \ + LLONG_MAX : INT_MAX) diff --git a/bin/pax/sel_subs.c b/bin/pax/sel_subs.c new file mode 100644 index 0000000..578c445 --- /dev/null +++ b/bin/pax/sel_subs.c @@ -0,0 +1,632 @@ +/* $OpenBSD: sel_subs.c,v 1.28 2019/06/24 03:33:09 deraadt Exp $ */ +/* $NetBSD: sel_subs.c,v 1.5 1995/03/21 09:07:42 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <grp.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * data structure for storing uid/grp selects (-U, -G non standard options) + */ + +#define USR_TB_SZ 317 /* user selection table size */ +#define GRP_TB_SZ 317 /* group selection table size */ + +typedef struct usrt { + uid_t uid; + struct usrt *fow; /* next uid */ +} USRT; + +typedef struct grpt { + gid_t gid; + struct grpt *fow; /* next gid */ +} GRPT; + +/* + * data structure for storing user supplied time ranges (-T option) + */ + +/* + * ATOI2() converts the two decimal digits at ar to an int and then moves + * ar past them. It expands to two statements (with their own semicolons), + * so it is only safe as the right-hand side of a complete assignment + * statement, never inside a larger expression or an unbraced if body. + */ +#define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; + +typedef struct time_rng { + time_t low_time; /* lower inclusive time limit */ + time_t high_time; /* higher inclusive time limit */ + int flgs; /* option flags */ +#define HASLOW 0x01 /* has lower time limit */ +#define HASHIGH 0x02 /* has higher time limit */ +#define CMPMTME 0x04 /* compare file modification time */ +#define CMPCTME 0x08 /* compare inode change time */ +#define CMPBOTH (CMPMTME|CMPCTME) /* compare inode and mod time */ + struct time_rng *fow; /* next pattern */ +} TIME_RNG; + +static int str_sec(const char *, time_t *); +static int usr_match(ARCHD *); +static int grp_match(ARCHD *); +static int trng_match(ARCHD *); + +static TIME_RNG *trhead = NULL; /* time range list head */ +static TIME_RNG *trtail = NULL; /* time range list tail */ +static USRT **usrtb = NULL; /* user selection table */ +static GRPT **grptb = NULL; /* group selection table */ + +/* + * Routines for selection of archive members + */ + +/* + * sel_chk() + * check if this file matches a specified uid, gid or time range + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +int +sel_chk(ARCHD *arcn) +{ + if (((usrtb != NULL) && usr_match(arcn)) || + ((grptb != NULL) && 
grp_match(arcn)) || + ((trhead != NULL) && trng_match(arcn))) + return(1); + return(0); +} + +/* + * User/group selection routines + * + * Routines to handle user selection of files based on the file uid/gid. To + * add an entry, the user supplies either the name or the uid/gid starting with + * a # on the command line. A \# will escape the #. + */ + +/* + * usr_add() + * add a user match to the user match hash table. str is either a user + * name (a leading \# stands for a literal #) or a # followed by a decimal + * uid. A numeric uid must be made up of digits only and must fit in a + * uid_t; anything else is rejected instead of being quietly read as 0. + * Return: + * 0 if added ok (or already in the table), -1 otherwise; + */ + +int +usr_add(char *str) +{ + u_int indx; + USRT *pt; + uid_t uid; + unsigned long val; + char *ep; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((usrtb == NULL) && + ((usrtb = calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory for user selection table"); + return(-1); + } + + /* + * figure out user spec + */ + if (str[0] != '#') { + /* + * it is a user name, \# escapes # as first char in user name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if (uid_from_user(str, &uid) == -1) { + paxwarn(1, "Unable to find uid for user: %s", str); + return(-1); + } + } else { + /* + * it is a numeric uid. strtoul() alone would turn "#", "#abc" + * or "#12x" into a valid looking id (0, 0 and 12), so insist on + * a leading digit, no trailing junk and a value that survives + * the conversion to uid_t + */ + val = strtoul(str+1, &ep, 10); + uid = (uid_t)val; + if (!isdigit((unsigned char)str[1]) || (*ep != '\0') || + ((unsigned long)uid != val)) { + paxwarn(1, "Invalid numeric uid: %s", str); + return(-1); + } + } + endpwent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)uid) % USR_TB_SZ; + for (pt = usrtb[indx]; pt != NULL; pt = pt->fow) { + if (pt->uid == uid) + return(0); + } + + /* + * uid is not yet in the table, add it to the front of the chain + */ + if ((pt = malloc(sizeof(USRT))) != NULL) { + pt->uid = uid; + pt->fow = usrtb[indx]; + usrtb[indx] = pt; + return(0); + } + paxwarn(1, "User selection table out of memory"); + return(-1); +} + +/* + * usr_match() + * check if this files uid matches a selected uid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +usr_match(ARCHD *arcn) +{ + USRT *pt; + + /* + * hash and look for it in the table + */ + pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; + while (pt != NULL) { + if (pt->uid == arcn->sb.st_uid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * grp_add() + * add a group match to the group match hash table. str is either a group + * name (a leading \# stands for a literal #) or a # followed by a decimal + * gid. A numeric gid must be made up of digits only and must fit in a + * gid_t; anything else is rejected instead of being quietly read as 0. + * Return: + * 0 if added ok (or already in the table), -1 otherwise; + */ + +int +grp_add(char *str) +{ + u_int indx; + GRPT *pt; + gid_t gid; + unsigned long val; + char *ep; + + /* + * create the table if it doesn't exist + */ + if ((str == NULL) || (*str == '\0')) + return(-1); + if ((grptb == NULL) && + ((grptb = calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { + paxwarn(1, "Unable to allocate memory for group selection table"); + return(-1); + } + + /* + * figure out group spec + */ + if (str[0] != '#') { + /* + * it is a group name, \# escapes # as first char in group name + */ + if ((str[0] == '\\') && (str[1] == '#')) + ++str; + if (gid_from_group(str, &gid) == -1) { + paxwarn(1,"Cannot determine gid for group name: %s", str); + return(-1); + } + } else { + /* + * it is a numeric gid. strtoul() alone would turn "#", "#abc" + * or "#12x" into a valid looking id (0, 0 and 12), so insist on + * a leading digit, no trailing junk and a value that survives + * the conversion to gid_t + */ + val = strtoul(str+1, &ep, 10); + gid = (gid_t)val; + if (!isdigit((unsigned char)str[1]) || (*ep != '\0') || + ((unsigned long)gid != val)) { + paxwarn(1, "Invalid numeric gid: %s", str); + return(-1); + } + } + endgrent(); + + /* + * hash it and go down the hash chain (if any) looking for it + */ + indx = ((unsigned)gid) % GRP_TB_SZ; + for (pt = grptb[indx]; pt != NULL; pt = pt->fow) { + if (pt->gid == gid) + return(0); + } + + /* + * gid not in the table, add it to the front of the chain + */ + if ((pt = malloc(sizeof(GRPT))) != NULL) { + pt->gid = gid; + pt->fow = grptb[indx]; + grptb[indx] = pt; + return(0); + } + paxwarn(1, "Group selection table out of memory"); + return(-1); +} + +/* + * grp_match() + * check if this files gid matches a selected gid. 
+ * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +grp_match(ARCHD *arcn) +{ + GRPT *pt; + + /* + * hash and look for it in the table + */ + pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; + while (pt != NULL) { + if (pt->gid == arcn->sb.st_gid) + return(0); + pt = pt->fow; + } + + /* + * not found + */ + return(1); +} + +/* + * Time range selection routines + * + * Routines to handle user selection of files based on the modification and/or + * inode change time falling within a specified time range (the non-standard + * -T flag). The user may specify any number of different file time ranges. + * Time ranges are checked one at a time until a match is found (if at all). + * If the file has a mtime (and/or ctime) which lies within one of the time + * ranges, the file is selected. Time ranges may have a lower and/or an upper + * value. These ranges are inclusive. When no time ranges are supplied to pax + * with the -T option, all members in the archive will be selected by the time + * range routines. When only a lower range is supplied, only files with a + * mtime (and/or ctime) equal to or younger are selected. When only an upper + * range is supplied, only files with a mtime (and/or ctime) equal to or older + * are selected. When the lower time range is equal to the upper time range, + * only files with a mtime (or ctime) of exactly that time are selected. + */ + +/* + * trng_add() + * add a time range match to the time range list. + * This is a non-standard pax option. Lower and upper ranges are in the + * format: [[[[[cc]yy]mm]dd]HH]MM[.SS] and are comma separated. + * The string may end in a /[c][m] suffix that picks the times to compare + * (inode change and/or modification time; modification time when absent). + * str is split in place: the slash and the comma are overwritten with NULs. + * Time ranges are based on current time, so 1234 would specify a time of + * 12:34 today. 
+ * Return: + * 0 if the time range was added to the list, -1 otherwise + */ + +int +trng_add(char *str) +{ + TIME_RNG *pt; + char *up_pt = NULL; + char *stpt; + char *flgpt; + int dot = 0; + + /* + * throw out the badly formed time ranges + */ + if ((str == NULL) || (*str == '\0')) { + paxwarn(1, "Empty time range string"); + return(-1); + } + + /* + * locate optional flags suffix /{cm} and cut it off the range string. + */ + if ((flgpt = strrchr(str, '/')) != NULL) + *flgpt++ = '\0'; + + /* + * only digits, one comma and one dot per limit are accepted. the + * first comma ends the lower limit and up_pt is left pointing at + * the upper limit + */ + for (stpt = str; *stpt != '\0'; ++stpt) { + if ((*stpt >= '0') && (*stpt <= '9')) + continue; + if ((*stpt == ',') && (up_pt == NULL)) { + *stpt = '\0'; + up_pt = stpt + 1; + dot = 0; + continue; + } + + /* + * allow only one dot per range (secs) + */ + if ((*stpt == '.') && (!dot)) { + ++dot; + continue; + } + paxwarn(1, "Improperly specified time range: %s", str); + goto out; + } + + /* + * allocate space for the time range and store the limits + */ + if ((pt = malloc(sizeof(TIME_RNG))) == NULL) { + paxwarn(1, "Unable to allocate memory for time range"); + return(-1); + } + + /* + * by default we only will check file mtime, but user can specify + * mtime, ctime (inode change time) or both. 
+ */ + if ((flgpt == NULL) || (*flgpt == '\0')) + pt->flgs = CMPMTME; + else { + pt->flgs = 0; + while (*flgpt != '\0') { + switch (*flgpt) { + case 'M': + case 'm': + pt->flgs |= CMPMTME; + break; + case 'C': + case 'c': + pt->flgs |= CMPCTME; + break; + default: + paxwarn(1, "Bad option %c with time range %s", + *flgpt, str); + free(pt); + goto out; + } + ++flgpt; + } + } + + /* + * start off with the current time + */ + pt->low_time = pt->high_time = time(NULL); + if (*str != '\0') { + /* + * add lower limit + */ + if (str_sec(str, &(pt->low_time)) < 0) { + paxwarn(1, "Illegal lower time range %s", str); + free(pt); + goto out; + } + pt->flgs |= HASLOW; + } + + if ((up_pt != NULL) && (*up_pt != '\0')) { + /* + * add upper limit + */ + if (str_sec(up_pt, &(pt->high_time)) < 0) { + paxwarn(1, "Illegal upper time range %s", up_pt); + free(pt); + goto out; + } + pt->flgs |= HASHIGH; + + /* + * check that the upper and lower do not overlap + */ + if (pt->flgs & HASLOW) { + if (pt->low_time > pt->high_time) { + paxwarn(1, "Upper %s and lower %s time overlap", + up_pt, str); + free(pt); + return(-1); + } + } + } + + /* + * append the new range to the tail of the list + */ + pt->fow = NULL; + if (trhead == NULL) { + trtail = trhead = pt; + return(0); + } + trtail->fow = pt; + trtail = pt; + return(0); + + out: + paxwarn(1, "Time range format is: [[[[[cc]yy]mm]dd]HH]MM[.SS][/[c][m]]"); + return(-1); +} + +/* + * trng_match() + * check if this files mtime/ctime falls within any supplied time range. + * Return: + * 0 if this archive member should be processed, 1 if it should be skipped + */ + +static int +trng_match(ARCHD *arcn) +{ + TIME_RNG *pt; + + /* + * have to search down the list one at a time looking for a match. + * remember time range limits are inclusive. 
+ */ + pt = trhead; + while (pt != NULL) { + switch (pt->flgs & CMPBOTH) { + case CMPBOTH: + /* + * user wants both mtime and ctime checked for this + * time range. the range only fails to match when + * mtime AND ctime each lie outside of it. testing + * the two limits separately would wrongly accept a + * file whose mtime is below the lower limit while + * its ctime is above the upper limit (or the other + * way around), although neither time is in range. + */ + if ((((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) && + (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time)))) { + pt = pt->fow; + continue; + } + break; + case CMPCTME: + /* + * user wants only ctime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_ctime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_ctime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + case CMPMTME: + default: + /* + * user wants only mtime checked for this time range + */ + if (((pt->flgs & HASLOW) && + (arcn->sb.st_mtime < pt->low_time)) || + ((pt->flgs & HASHIGH) && + (arcn->sb.st_mtime > pt->high_time))) { + pt = pt->fow; + continue; + } + break; + } + break; + } + + if (pt == NULL) + return(1); + return(0); +} + +/* + * str_sec() + * Convert a time string in the format of [[[[[cc]yy]mm]dd]HH]MM[.SS] to + * seconds UTC. Tval already has current time loaded into it at entry. + * Return: + * 0 if converted ok, -1 otherwise + */ + +static int +str_sec(const char *p, time_t *tval) +{ + struct tm *lt; + const char *dot, *t; + size_t len; + int bigyear; + int yearset; + + yearset = 0; + len = strlen(p); + + for (t = p, dot = NULL; *t; ++t) { + if (isdigit((unsigned char)*t)) + continue; + if (*t == '.' 
&& dot == NULL) { + dot = t; + continue; + } + return(-1); + } + + /* + * start from the broken-down form of the time already in tval, so + * the fields the string leaves out keep their current values + */ + lt = localtime(tval); + if (lt == NULL) + return(-1); + + /* + * the DST flag copied from the current time must not be applied to + * the requested date: let mktime() work it out for that date + */ + lt->tm_isdst = -1; + + if (dot != NULL) { /* .SS */ + if (strlen(++dot) != 2) + return(-1); + lt->tm_sec = ATOI2(dot); + if (lt->tm_sec > 61) + return(-1); + len -= 3; + } else + lt->tm_sec = 0; + + switch (len) { + case 12: /* cc */ + bigyear = ATOI2(p); + lt->tm_year = (bigyear * 100) - 1900; + yearset = 1; + /* FALLTHROUGH */ + case 10: /* yy */ + if (yearset) { + lt->tm_year += ATOI2(p); + } else { + lt->tm_year = ATOI2(p); + if (lt->tm_year < 69) /* hack for 2000 ;-} */ + lt->tm_year += (2000 - 1900); + } + /* FALLTHROUGH */ + case 8: /* mm */ + lt->tm_mon = ATOI2(p); + if ((lt->tm_mon > 12) || !lt->tm_mon) + return(-1); + --lt->tm_mon; /* time struct is 0 - 11 */ + /* FALLTHROUGH */ + case 6: /* dd */ + lt->tm_mday = ATOI2(p); + if ((lt->tm_mday > 31) || !lt->tm_mday) + return(-1); + /* FALLTHROUGH */ + case 4: /* HH */ + lt->tm_hour = ATOI2(p); + if (lt->tm_hour > 23) + return(-1); + /* FALLTHROUGH */ + case 2: /* MM */ + lt->tm_min = ATOI2(p); + if (lt->tm_min > 59) + return(-1); + break; + default: + return(-1); + } + + /* convert broken-down time to UTC clock time seconds */ + if ((*tval = mktime(lt)) == -1) + return(-1); + return(0); +} diff --git a/bin/pax/tables.c b/bin/pax/tables.c new file mode 100644 index 0000000..0a7b71f --- /dev/null +++ b/bin/pax/tables.c @@ -0,0 +1,1786 @@ +/* $OpenBSD: tables.c,v 1.54 2019/06/28 05:35:34 deraadt Exp $ */ +/* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * Routines for controlling the contents of all the different databases pax + * keeps. Tables are dynamically created only when they are needed. The + * goal was speed and the ability to work with HUGE archives. The databases + * were kept simple, but do have complex rules for when the contents change. 
+ * As of this writing, the posix library functions were more complex than + * needed for this application (pax databases have very short lifetimes and + * do not survive after pax is finished). Pax is required to handle very + * large archives. These database routines carefully combine memory usage and + * temporary file storage in ways which will not significantly impact runtime + * performance while allowing the largest possible archives to be handled. + * Trying to force the fit to the posix database routines was not considered + * time well spent. + */ + +/* + * data structures and constants used by the different databases kept by pax + */ + +/* + * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. + * Probably safe to expect 500000 inodes per tape. Assuming good key + * distribution (inodes) chains of under 50 long (worst case) is ok. + */ +#define L_TAB_SZ 2503 /* hard link hash table size */ +#define F_TAB_SZ 50503 /* file time hash table size */ +#define N_TAB_SZ 541 /* interactive rename hash table */ +#define D_TAB_SZ 317 /* unique device mapping table */ +#define A_TAB_SZ 317 /* ftree dir access time reset table */ +#define SL_TAB_SZ 317 /* escape symlink tables */ +#define MAXKEYLEN 64 /* max number of chars for hash */ +#define DIRP_SIZE 64 /* initial size of created dir table */ + +/* + * file hard link structure (hashed by inode number and chained; an entry + * matches when both dev and ino agree) used to find the + * hard links in a file system or with some archive formats (cpio) + */ +typedef struct hrdlnk { + ino_t ino; /* files inode number */ + char *name; /* name of first file seen with this ino/dev */ + dev_t dev; /* files device number */ + u_long nlink; /* expected link count */ + struct hrdlnk *fow; /* next entry on the hash chain */ +} HRDLNK; + +/* + * Archive write update file time table (the -u, -C flag), hashed by filename. + * Filenames are stored in a scratch file at seek offset into the file. 
The + * file time (mod time) and the file name length (for a quick check) are + * stored in a hash table node. We were forced to use a scratch file because + * with -u, the mtime for every node in the archive must always be available + * to compare against (and this data can get REALLY large with big archives). + * By being careful to read only when we have a good chance of a match, the + * performance loss is not measurable (and the size of the archive we can + * handle is greatly increased). + */ +typedef struct ftm { + off_t seek; /* location in scratch file */ + struct timespec mtim; /* files last modification time */ + struct ftm *fow; /* next entry on the hash chain */ + int namelen; /* file name length */ +} FTM; + +/* + * Interactive rename table (-i flag), hashed by orig filename. + * We assume this will not be a large table as this mapping data can only be + * obtained through interactive input by the user. Nobody is going to type in + * changes for 500000 files? We use chaining to resolve collisions. + */ + +typedef struct namt { + char *oname; /* old name */ + char *nname; /* new name typed in by the user */ + struct namt *fow; +} NAMT; + +/* + * Unique device mapping tables. Some protocols (e.g. cpio) require that the + * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they + * are links to the same file. Appending to archives can break this. For those + * protocols that have this requirement we map c_dev to a unique value not seen + * in the archive when we append. We also try to handle inode truncation with + * this table. (When the inode field in the archive header is too small, we + * remap the dev on writes to remove accidental collisions). + * + * The list is hashed by device number using chain collision resolution. Off of + * each DEVT are linked the various remaps for this device based on those bits + * in the inode which were truncated. 
For example if we are just remapping to + * avoid a device number during an update append, off the DEVT we would have + * only a single DLIST that has a truncation id of 0 (no inode bits were + * stripped for this device so far). When we spot inode truncation we create + * a new mapping based on the set of bits in the inode which were stripped off. + * so if the top four bits of the inode are stripped and they have a pattern of + * 0110...... (where . are those bits not truncated) we would have a mapping + * assigned for all inodes that have the same 0110.... pattern (with this dev + * number of course). This keeps the mapping sparse and should be able to store + * close to the limit of files which can be represented by the optimal + * combination of dev and inode bits, and without creating a fouled up archive. + * Note we also remap truncated devs in the same way (an exercise for the + * dedicated reader; always wanted to say that...:) + */ + +typedef struct devt { + dev_t dev; /* the orig device number we now have to map */ + struct devt *fow; /* new device map list */ + struct dlist *list; /* map list based on inode truncation bits */ +} DEVT; + +typedef struct dlist { + ino_t trunc_bits; /* truncation pattern for a specific map */ + dev_t dev; /* the new device id we use */ + struct dlist *fow; +} DLIST; + +/* + * ftree directory access time reset table. When we are done with a + * subtree we reset the access and mod time of the directory when the tflag is + * set. Not really explicitly specified in the pax spec, but easy and fast to + * do (and this may have even been intended in the spec, it is not clear). + * table is hashed by inode with chaining. + */ + +typedef struct atdir { + struct file_times ft; + struct atdir *fow; +} ATDIR; + +/* + * created directory time and mode storage entry. 
After pax is finished during + * extraction or copy, we must reset directory access modes and times that + * may have been modified after creation (they no longer have the specified + * times and/or modes). We must reset time in the reverse order of creation, + * because entries are added from the top of the file tree to the bottom. + * We MUST reset times from leaf to root (it will not work the other + * direction). + */ + +typedef struct dirdata { + struct file_times ft; + u_int16_t mode; /* file mode to restore */ + u_int16_t frc_mode; /* do we force mode settings? */ +} DIRDATA; + +static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ +static FTM **ftab = NULL; /* file time table for updating arch */ +static NAMT **ntab = NULL; /* interactive rename storage table */ +#ifndef NOCPIO +static DEVT **dtab = NULL; /* device/inode mapping tables */ +#endif +static ATDIR **atab = NULL; /* file tree directory time reset table */ +static DIRDATA *dirp = NULL; /* storage for setting created dir time/mode */ +static size_t dirsize; /* size of dirp table */ +static size_t dircnt = 0; /* entries in dir time/mode storage */ +static int ffd = -1; /* tmp file for file time table name storage */ + +/* + * hard link table routines + * + * The hard link table tries to detect hard links to files using the device and + * inode values. We do this when writing an archive, so we can tell the format + * write routine that this file is a hard link to another file. The format + * write routine then can store this file in whatever way it wants (as a hard + * link if the format supports that like tar, or ignore this info like cpio). + * (Actually a field in the format driver table tells us if the format wants + * hard link info. if not, we do not waste time looking for them). We also use + * the same table when reading an archive. 
In that situation, this table is + * used by the format read routine to detect hard links from stored dev and + * inode numbers (like cpio). This will allow pax to create a link when one + * can be detected by the archive format. + */ + +/* + * lnk_start + * Creates the hard link table. + * Return: + * 0 if created, -1 if failure + */ + +int +lnk_start(void) +{ + if (ltab != NULL) + return(0); + if ((ltab = calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) { + paxwarn(1, "Cannot allocate memory for hard link table"); + return(-1); + } + return(0); +} + +/* + * chk_lnk() + * Looks up entry in hard link hash table. If found, it copies the name + * of the file it is linked to (we already saw that file) into ln_name. + * lnkcnt is decremented and if goes to 1 the node is deleted from the + * database. (We have seen all the links to this file). If not found, + * we add the file to the database if it has the potential for having + * hard links to other files we may process (it has a link count > 1) + * Return: + * if found returns 1; if not found returns 0; -1 on error + */ + +int +chk_lnk(ARCHD *arcn) +{ + HRDLNK *pt; + HRDLNK **ppt; + u_int indx; + + if (ltab == NULL) + return(-1); + /* + * ignore those nodes that cannot have hard links + */ + if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1)) + return(0); + + /* + * hash inode number and look for this file + */ + indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ; + if ((pt = ltab[indx]) != NULL) { + /* + * its hash chain in not empty, walk down looking for it + */ + ppt = &(ltab[indx]); + while (pt != NULL) { + if ((pt->ino == arcn->sb.st_ino) && + (pt->dev == arcn->sb.st_dev)) + break; + ppt = &(pt->fow); + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found a link. set the node type and copy in the + * name of the file it is to link to. we need to + * handle hardlinks to regular files differently than + * other links. 
+ */ + arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name, + sizeof(arcn->ln_name)); + /* XXX truncate? */ + if ((size_t)arcn->nlen >= sizeof(arcn->name)) + arcn->nlen = sizeof(arcn->name) - 1; + if (arcn->type == PAX_REG) + arcn->type = PAX_HRG; + else + arcn->type = PAX_HLK; + + /* + * if we have found all the links to this file, remove + * it from the database + */ + if (--pt->nlink <= 1) { + *ppt = pt->fow; + free(pt->name); + free(pt); + } + return(1); + } + } + + /* + * we never saw this file before. It has links so we add it to the + * front of this hash chain + */ + if ((pt = malloc(sizeof(HRDLNK))) != NULL) { + if ((pt->name = strdup(arcn->name)) != NULL) { + pt->dev = arcn->sb.st_dev; + pt->ino = arcn->sb.st_ino; + pt->nlink = arcn->sb.st_nlink; + pt->fow = ltab[indx]; + ltab[indx] = pt; + return(0); + } + free(pt); + } + + paxwarn(1, "Hard link table out of memory"); + return(-1); +} + +/* + * purg_lnk + * remove reference for a file that we may have added to the data base as + * a potential source for hard links. We ended up not using the file, so + * we do not want to accidently point another file at it later on. 
+ */ + +void +purg_lnk(ARCHD *arcn) +{ + HRDLNK **linkp; + HRDLNK *ent; + + /* + * nothing to do without a table, or for a node that chk_lnk() would + * never have entered into it + */ + if (ltab == NULL) + return; + if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) || + PAX_IS_HARDLINK(arcn->type)) + return; + + /* + * follow the chain for this inode through the link that points at + * each node, so that a match can be unhooked and freed the same way + * whether it is the head of the chain or not + */ + linkp = &(ltab[((unsigned)arcn->sb.st_ino) % L_TAB_SZ]); + while ((ent = *linkp) != NULL) { + if ((ent->ino == arcn->sb.st_ino) && + (ent->dev == arcn->sb.st_dev)) { + *linkp = ent->fow; + free(ent->name); + free(ent); + return; + } + linkp = &(ent->fow); + } +} + +/* + * lnk_end() + * empty out an existing link table so that it can be reused. This is + * done between the read and write phases of append with update, as the + * format may have used the table while reading and the write phase has + * to start with a fresh one. The table itself stays allocated. + */ + +void +lnk_end(void) +{ + HRDLNK *cur; + HRDLNK *nxt; + int slot; + + if (ltab == NULL) + return; + + for (slot = 0; slot < L_TAB_SZ; ++slot) { + /* + * detach the chain from the table, then release its nodes + */ + cur = ltab[slot]; + ltab[slot] = NULL; + for (; cur != NULL; cur = nxt) { + nxt = cur->fow; + free(cur->name); + free(cur); + } + } +} + +/* + * modification time table routines + * + * The modification time table keeps track of last modification times for all + * files stored in an archive during a write phase when -u is set. We only + * add a file to the archive if it is newer than a file with the same name + * already stored on the archive (if there is no other file with the same + * name on the archive it is added). This applies to writes and appends. 
+ * An append with an -u must read the archive and store the modification time + * for every file on that archive before starting the write phase. It is clear + * that this is one HUGE database. To save memory space, the actual file names + * are stored in a scratch file and indexed by an in-memory hash table. The + * hash table is indexed by hashing the file path. The nodes in the table store + * the length of the filename and the lseek offset within the scratch file + * where the actual name is stored. Since there are never any deletions from + * this table, fragmentation of the scratch file is never a issue. Lookups + * seem to not exhibit any locality at all (files in the database are rarely + * looked up more than once...), so caching is just a waste of memory. The + * only limitation is the amount of scratch file space available to store the + * path names. + */ + +/* + * ftime_start() + * create the file time hash table and open for read/write the scratch + * file. (after created it is unlinked, so when we exit we leave + * no witnesses). + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +ftime_start(void) +{ + + if (ftab != NULL) + return(0); + if ((ftab = calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) { + paxwarn(1, "Cannot allocate memory for file time table"); + return(-1); + } + + /* + * get random name and create temporary scratch file, unlink name + * so it will get removed on exit + */ + memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE)); + if ((ffd = mkstemp(tempfile)) == -1) { + syswarn(1, errno, "Unable to create temporary file: %s", + tempfile); + return(-1); + } + (void)unlink(tempfile); + + return(0); +} + +/* + * chk_ftime() + * looks up entry in file time hash table. If not found, the file is + * added to the hash table and the file named stored in the scratch file. + * If a file with the same name is found, the file times are compared and + * the most recent file time is retained. 
If the new file was younger (or + * was not in the database) the new file is selected for storage. + * Return: + * 0 if file should be added to the archive, 1 if it should be skipped, + * -1 on error + */ + +int +chk_ftime(ARCHD *arcn) +{ + FTM *pt; + int namelen; + u_int indx; + char ckname[PAXPATHLEN+1]; + + /* + * no info, go ahead and add to archive + */ + if (ftab == NULL) + return(0); + + /* + * hash the pathname and look up in table + */ + namelen = arcn->nlen; + indx = st_hash(arcn->name, namelen, F_TAB_SZ); + if ((pt = ftab[indx]) != NULL) { + /* + * the hash chain is not empty, walk down looking for match + * only read up the path names if the lengths match, speeds + * up the search a lot + */ + while (pt != NULL) { + if (pt->namelen == namelen) { + /* + * potential match, have to read the name + * from the scratch file. + */ + if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) { + syswarn(1, errno, + "Failed ftime table seek"); + return(-1); + } + if (read(ffd, ckname, namelen) != namelen) { + syswarn(1, errno, + "Failed ftime table read"); + return(-1); + } + + /* + * if the names match, we are done + */ + if (!strncmp(ckname, arcn->name, namelen)) + break; + } + + /* + * try the next entry on the chain + */ + pt = pt->fow; + } + + if (pt != NULL) { + /* + * found the file, compare the times, save the newer + */ + if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)) { + /* + * file is newer + */ + pt->mtim = arcn->sb.st_mtim; + return(0); + } + /* + * file is older + */ + return(1); + } + } + + /* + * not in table, add it + */ + if ((pt = malloc(sizeof(FTM))) != NULL) { + /* + * add the name at the end of the scratch file, saving the + * offset. 
add the file to the head of the hash chain + */ + if ((pt->seek = lseek(ffd, 0, SEEK_END)) >= 0) { + if (write(ffd, arcn->name, namelen) == namelen) { + pt->mtim = arcn->sb.st_mtim; + pt->namelen = namelen; + pt->fow = ftab[indx]; + ftab[indx] = pt; + return(0); + } + syswarn(1, errno, "Failed write to file time table"); + } else + syswarn(1, errno, "Failed seek on file time table"); + } else + paxwarn(1, "File time table ran out of memory"); + + if (pt != NULL) + free(pt); + return(-1); +} + +/* + * escaping (absolute or w/"..") symlink table routines + * + * By default, an archive shouldn't be able extract to outside of the + * current directory. What should we do if the archive contains a symlink + * whose value is either absolute or contains ".." components? What we'll + * do is initially create the path as an empty file (to block attempts to + * reference _through_ it) and instead record its path and desired + * final value and mode. Then once all the other archive + * members are created (but before the pass to set timestamps on + * directories) we'll process those records, replacing the placeholder with + * the correct symlink and setting them to the correct mode, owner, group, + * and timestamps. + * + * Note: we also need to handle hardlinks to symlinks (barf) as well as + * hardlinks whose target is replaced by a later entry in the archive (barf^2). + * + * So we track things by dev+ino of the placeholder file, associating with + * that the value and mode of the final symlink and a list of paths that + * should all be hardlinks of that. We'll 'store' the symlink's desired + * timestamps, owner, and group by setting them on the placeholder file. + * + * The operations are: + * a) create an escaping symlink: create the placeholder file and add an entry + * for the new link + * b) create a hardlink: do the link. 
If the target turns out to be a + * zero-length file whose dev+ino are in the symlink table, then add this + * path to the list of names for that link + * c) perform deferred processing: for each entry, check each associated path: + * if it's a zero-length file with the correct dev+ino then recreate it as + * the specified symlink or hardlink to the first such + */ + +struct slpath { + char *sp_path; + struct slpath *sp_next; +}; +struct slinode { + ino_t sli_ino; + char *sli_value; + struct slpath sli_paths; + struct slinode *sli_fow; /* hash table chain */ + dev_t sli_dev; + mode_t sli_mode; +}; + +static struct slinode **slitab = NULL; + +/* + * sltab_start() + * create the hash table + * Return: + * 0 if the table and file was created ok, -1 otherwise + */ + +int +sltab_start(void) +{ + + if ((slitab = calloc(SL_TAB_SZ, sizeof *slitab)) == NULL) { + syswarn(1, errno, "symlink table"); + return(-1); + } + + return(0); +} + +/* + * sltab_add_sym() + * Create the placeholder and tracking info for an escaping symlink. 
+ * Return: + * 0 on success, -1 otherwise + */ + +int +sltab_add_sym(const char *path0, const char *value0, mode_t mode) +{ + struct stat sb; + struct slinode *s; + struct slpath *p; + char *path, *value; + u_int indx; + int fd; + + /* create the placeholder */ + fd = open(path0, O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0600); + if (fd == -1) + return (-1); + if (fstat(fd, &sb) == -1) { + unlink(path0); + close(fd); + return (-1); + } + close(fd); + + if (havechd && *path0 != '/') { + if ((path = realpath(path0, NULL)) == NULL) { + syswarn(1, errno, "Cannot canonicalize %s", path0); + unlink(path0); + return (-1); + } + } else if ((path = strdup(path0)) == NULL) { + syswarn(1, errno, "defered symlink path"); + unlink(path0); + return (-1); + } + if ((value = strdup(value0)) == NULL) { + syswarn(1, errno, "defered symlink value"); + unlink(path); + free(path); + return (-1); + } + + /* now check the hash table for conflicting entry */ + indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ; + for (s = slitab[indx]; s != NULL; s = s->sli_fow) { + if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev) + continue; + + /* + * One of our placeholders got removed behind our back and + * we've reused the inode. Weird, but clean up the mess. + */ + free(s->sli_value); + free(s->sli_paths.sp_path); + p = s->sli_paths.sp_next; + while (p != NULL) { + struct slpath *next_p = p->sp_next; + + free(p->sp_path); + free(p); + p = next_p; + } + goto set_value; + } + + /* Normal case: create a new node */ + if ((s = malloc(sizeof *s)) == NULL) { + syswarn(1, errno, "defered symlink"); + unlink(path); + free(path); + free(value); + return (-1); + } + s->sli_ino = sb.st_ino; + s->sli_dev = sb.st_dev; + s->sli_fow = slitab[indx]; + slitab[indx] = s; + +set_value: + s->sli_paths.sp_path = path; + s->sli_paths.sp_next = NULL; + s->sli_value = value; + s->sli_mode = mode; + return (0); +} + +/* + * sltab_add_link() + * A hardlink was created; if it looks like a placeholder, handle the + * tracking. 
+ * Return: + * 0 if things are ok, -1 if something went wrong + */ + +int +sltab_add_link(const char *path, const struct stat *sb) +{ + struct slinode *s; + struct slpath *p; + u_int indx; + + if (!S_ISREG(sb->st_mode) || sb->st_size != 0) + return (1); + + /* find the hash table entry for this hardlink */ + indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ; + for (s = slitab[indx]; s != NULL; s = s->sli_fow) { + if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev) + continue; + + if ((p = malloc(sizeof *p)) == NULL) { + syswarn(1, errno, "deferred symlink hardlink"); + return (-1); + } + if (havechd && *path != '/') { + if ((p->sp_path = realpath(path, NULL)) == NULL) { + syswarn(1, errno, "Cannot canonicalize %s", + path); + free(p); + return (-1); + } + } else if ((p->sp_path = strdup(path)) == NULL) { + syswarn(1, errno, "defered symlink hardlink path"); + free(p); + return (-1); + } + + /* link it in */ + p->sp_next = s->sli_paths.sp_next; + s->sli_paths.sp_next = p; + return (0); + } + + /* not found */ + return (1); +} + + +static int +sltab_process_one(struct slinode *s, struct slpath *p, const char *first, + int in_sig) +{ + struct stat sb; + char *path = p->sp_path; + mode_t mode; + int err; + + /* + * is it the expected placeholder? This can fail legimately + * if the archive overwrote the link with another, later entry, + * so don't warn. + */ + if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode) || sb.st_size != 0 || + sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev) + return (0); + + if (unlink(path) && errno != ENOENT) { + if (!in_sig) + syswarn(1, errno, "deferred symlink removal"); + return (0); + } + + err = 0; + if (first != NULL) { + /* add another hardlink to the existing symlink */ + if (linkat(AT_FDCWD, first, AT_FDCWD, path, 0) == 0) + return (0); + + /* + * Couldn't hardlink the symlink for some reason, so we'll + * try creating it as its own symlink, but save the error + * for reporting if that fails. 
+ */ + err = errno; + } + + if (symlink(s->sli_value, path)) { + if (!in_sig) { + const char *qualifier = ""; + if (err) + qualifier = " hardlink"; + else + err = errno; + + syswarn(1, err, "deferred symlink%s: %s", + qualifier, path); + } + return (0); + } + + /* success, so set the id, mode, and times */ + mode = s->sli_mode; + if (pids) { + /* if can't set the ids, force the set[ug]id bits off */ + if (set_ids(path, sb.st_uid, sb.st_gid)) + mode &= ~(SETBITS); + } + + if (pmode) + set_pmode(path, mode); + + if (patime || pmtime) + set_ftime(path, &sb.st_mtim, &sb.st_atim, 0); + + /* + * If we tried to link to first but failed, then this new symlink + * might be a better one to try in the future. Guess from the errno. + */ + if (err == 0 || err == ENOENT || err == EMLINK || err == EOPNOTSUPP) + return (1); + return (0); +} + +/* + * sltab_process() + * Do all the delayed process for escape symlinks + */ + +void +sltab_process(int in_sig) +{ + struct slinode *s; + struct slpath *p; + char *first; + u_int indx; + + if (slitab == NULL) + return; + + /* walk across the entire hash table */ + for (indx = 0; indx < SL_TAB_SZ; indx++) { + while ((s = slitab[indx]) != NULL) { + /* pop this entry */ + slitab[indx] = s->sli_fow; + + first = NULL; + p = &s->sli_paths; + while (1) { + struct slpath *next_p; + + if (sltab_process_one(s, p, first, in_sig)) { + if (!in_sig) + free(first); + first = p->sp_path; + } else if (!in_sig) + free(p->sp_path); + + if ((next_p = p->sp_next) == NULL) + break; + *p = *next_p; + if (!in_sig) + free(next_p); + } + if (!in_sig) { + free(first); + free(s->sli_value); + free(s); + } + } + } + if (!in_sig) + free(slitab); + slitab = NULL; +} + + +/* + * Interactive rename table routines + * + * The interactive rename table keeps track of the new names that the user + * assigns to files from tty input. Since this map is unique for each file + * we must store it in case there is a reference to the file later in archive + * (a link). 
Otherwise we will be unable to find the file we know was + * extracted. The remapping of these files is stored in a memory based hash + * table (it is assumed since input must come from /dev/tty, it is unlikely to + * be a very large table). + */ + +/* + * name_start() + * create the interactive rename table + * Return: + * 0 if successful, -1 otherwise + */ + +int +name_start(void) +{ + if (ntab != NULL) + return(0); + if ((ntab = calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for interactive rename table"); + return(-1); + } + return(0); +} + +/* + * add_name() + * add the new name to old name mapping just created by the user. + * If an old name mapping is found (there may be duplicate names on an + * archive) only the most recent is kept. + * Return: + * 0 if added, -1 otherwise + */ + +int +add_name(char *oname, int onamelen, char *nname) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) { + /* + * should never happen + */ + paxwarn(0, "No interactive rename table, links may fail"); + return(0); + } + + /* + * look to see if we have already mapped this file, if so we + * will update it + */ + indx = st_hash(oname, onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) != NULL) { + /* + * look down the has chain for the file + */ + while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) + pt = pt->fow; + + if (pt != NULL) { + /* + * found an old mapping, replace it with the new one + * the user just input (if it is different) + */ + if (strcmp(nname, pt->nname) == 0) + return(0); + + free(pt->nname); + if ((pt->nname = strdup(nname)) == NULL) { + paxwarn(1, "Cannot update rename table"); + return(-1); + } + return(0); + } + } + + /* + * this is a new mapping, add it to the table + */ + if ((pt = malloc(sizeof(NAMT))) != NULL) { + if ((pt->oname = strdup(oname)) != NULL) { + if ((pt->nname = strdup(nname)) != NULL) { + pt->fow = ntab[indx]; + ntab[indx] = pt; + return(0); + } + free(pt->oname); + } + free(pt); + } + paxwarn(1, 
"Interactive rename table out of memory"); + return(-1); +} + +/* + * sub_name() + * look up a link name to see if it points at a file that has been + * remapped by the user. If found, the link is adjusted to contain the + * new name (oname is the link to name) + */ + +void +sub_name(char *oname, int *onamelen, int onamesize) +{ + NAMT *pt; + u_int indx; + + if (ntab == NULL) + return; + /* + * look the name up in the hash table + */ + indx = st_hash(oname, *onamelen, N_TAB_SZ); + if ((pt = ntab[indx]) == NULL) + return; + + while (pt != NULL) { + /* + * walk down the hash chain looking for a match + */ + if (strcmp(oname, pt->oname) == 0) { + /* + * found it, replace it with the new name + * and return (we know that oname has enough space) + */ + *onamelen = strlcpy(oname, pt->nname, onamesize); + if (*onamelen >= onamesize) + *onamelen = onamesize - 1; /* XXX truncate? */ + return; + } + pt = pt->fow; + } + + /* + * no match, just return + */ +} + +#ifndef NOCPIO +/* + * device/inode mapping table routines + * (used with formats that store device and inodes fields) + * + * device/inode mapping tables remap the device field in a archive header. The + * device/inode fields are used to determine when files are hard links to each + * other. However these values have very little meaning outside of that. This + * database is used to solve one of two different problems. + * + * 1) when files are appended to an archive, while the new files may have hard + * links to each other, you cannot determine if they have hard links to any + * file already stored on the archive from a prior run of pax. We must assume + * that these inode/device pairs are unique only within a SINGLE run of pax + * (which adds a set of files to an archive). So we have to make sure the + * inode/dev pairs we add each time are always unique. We do this by observing + * while the inode field is very dense, the use of the dev field is fairly + * sparse. 
Within each run of pax, we remap any device number of a new archive + * member that has a device number used in a prior run and already stored in a + * file on the archive. During the read phase of the append, we store the + * device numbers used and mark them to not be used by any file during the + * write phase. If during write we go to use one of those old device numbers, + * we remap it to a new value. + * + * 2) Often the fields in the archive header used to store these values are + * too small to store the entire value. The result is an inode or device value + * which can be truncated. This really can foul up an archive. With truncation + * we end up creating links between files that are really not links (after + * truncation the inodes are the same value). We address that by detecting + * truncation and forcing a remap of the device field to split truncated + * inodes away from each other. Each truncation creates a pattern of bits that + * are removed. We use this pattern of truncated bits to partition the inodes + * on a single device to many different devices (each one represented by the + * truncated bit pattern). All inodes on the same device that have the same + * truncation pattern are mapped to the same new device. Two inodes that + * truncate to the same value clearly will always have different truncation + * bit patterns, so they will be split from away each other. When we spot + * device truncation we remap the device number to a non truncated value. + * (for more info see table.h for the data structures involved). + */ + +static DEVT *chk_dev(dev_t, int); + +/* + * dev_start() + * create the device mapping table + * Return: + * 0 if successful, -1 otherwise + */ + +int +dev_start(void) +{ + if (dtab != NULL) + return(0); + if ((dtab = calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { + paxwarn(1, "Cannot allocate memory for device mapping table"); + return(-1); + } + return(0); +} + +/* + * add_dev() + * add a device number to the table. 
this will force the device to be + * remapped to a new value if it be used during a write phase. This + * function is called during the read phase of an append to prohibit the + * use of any device number already in the archive. + * Return: + * 0 if added ok, -1 otherwise + */ + +int +add_dev(ARCHD *arcn) +{ + if (chk_dev(arcn->sb.st_dev, 1) == NULL) + return(-1); + return(0); +} + +/* + * chk_dev() + * check for a device value in the device table. If not found and the add + * flag is set, it is added. This does NOT assign any mapping values, just + * adds the device number as one that need to be remapped. If this device + * is already mapped, just return with a pointer to that entry. + * Return: + * pointer to the entry for this device in the device map table. Null + * if the add flag is not set and the device is not in the table (it is + * not been seen yet). If add is set and the device cannot be added, null + * is returned (indicates an error). + */ + +static DEVT * +chk_dev(dev_t dev, int add) +{ + DEVT *pt; + u_int indx; + + if (dtab == NULL) + return(NULL); + /* + * look to see if this device is already in the table + */ + indx = ((unsigned)dev) % D_TAB_SZ; + if ((pt = dtab[indx]) != NULL) { + while ((pt != NULL) && (pt->dev != dev)) + pt = pt->fow; + + /* + * found it, return a pointer to it + */ + if (pt != NULL) + return(pt); + } + + /* + * not in table, we add it only if told to as this may just be a check + * to see if a device number is being used. + */ + if (add == 0) + return(NULL); + + /* + * allocate a node for this device and add it to the front of the hash + * chain. Note we do not assign remaps values here, so the pt->list + * list must be NULL. 
+ */ + if ((pt = malloc(sizeof(DEVT))) == NULL) { + paxwarn(1, "Device map table out of memory"); + return(NULL); + } + pt->dev = dev; + pt->list = NULL; + pt->fow = dtab[indx]; + dtab[indx] = pt; + return(pt); +} +/* + * map_dev() + * given an inode and device storage mask (the mask has a 1 for each bit + * the archive format is able to store in a header), we check for inode + * and device truncation and remap the device as required. Device mapping + * can also occur when during the read phase of append a device number was + * seen (and was marked as do not use during the write phase). WE ASSUME + * that unsigned longs are the same size or bigger than the fields used + * for ino_t and dev_t. If not the types will have to be changed. + * Return: + * 0 if all ok, -1 otherwise. + */ + +int +map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) +{ + DEVT *pt; + DLIST *dpt; + static dev_t lastdev = 0; /* next device number to try */ + int trc_ino = 0; + int trc_dev = 0; + ino_t trunc_bits = 0; + ino_t nino; + + if (dtab == NULL) + return(0); + /* + * check for device and inode truncation, and extract the truncated + * bit pattern. + */ + if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) + ++trc_dev; + if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { + ++trc_ino; + trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); + } + + /* + * see if this device is already being mapped, look up the device + * then find the truncation bit pattern which applies + */ + if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) { + /* + * this device is already marked to be remapped + */ + for (dpt = pt->list; dpt != NULL; dpt = dpt->fow) + if (dpt->trunc_bits == trunc_bits) + break; + + if (dpt != NULL) { + /* + * we are being remapped for this device and pattern + * change the device number to be stored and return + */ + arcn->sb.st_dev = dpt->dev; + arcn->sb.st_ino = nino; + return(0); + } + } else { + /* + * this device is not being remapped YET. 
if we do not have any + * form of truncation, we do not need a remap + */ + if (!trc_ino && !trc_dev) + return(0); + + /* + * we have truncation, have to add this as a device to remap + */ + if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL) + goto bad; + + /* + * if we just have a truncated inode, we have to make sure that + * all future inodes that do not truncate (they have the + * truncation pattern of all 0's) continue to map to the same + * device number. We probably have already written inodes with + * this device number to the archive with the truncation + * pattern of all 0's. So we add the mapping for all 0's to the + * same device number. + */ + if (!trc_dev && (trunc_bits != 0)) { + if ((dpt = malloc(sizeof(DLIST))) == NULL) + goto bad; + dpt->trunc_bits = 0; + dpt->dev = arcn->sb.st_dev; + dpt->fow = pt->list; + pt->list = dpt; + } + } + + /* + * look for a device number not being used. We must watch for wrap + * around on lastdev (so we do not get stuck looking forever!) + */ + while (++lastdev > 0) { + if (chk_dev(lastdev, 0) != NULL) + continue; + /* + * found an unused value. If we have reached truncation point + * for this format we are hosed, so we give up. Otherwise we + * mark it as being used. + */ + if (((lastdev & ((dev_t)dev_mask)) != lastdev) || + (chk_dev(lastdev, 1) == NULL)) + goto bad; + break; + } + + if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL)) + goto bad; + + /* + * got a new device number, store it under this truncation pattern. + * change the device number this file is being stored with. 
+ */ + dpt->trunc_bits = trunc_bits; + dpt->dev = lastdev; + dpt->fow = pt->list; + pt->list = dpt; + arcn->sb.st_dev = lastdev; + arcn->sb.st_ino = nino; + return(0); + + bad: + paxwarn(1, "Unable to fix truncated inode/device field when storing %s", + arcn->name); + paxwarn(0, "Archive may create improper hard links when extracted"); + return(0); +} +#endif /* NOCPIO */ + +/* + * directory access/mod time reset table routines (for directories READ by pax) + * + * The pax -t flag requires that access times of archive files be the same + * before being read by pax. For regular files, access time is restored after + * the file has been copied. This database provides the same functionality for + * directories read during file tree traversal. Restoring directory access time + * is more complex than files since directories may be read several times until + * all the descendants in their subtree are visited by fts. Directory access + * and modification times are stored during the fts pre-order visit (done + * before any descendants in the subtree are visited) and restored after the + * fts post-order visit (after all the descendants have been visited). In the + * case of premature exit from a subtree (like from the effects of -n), any + * directory entries left in this database are reset during final cleanup + * operations of pax. Entries are hashed by inode number for fast lookup. + */ + +/* + * atdir_start() + * create the directory access time database for directories READ by pax. + * Return: + * 0 is created ok, -1 otherwise. + */ + +int +atdir_start(void) +{ + if (atab != NULL) + return(0); + if ((atab = calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { + paxwarn(1,"Cannot allocate space for directory access time table"); + return(-1); + } + return(0); +} + + +/* + * atdir_end() + * walk through the directory access time table and reset the access time + * of any directory who still has an entry left in the database. 
These + * entries are for directories READ by pax + */ + +void +atdir_end(void) +{ + ATDIR *pt; + int i; + + if (atab == NULL) + return; + /* + * for each non-empty hash table entry reset all the directories + * chained there. + */ + for (i = 0; i < A_TAB_SZ; ++i) { + if ((pt = atab[i]) == NULL) + continue; + /* + * remember to force the times, set_ftime() looks at pmtime + * and patime, which only applies to things CREATED by pax, + * not read by pax. Read time reset is controlled by -t. + */ + for (; pt != NULL; pt = pt->fow) + set_attr(&pt->ft, 1, 0, 0, 0); + } +} + +/* + * add_atdir() + * add a directory to the directory access time table. Table is hashed + * and chained by inode number. This is for directories READ by pax + */ + +void +add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp, + const struct timespec *atimp) +{ + ATDIR *pt; + sigset_t allsigs, savedsigs; + u_int indx; + + if (atab == NULL) + return; + + /* + * make sure this directory is not already in the table, if so just + * return (the older entry always has the correct time). The only + * way this will happen is when the same subtree can be traversed by + * different args to pax and the -n option is aborting fts out of a + * subtree before all the post-order visits have been made. + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) != NULL) { + while (pt != NULL) { + if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) + break; + pt = pt->fow; + } + + /* + * oops, already there. Leave it alone. 
+ */ + if (pt != NULL) + return; + } + + /* + * add it to the front of the hash chain + */ + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + if ((pt = malloc(sizeof *pt)) != NULL) { + if ((pt->ft.ft_name = strdup(fname)) != NULL) { + pt->ft.ft_dev = dev; + pt->ft.ft_ino = ino; + pt->ft.ft_mtim = *mtimp; + pt->ft.ft_atim = *atimp; + pt->fow = atab[indx]; + atab[indx] = pt; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + return; + } + free(pt); + } + + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + paxwarn(1, "Directory access time reset table ran out of memory"); +} + +/* + * get_atdir() + * look up a directory by inode and device number to obtain the access + * and modification time you want to set to. If found, the modification + * and access time parameters are set and the entry is removed from the + * table (as it is no longer needed). These are for directories READ by + * pax + * Return: + * 0 if found, -1 if not found. + */ + +int +do_atdir(const char *name, dev_t dev, ino_t ino) +{ + ATDIR *pt; + ATDIR **ppt; + sigset_t allsigs, savedsigs; + u_int indx; + + if (atab == NULL) + return(-1); + /* + * hash by inode and search the chain for an inode and device match + */ + indx = ((unsigned)ino) % A_TAB_SZ; + if ((pt = atab[indx]) == NULL) + return(-1); + + ppt = &(atab[indx]); + while (pt != NULL) { + if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) + break; + /* + * no match, go to next one + */ + ppt = &(pt->fow); + pt = pt->fow; + } + + /* + * return if we did not find it. + */ + if (pt == NULL || pt->ft.ft_name == NULL || + strcmp(name, pt->ft.ft_name) == 0) + return(-1); + + /* + * found it. set the times and remove the entry from the table. 
+ */ + set_attr(&pt->ft, 1, 0, 0, 0); + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + *ppt = pt->fow; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + free(pt->ft.ft_name); + free(pt); + return(0); +} + +/* + * directory access mode and time storage routines (for directories CREATED + * by pax). + * + * Pax requires that extracted directories, by default, have their access/mod + * times and permissions set to the values specified in the archive. During the + * actions of extracting (and creating the destination subtree during -rw copy) + * directories extracted may be modified after being created. Even worse is + * that these directories may have been created with file permissions which + * prohibits any descendants of these directories from being extracted. When + * directories are created by pax, access rights may be added to permit the + * creation of files in their subtree. Every time pax creates a directory, the + * times and file permissions specified by the archive are stored. After all + * files have been extracted (or copied), these directories have their times + * and file modes reset to the stored values. The directory info is restored in + * reverse order as entries were added from root to leaf: to restore atime + * properly, we must go backwards. + */ + +/* + * dir_start() + * set up the directory time and file mode storage for directories CREATED + * by pax. 
+ * Return: + * 0 if ok, -1 otherwise + */ + +int +dir_start(void) +{ + if (dirp != NULL) + return(0); + + dirsize = DIRP_SIZE; + if ((dirp = reallocarray(NULL, dirsize, sizeof(DIRDATA))) == NULL) { + paxwarn(1, "Unable to allocate memory for directory times"); + return(-1); + } + return(0); +} + +/* + * add_dir() + * add the mode and times for a newly CREATED directory + * name is name of the directory, psb the stat buffer with the data in it, + * frc_mode is a flag that says whether to force the setting of the mode + * (ignoring the user set values for preserving file mode). Frc_mode is + * for the case where we created a file and found that the resulting + * directory was not writeable and the user asked for file modes to NOT + * be preserved. (we have to preserve what was created by default, so we + * have to force the setting at the end. this is stated explicitly in the + * pax spec) + */ + +void +add_dir(char *name, struct stat *psb, int frc_mode) +{ + DIRDATA *dblk; + sigset_t allsigs, savedsigs; + char realname[PATH_MAX], *rp; + + if (dirp == NULL) + return; + + if (havechd && *name != '/') { + if ((rp = realpath(name, realname)) == NULL) { + paxwarn(1, "Cannot canonicalize %s", name); + return; + } + name = rp; + } + if (dircnt == dirsize) { + dblk = reallocarray(dirp, dirsize * 2, sizeof(DIRDATA)); + if (dblk == NULL) { + paxwarn(1, "Unable to store mode and times for created" + " directory: %s", name); + return; + } + sigprocmask(SIG_BLOCK, &allsigs, &savedsigs); + dirp = dblk; + dirsize *= 2; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); + } + dblk = &dirp[dircnt]; + if ((dblk->ft.ft_name = strdup(name)) == NULL) { + paxwarn(1, "Unable to store mode and times for created" + " directory: %s", name); + return; + } + dblk->ft.ft_mtim = psb->st_mtim; + dblk->ft.ft_atim = psb->st_atim; + dblk->ft.ft_ino = psb->st_ino; + dblk->ft.ft_dev = psb->st_dev; + dblk->mode = psb->st_mode & ABITS; + dblk->frc_mode = frc_mode; + sigprocmask(SIG_BLOCK, &allsigs, 
&savedsigs); + ++dircnt; + sigprocmask(SIG_SETMASK, &savedsigs, NULL); +} + +/* + * delete_dir() + * When we rmdir a directory, we may want to make sure we don't + * later warn about being unable to set its mode and times. + */ + +void +delete_dir(dev_t dev, ino_t ino) +{ + DIRDATA *dblk; + char *name; + size_t i; + + if (dirp == NULL) + return; + for (i = 0; i < dircnt; i++) { + dblk = &dirp[i]; + + if (dblk->ft.ft_name == NULL) + continue; + if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) { + name = dblk->ft.ft_name; + dblk->ft.ft_name = NULL; + free(name); + break; + } + } +} + +/* + * proc_dir(int in_sig) + * process all file modes and times stored for directories CREATED + * by pax. If in_sig is set, we're in a signal handler and can't + * free stuff. + */ + +void +proc_dir(int in_sig) +{ + DIRDATA *dblk; + size_t cnt; + + if (dirp == NULL) + return; + /* + * read backwards through the file and process each directory + */ + cnt = dircnt; + while (cnt-- > 0) { + dblk = &dirp[cnt]; + /* + * If we remove a directory we created, we replace the + * ft_name with NULL. Ignore those. + */ + if (dblk->ft.ft_name == NULL) + continue; + + /* + * frc_mode set, make sure we set the file modes even if + * the user didn't ask for it (see file_subs.c for more info) + */ + set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode, + in_sig); + if (!in_sig) + free(dblk->ft.ft_name); + } + + if (!in_sig) + free(dirp); + dirp = NULL; + dircnt = 0; +} + +/* + * database independent routines + */ + +/* + * st_hash() + * hashes filenames to a u_int for hashing into a table. Looks at the tail + * end of file, as this provides far better distribution than any other + * part of the name. For performance reasons we only care about the last + * MAXKEYLEN chars (should be at LEAST large enough to pick off the file + * name). 
Was tested on 500,000 name file tree traversal from the root + * and gave almost a perfectly uniform distribution of keys when used with + * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) + * chars at a time and pads with 0 for last addition. + * Return: + * the hash value of the string MOD (%) the table size. + */ + +u_int +st_hash(const char *name, int len, int tabsz) +{ + const char *pt; + char *dest; + const char *end; + int i; + u_int key = 0; + int steps; + int res; + u_int val; + + /* + * only look at the tail up to MAXKEYLEN, we do not need to waste + * time here (remember these are pathnames, the tail is what will + * spread out the keys) + */ + if (len > MAXKEYLEN) { + pt = &(name[len - MAXKEYLEN]); + len = MAXKEYLEN; + } else + pt = name; + + /* + * calculate the number of u_int size steps in the string and if + * there is a runt to deal with + */ + steps = len/sizeof(u_int); + res = len % sizeof(u_int); + + /* + * add up the value of the string in unsigned integer sized pieces + * too bad we cannot have unsigned int aligned strings, then we + * could avoid the expensive copy. + */ + for (i = 0; i < steps; ++i) { + end = pt + sizeof(u_int); + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * add in the runt padded with zero to the right + */ + if (res) { + val = 0; + end = pt + res; + dest = (char *)&val; + while (pt < end) + *dest++ = *pt++; + key += val; + } + + /* + * return the result mod the table size + */ + return(key % tabsz); +} diff --git a/bin/pax/tar.1 b/bin/pax/tar.1 new file mode 100644 index 0000000..bbdef11 --- /dev/null +++ b/bin/pax/tar.1 @@ -0,0 +1,410 @@ +.\" $OpenBSD: tar.1,v 1.62 2020/01/16 16:46:46 schwarze Exp $ +.\" +.\" Copyright (c) 1996 SigmaSoft, Th. Lockert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: January 16 2020 $ +.Dt TAR 1 +.Os +.Sh NAME +.Nm tar +.Nd tape archiver +.Sh SYNOPSIS +.Nm tar +.Sm off +.No { Cm crtux No } Op Cm 014578befHhjLmNOoPpqsvwXZz +.Sm on +.Bk -words +.Op Ar blocking-factor | archive | replstr +.Op Fl C Ar directory +.Op Fl I Ar file +.Op Ar +.Ek +.Nm tar +.No { Ns Fl crtux Ns } +.Bk -words +.Op Fl 014578eHhjLmNOoPpqvwXZz +.Op Fl b Ar blocking-factor +.Op Fl C Ar directory +.Op Fl f Ar archive +.Op Fl I Ar file +.Op Fl s Ar replstr +.Op Ar +.Ek +.Sh DESCRIPTION +The +.Nm +command creates, adds files to, or extracts files from an +archive file in +.Dq tar +format. +A tar archive is often stored on a magnetic tape, but can be +stored equally well on a floppy, CD-ROM, or in a regular disk file. 
+.Pp +In the first (legacy) form, all option flags except for +.Fl C +and +.Fl I +must be contained within the first argument to +.Nm +and must not be prefixed by a hyphen +.Pq Sq - . +Option arguments, if any, are processed as subsequent arguments to +.Nm +and are processed in the order in which their corresponding option +flags have been presented on the command line. +.Pp +In the second and preferred form, option flags may be given in any order +and are immediately followed by their corresponding option argument +values. +.Pp +One of the following flags must be present: +.Bl -tag -width Ds +.It Fl c +Create new archive, or overwrite an existing archive, +adding the specified files to it. +.It Fl r +Append the named new files to existing archive. +Note that this will only work on media on which an end-of-file mark +can be overwritten. +.It Fl t +List contents of archive. +If any files are named on the +command line, only those files will be listed. +The +.Ar file +arguments may be specified as glob patterns (see +.Xr glob 7 +for more information), in which case +.Nm +will list all archive members that match each pattern. +.It Fl u +Alias for +.Fl r . +.It Fl x +Extract files from archive. +If any files are named on the +command line, only those files will be extracted from the +archive. +The +.Ar file +arguments may be specified as glob patterns (see +.Xr glob 7 +for more information), in which case +.Nm +will extract all archive members that match each pattern. +.Pp +If more than one copy of a file exists in the +archive, later copies will overwrite earlier copies during +extraction. +The file mode and modification time are preserved +if possible. +The file mode is subject to modification by the +.Xr umask 2 . +.El +.Pp +In addition to the flags mentioned above, any of the following +flags may be used: +.Bl -tag -width Ds +.It Fl b Ar blocking-factor +Set blocking factor to use for the archive. +.Nm +uses 512-byte blocks. +The default is 20, the maximum is 126. 
+Archives with a blocking factor larger than 63 +violate the POSIX standard and will not be portable to all systems. +.It Fl C Ar directory +This is a positional argument which sets the working directory for the +following files. +When extracting, files will be extracted into +the specified directory; when creating, the specified files will be matched +from the directory. +.It Fl e +Stop after the first error. +.It Fl f Ar archive +Filename where the archive is stored. +Defaults to +.Pa /dev/rst0 . +If set to hyphen +.Pq Sq - +standard output is used. +See also the +.Ev TAPE +environment variable. +.It Fl H +Follow symlinks given on the command line only. +.It Fl h +Follow symbolic links as if they were normal files +or directories. +In extract mode this means that a directory entry in the archive +will not overwrite an existing symbolic link, but rather what the +link ultimately points to. +.It Fl I Ar file +This is a positional argument which reads the names of files to +archive or extract from the given file, one per line. +.It Fl j +Compress archive using bzip2. +The bzip2 utility must be installed separately. +.It Fl L +Synonym for the +.Fl h +option. +.It Fl m +Do not preserve modification time. +.It Fl N +Use only the numeric UID and GID values when creating or extracting an +archive. +.It Fl O +Write old-style (non-POSIX) archives. +.It Fl o +Don't write directory information that the older (V7) style +.Nm +is unable to decode. +This implies the +.Fl O +flag. +.It Fl P +Do not strip leading slashes +.Pq Sq / +from pathnames. +The default is to strip leading slashes. +.It Fl p +Preserve user and group ID as well as file mode regardless of +the current +.Xr umask 2 . +The setuid and setgid bits are only preserved if the user and group ID +could be preserved. +Only meaningful in conjunction with the +.Fl x +flag. +.It Fl q +Select the first archive member that matches each +.Ar file +operand. +No more than one archive member is matched for each +.Ar file . 
+When members of type directory are matched, the file hierarchy rooted at that +directory is also matched. +.It Fl s Ar replstr +Modify the archive member names according to the substitution expression +.Ar replstr , +using the syntax of the +.Xr ed 1 +utility regular expressions. +.Ar file +arguments may be given to restrict the list of archive members to those +specified. +.Pp +The format of these regular expressions is +.Pp +.Dl /old/new/[gp] +.Pp +As in +.Xr ed 1 , +.Va old +is a basic regular expression (see +.Xr re_format 7 ) +and +.Va new +can contain an ampersand +.Pq Ql & , +.Ql \e Ns Em n +(where +.Em n +is a digit) back-references, +or subexpression matching. +The +.Va old +string may also contain newline characters. +Any non-null character can be used as a delimiter +.Po +.Ql / +is shown here +.Pc . +Multiple +.Fl s +expressions can be specified. +The expressions are applied in the order they are specified on the +command line, terminating with the first successful substitution. +.Pp +The optional trailing +.Cm g +continues to apply the substitution expression to the pathname substring, +which starts with the first character following the end of the last successful +substitution. +The first unsuccessful substitution stops the operation of the +.Cm g +option. +The optional trailing +.Cm p +will cause the final result of a successful substitution to be written to +standard error in the following format: +.Pp +.D1 Em original-pathname No >> Em new-pathname +.Pp +File or archive member names that substitute to the empty string +are not selected and will be skipped. +.It Fl v +Verbose operation mode. +If +.Fl v +is specified multiple times or if the +.Fl t +option is also specified, +.Nm +will use a long format for listing files, similar to +.Xr ls 1 +.Fl l . +.It Fl w +Interactively rename files. +This option causes +.Nm +to prompt the user for the filename to use when storing or +extracting files in an archive. 
+.It Fl X +Do not cross mount points in the file system. +.It Fl Z +Compress archive using +.Xr compress 1 . +.It Fl z +Compress archive using +.Xr gzip 1 . +.El +.Pp +The options +.Op Fl 014578 +can be used to select one of the compiled-in backup devices, +.Pa /dev/rstN . +.Sh ENVIRONMENT +.Bl -tag -width Ds +.It Ev TMPDIR +Path in which to store temporary files. +.It Ev TAPE +Default tape device to use instead of +.Pa /dev/rst0 . +If set to hyphen +.Pq Sq - +standard output is used. +.El +.Sh FILES +.Bl -tag -width "/dev/rst0" +.It Pa /dev/rst0 +default archive name +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +All files were processed successfully. +.It 1 +An error occurred. +.El +.Sh EXAMPLES +Create an archive on the default tape drive, containing the files named +.Pa bonvole +and +.Pa sekve : +.Pp +.Dl $ tar c bonvole sekve +.Pp +Output a +.Xr gzip 1 +compressed archive containing the files +.Pa bonvole +and +.Pa sekve +to a file called +.Pa foriru.tar.gz : +.Pp +.Dl $ tar zcf foriru.tar.gz bonvole sekve +.Pp +Verbosely create an archive, called +.Pa backup.tar.gz , +of all files matching the shell +.Xr glob 7 +function +.Pa *.c : +.Pp +.Dl $ tar zcvf backup.tar.gz *.c +.Pp +Verbosely list, but do not extract, all files ending in +.Pa .jpeg +from a compressed archive named +.Pa backup.tar.gz . +Note that the glob pattern has been quoted to avoid expansion by the shell: +.Pp +.Dl $ tar tvzf backup.tar.gz '*.jpeg' +.Pp +For more detailed examples, see +.Xr pax 1 . +.Sh DIAGNOSTICS +Whenever +.Nm +cannot create a file or a link when extracting an archive or cannot +find a file while writing an archive, or cannot preserve the user +ID, group ID, file mode, or access and modification times when the +.Fl p +option is specified, a diagnostic message is written to standard +error and a non-zero exit value will be returned, but processing +will continue. 
+In the case where +.Nm +cannot create a link to a file, +.Nm +will not create a second copy of the file. +.Pp +If the extraction of a file from an archive is prematurely terminated +by a signal or error, +.Nm +may have only partially extracted the file the user wanted. +Additionally, the file modes of extracted files and directories may +have incorrect file bits, and the modification and access times may +be wrong. +.Pp +If the creation of an archive is prematurely terminated by a signal +or error, +.Nm +may have only partially created the archive, which may violate the +specific archive format specification. +.Sh SEE ALSO +.Xr cpio 1 , +.Xr pax 1 +.Sh HISTORY +A +.Nm +command first appeared in +.At v7 . +.Sh AUTHORS +.An Keith Muller +at the University of California, San Diego. +.Sh CAVEATS +The +.Fl j +and +.Fl L +flags are not portable to other versions of +.Nm +where they may have a different meaning. diff --git a/bin/pax/tar.c b/bin/pax/tar.c new file mode 100644 index 0000000..c62705b --- /dev/null +++ b/bin/pax/tar.c @@ -0,0 +1,1284 @@ +/* $OpenBSD: tar.c,v 1.68 2019/06/24 03:33:09 deraadt Exp $ */ +/* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <errno.h> +#include <grp.h> +#include <limits.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifndef major +#include <sys/sysmacros.h> +#endif + +#include "pax.h" +#include "extern.h" +#include "tar.h" + +/* + * Routines for reading, writing and header identify of various versions of tar + */ + +static size_t expandname(char *, size_t, char **, const char *, size_t); +static u_long tar_chksm(char *, int); +static char *name_split(char *, int); +static int ul_oct(u_long, char *, int, int); +static int ull_oct(unsigned long long, char *, int, int); +#ifndef SMALL +static int rd_xheader(ARCHD *arcn, int, off_t); +#endif + +static uid_t uid_nobody; +static uid_t uid_warn; +static gid_t gid_nobody; +static gid_t gid_warn; + +/* + * Routines common to all versions of tar + */ + +int tar_nodir; /* do not write dirs under old tar */ +char *gnu_name_string; /* GNU 
././@LongLink hackery name */ +char *gnu_link_string; /* GNU ././@LongLink hackery link */ + +/* + * tar_endwr() + * add the tar trailer of two null blocks + * Return: + * 0 if ok, -1 otherwise (what wr_skip returns) + */ + +int +tar_endwr(void) +{ + return wr_skip(NULLCNT * BLKMULT); +} + +/* + * tar_endrd() + * no cleanup needed here, just return size of trailer (for append) + * Return: + * size of trailer (2 * BLKMULT) + */ + +off_t +tar_endrd(void) +{ + return NULLCNT * BLKMULT; +} + +/* + * tar_trail() + * Called to determine if a header block is a valid trailer. We are passed + * the block, the in_sync flag (which tells us we are in resync mode; + * looking for a valid header), and cnt (which starts at zero) which is + * used to count the number of empty blocks we have seen so far. + * Return: + * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block + * could never contain a header. + */ + +int +tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt) +{ + int i; + + /* + * look for all zero, trailer is two consecutive blocks of zero + */ + for (i = 0; i < BLKMULT; ++i) { + if (buf[i] != '\0') + break; + } + + /* + * if not all zero it is not a trailer, but MIGHT be a header. + */ + if (i != BLKMULT) + return(-1); + + /* + * When given a zero block, we must be careful! + * If we are not in resync mode, check for the trailer. Have to watch + * out that we do not mis-identify file data as the trailer, so we do + * NOT try to id a trailer during resync mode. During resync mode we + * might as well throw this block out since a valid header can NEVER be + * a block of all 0 (we must have a valid file name). + */ + if (!in_resync && (++*cnt >= NULLCNT)) + return(0); + return(1); +} + +/* + * ul_oct() + * convert an unsigned long to an octal string. many oddball field + * termination characters are used by the various versions of tar in the + * different fields. term selects which kind to use. str is '0' padded + * at the front to len. 
we are unable to use only one format as many old + * tar readers are very cranky about this. + * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +ul_oct(u_long val, char *str, int len, int term) +{ + char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch (term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * ull_oct() + * Convert an unsigned long long to an octal string. One of many oddball + * field termination characters are used by the various versions of tar + * in the different fields. term selects which kind to use. str is + * '0' padded at the front to len. We are unable to use only one format + * as many old tar readers are very cranky about this. 
+ * Return: + * 0 if the number fit into the string, -1 otherwise + */ + +static int +ull_oct(unsigned long long val, char *str, int len, int term) +{ + char *pt; + + /* + * term selects the appropriate character(s) for the end of the string + */ + pt = str + len - 1; + switch (term) { + case 3: + *pt-- = '\0'; + break; + case 2: + *pt-- = ' '; + *pt-- = '\0'; + break; + case 1: + *pt-- = ' '; + break; + case 0: + default: + *pt-- = '\0'; + *pt-- = ' '; + break; + } + + /* + * convert and blank pad if there is space + */ + while (pt >= str) { + *pt-- = '0' + (char)(val & 0x7); + val >>= 3; + if (val == 0) + break; + } + + while (pt >= str) + *pt-- = '0'; + if (val != 0) + return(-1); + return(0); +} + +/* + * tar_chksm() + * calculate the checksum for a tar block counting the checksum field as + * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). + * NOTE: we use len to short circuit summing 0's on write since we ALWAYS + * pad headers with 0. + * Return: + * unsigned long checksum + */ + +static u_long +tar_chksm(char *blk, int len) +{ + char *stop; + char *pt; + u_long chksm = BLNKSUM; /* initial value is checksum field sum */ + + /* + * add the part of the block before the checksum field + */ + pt = blk; + stop = blk + CHK_OFFSET; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + /* + * move past the checksum field and keep going, spec counts the + * checksum field as the sum of 8 blanks (which is pre-computed as + * BLNKSUM). + * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding + * starts, no point in summing zero's) + */ + pt += CHK_LEN; + stop = blk + len; + while (pt < stop) + chksm += (u_long)(*pt++ & 0xff); + return(chksm); +} + +/* + * Routines for old BSD style tar (also made portable to sysV tar) + */ + +/* + * tar_id() + * determine if a block given to us is a valid tar header (and not a USTAR + * header). We have to be on the lookout for those pesky blocks of all + * zero's. 
+ * Return: + * 0 if a tar header, -1 otherwise + */ + +int +tar_id(char *blk, int size) +{ + HD_TAR *hd; + HD_USTAR *uhd; + + if (size < BLKMULT) + return(-1); + hd = (HD_TAR *)blk; + uhd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test, then make + * sure this is not a ustar header by looking for the ustar magic + * cookie. We should use TMAGLEN, but some USTAR archive programs are + * wrong and create archives missing the \0. Last we check the + * checksum. If this is ok we have to assume it is a valid header. + */ + if (hd->name[0] == '\0') + return(-1); + if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + force_one_volume = 1; + return(0); +} + +/* + * tar_opt() + * handle tar format specific -o options + * Return: + * 0 if ok -1 otherwise + */ + +int +tar_opt(void) +{ + OPLIST *opt; + + while ((opt = opt_next()) != NULL) { + if (strcmp(opt->name, TAR_OPTION) || + strcmp(opt->value, TAR_NODIR)) { + paxwarn(1, "Unknown tar format -o option/value pair %s=%s", + opt->name, opt->value); + paxwarn(1,"%s=%s is the only supported tar format option", + TAR_OPTION, TAR_NODIR); + return(-1); + } + + /* + * we only support one option, and only when writing + */ + if ((act != APPND) && (act != ARCHIVE)) { + paxwarn(1, "%s=%s is only supported when writing.", + opt->name, opt->value); + return(-1); + } + tar_nodir = 1; + } + return(0); +} + + +/* + * tar_rd() + * extract the values out of block already determined to be a tar header. + * store the values in the ARCHD parameter. 
+ * Return: + * 0 + */ + +int +tar_rd(ARCHD *arcn, char *buf) +{ + HD_TAR *hd; + unsigned long long val; + char *pt; + + /* + * we only get proper sized buffers passed to us + */ + if (tar_id(buf, BLKMULT) < 0) + return(-1); + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + + /* + * copy out the name and values in the stat buffer + */ + hd = (HD_TAR *)buf; + if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { + arcn->nlen = expandname(arcn->name, sizeof(arcn->name), + &gnu_name_string, hd->name, sizeof(hd->name)); + arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), + &gnu_link_string, hd->linkname, sizeof(hd->linkname)); + } + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & + 0xfff); + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); + val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * have to look at the last character, it may be a '/' and that is used + * to encode this as a directory + */ + pt = &(arcn->name[arcn->nlen - 1]); + arcn->pad = 0; + arcn->skip = 0; + switch (hd->linkflag) { + case SYMTYPE: + /* + * symbolic link, need to get the link name and set the type in + * the st_mode so -v printing will look correct. + */ + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + break; + case LNKTYPE: + /* + * hard link, need to get the link name, set the type in the + * st_mode and st_nlink so -v printing will look better. + */ + arcn->type = PAX_HLK; + arcn->sb.st_nlink = 2; + + /* + * no idea of what type this thing really points at, but + * we set something for printing only. 
+ */ + arcn->sb.st_mode |= S_IFREG; + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + arcn->type = + hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + break; + case DIRTYPE: + /* + * It is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + break; + case AREGTYPE: + case REGTYPE: + default: + /* + * If we have a trailing / this is a directory and NOT a file. + */ + arcn->ln_name[0] = '\0'; + arcn->ln_nlen = 0; + if (*pt == '/') { + /* + * it is a directory, set the mode for -v printing + */ + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + } else { + /* + * have a file that will be followed by data. Set the + * skip value to the size field and calculate the size + * of the padding. + */ + arcn->type = PAX_REG; + arcn->sb.st_mode |= S_IFREG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + } + break; + } + + /* + * strip off any trailing slash. + */ + if (*pt == '/') { + *pt = '\0'; + --arcn->nlen; + } + return(0); +} + +/* + * tar_wr() + * write a tar header for the file specified in the ARCHD to the archive. + * Have to check for file types that cannot be stored and file names that + * are too long. Be careful of the term (last arg) to ul_oct, each field + * of tar has it own spec for the termination character(s). 
+ * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +tar_wr(ARCHD *arcn) +{ + HD_TAR *hd; + int len; + char hdblk[sizeof(HD_TAR)]; + + /* + * check for those file system types which tar cannot store + */ + switch (arcn->type) { + case PAX_DIR: + /* + * user asked that dirs not be written to the archive + */ + if (tar_nodir) + return(1); + break; + case PAX_CHR: + paxwarn(1, "Tar cannot archive a character device %s", + arcn->org_name); + return(1); + case PAX_BLK: + paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name); + return(1); + case PAX_SCK: + paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name); + return(1); + case PAX_FIF: + paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name); + return(1); + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if ((size_t)arcn->ln_nlen > sizeof(hd->linkname)) { + paxwarn(1, "Link name too long for tar %s", + arcn->ln_name); + return(1); + } + break; + case PAX_REG: + case PAX_CTG: + default: + break; + } + + /* + * check file name len, remember extra char for dirs (the / at the end) + */ + len = arcn->nlen; + if (arcn->type == PAX_DIR) + ++len; + if ((size_t)len > sizeof(hd->name)) { + paxwarn(1, "File name too long for tar %s", arcn->name); + return(1); + } + + /* + * Copy the data out of the ARCHD into the tar header based on the type + * of the file. Remember, many tar readers want all fields to be + * padded with zero so we zero the header first. We then set the + * linkflag field (type), the linkname, the size, and set the padding + * (if any) to be added after the file data (0 for all other types, + * as they only have a header). 
+ */ + memset(hdblk, 0, sizeof(hdblk)); + hd = (HD_TAR *)hdblk; + fieldcpy(hd->name, sizeof(hd->name), arcn->name, sizeof(arcn->name)); + arcn->pad = 0; + + if (arcn->type == PAX_DIR) { + /* + * directories are the same as files, except have a filename + * that ends with a /, we add the slash here. No data follows + * dirs, so no pad. + */ + hd->linkflag = AREGTYPE; + hd->name[len-1] = '/'; + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (arcn->type == PAX_SLK) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = SYMTYPE; + fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, + sizeof(arcn->ln_name)); + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else if (PAX_IS_HARDLINK(arcn->type)) { + /* + * no data follows this file, so no pad + */ + hd->linkflag = LNKTYPE; + fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, + sizeof(arcn->ln_name)); + if (ul_oct(0, hd->size, sizeof(hd->size), 1)) + goto out; + } else { + /* + * data follows this file, so set the pad + */ + hd->linkflag = AREGTYPE; + if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { + paxwarn(1, "File is too large for tar %s", + arcn->org_name); + return(1); + } + arcn->pad = TAR_PAD(arcn->sb.st_size); + } + + /* + * copy those fields that are independent of the type + */ + if (ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || + ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, + sizeof(hd->mtime), 1) || + ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || + ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0)) + goto out; + + /* + * calculate and add the checksum, then write the header. 
A return of + * 0 tells the caller to now write the file data, 1 says no data needs + * to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) + return(-1); + if (wr_skip(BLKMULT - sizeof(HD_TAR)) < 0) + return(-1); + if (PAX_IS_REG(arcn->type)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Tar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * Routines for POSIX ustar + */ + +/* + * ustar_id() + * determine if a block given to us is a valid ustar header. We have to + * be on the lookout for those pesky blocks of all zero's + * Return: + * 0 if a ustar header, -1 otherwise + */ + +int +ustar_id(char *blk, int size) +{ + HD_USTAR *hd; + + if (size < BLKMULT) + return(-1); + hd = (HD_USTAR *)blk; + + /* + * check for block of zero's first, a simple and fast test then check + * ustar magic cookie. We should use TMAGLEN, but some USTAR archive + * programs are fouled up and create archives missing the \0. Last we + * check the checksum. If ok we have to assume it is a valid header. + */ + if (hd->prefix[0] == '\0' && hd->name[0] == '\0') + return(-1); + if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) + return(-1); + if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT)) + return(-1); + return(0); +} + +/* + * ustar_rd() + * extract the values out of block already determined to be a ustar header. + * store the values in the ARCHD parameter. 
+ * Return: + * 0 + */ + +int +ustar_rd(ARCHD *arcn, char *buf) +{ + HD_USTAR *hd = (HD_USTAR *)buf; + char *dest; + int cnt = 0; + dev_t devmajor; + dev_t devminor; + unsigned long long val; + + /* + * we only get proper sized buffers + */ + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + +#ifndef SMALL +reset: +#endif + memset(arcn, 0, sizeof(*arcn)); + arcn->org_name = arcn->name; + arcn->sb.st_nlink = 1; + +#ifndef SMALL + /* Process Extended headers. */ + if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) { + if (rd_xheader(arcn, hd->typeflag == GHDRTYPE, + (off_t)asc_ul(hd->size, sizeof(hd->size), OCT)) < 0) + return (-1); + + /* Update and check the ustar header. */ + if (rd_wrbuf(buf, BLKMULT) != BLKMULT) + return (-1); + if (ustar_id(buf, BLKMULT) < 0) + return(-1); + + /* if the next block is another extension, reset the values */ + if (hd->typeflag == XHDRTYPE || hd->typeflag == GHDRTYPE) + goto reset; + } +#endif + + if (!arcn->nlen) { + /* + * See if the filename is split into two parts. if, so join + * the parts. We copy the prefix first and add a / between + * the prefix and name. + */ + dest = arcn->name; + if (*(hd->prefix) != '\0') { + cnt = fieldcpy(dest, sizeof(arcn->name) - 1, + hd->prefix, sizeof(hd->prefix)); + dest += cnt; + *dest++ = '/'; + cnt++; + } else + cnt = 0; + + if (hd->typeflag != LONGLINKTYPE && + hd->typeflag != LONGNAMETYPE) { + arcn->nlen = cnt + expandname(dest, + sizeof(arcn->name) - cnt, &gnu_name_string, + hd->name, sizeof(hd->name)); + } + } + + if (!arcn->ln_nlen && + hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { + arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), + &gnu_link_string, hd->linkname, sizeof(hd->linkname)); + } + + /* + * follow the spec to the letter. we should only have mode bits, strip + * off all other crud we may be passed. 
+ */ + arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & + 0xfff); + arcn->sb.st_size = (off_t)asc_ull(hd->size, sizeof(hd->size), OCT); + val = asc_ull(hd->mtime, sizeof(hd->mtime), OCT); + if (val > MAX_TIME_T) + arcn->sb.st_mtime = INT_MAX; /* XXX 2038 */ + else + arcn->sb.st_mtime = val; + arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; + + /* + * If we can find the ascii names for gname and uname in the password + * and group files we will use the uid's and gid they bind. Otherwise + * we use the uid and gid values stored in the header. (This is what + * the posix spec wants). + */ + hd->gname[sizeof(hd->gname) - 1] = '\0'; + if (Nflag || gid_from_group(hd->gname, &(arcn->sb.st_gid)) == -1) + arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); + hd->uname[sizeof(hd->uname) - 1] = '\0'; + if (Nflag || uid_from_user(hd->uname, &(arcn->sb.st_uid)) == -1) + arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); + + /* + * set the defaults, these may be changed depending on the file type + */ + arcn->pad = 0; + arcn->skip = 0; + arcn->sb.st_rdev = (dev_t)0; + + /* + * set the mode and PAX type according to the typeflag in the header + */ + switch (hd->typeflag) { + case FIFOTYPE: + arcn->type = PAX_FIF; + arcn->sb.st_mode |= S_IFIFO; + break; + case DIRTYPE: + arcn->type = PAX_DIR; + arcn->sb.st_mode |= S_IFDIR; + arcn->sb.st_nlink = 2; + + /* + * Some programs that create ustar archives append a '/' + * to the pathname for directories. This clearly violates + * ustar specs, but we will silently strip it off anyway. + */ + if (arcn->name[arcn->nlen - 1] == '/') + arcn->name[--arcn->nlen] = '\0'; + break; + case BLKTYPE: + case CHRTYPE: + /* + * this type requires the rdev field to be set. 
+ */ + if (hd->typeflag == BLKTYPE) { + arcn->type = PAX_BLK; + arcn->sb.st_mode |= S_IFBLK; + } else { + arcn->type = PAX_CHR; + arcn->sb.st_mode |= S_IFCHR; + } + devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); + devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); + arcn->sb.st_rdev = TODEV(devmajor, devminor); + break; + case SYMTYPE: + case LNKTYPE: + if (hd->typeflag == SYMTYPE) { + arcn->type = PAX_SLK; + arcn->sb.st_mode |= S_IFLNK; + } else { + arcn->type = PAX_HLK; + /* + * so printing looks better + */ + arcn->sb.st_mode |= S_IFREG; + arcn->sb.st_nlink = 2; + } + break; + case LONGLINKTYPE: + case LONGNAMETYPE: + /* + * GNU long link/file; we tag these here and let the + * pax internals deal with it -- too ugly otherwise. + */ + arcn->type = + hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + break; + case CONTTYPE: + case AREGTYPE: + case REGTYPE: + default: + /* + * these types have file data that follows. Set the skip and + * pad fields. + */ + arcn->type = PAX_REG; + arcn->pad = TAR_PAD(arcn->sb.st_size); + arcn->skip = arcn->sb.st_size; + arcn->sb.st_mode |= S_IFREG; + break; + } + return(0); +} + +/* + * ustar_wr() + * write a ustar header for the file specified in the ARCHD to the archive + * Have to check for file types that cannot be stored and file names that + * are too long. 
Be careful of the term (last arg) to ul_oct, we only use + * '\0' for the termination character (this is different than picky tar) + * ASSUMED: space after header in header block is zero filled + * Return: + * 0 if file has data to be written after the header, 1 if file has NO + * data to write after the header, -1 if archive write failed + */ + +int +ustar_wr(ARCHD *arcn) +{ + HD_USTAR *hd; + const char *name; + char *pt, hdblk[sizeof(HD_USTAR)]; + + /* + * check for those file system types ustar cannot store + */ + if (arcn->type == PAX_SCK) { + paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name); + return(1); + } + + /* + * user asked that dirs not be written to the archive + */ + if (arcn->type == PAX_DIR && tar_nodir) + return (1); + + /* + * check the length of the linkname + */ + if (PAX_IS_LINK(arcn->type) && + ((size_t)arcn->ln_nlen > sizeof(hd->linkname))) { + paxwarn(1, "Link name too long for ustar %s", arcn->ln_name); + return(1); + } + + /* + * split the path name into prefix and name fields (if needed). if + * pt != arcn->name, the name has to be split + */ + if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { + paxwarn(1, "File name too long for ustar %s", arcn->name); + return(1); + } + + /* + * zero out the header so we don't have to worry about zero fill below + */ + memset(hdblk, 0, sizeof(hdblk)); + hd = (HD_USTAR *)hdblk; + arcn->pad = 0; + + /* + * split the name, or zero out the prefix + */ + if (pt != arcn->name) { + /* + * name was split, pt points at the / where the split is to + * occur, we remove the / and copy the first part to the prefix + */ + *pt = '\0'; + fieldcpy(hd->prefix, sizeof(hd->prefix), arcn->name, + sizeof(arcn->name)); + *pt++ = '/'; + } + + /* + * copy the name part. 
this may be the whole path or the part after + * the prefix + */ + fieldcpy(hd->name, sizeof(hd->name), pt, + sizeof(arcn->name) - (pt - arcn->name)); + + /* + * set the fields in the header that are type dependent + */ + switch (arcn->type) { + case PAX_DIR: + hd->typeflag = DIRTYPE; + if (ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_CHR: + case PAX_BLK: + if (arcn->type == PAX_CHR) + hd->typeflag = CHRTYPE; + else + hd->typeflag = BLKTYPE; + if (ul_oct(MAJOR(arcn->sb.st_rdev), hd->devmajor, + sizeof(hd->devmajor), 3) || + ul_oct(MINOR(arcn->sb.st_rdev), hd->devminor, + sizeof(hd->devminor), 3) || + ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_FIF: + hd->typeflag = FIFOTYPE; + if (ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_SLK: + case PAX_HLK: + case PAX_HRG: + if (arcn->type == PAX_SLK) + hd->typeflag = SYMTYPE; + else + hd->typeflag = LNKTYPE; + fieldcpy(hd->linkname, sizeof(hd->linkname), arcn->ln_name, + sizeof(arcn->ln_name)); + if (ul_oct(0, hd->size, sizeof(hd->size), 3)) + goto out; + break; + case PAX_REG: + case PAX_CTG: + default: + /* + * file data with this type, set the padding + */ + if (arcn->type == PAX_CTG) + hd->typeflag = CONTTYPE; + else + hd->typeflag = REGTYPE; + arcn->pad = TAR_PAD(arcn->sb.st_size); + if (ull_oct(arcn->sb.st_size, hd->size, sizeof(hd->size), 3)) { + paxwarn(1, "File is too long for ustar %s", + arcn->org_name); + return(1); + } + break; + } + + memcpy(hd->magic, TMAGIC, TMAGLEN); + memcpy(hd->version, TVERSION, TVERSLEN); + + /* + * set the remaining fields. Some versions want all 16 bits of mode + * we better humor them (they really do not meet spec though).... 
+ */ + if (ul_oct(arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) { + if (uid_nobody == 0) { + if (uid_from_user("nobody", &uid_nobody) == -1) + goto out; + } + if (uid_warn != arcn->sb.st_uid) { + uid_warn = arcn->sb.st_uid; + paxwarn(1, + "Ustar header field is too small for uid %lu, " + "using nobody", (u_long)arcn->sb.st_uid); + } + if (ul_oct(uid_nobody, hd->uid, sizeof(hd->uid), 3)) + goto out; + } + if (ul_oct(arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) { + if (gid_nobody == 0) { + if (gid_from_group("nobody", &gid_nobody) == -1) + goto out; + } + if (gid_warn != arcn->sb.st_gid) { + gid_warn = arcn->sb.st_gid; + paxwarn(1, + "Ustar header field is too small for gid %lu, " + "using nobody", (u_long)arcn->sb.st_gid); + } + if (ul_oct(gid_nobody, hd->gid, sizeof(hd->gid), 3)) + goto out; + } + if (ull_oct(arcn->sb.st_mtime < 0 ? 0 : arcn->sb.st_mtime, hd->mtime, + sizeof(hd->mtime), 3) || + ul_oct(arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) + goto out; + if (!Nflag) { + if ((name = user_from_uid(arcn->sb.st_uid, 1)) != NULL) + strncpy(hd->uname, name, sizeof(hd->uname)); + if ((name = group_from_gid(arcn->sb.st_gid, 1)) != NULL) + strncpy(hd->gname, name, sizeof(hd->gname)); + } + + /* + * calculate and store the checksum write the header to the archive + * return 0 tells the caller to now write the file data, 1 says no data + * needs to be written + */ + if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, + sizeof(hd->chksum), 3)) + goto out; + if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) + return(-1); + if (wr_skip(BLKMULT - sizeof(HD_USTAR)) < 0) + return(-1); + if (PAX_IS_REG(arcn->type)) + return(0); + return(1); + + out: + /* + * header field is out of range + */ + paxwarn(1, "Ustar header field is too small for %s", arcn->org_name); + return(1); +} + +/* + * name_split() + * see if the name has to be split for storage in a ustar header. We try + * to fit the entire name in the name field without splitting if we can. 
+ * The split point is always at a / + * Return + * character pointer to split point (always the / that is to be removed + * if the split is not needed, the points is set to the start of the file + * name (it would violate the spec to split there). A NULL is returned if + * the file name is too long + */ + +static char * +name_split(char *name, int len) +{ + char *start; + + /* + * check to see if the file name is small enough to fit in the name + * field. if so just return a pointer to the name. + * The strings can fill the complete name and prefix fields + * without a NUL terminator. + */ + if (len <= TNMSZ) + return(name); + if (len > (TPFSZ + TNMSZ + 1)) + return(NULL); + + /* + * we start looking at the biggest sized piece that fits in the name + * field. We walk forward looking for a slash to split at. The idea is + * to find the biggest piece to fit in the name field (or the smallest + * prefix we can find) (the -1 is correct the biggest piece would + * include the slash between the two parts that gets thrown away) + */ + start = name + len - TNMSZ - 1; + + /* + * the prefix may not be empty, so skip the first character when + * trying to split a path of exactly TNMSZ+1 characters. + * NOTE: This means the ustar format can't store /str if + * str contains no slashes and the length of str == TNMSZ + */ + if (start == name) + ++start; + + while ((*start != '\0') && (*start != '/')) + ++start; + + /* + * if we hit the end of the string, this name cannot be split, so we + * cannot store this file. 
+ */ + if (*start == '\0') + return(NULL); + + /* + * the split point isn't valid if it results in a prefix + * longer than TPFSZ + */ + if ((start - name) > TPFSZ) + return(NULL); + + /* + * ok have a split point, return it to the caller + */ + return(start); +} + +static size_t +expandname(char *buf, size_t len, char **gnu_name, const char *name, + size_t limit) +{ + size_t nlen; + + if (*gnu_name) { + /* *gnu_name is NUL terminated */ + if ((nlen = strlcpy(buf, *gnu_name, len)) >= len) + nlen = len - 1; + free(*gnu_name); + *gnu_name = NULL; + } else + nlen = fieldcpy(buf, len, name, limit); + return(nlen); +} + +#ifndef SMALL + +/* shortest possible extended record: "5 a=\n" */ +#define MINXHDRSZ 5 + +/* longest record we'll accept */ +#define MAXXHDRSZ BLKMULT + +static int +rd_xheader(ARCHD *arcn, int global, off_t size) +{ + char buf[MAXXHDRSZ]; + long len; + char *delim, *keyword; + char *nextp, *p, *end; + int pad, ret = 0; + + /* before we alter size, make note of how much we have to skip */ + pad = TAR_PAD((unsigned)size); + + p = end = buf; + while (size > 0 || p < end) { + if (size > 0) { + int rdlen; + + /* shift stuff down */ + if (p > buf) { + memmove(buf, p, end - p); + end -= p - buf; + p = buf; + } + + /* fill starting at end */ + rdlen = MINIMUM(size, (buf + sizeof buf) - end); + if (rd_wrbuf(end, rdlen) != rdlen) { + ret = -1; + break; + } + size -= rdlen; + end += rdlen; + } + + /* [p, end) is good */ + if (memchr(p, ' ', end - p) == NULL || + !isdigit((unsigned char)*p)) { + paxwarn(1, "Invalid extended header record"); + ret = -1; + break; + } + errno = 0; + len = strtol(p, &delim, 10); + if (*delim != ' ' || (errno == ERANGE && len == LONG_MAX) || + len < MINXHDRSZ) { + paxwarn(1, "Invalid extended header record length"); + ret = -1; + break; + } + if (len > end - p) { + paxwarn(1, "Extended header record length %lu is " + "out of range", len); + /* if we can just toss this record, do so */ + len -= end - p; + if (len <= size && 
rd_skip(len) == 0) { + size -= len; + p = end = buf; + continue; + } + ret = -1; + break; + } + nextp = p + len; + keyword = p = delim + 1; + p = memchr(p, '=', len); + if (!p || nextp[-1] != '\n') { + paxwarn(1, "Malformed extended header record"); + ret = -1; + break; + } + *p++ = nextp[-1] = '\0'; + if (!global) { + if (!strcmp(keyword, "path")) { + arcn->nlen = strlcpy(arcn->name, p, + sizeof(arcn->name)); + } else if (!strcmp(keyword, "linkpath")) { + arcn->ln_nlen = strlcpy(arcn->ln_name, p, + sizeof(arcn->ln_name)); + } + } + p = nextp; + } + + if (rd_skip(size + pad) < 0) + return (-1); + return (ret); +} +#endif diff --git a/bin/pax/tar.h b/bin/pax/tar.h new file mode 100644 index 0000000..318d099 --- /dev/null +++ b/bin/pax/tar.h @@ -0,0 +1,159 @@ +/* $OpenBSD: tar.h,v 1.9 2014/01/08 06:43:34 deraadt Exp $ */ +/* $NetBSD: tar.h,v 1.3 1995/03/21 09:07:51 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tar.h 8.2 (Berkeley) 4/18/94 + */ + +/* + * defines and data structures common to all tar formats + */ +#define CHK_LEN 8 /* length of checksum field */ +#define TNMSZ 100 /* size of name field */ +#ifdef _PAX_ +#define NULLCNT 2 /* number of null blocks in trailer */ +#define CHK_OFFSET 148 /* start of chksum field */ +#define BLNKSUM 256L /* sum of checksum field using ' ' */ +#endif /* _PAX_ */ + +/* + * Values used in typeflag field in all tar formats + * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers) + */ +#define REGTYPE '0' /* Regular File */ +#define AREGTYPE '\0' /* Regular File */ +#define LNKTYPE '1' /* Link */ +#define SYMTYPE '2' /* Symlink */ +#define CHRTYPE '3' /* Character Special File */ +#define BLKTYPE '4' /* Block Special File */ +#define DIRTYPE '5' /* Directory */ +#define FIFOTYPE '6' /* FIFO */ +#define CONTTYPE '7' /* high perf file */ + +/* + * Extended header - POSIX.1-2001 + */ +#define XHDRTYPE 'x' /* Extended header */ +#define GHDRTYPE 'g' /* Global header*/ + +/* + * GNU tar compatibility; + */ +#define LONGLINKTYPE 'K' /* Long Symlink */ +#define LONGNAMETYPE 'L' /* Long File */ + +/* + * Mode field encoding of the different 
file types - values in octal + */ +#define TSUID 04000 /* Set UID on execution */ +#define TSGID 02000 /* Set GID on execution */ +#define TSVTX 01000 /* Reserved */ +#define TUREAD 00400 /* Read by owner */ +#define TUWRITE 00200 /* Write by owner */ +#define TUEXEC 00100 /* Execute/Search by owner */ +#define TGREAD 00040 /* Read by group */ +#define TGWRITE 00020 /* Write by group */ +#define TGEXEC 00010 /* Execute/Search by group */ +#define TOREAD 00004 /* Read by other */ +#define TOWRITE 00002 /* Write by other */ +#define TOEXEC 00001 /* Execute/Search by other */ + +#ifdef _PAX_ +/* + * Pad with a bit mask, much faster than doing a mod but only works on powers + * of 2. Macro below is for block of 512 bytes. + */ +#define TAR_PAD(x) ((512 - ((x) & 511)) & 511) +#endif /* _PAX_ */ + +/* + * structure of an old tar header as it appeared in BSD releases + */ +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char linkflag; /* norm, hard, or sym. 
*/ + char linkname[TNMSZ]; /* linked to name */ +} HD_TAR; + +#ifdef _PAX_ +/* + * -o options for BSD tar to not write directories to the archive + */ +#define TAR_NODIR "nodir" +#define TAR_OPTION "write_opt" + +/* + * default device names + */ +#define DEV_0 "/dev/rst0" +#define DEV_1 "/dev/rst1" +#define DEV_4 "/dev/rst4" +#define DEV_5 "/dev/rst5" +#define DEV_7 "/dev/rst7" +#define DEV_8 "/dev/rst8" +#endif /* _PAX_ */ + +/* + * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990 + */ +#define TPFSZ 155 +#define TMAGIC "ustar" /* ustar and a null */ +#define TMAGLEN 6 +#define TVERSION "00" /* 00 and no null */ +#define TVERSLEN 2 + +typedef struct { + char name[TNMSZ]; /* name of entry */ + char mode[8]; /* mode */ + char uid[8]; /* uid */ + char gid[8]; /* gid */ + char size[12]; /* size */ + char mtime[12]; /* modification time */ + char chksum[CHK_LEN]; /* checksum */ + char typeflag; /* type of file. */ + char linkname[TNMSZ]; /* linked to name */ + char magic[TMAGLEN]; /* magic cookie */ + char version[TVERSLEN]; /* version */ + char uname[32]; /* ascii owner name */ + char gname[32]; /* ascii group name */ + char devmajor[8]; /* major device number */ + char devminor[8]; /* minor device number */ + char prefix[TPFSZ]; /* linked to name */ +} HD_USTAR; diff --git a/bin/pax/tty_subs.c b/bin/pax/tty_subs.c new file mode 100644 index 0000000..a07264a --- /dev/null +++ b/bin/pax/tty_subs.c @@ -0,0 +1,187 @@ +/* $OpenBSD: tty_subs.c,v 1.17 2016/08/26 04:22:13 guenther Exp $ */ +/* $NetBSD: tty_subs.c,v 1.5 1995/03/21 09:07:52 cgd Exp $ */ + +/*- + * Copyright (c) 1992 Keith Muller. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Keith Muller of the University of California, San Diego. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "pax.h" +#include "extern.h" + +/* + * routines that deal with I/O to and from the user + */ + +#define DEVTTY "/dev/tty" /* device for interactive i/o */ +static FILE *ttyoutf = NULL; /* output pointing at control tty */ +static FILE *ttyinf = NULL; /* input pointing at control tty */ + +/* + * tty_init() + * try to open the controlling terminal (if any) for this process. if the + * open fails, future ops that require user input will get an EOF + */ + +int +tty_init(void) +{ + int ttyfd; + + if ((ttyfd = open(DEVTTY, O_RDWR | O_CLOEXEC)) >= 0) { + if ((ttyoutf = fdopen(ttyfd, "w")) != NULL) { + if ((ttyinf = fdopen(ttyfd, "r")) != NULL) + return(0); + (void)fclose(ttyoutf); + } + (void)close(ttyfd); + } + + if (iflag) { + paxwarn(1, "Fatal error, cannot open %s", DEVTTY); + return(-1); + } + return(0); +} + +/* + * tty_prnt() + * print a message using the specified format to the controlling tty + * if there is no controlling terminal, just return. + */ + +void +tty_prnt(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (ttyoutf == NULL) { + va_end(ap); + return; + } + (void)vfprintf(ttyoutf, fmt, ap); + va_end(ap); + (void)fflush(ttyoutf); +} + +/* + * tty_read() + * read a string from the controlling terminal if it is open into the + * supplied buffer + * Return: + * 0 if data was read, -1 otherwise. + */ + +int +tty_read(char *str, int len) +{ + if (ttyinf == NULL || fgets(str, len, ttyinf) == NULL) + return(-1); + + /* + * strip off that trailing newline + */ + str[strcspn(str, "\n")] = '\0'; + return(0); +} + +/* + * paxwarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +paxwarn(int set, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fputc('\n', stderr); +} + +/* + * syswarn() + * write a warning message to stderr. if "set" the exit value of pax + * will be non-zero. + */ + +void +syswarn(int set, int errnum, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (set) + exit_val = 1; + /* + * when vflag we better ship out an extra \n to get this message on a + * line by itself + */ + if (vflag && vfpart) { + (void)fflush(listf); + (void)fputc('\n', stderr); + vfpart = 0; + } + (void)fprintf(stderr, "%s: ", argv0); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + + /* + * format and print the errno + */ + if (errnum > 0) + (void)fprintf(stderr, ": %s", strerror(errnum)); + (void)fputc('\n', stderr); +} diff --git a/usr.bin/diff/CVS/Entries b/usr.bin/diff/CVS/Entries new file mode 100644 index 0000000..912da0e --- /dev/null +++ b/usr.bin/diff/CVS/Entries @@ -0,0 +1,9 @@ +/Makefile/1.3/Tue May 29 18:24:56 2007// +/diff.1/1.49/Sat Feb 8 01:09:58 2020// +/diff.c/1.67/Fri Jun 28 13:35:00 2019// +/diff.h/1.33/Mon Oct 5 20:15:00 2015// +/diffdir.c/1.47/Fri Jan 25 00:19:26 2019// +/diffreg.c/1.93/Fri Jun 28 13:35:00 2019// +/xmalloc.c/1.10/Fri Jun 28 05:44:09 2019// +/xmalloc.h/1.4/Thu Nov 12 16:30:30 2015// +D diff --git a/usr.bin/diff/CVS/Repository b/usr.bin/diff/CVS/Repository new file mode 100644 index 0000000..088ef75 --- /dev/null +++ b/usr.bin/diff/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/diff diff --git a/usr.bin/diff/CVS/Root b/usr.bin/diff/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/diff/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile new file mode 
100644 index 0000000..4f1c9d5 --- /dev/null +++ b/usr.bin/diff/Makefile @@ -0,0 +1,7 @@ +# $OpenBSD: Makefile,v 1.3 2007/05/29 18:24:56 ray Exp $ + +PROG= diff +SRCS= diff.c diffdir.c diffreg.c xmalloc.c +COPTS+= -Wall + +.include <bsd.prog.mk> diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1 new file mode 100644 index 0000000..353c770 --- /dev/null +++ b/usr.bin/diff/diff.1 @@ -0,0 +1,474 @@ +.\" $OpenBSD: diff.1,v 1.49 2020/02/08 01:09:58 jsg Exp $ +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)diff.1 8.1 (Berkeley) 6/30/93 +.\" +.Dd $Mdocdate: February 8 2020 $ +.Dt DIFF 1 +.Os +.Sh NAME +.Nm diff +.Nd differential file and directory comparator +.Sh SYNOPSIS +.Nm diff +.Op Fl abdipTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl I Ar pattern +.Op Fl L Ar label +.Ar file1 file2 +.Nm diff +.Op Fl abdilpTtw +.Op Fl I Ar pattern +.Op Fl L Ar label +.Fl C Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abdiltw +.Op Fl I Ar pattern +.Fl D Ar string +.Ar file1 file2 +.Nm diff +.Op Fl abdilpTtw +.Op Fl I Ar pattern +.Op Fl L Ar label +.Fl U Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abdilNPprsTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl I Ar pattern +.Bk -words +.Op Fl L Ar label +.Op Fl S Ar name +.Op Fl X Ar file +.Op Fl x Ar pattern +.Ek +.Ar dir1 dir2 +.Sh DESCRIPTION +The +.Nm +utility compares the contents of +.Ar file1 +and +.Ar file2 +and writes to the standard output the list of changes necessary to +convert one file into the other. +No output is produced if the files are identical. +.Pp +Output options (mutually exclusive): +.Bl -tag -width Ds +.It Fl C Ar number +Like +.Fl c +but produces a diff with +.Ar number +lines of context. +.It Fl c +Produces a diff with 3 lines of context. +With +.Fl c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates and then each change is separated +by a line with fifteen +.Li * Ns 's . 
+The lines removed from +.Ar file1 +are marked with +.Sq \-\ \& ; +those added to +.Ar file2 +are marked +.Sq +\ \& . +Lines which are changed from one file to the other are marked in +both files with +.Sq !\ \& . +Changes which lie within 3 lines of each other are grouped together on +output. +.It Fl D Ar string +Creates a merged version of +.Ar file1 +and +.Ar file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining +.Ar string +is equivalent to compiling +.Ar file1 , +while defining +.Ar string +will yield +.Ar file2 . +.It Fl e +Produces output in a form suitable as input for the editor utility, +.Xr ed 1 , +which can then be used to convert file1 into file2. +.Pp +Extra commands are added to the output when comparing directories with +.Fl e , +so that the result is a +.Xr sh 1 +script for converting text files which are common to the two directories +from their state in +.Ar dir1 +to their state in +.Ar dir2 . +.It Fl f +Identical output to that of the +.Fl e +flag, but in reverse order. +It cannot be digested by +.Xr ed 1 . +.It Fl n +Produces a script similar to that of +.Fl e , +but in the opposite order and with a count of changed lines on each +insert or delete command. +This is the form used by +.Xr rcsdiff 1 . +.It Fl q +Just print a line when the files differ. +Does not output a list of changes. +.It Fl U Ar number +Like +.Fl u +but produces a diff with +.Ar number +lines of context. +.It Fl u +Produces a +.Em unified +diff with 3 lines of context. +A unified diff is similar to the context diff produced by the +.Fl c +option. +However, unlike with +.Fl c , +all lines to be changed (added and/or removed) are present in +a single section. +.El +.Pp +Comparison options: +.Bl -tag -width Ds +.It Fl a +Treat all files as ASCII text. +Normally +.Nm +will simply print +.Dq Binary files ... differ +if files contain binary characters. +Use of this option forces +.Nm +to produce a diff. 
+.It Fl b +Causes trailing blanks (spaces and tabs) to be ignored, and other +strings of blanks to compare equal. +.It Fl d +Try very hard to produce a diff as small as possible. +This may consume a lot of processing power and memory when processing +large files with many changes. +.It Fl I Ar pattern +Ignores changes, insertions, and deletions whose lines match the +extended regular expression +.Ar pattern . +Multiple +.Fl I +patterns may be specified. +All lines in the change must match some pattern for the change to be +ignored. +See +.Xr re_format 7 +for more information on regular expression patterns. +.It Fl i +Ignores the case of letters. +E.g., +.Dq A +will compare equal to +.Dq a . +.It Fl L Ar label +Print +.Ar label +instead of the first (and second, if this option is specified twice) +file name and time in the context or unified diff header. +.It Fl p +With unified and context diffs, show with each change +the first 40 characters of the last line before the context beginning +with a letter, an underscore or a dollar sign. +For C source code following standard layout conventions, this will +show the prototype of the function the change applies to. +.It Fl T +Print a tab rather than a space before the rest of the line for the +normal, context or unified output formats. +This makes the alignment of tabs in the line consistent. +.It Fl t +Will expand tabs in output lines. +Normal or +.Fl c +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. +This option will preserve the original source's indentation. +.It Fl w +Is similar to +.Fl b +but causes whitespace (blanks and tabs) to be totally ignored. +E.g., +.Dq if (\ \&a == b \&) +will compare equal to +.Dq if(a==b) . 
+.El +.Pp +Directory comparison options: +.Bl -tag -width Ds +.It Fl N +If a file is found in only one directory, act as if it was found in the +other directory too but was of zero size. +.It Fl P +If a file is found only in +.Ar dir2 , +act as if it was found in +.Ar dir1 +too but was of zero size. +.It Fl r +Causes application of +.Nm +recursively to common subdirectories encountered. +.It Fl S Ar name +Re-starts a directory +.Nm +in the middle, beginning with file +.Ar name . +.It Fl s +Causes +.Nm +to report files which are the same, which are otherwise not mentioned. +.It Fl X Ar file +Exclude files and subdirectories from comparison whose basenames match +lines in +.Ar file . +Multiple +.Fl X +options may be specified. +.It Fl x Ar pattern +Exclude files and subdirectories from comparison whose basenames match +.Ar pattern . +Patterns are matched using shell-style globbing via +.Xr fnmatch 3 . +Multiple +.Fl x +options may be specified. +.El +.Pp +If both arguments are directories, +.Nm +sorts the contents of the directories by name, and then runs the +regular file +.Nm +algorithm, producing a change list, +on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are described as such. +In directory mode only regular files and directories are compared. +If a non-regular file such as a device special file or FIFO +is encountered, a diagnostic message is printed. +.Pp +If only one of +.Ar file1 +and +.Ar file2 +is a directory, +.Nm +is applied to the non-directory file and the file contained in +the directory file with a filename that is the same as the +last component of the non-directory file. +.Pp +If either +.Ar file1 +or +.Ar file2 +is +.Sq - , +the standard input is +used in its place. +.Ss Output Style +The default (without +.Fl e , +.Fl c , +or +.Fl n +.\" -C +options) +output contains lines of these forms, where +.Va XX , YY , ZZ , QQ +are line numbers respective of file order. 
+.Pp +.Bl -tag -width "XX,YYcZZ,QQ" -compact +.It Li XX Ns Ic a Ns Li YY +At (the end of) line +.Va XX +of +.Ar file1 , +append the contents +of line +.Va YY +of +.Ar file2 +to make them equal. +.It Li XX Ns Ic a Ns Li YY,ZZ +Same as above, but append the range of lines, +.Va YY +through +.Va ZZ +of +.Ar file2 +to line +.Va XX +of file1. +.It Li XX Ns Ic d Ns Li YY +At line +.Va XX +delete +the line. +The value +.Va YY +tells to which line the change would bring +.Ar file1 +in line with +.Ar file2 . +.It Li XX,YY Ns Ic d Ns Li ZZ +Delete the range of lines +.Va XX +through +.Va YY +in +.Ar file1 . +.It Li XX Ns Ic c Ns Li YY +Change the line +.Va XX +in +.Ar file1 +to the line +.Va YY +in +.Ar file2 . +.It Li XX,YY Ns Ic c Ns Li ZZ +Replace the range of specified lines with the line +.Va ZZ . +.It Li XX,YY Ns Ic c Ns Li ZZ,QQ +Replace the range +.Va XX , Ns Va YY +from +.Ar file1 +with the range +.Va ZZ , Ns Va QQ +from +.Ar file2 . +.El +.Pp +These lines resemble +.Xr ed 1 +subcommands to convert +.Ar file1 +into +.Ar file2 . +The line numbers before the action letters pertain to +.Ar file1 ; +those after pertain to +.Ar file2 . +Thus, by exchanging +.Ic a +for +.Ic d +and reading the line in reverse order, one can also +determine how to convert +.Ar file2 +into +.Ar file1 . +As in +.Xr ed 1 , +identical +pairs (where num1 = num2) are abbreviated as a single +number. +.Sh FILES +.Bl -tag -width /tmp/diff.XXXXXXXX -compact +.It Pa /tmp/diff. Ns Ar XXXXXXXX +Temporary file used when comparing a device or the standard input. +Note that the temporary file is unlinked as soon as it is created +so it will not show up in a directory listing. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +No differences were found. +.It 1 +Differences were found. +.It >1 +An error occurred. 
+.El +.Sh SEE ALSO +.Xr cmp 1 , +.Xr comm 1 , +.Xr diff3 1 , +.Xr ed 1 , +.Xr patch 1 , +.Xr sdiff 1 +.Rs +.%A James W. Hunt +.%A M. Douglas McIlroy +.%T "An Algorithm for Differential File Comparison" +.%J Computing Science Technical Report +.%Q Bell Laboratories 41 +.%D June 1976 +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl aDdIiLlNnPpqSsTtwXx +are extensions to that specification. +.Sh HISTORY +A +.Nm +command appeared in +.At v5 . diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c new file mode 100644 index 0000000..64cdd45 --- /dev/null +++ b/usr.bin/diff/diff.c @@ -0,0 +1,402 @@ +/* $OpenBSD: diff.c,v 1.67 2019/06/28 13:35:00 deraadt Exp $ */ + +/* + * Copyright (c) 2003 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. 
+ */ + +#include <sys/cdefs.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +int Nflag, Pflag, rflag, sflag, Tflag; +int diff_format, diff_context, status; +char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +struct stat stb1, stb2; +struct excludes *excludes_list; +regex_t ignore_re; + +#define OPTIONS "0123456789abC:cdD:efhI:iL:lnNPpqrS:sTtU:uwX:x:" +static struct option longopts[] = { + { "text", no_argument, 0, 'a' }, + { "ignore-space-change", no_argument, 0, 'b' }, + { "context", optional_argument, 0, 'C' }, + { "ifdef", required_argument, 0, 'D' }, + { "minimal", no_argument, 0, 'd' }, + { "ed", no_argument, 0, 'e' }, + { "forward-ed", no_argument, 0, 'f' }, + { "ignore-matching-lines", required_argument, 0, 'I' }, + { "ignore-case", no_argument, 0, 'i' }, + { "label", required_argument, 0, 'L' }, + { "new-file", no_argument, 0, 'N' }, + { "rcs", no_argument, 0, 'n' }, + { "unidirectional-new-file", no_argument, 0, 'P' }, + { "show-c-function", no_argument, 0, 'p' }, + { "brief", no_argument, 0, 'q' }, + { "recursive", no_argument, 0, 'r' }, + { "report-identical-files", no_argument, 0, 's' }, + { "starting-file", required_argument, 0, 'S' }, + { "expand-tabs", no_argument, 0, 't' }, + { "initial-tab", no_argument, 0, 'T' }, + { "unified", optional_argument, 0, 'U' }, + { "ignore-all-space", no_argument, 0, 'w' }, + { "exclude", required_argument, 0, 'x' }, + { "exclude-from", required_argument, 0, 'X' }, + { NULL, 0, 0, '\0'} +}; + +__dead void usage(void); +void push_excludes(char *); +void push_ignore_pats(char *); +void read_excludes_file(char *file); +void set_argstr(char **, char **); + +int +main(int argc, char **argv) +{ + char *ep, **oargv; + long l; + int ch, dflags, lastch, gotstdin, prevoptind, newarg; + + oargv 
= argv; + gotstdin = 0; + dflags = 0; + lastch = '\0'; + prevoptind = 1; + newarg = 1; + while ((ch = getopt_long(argc, argv, OPTIONS, longopts, NULL)) != -1) { + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg) + usage(); /* disallow -[0-9]+ */ + else if (lastch == 'c' || lastch == 'u') + diff_context = 0; + else if (!isdigit(lastch) || diff_context > INT_MAX / 10) + usage(); + diff_context = (diff_context * 10) + (ch - '0'); + break; + case 'a': + dflags |= D_FORCEASCII; + break; + case 'b': + dflags |= D_FOLDBLANKS; + break; + case 'C': + case 'c': + diff_format = D_CONTEXT; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } else + diff_context = 3; + break; + case 'd': + dflags |= D_MINIMAL; + break; + case 'D': + diff_format = D_IFDEF; + ifdefname = optarg; + break; + case 'e': + diff_format = D_EDIT; + break; + case 'f': + diff_format = D_REVERSE; + break; + case 'h': + /* silently ignore for backwards compatibility */ + break; + case 'I': + push_ignore_pats(optarg); + break; + case 'i': + dflags |= D_IGNORECASE; + break; + case 'L': + if (label[0] == NULL) + label[0] = optarg; + else if (label[1] == NULL) + label[1] = optarg; + else + usage(); + break; + case 'N': + Nflag = 1; + break; + case 'n': + diff_format = D_NREVERSE; + break; + case 'p': + dflags |= D_PROTOTYPE; + break; + case 'P': + Pflag = 1; + break; + case 'r': + rflag = 1; + break; + case 'q': + diff_format = D_BRIEF; + break; + case 'S': + start = optarg; + break; + case 's': + sflag = 1; + break; + case 'T': + Tflag = 1; + break; + case 't': + dflags |= D_EXPANDTABS; + break; + case 'U': + case 'u': + diff_format = D_UNIFIED; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } else + diff_context = 3; + break; + case 'w': + dflags |= 
D_IGNOREBLANKS; + break; + case 'X': + read_excludes_file(optarg); + break; + case 'x': + push_excludes(optarg); + break; + default: + usage(); + break; + } + lastch = ch; + newarg = optind != prevoptind; + prevoptind = optind; + } + argc -= optind; + argv += optind; + + if (pledge("stdio rpath tmppath", NULL) == -1) + err(2, "pledge"); + + /* + * Do sanity checks, fill in stb1 and stb2 and call the appropriate + * driver routine. Both drivers use the contents of stb1 and stb2. + */ + if (argc != 2) + usage(); + if (ignore_pats != NULL) { + char buf[BUFSIZ]; + int error; + + if ((error = regcomp(&ignore_re, ignore_pats, + REG_NEWLINE | REG_EXTENDED)) != 0) { + regerror(error, &ignore_re, buf, sizeof(buf)); + if (*ignore_pats != '\0') + errx(2, "%s: %s", ignore_pats, buf); + else + errx(2, "%s", buf); + } + } + if (strcmp(argv[0], "-") == 0) { + fstat(STDIN_FILENO, &stb1); + gotstdin = 1; + } else if (stat(argv[0], &stb1) != 0) + err(2, "%s", argv[0]); + if (strcmp(argv[1], "-") == 0) { + fstat(STDIN_FILENO, &stb2); + gotstdin = 1; + } else if (stat(argv[1], &stb2) != 0) + err(2, "%s", argv[1]); + if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) + errx(2, "can't compare - to a directory"); + set_argstr(oargv, argv); + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (diff_format == D_IFDEF) + errx(2, "-D option not supported with directories"); + diffdir(argv[0], argv[1], dflags); + } else { + if (S_ISDIR(stb1.st_mode)) { + argv[0] = splice(argv[0], argv[1]); + if (stat(argv[0], &stb1) == -1) + err(2, "%s", argv[0]); + } + if (S_ISDIR(stb2.st_mode)) { + argv[1] = splice(argv[1], argv[0]); + if (stat(argv[1], &stb2) == -1) + err(2, "%s", argv[1]); + } + print_status(diffreg(argv[0], argv[1], dflags), argv[0], argv[1], + ""); + } + exit(status); +} + +void +set_argstr(char **av, char **ave) +{ + size_t argsize; + char **ap; + + argsize = 4 + *ave - *av + 1; + diffargs = xmalloc(argsize); + strlcpy(diffargs, "diff", argsize); + for (ap = av + 
1; ap < ave; ap++) { + if (strcmp(*ap, "--") != 0) { + strlcat(diffargs, " ", argsize); + strlcat(diffargs, *ap, argsize); + } + } +} + +/* + * Read in an excludes file and push each line. + */ +void +read_excludes_file(char *file) +{ + FILE *fp; + char *buf, *pattern; + size_t len; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) + err(2, "%s", file); + while ((buf = fgetln(fp, &len)) != NULL) { + if (buf[len - 1] == '\n') + len--; + pattern = xmalloc(len + 1); + memcpy(pattern, buf, len); + pattern[len] = '\0'; + push_excludes(pattern); + } + if (strcmp(file, "-") != 0) + fclose(fp); +} + +/* + * Push a pattern onto the excludes list. + */ +void +push_excludes(char *pattern) +{ + struct excludes *entry; + + entry = xmalloc(sizeof(*entry)); + entry->pattern = pattern; + entry->next = excludes_list; + excludes_list = entry; +} + +void +push_ignore_pats(char *pattern) +{ + size_t len; + + if (ignore_pats == NULL) + ignore_pats = xstrdup(pattern); + else { + /* old + "|" + new + NUL */ + len = strlen(ignore_pats) + strlen(pattern) + 2; + ignore_pats = xreallocarray(ignore_pats, 1, len); + strlcat(ignore_pats, "|", len); + strlcat(ignore_pats, pattern, len); + } +} + +void +print_only(const char *path, size_t dirlen, const char *entry) +{ + if (dirlen > 1) + dirlen--; + printf("Only in %.*s: %s\n", (int)dirlen, path, entry); +} + +void +print_status(int val, char *path1, char *path2, char *entry) +{ + switch (val) { + case D_BINARY: + printf("Binary files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_DIFFER: + if (diff_format == D_BRIEF) + printf("Files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_SAME: + if (sflag) + printf("Files %s%s and %s%s are identical\n", + path1, entry, path2, entry); + break; + case D_MISMATCH1: + printf("File %s%s is a directory while file %s%s is a regular file\n", + path1, entry, path2, entry); + break; + case D_MISMATCH2: + printf("File %s%s 
is a regular file while file %s%s is a directory\n", + path1, entry, path2, entry); + break; + case D_SKIPPED1: + printf("File %s%s is not a regular file or directory and was skipped\n", + path1, entry); + break; + case D_SKIPPED2: + printf("File %s%s is not a regular file or directory and was skipped\n", + path2, entry); + break; + } +} + +__dead void +usage(void) +{ + (void)fprintf(stderr, + "usage: diff [-abdipTtw] [-c | -e | -f | -n | -q | -u] [-I pattern] [-L label]\n" + " file1 file2\n" + " diff [-abdipTtw] [-I pattern] [-L label] -C number file1 file2\n" + " diff [-abditw] [-I pattern] -D string file1 file2\n" + " diff [-abdipTtw] [-I pattern] [-L label] -U number file1 file2\n" + " diff [-abdiNPprsTtw] [-c | -e | -f | -n | -q | -u] [-I pattern]\n" + " [-L label] [-S name] [-X file] [-x pattern] dir1 dir2\n"); + + exit(2); +} diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h new file mode 100644 index 0000000..3a36222 --- /dev/null +++ b/usr.bin/diff/diff.h @@ -0,0 +1,98 @@ + + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diff.h 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/types.h> +#include <regex.h> + +/* + * Output format options + */ +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_UNIFIED 3 /* Unified context diff */ +#define D_IFDEF 4 /* Diff with merged #ifdef's */ +#define D_NREVERSE 5 /* Reverse ed script with numbered + lines and no trailing . 
*/ +#define D_BRIEF 6 /* Say if the files differ */ + +/* + * Output flags + */ +#define D_HEADER 0x001 /* Print a header/footer between files */ +#define D_EMPTY1 0x002 /* Treat first file as empty (/dev/null) */ +#define D_EMPTY2 0x004 /* Treat second file as empty (/dev/null) */ + +/* + * Command line flags + */ +#define D_FORCEASCII 0x008 /* Treat file as ascii regardless of content */ +#define D_FOLDBLANKS 0x010 /* Treat all white space as equal */ +#define D_MINIMAL 0x020 /* Make diff as small as possible */ +#define D_IGNORECASE 0x040 /* Case-insensitive matching */ +#define D_PROTOTYPE 0x080 /* Display C function prototype */ +#define D_EXPANDTABS 0x100 /* Expand tabs to spaces */ +#define D_IGNOREBLANKS 0x200 /* Ignore white space changes */ + +/* + * Status values for print_status() and diffreg() return values + */ +#define D_SAME 0 /* Files are the same */ +#define D_DIFFER 1 /* Files are different */ +#define D_BINARY 2 /* Binary files are different */ +#define D_MISMATCH1 3 /* path1 was a dir, path2 a file */ +#define D_MISMATCH2 4 /* path1 was a file, path2 a dir */ +#define D_SKIPPED1 5 /* path1 was a special file */ +#define D_SKIPPED2 6 /* path2 was a special file */ + +struct excludes { + char *pattern; + struct excludes *next; +}; + +extern int Nflag, Pflag, rflag, sflag, Tflag; +extern int diff_format, diff_context, status; +extern char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +extern struct stat stb1, stb2; +extern struct excludes *excludes_list; +extern regex_t ignore_re; + +char *splice(char *, char *); +int diffreg(char *, char *, int); +int easprintf(char **, const char *, ...); +void *emalloc(size_t); +void *erealloc(void *, size_t); +void diffdir(char *, char *, int); +void print_only(const char *, size_t, const char *); +void print_status(int, char *, char *, char *); diff --git a/usr.bin/diff/diffdir.c b/usr.bin/diff/diffdir.c new file mode 100644 index 0000000..ad1acdb --- /dev/null +++ b/usr.bin/diff/diffdir.c @@ -0,0 
+1,237 @@ +/* $OpenBSD: diffdir.c,v 1.47 2019/01/25 00:19:26 millert Exp $ */ + +/* + * Copyright (c) 2003, 2010 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include <sys/stat.h> + +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <paths.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +static int selectfile(const struct dirent *); +static void diffit(struct dirent *, char *, size_t, char *, size_t, int); + +#define d_status d_type /* we need to store status for -l */ + +/* + * Diff directory traversal. Will be called recursively if -r was specified. + */ +void +diffdir(char *p1, char *p2, int flags) +{ + struct dirent *dent1, **dp1, **edp1, **dirp1 = NULL; + struct dirent *dent2, **dp2, **edp2, **dirp2 = NULL; + size_t dirlen1, dirlen2; + char path1[PATH_MAX], path2[PATH_MAX]; + int pos; + + dirlen1 = strlcpy(path1, *p1 ? 
p1 : ".", sizeof(path1)); + if (dirlen1 >= sizeof(path1) - 1) { + warnc(ENAMETOOLONG, "%s", p1); + status |= 2; + return; + } + if (path1[dirlen1 - 1] != '/') { + path1[dirlen1++] = '/'; + path1[dirlen1] = '\0'; + } + dirlen2 = strlcpy(path2, *p2 ? p2 : ".", sizeof(path2)); + if (dirlen2 >= sizeof(path2) - 1) { + warnc(ENAMETOOLONG, "%s", p2); + status |= 2; + return; + } + if (path2[dirlen2 - 1] != '/') { + path2[dirlen2++] = '/'; + path2[dirlen2] = '\0'; + } + + /* + * Get a list of entries in each directory, skipping "excluded" files + * and sorting alphabetically. + */ + pos = scandir(path1, &dirp1, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && (Nflag || Pflag)) { + pos = 0; + } else { + warn("%s", path1); + goto closem; + } + } + dp1 = dirp1; + edp1 = dirp1 + pos; + + pos = scandir(path2, &dirp2, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && Nflag) { + pos = 0; + } else { + warn("%s", path2); + goto closem; + } + } + dp2 = dirp2; + edp2 = dirp2 + pos; + + /* + * If we were given a starting point, find it. + */ + if (start != NULL) { + while (dp1 != edp1 && strcmp((*dp1)->d_name, start) < 0) + dp1++; + while (dp2 != edp2 && strcmp((*dp2)->d_name, start) < 0) + dp2++; + } + + /* + * Iterate through the two directory lists, diffing as we go. + */ + while (dp1 != edp1 || dp2 != edp2) { + dent1 = dp1 != edp1 ? *dp1 : NULL; + dent2 = dp2 != edp2 ? *dp2 : NULL; + + pos = dent1 == NULL ? 1 : dent2 == NULL ? 
-1 : + strcmp(dent1->d_name, dent2->d_name); + if (pos == 0) { + /* file exists in both dirs, diff it */ + diffit(dent1, path1, dirlen1, path2, dirlen2, flags); + dp1++; + dp2++; + } else if (pos < 0) { + /* file only in first dir, only diff if -N */ + if (Nflag) { + diffit(dent1, path1, dirlen1, path2, dirlen2, + flags); + } else { + print_only(path1, dirlen1, dent1->d_name); + status |= 1; + } + dp1++; + } else { + /* file only in second dir, only diff if -N or -P */ + if (Nflag || Pflag) { + diffit(dent2, path1, dirlen1, path2, dirlen2, + flags); + } else { + print_only(path2, dirlen2, dent2->d_name); + status |= 1; + } + dp2++; + } + } + +closem: + if (dirp1 != NULL) { + for (dp1 = dirp1; dp1 < edp1; dp1++) + free(*dp1); + free(dirp1); + } + if (dirp2 != NULL) { + for (dp2 = dirp2; dp2 < edp2; dp2++) + free(*dp2); + free(dirp2); + } +} + +/* + * Do the actual diff by calling either diffreg() or diffdir(). + */ +static void +diffit(struct dirent *dp, char *path1, size_t plen1, char *path2, size_t plen2, + int flags) +{ + flags |= D_HEADER; + strlcpy(path1 + plen1, dp->d_name, PATH_MAX - plen1); + if (stat(path1, &stb1) != 0) { + if (!(Nflag || Pflag) || errno != ENOENT) { + warn("%s", path1); + return; + } + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + } + + strlcpy(path2 + plen2, dp->d_name, PATH_MAX - plen2); + if (stat(path2, &stb2) != 0) { + if (!Nflag || errno != ENOENT) { + warn("%s", path2); + return; + } + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (rflag) + diffdir(path1, path2, flags); + else + printf("Common subdirectories: %s and %s\n", + path1, path2); + return; + } + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + dp->d_status = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + dp->d_status = D_SKIPPED2; + else + dp->d_status = diffreg(path1, 
path2, flags); + print_status(dp->d_status, path1, path2, ""); +} + +/* + * Returns 1 if the directory entry should be included in the + * diff, else 0. Checks the excludes list. + */ +static int +selectfile(const struct dirent *dp) +{ + struct excludes *excl; + + if (dp->d_fileno == 0) + return (0); + + /* always skip "." and ".." */ + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) + return (0); + + /* check excludes list */ + for (excl = excludes_list; excl != NULL; excl = excl->next) + if (fnmatch(excl->pattern, dp->d_name, FNM_PATHNAME) == 0) + return (0); + + return (1); +} diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c new file mode 100644 index 0000000..fc0029d --- /dev/null +++ b/usr.bin/diff/diffreg.c @@ -0,0 +1,1485 @@ +/* $OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $ */ + +/* + * Copyright (C) Caldera International Inc. 2001-2002. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code and documentation must retain the above + * copyright notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * 4. Neither the name of Caldera International, Inc. nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, + * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/stat.h> +#include <sys/wait.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <paths.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <limits.h> + +#include "diff.h" +#include "xmalloc.h" + +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) +#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) + +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds + * a pair of longest identical subsequences in the two + * files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i in file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. 
+ * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0, y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * than the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel. + * + * With J in hand, the matches there recorded are + * check'ed against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. 
+ */ + +struct cand { + int x; + int y; + int pred; +}; + +struct line { + int serial; + int value; +} *file[2]; + +/* + * The following struct is used to record change information when + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +#define diff_output printf +static FILE *opentemp(const char *); +static void output(char *, FILE *, char *, FILE *, int); +static void check(FILE *, FILE *, int); +static void range(int, int, char *); +static void uni_range(int, int); +static void dump_context_vec(FILE *, FILE *, int); +static void dump_unified_vec(FILE *, FILE *, int); +static void prepare(int, FILE *, off_t, int); +static void prune(void); +static void equiv(struct line *, int, struct line *, int, int *); +static void unravel(int); +static void unsort(struct line *, int, int *); +static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); +static void sort(struct line *, int); +static void print_header(const char *, const char *); +static int ignoreline(char *); +static int asciifile(FILE *); +static int fetch(long *, int, int, FILE *, int, int, int); +static int newcand(int, int, int); +static int search(int *, int, int); +static int skipline(FILE *); +static int isqrt(int); +static int stone(int *, int, int *, int *, int); +static int readhash(FILE *, int); +static int files_differ(FILE *, FILE *, int); +static char *match_function(const long *, int, FILE *); +static char *preadline(int, size_t, off_t); + +static int *J; /* will be overlaid on class */ +static int *class; /* will be overlaid on file[0] */ +static int *klist; /* will be overlaid on file[0] after class */ +static int *member; /* will be overlaid on file[1] */ +static int clen; +static int inifdef; /* whether or not we are in a #ifdef 
block */ +static int len[2]; +static int pref, suff; /* length of prefix and suffix */ +static int slen[2]; +static int anychange; +static long *ixnew; /* will be overlaid on file[1] */ +static long *ixold; /* will be overlaid on klist */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static int clistlen; /* the length of clist */ +static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ +static u_char *chrtran; /* translation table for case-folding */ +static struct context_vec *context_vec_start; +static struct context_vec *context_vec_end; +static struct context_vec *context_vec_ptr; + +#define FUNCTION_CONTEXT_SIZE 55 +static char lastbuf[FUNCTION_CONTEXT_SIZE]; +static int lastline; +static int lastmatchline; + + +/* + * chrtran points to one of 2 translation tables: cup2low if folding upper to + * lower case clow2low if not folding case + */ +u_char clow2low[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, + 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, + 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, + 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, + 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 
0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, + 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, + 0xfd, 0xfe, 0xff +}; + +u_char cup2low[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, + 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, + 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, + 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 
0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, + 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, + 0xfd, 0xfe, 0xff +}; + +int +diffreg(char *file1, char *file2, int flags) +{ + FILE *f1, *f2; + int i, rval; + + f1 = f2 = NULL; + rval = D_SAME; + anychange = 0; + lastline = 0; + lastmatchline = 0; + context_vec_ptr = context_vec_start - 1; + if (flags & D_IGNORECASE) + chrtran = cup2low; + else + chrtran = clow2low; + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); + if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) + goto closem; + + if (flags & D_EMPTY1) + f1 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb1.st_mode)) { + if ((f1 = opentemp(file1)) == NULL || + fstat(fileno(f1), &stb1) == -1) { + warn("%s", file1); + status |= 2; + goto closem; + } + } else if (strcmp(file1, "-") == 0) + f1 = stdin; + else + f1 = fopen(file1, "r"); + } + if (f1 == NULL) { + warn("%s", file1); + status |= 2; + goto closem; + } + + if (flags & D_EMPTY2) + f2 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb2.st_mode)) { + if ((f2 = opentemp(file2)) == NULL || + fstat(fileno(f2), &stb2) == -1) { + warn("%s", file2); + status |= 2; + goto closem; + } + } else if (strcmp(file2, "-") == 0) + f2 = stdin; + else + f2 = fopen(file2, "r"); + } + if (f2 == NULL) { + warn("%s", file2); + status |= 2; + goto closem; + } + + switch (files_differ(f1, f2, flags)) { + case 0: + goto closem; + case 1: + break; + default: + /* error */ + status |= 2; + goto closem; + } + + if ((flags & D_FORCEASCII) == 0 && + (!asciifile(f1) || !asciifile(f2))) { + rval = D_BINARY; + status |= 1; + goto closem; + } + prepare(0, f1, stb1.st_size, flags); + prepare(1, f2, stb2.st_size, flags); + + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xreallocarray(member, slen[1] + 2, sizeof(*member)); + + 
class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = xreallocarray(class, slen[0] + 2, sizeof(*class)); + + klist = xcalloc(slen[0] + 2, sizeof(*klist)); + clen = 0; + clistlen = 100; + clist = xcalloc(clistlen, sizeof(*clist)); + i = stone(class, slen[0], member, klist, flags); + free(member); + free(class); + + J = xreallocarray(J, len[0] + 2, sizeof(*J)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); + ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); + check(f1, f2, flags); + output(file1, f1, file2, f2, flags); +closem: + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + if (f1 != NULL) + fclose(f1); + if (f2 != NULL) + fclose(f2); + + return (rval); +} + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. + * XXX - could use code from cmp(1) [faster] + */ +static int +files_differ(FILE *f1, FILE *f2, int flags) +{ + char buf1[BUFSIZ], buf2[BUFSIZ]; + size_t i, j; + + if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || + (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) + return (1); + for (;;) { + i = fread(buf1, 1, sizeof(buf1), f1); + j = fread(buf2, 1, sizeof(buf2), f2); + if ((!i && ferror(f1)) || (!j && ferror(f2))) + return (-1); + if (i != j) + return (1); + if (i == 0) + return (0); + if (memcmp(buf1, buf2, i) != 0) + return (1); + } +} + +static FILE * +opentemp(const char *file) +{ + char buf[BUFSIZ], tempfile[PATH_MAX]; + ssize_t nread; + int ifd, ofd; + + if (strcmp(file, "-") == 0) + ifd = STDIN_FILENO; + else if ((ifd = open(file, O_RDONLY, 0644)) == -1) + return (NULL); + + (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); + + if ((ofd = mkstemp(tempfile)) == -1) { + close(ifd); + return (NULL); + } + unlink(tempfile); + while ((nread = read(ifd, buf, BUFSIZ)) > 0) { + if (write(ofd, buf, nread) != nread) { + close(ifd); + 
close(ofd); + return (NULL); + } + } + close(ifd); + lseek(ofd, (off_t)0, SEEK_SET); + return (fdopen(ofd, "r")); +} + +char * +splice(char *dir, char *file) +{ + char *tail, *buf; + size_t dirlen; + + dirlen = strlen(dir); + while (dirlen != 0 && dir[dirlen - 1] == '/') + dirlen--; + if ((tail = strrchr(file, '/')) == NULL) + tail = file; + else + tail++; + xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); + return (buf); +} + +static void +prepare(int i, FILE *fd, off_t filesize, int flags) +{ + struct line *p; + int j, h; + size_t sz; + + rewind(fd); + + sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; + if (sz < 100) + sz = 100; + + p = xcalloc(sz + 3, sizeof(*p)); + for (j = 0; (h = readhash(fd, flags));) { + if (j == sz) { + sz = sz * 3 / 2; + p = xreallocarray(p, sz + 3, sizeof(*p)); + } + p[++j].value = h; + } + len[i] = j; + file[i] = p; +} + +static void +prune(void) +{ + int i, j; + + for (pref = 0; pref < len[0] && pref < len[1] && + file[0][pref + 1].value == file[1][pref + 1].value; + pref++) + ; + for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && + file[0][len[0] - suff].value == file[1][len[1] - suff].value; + suff++) + ; + for (j = 0; j < 2; j++) { + sfile[j] = file[j] + pref; + slen[j] = len[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +/* Code taken from ping.c */ +static int +isqrt(int n) +{ + int y, x = 1; + + if (n == 0) + return (0); + + do { /* newton was a stinker */ + y = x; + x = n / x; + x += y; + x /= 2; + } while 
((x - y) > 1 || (x - y) < -1); + + return (x); +} + +static int +stone(int *a, int n, int *b, int *c, int flags) +{ + int i, k, y, j, l; + int oldc, tc, oldl, sq; + u_int numtries, bound; + + if (flags & D_MINIMAL) + bound = UINT_MAX; + else { + sq = isqrt(n); + bound = MAXIMUM(256, sq); + } + + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return (k); +} + +static int +newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xreallocarray(clist, clistlen, sizeof(*clist)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return (clen++); +} + +static int +search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return (k + 1); + i = 0; + j = k + 1; + for (;;) { + l = (i + j) / 2; + if (l <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return (l); + } + return (l + 1); +} + +static void +unravel(int p) +{ + struct cand *q; + int i; + + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : + i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due + * to confounding by hashing (which result in "jackpot") + * 2. 
collect random access indexes to the two files + */ +static void +check(FILE *f1, FILE *f2, int flags) +{ + int i, j, jackpot, c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for (i = 1; i <= len[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) { + for (;;) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file for -b or -w. + */ + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { + if (c == EOF && d == '\n') { + ctnew++; + break; + } else if (c == '\n' && d == EOF) { + ctold++; + break; + } + } + ctold++; + ctnew++; + if ((flags & D_FOLDBLANKS) && isspace(c) && + isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + } while (isspace(c = getc(f1))); + do { + if (d == '\n') + break; + ctnew++; + } while (isspace(d = getc(f2))); + } else if ((flags & D_IGNOREBLANKS)) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + if (chrtran[c] != chrtran[d]) { + jackpot++; + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + for (;;) { + ctold++; + ctnew++; + if ((c = getc(f1)) != (d = getc(f2))) { + /* jackpot++; */ + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= len[1]; j++) + ixnew[j] = ctnew += skipline(f2); + /* + * if (jackpot) + * fprintf(stderr, "jackpot\n"); + */ +} + +/* shellsort CACM #201 */ +static void +sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 
0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value || + (aim->value == ai[0].value && + aim->serial > ai[0].serial)) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xcalloc(l + 1, sizeof(*a)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + +static int +skipline(FILE *f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return (i); +} + +static void +output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) +{ + int m, i0, i1, j0, j1; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + if (diff_format != D_EDIT) { + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j0 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j1 = J[i1 + 1] - 1; + J[i1] = j1; + change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); + } + } else { + for (i0 = m; i0 >= 1; i0 = i1 - 1) { + while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) + i0--; + j0 = J[i0 + 1] - 1; + i1 = i0 + 1; + while (i1 > 1 && J[i1 - 1] == 0) + i1--; + j1 = J[i1 - 1] + 1; + J[i1] = j1; + change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); + } + } + if (m == 0) + change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); + if (diff_format == D_IFDEF) { + for (;;) { +#define c i0 + if ((c = getc(f1)) == EOF) + return; + diff_output("%c", c); + } +#undef c + } + if (anychange != 0) { + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, flags); + else if (diff_format == 
D_UNIFIED) + dump_unified_vec(f1, f2, flags); + } +} + +static void +range(int a, int b, char *separator) +{ + diff_output("%d", a > b ? b : a); + if (a < b) + diff_output("%s%d", separator, b); +} + +static void +uni_range(int a, int b) +{ + if (a < b) + diff_output("%d,%d", a, b - a + 1); + else if (a == b) + diff_output("%d", b); + else + diff_output("%d,0", b); +} + +static char * +preadline(int fd, size_t rlen, off_t off) +{ + char *line; + ssize_t nr; + + line = xmalloc(rlen + 1); + if ((nr = pread(fd, line, rlen, off)) == -1) + err(2, "preadline"); + if (nr > 0 && line[nr-1] == '\n') + nr--; + line[nr] = '\0'; + return (line); +} + +static int +ignoreline(char *line) +{ + int ret; + + ret = regexec(&ignore_re, line, 0, NULL, 0); + free(line); + return (ret == 0); /* if it matched, it should be ignored. */ +} + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater then b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void +change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, + int *pflags) +{ + static size_t max_context = 64; + int i; + +restart: + if (diff_format != D_IFDEF && a > b && c > d) + return; + if (ignore_pats != NULL) { + char *line; + /* + * All lines in the change, insert, or delete must + * match an ignore pattern for the change to be + * ignored. + */ + if (a <= b) { /* Changes and deletes. */ + for (i = a; i <= b; i++) { + line = preadline(fileno(f1), + ixold[i] - ixold[i - 1], ixold[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + if (a > b || c <= d) { /* Changes and inserts. 
*/ + for (i = c; i <= d; i++) { + line = preadline(fileno(f2), + ixnew[i] - ixnew[i - 1], ixnew[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + return; + } +proceed: + if (*pflags & D_HEADER) { + diff_output("%s %s %s\n", diffargs, file1, file2); + *pflags &= ~D_HEADER; + } + if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { + /* + * Allocate change records as needed. + */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + max_context <<= 1; + context_vec_start = xreallocarray(context_vec_start, + max_context, sizeof(*context_vec_start)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + anychange = 1; + } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && + c > context_vec_ptr->d + (2 * diff_context) + 1) { + /* + * If this change is more than 'diff_context' lines from the + * previous change, dump the record and reset it. + */ + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, *pflags); + else + dump_unified_vec(f1, f2, *pflags); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + if (anychange == 0) + anychange = 1; + switch (diff_format) { + case D_BRIEF: + return; + case D_NORMAL: + case D_EDIT: + range(a, b, ","); + diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + if (diff_format == D_NORMAL) + range(c, d, ","); + diff_output("\n"); + break; + case D_REVERSE: + diff_output("%c", a > b ? 'a' : c > d ? 
'd' : 'c'); + range(a, b, " "); + diff_output("\n"); + break; + case D_NREVERSE: + if (a > b) + diff_output("a%d %d\n", b, d - c + 1); + else { + diff_output("d%d %d\n", a, b - a + 1); + if (!(c > d)) + /* add changed lines */ + diff_output("a%d %d\n", b, d - c + 1); + } + break; + } + if (diff_format == D_NORMAL || diff_format == D_IFDEF) { + fetch(ixold, a, b, f1, '<', 1, *pflags); + if (a <= b && c <= d && diff_format == D_NORMAL) + diff_output("---\n"); + } + i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); + if (i != 0 && diff_format == D_EDIT) { + /* + * A non-zero return value for D_EDIT indicates that the + * last line printed was a bare dot (".") that has been + * escaped as ".." to prevent ed(1) from misinterpreting + * it. We have to add a substitute command to change this + * back and restart where we left off. + */ + diff_output(".\n"); + diff_output("%ds/.//\n", a + i - 1); + b = a + i - 1; + a = b + 1; + c += i; + goto restart; + } + if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) + diff_output(".\n"); + if (inifdef) { + diff_output("#endif /* %s */\n", ifdefname); + inifdef = 0; + } +} + +static int +fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) +{ + int i, j, c, lastc, col, nc; + + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if (diff_format == D_IFDEF && oldfile) { + long curpos = ftell(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a > b ? b : a - 1] - curpos; + for (i = 0; i < nc; i++) + diff_output("%c", getc(lb)); + } + if (a > b) + return (0); + if (diff_format == D_IFDEF) { + if (inifdef) { + diff_output("#else /* %s%s */\n", + oldfile == 1 ? "!" 
: "", ifdefname); + } else { + if (oldfile) + diff_output("#ifndef %s\n", ifdefname); + else + diff_output("#ifdef %s\n", ifdefname); + } + inifdef = 1 + oldfile; + } + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if (diff_format != D_IFDEF && ch != '\0') { + diff_output("%c", ch); + if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT + || diff_format == D_UNIFIED)) + diff_output("\t"); + else if (diff_format != D_UNIFIED) + diff_output(" "); + } + col = 0; + for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { + if ((c = getc(lb)) == EOF) { + if (diff_format == D_EDIT || diff_format == D_REVERSE || + diff_format == D_NREVERSE) + warnx("No newline at end of file"); + else + diff_output("\n\\ No newline at end of " + "file\n"); + return (0); + } + if (c == '\t' && (flags & D_EXPANDTABS)) { + do { + diff_output(" "); + } while (++col & 7); + } else { + if (diff_format == D_EDIT && j == 1 && c == '\n' + && lastc == '.') { + /* + * Don't print a bare "." line + * since that will confuse ed(1). + * Print ".." instead and return, + * giving the caller an offset + * from which to restart. + */ + diff_output(".\n"); + return (i - a + 1); + } + diff_output("%c", c); + col++; + } + } + } + return (0); +} + +/* + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 
+ */ +static int +readhash(FILE *f, int flags) +{ + int i, t, space; + int sum; + + sum = 1; + space = 0; + if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { + if (flags & D_IGNORECASE) + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + chrtran[t]; + } + else + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + t; + } + } else { + for (i = 0;;) { + switch (t = getc(f)) { + case '\t': + case '\r': + case '\v': + case '\f': + case ' ': + space++; + continue; + default: + if (space && (flags & D_IGNOREBLANKS) == 0) { + i++; + space = 0; + } + sum = sum * 127 + chrtran[t]; + i++; + continue; + case EOF: + if (i == 0) + return (0); + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} + +static int +asciifile(FILE *f) +{ + unsigned char buf[BUFSIZ]; + size_t cnt; + + if (f == NULL) + return (1); + + rewind(f); + cnt = fread(buf, 1, sizeof(buf), f); + return (memchr(buf, '\0', cnt) == NULL); +} + +#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) + +static char * +match_function(const long *f, int pos, FILE *fp) +{ + unsigned char buf[FUNCTION_CONTEXT_SIZE]; + size_t nc; + int last = lastline; + char *state = NULL; + + lastline = pos; + while (pos > last) { + fseek(fp, f[pos - 1], SEEK_SET); + nc = f[pos] - f[pos - 1]; + if (nc >= sizeof(buf)) + nc = sizeof(buf) - 1; + nc = fread(buf, 1, nc, fp); + if (nc > 0) { + buf[nc] = '\0'; + buf[strcspn(buf, "\n")] = '\0'; + if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if (!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + 
state = " (public)"; + } else { + strlcpy(lastbuf, buf, sizeof lastbuf); + if (state) + strlcat(lastbuf, state, + sizeof lastbuf); + lastmatchline = pos; + return lastbuf; + } + } + } + pos--; + } + return lastmatchline > 0 ? lastbuf : NULL; +} + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd, do_output; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAXIMUM(1, cvp->a - diff_context); + upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); + lowc = MAXIMUM(1, cvp->c - diff_context); + upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); + + diff_output("***************"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n*** "); + range(lowa, upb, ","); + diff_output(" ****\n"); + + /* + * Output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for (; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold, lowa, b, f1, ' ', 0, flags); + else { + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, + ch == 'c' ? '!' 
: '-', 0, flags); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b + 1, upb, f1, ' ', 0, flags); + } + /* output changes to the "new" file */ + diff_output("--- "); + range(lowc, upd, ","); + diff_output(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew, lowc, d, f2, ' ', 0, flags); + else { + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, + ch == 'c' ? '!' : '+', 0, flags); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + } + context_vec_ptr = context_vec_start - 1; +} + +/* dump accumulated "unified" diff changes */ +static void +dump_unified_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAXIMUM(1, cvp->a - diff_context); + upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); + lowc = MAXIMUM(1, cvp->c - diff_context); + upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); + + diff_output("@@ -"); + uni_range(lowa, upb); + diff_output(" +"); + uni_range(lowc, upd); + diff_output(" @@"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. 
+ */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + switch (ch) { + case 'c': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + } + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + + context_vec_ptr = context_vec_start - 1; +} + +static void +print_header(const char *file1, const char *file2) +{ + if (label[0] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", + label[0]); + else + diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---", + file1, ctime(&stb1.st_mtime)); + if (label[1] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", + label[1]); + else + diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++", + file2, ctime(&stb2.st_mtime)); +} diff --git a/usr.bin/diff/xmalloc.c b/usr.bin/diff/xmalloc.c new file mode 100644 index 0000000..ce0f454 --- /dev/null +++ b/usr.bin/diff/xmalloc.c @@ -0,0 +1,85 @@ +/* $OpenBSD: xmalloc.c,v 1.10 2019/06/28 05:44:09 deraadt Exp $ */ +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. 
Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#include <err.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xmalloc.h" + +void * +xmalloc(size_t size) +{ + void *ptr; + + if (size == 0) + errx(2, "xmalloc: zero size"); + ptr = malloc(size); + if (ptr == NULL) + err(2, "xmalloc: allocating %zu bytes", size); + return ptr; +} + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *ptr; + + ptr = calloc(nmemb, size); + if (ptr == NULL) + err(2, "xcalloc: allocating %zu * %zu bytes", nmemb, size); + return ptr; +} + +void * +xreallocarray(void *ptr, size_t nmemb, size_t size) +{ + void *new_ptr; + + new_ptr = reallocarray(ptr, nmemb, size); + if (new_ptr == NULL) + err(2, "xreallocarray: allocating %zu * %zu bytes", + nmemb, size); + return new_ptr; +} + +char * +xstrdup(const char *str) +{ + char *cp; + + if ((cp = strdup(str)) == NULL) + err(2, "xstrdup"); + return cp; +} + +int +xasprintf(char **ret, const char *fmt, ...) +{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vasprintf(ret, fmt, ap); + va_end(ap); + + if (i == -1) + err(2, "xasprintf"); + + return i; +} diff --git a/usr.bin/diff/xmalloc.h b/usr.bin/diff/xmalloc.h new file mode 100644 index 0000000..2139671 --- /dev/null +++ b/usr.bin/diff/xmalloc.h @@ -0,0 +1,30 @@ +/* $OpenBSD: xmalloc.h,v 1.4 2015/11/12 16:30:30 mmcc Exp $ */ + +/* + * Author: Tatu Ylonen <ylo@cs.hut.fi> + * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland + * All rights reserved + * Created: Mon Mar 20 22:09:17 1995 ylo + * + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). 
+ * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#ifndef XMALLOC_H +#define XMALLOC_H + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +char *xstrdup(const char *); +int xasprintf(char **, const char *, ...) + __attribute__((__format__ (printf, 2, 3))) + __attribute__((__nonnull__ (2))); + +#endif /* XMALLOC_H */ diff --git a/usr.bin/doas/CVS/Entries b/usr.bin/doas/CVS/Entries new file mode 100644 index 0000000..dbdad45 --- /dev/null +++ b/usr.bin/doas/CVS/Entries @@ -0,0 +1,8 @@ +/Makefile/1.3/Mon Jul 3 22:21:47 2017// +/doas.1/1.23/Thu Jul 4 19:04:17 2019// +/doas.c/1.82/Fri Oct 18 17:15:45 2019// +/doas.conf.5/1.42/Mon Feb 10 13:18:20 2020// +/doas.h/1.15/Mon Jun 17 19:51:23 2019// +/env.c/1.10/Sun Jul 7 19:21:28 2019// +/parse.y/1.27/Wed Jul 11 07:39:22 2018// +D diff --git a/usr.bin/doas/CVS/Repository b/usr.bin/doas/CVS/Repository new file mode 100644 index 0000000..84a451b --- /dev/null +++ b/usr.bin/doas/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/doas diff --git a/usr.bin/doas/CVS/Root b/usr.bin/doas/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/doas/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/doas/Makefile b/usr.bin/doas/Makefile new file mode 100644 index 0000000..f2e5529 --- /dev/null +++ b/usr.bin/doas/Makefile @@ -0,0 +1,15 @@ +# $OpenBSD: Makefile,v 1.3 2017/07/03 22:21:47 espie Exp $ + +SRCS= parse.y doas.c env.c + +PROG= doas +MAN= doas.1 doas.conf.5 + +BINOWN= root +BINMODE=4555 + +CFLAGS+= -I${.CURDIR} +COPTS+= -Wall +YFLAGS= + +.include <bsd.prog.mk> diff --git a/usr.bin/doas/doas.1 b/usr.bin/doas/doas.1 new file mode 100644 index 0000000..c7196e3 --- 
/dev/null +++ b/usr.bin/doas/doas.1 @@ -0,0 +1,130 @@ +.\" $OpenBSD: doas.1,v 1.23 2019/07/04 19:04:17 tedu Exp $ +.\" +.\"Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> +.\" +.\"Permission to use, copy, modify, and distribute this software for any +.\"purpose with or without fee is hereby granted, provided that the above +.\"copyright notice and this permission notice appear in all copies. +.\" +.\"THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\"WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\"MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\"ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\"WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\"ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\"OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.Dd $Mdocdate: July 4 2019 $ +.Dt DOAS 1 +.Os +.Sh NAME +.Nm doas +.Nd execute commands as another user +.Sh SYNOPSIS +.Nm doas +.Op Fl Lns +.Op Fl C Ar config +.Op Fl u Ar user +.Ar command +.Op Ar args +.Sh DESCRIPTION +The +.Nm +utility executes the given command as another user. +The +.Ar command +argument is mandatory unless +.Fl C , +.Fl L , +or +.Fl s +is specified. +.Pp +The user will be required to authenticate by entering their password, +unless configured otherwise. +.Pp +By default, a new environment is created. +The variables +.Ev HOME , +.Ev LOGNAME , +.Ev PATH , +.Ev SHELL , +and +.Ev USER +and the +.Xr umask 2 +are set to values appropriate for the target user. +.Ev DOAS_USER +is set to the name of the user executing +.Nm . +The variables +.Ev DISPLAY +and +.Ev TERM +are inherited from the current environment. +This behavior may be modified by the config file. +The working directory is not changed. +.Pp +The options are as follows: +.Bl -tag -width tenletters +.It Fl C Ar config +Parse and check the configuration file +.Ar config , +then exit. 
+If +.Ar command +is supplied, +.Nm +will also perform command matching. +In the latter case +either +.Sq permit , +.Sq permit nopass +or +.Sq deny +will be printed on standard output, depending on command +matching results. +No command is executed. +.It Fl L +Clear any persisted authorizations from previous invocations, +then immediately exit. +No command is executed. +.It Fl n +Non interactive mode, fail if +.Nm +would prompt for password. +.It Fl s +Execute the shell from +.Ev SHELL +or +.Pa /etc/passwd . +.It Fl u Ar user +Execute the command as +.Ar user . +The default is root. +.El +.Sh EXIT STATUS +.Ex -std doas +It may fail for one of the following reasons: +.Pp +.Bl -bullet -compact +.It +The config file +.Pa /etc/doas.conf +could not be parsed. +.It +The user attempted to run a command which is not permitted. +.It +The password was incorrect. +.It +The specified command was not found or is not executable. +.El +.Sh SEE ALSO +.Xr su 1 , +.Xr doas.conf 5 +.Sh HISTORY +The +.Nm +command first appeared in +.Ox 5.8 . +.Sh AUTHORS +.An Ted Unangst Aq Mt tedu@openbsd.org diff --git a/usr.bin/doas/doas.c b/usr.bin/doas/doas.c new file mode 100644 index 0000000..15220e4 --- /dev/null +++ b/usr.bin/doas/doas.c @@ -0,0 +1,482 @@ +/* $OpenBSD: doas.c,v 1.82 2019/10/18 17:15:45 tedu Exp $ */ +/* + * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> + +#include <limits.h> +#include <readpassphrase.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <err.h> +#include <unistd.h> +#include <pwd.h> +#include <grp.h> +#include <syslog.h> +#include <errno.h> +#include <fcntl.h> +#include <shadow.h> + +#include "doas.h" + +#ifndef UID_MAX +#define UID_MAX 65535 +#endif + +#ifndef GID_MAX +#define GID_MAX 65535 +#endif + +static void __dead +usage(void) +{ + fprintf(stderr, "usage: doas [-Lns] [-C config] [-u user]" + " command [args]\n"); + exit(1); +} + +static int +parseuid(const char *s, uid_t *uid) +{ + struct passwd *pw; + const char *errstr; + + if ((pw = getpwnam(s)) != NULL) { + *uid = pw->pw_uid; + if (*uid == UID_MAX) + return -1; + return 0; + } + *uid = strtonum(s, 0, UID_MAX - 1, &errstr); + if (errstr) + return -1; + return 0; +} + +static int +uidcheck(const char *s, uid_t desired) +{ + uid_t uid; + + if (parseuid(s, &uid) != 0) + return -1; + if (uid != desired) + return -1; + return 0; +} + +static int +parsegid(const char *s, gid_t *gid) +{ + struct group *gr; + const char *errstr; + + if ((gr = getgrnam(s)) != NULL) { + *gid = gr->gr_gid; + if (*gid == GID_MAX) + return -1; + return 0; + } + *gid = strtonum(s, 0, GID_MAX - 1, &errstr); + if (errstr) + return -1; + return 0; +} + +static int +match(uid_t uid, gid_t *groups, int ngroups, uid_t target, const char *cmd, + const char **cmdargs, struct rule *r) +{ + int i; + + if (r->ident[0] == ':') { + gid_t rgid; + if (parsegid(r->ident + 1, &rgid) == -1) + return 0; + for 
(i = 0; i < ngroups; i++) { + if (rgid == groups[i]) + break; + } + if (i == ngroups) + return 0; + } else { + if (uidcheck(r->ident, uid) != 0) + return 0; + } + if (r->target && uidcheck(r->target, target) != 0) + return 0; + if (r->cmd) { + if (strcmp(r->cmd, cmd)) + return 0; + if (r->cmdargs) { + /* if arguments were given, they should match explicitly */ + for (i = 0; r->cmdargs[i]; i++) { + if (!cmdargs[i]) + return 0; + if (strcmp(r->cmdargs[i], cmdargs[i])) + return 0; + } + if (cmdargs[i]) + return 0; + } + } + return 1; +} + +static int +permit(uid_t uid, gid_t *groups, int ngroups, const struct rule **lastr, + uid_t target, const char *cmd, const char **cmdargs) +{ + int i; + + *lastr = NULL; + for (i = 0; i < nrules; i++) { + if (match(uid, groups, ngroups, target, cmd, + cmdargs, rules[i])) + *lastr = rules[i]; + } + if (!*lastr) + return 0; + return (*lastr)->action == PERMIT; +} + +static void +parseconfig(const char *filename, int checkperms) +{ + extern FILE *yyfp; + extern int yyparse(void); + struct stat sb; + + yyfp = fopen(filename, "r"); + if (!yyfp) + err(1, checkperms ? "doas is not enabled, %s" : + "could not open config file %s", filename); + + if (checkperms) { + if (fstat(fileno(yyfp), &sb) != 0) + err(1, "fstat(\"%s\")", filename); + if ((sb.st_mode & (S_IWGRP|S_IWOTH)) != 0) + errx(1, "%s is writable by group or other", filename); + if (sb.st_uid != 0) + errx(1, "%s is not owned by root", filename); + } + + yyparse(); + fclose(yyfp); + if (parse_errors) + exit(1); +} + +static void __dead +checkconfig(const char *confpath, int argc, char **argv, + uid_t uid, gid_t *groups, int ngroups, uid_t target) +{ + const struct rule *rule; + + setresuid(uid, uid, uid); + parseconfig(confpath, 0); + if (!argc) + exit(0); + + if (permit(uid, groups, ngroups, &rule, target, argv[0], + (const char **)argv + 1)) { + printf("permit%s\n", (rule->options & NOPASS) ? 
" nopass" : ""); + exit(0); + } else { + printf("deny\n"); + exit(1); + } +} + +static int +verifypasswd(const char *user, const char *pass) +{ + struct spwd *sp; + char *p1, *p2; + + sp = getspnam(user); + if (!sp) + return 0; + p1 = sp->sp_pwdp; + if (p1[0] == '!' || p1[0] == '*') + return 0; + p2 = crypt(pass, p1); + if (!p2) + return 0; + return strcmp(p1, p2) == 0; +} + +static void +authuser(char *myname, int persist) +{ + char *challenge = NULL, *response, rbuf[1024], cbuf[128]; + int fd = -1, valid = 0; + + if (persist) { + fd = openpersist(&valid); + if (valid) + goto good; + } + + if (!challenge) { + char host[HOST_NAME_MAX + 1]; + if (gethostname(host, sizeof(host))) + snprintf(host, sizeof(host), "?"); + snprintf(cbuf, sizeof(cbuf), + "\rdoas (%.32s@%.32s) password: ", myname, host); + challenge = cbuf; + } + response = readpassphrase(challenge, rbuf, sizeof(rbuf), + RPP_REQUIRE_TTY); + if (response == NULL && errno == ENOTTY) { + syslog(LOG_NOTICE, "tty required for %s", myname); + errx(1, "a tty is required"); + } + if (!verifypasswd(myname, response)) { + explicit_bzero(rbuf, sizeof(rbuf)); + syslog(LOG_NOTICE, "failed auth for %s", myname); + errx(1, "Authorization failed"); + } + explicit_bzero(rbuf, sizeof(rbuf)); +good: + if (fd != -1) { + setpersist(fd); + close(fd); + } +} + +int +unveilcommands(const char *ipath, const char *cmd) +{ + char *path = NULL, *p; + int unveils = 0; + + if (strchr(cmd, '/') != NULL) { + if (unveil(cmd, "x") != -1) + unveils++; + goto done; + } + + if (!ipath) { + errno = ENOENT; + goto done; + } + path = strdup(ipath); + if (!path) { + errno = ENOENT; + goto done; + } + for (p = path; p && *p; ) { + char buf[PATH_MAX]; + char *cp = strsep(&p, ":"); + + if (cp) { + int r = snprintf(buf, sizeof buf, "%s/%s", cp, cmd); + if (r >= 0 && r < sizeof buf) { + if (unveil(buf, "x") != -1) + unveils++; + } + } + } +done: + free(path); + return (unveils); +} + +int +main(int argc, char **argv) +{ + const char *safepath = "/bin"; 
+ const char *confpath = NULL; + char *shargv[] = { NULL, NULL }; + char *sh; + const char *p; + const char *cmd; + char cmdline[LINE_MAX]; + char mypwbuf[1024], targpwbuf[1024]; + struct passwd mypwstore, targpwstore; + struct passwd *mypw, *targpw; + const struct rule *rule; + uid_t uid; + uid_t target = 0; + gid_t groups[NGROUPS_MAX + 1]; + int ngroups; + int i, ch, rv; + int sflag = 0; + int nflag = 0; + char cwdpath[PATH_MAX]; + const char *cwd; + char **envp; + + setprogname("doas"); + openlog("doas", 0, LOG_AUTHPRIV); + + uid = getuid(); + + while ((ch = getopt(argc, argv, "C:Lnsu:")) != -1) { + switch (ch) { + case 'C': + confpath = optarg; + break; + case 'L': + exit(clearpersist() != 0); + case 'u': + if (parseuid(optarg, &target) != 0) + errx(1, "unknown user"); + break; + case 'n': + nflag = 1; + break; + case 's': + sflag = 1; + break; + default: + usage(); + break; + } + } + argv += optind; + argc -= optind; + + if (confpath) { + if (sflag) + usage(); + } else if ((!sflag && !argc) || (sflag && argc)) + usage(); + + rv = getpwuid_r(uid, &mypwstore, mypwbuf, sizeof(mypwbuf), &mypw); + if (rv != 0) + err(1, "getpwuid_r failed"); + if (mypw == NULL) + errx(1, "no passwd entry for self"); + ngroups = getgroups(NGROUPS_MAX, groups); + if (ngroups == -1) + err(1, "can't get groups"); + groups[ngroups++] = getgid(); + + if (sflag) { + sh = getenv("SHELL"); + if (sh == NULL || *sh == '\0') { + shargv[0] = mypw->pw_shell; + } else + shargv[0] = sh; + argv = shargv; + argc = 1; + } + + if (confpath) { + checkconfig(confpath, argc, argv, uid, groups, ngroups, + target); + exit(1); /* fail safe */ + } + + if (geteuid()) + errx(1, "not installed setuid"); + + parseconfig("/etc/doas.conf", 1); + + /* cmdline is used only for logging, no need to abort on truncate */ + (void)strlcpy(cmdline, argv[0], sizeof(cmdline)); + for (i = 1; i < argc; i++) { + if (strlcat(cmdline, " ", sizeof(cmdline)) >= sizeof(cmdline)) + break; + if (strlcat(cmdline, argv[i], 
sizeof(cmdline)) >= sizeof(cmdline)) + break; + } + + cmd = argv[0]; + if (!permit(uid, groups, ngroups, &rule, target, cmd, + (const char **)argv + 1)) { + syslog(LOG_NOTICE, "failed command for %s: %s", mypw->pw_name, cmdline); + errno = EPERM; + err(1, NULL); + } + + if (!(rule->options & NOPASS)) { + if (nflag) + errx(1, "Authorization required"); + + authuser(mypw->pw_name, rule->options & PERSIST); + } + + if ((p = getenv("PATH")) != NULL) + formerpath = strdup(p); + if (formerpath == NULL) + formerpath = ""; + + if (unveil(_PATH_LOGIN_CONF, "r") == -1 || + unveil(_PATH_LOGIN_CONF ".db", "r") == -1) + err(1, "unveil"); + if (rule->cmd) { + if (setenv("PATH", safepath, 1) == -1) + err(1, "failed to set PATH '%s'", safepath); + } + if (unveilcommands(getenv("PATH"), cmd) == 0) + goto fail; + + if (pledge("stdio rpath getpw exec id", NULL) == -1) + err(1, "pledge"); + + rv = getpwuid_r(target, &targpwstore, targpwbuf, sizeof(targpwbuf), &targpw); + if (rv != 0) + err(1, "getpwuid_r failed"); + if (targpw == NULL) + errx(1, "no passwd entry for target"); + + if (initgroups(targpw->pw_name, targpw->pw_gid) < 0) + err(1, "initgroups"); + if (setgid(targpw->pw_gid) < 0) + err(1, "setgid"); + if (setuid(targpw->pw_uid) < 0) + err(1, "setuid"); + + if (pledge("stdio rpath exec", NULL) == -1) + err(1, "pledge"); + + if (getcwd(cwdpath, sizeof(cwdpath)) == NULL) + cwd = "(failed)"; + else + cwd = cwdpath; + + if (pledge("stdio exec", NULL) == -1) + err(1, "pledge"); + + syslog(LOG_INFO, "%s ran command %s as %s from %s", + mypw->pw_name, cmdline, targpw->pw_name, cwd); + + envp = prepenv(rule, mypw, targpw); + + /* setusercontext set path for the next process, so reset it for us */ + if (rule->cmd) { + if (setenv("PATH", safepath, 1) == -1) + err(1, "failed to set PATH '%s'", safepath); + } else { + if (setenv("PATH", formerpath, 1) == -1) + err(1, "failed to set PATH '%s'", formerpath); + } + execvpe(cmd, argv, envp); +fail: + if (errno == ENOENT) + errx(1, "%s: 
command not found", cmd); + err(1, "%s", cmd); +} diff --git a/usr.bin/doas/doas.conf.5 b/usr.bin/doas/doas.conf.5 new file mode 100644 index 0000000..70379ee --- /dev/null +++ b/usr.bin/doas/doas.conf.5 @@ -0,0 +1,149 @@ +.\" $OpenBSD: doas.conf.5,v 1.42 2020/02/10 13:18:20 schwarze Exp $ +.\" +.\"Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> +.\" +.\"Permission to use, copy, modify, and distribute this software for any +.\"purpose with or without fee is hereby granted, provided that the above +.\"copyright notice and this permission notice appear in all copies. +.\" +.\"THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\"WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\"MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\"ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\"WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\"ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\"OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.Dd $Mdocdate: February 10 2020 $ +.Dt DOAS.CONF 5 +.Os +.Sh NAME +.Nm doas.conf +.Nd doas configuration file +.Sh DESCRIPTION +The +.Xr doas 1 +utility executes commands as other users according to the rules +in the +.Nm +configuration file. +.Pp +The rules have the following format: +.Bd -ragged -offset indent +.Ic permit Ns | Ns Ic deny +.Op Ar options +.Ar identity +.Op Ic as Ar target +.Op Ic cmd Ar command Op Ic args No ... +.Ed +.Pp +Rules consist of the following parts: +.Bl -tag -width 11n +.It Ic permit Ns | Ns Ic deny +The action to be taken if this rule matches. +.It Ar options +Options are: +.Bl -tag -width keepenv +.It Ic nopass +The user is not required to enter a password. +.It Ic persist +After the user successfully authenticates, do not ask for a password +again for some time. 
+.It Ic keepenv +Environment variables other than those listed in +.Xr doas 1 +are retained when creating the environment for the new process. +.It Ic setenv { Oo Ar variable ... Oc Oo Ar variable=value ... Oc Ic } +Keep or set the space-separated specified variables. +Variables may also be removed with a leading +.Sq - +or set using the latter syntax. +If the first character of +.Ar value +is a +.Ql $ +then the value to be set is taken from the existing environment +variable of the indicated name. +This option is processed after the default environment has been created. +.El +.It Ar identity +The username to match. +Groups may be specified by prepending a colon +.Pq Sq \&: . +Numeric IDs are also accepted. +.It Ic as Ar target +The target user the running user is allowed to run the command as. +The default is all users. +.It Ic cmd Ar command +The command the user is allowed or denied to run. +The default is all commands. +Be advised that it is best to specify absolute paths. +If a relative path is specified, only a restricted +.Ev PATH +will be searched. +.It Ic args Op Ar argument ... +Arguments to command. +The command arguments provided by the user need to match those specified. +The keyword +.Ic args +alone means that command must be run without any arguments. +.El +.Pp +The last matching rule determines the action taken. +If no rule matches, the action is denied. +.Pp +Comments can be put anywhere in the file using a hash mark +.Pq Sq # , +and extend to the end of the current line. +.Pp +The following quoting rules apply: +.Bl -dash +.It +The text between a pair of double quotes +.Pq Sq \&" +is taken as is. +.It +The backslash character +.Pq Sq \e +escapes the next character, including new line characters, outside comments; +as a result, comments may not be extended over multiple lines. +.It +If quotes or backslashes are used in a word, +it is not considered a keyword. 
+.El +.Sh FILES +.Bl -tag -width /etc/examples/doas.conf -compact +.It Pa /etc/doas.conf +.Xr doas 1 +configuration file +.It Pa /etc/examples/doas.conf +example configuration file +.El +.Sh EXAMPLES +The following example permits user aja to install packages +from a preferred mirror; +group wheel to execute commands as any user while keeping the environment +variables +.Ev PS1 +and +.Ev SSH_AUTH_SOCK +and +unsetting +.Ev ENV ; +permits tedu to run procmap as root without a password; +and additionally permits root to run unrestricted commands as itself +while retaining the original PATH. +.Bd -literal -offset indent +permit persist setenv { PKG_CACHE PKG_PATH } aja cmd pkg_add +permit setenv { -ENV PS1=$DOAS_PS1 SSH_AUTH_SOCK } :wheel +permit nopass tedu as root cmd /usr/sbin/procmap +permit nopass keepenv setenv { PATH } root as root +.Ed +.Sh SEE ALSO +.Xr doas 1 +.Sh HISTORY +The +.Nm +configuration file first appeared in +.Ox 5.8 . +.Sh AUTHORS +.An Ted Unangst Aq Mt tedu@openbsd.org diff --git a/usr.bin/doas/doas.h b/usr.bin/doas/doas.h new file mode 100644 index 0000000..c97986e --- /dev/null +++ b/usr.bin/doas/doas.h @@ -0,0 +1,48 @@ +/* $OpenBSD: doas.h,v 1.15 2019/06/17 19:51:23 tedu Exp $ */ +/* + * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct rule { + int action; + int options; + const char *ident; + const char *target; + const char *cmd; + const char **cmdargs; + const char **envlist; +}; + +extern struct rule **rules; +extern int nrules; +extern int parse_errors; + +extern const char *formerpath; + +struct passwd; + +char **prepenv(const struct rule *, const struct passwd *, + const struct passwd *); + +int openpersist(int *valid); +int setpersist(int fd); +int clearpersist(void); + +#define PERMIT 1 +#define DENY 2 + +#define NOPASS 0x1 +#define KEEPENV 0x2 +#define PERSIST 0x4 diff --git a/usr.bin/doas/env.c b/usr.bin/doas/env.c new file mode 100644 index 0000000..2d93a40 --- /dev/null +++ b/usr.bin/doas/env.c @@ -0,0 +1,235 @@ +/* $OpenBSD: env.c,v 1.10 2019/07/07 19:21:28 tedu Exp $ */ +/* + * Copyright (c) 2016 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> +#include <sys/tree.h> + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <err.h> +#include <unistd.h> +#include <errno.h> +#include <pwd.h> + +#include "doas.h" + +const char *formerpath; + +struct envnode { + RB_ENTRY(envnode) node; + const char *key; + const char *value; +}; + +struct env { + RB_HEAD(envtree, envnode) root; + u_int count; +}; + +static void fillenv(struct env *env, const char **envlist); + +static int +envcmp(struct envnode *a, struct envnode *b) +{ + return strcmp(a->key, b->key); +} +RB_GENERATE_STATIC(envtree, envnode, node, envcmp) + +static struct envnode * +createnode(const char *key, const char *value) +{ + struct envnode *node; + + node = malloc(sizeof(*node)); + if (!node) + err(1, NULL); + node->key = strdup(key); + node->value = strdup(value); + if (!node->key || !node->value) + err(1, NULL); + return node; +} + +static void +freenode(struct envnode *node) +{ + free((char *)node->key); + free((char *)node->value); + free(node); +} + +static void +addnode(struct env *env, const char *key, const char *value) +{ + struct envnode *node; + + node = createnode(key, value); + RB_INSERT(envtree, &env->root, node); + env->count++; +} + +static struct env * +createenv(const struct rule *rule, const struct passwd *mypw, + const struct passwd *targpw) +{ + static const char *copyset[] = { + "DISPLAY", "TERM", + NULL + }; + struct env *env; + u_int i; + + env = malloc(sizeof(*env)); + if (!env) + err(1, NULL); + RB_INIT(&env->root); + env->count = 0; + + addnode(env, "DOAS_USER", mypw->pw_name); + addnode(env, "HOME", targpw->pw_dir); + addnode(env, "LOGNAME", targpw->pw_name); + addnode(env, "PATH", getenv("PATH")); + addnode(env, "SHELL", targpw->pw_shell); + addnode(env, "USER", targpw->pw_name); + + fillenv(env, copyset); + + if (rule->options & KEEPENV) { + extern const char **environ; + + for (i = 0; environ[i] != NULL; i++) { + struct envnode *node; + const char *e, *eq; + size_t len; + 
char name[1024]; + + e = environ[i]; + + /* ignore invalid or overlong names */ + if ((eq = strchr(e, '=')) == NULL || eq == e) + continue; + len = eq - e; + if (len > sizeof(name) - 1) + continue; + memcpy(name, e, len); + name[len] = '\0'; + + node = createnode(name, eq + 1); + if (RB_INSERT(envtree, &env->root, node)) { + /* ignore any later duplicates */ + freenode(node); + } else { + env->count++; + } + } + } + + return env; +} + +static char ** +flattenenv(struct env *env) +{ + char **envp; + struct envnode *node; + u_int i; + + envp = reallocarray(NULL, env->count + 1, sizeof(char *)); + if (!envp) + err(1, NULL); + i = 0; + RB_FOREACH(node, envtree, &env->root) { + if (asprintf(&envp[i], "%s=%s", node->key, node->value) == -1) + err(1, NULL); + i++; + } + envp[i] = NULL; + return envp; +} + +static void +fillenv(struct env *env, const char **envlist) +{ + struct envnode *node, key; + const char *e, *eq; + const char *val; + char name[1024]; + u_int i; + size_t len; + + for (i = 0; envlist[i]; i++) { + e = envlist[i]; + + /* parse out env name */ + if ((eq = strchr(e, '=')) == NULL) + len = strlen(e); + else + len = eq - e; + if (len > sizeof(name) - 1) + continue; + memcpy(name, e, len); + name[len] = '\0'; + + /* delete previous copies */ + key.key = name; + if (*name == '-') + key.key = name + 1; + if ((node = RB_FIND(envtree, &env->root, &key))) { + RB_REMOVE(envtree, &env->root, node); + freenode(node); + env->count--; + } + if (*name == '-') + continue; + + /* assign value or inherit from environ */ + if (eq) { + val = eq + 1; + if (*val == '$') { + if (strcmp(val + 1, "PATH") == 0) + val = formerpath; + else + val = getenv(val + 1); + } + } else { + if (strcmp(name, "PATH") == 0) + val = formerpath; + else + val = getenv(name); + } + /* at last, we have something to insert */ + if (val) { + node = createnode(name, val); + RB_INSERT(envtree, &env->root, node); + env->count++; + } + } +} + +char ** +prepenv(const struct rule *rule, const struct passwd 
*mypw, + const struct passwd *targpw) +{ + struct env *env; + + env = createenv(rule, mypw, targpw); + if (rule->envlist) + fillenv(env, rule->envlist); + + return flattenenv(env); +} diff --git a/usr.bin/doas/parse.c b/usr.bin/doas/parse.c new file mode 100644 index 0000000..abebdb2 --- /dev/null +++ b/usr.bin/doas/parse.c @@ -0,0 +1,868 @@ +/* original parser id follows */ +/* yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93" */ +/* (use YYMAJOR/YYMINOR for ifdefs dependent on parser version) */ + +#define YYBYACC 1 +#define YYMAJOR 2 +#define YYMINOR 0 +#define YYPATCH 20200910 + +#define YYEMPTY (-1) +#define yyclearin (yychar = YYEMPTY) +#define yyerrok (yyerrflag = 0) +#define YYRECOVERING() (yyerrflag != 0) +#define YYENOMEM (-2) +#define YYEOF 0 +#define YYPREFIX "yy" + +#define YYPURE 0 + +#line 19 "usr.bin/doas/parse.y" +#include <sys/types.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <err.h> + +#include "doas.h" + +typedef struct { + union { + struct { + int action; + int options; + const char *cmd; + const char **cmdargs; + const char **envlist; + }; + const char **strlist; + const char *str; + }; + int lineno; + int colno; +} yystype; +#define YYSTYPE yystype + +FILE *yyfp; + +struct rule **rules; +int nrules; +static int maxrules; + +int parse_errors = 0; + +static void yyerror(const char *, ...); +static int yylex(void); + +static size_t +arraylen(const char **arr) +{ + size_t cnt = 0; + + while (*arr) { + cnt++; + arr++; + } + return cnt; +} + +#line 74 "usr.bin/doas/parse.c" + +#if ! defined(YYSTYPE) && ! defined(YYSTYPE_IS_DECLARED) +/* Default: YYSTYPE is the semantic value type. 
*/ +typedef int YYSTYPE; +# define YYSTYPE_IS_DECLARED 1 +#endif + +/* compatibility with bison */ +#ifdef YYPARSE_PARAM +/* compatibility with FreeBSD */ +# ifdef YYPARSE_PARAM_TYPE +# define YYPARSE_DECL() yyparse(YYPARSE_PARAM_TYPE YYPARSE_PARAM) +# else +# define YYPARSE_DECL() yyparse(void *YYPARSE_PARAM) +# endif +#else +# define YYPARSE_DECL() yyparse(void) +#endif + +/* Parameters sent to lex. */ +#ifdef YYLEX_PARAM +# define YYLEX_DECL() yylex(void *YYLEX_PARAM) +# define YYLEX yylex(YYLEX_PARAM) +#else +# define YYLEX_DECL() yylex(void) +# define YYLEX yylex() +#endif + +#if !(defined(yylex) || defined(YYSTATE)) +int YYLEX_DECL(); +#endif + +/* Parameters sent to yyerror. */ +#ifndef YYERROR_DECL +#define YYERROR_DECL() yyerror(const char *s) +#endif +#ifndef YYERROR_CALL +#define YYERROR_CALL(msg) yyerror(msg) +#endif + +extern int YYPARSE_DECL(); + +#define TPERMIT 257 +#define TDENY 258 +#define TAS 259 +#define TCMD 260 +#define TARGS 261 +#define TNOPASS 262 +#define TPERSIST 263 +#define TKEEPENV 264 +#define TSETENV 265 +#define TSTRING 266 +#define YYERRCODE 256 +typedef short YYINT; +static const YYINT yylhs[] = { -1, + 0, 0, 0, 0, 1, 2, 2, 6, 6, 7, + 7, 7, 7, 8, 8, 3, 4, 4, 5, 5, + 9, 9, +}; +static const YYINT yylen[] = { 2, + 0, 2, 3, 2, 4, 2, 1, 0, 2, 1, + 1, 1, 4, 0, 2, 1, 0, 2, 0, 3, + 0, 2, +}; +static const YYINT yydefred[] = { 0, + 0, 0, 4, 8, 7, 2, 0, 0, 0, 3, + 16, 0, 10, 11, 12, 0, 9, 0, 0, 14, + 18, 0, 5, 0, 0, 15, 13, 14, 20, 0, +}; +static const YYINT yydgoto[] = { 2, + 7, 8, 12, 19, 23, 9, 17, 24, 29, +}; +static const YYINT yysindex[] = { -253, + -1, -6, 0, 0, 0, 0, 2, -256, -257, 0, + 0, -246, 0, 0, 0, -109, 0, -251, -244, 0, + 0, -249, 0, -123, -243, 0, 0, 0, 0, -247, +}; +static const YYINT yyrindex[] = { 1, + 0, 0, 0, 0, 0, 0, 0, 0, -245, 0, + 0, -10, 0, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 12, 0, 0, 0, 0, 13, +}; +static const YYINT yygindex[] = { 0, + 0, 0, 0, 0, 0, 0, 0, -4, 0, +}; +#define YYTABLESIZE 259 +static const 
YYINT yytable[] = { 17, + 1, 27, 1, 6, 13, 14, 15, 16, 3, 11, + 1, 10, 18, 20, 21, 22, 25, 28, 26, 19, + 6, 21, 22, 30, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, + 4, 5, 0, 0, 0, 0, 0, 1, 1, +}; +static const YYINT yycheck[] = { 10, + 0, 125, 256, 10, 262, 263, 264, 265, 10, 266, + 10, 10, 259, 123, 266, 260, 266, 261, 266, 10, + 266, 10, 10, 28, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 266, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 260, + 257, 258, -1, 
-1, -1, -1, -1, 257, 258, +}; +#define YYFINAL 2 +#ifndef YYDEBUG +#define YYDEBUG 0 +#endif +#define YYMAXTOKEN 266 +#define YYUNDFTOKEN 278 +#define YYTRANSLATE(a) ((a) > YYMAXTOKEN ? YYUNDFTOKEN : (a)) +#if YYDEBUG +static const char *const yyname[] = { + +"end-of-file",0,0,0,0,0,0,0,0,0,"'\\n'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,"'{'",0,"'}'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"TPERMIT","TDENY", +"TAS","TCMD","TARGS","TNOPASS","TPERSIST","TKEEPENV","TSETENV","TSTRING",0,0,0, +0,0,0,0,0,0,0,0,"illegal-symbol", +}; +static const char *const yyrule[] = { +"$accept : grammar", +"grammar :", +"grammar : grammar '\\n'", +"grammar : grammar rule '\\n'", +"grammar : error '\\n'", +"rule : action ident target cmd", +"action : TPERMIT options", +"action : TDENY", +"options :", +"options : options option", +"option : TNOPASS", +"option : TPERSIST", +"option : TKEEPENV", +"option : TSETENV '{' strlist '}'", +"strlist :", +"strlist : strlist TSTRING", +"ident : TSTRING", +"target :", +"target : TAS TSTRING", +"cmd :", +"cmd : TCMD TSTRING args", +"args :", +"args : TARGS strlist", + +}; +#endif + +#if YYDEBUG +int yydebug; +#endif + +int yyerrflag; +int yychar; +YYSTYPE yyval; +YYSTYPE yylval; +int yynerrs; + +/* define the initial stack-sizes */ +#ifdef YYSTACKSIZE +#undef YYMAXDEPTH +#define YYMAXDEPTH YYSTACKSIZE +#else +#ifdef YYMAXDEPTH +#define YYSTACKSIZE YYMAXDEPTH +#else +#define YYSTACKSIZE 10000 +#define YYMAXDEPTH 10000 +#endif +#endif + +#define YYINITSTACKSIZE 200 + +typedef struct { + unsigned stacksize; + YYINT *s_base; + YYINT *s_mark; + 
YYINT *s_last; + YYSTYPE *l_base; + YYSTYPE *l_mark; +} YYSTACKDATA; +/* variables for the parser stack */ +static YYSTACKDATA yystack; +#line 189 "usr.bin/doas/parse.y" + +void +yyerror(const char *fmt, ...) +{ + va_list va; + + fprintf(stderr, "doas: "); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); + fprintf(stderr, " at line %d\n", yylval.lineno + 1); + parse_errors++; +} + +static struct keyword { + const char *word; + int token; +} keywords[] = { + { "deny", TDENY }, + { "permit", TPERMIT }, + { "as", TAS }, + { "cmd", TCMD }, + { "args", TARGS }, + { "nopass", TNOPASS }, + { "persist", TPERSIST }, + { "keepenv", TKEEPENV }, + { "setenv", TSETENV }, +}; + +int +yylex(void) +{ + char buf[1024], *ebuf, *p, *str; + int i, c, quotes = 0, escape = 0, qpos = -1, nonkw = 0; + + p = buf; + ebuf = buf + sizeof(buf); + +repeat: + /* skip whitespace first */ + for (c = getc(yyfp); c == ' ' || c == '\t'; c = getc(yyfp)) + yylval.colno++; + + /* check for special one-character constructions */ + switch (c) { + case '\n': + yylval.colno = 0; + yylval.lineno++; + /* FALLTHROUGH */ + case '{': + case '}': + return c; + case '#': + /* skip comments; NUL is allowed; no continuation */ + while ((c = getc(yyfp)) != '\n') + if (c == EOF) + goto eof; + yylval.colno = 0; + yylval.lineno++; + return c; + case EOF: + goto eof; + } + + /* parsing next word */ + for (;; c = getc(yyfp), yylval.colno++) { + switch (c) { + case '\0': + yyerror("unallowed character NUL in column %d", + yylval.colno + 1); + escape = 0; + continue; + case '\\': + escape = !escape; + if (escape) + continue; + break; + case '\n': + if (quotes) + yyerror("unterminated quotes in column %d", + qpos + 1); + if (escape) { + nonkw = 1; + escape = 0; + yylval.colno = 0; + yylval.lineno++; + continue; + } + goto eow; + case EOF: + if (escape) + yyerror("unterminated escape in column %d", + yylval.colno); + if (quotes) + yyerror("unterminated quotes in column %d", + qpos + 1); + goto eow; + /* 
FALLTHROUGH */ + case '{': + case '}': + case '#': + case ' ': + case '\t': + if (!escape && !quotes) + goto eow; + break; + case '"': + if (!escape) { + quotes = !quotes; + if (quotes) { + nonkw = 1; + qpos = yylval.colno; + } + continue; + } + } + *p++ = c; + if (p == ebuf) { + yyerror("too long line"); + p = buf; + } + escape = 0; + } + +eow: + *p = 0; + if (c != EOF) + ungetc(c, yyfp); + if (p == buf) { + /* + * There could be a number of reasons for empty buffer, + * and we handle all of them here, to avoid cluttering + * the main loop. + */ + if (c == EOF) + goto eof; + else if (qpos == -1) /* accept, e.g., empty args: cmd foo args "" */ + goto repeat; + } + if (!nonkw) { + for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { + if (strcmp(buf, keywords[i].word) == 0) + return keywords[i].token; + } + } + if ((str = strdup(buf)) == NULL) + err(1, "%s", __func__); + yylval.str = str; + return TSTRING; + +eof: + if (ferror(yyfp)) + yyerror("input error reading config"); + return 0; +} +#line 456 "usr.bin/doas/parse.c" + +#if YYDEBUG +#include <stdio.h> /* needed for printf */ +#endif + +#include <stdlib.h> /* needed for malloc, etc */ +#include <string.h> /* needed for memset */ + +/* allocate initial stack or double stack size, up to YYMAXDEPTH */ +static int yygrowstack(YYSTACKDATA *data) +{ + int i; + unsigned newsize; + YYINT *newss; + YYSTYPE *newvs; + + if ((newsize = data->stacksize) == 0) + newsize = YYINITSTACKSIZE; + else if (newsize >= YYMAXDEPTH) + return YYENOMEM; + else if ((newsize *= 2) > YYMAXDEPTH) + newsize = YYMAXDEPTH; + + i = (int) (data->s_mark - data->s_base); + newss = (YYINT *)realloc(data->s_base, newsize * sizeof(*newss)); + if (newss == 0) + return YYENOMEM; + + data->s_base = newss; + data->s_mark = newss + i; + + newvs = (YYSTYPE *)realloc(data->l_base, newsize * sizeof(*newvs)); + if (newvs == 0) + return YYENOMEM; + + data->l_base = newvs; + data->l_mark = newvs + i; + + data->stacksize = newsize; + data->s_last = 
data->s_base + newsize - 1; + return 0; +} + +#if YYPURE || defined(YY_NO_LEAKS) +static void yyfreestack(YYSTACKDATA *data) +{ + free(data->s_base); + free(data->l_base); + memset(data, 0, sizeof(*data)); +} +#else +#define yyfreestack(data) /* nothing */ +#endif + +#define YYABORT goto yyabort +#define YYREJECT goto yyabort +#define YYACCEPT goto yyaccept +#define YYERROR goto yyerrlab + +int +YYPARSE_DECL() +{ + int yym, yyn, yystate; +#if YYDEBUG + const char *yys; + + if ((yys = getenv("YYDEBUG")) != 0) + { + yyn = *yys; + if (yyn >= '0' && yyn <= '9') + yydebug = yyn - '0'; + } +#endif + + yym = 0; + yyn = 0; + yynerrs = 0; + yyerrflag = 0; + yychar = YYEMPTY; + yystate = 0; + +#if YYPURE + memset(&yystack, 0, sizeof(yystack)); +#endif + + if (yystack.s_base == NULL && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystack.s_mark = yystack.s_base; + yystack.l_mark = yystack.l_base; + yystate = 0; + *yystack.s_mark = 0; + +yyloop: + if ((yyn = yydefred[yystate]) != 0) goto yyreduce; + if (yychar < 0) + { + yychar = YYLEX; + if (yychar < 0) yychar = YYEOF; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + } + if (((yyn = yysindex[yystate]) != 0) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yychar) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, shifting to state %d\n", + YYPREFIX, yystate, yytable[yyn]); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystate = yytable[yyn]; + *++yystack.s_mark = yytable[yyn]; + *++yystack.l_mark = yylval; + yychar = YYEMPTY; + if (yyerrflag > 0) --yyerrflag; + goto yyloop; + } + if (((yyn = yyrindex[yystate]) != 0) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yychar) + { + yyn = yytable[yyn]; + goto yyreduce; + } + if (yyerrflag != 0) 
goto yyinrecovery; + + YYERROR_CALL("syntax error"); + + goto yyerrlab; /* redundant goto avoids 'unused label' warning */ +yyerrlab: + ++yynerrs; + +yyinrecovery: + if (yyerrflag < 3) + { + yyerrflag = 3; + for (;;) + { + if (((yyn = yysindex[*yystack.s_mark]) != 0) && (yyn += YYERRCODE) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) YYERRCODE) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, error recovery shifting\ + to state %d\n", YYPREFIX, *yystack.s_mark, yytable[yyn]); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystate = yytable[yyn]; + *++yystack.s_mark = yytable[yyn]; + *++yystack.l_mark = yylval; + goto yyloop; + } + else + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: error recovery discarding state %d\n", + YYPREFIX, *yystack.s_mark); +#endif + if (yystack.s_mark <= yystack.s_base) goto yyabort; + --yystack.s_mark; + --yystack.l_mark; + } + } + } + else + { + if (yychar == YYEOF) goto yyabort; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, error recovery discards token %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + yychar = YYEMPTY; + goto yyloop; + } + +yyreduce: +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, reducing by rule %d (%s)\n", + YYPREFIX, yystate, yyn, yyrule[yyn]); +#endif + yym = yylen[yyn]; + if (yym > 0) + yyval = yystack.l_mark[1-yym]; + else + memset(&yyval, 0, sizeof yyval); + + switch (yyn) + { +case 5: +#line 85 "usr.bin/doas/parse.y" + { + struct rule *r; + r = calloc(1, sizeof(*r)); + if (!r) + errx(1, "can't allocate rule"); + r->action = yystack.l_mark[-3].action; + r->options = yystack.l_mark[-3].options; + r->envlist = yystack.l_mark[-3].envlist; + r->ident = yystack.l_mark[-2].str; + r->target = yystack.l_mark[-1].str; + r->cmd = yystack.l_mark[0].cmd; + r->cmdargs = yystack.l_mark[0].cmdargs; + if (nrules == maxrules) { + if 
(maxrules == 0) + maxrules = 63; + else + maxrules *= 2; + if (!(rules = reallocarray(rules, maxrules, + sizeof(*rules)))) + errx(1, "can't allocate rules"); + } + rules[nrules++] = r; + } +break; +case 6: +#line 109 "usr.bin/doas/parse.y" + { + yyval.action = PERMIT; + yyval.options = yystack.l_mark[0].options; + yyval.envlist = yystack.l_mark[0].envlist; + } +break; +case 7: +#line 113 "usr.bin/doas/parse.y" + { + yyval.action = DENY; + yyval.options = 0; + yyval.envlist = NULL; + } +break; +case 8: +#line 119 "usr.bin/doas/parse.y" + { + yyval.options = 0; + yyval.envlist = NULL; + } +break; +case 9: +#line 122 "usr.bin/doas/parse.y" + { + yyval.options = yystack.l_mark[-1].options | yystack.l_mark[0].options; + yyval.envlist = yystack.l_mark[-1].envlist; + if ((yyval.options & (NOPASS|PERSIST)) == (NOPASS|PERSIST)) { + yyerror("can't combine nopass and persist"); + YYERROR; + } + if (yystack.l_mark[0].envlist) { + if (yyval.envlist) { + yyerror("can't have two setenv sections"); + YYERROR; + } else + yyval.envlist = yystack.l_mark[0].envlist; + } + } +break; +case 10: +#line 137 "usr.bin/doas/parse.y" + { + yyval.options = NOPASS; + yyval.envlist = NULL; + } +break; +case 11: +#line 140 "usr.bin/doas/parse.y" + { + yyval.options = PERSIST; + yyval.envlist = NULL; + } +break; +case 12: +#line 143 "usr.bin/doas/parse.y" + { + yyval.options = KEEPENV; + yyval.envlist = NULL; + } +break; +case 13: +#line 146 "usr.bin/doas/parse.y" + { + yyval.options = 0; + yyval.envlist = yystack.l_mark[-1].strlist; + } +break; +case 14: +#line 151 "usr.bin/doas/parse.y" + { + if (!(yyval.strlist = calloc(1, sizeof(char *)))) + errx(1, "can't allocate strlist"); + } +break; +case 15: +#line 154 "usr.bin/doas/parse.y" + { + int nstr = arraylen(yystack.l_mark[-1].strlist); + if (!(yyval.strlist = reallocarray(yystack.l_mark[-1].strlist, nstr + 2, + sizeof(char *)))) + errx(1, "can't allocate strlist"); + yyval.strlist[nstr] = yystack.l_mark[0].str; + yyval.strlist[nstr + 1] = NULL; 
+ } +break; +case 16: +#line 164 "usr.bin/doas/parse.y" + { + yyval.str = yystack.l_mark[0].str; + } +break; +case 17: +#line 168 "usr.bin/doas/parse.y" + { + yyval.str = NULL; + } +break; +case 18: +#line 170 "usr.bin/doas/parse.y" + { + yyval.str = yystack.l_mark[0].str; + } +break; +case 19: +#line 174 "usr.bin/doas/parse.y" + { + yyval.cmd = NULL; + yyval.cmdargs = NULL; + } +break; +case 20: +#line 177 "usr.bin/doas/parse.y" + { + yyval.cmd = yystack.l_mark[-1].str; + yyval.cmdargs = yystack.l_mark[0].cmdargs; + } +break; +case 21: +#line 182 "usr.bin/doas/parse.y" + { + yyval.cmdargs = NULL; + } +break; +case 22: +#line 184 "usr.bin/doas/parse.y" + { + yyval.cmdargs = yystack.l_mark[0].strlist; + } +break; +#line 812 "usr.bin/doas/parse.c" + } + yystack.s_mark -= yym; + yystate = *yystack.s_mark; + yystack.l_mark -= yym; + yym = yylhs[yyn]; + if (yystate == 0 && yym == 0) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state 0 to\ + state %d\n", YYPREFIX, YYFINAL); +#endif + yystate = YYFINAL; + *++yystack.s_mark = YYFINAL; + *++yystack.l_mark = yyval; + if (yychar < 0) + { + yychar = YYLEX; + if (yychar < 0) yychar = YYEOF; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, YYFINAL, yychar, yys); + } +#endif + } + if (yychar == YYEOF) goto yyaccept; + goto yyloop; + } + if (((yyn = yygindex[yym]) != 0) && (yyn += yystate) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yystate) + yystate = yytable[yyn]; + else + yystate = yydgoto[yym]; +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state %d \ +to state %d\n", YYPREFIX, *yystack.s_mark, yystate); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + *++yystack.s_mark = (YYINT) yystate; + *++yystack.l_mark = yyval; + goto yyloop; + +yyoverflow: + YYERROR_CALL("yacc stack 
overflow"); + +yyabort: + yyfreestack(&yystack); + return (1); + +yyaccept: + yyfreestack(&yystack); + return (0); +} diff --git a/usr.bin/doas/parse.tab.h b/usr.bin/doas/parse.tab.h new file mode 100644 index 0000000..dfa6b41 --- /dev/null +++ b/usr.bin/doas/parse.tab.h @@ -0,0 +1,10 @@ +#define TPERMIT 257 +#define TDENY 258 +#define TAS 259 +#define TCMD 260 +#define TARGS 261 +#define TNOPASS 262 +#define TPERSIST 263 +#define TKEEPENV 264 +#define TSETENV 265 +#define TSTRING 266 diff --git a/usr.bin/doas/parse.y b/usr.bin/doas/parse.y new file mode 100644 index 0000000..d1f698c --- /dev/null +++ b/usr.bin/doas/parse.y @@ -0,0 +1,343 @@ +/* $OpenBSD: parse.y,v 1.27 2018/07/11 07:39:22 krw Exp $ */ +/* + * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +%{ +#include <sys/types.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <err.h> + +#include "doas.h" + +typedef struct { + union { + struct { + int action; + int options; + const char *cmd; + const char **cmdargs; + const char **envlist; + }; + const char **strlist; + const char *str; + }; + int lineno; + int colno; +} yystype; +#define YYSTYPE yystype + +FILE *yyfp; + +struct rule **rules; +int nrules; +static int maxrules; + +int parse_errors = 0; + +static void yyerror(const char *, ...); +static int yylex(void); + +static size_t +arraylen(const char **arr) +{ + size_t cnt = 0; + + while (*arr) { + cnt++; + arr++; + } + return cnt; +} + +%} + +%token TPERMIT TDENY TAS TCMD TARGS +%token TNOPASS TPERSIST TKEEPENV TSETENV +%token TSTRING + +%% + +grammar: /* empty */ + | grammar '\n' + | grammar rule '\n' + | error '\n' + ; + +rule: action ident target cmd { + struct rule *r; + r = calloc(1, sizeof(*r)); + if (!r) + errx(1, "can't allocate rule"); + r->action = $1.action; + r->options = $1.options; + r->envlist = $1.envlist; + r->ident = $2.str; + r->target = $3.str; + r->cmd = $4.cmd; + r->cmdargs = $4.cmdargs; + if (nrules == maxrules) { + if (maxrules == 0) + maxrules = 63; + else + maxrules *= 2; + if (!(rules = reallocarray(rules, maxrules, + sizeof(*rules)))) + errx(1, "can't allocate rules"); + } + rules[nrules++] = r; + } ; + +action: TPERMIT options { + $$.action = PERMIT; + $$.options = $2.options; + $$.envlist = $2.envlist; + } | TDENY { + $$.action = DENY; + $$.options = 0; + $$.envlist = NULL; + } ; + +options: /* none */ { + $$.options = 0; + $$.envlist = NULL; + } | options option { + $$.options = $1.options | $2.options; + $$.envlist = $1.envlist; + if (($$.options & (NOPASS|PERSIST)) == (NOPASS|PERSIST)) { + yyerror("can't combine nopass and persist"); + YYERROR; + } + if ($2.envlist) { + if ($$.envlist) { + yyerror("can't have two 
setenv sections"); + YYERROR; + } else + $$.envlist = $2.envlist; + } + } ; +option: TNOPASS { + $$.options = NOPASS; + $$.envlist = NULL; + } | TPERSIST { + $$.options = PERSIST; + $$.envlist = NULL; + } | TKEEPENV { + $$.options = KEEPENV; + $$.envlist = NULL; + } | TSETENV '{' strlist '}' { + $$.options = 0; + $$.envlist = $3.strlist; + } ; + +strlist: /* empty */ { + if (!($$.strlist = calloc(1, sizeof(char *)))) + errx(1, "can't allocate strlist"); + } | strlist TSTRING { + int nstr = arraylen($1.strlist); + if (!($$.strlist = reallocarray($1.strlist, nstr + 2, + sizeof(char *)))) + errx(1, "can't allocate strlist"); + $$.strlist[nstr] = $2.str; + $$.strlist[nstr + 1] = NULL; + } ; + + +ident: TSTRING { + $$.str = $1.str; + } ; + +target: /* optional */ { + $$.str = NULL; + } | TAS TSTRING { + $$.str = $2.str; + } ; + +cmd: /* optional */ { + $$.cmd = NULL; + $$.cmdargs = NULL; + } | TCMD TSTRING args { + $$.cmd = $2.str; + $$.cmdargs = $3.cmdargs; + } ; + +args: /* empty */ { + $$.cmdargs = NULL; + } | TARGS strlist { + $$.cmdargs = $2.strlist; + } ; + +%% + +void +yyerror(const char *fmt, ...) 
+{ + va_list va; + + fprintf(stderr, "doas: "); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); + fprintf(stderr, " at line %d\n", yylval.lineno + 1); + parse_errors++; +} + +static struct keyword { + const char *word; + int token; +} keywords[] = { + { "deny", TDENY }, + { "permit", TPERMIT }, + { "as", TAS }, + { "cmd", TCMD }, + { "args", TARGS }, + { "nopass", TNOPASS }, + { "persist", TPERSIST }, + { "keepenv", TKEEPENV }, + { "setenv", TSETENV }, +}; + +int +yylex(void) +{ + char buf[1024], *ebuf, *p, *str; + int i, c, quotes = 0, escape = 0, qpos = -1, nonkw = 0; + + p = buf; + ebuf = buf + sizeof(buf); + +repeat: + /* skip whitespace first */ + for (c = getc(yyfp); c == ' ' || c == '\t'; c = getc(yyfp)) + yylval.colno++; + + /* check for special one-character constructions */ + switch (c) { + case '\n': + yylval.colno = 0; + yylval.lineno++; + /* FALLTHROUGH */ + case '{': + case '}': + return c; + case '#': + /* skip comments; NUL is allowed; no continuation */ + while ((c = getc(yyfp)) != '\n') + if (c == EOF) + goto eof; + yylval.colno = 0; + yylval.lineno++; + return c; + case EOF: + goto eof; + } + + /* parsing next word */ + for (;; c = getc(yyfp), yylval.colno++) { + switch (c) { + case '\0': + yyerror("unallowed character NUL in column %d", + yylval.colno + 1); + escape = 0; + continue; + case '\\': + escape = !escape; + if (escape) + continue; + break; + case '\n': + if (quotes) + yyerror("unterminated quotes in column %d", + qpos + 1); + if (escape) { + nonkw = 1; + escape = 0; + yylval.colno = 0; + yylval.lineno++; + continue; + } + goto eow; + case EOF: + if (escape) + yyerror("unterminated escape in column %d", + yylval.colno); + if (quotes) + yyerror("unterminated quotes in column %d", + qpos + 1); + goto eow; + /* FALLTHROUGH */ + case '{': + case '}': + case '#': + case ' ': + case '\t': + if (!escape && !quotes) + goto eow; + break; + case '"': + if (!escape) { + quotes = !quotes; + if (quotes) { + nonkw = 1; + qpos = 
yylval.colno; + } + continue; + } + } + *p++ = c; + if (p == ebuf) { + yyerror("too long line"); + p = buf; + } + escape = 0; + } + +eow: + *p = 0; + if (c != EOF) + ungetc(c, yyfp); + if (p == buf) { + /* + * There could be a number of reasons for empty buffer, + * and we handle all of them here, to avoid cluttering + * the main loop. + */ + if (c == EOF) + goto eof; + else if (qpos == -1) /* accept, e.g., empty args: cmd foo args "" */ + goto repeat; + } + if (!nonkw) { + for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { + if (strcmp(buf, keywords[i].word) == 0) + return keywords[i].token; + } + } + if ((str = strdup(buf)) == NULL) + err(1, "%s", __func__); + yylval.str = str; + return TSTRING; + +eof: + if (ferror(yyfp)) + yyerror("input error reading config"); + return 0; +} diff --git a/usr.bin/doas/persist.c b/usr.bin/doas/persist.c new file mode 100644 index 0000000..4ad1bf1 --- /dev/null +++ b/usr.bin/doas/persist.c @@ -0,0 +1,133 @@ +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "doas.h" + +#define PERSIST_DIR "/run/doas" +#define PERSIST_TIMEOUT 5 * 60 + +static int +ttyid(dev_t *tty) +{ + int fd, i; + char buf[BUFSIZ], *p; + ssize_t n; + + fd = open("/proc/self/stat", O_RDONLY); + if (fd == -1) + return -1; + n = read(fd, buf, sizeof(buf) - 1); + if (n >= 0) + buf[n] = '\0'; + /* check that we read the whole file */ + n = read(fd, buf, 1); + close(fd); + if (n != 0) + return -1; + p = strrchr(buf, ')'); + if (!p) + return -1; + ++p; + /* tty_nr is the 5th field after executable name, so skip the next 4 */ + for (i = 0; i < 4; ++i) { + p = strchr(++p, ' '); + if (!p) + return -1; + } + *tty = strtol(p, &p, 10); + if (*p != ' ') + return -1; + return 0; +} + +static int +persistpath(char *buf, size_t len) +{ + dev_t tty; + int n; + + if (ttyid(&tty) < 0) + return -1; + n 
= snprintf(buf, len, PERSIST_DIR "/%ju-%ju", (uintmax_t)getuid(), (uintmax_t)tty); + if (n < 0 || n >= (int)len) + return -1; + return 0; +} + +int +openpersist(int *valid) +{ + char path[256]; + struct stat st; + struct timespec ts; + int fd; + + if (stat(PERSIST_DIR, &st) < 0) { + if (errno != ENOENT) + return -1; + if (mkdir(PERSIST_DIR, 0700) < 0) + return -1; + } else if (st.st_uid != 0 || st.st_mode != (S_IFDIR | 0700)) { + return -1; + } + if (persistpath(path, sizeof(path)) < 0) + return -1; + fd = open(path, O_RDONLY); + if (fd == -1) { + char tmp[256]; + struct timespec ts[2] = { { .tv_nsec = UTIME_OMIT }, { 0 } }; + int n; + + n = snprintf(tmp, sizeof(tmp), PERSIST_DIR "/.tmp-%d", getpid()); + if (n < 0 || n >= (int)sizeof(tmp)) + return -1; + fd = open(tmp, O_RDONLY | O_CREAT | O_EXCL, 0); + if (fd == -1) + return -1; + if (futimens(fd, ts) < 0 || rename(tmp, path) < 0) { + close(fd); + unlink(tmp); + return -1; + } + *valid = 0; + } else { + *valid = clock_gettime(CLOCK_BOOTTIME, &ts) == 0 && + fstat(fd, &st) == 0 && + (ts.tv_sec < st.st_mtim.tv_sec || + (ts.tv_sec == st.st_mtim.tv_sec && ts.tv_nsec < st.st_mtim.tv_nsec)) && + st.st_mtime - ts.tv_sec <= PERSIST_TIMEOUT; + } + return fd; +} + +int +setpersist(int fd) +{ + struct timespec times[2]; + + if (clock_gettime(CLOCK_BOOTTIME, &times[1]) < 0) + return -1; + times[0].tv_nsec = UTIME_OMIT; + times[1].tv_sec += PERSIST_TIMEOUT; + return futimens(fd, times); +} + +int +clearpersist(void) +{ + char path[256]; + + if (persistpath(path, sizeof(path)) < 0) + return -1; + if (unlink(path) < 0 && errno != ENOENT) + return -1; + return 0; +} diff --git a/usr.bin/m4/CVS/Entries b/usr.bin/m4/CVS/Entries new file mode 100644 index 0000000..382e47b --- /dev/null +++ b/usr.bin/m4/CVS/Entries @@ -0,0 +1,18 @@ +/Makefile/1.16/Sun Jul 9 14:04:50 2017// +/NOTES/1.1.1.1/Wed Oct 18 08:45:35 1995// +/eval.c/1.78/Fri Jun 28 05:35:34 2019// +/expr.c/1.18/Tue Sep 7 19:58:09 2010// +/extern.h/1.55/Thu Jun 15 13:48:42 2017// 
+/gnum4.c/1.52/Mon Aug 21 21:41:13 2017// +/look.c/1.24/Sun Dec 21 09:33:12 2014// +/m4.1/1.64/Thu Jun 15 13:48:42 2017// +/main.c/1.87/Thu Jun 15 13:48:42 2017// +/mdef.h/1.33/Tue Nov 3 16:21:47 2015// +/misc.c/1.47/Thu Jun 15 13:48:42 2017// +/parser.y/1.7/Thu Apr 12 17:00:11 2012// +/pathnames.h/1.6/Tue Nov 3 16:21:47 2015// +/stdd.h/1.6/Tue Sep 7 19:58:09 2010// +/tokenizer.l/1.10/Sat Jun 17 01:55:16 2017// +/trace.c/1.16/Tue Sep 7 19:58:09 2010// +D/PSD.doc//// +D/TEST//// diff --git a/usr.bin/m4/CVS/Repository b/usr.bin/m4/CVS/Repository new file mode 100644 index 0000000..534b442 --- /dev/null +++ b/usr.bin/m4/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/m4 diff --git a/usr.bin/m4/CVS/Root b/usr.bin/m4/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/m4/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/m4/Makefile b/usr.bin/m4/Makefile new file mode 100644 index 0000000..acef575 --- /dev/null +++ b/usr.bin/m4/Makefile @@ -0,0 +1,17 @@ +# $OpenBSD: Makefile,v 1.16 2017/07/09 14:04:50 espie Exp $ + +# -DEXTENDED +# if you want the paste & spaste macros. + +PROG= m4 +CFLAGS+=-DEXTENDED -I. +CDIAGFLAGS=-W -Wall -Wstrict-prototypes -pedantic \ + -Wno-unused -Wno-char-subscripts -Wno-sign-compare + +LDADD= -lm -lutil +DPADD= ${LIBM} ${LIBUTIL} + +SRCS= eval.c expr.c look.c main.c misc.c gnum4.c trace.c tokenizer.l parser.y +MAN= m4.1 + +.include <bsd.prog.mk> diff --git a/usr.bin/m4/NOTES b/usr.bin/m4/NOTES new file mode 100644 index 0000000..d60f80e --- /dev/null +++ b/usr.bin/m4/NOTES @@ -0,0 +1,64 @@ +m4 - macro processor + +PD m4 is based on the macro tool distributed with the software +tools (VOS) package, and described in the "SOFTWARE TOOLS" and +"SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include +most of the command set of SysV m4, the standard UN*X macro processor. + +Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro, +there may be certain implementation similarities between +the two. 
The PD m4 was produced without ANY references to m4 +sources. + +written by: Ozan S. Yigit + +References: + + Software Tools distribution: macro + + Kernighan, Brian W. and P. J. Plauger, SOFTWARE + TOOLS IN PASCAL, Addison-Wesley, Mass. 1981 + + Kernighan, Brian W. and P. J. Plauger, SOFTWARE + TOOLS, Addison-Wesley, Mass. 1976 + + Kernighan, Brian W. and Dennis M. Ritchie, + THE M4 MACRO PROCESSOR, Unix Programmer's Manual, + Seventh Edition, Vol. 2, Bell Telephone Labs, 1979 + + System V man page for M4 + + +Implementation Notes: + +[1] PD m4 uses a different (and simpler) stack mechanism than the one + described in Software Tools and Software Tools in Pascal books. + The triple stack thing is replaced with a single stack containing + the call frames and the arguments. Each frame is back-linked to a + previous stack frame, which enables us to rewind the stack after + each nested call is completed. Each argument is a character pointer + to the beginning of the argument string within the string space. + The only exceptions to this are (*) arg 0 and arg 1, which are + the macro definition and macro name strings, stored dynamically + for the hash table. + + . . + | . | <-- sp | . | + +-------+ +-----+ + | arg 3 ------------------------------->| str | + +-------+ | . | + | arg 2 --------------+ . + +-------+ | + * | | | + +-------+ | +-----+ + | plev | <-- fp +---------------->| str | + +-------+ | . | + | type | . + +-------+ + | prcf -----------+ plev: paren level + +-------+ | type: call type + | . | | prcf: prev. call frame + . 
| + +-------+ | + | <----------+ + +-------+ diff --git a/usr.bin/m4/PSD.doc/CVS/Entries b/usr.bin/m4/PSD.doc/CVS/Entries new file mode 100644 index 0000000..8644d2d --- /dev/null +++ b/usr.bin/m4/PSD.doc/CVS/Entries @@ -0,0 +1,3 @@ +/Makefile/1.4/Sun Feb 1 14:43:10 2004// +/m4.ms/1.2/Thu Jun 26 16:18:48 2003// +D diff --git a/usr.bin/m4/PSD.doc/CVS/Repository b/usr.bin/m4/PSD.doc/CVS/Repository new file mode 100644 index 0000000..d480cc8 --- /dev/null +++ b/usr.bin/m4/PSD.doc/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/m4/PSD.doc diff --git a/usr.bin/m4/PSD.doc/CVS/Root b/usr.bin/m4/PSD.doc/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/m4/PSD.doc/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/m4/PSD.doc/Makefile b/usr.bin/m4/PSD.doc/Makefile new file mode 100644 index 0000000..12bff06 --- /dev/null +++ b/usr.bin/m4/PSD.doc/Makefile @@ -0,0 +1,11 @@ +# $OpenBSD: Makefile,v 1.4 2004/02/01 14:43:10 jmc Exp $ + + +DIR= psd/17.m4 +SRCS= m4.ms +MACROS= -ms + +paper.txt: ${SRCS} + ${ROFF} -Tascii ${SRCS} > ${.TARGET} + +.include <bsd.doc.mk> diff --git a/usr.bin/m4/PSD.doc/m4.ms b/usr.bin/m4/PSD.doc/m4.ms new file mode 100644 index 0000000..1163ee4 --- /dev/null +++ b/usr.bin/m4/PSD.doc/m4.ms @@ -0,0 +1,967 @@ +.\" $OpenBSD: m4.ms,v 1.2 2003/06/26 16:18:48 mickey Exp $ +.\" +.\" Copyright (C) Caldera International Inc. 2001-2002. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code and documentation must retain the above +.\" copyright notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" 4. Neither the name of Caldera International, Inc. nor the names of other +.\" contributors may be used to endorse or promote products derived from +.\" this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, +.\" INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" @(#)m4.ms 6.3 (Berkeley) 6/5/93 +.\" +.EH 'PSD:17-%''The M4 Macro Processor' +.OH 'The M4 Macro Processor''PSD:17-%' +.if n .ls 2 +.tr _\(em +.tr *\(** +.de UC +\&\\$3\s-1\\$1\\s0\&\\$2 +.. +.de IT +.if n .ul +\&\\$3\f2\\$1\fP\&\\$2 +.. +.de UL +.if n .ul +\&\\$3\f3\\$1\fP\&\\$2 +.. +.de P1 +.DS I 3n +.if n .ls 2 +.nf +.if n .ta 5 10 15 20 25 30 35 40 45 50 55 60 +.if t .ta .4i .8i 1.2i 1.6i 2i 2.4i 2.8i 3.2i 3.6i 4i 4.4i 4.8i 5.2i 5.6i +.if t .tr -\(mi|\(bv'\(fm^\(no*\(** +.tr `\(ga'\(aa +.if t .tr _\(ul +.ft 3 +.lg 0 +.. +.de P2 +.ps \\n(PS +.vs \\n(VSp +.ft R +.if n .ls 2 +.tr --||''^^!! +.if t .tr _\(em +.fi +.lg +.DE +.if t .tr _\(em +.. +.hw semi-colon +.hw estab-lished +.hy 14 +. 
\"2=not last lines; 4= no -xx; 8=no xx- +. \"special chars in programs +. \" start of text +.\".RP +.....TR 59 +.....TM 77-1273-6 39199 39199-11 +.ND "July 1, 1977" +.TL +The M4 Macro Processor +.AU "MH 2C-518" 6021 +Brian W. Kernighan +.AU "MH 2C-517" 3770 +Dennis M. Ritchie +.AI +.MH +.AB +.PP +M4 is a macro processor available on +.UX +and +.UC GCOS . +Its primary use has been as a +front end for Ratfor for those +cases where parameterless macros +are not adequately powerful. +It has also been used for languages as disparate as C and Cobol. +M4 is particularly suited for functional languages like Fortran, PL/I and C +since macros are specified in a functional notation. +.PP +M4 provides features seldom found even in much larger +macro processors, +including +.IP " \(bu" +arguments +.IP " \(bu" +condition testing +.IP " \(bu" +arithmetic capabilities +.IP " \(bu" +string and substring functions +.IP " \(bu" +file manipulation +.LP +.PP +This paper is a user's manual for M4. +.AE +.CS 6 0 6 0 0 1 +.if t .2C +.SH +Introduction +.PP +A macro processor is a useful way to enhance a programming language, +to make it more palatable +or more readable, +or to tailor it to a particular application. +The +.UL #define +statement in C +and the analogous +.UL define +in Ratfor +are examples of the basic facility provided by +any macro processor _ +replacement of text by other text. +.PP +The M4 macro processor is an extension of a macro processor called M3 +which was written by D. M. Ritchie +for the AP-3 minicomputer; +M3 was in turn based on a macro processor implemented for [1]. +Readers unfamiliar with the basic ideas of macro processing +may wish to read some of the discussion there. +.PP +M4 is a suitable front end for Ratfor and C, +and has also been used successfully with Cobol. 
+Besides the straightforward replacement of one string of text by another, +it provides +macros with arguments, +conditional macro expansion, +arithmetic, +file manipulation, +and some specialized string processing functions. +.PP +The basic operation of M4 +is to copy its input to its output. +As the input is read, however, each alphanumeric ``token'' +(that is, string of letters and digits) is checked. +If it is the name of a macro, +then the name of the macro is replaced by its defining text, +and the resulting string is pushed back onto the +input to be rescanned. +Macros may be called with arguments, in which case the arguments are collected +and substituted into the right places in the defining text +before it is rescanned. +.PP +M4 provides a collection of about twenty built-in +macros +which perform various useful operations; +in addition, the user can define new macros. +Built-ins and user-defined macros work exactly the same way, except that +some of the built-in macros have side effects +on the state of the process. +.SH +Usage +.PP +On +.UC UNIX , +use +.P1 +m4 [files] +.P2 +Each argument file is processed in order; +if there are no arguments, or if an argument +is `\-', +the standard input is read at that point. +The processed text is written on the standard output, +which may be captured for subsequent processing with +.P1 +m4 [files] >outputfile +.P2 +On +.UC GCOS , +usage is identical, but the program is called +.UL \&./m4 . +.SH +Defining Macros +.PP +The primary built-in function of M4 +is +.UL define , +which is used to define new macros. +The input +.P1 +define(name, stuff) +.P2 +causes the string +.UL name +to be defined as +.UL stuff . +All subsequent occurrences of +.UL name +will be replaced by +.UL stuff . +.UL name +must be alphanumeric and must begin with a letter +(the underscore \(ul counts as a letter). +.UL stuff +is any text that contains balanced parentheses; +it may stretch over multiple lines. 
+.PP +Thus, as a typical example, +.P1 +define(N, 100) + ... +if (i > N) +.P2 +defines +.UL N +to be 100, and uses this ``symbolic constant'' in a later +.UL if +statement. +.PP +The left parenthesis must immediately follow the word +.UL define , +to signal that +.UL define +has arguments. +If a macro or built-in name is not followed immediately by `(', +it is assumed to have no arguments. +This is the situation for +.UL N +above; +it is actually a macro with no arguments, +and thus when it is used there need be no (...) following it. +.PP +You should also notice that a macro name is only recognized as such +if it appears surrounded by non-alphanumerics. +For example, in +.P1 +define(N, 100) + ... +if (NNN > 100) +.P2 +the variable +.UL NNN +is absolutely unrelated to the defined macro +.UL N , +even though it contains a lot of +.UL N 's. +.PP +Things may be defined in terms of other things. +For example, +.P1 +define(N, 100) +define(M, N) +.P2 +defines both M and N to be 100. +.PP +What happens if +.UL N +is redefined? +Or, to say it another way, is +.UL M +defined as +.UL N +or as 100? +In M4, +the latter is true _ +.UL M +is 100, so even if +.UL N +subsequently changes, +.UL M +does not. +.PP +This behavior arises because +M4 expands macro names into their defining text as soon as it possibly can. +Here, that means that when the string +.UL N +is seen as the arguments of +.UL define +are being collected, it is immediately replaced by 100; +it's just as if you had said +.P1 +define(M, 100) +.P2 +in the first place. +.PP +If this isn't what you really want, there are two ways out of it. +The first, which is specific to this situation, +is to interchange the order of the definitions: +.P1 +define(M, N) +define(N, 100) +.P2 +Now +.UL M +is defined to be the string +.UL N , +so when you ask for +.UL M +later, you'll always get the value of +.UL N +at that time +(because the +.UL M +will be replaced by +.UL N +which will be replaced by 100). 
+.SH +Quoting +.PP +The more general solution is to delay the expansion of +the arguments of +.UL define +by +.ul +quoting +them. +Any text surrounded by the single quotes \(ga and \(aa +is not expanded immediately, but has the quotes stripped off. +If you say +.P1 +define(N, 100) +define(M, `N') +.P2 +the quotes around the +.UL N +are stripped off as the argument is being collected, +but they have served their purpose, and +.UL M +is defined as +the string +.UL N , +not 100. +The general rule is that M4 always strips off +one level of single quotes whenever it evaluates +something. +This is true even outside of +macros. +If you want the word +.UL define +to appear in the output, +you have to quote it in the input, +as in +.P1 + `define' = 1; +.P2 +.PP +As another instance of the same thing, which is a bit more surprising, +consider redefining +.UL N : +.P1 +define(N, 100) + ... +define(N, 200) +.P2 +Perhaps regrettably, the +.UL N +in the second definition is +evaluated as soon as it's seen; +that is, it is +replaced by +100, so it's as if you had written +.P1 +define(100, 200) +.P2 +This statement is ignored by M4, since you can only define things that look +like names, but it obviously doesn't have the effect you wanted. +To really redefine +.UL N , +you must delay the evaluation by quoting: +.P1 +define(N, 100) + ... +define(`N', 200) +.P2 +In M4, +it is often wise to quote the first argument of a macro. +.PP +If \` and \' are not convenient for some reason, +the quote characters can be changed with the built-in +.UL changequote : +.P1 +changequote([, ]) +.P2 +makes the new quote characters the left and right brackets. +You can restore the original characters with just +.P1 +changequote +.P2 +.PP +There are two additional built-ins related to +.UL define . +.UL undefine +removes the definition of some macro or built-in: +.P1 +undefine(`N') +.P2 +removes the definition of +.UL N . +(Why are the quotes absolutely necessary?) 
+Built-ins can be removed with +.UL undefine , +as in +.P1 +undefine(`define') +.P2 +but once you remove one, you can never get it back. +.PP +The built-in +.UL ifdef +provides a way to determine if a macro is currently defined. +In particular, M4 has pre-defined the names +.UL unix +and +.UL gcos +on the corresponding systems, so you can +tell which one you're using: +.P1 +ifdef(`unix', `define(wordsize,16)' ) +ifdef(`gcos', `define(wordsize,36)' ) +.P2 +makes a definition appropriate for the particular machine. +Don't forget the quotes! +.PP +.UL ifdef +actually permits three arguments; +if the name is undefined, the value of +.UL ifdef +is then the third argument, as in +.P1 +ifdef(`unix', on UNIX, not on UNIX) +.P2 +.SH +Arguments +.PP +So far we have discussed the simplest form of macro processing _ +replacing one string by another (fixed) string. +User-defined macros may also have arguments, so different invocations +can have different results. +Within the replacement text for a macro +(the second argument of its +.UL define ) +any occurrence of +.UL $n +will be replaced by the +.UL n th +argument when the macro +is actually used. +Thus, the macro +.UL bump , +defined as +.P1 +define(bump, $1 = $1 + 1) +.P2 +generates code to increment its argument by 1: +.P1 +bump(x) +.P2 +is +.P1 +x = x + 1 +.P2 +.PP +A macro can have as many arguments as you want, +but only the first nine are accessible, +through +.UL $1 +to +.UL $9 . +(The macro name itself is +.UL $0 , +although that is less commonly used.) +Arguments that are not supplied are replaced by null strings, +so +we can define a macro +.UL cat +which simply concatenates its arguments, like this: +.P1 +define(cat, $1$2$3$4$5$6$7$8$9) +.P2 +Thus +.P1 +cat(x, y, z) +.P2 +is equivalent to +.P1 +xyz +.P2 +.UL $4 +through +.UL $9 +are null, since no corresponding arguments were provided. +.PP +.PP +Leading unquoted blanks, tabs, or newlines that occur during argument collection +are discarded. 
+All other white space is retained. +Thus +.P1 +define(a, b c) +.P2 +defines +.UL a +to be +.UL b\ \ \ c . +.PP +Arguments are separated by commas, but parentheses are counted properly, +so a comma ``protected'' by parentheses does not terminate an argument. +That is, in +.P1 +define(a, (b,c)) +.P2 +there are only two arguments; +the second is literally +.UL (b,c) . +And of course a bare comma or parenthesis can be inserted by quoting it. +.SH +Arithmetic Built-ins +.PP +M4 provides two built-in functions for doing arithmetic +on integers (only). +The simplest is +.UL incr , +which increments its numeric argument by 1. +Thus to handle the common programming situation +where you want a variable to be defined as ``one more than N'', +write +.P1 +define(N, 100) +define(N1, `incr(N)') +.P2 +Then +.UL N1 +is defined as one more than the current value of +.UL N . +.PP +The more general mechanism for arithmetic is a built-in +called +.UL eval , +which is capable of arbitrary arithmetic on integers. +It provides the operators +(in decreasing order of precedence) +.DS +unary + and \(mi +** or ^ (exponentiation) +* / % (modulus) ++ \(mi +== != < <= > >= +! (not) +& or && (logical and) +\(or or \(or\(or (logical or) +.DE +Parentheses may be used to group operations where needed. +All the operands of +an expression given to +.UL eval +must ultimately be numeric. +The numeric value of a true relation +(like 1>0) +is 1, and false is 0. +The precision in +.UL eval +is +32 bits on +.UC UNIX +and 36 bits on +.UC GCOS . +.PP +As a simple example, suppose we want +.UL M +to be +.UL 2**N+1 . +Then +.P1 +define(N, 3) +define(M, `eval(2**N+1)') +.P2 +As a matter of principle, it is advisable +to quote the defining text for a macro +unless it is very simple indeed +(say just a number); +it usually gives the result you want, +and is a good habit to get into. 
+.SH +File Manipulation +.PP +You can include a new file in the input at any time by +the built-in function +.UL include : +.P1 +include(filename) +.P2 +inserts the contents of +.UL filename +in place of the +.UL include +command. +The contents of the file is often a set of definitions. +The value +of +.UL include +(that is, its replacement text) +is the contents of the file; +this can be captured in definitions, etc. +.PP +It is a fatal error if the file named in +.UL include +cannot be accessed. +To get some control over this situation, the alternate form +.UL sinclude +can be used; +.UL sinclude +(``silent include'') +says nothing and continues if it can't access the file. +.PP +It is also possible to divert the output of M4 to temporary files during processing, +and output the collected material upon command. +M4 maintains nine of these diversions, numbered 1 through 9. +If you say +.P1 +divert(n) +.P2 +all subsequent output is put onto the end of a temporary file +referred to as +.UL n . +Diverting to this file is stopped by another +.UL divert +command; +in particular, +.UL divert +or +.UL divert(0) +resumes the normal output process. +.PP +Diverted text is normally output all at once +at the end of processing, +with the diversions output in numeric order. +It is possible, however, to bring back diversions +at any time, +that is, to append them to the current diversion. +.P1 +undivert +.P2 +brings back all diversions in numeric order, and +.UL undivert +with arguments brings back the selected diversions +in the order given. +The act of undiverting discards the diverted stuff, +as does diverting into a diversion +whose number is not between 0 and 9 inclusive. +.PP +The value of +.UL undivert +is +.ul +not +the diverted stuff. +Furthermore, the diverted material is +.ul +not +rescanned for macros. +.PP +The built-in +.UL divnum +returns the number of the currently active diversion. +This is zero during normal processing. 
+.SH +System Command +.PP +You can run any program in the local operating system +with the +.UL syscmd +built-in. +For example, +.P1 +syscmd(date) +.P2 +on +.UC UNIX +runs the +.UL date +command. +Normally +.UL syscmd +would be used to create a file +for a subsequent +.UL include . +.PP +To facilitate making unique file names, the built-in +.UL maketemp +is provided, with specifications identical to the system function +.ul +mktemp: +a string of XXXXX in the argument is replaced +by the process id of the current process. +.SH +Conditionals +.PP +There is a built-in called +.UL ifelse +which enables you to perform arbitrary conditional testing. +In the simplest form, +.P1 +ifelse(a, b, c, d) +.P2 +compares the two strings +.UL a +and +.UL b . +If these are identical, +.UL ifelse +returns +the string +.UL c ; +otherwise it returns +.UL d . +Thus we might define a macro called +.UL compare +which compares two strings and returns ``yes'' or ``no'' +if they are the same or different. +.P1 +define(compare, `ifelse($1, $2, yes, no)') +.P2 +Note the quotes, +which prevent too-early evaluation of +.UL ifelse . +.PP +If the fourth argument is missing, it is treated as empty. +.PP +.UL ifelse +can actually have any number of arguments, +and thus provides a limited form of multi-way decision capability. +In the input +.P1 +ifelse(a, b, c, d, e, f, g) +.P2 +if the string +.UL a +matches the string +.UL b , +the result is +.UL c . +Otherwise, if +.UL d +is the same as +.UL e , +the result is +.UL f . +Otherwise the result is +.UL g . +If the final argument +is omitted, the result is null, +so +.P1 +ifelse(a, b, c) +.P2 +is +.UL c +if +.UL a +matches +.UL b , +and null otherwise. +.SH +String Manipulation +.PP +The built-in +.UL len +returns the length of the string that makes up its argument. +Thus +.P1 +len(abcdef) +.P2 +is 6, and +.UL len((a,b)) +is 5. +.PP +The built-in +.UL substr +can be used to produce substrings of strings. 
+.UL substr(s,\ i,\ n) +returns the substring of +.UL s +that starts at the +.UL i th +position +(origin zero), +and is +.UL n +characters long. +If +.UL n +is omitted, the rest of the string is returned, +so +.P1 +substr(`now is the time', 1) +.P2 +is +.P1 +ow is the time +.P2 +If +.UL i +or +.UL n +are out of range, various sensible things happen. +.PP +.UL index(s1,\ s2) +returns the index (position) in +.UL s1 +where the string +.UL s2 +occurs, or \-1 +if it doesn't occur. +As with +.UL substr , +the origin for strings is 0. +.PP +The built-in +.UL translit +performs character transliteration. +.P1 +translit(s, f, t) +.P2 +modifies +.UL s +by replacing any character found in +.UL f +by the corresponding character of +.UL t . +That is, +.P1 +translit(s, aeiou, 12345) +.P2 +replaces the vowels by the corresponding digits. +If +.UL t +is shorter than +.UL f , +characters which don't have an entry in +.UL t +are deleted; as a limiting case, +if +.UL t +is not present at all, +characters from +.UL f +are deleted from +.UL s . +So +.P1 +translit(s, aeiou) +.P2 +deletes vowels from +.UL s . +.PP +There is also a built-in called +.UL dnl +which deletes all characters that follow it up to +and including the next newline; +it is useful mainly for throwing away +empty lines that otherwise tend to clutter up M4 output. +For example, if you say +.P1 +define(N, 100) +define(M, 200) +define(L, 300) +.P2 +the newline at the end of each line is not part of the definition, +so it is copied into the output, where it may not be wanted. +If you add +.UL dnl +to each of these lines, the newlines will disappear. +.PP +Another way to achieve this, due to J. E. Weythman, +is +.P1 +divert(-1) + define(...) + ... +divert +.P2 +.SH +Printing +.PP +The built-in +.UL errprint +writes its arguments out on the standard error file. +Thus you can say +.P1 +errprint(`fatal error') +.P2 +.PP +.UL dumpdef +is a debugging aid which +dumps the current definitions of defined terms. 
+If there are no arguments, you get everything; +otherwise you get the ones you name as arguments. +Don't forget to quote the names! +.SH +Summary of Built-ins +.PP +Each entry is preceded by the +page number where it is described. +.DS +.tr '\'`\` +.ta .25i +3 changequote(L, R) +1 define(name, replacement) +4 divert(number) +4 divnum +5 dnl +5 dumpdef(`name', `name', ...) +5 errprint(s, s, ...) +4 eval(numeric expression) +3 ifdef(`name', this if true, this if false) +5 ifelse(a, b, c, d) +4 include(file) +3 incr(number) +5 index(s1, s2) +5 len(string) +4 maketemp(...XXXXX...) +4 sinclude(file) +5 substr(string, position, number) +4 syscmd(s) +5 translit(str, from, to) +3 undefine(`name') +4 undivert(number,number,...) +.DE +.SH +Acknowledgements +.PP +We are indebted to Rick Becker, John Chambers, +Doug McIlroy, +and especially Jim Weythman, +whose pioneering use of M4 has led to several valuable improvements. +We are also deeply grateful to Weythman for several substantial contributions +to the code. +.SG +.SH +References +.LP +.IP [1] +B. W. Kernighan and P. J. Plauger, +.ul +Software Tools, +Addison-Wesley, Inc., 1976. 
diff --git a/usr.bin/m4/TEST/CVS/Entries b/usr.bin/m4/TEST/CVS/Entries new file mode 100644 index 0000000..390dc32 --- /dev/null +++ b/usr.bin/m4/TEST/CVS/Entries @@ -0,0 +1,7 @@ +/ack.m4/1.3/Tue Jun 3 02:56:11 2003// +/hanoi.m4/1.3/Tue Jun 3 02:56:11 2003// +/hash.m4/1.3/Tue Jun 3 02:56:11 2003// +/sqroot.m4/1.3/Tue Jun 3 02:56:11 2003// +/string.m4/1.3/Tue Jun 3 02:56:11 2003// +/test.m4/1.3/Tue Jun 3 02:56:11 2003// +D diff --git a/usr.bin/m4/TEST/CVS/Repository b/usr.bin/m4/TEST/CVS/Repository new file mode 100644 index 0000000..dbd70ae --- /dev/null +++ b/usr.bin/m4/TEST/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/m4/TEST diff --git a/usr.bin/m4/TEST/CVS/Root b/usr.bin/m4/TEST/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/m4/TEST/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/m4/TEST/ack.m4 b/usr.bin/m4/TEST/ack.m4 new file mode 100644 index 0000000..632e207 --- /dev/null +++ b/usr.bin/m4/TEST/ack.m4 @@ -0,0 +1,38 @@ +# $OpenBSD: ack.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: ack.m4,v 1.4 1995/09/28 05:37:54 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)ack.m4 8.1 (Berkeley) 6/6/93 +# + +define(ack, `ifelse($1,0,incr($2),$2,0,`ack(DECR($1),1)', +`ack(DECR($1), ack($1,DECR($2)))')') diff --git a/usr.bin/m4/TEST/hanoi.m4 b/usr.bin/m4/TEST/hanoi.m4 new file mode 100644 index 0000000..008ac14 --- /dev/null +++ b/usr.bin/m4/TEST/hanoi.m4 @@ -0,0 +1,43 @@ +# $OpenBSD: hanoi.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: hanoi.m4,v 1.4 1995/09/28 05:37:56 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. 
Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)hanoi.m4 8.1 (Berkeley) 6/6/93 +# + +define(hanoi, `trans(A, B, C, $1)') + +define(moved,`move disk from $1 to $2 +') + +define(trans, `ifelse($4,1,`moved($1,$2)', + `trans($1,$3,$2,DECR($4))moved($1,$2)trans($3,$2,$1,DECR($4))')') diff --git a/usr.bin/m4/TEST/hash.m4 b/usr.bin/m4/TEST/hash.m4 new file mode 100644 index 0000000..f46eb9e --- /dev/null +++ b/usr.bin/m4/TEST/hash.m4 @@ -0,0 +1,53 @@ +# $OpenBSD: hash.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: hash.m4,v 1.4 1995/09/28 05:37:58 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)hash.m4 8.1 (Berkeley) 6/6/93 +# + +dnl This probably will not run on any m4 that cannot +dnl handle char constants in eval. 
+dnl +changequote(<,>) define(HASHVAL,99) dnl +define(hash,<eval(str(substr($1,1),0)%HASHVAL)>) dnl +define(str, + <ifelse($1,",$2, + <str(substr(<$1>,1),<eval($2+'substr($1,0,1)')>)>) + >) dnl +define(KEYWORD,<$1,hash($1),>) dnl +define(TSTART, +<struct prehash { + char *keyword; + int hashval; +} keytab[] = {>) dnl +define(TEND,< "",0 +};>) dnl diff --git a/usr.bin/m4/TEST/sqroot.m4 b/usr.bin/m4/TEST/sqroot.m4 new file mode 100644 index 0000000..fa01674 --- /dev/null +++ b/usr.bin/m4/TEST/sqroot.m4 @@ -0,0 +1,43 @@ +# $OpenBSD: sqroot.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: sqroot.m4,v 1.4 1995/09/28 05:38:01 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)sqroot.m4 8.1 (Berkeley) 6/6/93 +# + +define(square_root, + `ifelse(eval($1<0),1,negative-square-root, + `square_root_aux($1, 1, eval(($1+1)/2))')') +define(square_root_aux, + `ifelse($3, $2, $3, + $3, eval($1/$2), $3, + `square_root_aux($1, $3, eval(($3+($1/$3))/2))')') diff --git a/usr.bin/m4/TEST/string.m4 b/usr.bin/m4/TEST/string.m4 new file mode 100644 index 0000000..32d3a71 --- /dev/null +++ b/usr.bin/m4/TEST/string.m4 @@ -0,0 +1,43 @@ +# $OpenBSD: string.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: string.m4,v 1.4 1995/09/28 05:38:03 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)string.m4 8.1 (Berkeley) 6/6/93 +# + +define(string,`integer $1(len(substr($2,1))) +str($1,substr($2,1),0) +data $1(len(substr($2,1)))/EOS/ +') + +define(str,`ifelse($2,",,data $1(incr($3))/`LET'substr($2,0,1)/ +`str($1,substr($2,1),incr($3))')') diff --git a/usr.bin/m4/TEST/test.m4 b/usr.bin/m4/TEST/test.m4 new file mode 100644 index 0000000..dd20317 --- /dev/null +++ b/usr.bin/m4/TEST/test.m4 @@ -0,0 +1,241 @@ +# $OpenBSD: test.m4,v 1.3 2003/06/03 02:56:11 millert Exp $ +# $NetBSD: test.m4,v 1.4 1995/09/28 05:38:05 tls Exp $ +# +# Copyright (c) 1989, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Ozan Yigit. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# @(#)test.m4 8.1 (Berkeley) 6/6/93 +# + +# test file for mp (not comprehensive) +# +# v7 m4 does not have `decr'. 
+# +define(DECR,`eval($1-1)') +# +# include string macros +# +include(string.m4) +# +# create some fortrash strings for an even uglier language +# +string(TEXT, "text") +string(DATA, "data") +string(BEGIN, "begin") +string(END, "end") +string(IF, "if") +string(THEN, "then") +string(ELSE, "else") +string(CASE, "case") +string(REPEAT, "repeat") +string(WHILE, "while") +string(DEFAULT, "default") +string(UNTIL, "until") +string(FUNCTION, "function") +string(PROCEDURE, "procedure") +string(EXTERNAL, "external") +string(FORWARD, "forward") +string(TYPE, "type") +string(VAR, "var") +string(CONST, "const") +string(PROGRAM, "program") +string(INPUT, "input") +string(OUTPUT, "output") +# +divert(2) +diversion #1 +divert(3) +diversion #2 +divert(4) +diversion #3 +divert(5) +diversion #4 +divert(0) +define(abc,xxx) +ifdef(`abc',defined,undefined) +# +# v7 m4 does this wrong. The right output is +# this is A vEry lon sEntEnCE +# see m4 documentation for translit. +# +translit(`this is a very long sentence', abcdefg, ABCDEF) +# +# include towers-of-hanoi +# +include(hanoi.m4) +# +# some reasonable set of disks +# +hanoi(6) +# +# include ackermann's function +# +include(ack.m4) +# +# something like (3,3) will blow away un*x m4. +# +ack(2,3) +# +# include a square_root function for fixed nums +# +include(sqroot.m4) +# +# some square roots. +# +square_root(15) +square_root(100) +square_root(-4) +square_root(21372) +# +# some textual material for enjoyment. +# +[taken from the 'Clemson University Computer Newsletter', + September 1981, pp. 6-7] + +I am a wizard in the magical Kingdom of Transformation and I +slay dragons for a living. Actually, I am a systems programmer. +One of the problems with systems programming is explaining to +non-computer enthusiasts what that is. All of the terms I use to +describe my job are totally meaningless to them. Usually my response +to questions about my work is to say as little as possible. 
For +instance, if someone asks what happened at work this week, I say +"Nothing much" and then I change the subject. + +With the assistance of my brother, a mechanical engineer, I have devised +an analogy that everyone can understand. The analogy describes the +"Kingdom of Transformation" where travelers wander and are magically +transformed. This kingdom is the computer and the travelers are information. +The purpose of the computer is to change information to a more meaningful +form. The law of conservation applies here: The computer never creates +and never intentionally destroys data. With no further ado, let us travel +to the Kingdom of Transformation: + +In a land far, far away, there is a magical kingdom called the Kingdom of +Transformation. A king rules over this land and employs a Council of +Wizardry. The main purpose of this kingdom is to provide a way for +neighboring kingdoms to transform citizens into more useful citizens. This +is done by allowing the citizens to enter the kingdom at one of its ports +and to travel any of the many routes in the kingdom. They are magically +transformed along the way. The income of the Kingdom of Transformation +comes from the many toll roads within its boundaries. + +The Kingdom of Transformation was created when several kingdoms got +together and discovered a mutual need for new talents and abilities for +citizens. They employed CTK, Inc. (Creators of Transformation, Inc.) to +create this kingdom. CTK designed the country, its transportation routes, +and its laws of transformation, and created the major highway system. + +Hazards +======= + +Because magic is not truly controllable, CTK invariably, but unknowingly, +creates dragons. Dragons are huge fire-breathing beasts which sometimes +injure or kill travelers. Fortunately, they do not travel, but always +remain near their den. + +Other hazards also exist which are potentially harmful.
As the roads +become older and more weatherbeaten, pot-holes will develop, trees will +fall on travelers, etc. CTK maintenance men are called to fix these +problems. + +Wizards +======= + +The wizards play a major role in creating and maintaining the kingdom but +get little credit for their work because it is performed secretly. The +wizards do not want the workers or travelers to learn their incantations +because many laws would be broken and chaos would result. + +CTK's grand design is always general enough to be applicable in many +different situations. As a result, it is often difficult to use. The +first duty of the wizards is to tailor the transformation laws so as to be +more beneficial and easier to use in their particular environment. + +After creation of the kingdom, a major duty of the wizards is to search for +and kill dragons. If travelers do not return on time or if they return +injured, the ruler of the country contacts the wizards. If the wizards +determine that the injury or death occurred due to the traveler's +negligence, they provide the traveler's country with additional warnings. +If not, they must determine if the cause was a road hazard or a dragon. If +they suspect a road hazard, they call in a CTK maintenance man to locate the +hazard and to eliminate it, as in repairing the pothole in the road. If +they think that the cause was a dragon, then they must find and slay it. + +The most difficult part of eliminating a dragon is finding it. Sometimes +the wizard magically knows where the dragon's lair is, but often the wizard +must send another traveler along the same route and watch to see where he +disappears. This sounds like a failsafe method for finding dragons (and a +suicide mission for the traveler) but the second traveler does not always +disappear. Some dragons eat any traveler who comes too close; others are +very picky. 
+ +The wizards may call in CTK who designed the highway system and +transformation laws to help devise a way to locate the dragon. CTK also +helps provide the right spell or incantation to slay the dragon. (There is +no general spell to slay dragons; each dragon must be eliminated with a +different spell.) + +Because neither CTK nor wizards are perfect, spells do not always work +correctly. At best, nothing happens when the wrong spell is uttered. At +worst, the dragon becomes a much larger dragon or multiplies into several +smaller ones. In either case, new spells must be found. + +If all existing dragons are quiet (i.e. have eaten sufficiently), wizards +have time to do other things. They hide in castles and practice spells and +incantations. They also devise shortcuts for travelers and new laws of +transformation. + +Changes in the Kingdom +====================== + +As new transformation kingdoms are created and old ones are maintained, +CTK, Inc. is constantly learning new things. It learns ways to avoid +creating some of the dragons that they have previously created. It also +discovers new and better laws of transformation. As a result, CTK will +periodically create a new grand design which is far better than the old. +The wizards determine when is a good time to implement this new design. +This is when the tourist season is slow or when no important travelers +(VIPs) are to arrive. The kingdom must be closed for the actual +implementation and is later reopened as a new and better place to go. + +A final question you might ask is what happens when the number of tourists +becomes too great for the kingdom to handle in a reasonable period of time +(i.e., the tourist lines at the ports are too long). 
The Kingdom of +Transformation has three options: (1) shorten the paths that a tourist must +travel, or (2) convince CTK to develop a faster breed of horses so that the +travelers can finish sooner, or (3) annex more territories so that the +kingdom can handle more travelers. + +Thus ends the story of the Kingdom of Transformation. I hope this has +explained my job to you: I slay dragons for a living. + +# +#should do an automatic undivert.. +# diff --git a/usr.bin/m4/eval.c b/usr.bin/m4/eval.c new file mode 100644 index 0000000..fe9fbde --- /dev/null +++ b/usr.bin/m4/eval.c @@ -0,0 +1,1033 @@ +/* $OpenBSD: eval.c,v 1.78 2019/06/28 05:35:34 deraadt Exp $ */ +/* $NetBSD: eval.c,v 1.7 1996/11/10 21:21:29 pk Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * eval.c + * Facility: m4 macro processor + * by: oz + */ + +#include <sys/types.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <unistd.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <fcntl.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" +#include "pathnames.h" + +static void dodefn(const char *); +static void dopushdef(const char *, const char *); +static void dodump(const char *[], int); +static void dotrace(const char *[], int, int); +static void doifelse(const char *[], int); +static int doincl(const char *); +#ifdef EXTENDED +static int dopaste(const char *); +#endif +static void dochq(const char *[], int); +static void dochc(const char *[], int); +static void dom4wrap(const char *); +static void dodiv(int); +static void doundiv(const char *[], int); +static void dosub(const char *[], int); +static void map(char *, const char *, const char *, const char *); +static const char *handledash(char *, char *, const char *); +static void expand_builtin(const char *[], int, int); +static void expand_macro(const char *[], int); +static void dump_one_def(const char *, struct macro_definition *); + +unsigned long expansion_id; + +/* + * eval - eval all macros and builtins calls + * argc - number of elements in argv. + * argv - element vector : + * argv[0] = definition of a user + * macro or NULL if built-in. 
+ * argv[1] = name of the macro or + * built-in. + * argv[2] = parameters to user-defined + * . macro or built-in. + * . + * + * A call in the form of macro-or-builtin() will result in: + * argv[0] = nullstr + * argv[1] = macro-or-builtin + * argv[2] = nullstr + * + * argc is 3 for macro-or-builtin() and 2 for macro-or-builtin + */ +void +eval(const char *argv[], int argc, int td, int is_traced) +{ + size_t mark = SIZE_MAX; + + expansion_id++; + if (td & RECDEF) + m4errx(1, "expanding recursive definition for %s.", argv[1]); + if (is_traced) + mark = trace(argv, argc, infile+ilevel); + if (td == MACRTYPE) + expand_macro(argv, argc); + else + expand_builtin(argv, argc, td); + if (mark != SIZE_MAX) + finish_trace(mark); +} + +/* + * expand_builtin - evaluate built-in macros. + */ +void +expand_builtin(const char *argv[], int argc, int td) +{ + int c, n; + const char *errstr; + int ac; + static int sysval = 0; + +#ifdef DEBUG + printf("argc = %d\n", argc); + for (n = 0; n < argc; n++) + printf("argv[%d] = %s\n", n, argv[n]); + fflush(stdout); +#endif + + /* + * if argc == 3 and argv[2] is null, then we + * have macro-or-builtin() type call. We adjust + * argc to avoid further checking.. + */ + /* we keep the initial value for those built-ins that differentiate + * between builtin() and builtin. + */ + ac = argc; + + if (argc == 3 && !*(argv[2]) && !mimic_gnu) + argc--; + + switch (td & TYPEMASK) { + + case DEFITYPE: + if (argc > 2) + dodefine(argv[2], (argc > 3) ? argv[3] : null); + break; + + case PUSDTYPE: + if (argc > 2) + dopushdef(argv[2], (argc > 3) ? 
argv[3] : null); + break; + + case DUMPTYPE: + dodump(argv, argc); + break; + + case TRACEONTYPE: + dotrace(argv, argc, 1); + break; + + case TRACEOFFTYPE: + dotrace(argv, argc, 0); + break; + + case EXPRTYPE: + /* + * doexpr - evaluate arithmetic + * expression + */ + { + int base = 10; + int maxdigits = 0; + + if (argc > 3) { + base = strtonum(argv[3], 2, 36, &errstr); + if (errstr) { + m4errx(1, "expr: base is %s: %s.", + errstr, argv[3]); + } + } + if (argc > 4) { + maxdigits = strtonum(argv[4], 0, INT_MAX, &errstr); + if (errstr) { + m4errx(1, "expr: maxdigits is %s: %s.", + errstr, argv[4]); + } + } + if (argc > 2) + pbnumbase(expr(argv[2]), base, maxdigits); + break; + } + + case IFELTYPE: + doifelse(argv, argc); + break; + + case IFDFTYPE: + /* + * doifdef - select one of two + * alternatives based on the existence of + * another definition + */ + if (argc > 3) { + if (lookup_macro_definition(argv[2]) != NULL) + pbstr(argv[3]); + else if (argc > 4) + pbstr(argv[4]); + } + break; + + case LENGTYPE: + /* + * dolen - find the length of the + * argument + */ + pbnum((argc > 2) ? strlen(argv[2]) : 0); + break; + + case INCRTYPE: + /* + * doincr - increment the value of the + * argument + */ + if (argc > 2) { + n = strtonum(argv[2], INT_MIN, INT_MAX-1, &errstr); + if (errstr != NULL) + m4errx(1, "incr: argument is %s: %s.", + errstr, argv[2]); + pbnum(n + 1); + } + break; + + case DECRTYPE: + /* + * dodecr - decrement the value of the + * argument + */ + if (argc > 2) { + n = strtonum(argv[2], INT_MIN+1, INT_MAX, &errstr); + if (errstr) + m4errx(1, "decr: argument is %s: %s.", + errstr, argv[2]); + pbnum(n - 1); + } + break; + + case SYSCTYPE: + /* + * dosys - execute system command + */ + if (argc > 2) { + fflush(stdout); + sysval = system(argv[2]); + } + break; + + case SYSVTYPE: + /* + * dosysval - return value of the last + * system call. 
+ * + */ + pbnum(sysval); + break; + + case ESYSCMDTYPE: + if (argc > 2) + doesyscmd(argv[2]); + break; + case INCLTYPE: + if (argc > 2) { + if (!doincl(argv[2])) { + if (mimic_gnu) { + warn("%s at line %lu: include(%s)", + CURRENT_NAME, CURRENT_LINE, argv[2]); + exit_code = 1; + if (fatal_warns) { + killdiv(); + exit(exit_code); + } + } else + err(1, "%s at line %lu: include(%s)", + CURRENT_NAME, CURRENT_LINE, argv[2]); + } + } + break; + + case SINCTYPE: + if (argc > 2) + (void) doincl(argv[2]); + break; +#ifdef EXTENDED + case PASTTYPE: + if (argc > 2) + if (!dopaste(argv[2])) + err(1, "%s at line %lu: paste(%s)", + CURRENT_NAME, CURRENT_LINE, argv[2]); + break; + + case SPASTYPE: + if (argc > 2) + (void) dopaste(argv[2]); + break; + case FORMATTYPE: + doformat(argv, argc); + break; +#endif + case CHNQTYPE: + dochq(argv, ac); + break; + + case CHNCTYPE: + dochc(argv, argc); + break; + + case SUBSTYPE: + /* + * dosub - select substring + * + */ + if (argc > 3) + dosub(argv, argc); + break; + + case SHIFTYPE: + /* + * doshift - push back all arguments + * except the first one (i.e. skip + * argv[2]) + */ + if (argc > 3) { + for (n = argc - 1; n > 3; n--) { + pbstr(rquote); + pbstr(argv[n]); + pbstr(lquote); + pushback(COMMA); + } + pbstr(rquote); + pbstr(argv[3]); + pbstr(lquote); + } + break; + + case DIVRTYPE: + if (argc > 2) { + n = strtonum(argv[2], INT_MIN, INT_MAX, &errstr); + if (errstr) + m4errx(1, "divert: argument is %s: %s.", + errstr, argv[2]); + if (n != 0) { + dodiv(n); + break; + } + } + active = stdout; + oindex = 0; + break; + + case UNDVTYPE: + doundiv(argv, argc); + break; + + case DIVNTYPE: + /* + * dodivnum - return the number of + * current output diversion + */ + pbnum(oindex); + break; + + case UNDFTYPE: + /* + * doundefine - undefine a previously + * defined macro(s) or m4 keyword(s). 
+ */ + if (argc > 2) + for (n = 2; n < argc; n++) + macro_undefine(argv[n]); + break; + + case POPDTYPE: + /* + * dopopdef - remove the topmost + * definitions of macro(s) or m4 + * keyword(s). + */ + if (argc > 2) + for (n = 2; n < argc; n++) + macro_popdef(argv[n]); + break; + + case MKTMTYPE: + /* + * dotemp - create a temporary file + */ + if (argc > 2) { + int fd; + char *temp; + + temp = xstrdup(argv[2]); + + fd = mkstemp(temp); + if (fd == -1) + err(1, + "%s at line %lu: couldn't make temp file %s", + CURRENT_NAME, CURRENT_LINE, argv[2]); + close(fd); + pbstr(temp); + free(temp); + } + break; + + case TRNLTYPE: + /* + * dotranslit - replace all characters in + * the source string that appears in the + * "from" string with the corresponding + * characters in the "to" string. + */ + if (argc > 3) { + char *temp; + + temp = xalloc(strlen(argv[2])+1, NULL); + if (argc > 4) + map(temp, argv[2], argv[3], argv[4]); + else + map(temp, argv[2], argv[3], null); + pbstr(temp); + free(temp); + } else if (argc > 2) + pbstr(argv[2]); + break; + + case INDXTYPE: + /* + * doindex - find the index of the second + * argument string in the first argument + * string. -1 if not present. + */ + pbnum((argc > 3) ? indx(argv[2], argv[3]) : -1); + break; + + case ERRPTYPE: + /* + * doerrp - print the arguments to stderr + * file + */ + if (argc > 2) { + for (n = 2; n < argc; n++) + fprintf(stderr, "%s ", argv[n]); + fprintf(stderr, "\n"); + } + break; + + case DNLNTYPE: + /* + * dodnl - eat-up-to and including + * newline + */ + while ((c = gpbc()) != '\n' && c != EOF) + ; + break; + + case M4WRTYPE: + /* + * dom4wrap - set up for + * wrap-up/wind-down activity + */ + if (argc > 2) + dom4wrap(argv[2]); + break; + + case EXITTYPE: + /* + * doexit - immediate exit from m4. + */ + killdiv(); + exit((argc > 2) ? 
atoi(argv[2]) : 0); + break; + + case DEFNTYPE: + if (argc > 2) + for (n = 2; n < argc; n++) + dodefn(argv[n]); + break; + + case INDIRTYPE: /* Indirect call */ + if (argc > 2) + doindir(argv, argc); + break; + + case BUILTINTYPE: /* Builtins only */ + if (argc > 2) + dobuiltin(argv, argc); + break; + + case PATSTYPE: + if (argc > 2) + dopatsubst(argv, argc); + break; + case REGEXPTYPE: + if (argc > 2) + doregexp(argv, argc); + break; + case LINETYPE: + doprintlineno(infile+ilevel); + break; + case FILENAMETYPE: + doprintfilename(infile+ilevel); + break; + case SELFTYPE: + pbstr(rquote); + pbstr(argv[1]); + pbstr(lquote); + break; + default: + m4errx(1, "eval: major botch."); + break; + } +} + +/* + * expand_macro - user-defined macro expansion + */ +void +expand_macro(const char *argv[], int argc) +{ + const char *t; + const char *p; + int n; + int argno; + + t = argv[0]; /* defn string as a whole */ + p = t; + while (*p) + p++; + p--; /* last character of defn */ + while (p > t) { + if (*(p - 1) != ARGFLAG) + PUSHBACK(*p); + else { + switch (*p) { + + case '#': + pbnum(argc - 2); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if ((argno = *p - '0') < argc - 1) + pbstr(argv[argno + 1]); + break; + case '*': + if (argc > 2) { + for (n = argc - 1; n > 2; n--) { + pbstr(argv[n]); + pushback(COMMA); + } + pbstr(argv[2]); + } + break; + case '@': + if (argc > 2) { + for (n = argc - 1; n > 2; n--) { + pbstr(rquote); + pbstr(argv[n]); + pbstr(lquote); + pushback(COMMA); + } + pbstr(rquote); + pbstr(argv[2]); + pbstr(lquote); + } + break; + default: + PUSHBACK(*p); + PUSHBACK('$'); + break; + } + p--; + } + p--; + } + if (p == t) /* do last character */ + PUSHBACK(*p); +} + + +/* + * dodefine - install definition in the table + */ +void +dodefine(const char *name, const char *defn) +{ + if (!*name && !mimic_gnu) + m4errx(1, "null definition."); + else + macro_define(name, defn); +} + +/* + 
* dodefn - push back a quoted definition of + * the given name. + */ +static void +dodefn(const char *name) +{ + struct macro_definition *p; + + if ((p = lookup_macro_definition(name)) != NULL) { + if ((p->type & TYPEMASK) == MACRTYPE) { + pbstr(rquote); + pbstr(p->defn); + pbstr(lquote); + } else { + pbstr(p->defn); + pbstr(BUILTIN_MARKER); + } + } +} + +/* + * dopushdef - install a definition in the hash table + * without removing a previous definition. Since + * each new entry is entered in *front* of the + * hash bucket, it hides a previous definition from + * lookup. + */ +static void +dopushdef(const char *name, const char *defn) +{ + if (!*name && !mimic_gnu) + m4errx(1, "null definition."); + else + macro_pushdef(name, defn); +} + +/* + * dump_one_def - dump the specified definition. + */ +static void +dump_one_def(const char *name, struct macro_definition *p) +{ + if (!traceout) + traceout = stderr; + if (mimic_gnu) { + if ((p->type & TYPEMASK) == MACRTYPE) + fprintf(traceout, "%s:\t%s\n", name, p->defn); + else { + fprintf(traceout, "%s:\t<%s>\n", name, p->defn); + } + } else + fprintf(traceout, "`%s'\t`%s'\n", name, p->defn); +} + +/* + * dodumpdef - dump the specified definitions in the hash + * table to stderr. If nothing is specified, the entire + * hash table is dumped. + */ +static void +dodump(const char *argv[], int argc) +{ + int n; + struct macro_definition *p; + + if (argc > 2) { + for (n = 2; n < argc; n++) + if ((p = lookup_macro_definition(argv[n])) != NULL) + dump_one_def(argv[n], p); + } else + macro_for_all(dump_one_def); +} + +/* + * dotrace - mark some macros as traced/untraced depending upon on. + */ +static void +dotrace(const char *argv[], int argc, int on) +{ + int n; + + if (argc > 2) { + for (n = 2; n < argc; n++) + mark_traced(argv[n], on); + } else + mark_traced(NULL, on); +} + +/* + * doifelse - select one of two alternatives - loop. 
+ */ +static void +doifelse(const char *argv[], int argc) +{ + while (argc > 4) { + if (STREQ(argv[2], argv[3])) { + pbstr(argv[4]); + break; + } else if (argc == 6) { + pbstr(argv[5]); + break; + } else { + argv += 3; + argc -= 3; + } + } +} + +/* + * doinclude - include a given file. + */ +static int +doincl(const char *ifile) +{ + if (ilevel + 1 == MAXINP) + m4errx(1, "too many include files."); + if (fopen_trypath(infile+ilevel+1, ifile) != NULL) { + ilevel++; + bbase[ilevel] = bufbase = bp; + return (1); + } else + return (0); +} + +#ifdef EXTENDED +/* + * dopaste - include a given file without any + * macro processing. + */ +static int +dopaste(const char *pfile) +{ + FILE *pf; + int c; + + if ((pf = fopen(pfile, "r")) != NULL) { + if (synch_lines) + fprintf(active, "#line 1 \"%s\"\n", pfile); + while ((c = getc(pf)) != EOF) + putc(c, active); + (void) fclose(pf); + emit_synchline(); + return (1); + } else + return (0); +} +#endif + +/* + * dochq - change quote characters + */ +static void +dochq(const char *argv[], int ac) +{ + if (ac == 2) { + lquote[0] = LQUOTE; lquote[1] = EOS; + rquote[0] = RQUOTE; rquote[1] = EOS; + } else { + strlcpy(lquote, argv[2], sizeof(lquote)); + if (ac > 3) { + strlcpy(rquote, argv[3], sizeof(rquote)); + } else { + rquote[0] = ECOMMT; rquote[1] = EOS; + } + } +} + +/* + * dochc - change comment characters + */ +static void +dochc(const char *argv[], int argc) +{ +/* XXX Note that there is no difference between no argument and a single + * empty argument. 
+ */ + if (argc == 2) { + scommt[0] = EOS; + ecommt[0] = EOS; + } else { + strlcpy(scommt, argv[2], sizeof(scommt)); + if (argc == 3) { + ecommt[0] = ECOMMT; ecommt[1] = EOS; + } else { + strlcpy(ecommt, argv[3], sizeof(ecommt)); + } + } +} + +/* + * dom4wrap - expand text at EOF + */ +static void +dom4wrap(const char *text) +{ + if (wrapindex >= maxwraps) { + if (maxwraps == 0) + maxwraps = 16; + else + maxwraps *= 2; + m4wraps = xreallocarray(m4wraps, maxwraps, sizeof(*m4wraps), + "too many m4wraps"); + } + m4wraps[wrapindex++] = xstrdup(text); +} + +/* + * dodivert - divert the output to a temporary file + */ +static void +dodiv(int n) +{ + int fd; + + oindex = n; + if (n >= maxout) { + if (mimic_gnu) + resizedivs(n + 10); + else + n = 0; /* bitbucket */ + } + + if (n < 0) + n = 0; /* bitbucket */ + if (outfile[n] == NULL) { + char fname[] = _PATH_DIVNAME; + + if ((fd = mkstemp(fname)) == -1 || + unlink(fname) == -1 || + (outfile[n] = fdopen(fd, "w+")) == NULL) + err(1, "%s: cannot divert", fname); + } + active = outfile[n]; +} + +/* + * doundivert - undivert a specified output, or all + * other outputs, in numerical order. 
+ */ +static void +doundiv(const char *argv[], int argc) +{ + int ind; + int n; + + if (argc > 2) { + for (ind = 2; ind < argc; ind++) { + const char *errstr; + n = strtonum(argv[ind], 1, INT_MAX, &errstr); + if (errstr) { + if (errno == EINVAL && mimic_gnu) + getdivfile(argv[ind]); + } else { + if (n < maxout && outfile[n] != NULL) + getdiv(n); + } + } + } + else + for (n = 1; n < maxout; n++) + if (outfile[n] != NULL) + getdiv(n); +} + +/* + * dosub - select substring + */ +static void +dosub(const char *argv[], int argc) +{ + const char *ap, *fc, *k; + int nc; + + ap = argv[2]; /* target string */ +#ifdef EXPR + fc = ap + expr(argv[3]); /* first char */ +#else + fc = ap + atoi(argv[3]); /* first char */ +#endif + nc = strlen(fc); + if (argc >= 5) +#ifdef EXPR + nc = min(nc, expr(argv[4])); +#else + nc = min(nc, atoi(argv[4])); +#endif + if (fc >= ap && fc < ap + strlen(ap)) + for (k = fc + nc - 1; k >= fc; k--) + pushback(*k); +} + +/* + * map: + * map every character of s1 that is specified in from + * into s3 and replace in s. (source s1 remains untouched) + * + * This is derived from the a standard implementation of map(s,from,to) + * function of ICON language. Within mapvec, we replace every character + * of "from" with the corresponding character in "to". + * If "to" is shorter than "from", than the corresponding entries are null, + * which means that those characters dissapear altogether. 
+ */ +static void +map(char *dest, const char *src, const char *from, const char *to) +{ + const char *tmp; + unsigned char sch, dch; + static char frombis[257]; + static char tobis[257]; + int i; + char seen[256]; + static unsigned char mapvec[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, + 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, + 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, + 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, + 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, + 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, + 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 + }; + + if (*src) { + if (mimic_gnu) { + /* + * expand character ranges on the fly + */ + from = handledash(frombis, frombis + 256, from); + to = handledash(tobis, tobis + 256, to); + } + tmp = from; + /* + * create a mapping between "from" and + * "to" + */ + for (i = 0; i < 256; i++) + seen[i] = 0; + while (*from) { + if (!seen[(unsigned char)(*from)]) { + mapvec[(unsigned char)(*from)] = (unsigned char)(*to); + seen[(unsigned char)(*from)] = 1; + } + from++; + if (*to) + to++; + } + + while (*src) { + sch = (unsigned 
char)(*src++); + dch = mapvec[sch]; + if ((*dest = (char)dch)) + dest++; + } + /* + * restore all the changed characters + */ + while (*tmp) { + mapvec[(unsigned char)(*tmp)] = (unsigned char)(*tmp); + tmp++; + } + } + *dest = '\0'; +} + + +/* + * handledash: + * use buffer to copy the src string, expanding character ranges + * on the way. + */ +static const char * +handledash(char *buffer, char *end, const char *src) +{ + char *p; + + p = buffer; + while(*src) { + if (src[1] == '-' && src[2]) { + unsigned char i; + if ((unsigned char)src[0] <= (unsigned char)src[2]) { + for (i = (unsigned char)src[0]; + i <= (unsigned char)src[2]; i++) { + *p++ = i; + if (p == end) { + *p = '\0'; + return buffer; + } + } + } else { + for (i = (unsigned char)src[0]; + i >= (unsigned char)src[2]; i--) { + *p++ = i; + if (p == end) { + *p = '\0'; + return buffer; + } + } + } + src += 3; + } else + *p++ = *src++; + if (p == end) + break; + } + *p = '\0'; + return buffer; +} diff --git a/usr.bin/m4/expr.c b/usr.bin/m4/expr.c new file mode 100644 index 0000000..6e0b45f --- /dev/null +++ b/usr.bin/m4/expr.c @@ -0,0 +1,43 @@ +/* $OpenBSD: expr.c,v 1.18 2010/09/07 19:58:09 marco Exp $ */ +/* + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <stdint.h> +#include <stdio.h> +#include <stddef.h> +#include "mdef.h" +#include "extern.h" + +int32_t end_result; +const char *copy_toeval; + +extern void yy_scan_string(const char *); +extern int yyparse(void); + +int +yyerror(const char *msg) +{ + fprintf(stderr, "m4: %s in expr %s\n", msg, copy_toeval); + return(0); +} + +int +expr(const char *toeval) +{ + copy_toeval = toeval; + yy_scan_string(toeval); + yyparse(); + return end_result; +} diff --git a/usr.bin/m4/extern.h b/usr.bin/m4/extern.h new file mode 100644 index 0000000..0c07599 --- /dev/null +++ b/usr.bin/m4/extern.h @@ -0,0 +1,182 @@ +/* $OpenBSD: extern.h,v 1.55 2017/06/15 13:48:42 bcallah Exp $ */ +/* $NetBSD: extern.h,v 1.3 1996/01/13 23:25:24 pk Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +/* eval.c */ +extern void eval(const char *[], int, int, int); +extern void dodefine(const char *, const char *); +extern unsigned long expansion_id; + +/* expr.c */ +extern int expr(const char *); + +/* gnum4.c */ +extern void addtoincludepath(const char *); +extern struct input_file *fopen_trypath(struct input_file *, const char *); +extern void doindir(const char *[], int); +extern void dobuiltin(const char *[], int); +extern void dopatsubst(const char *[], int); +extern void doregexp(const char *[], int); + +extern void doprintlineno(struct input_file *); +extern void doprintfilename(struct input_file *); + +extern void doesyscmd(const char *); +extern void getdivfile(const char *); +extern void doformat(const char *[], int); + +extern void m4_warnx(const char *, ...); + +/* look.c */ + +#define FLAG_UNTRACED 0 +#define FLAG_TRACED 1 +#define FLAG_NO_TRACE 2 + +extern void init_macros(void); +extern ndptr lookup(const char *); +extern void mark_traced(const char *, int); +extern struct ohash macros; + +extern struct macro_definition *lookup_macro_definition(const char *); +extern void macro_define(const char *, const char *); +extern void macro_pushdef(const char *, const char *); +extern void macro_popdef(const char *); +extern void macro_undefine(const char *); +extern void setup_builtin(const char *, unsigned int); +extern void macro_for_all(void (*)(const char *, struct 
macro_definition *)); +#define macro_getdef(p) ((p)->d) +#define macro_name(p) ((p)->name) +#define macro_builtin_type(p) ((p)->builtin_type) +#define is_traced(p) ((p)->trace_flags == FLAG_NO_TRACE ? (trace_flags & TRACE_ALL) : (p)->trace_flags) + +extern ndptr macro_getbuiltin(const char *); + +/* main.c */ +extern void outputstr(const char *); +extern void do_emit_synchline(void); +extern int exit_code; +#define emit_synchline() do { if (synch_lines) do_emit_synchline(); } while(0) + +/* misc.c */ +extern void chrsave(int); +extern char *compute_prevep(void); +extern void getdiv(int); +extern ptrdiff_t indx(const char *, const char *); +extern void initspaces(void); +extern void killdiv(void); +extern void onintr(int); +extern void pbnum(int); +extern void pbnumbase(int, int, int); +extern void pbunsigned(unsigned long); +extern void pbstr(const char *); +extern void pushback(int); +extern void *xalloc(size_t, const char *, ...); +extern void *xcalloc(size_t, size_t, const char *, ...); +extern void *xrealloc(void *, size_t, const char *, ...); +extern void *xreallocarray(void *, size_t, size_t, const char *, ...); +extern char *xstrdup(const char *); +extern void usage(void); +extern void resizedivs(int); +extern size_t buffer_mark(void); +extern void dump_buffer(FILE *, size_t); +extern void _Noreturn m4errx(int, const char *, ...); + +extern int obtain_char(struct input_file *); +extern void set_input(struct input_file *, FILE *, const char *); +extern void release_input(struct input_file *); + +/* speeded-up versions of chrsave/pushback */ +#define PUSHBACK(c) \ + do { \ + if (bp >= endpbb) \ + enlarge_bufspace(); \ + *bp++ = (c); \ + } while(0) + +#define CHRSAVE(c) \ + do { \ + if (ep >= endest) \ + enlarge_strspace(); \ + *ep++ = (c); \ + } while(0) + +/* and corresponding exposure for local symbols */ +extern void enlarge_bufspace(void); +extern void enlarge_strspace(void); +extern unsigned char *endpbb; +extern char *endest; + +/* trace.c */ +extern 
unsigned int trace_flags; +#define TRACE_ALL 512 +extern void trace_file(const char *); +extern size_t trace(const char **, int, struct input_file *); +extern void finish_trace(size_t); +extern void set_trace_flags(const char *); +extern FILE *traceout; + +extern stae *mstack; /* stack of m4 machine */ +extern char *sstack; /* shadow stack, for string space extension */ +extern FILE *active; /* active output file pointer */ +extern struct input_file infile[];/* input file stack (0=stdin) */ +extern FILE **outfile; /* diversion array(0=bitbucket) */ +extern int maxout; /* maximum number of diversions */ +extern int fp; /* m4 call frame pointer */ +extern int ilevel; /* input file stack pointer */ +extern int oindex; /* diversion index. */ +extern int sp; /* current m4 stack pointer */ +extern unsigned char *bp; /* first available character */ +extern unsigned char *buf; /* push-back buffer */ +extern unsigned char *bufbase; /* buffer base for this ilevel */ +extern unsigned char *bbase[]; /* buffer base per ilevel */ +extern char ecommt[MAXCCHARS+1];/* end character for comment */ +extern char *ep; /* first free char in strspace */ +extern char lquote[MAXCCHARS+1];/* left quote character (`) */ +extern char **m4wraps; /* m4wrap string default. */ +extern int maxwraps; /* size of m4wraps array */ +extern int wrapindex; /* current index in m4wraps */ + +extern char *null; /* as it says.. just a null. 
*/ +extern char rquote[MAXCCHARS+1];/* right quote character (') */ +extern char scommt[MAXCCHARS+1];/* start character for comment */ +extern int synch_lines; /* line synchronisation directives */ + +extern int mimic_gnu; /* behaves like gnu-m4 */ +extern int prefix_builtins; /* prefix builtin macros with m4_ */ +extern int error_warns; /* make warnings cause exit_code = 1 */ +extern int fatal_warns; /* make warnings fatal */ + diff --git a/usr.bin/m4/gnum4.c b/usr.bin/m4/gnum4.c new file mode 100644 index 0000000..1de66f2 --- /dev/null +++ b/usr.bin/m4/gnum4.c @@ -0,0 +1,692 @@ +/* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */ + +/* + * Copyright (c) 1999 Marc Espie + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * functions needed to support gnu-m4 extensions, including a fake freezing + */ + +#include <sys/types.h> +#include <sys/wait.h> +#include <ctype.h> +#include <err.h> +#include <paths.h> +#include <regex.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <limits.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" + + +int mimic_gnu = 0; + +/* + * Support for include path search + * First search in the current directory. + * If not found, and the path is not absolute, include path kicks in. + * First, -I options, in the order found on the command line. + * Then M4PATH env variable + */ + +struct path_entry { + char *name; + struct path_entry *next; +} *first, *last; + +static struct path_entry *new_path_entry(const char *); +static void ensure_m4path(void); +static struct input_file *dopath(struct input_file *, const char *); + +static struct path_entry * +new_path_entry(const char *dirname) +{ + struct path_entry *n; + + n = malloc(sizeof(struct path_entry)); + if (!n) + errx(1, "out of memory"); + n->name = xstrdup(dirname); + n->next = 0; + return n; +} + +void +addtoincludepath(const char *dirname) +{ + struct path_entry *n; + + n = new_path_entry(dirname); + + if (last) { + last->next = n; + last = n; + } + else + last = first = n; +} + +static void +ensure_m4path() +{ + static int envpathdone = 0; + char *envpath; + char *sweep; + char *path; + + if (envpathdone) + return; + envpathdone = TRUE; + envpath = getenv("M4PATH"); + if (!envpath) + return; + /* for portability: getenv result is read-only */ + envpath = xstrdup(envpath); + for (sweep = envpath; + (path = strsep(&sweep, ":")) != NULL;) + addtoincludepath(path); + free(envpath); +} + +static +struct input_file * +dopath(struct input_file *i, const char *filename) +{ + char path[PATH_MAX]; + struct path_entry *pe; + FILE *f; + + for (pe = first; pe; pe = 
pe->next) { + snprintf(path, sizeof(path), "%s/%s", pe->name, filename); + if ((f = fopen(path, "r")) != 0) { + set_input(i, f, path); + return i; + } + } + return NULL; +} + +struct input_file * +fopen_trypath(struct input_file *i, const char *filename) +{ + FILE *f; + + f = fopen(filename, "r"); + if (f != NULL) { + set_input(i, f, filename); + return i; + } + if (filename[0] == '/') + return NULL; + + ensure_m4path(); + + return dopath(i, filename); +} + +void +doindir(const char *argv[], int argc) +{ + ndptr n; + struct macro_definition *p; + + n = lookup(argv[2]); + if (n == NULL || (p = macro_getdef(n)) == NULL) + m4errx(1, "indir: undefined macro %s.", argv[2]); + argv[1] = p->defn; + + eval(argv+1, argc-1, p->type, is_traced(n)); +} + +void +dobuiltin(const char *argv[], int argc) +{ + ndptr p; + + argv[1] = NULL; + p = macro_getbuiltin(argv[2]); + if (p != NULL) + eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); + else + m4errx(1, "unknown builtin %s.", argv[2]); +} + + +/* We need some temporary buffer space, as pb pushes BACK and substitution + * proceeds forward... 
*/ +static char *buffer; +static size_t bufsize = 0; +static size_t current = 0; + +static void addchars(const char *, size_t); +static void addchar(int); +static char *twiddle(const char *); +static char *getstring(void); +static void exit_regerror(int, regex_t *, const char *); +static void do_subst(const char *, regex_t *, const char *, const char *, + regmatch_t *); +static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); +static void do_regexp(const char *, regex_t *, const char *, const char *, + regmatch_t *); +static void add_sub(int, const char *, regex_t *, regmatch_t *); +static void add_replace(const char *, regex_t *, const char *, regmatch_t *); +#define addconstantstring(s) addchars((s), sizeof(s)-1) + +static void +addchars(const char *c, size_t n) +{ + if (n == 0) + return; + while (current + n > bufsize) { + if (bufsize == 0) + bufsize = 1024; + else if (bufsize <= SIZE_MAX/2) { + bufsize *= 2; + } else { + errx(1, "size overflow"); + } + buffer = xrealloc(buffer, bufsize, NULL); + } + memcpy(buffer+current, c, n); + current += n; +} + +static void +addchar(int c) +{ + if (current +1 > bufsize) { + if (bufsize == 0) + bufsize = 1024; + else + bufsize *= 2; + buffer = xrealloc(buffer, bufsize, NULL); + } + buffer[current++] = c; +} + +static char * +getstring(void) +{ + addchar('\0'); + current = 0; + return buffer; +} + + +static void +exit_regerror(int er, regex_t *re, const char *source) +{ + size_t errlen; + char *errbuf; + + errlen = regerror(er, re, NULL, 0); + errbuf = xalloc(errlen, + "malloc in regerror: %lu", (unsigned long)errlen); + regerror(er, re, errbuf, errlen); + m4errx(1, "regular expression error in %s: %s.", source, errbuf); +} + +/* warnx() plus check to see if we need to change exit code or exit. + * -E flag functionality. + */ +void +m4_warnx(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + + if (fatal_warns) + exit(1); + if (error_warns) + exit_code = 1; +} + +static void +add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) +{ + if (n > re->re_nsub) + m4_warnx("No subexpression %d", n); + /* Subexpressions that did not match are + * not an error. */ + else if (pm[n].rm_so != -1 && + pm[n].rm_eo != -1) { + addchars(string + pm[n].rm_so, + pm[n].rm_eo - pm[n].rm_so); + } +} + +/* Add replacement string to the output buffer, recognizing special + * constructs and replacing them with substrings of the original string. + */ +static void +add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) +{ + const char *p; + + for (p = replace; *p != '\0'; p++) { + if (*p == '&' && !mimic_gnu) { + add_sub(0, string, re, pm); + continue; + } + if (*p == '\\') { + if (p[1] == '\\') { + addchar(p[1]); + p++; + continue; + } + if (p[1] == '&') { + if (mimic_gnu) + add_sub(0, string, re, pm); + else + addchar(p[1]); + p++; + continue; + } + if (isdigit((unsigned char)p[1])) { + add_sub(*(++p) - '0', string, re, pm); + continue; + } + } + addchar(*p); + } +} + +static void +do_subst(const char *string, regex_t *re, const char *source, + const char *replace, regmatch_t *pm) +{ + int error; + int flags = 0; + const char *last_match = NULL; + + while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { + if (pm[0].rm_eo != 0) { + if (string[pm[0].rm_eo-1] == '\n') + flags = 0; + else + flags = REG_NOTBOL; + } + + /* NULL length matches are special... We use the `vi-mode' + * rule: don't allow a NULL-match at the last match + * position. 
+ */ + if (pm[0].rm_so == pm[0].rm_eo && + string + pm[0].rm_so == last_match) { + if (*string == '\0') + return; + addchar(*string); + if (*string++ == '\n') + flags = 0; + else + flags = REG_NOTBOL; + continue; + } + last_match = string + pm[0].rm_so; + addchars(string, pm[0].rm_so); + add_replace(string, re, replace, pm); + string += pm[0].rm_eo; + } + if (error != REG_NOMATCH) + exit_regerror(error, re, source); + pbstr(string); +} + +static void +do_regexp(const char *string, regex_t *re, const char *source, + const char *replace, regmatch_t *pm) +{ + int error; + + switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { + case 0: + add_replace(string, re, replace, pm); + pbstr(getstring()); + break; + case REG_NOMATCH: + break; + default: + exit_regerror(error, re, source); + } +} + +static void +do_regexpindex(const char *string, regex_t *re, const char *source, + regmatch_t *pm) +{ + int error; + + switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { + case 0: + pbunsigned(pm[0].rm_so); + break; + case REG_NOMATCH: + pbnum(-1); + break; + default: + exit_regerror(error, re, source); + } +} + +/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 + * says. So we twiddle with the regexp before passing it to regcomp. + */ +static char * +twiddle(const char *p) +{ + /* + at start of regexp is a normal character for Gnu m4 */ + if (*p == '^') { + addchar(*p); + p++; + } + if (*p == '+') { + addchar('\\'); + } + /* This could use strcspn for speed... 
*/ + while (*p != '\0') { + if (*p == '\\') { + switch(p[1]) { + case '(': + case ')': + case '|': + addchar(p[1]); + break; + case 'w': + addconstantstring("[_a-zA-Z0-9]"); + break; + case 'W': + addconstantstring("[^_a-zA-Z0-9]"); + break; + case '<': + addconstantstring("[[:<:]]"); + break; + case '>': + addconstantstring("[[:>:]]"); + break; + default: + addchars(p, 2); + break; + } + p+=2; + continue; + } + if (*p == '(' || *p == ')' || *p == '|') + addchar('\\'); + + addchar(*p); + p++; + } + return getstring(); +} + +/* patsubst(string, regexp, opt replacement) */ +/* argv[2]: string + * argv[3]: regexp + * argv[4]: opt rep + */ +void +dopatsubst(const char *argv[], int argc) +{ + if (argc <= 3) { + m4_warnx("Too few arguments to patsubst"); + return; + } + /* special case: empty regexp */ + if (argv[3][0] == '\0') { + const char *s; + size_t len; + if (argc > 4 && argv[4]) + len = strlen(argv[4]); + else + len = 0; + for (s = argv[2]; *s != '\0'; s++) { + addchars(argv[4], len); + addchar(*s); + } + } else { + int error; + regex_t re; + regmatch_t *pmatch; + int mode = REG_EXTENDED; + const char *source; + size_t l = strlen(argv[3]); + + if (!mimic_gnu || + (argv[3][0] == '^') || + (l > 0 && argv[3][l-1] == '$')) + mode |= REG_NEWLINE; + + source = mimic_gnu ? twiddle(argv[3]) : argv[3]; + error = regcomp(&re, source, mode); + if (error != 0) + exit_regerror(error, &re, source); + + pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), + NULL); + do_subst(argv[2], &re, source, + argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); + free(pmatch); + regfree(&re); + } + pbstr(getstring()); +} + +void +doregexp(const char *argv[], int argc) +{ + int error; + regex_t re; + regmatch_t *pmatch; + const char *source; + + if (argc <= 3) { + m4_warnx("Too few arguments to regexp"); + return; + } + /* special gnu case */ + if (argv[3][0] == '\0' && mimic_gnu) { + if (argc == 4 || argv[4] == NULL) + return; + else + pbstr(argv[4]); + } + source = mimic_gnu ? 
twiddle(argv[3]) : argv[3]; + error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); + if (error != 0) + exit_regerror(error, &re, source); + + pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); + if (argc == 4 || argv[4] == NULL) + do_regexpindex(argv[2], &re, source, pmatch); + else + do_regexp(argv[2], &re, source, argv[4], pmatch); + free(pmatch); + regfree(&re); +} + +void +doformat(const char *argv[], int argc) +{ + const char *format = argv[2]; + int pos = 3; + int left_padded; + long width; + size_t l; + const char *thisarg; + char temp[2]; + long extra; + + while (*format != 0) { + if (*format != '%') { + addchar(*format++); + continue; + } + + format++; + if (*format == '%') { + addchar(*format++); + continue; + } + if (*format == 0) { + addchar('%'); + break; + } + + if (*format == '*') { + format++; + if (pos >= argc) + m4errx(1, + "Format with too many format specifiers."); + width = strtol(argv[pos++], NULL, 10); + } else { + width = strtol(format, (char **)&format, 10); + } + if (width < 0) { + left_padded = 1; + width = -width; + } else { + left_padded = 0; + } + if (*format == '.') { + format++; + if (*format == '*') { + format++; + if (pos >= argc) + m4errx(1, + "Format with too many format specifiers."); + extra = strtol(argv[pos++], NULL, 10); + } else { + extra = strtol(format, (char **)&format, 10); + } + } else { + extra = LONG_MAX; + } + if (pos >= argc) + m4errx(1, "Format with too many format specifiers."); + switch(*format) { + case 's': + thisarg = argv[pos++]; + break; + case 'c': + temp[0] = strtoul(argv[pos++], NULL, 10); + temp[1] = 0; + thisarg = temp; + break; + default: + m4errx(1, "Unsupported format specification: %s.", + argv[2]); + } + format++; + l = strlen(thisarg); + if (l > extra) + l = extra; + if (!left_padded) { + while (l < width--) + addchar(' '); + } + addchars(thisarg, l); + if (left_padded) { + while (l < width--) + addchar(' '); + } + } + pbstr(getstring()); +} + +void +doesyscmd(const char 
*cmd) +{ + int p[2]; + pid_t cpid; + char *argv[4]; + int cc; + int status; + + /* Follow gnu m4 documentation: first flush buffers. */ + fflush(NULL); + + argv[0] = "sh"; + argv[1] = "-c"; + argv[2] = (char *)cmd; + argv[3] = NULL; + + /* Just set up standard output, share stderr and stdin with m4 */ + if (pipe(p) == -1) + err(1, "bad pipe"); + switch(cpid = fork()) { + case -1: + err(1, "bad fork"); + /* NOTREACHED */ + case 0: + (void) close(p[0]); + (void) dup2(p[1], 1); + (void) close(p[1]); + execv(_PATH_BSHELL, argv); + exit(1); + default: + /* Read result in two stages, since m4's buffer is + * pushback-only. */ + (void) close(p[1]); + do { + char result[BUFSIZE]; + cc = read(p[0], result, sizeof result); + if (cc > 0) + addchars(result, cc); + } while (cc > 0 || (cc == -1 && errno == EINTR)); + + (void) close(p[0]); + while (waitpid(cpid, &status, 0) == -1) { + if (errno != EINTR) + break; + } + pbstr(getstring()); + } +} + +void +getdivfile(const char *name) +{ + FILE *f; + int c; + + f = fopen(name, "r"); + if (!f) + return; + + while ((c = getc(f))!= EOF) + putc(c, active); + (void) fclose(f); +} diff --git a/usr.bin/m4/look.c b/usr.bin/m4/look.c new file mode 100644 index 0000000..5feb041 --- /dev/null +++ b/usr.bin/m4/look.c @@ -0,0 +1,337 @@ +/* $OpenBSD: look.c,v 1.24 2014/12/21 09:33:12 espie Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * look.c + * Facility: m4 macro processor + * by: oz + */ + +#include <sys/cdefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <ohash.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" + +static void *hash_calloc(size_t, size_t, void *); +static void hash_free(void *, void *); +static void *element_alloc(size_t, void *); +static void setup_definition(struct macro_definition *, const char *, + const char *); +static void free_definition(char *); +static void keep(char *); +static int string_in_use(const char *); + +static struct ohash_info macro_info = { + offsetof(struct ndblock, name), + NULL, hash_calloc, hash_free, element_alloc }; + +struct ohash macros; + +/* Support routines for hash tables. 
*/ +void * +hash_calloc(size_t n, size_t s, void *u UNUSED) +{ + void *storage = xcalloc(n, s, "hash alloc"); + return storage; +} + +void +hash_free(void *p, void *u UNUSED) +{ + free(p); +} + +void * +element_alloc(size_t s, void *u UNUSED) +{ + return xalloc(s, "element alloc"); +} + +void +init_macros() +{ + ohash_init(&macros, 10, &macro_info); +} + +/* + * find name in the hash table + */ +ndptr +lookup(const char *name) +{ + return ohash_find(&macros, ohash_qlookup(&macros, name)); +} + +struct macro_definition * +lookup_macro_definition(const char *name) +{ + ndptr p; + + p = ohash_find(&macros, ohash_qlookup(&macros, name)); + if (p) + return p->d; + else + return NULL; +} + +static void +setup_definition(struct macro_definition *d, const char *defn, const char *name) +{ + ndptr p; + + if (strncmp(defn, BUILTIN_MARKER, sizeof(BUILTIN_MARKER)-1) == 0 && + (p = macro_getbuiltin(defn+sizeof(BUILTIN_MARKER)-1)) != NULL) { + d->type = macro_builtin_type(p); + d->defn = xstrdup(defn+sizeof(BUILTIN_MARKER)-1); + } else { + if (!*defn) + d->defn = null; + else + d->defn = xstrdup(defn); + d->type = MACRTYPE; + } + if (STREQ(name, defn)) + d->type |= RECDEF; +} + +static ndptr +create_entry(const char *name) +{ + const char *end = NULL; + unsigned int i; + ndptr n; + + i = ohash_qlookupi(&macros, name, &end); + n = ohash_find(&macros, i); + if (n == NULL) { + n = ohash_create_entry(&macro_info, name, &end); + ohash_insert(&macros, i, n); + n->trace_flags = FLAG_NO_TRACE; + n->builtin_type = MACRTYPE; + n->d = NULL; + } + return n; +} + +void +macro_define(const char *name, const char *defn) +{ + ndptr n = create_entry(name); + if (n->d != NULL) { + if (n->d->defn != null) + free_definition(n->d->defn); + } else { + n->d = xalloc(sizeof(struct macro_definition), NULL); + n->d->next = NULL; + } + setup_definition(n->d, defn, name); +} + +void +macro_pushdef(const char *name, const char *defn) +{ + ndptr n; + struct macro_definition *d; + + n = create_entry(name); + d = xalloc(sizeof(struct
macro_definition), NULL); + d->next = n->d; + n->d = d; + setup_definition(n->d, defn, name); +} + +void +macro_undefine(const char *name) +{ + ndptr n = lookup(name); + if (n != NULL) { + struct macro_definition *r, *r2; + + for (r = n->d; r != NULL; r = r2) { + r2 = r->next; + if (r->defn != null) + free(r->defn); + free(r); + } + n->d = NULL; + } +} + +void +macro_popdef(const char *name) +{ + ndptr n = lookup(name); + + if (n != NULL) { + struct macro_definition *r = n->d; + if (r != NULL) { + n->d = r->next; + if (r->defn != null) + free(r->defn); + free(r); + } + } +} + +void +macro_for_all(void (*f)(const char *, struct macro_definition *)) +{ + ndptr n; + unsigned int i; + + for (n = ohash_first(&macros, &i); n != NULL; + n = ohash_next(&macros, &i)) + if (n->d != NULL) + f(n->name, n->d); +} + +void +setup_builtin(const char *name, unsigned int type) +{ + ndptr n; + char *name2; + + if (prefix_builtins) { + name2 = xalloc(strlen(name)+3+1, NULL); + memcpy(name2, "m4_", 3); + memcpy(name2 + 3, name, strlen(name)+1); + } else + name2 = xstrdup(name); + + n = create_entry(name2); + n->builtin_type = type; + n->d = xalloc(sizeof(struct macro_definition), NULL); + n->d->defn = name2; + n->d->type = type; + n->d->next = NULL; +} + +void +mark_traced(const char *name, int on) +{ + ndptr p; + unsigned int i; + + if (name == NULL) { + if (on) + trace_flags |= TRACE_ALL; + else + trace_flags &= ~TRACE_ALL; + for (p = ohash_first(&macros, &i); p != NULL; + p = ohash_next(&macros, &i)) + p->trace_flags = FLAG_NO_TRACE; + } else { + p = create_entry(name); + p->trace_flags = on; + } +} + +ndptr +macro_getbuiltin(const char *name) +{ + ndptr p; + + p = lookup(name); + if (p == NULL || p->builtin_type == MACRTYPE) + return NULL; + else + return p; +} + +/* XXX things are slightly more complicated than they seem. + * a macro may actually be "live" (in the middle of an expansion + * on the stack. + * So we actually may need to place it in an array for later...
+ */ + +static int kept_capacity = 0; +static int kept_size = 0; +static char **kept = NULL; + +static void +keep(char *ptr) +{ + if (kept_capacity <= kept_size) { + if (kept_capacity) + kept_capacity *= 2; + else + kept_capacity = 50; + kept = xreallocarray(kept, kept_capacity, + sizeof(char *), "Out of memory while saving %d strings\n", + kept_capacity); + } + kept[kept_size++] = ptr; +} + +static int +string_in_use(const char *ptr) +{ + int i; + for (i = 0; i <= sp; i++) { + if (sstack[i] == STORAGE_MACRO && mstack[i].sstr == ptr) + return 1; + } + return 0; +} + + +static void +free_definition(char *ptr) +{ + int i; + + /* first try to free old strings */ + for (i = 0; i < kept_size; i++) { + if (!string_in_use(kept[i])) { + kept_size--; + free(kept[i]); + if (i != kept_size) + kept[i] = kept[kept_size]; + i--; + } + } + + /* then deal with us */ + if (string_in_use(ptr)) + keep(ptr); + else + free(ptr); +} + diff --git a/usr.bin/m4/m4.1 b/usr.bin/m4/m4.1 new file mode 100644 index 0000000..d389c15 --- /dev/null +++ b/usr.bin/m4/m4.1 @@ -0,0 +1,524 @@ +.\" @(#) $OpenBSD: m4.1,v 1.64 2017/06/15 13:48:42 bcallah Exp $ +.\" +.\" Copyright (c) 1989, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Ozan Yigit at York University. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd $Mdocdate: June 15 2017 $ +.Dt M4 1 +.Os +.Sh NAME +.Nm m4 +.Nd macro language processor +.Sh SYNOPSIS +.Nm +.Op Fl EgPs +.Oo +.Sm off +.Fl D Ar name Op No = Ar value +.Sm on +.Oc +.Op Fl d Ar flags +.Op Fl I Ar dirname +.Op Fl o Ar filename +.Op Fl t Ar macro +.Op Fl U Ns Ar name +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility is a macro processor that can be used as a front end to any +language (e.g., C, ratfor, fortran, lex, and yacc). +If no input files are given, +.Nm +reads from the standard input, +otherwise files specified on the command line are +processed in the given order. +Input files can be regular files, files in the m4 include paths, or a +single dash +.Pq Sq - , +denoting standard input. +.Nm +writes +the processed text to the standard output, unless told otherwise. +.Pp +Macro calls have the form name(argument1[, argument2, ..., argumentN]). +.Pp +There cannot be any space following the macro name and the open +parenthesis +.Pq Sq \&( . 
+If the macro name is not followed by an open +parenthesis it is processed with no arguments. +.Pp +Macro names consist of a leading alphabetic or underscore +possibly followed by alphanumeric or underscore characters, e.g., +valid macro names match the pattern +.Dq [a-zA-Z_][a-zA-Z0-9_]* . +.Pp +In arguments to macros, leading unquoted space, tab, and newline +.Pq Sq \en +characters are ignored. +To quote strings, use left and right single quotes +.Pq e.g., Sq \ \&this is a string with a leading space . +You can change the quote characters with the +.Ic changequote +built-in macro. +.Pp +Most built-ins don't make any sense without arguments, and hence are not +recognized as special when not followed by an open parenthesis. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl D Ns Ar name Ns Op = Ns Ar value +Define the symbol +.Ar name +to have some value (or +.Dv NULL ) . +.It Fl d Ar "flags" +Set trace flags. +.Ar flags +may hold the following: +.Bl -tag -width Ds +.It Ar a +print macro arguments. +.It Ar c +print macro expansion over several lines. +.It Ar e +print result of macro expansion. +.It Ar f +print filename location. +.It Ar l +print line number. +.It Ar q +quote arguments and expansion with the current quotes. +.It Ar t +start with all macros traced. +.It Ar x +number macro expansions. +.It Ar V +turn on all options. +.El +.Pp +By default, trace is set to +.Qq eq . +.It Fl E +Set warnings to be fatal. +When a single +.Fl E +flag is specified, if warnings are issued, execution continues but +.Nm +will exit with a non-zero exit status. +When multiple +.Fl E +flags are specified, execution will halt upon issuing the first warning and +.Nm +will exit with a non-zero exit status. +This behaviour matches GNU-m4 1.4.9 and later. +.It Fl g +Activate GNU-m4 compatibility mode. 
+In this mode, translit handles simple character +ranges (e.g., a-z), regular expressions mimic emacs behavior, +multiple m4wrap calls are handled as a stack, +the number of diversions is unlimited, +empty names for macro definitions are allowed, +and eval understands +.Sq 0rbase:value +numbers. +.It Fl I Ar "dirname" +Add directory +.Ar dirname +to the include path. +.It Fl o Ar filename +Send trace output to +.Ar filename . +.It Fl P +Prefix all built-in macros with +.Sq m4_ . +For example, instead of writing +.Ic define , +use +.Ic m4_define . +.It Fl s +Output line synchronization directives, suitable for +.Xr cpp 1 . +.It Fl t Ar macro +Turn tracing on for +.Ar macro . +.It Fl "U" Ns Ar "name" +Undefine the symbol +.Ar name . +.El +.Sh SYNTAX +.Nm +provides the following built-in macros. +They may be redefined, losing their original meaning. +Return values are null unless otherwise stated. +.Bl -tag -width changequote +.It Fn builtin name +Calls a built-in by its +.Fa name , +overriding possible redefinitions. +.It Fn changecom startcomment endcomment +Changes the start comment and end comment sequences. +Comment sequences may be up to five characters long. +The default values are the hash sign +and the newline character. +.Bd -literal -offset indent +# This is a comment +.Ed +.Pp +With no arguments, comments are turned off. +With one single argument, the end comment sequence is set +to the newline character. +.It Fn changequote beginquote endquote +Defines the open quote and close quote sequences. +Quote sequences may be up to five characters long. +The default values are the backquote character and the quote +character. +.Bd -literal -offset indent +`Here is a quoted string' +.Ed +.Pp +With no arguments, the default quotes are restored. +With one single argument, the close quote sequence is set +to the newline character. +.It Fn decr arg +Decrements the argument +.Fa arg +by 1. +The argument +.Fa arg +must be a valid numeric string. 
+.It Fn define name value +Define a new macro named by the first argument +.Fa name +to have the +value of the second argument +.Fa value . +Each occurrence of +.Sq $n +(where +.Ar n +is 0 through 9) is replaced by the +.Ar n Ns 'th +argument. +.Sq $0 +is the name of the calling macro. +Undefined arguments are replaced by a null string. +.Sq $# +is replaced by the number of arguments; +.Sq $* +is replaced by all arguments comma separated; +.Sq $@ +is the same as +.Sq $* +but all arguments are quoted against further expansion. +.It Fn defn name ... +Returns the quoted definition for each argument. +This can be used to rename +macro definitions (even for built-in macros). +.It Fn divert num +There are 10 output queues (numbered 0-9). +At the end of processing +.Nm +concatenates all the queues in numerical order to produce the +final output. +Initially the output queue is 0. +The divert +macro allows you to select a new output queue (an invalid argument +passed to divert causes output to be discarded). +.It Ic divnum +Returns the current output queue number. +.It Ic dnl +Discard input characters up to and including the next newline. +.It Fn dumpdef name ... +Prints the names and definitions for the named items, or for everything +if no arguments are passed. +.It Fn errprint msg +Prints the first argument on the standard error output stream. +.It Fn esyscmd cmd +Passes its first argument to a shell and returns the shell's standard output. +Note that the shell shares its standard input and standard error with +.Nm . +.It Fn eval expr[,radix[,minimum]] +Computes the first argument as an arithmetic expression using 32-bit +arithmetic. +Operators are the standard C ternary, arithmetic, logical, +shift, relational, bitwise, and parentheses operators. +You can specify +octal, decimal, and hexadecimal numbers as in C. 
+The optional second argument +.Fa radix +specifies the radix for the result and the optional third argument +.Fa minimum +specifies the minimum number of digits in the result. +.It Fn expr expr +This is an alias for +.Ic eval . +.It Fn format formatstring arg1 ... +Returns +.Fa formatstring +with escape sequences substituted with +.Fa arg1 +and following arguments, in a way similar to +.Xr printf 3 . +This built-in is only available in GNU-m4 compatibility mode, and the only +parameters implemented are there for autoconf compatibility: +left-padding flag, an optional field width, a maximum field width, +*-specified field widths, and the %s and %c data type. +.It Fn ifdef name yes no +If the macro named by the first argument is defined then return the second +argument, otherwise the third. +If there is no third argument, the value is +.Dv NULL . +The word +.Qq unix +is predefined. +.It Fn ifelse a b yes ... +If the first argument +.Fa a +matches the second argument +.Fa b +then +.Fn ifelse +returns +the third argument +.Fa yes . +If the match fails the three arguments are +discarded and the next three arguments are used until there is +zero or one arguments left, either this last argument or +.Dv NULL +is returned if no other matches were found. +.It Fn include name +Returns the contents of the file specified in the first argument. +If the file is not found as is, look through the include path: +first the directories specified with +.Fl I +on the command line, then the environment variable +.Ev M4PATH , +as a colon-separated list of directories. +Include aborts with an error message if the file cannot be included. +.It Fn incr arg +Increments the argument by 1. +The argument must be a valid numeric string. +.It Fn index string substring +Returns the index of the second argument in the first argument (e.g., +.Ic index(the quick brown fox jumped, fox) +returns 16). +If the second +argument is not found index returns \-1. +.It Fn indir macro arg1 ... 
+Indirectly calls the macro whose name is passed as the first argument, +with the remaining arguments passed as first, ... arguments. +.It Fn len arg +Returns the number of characters in the first argument. +Extra arguments +are ignored. +.It Fn m4exit code +Immediately exits with the return value specified by the first argument, +0 if none. +.It Fn m4wrap todo +Allows you to define what happens at the final +.Dv EOF , +usually for cleanup purposes (e.g., +.Ic m4wrap("cleanup(tempfile)") +causes the macro cleanup to be +invoked after all other processing is done). +.Pp +Multiple calls to +.Fn m4wrap +get inserted in sequence at the final +.Dv EOF . +.It Fn maketemp template +Like +.Ic mkstemp . +.It Fn mkstemp template +Invokes +.Xr mkstemp 3 +on the first argument, and returns the modified string. +This can be used to create unique +temporary file names. +.It Fn paste file +Includes the contents of the file specified by the first argument without +any macro processing. +Aborts with an error message if the file cannot be +included. +.It Fn patsubst string regexp replacement +Substitutes a regular expression in a string with a replacement string. +Usual substitution patterns apply: an ampersand +.Pq Sq \&& +is replaced by the string matching the regular expression. +The string +.Sq \e# , +where +.Sq # +is a digit, is replaced by the corresponding back-reference. +.It Fn popdef arg ... +Restores the +.Ic pushdef Ns ed +definition for each argument. +.It Fn pushdef macro def +Takes the same arguments as +.Ic define , +but it saves the definition on a +stack for later retrieval by +.Fn popdef . +.It Fn regexp string regexp replacement +Finds a regular expression in a string. +If no further arguments are given, +it returns the first match position or \-1 if no match. +If a third argument +is provided, it returns the replacement string, with sub-patterns replaced. +.It Fn shift arg1 ... 
+Returns all but the first argument, the remaining arguments are +quoted and pushed back with commas in between. +The quoting +nullifies the effect of the extra scan that will subsequently be +performed. +.It Fn sinclude file +Similar to +.Ic include , +except it ignores any errors. +.It Fn spaste file +Similar to +.Fn paste , +except it ignores any errors. +.It Fn substr string offset length +Returns a substring of the first argument starting at the offset specified +by the second argument and the length specified by the third argument. +If no third argument is present it returns the rest of the string. +.It Fn syscmd cmd +Passes the first argument to the shell. +Nothing is returned. +.It Ic sysval +Returns the return value from the last +.Ic syscmd . +.It Fn traceon arg ... +Enables tracing of macro expansions for the given arguments, or for all +macros if no argument is given. +.It Fn traceoff arg ... +Disables tracing of macro expansions for the given arguments, or for all +macros if no argument is given. +.It Fn translit string mapfrom mapto +Transliterate the characters in the first argument from the set +given by the second argument to the set given by the third. +You cannot use +.Xr tr 1 +style abbreviations. +.It Fn undefine name1 ... +Removes the definition for the macros specified by its arguments. +.It Fn undivert arg ... +Flushes the named output queues (or all queues if no arguments). +.It Ic unix +A pre-defined macro for testing the OS platform. +.It Ic __line__ +Returns the current file's line number. +.It Ic __file__ +Returns the current file's name. +.El +.Sh EXIT STATUS +.Ex -std m4 +.Pp +But note that the +.Ic m4exit +macro can modify the exit status, as can the +.Fl E +flag. +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. 
+.Pp +The flags +.Op Fl dEgIPot +and the macros +.Ic builtin , +.Ic esyscmd , +.Ic expr , +.Ic format , +.Ic indir , +.Ic paste , +.Ic patsubst , +.Ic regexp , +.Ic spaste , +.Ic unix , +.Ic __line__ , +and +.Ic __file__ +are extensions to that specification. +.Pp +.Ic maketemp +is not supposed to be a synonym for +.Ic mkstemp , +but instead to be an insecure temporary file name creation function. +It is marked by +.St -p1003.1-2008 +as being obsolescent and should not be used if portability is a concern. +.Pp +The output format of +.Ic traceon +and +.Ic dumpdef +are not specified in any standard, +are likely to change and should not be relied upon. +The current format of tracing is closely modelled on +.Nm gnu-m4 , +to allow +.Nm autoconf +to work. +.Pp +The built-ins +.Ic pushdef +and +.Ic popdef +handle macro definitions as a stack. +However, +.Ic define +interacts with the stack in an undefined way. +In this implementation, +.Ic define +replaces the top-most definition only. +Other implementations may erase all definitions on the stack instead. +.Pp +All built-ins do expand without arguments in many other +.Nm . +.Pp +Many other +.Nm +have dire size limitations with respect to buffer sizes. +.Sh AUTHORS +.An -nosplit +.An Ozan Yigit Aq Mt oz@sis.yorku.ca +and +.An Richard A. O'Keefe Aq Mt ok@goanna.cs.rmit.OZ.AU . +.Pp +GNU-m4 compatibility extensions by +.An Marc Espie Aq Mt espie@cvs.openbsd.org . diff --git a/usr.bin/m4/main.c b/usr.bin/m4/main.c new file mode 100644 index 0000000..4e664c0 --- /dev/null +++ b/usr.bin/m4/main.c @@ -0,0 +1,642 @@ +/* $OpenBSD: main.c,v 1.87 2017/06/15 13:48:42 bcallah Exp $ */ +/* $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $ */ + +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * main.c + * Facility: m4 macro processor + * by: oz + */ + +#include <sys/cdefs.h> +#include <assert.h> +#include <signal.h> +#include <err.h> +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <ohash.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" +#include "pathnames.h" + +stae *mstack; /* stack of m4 machine */ +char *sstack; /* shadow stack, for string space extension */ +static size_t STACKMAX; /* current maximum size of stack */ +int sp; /* current m4 stack pointer */ +int fp; /* m4 call frame pointer */ +struct input_file infile[MAXINP];/* input file stack (0=stdin) */ +FILE **outfile; /* diversion array(0=bitbucket)*/ +int maxout; +FILE *active; /* active output file pointer */ +int ilevel = 0; /* input file stack pointer */ +int oindex = 0; /* diversion index.. */ +char *null = ""; /* as it says.. just a null.. */ +char **m4wraps = NULL; /* m4wraps array. 
*/ +int maxwraps = 0; /* size of m4wraps array */ +int wrapindex = 0; /* current offset in m4wraps */ +char lquote[MAXCCHARS+1] = {LQUOTE}; /* left quote character (`) */ +char rquote[MAXCCHARS+1] = {RQUOTE}; /* right quote character (') */ +char scommt[MAXCCHARS+1] = {SCOMMT}; /* start character for comment */ +char ecommt[MAXCCHARS+1] = {ECOMMT}; /* end character for comment */ +int synch_lines = 0; /* line synchronisation for C preprocessor */ +int prefix_builtins = 0; /* -P option to prefix builtin keywords */ +int error_warns = 0; /* -E option to make warnings exit_code = 1 */ +int fatal_warns = 0; /* -E -E option to make warnings fatal */ + +struct keyblk { + char *knam; /* keyword name */ + int ktyp; /* keyword type */ +}; + +struct keyblk keywrds[] = { /* m4 keywords to be installed */ + { "include", INCLTYPE }, + { "sinclude", SINCTYPE }, + { "define", DEFITYPE }, + { "defn", DEFNTYPE }, + { "divert", DIVRTYPE | NOARGS }, + { "expr", EXPRTYPE }, + { "eval", EXPRTYPE }, + { "substr", SUBSTYPE }, + { "ifelse", IFELTYPE }, + { "ifdef", IFDFTYPE }, + { "len", LENGTYPE }, + { "incr", INCRTYPE }, + { "decr", DECRTYPE }, + { "dnl", DNLNTYPE | NOARGS }, + { "changequote", CHNQTYPE | NOARGS }, + { "changecom", CHNCTYPE | NOARGS }, + { "index", INDXTYPE }, +#ifdef EXTENDED + { "paste", PASTTYPE }, + { "spaste", SPASTYPE }, + /* Newer extensions, needed to handle gnu-m4 scripts */ + { "indir", INDIRTYPE}, + { "builtin", BUILTINTYPE}, + { "patsubst", PATSTYPE}, + { "regexp", REGEXPTYPE}, + { "esyscmd", ESYSCMDTYPE}, + { "__file__", FILENAMETYPE | NOARGS}, + { "__line__", LINETYPE | NOARGS}, +#endif + { "popdef", POPDTYPE }, + { "pushdef", PUSDTYPE }, + { "dumpdef", DUMPTYPE | NOARGS }, + { "shift", SHIFTYPE | NOARGS }, + { "translit", TRNLTYPE }, + { "undefine", UNDFTYPE }, + { "undivert", UNDVTYPE | NOARGS }, + { "divnum", DIVNTYPE | NOARGS }, + { "maketemp", MKTMTYPE }, + { "mkstemp", MKTMTYPE }, + { "errprint", ERRPTYPE | NOARGS }, + { "m4wrap", M4WRTYPE | NOARGS 
}, + { "m4exit", EXITTYPE | NOARGS }, + { "syscmd", SYSCTYPE }, + { "sysval", SYSVTYPE | NOARGS }, + { "traceon", TRACEONTYPE | NOARGS }, + { "traceoff", TRACEOFFTYPE | NOARGS }, + + { "unix", SELFTYPE | NOARGS }, +}; + +#define MAXKEYS (sizeof(keywrds)/sizeof(struct keyblk)) + +extern int optind; +extern char *optarg; + +#define MAXRECORD 50 +static struct position { + char *name; + unsigned long line; +} quotes[MAXRECORD], paren[MAXRECORD]; + +static void record(struct position *, int); +static void dump_stack(struct position *, int); + +static void macro(void); +static void initkwds(void); +static ndptr inspect(int, char *); +static int do_look_ahead(int, const char *); +static void reallyoutputstr(const char *); +static void reallyputchar(int); + +static void enlarge_stack(void); + +int main(int, char *[]); + +int exit_code = 0; + +int +main(int argc, char *argv[]) +{ + int c; + int n; + char *p; + + if (pledge("stdio rpath wpath cpath tmppath proc exec", NULL) == -1) + err(1, "pledge"); + + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, onintr); + + init_macros(); + initspaces(); + STACKMAX = INITSTACKMAX; + + mstack = xreallocarray(NULL, STACKMAX, sizeof(stae), NULL); + sstack = xalloc(STACKMAX, NULL); + + maxout = 0; + outfile = NULL; + resizedivs(MAXOUT); + + while ((c = getopt(argc, argv, "gst:d:D:EU:o:I:P")) != -1) + switch(c) { + + case 'D': /* define something..*/ + for (p = optarg; *p; p++) + if (*p == '=') + break; + if (*p) + *p++ = EOS; + dodefine(optarg, p); + break; + case 'E': /* like GNU m4 1.4.9+ */ + if (error_warns == 0) + error_warns = 1; + else + fatal_warns = 1; + break; + case 'I': + addtoincludepath(optarg); + break; + case 'P': + prefix_builtins = 1; + break; + case 'U': /* undefine... 
*/ + macro_popdef(optarg); + break; + case 'g': + mimic_gnu = 1; + break; + case 'd': + set_trace_flags(optarg); + break; + case 's': + synch_lines = 1; + break; + case 't': + mark_traced(optarg, 1); + break; + case 'o': + trace_file(optarg); + break; + case '?': + usage(); + } + + argc -= optind; + argv += optind; + + initkwds(); + if (mimic_gnu) + setup_builtin("format", FORMATTYPE); + + active = stdout; /* default active output */ + bbase[0] = bufbase; + if (!argc) { + sp = -1; /* stack pointer initialized */ + fp = 0; /* frame pointer initialized */ + set_input(infile+0, stdin, "stdin"); + /* default input (naturally) */ + macro(); + } else + for (; argc--; ++argv) { + p = *argv; + if (p[0] == '-' && p[1] == EOS) + set_input(infile, stdin, "stdin"); + else if (fopen_trypath(infile, p) == NULL) + err(1, "%s", p); + sp = -1; + fp = 0; + macro(); + release_input(infile); + } + + if (wrapindex) { + int i; + + ilevel = 0; /* in case m4wrap includes.. */ + bufbase = bp = buf; /* use the entire buffer */ + if (mimic_gnu) { + while (wrapindex != 0) { + for (i = 0; i < wrapindex; i++) + pbstr(m4wraps[i]); + wrapindex =0; + macro(); + } + } else { + for (i = 0; i < wrapindex; i++) { + pbstr(m4wraps[i]); + macro(); + } + } + } + + if (active != stdout) + active = stdout; /* reset output just in case */ + for (n = 1; n < maxout; n++) /* default wrap-up: undivert */ + if (outfile[n] != NULL) + getdiv(n); + /* remove bitbucket if used */ + if (outfile[0] != NULL) { + (void) fclose(outfile[0]); + } + + return exit_code; +} + +/* + * Look ahead for `token'. + * (on input `t == token[0]') + * Used for comment and quoting delimiters. + * Returns 1 if `token' present; copied to output. 
+ * 0 if `token' not found; all characters pushed back + */ +static int +do_look_ahead(int t, const char *token) +{ + int i; + + assert((unsigned char)t == (unsigned char)token[0]); + + for (i = 1; *++token; i++) { + t = gpbc(); + if (t == EOF || (unsigned char)t != (unsigned char)*token) { + pushback(t); + while (--i) + pushback(*--token); + return 0; + } + } + return 1; +} + +#define LOOK_AHEAD(t, token) (t != EOF && \ + (unsigned char)(t)==(unsigned char)(token)[0] && \ + do_look_ahead(t,token)) + +/* + * macro - the work horse.. + */ +static void +macro(void) +{ + char token[MAXTOK+1]; + int t, l; + ndptr p; + int nlpar; + + cycle { + t = gpbc(); + + if (LOOK_AHEAD(t,lquote)) { /* strip quotes */ + nlpar = 0; + record(quotes, nlpar++); + /* + * Opening quote: scan forward until matching + * closing quote has been found. + */ + do { + + l = gpbc(); + if (LOOK_AHEAD(l,rquote)) { + if (--nlpar > 0) + outputstr(rquote); + } else if (LOOK_AHEAD(l,lquote)) { + record(quotes, nlpar++); + outputstr(lquote); + } else if (l == EOF) { + if (nlpar == 1) + warnx("unclosed quote:"); + else + warnx("%d unclosed quotes:", nlpar); + dump_stack(quotes, nlpar); + exit(1); + } else { + if (nlpar > 0) { + if (sp < 0) + reallyputchar(l); + else + CHRSAVE(l); + } + } + } + while (nlpar != 0); + } else if (sp < 0 && LOOK_AHEAD(t, scommt)) { + reallyoutputstr(scommt); + + for(;;) { + t = gpbc(); + if (LOOK_AHEAD(t, ecommt)) { + reallyoutputstr(ecommt); + break; + } + if (t == EOF) + break; + reallyputchar(t); + } + } else if (t == '_' || isalpha(t)) { + p = inspect(t, token); + if (p != NULL) + pushback(l = gpbc()); + if (p == NULL || (l != LPAREN && + (macro_getdef(p)->type & NEEDARGS) != 0)) + outputstr(token); + else { + /* + * real thing.. 
First build a call frame: + */ + pushf(fp); /* previous call frm */ + pushf(macro_getdef(p)->type); /* type of the call */ + pushf(is_traced(p)); + pushf(0); /* parenthesis level */ + fp = sp; /* new frame pointer */ + /* + * now push the string arguments: + */ + pushdef(p); /* defn string */ + pushs1((char *)macro_name(p)); /* macro name */ + pushs(ep); /* start next..*/ + + if (l != LPAREN && PARLEV == 0) { + /* no bracks */ + chrsave(EOS); + + if (sp == STACKMAX) + errx(1, "internal stack overflow"); + eval((const char **) mstack+fp+1, 2, + CALTYP, TRACESTATUS); + + ep = PREVEP; /* flush strspace */ + sp = PREVSP; /* previous sp.. */ + fp = PREVFP; /* rewind stack...*/ + } + } + } else if (t == EOF) { + if (!mimic_gnu /* you can puke right there */ + && sp > -1 && ilevel <= 0) { + warnx( "unexpected end of input, unclosed parenthesis:"); + dump_stack(paren, PARLEV); + exit(1); + } + if (ilevel <= 0) + break; /* all done thanks.. */ + release_input(infile+ilevel--); + emit_synchline(); + bufbase = bbase[ilevel]; + continue; + } else if (sp < 0) { /* not in a macro at all */ + reallyputchar(t); /* output directly.. */ + } + + else switch(t) { + + case LPAREN: + if (PARLEV > 0) + chrsave(t); + while (isspace(l = gpbc())) /* skip blank, tab, nl.. */ + if (PARLEV > 0) + chrsave(l); + pushback(l); + record(paren, PARLEV++); + break; + + case RPAREN: + if (--PARLEV > 0) + chrsave(t); + else { /* end of argument list */ + chrsave(EOS); + + if (sp == STACKMAX) + errx(1, "internal stack overflow"); + + eval((const char **) mstack+fp+1, sp-fp, + CALTYP, TRACESTATUS); + + ep = PREVEP; /* flush strspace */ + sp = PREVSP; /* previous sp.. 
*/ + fp = PREVFP; /* rewind stack...*/ + } + break; + + case COMMA: + if (PARLEV == 1) { + chrsave(EOS); /* new argument */ + while (isspace(l = gpbc())) + ; + pushback(l); + pushs(ep); + } else + chrsave(t); + break; + + default: + if (LOOK_AHEAD(t, scommt)) { + char *p; + for (p = scommt; *p; p++) + chrsave(*p); + for(;;) { + t = gpbc(); + if (LOOK_AHEAD(t, ecommt)) { + for (p = ecommt; *p; p++) + chrsave(*p); + break; + } + if (t == EOF) + break; + CHRSAVE(t); + } + } else + CHRSAVE(t); /* stack the char */ + break; + } + } +} + +/* + * output string directly, without pushing it for reparses. + */ +void +outputstr(const char *s) +{ + if (sp < 0) + reallyoutputstr(s); + else + while (*s) + CHRSAVE(*s++); +} + +void +reallyoutputstr(const char *s) +{ + if (synch_lines) { + while (*s) { + fputc(*s, active); + if (*s++ == '\n') { + infile[ilevel].synch_lineno++; + if (infile[ilevel].synch_lineno != + infile[ilevel].lineno) + do_emit_synchline(); + } + } + } else + fputs(s, active); +} + +void +reallyputchar(int c) +{ + putc(c, active); + if (synch_lines && c == '\n') { + infile[ilevel].synch_lineno++; + if (infile[ilevel].synch_lineno != infile[ilevel].lineno) + do_emit_synchline(); + } +} + +/* + * build an input token.. + * consider only those starting with _ or A-Za-z. + */ +static ndptr +inspect(int c, char *tp) +{ + char *name = tp; + char *etp = tp+MAXTOK; + ndptr p; + + *tp++ = c; + + while ((isalnum(c = gpbc()) || c == '_') && tp < etp) + *tp++ = c; + if (c != EOF) + PUSHBACK(c); + *tp = EOS; + /* token is too long, it won't match anything, but it can still + * be output. 
*/ + if (tp == ep) { + outputstr(name); + while (isalnum(c = gpbc()) || c == '_') { + if (sp < 0) + reallyputchar(c); + else + CHRSAVE(c); + } + *name = EOS; + return NULL; + } + + p = ohash_find(&macros, ohash_qlookupi(&macros, name, (const char **)&tp)); + if (p == NULL) + return NULL; + if (macro_getdef(p) == NULL) + return NULL; + return p; +} + +/* + * initkwds - initialise m4 keywords as fast as possible. + * This is very similar to install, but without certain overheads, + * such as calling lookup. Malloc is not used for storing the + * keyword strings, since we simply use the static pointers + * within keywrds block. + */ +static void +initkwds(void) +{ + unsigned int type; + int i; + + for (i = 0; i < MAXKEYS; i++) { + type = keywrds[i].ktyp & TYPEMASK; + if ((keywrds[i].ktyp & NOARGS) == 0) + type |= NEEDARGS; + setup_builtin(keywrds[i].knam, type); + } +} + +static void +record(struct position *t, int lev) +{ + if (lev < MAXRECORD) { + t[lev].name = CURRENT_NAME; + t[lev].line = CURRENT_LINE; + } +} + +static void +dump_stack(struct position *t, int lev) +{ + int i; + + for (i = 0; i < lev; i++) { + if (i == MAXRECORD) { + fprintf(stderr, " ...\n"); + break; + } + fprintf(stderr, " %s at line %lu\n", + t[i].name, t[i].line); + } +} + + +static void +enlarge_stack(void) +{ + STACKMAX += STACKMAX/2; + mstack = xreallocarray(mstack, STACKMAX, sizeof(stae), + "Evaluation stack overflow (%lu)", + (unsigned long)STACKMAX); + sstack = xrealloc(sstack, STACKMAX, + "Evaluation stack overflow (%lu)", + (unsigned long)STACKMAX); +} diff --git a/usr.bin/m4/mdef.h b/usr.bin/m4/mdef.h new file mode 100644 index 0000000..f0d9d39 --- /dev/null +++ b/usr.bin/m4/mdef.h @@ -0,0 +1,237 @@ +/* $OpenBSD: mdef.h,v 1.33 2015/11/03 16:21:47 deraadt Exp $ */ +/* $NetBSD: mdef.h,v 1.7 1996/01/13 23:25:27 pk Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved.
+ * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)mdef.h 8.1 (Berkeley) 6/6/93 + */ + +#ifdef __GNUC__ +# define UNUSED __attribute__((__unused__)) +#else +# define UNUSED +#endif + +#define MACRTYPE 1 +#define DEFITYPE 2 +#define EXPRTYPE 3 +#define SUBSTYPE 4 +#define IFELTYPE 5 +#define LENGTYPE 6 +#define CHNQTYPE 7 +#define SYSCTYPE 8 +#define UNDFTYPE 9 +#define INCLTYPE 10 +#define SINCTYPE 11 +#define PASTTYPE 12 +#define SPASTYPE 13 +#define INCRTYPE 14 +#define IFDFTYPE 15 +#define PUSDTYPE 16 +#define POPDTYPE 17 +#define SHIFTYPE 18 +#define DECRTYPE 19 +#define DIVRTYPE 20 +#define UNDVTYPE 21 +#define DIVNTYPE 22 +#define MKTMTYPE 23 +#define ERRPTYPE 24 +#define M4WRTYPE 25 +#define TRNLTYPE 26 +#define DNLNTYPE 27 +#define DUMPTYPE 28 +#define CHNCTYPE 29 +#define INDXTYPE 30 +#define SYSVTYPE 31 +#define EXITTYPE 32 +#define DEFNTYPE 33 +#define SELFTYPE 34 +#define INDIRTYPE 35 +#define BUILTINTYPE 36 +#define PATSTYPE 37 +#define FILENAMETYPE 38 +#define LINETYPE 39 +#define REGEXPTYPE 40 +#define ESYSCMDTYPE 41 +#define TRACEONTYPE 42 +#define TRACEOFFTYPE 43 +#define FORMATTYPE 44 + +#define BUILTIN_MARKER "__builtin_" + +#define TYPEMASK 63 /* Keep bits really corresponding to a type. 
*/ +#define RECDEF 256 /* Pure recursive def, don't expand it */ +#define NOARGS 512 /* builtin needs no args */ +#define NEEDARGS 1024 /* mark builtin that need args with this */ + +/* + * m4 special characters + */ + +#define ARGFLAG '$' +#define LPAREN '(' +#define RPAREN ')' +#define LQUOTE '`' +#define RQUOTE '\'' +#define COMMA ',' +#define SCOMMT '#' +#define ECOMMT '\n' + +/* + * other important constants + */ + +#define EOS '\0' +#define MAXINP 10 /* maximum include files */ +#define MAXOUT 10 /* maximum # of diversions */ +#define BUFSIZE 4096 /* starting size of pushback buffer */ +#define INITSTACKMAX 4096 /* starting size of call stack */ +#define STRSPMAX 4096 /* starting size of string space */ +#define MAXTOK 512 /* maximum chars in a token */ +#define MAXCCHARS 5 /* max size of comment/quote delim */ + +#define ALL 1 +#define TOP 0 + +#define TRUE 1 +#define FALSE 0 +#define cycle for(;;) + +/* + * m4 data structures + */ + +typedef struct ndblock *ndptr; + +struct macro_definition { + struct macro_definition *next; + char *defn; /* definition.. */ + unsigned int type; /* type of the entry.. */ +}; + + +struct ndblock { /* hashtable structure */ + unsigned int builtin_type; + unsigned int trace_flags; + struct macro_definition *d; + char name[1]; /* entry name.. */ +}; + +typedef union { /* stack structure */ + int sfra; /* frame entry */ + char *sstr; /* string entry */ +} stae; + +struct input_file { + FILE *file; + char *name; + unsigned long lineno; + unsigned long synch_lineno; /* used for -s */ + int c; +}; + +#define STORAGE_STRSPACE 0 +#define STORAGE_MACRO 1 +#define STORAGE_OTHER 2 + +#define CURRENT_NAME (infile[ilevel].name) +#define CURRENT_LINE (infile[ilevel].lineno) +/* + * macros for readability and/or speed + * + * gpbc() - get a possibly pushed-back character + * pushf() - push a call frame entry onto stack + * pushs() - push a string pointer onto stack + */ +#define gpbc() (bp > bufbase) ?
*--bp : obtain_char(infile+ilevel) +#define pushf(x) \ + do { \ + if (++sp == STACKMAX) \ + enlarge_stack();\ + mstack[sp].sfra = (x); \ + sstack[sp] = STORAGE_OTHER; \ + } while (0) + +#define pushs(x) \ + do { \ + if (++sp == STACKMAX) \ + enlarge_stack();\ + mstack[sp].sstr = (x); \ + sstack[sp] = STORAGE_STRSPACE; \ + } while (0) + +#define pushs1(x) \ + do { \ + if (++sp == STACKMAX) \ + enlarge_stack();\ + mstack[sp].sstr = (x); \ + sstack[sp] = STORAGE_OTHER; \ + } while (0) + +#define pushdef(p) \ + do { \ + if (++sp == STACKMAX) \ + enlarge_stack();\ + mstack[sp].sstr = macro_getdef(p)->defn;\ + sstack[sp] = STORAGE_MACRO; \ + } while (0) + + +/* + * . . + * | . | <-- sp | . | + * +-------+ +-----+ + * | arg 3 ----------------------->| str | + * +-------+ | . | + * | arg 2 ---PREVEP-----+ . + * +-------+ | + * . | | | + * +-------+ | +-----+ + * | plev | PARLEV +-------->| str | + * +-------+ | . | + * | type | CALTYP . + * +-------+ + * | prcf ---PREVFP--+ + * +-------+ | + * | . | PREVSP | + * . | + * +-------+ | + * | <----------+ + * +-------+ + * + */ +#define PARLEV (mstack[fp].sfra) +#define CALTYP (mstack[fp-2].sfra) +#define TRACESTATUS (mstack[fp-1].sfra) +#define PREVEP (mstack[fp+3].sstr) +#define PREVSP (fp-4) +#define PREVFP (mstack[fp-3].sfra) diff --git a/usr.bin/m4/misc.c b/usr.bin/m4/misc.c new file mode 100644 index 0000000..0b4f80b --- /dev/null +++ b/usr.bin/m4/misc.c @@ -0,0 +1,467 @@ +/* $OpenBSD: misc.c,v 1.47 2017/06/15 13:48:42 bcallah Exp $ */ +/* $NetBSD: misc.c,v 1.6 1995/09/28 05:37:41 tls Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <errno.h> +#include <unistd.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <err.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" +#include "pathnames.h" + + +char *ep; /* first free char in strspace */ +static char *strspace; /* string space for evaluation */ +char *endest; /* end of string space */ +static size_t strsize = STRSPMAX; +static size_t bufsize = BUFSIZE; + +unsigned char *buf; /* push-back buffer */ +unsigned char *bufbase; /* the base for current ilevel */ +unsigned char *bbase[MAXINP]; /* the base for each ilevel */ +unsigned char *bp; /* first available character */ +unsigned char *endpbb; /* end of push-back buffer */ + + +/* + * find the index of second str in the first str. + */ +ptrdiff_t +indx(const char *s1, const char *s2) +{ + char *t; + + t = strstr(s1, s2); + if (t == NULL) + return (-1); + else + return (t - s1); +} +/* + * pushback - push character back onto input + */ +void +pushback(int c) +{ + if (c == EOF) + return; + if (bp >= endpbb) + enlarge_bufspace(); + *bp++ = c; +} + +/* + * pbstr - push string back onto input + * pushback is replicated to improve + * performance. + */ +void +pbstr(const char *s) +{ + size_t n; + + n = strlen(s); + while (endpbb - bp <= n) + enlarge_bufspace(); + while (n > 0) + *bp++ = s[--n]; +} + +/* + * pbnum - convert number to string, push back on input. + */ +void +pbnum(int n) +{ + pbnumbase(n, 10, 0); +} + +void +pbnumbase(int n, int base, int d) +{ + static char digits[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; + int num; + int printed = 0; + + if (base > 36) + m4errx(1, "base %d > 36: not supported.", base); + + if (base < 2) + m4errx(1, "bad base %d for conversion.", base); + + num = (n < 0) ? 
-n : n; + do { + pushback(digits[num % base]); + printed++; + } + while ((num /= base) > 0); + + if (n < 0) + printed++; + while (printed++ < d) + pushback('0'); + + if (n < 0) + pushback('-'); +} + +/* + * pbunsigned - convert unsigned long to string, push back on input. + */ +void +pbunsigned(unsigned long n) +{ + do { + pushback(n % 10 + '0'); + } + while ((n /= 10) > 0); +} + +void +initspaces() +{ + int i; + + strspace = xalloc(strsize+1, NULL); + ep = strspace; + endest = strspace+strsize; + buf = xalloc(bufsize, NULL); + bufbase = buf; + bp = buf; + endpbb = buf + bufsize; + for (i = 0; i < MAXINP; i++) + bbase[i] = buf; +} + +void +enlarge_strspace() +{ + char *newstrspace; + int i; + + strsize *= 2; + newstrspace = malloc(strsize + 1); + if (!newstrspace) + errx(1, "string space overflow"); + memcpy(newstrspace, strspace, strsize/2); + for (i = 0; i <= sp; i++) + if (sstack[i] == STORAGE_STRSPACE) + mstack[i].sstr = (mstack[i].sstr - strspace) + + newstrspace; + ep = (ep-strspace) + newstrspace; + free(strspace); + strspace = newstrspace; + endest = strspace + strsize; +} + +void +enlarge_bufspace() +{ + unsigned char *newbuf; + int i; + + bufsize += bufsize/2; + newbuf = xrealloc(buf, bufsize, "too many characters pushed back"); + for (i = 0; i < MAXINP; i++) + bbase[i] = (bbase[i]-buf)+newbuf; + bp = (bp-buf)+newbuf; + bufbase = (bufbase-buf)+newbuf; + buf = newbuf; + endpbb = buf+bufsize; +} + +/* + * chrsave - put single char on string space + */ +void +chrsave(int c) +{ + if (ep >= endest) + enlarge_strspace(); + *ep++ = c; +} + +/* + * read in a diversion file, and dispose it. 
+ */ +void +getdiv(int n) +{ + int c; + + if (active == outfile[n]) + m4errx(1, "undivert: diversion still active."); + rewind(outfile[n]); + while ((c = getc(outfile[n])) != EOF) + putc(c, active); + (void) fclose(outfile[n]); + outfile[n] = NULL; +} + +void +onintr(int signo) +{ +#define intrmessage "m4: interrupted.\n" + write(STDERR_FILENO, intrmessage, sizeof(intrmessage)-1); + _exit(1); +} + +/* + * killdiv - get rid of the diversion files + */ +void +killdiv() +{ + int n; + + for (n = 0; n < maxout; n++) + if (outfile[n] != NULL) { + (void) fclose(outfile[n]); + } +} + +extern char *__progname; + +void +m4errx(int eval, const char *fmt, ...) +{ + fprintf(stderr, "%s: ", __progname); + fprintf(stderr, "%s at line %lu: ", CURRENT_NAME, CURRENT_LINE); + if (fmt != NULL) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } + fprintf(stderr, "\n"); + exit(eval); +} + +/* + * resizedivs: allocate more diversion files */ +void +resizedivs(int n) +{ + int i; + + outfile = xreallocarray(outfile, n, sizeof(FILE *), + "too many diverts %d", n); + for (i = maxout; i < n; i++) + outfile[i] = NULL; + maxout = n; +} + +void * +xalloc(size_t n, const char *fmt, ...) +{ + void *p = malloc(n); + + if (p == NULL) { + if (fmt == NULL) + err(1, "malloc"); + else { + va_list va; + + va_start(va, fmt); + verr(1, fmt, va); + va_end(va); + } + } + return p; +} + +void * +xcalloc(size_t n, size_t s, const char *fmt, ...) +{ + void *p = calloc(n, s); + + if (p == NULL) { + if (fmt == NULL) + err(1, "calloc"); + else { + va_list va; + + va_start(va, fmt); + verr(1, fmt, va); + va_end(va); + } + } + return p; +} + +void * +xrealloc(void *old, size_t n, const char *fmt, ...) 
+{ + char *p = realloc(old, n); + + if (p == NULL) { + free(old); + if (fmt == NULL) + err(1, "realloc"); + else { + va_list va; + + va_start(va, fmt); + verr(1, fmt, va); + va_end(va); + } + } + return p; +} + +void * +xreallocarray(void *old, size_t s1, size_t s2, const char *fmt, ...) +{ + void *p = reallocarray(old, s1, s2); + + if (p == NULL) { + free(old); + if (fmt == NULL) + err(1, "reallocarray"); + else { + va_list va; + + va_start(va, fmt); + verr(1, fmt, va); + va_end(va); + } + } + return p; +} + +char * +xstrdup(const char *s) +{ + char *p = strdup(s); + if (p == NULL) + err(1, "strdup"); + return p; +} + +void +usage(void) +{ + fprintf(stderr, "usage: m4 [-EgPs] [-Dname[=value]] [-d flags] " + "[-I dirname] [-o filename]\n" + "\t[-t macro] [-Uname] [file ...]\n"); + exit(1); +} + +int +obtain_char(struct input_file *f) +{ + if (f->c == EOF) + return EOF; + + f->c = fgetc(f->file); + if (f->c == '\n') + f->lineno++; + + return f->c; +} + +void +set_input(struct input_file *f, FILE *real, const char *name) +{ + f->file = real; + f->lineno = 1; + f->c = 0; + f->name = xstrdup(name); + emit_synchline(); +} + +void +do_emit_synchline() +{ + fprintf(active, "#line %lu \"%s\"\n", + infile[ilevel].lineno, infile[ilevel].name); + infile[ilevel].synch_lineno = infile[ilevel].lineno; +} + +void +release_input(struct input_file *f) +{ + if (ferror(f->file)) + errx(1, "Fatal error reading from %s\n", f->name); + if (f->file != stdin) + fclose(f->file); + f->c = EOF; + /* + * XXX can't free filename, as there might still be + * error information pointing to it. + */ +} + +void +doprintlineno(struct input_file *f) +{ + pbunsigned(f->lineno); +} + +void +doprintfilename(struct input_file *f) +{ + pbstr(rquote); + pbstr(f->name); + pbstr(lquote); +} + +/* + * buffer_mark/dump_buffer: allows one to save a mark in a buffer, + * and later dump everything that was added since then to a file. 
+ */ +size_t +buffer_mark() +{ + return bp - buf; +} + + +void +dump_buffer(FILE *f, size_t m) +{ + unsigned char *s; + + for (s = bp; s-buf > m;) + fputc(*--s, f); +} diff --git a/usr.bin/m4/parser.c b/usr.bin/m4/parser.c new file mode 100644 index 0000000..85e191c --- /dev/null +++ b/usr.bin/m4/parser.c @@ -0,0 +1,756 @@ +/* original parser id follows */ +/* yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93" */ +/* (use YYMAJOR/YYMINOR for ifdefs dependent on parser version) */ + +#define YYBYACC 1 +#define YYMAJOR 2 +#define YYMINOR 0 +#define YYPATCH 20200910 + +#define YYEMPTY (-1) +#define yyclearin (yychar = YYEMPTY) +#define yyerrok (yyerrflag = 0) +#define YYRECOVERING() (yyerrflag != 0) +#define YYENOMEM (-2) +#define YYEOF 0 +#define YYPREFIX "yy" + +#define YYPURE 0 + +#line 2 "usr.bin/m4/parser.y" +/* $OpenBSD: parser.y,v 1.7 2012/04/12 17:00:11 espie Exp $ */ +/* + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <math.h> +#include <stdint.h> +#include <stdlib.h> +#define YYSTYPE int32_t +extern int32_t end_result; +extern int yylex(void); +extern int yyerror(const char *); +#line 45 "usr.bin/m4/parser.c" + +#if ! defined(YYSTYPE) && ! 
defined(YYSTYPE_IS_DECLARED) +/* Default: YYSTYPE is the semantic value type. */ +typedef int YYSTYPE; +# define YYSTYPE_IS_DECLARED 1 +#endif + +/* compatibility with bison */ +#ifdef YYPARSE_PARAM +/* compatibility with FreeBSD */ +# ifdef YYPARSE_PARAM_TYPE +# define YYPARSE_DECL() yyparse(YYPARSE_PARAM_TYPE YYPARSE_PARAM) +# else +# define YYPARSE_DECL() yyparse(void *YYPARSE_PARAM) +# endif +#else +# define YYPARSE_DECL() yyparse(void) +#endif + +/* Parameters sent to lex. */ +#ifdef YYLEX_PARAM +# define YYLEX_DECL() yylex(void *YYLEX_PARAM) +# define YYLEX yylex(YYLEX_PARAM) +#else +# define YYLEX_DECL() yylex(void) +# define YYLEX yylex() +#endif + +#if !(defined(yylex) || defined(YYSTATE)) +int YYLEX_DECL(); +#endif + +/* Parameters sent to yyerror. */ +#ifndef YYERROR_DECL +#define YYERROR_DECL() yyerror(const char *s) +#endif +#ifndef YYERROR_CALL +#define YYERROR_CALL(msg) yyerror(msg) +#endif + +extern int YYPARSE_DECL(); + +#define NUMBER 257 +#define ERROR 258 +#define LOR 259 +#define LAND 260 +#define EQ 261 +#define NE 262 +#define LE 263 +#define GE 264 +#define LSHIFT 265 +#define RSHIFT 266 +#define EXPONENT 267 +#define UMINUS 268 +#define UPLUS 269 +#define YYERRCODE 256 +typedef short YYINT; +static const YYINT yylhs[] = { -1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; +static const YYINT yylen[] = { 2, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 2, 2, 2, 2, 1, +}; +static const YYINT yydefred[] = { 0, + 26, 0, 0, 0, 0, 0, 0, 0, 23, 22, + 24, 25, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, +}; +static const YYINT yydgoto[] = { 7, + 8, +}; +static const YYINT yysindex[] = { 95, + 0, 95, 95, 95, 95, 95, 0, 397, 0, 0, + 0, 0, 383, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 0, 428, 471, 482, 185, 437, 493, 493, + -10, -10, -10, -10, -23, -23, -34, -34, 
-267, -267, + -267, -267, +}; +static const YYINT yyrindex[] = { 0, + 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 11, 62, 23, 101, 308, 201, 243, + 124, 130, 144, 155, 79, 116, 51, 67, 1, 12, + 28, 40, +}; +static const YYINT yygindex[] = { 0, + 582, +}; +#define YYTABLESIZE 760 +static const YYINT yytable[] = { 32, + 5, 1, 31, 0, 0, 0, 0, 29, 0, 0, + 20, 6, 30, 31, 0, 0, 0, 0, 29, 27, + 0, 28, 18, 30, 0, 0, 31, 7, 0, 0, + 0, 29, 27, 0, 28, 0, 30, 5, 5, 4, + 0, 5, 5, 5, 0, 5, 0, 5, 6, 6, + 2, 20, 6, 6, 6, 0, 6, 0, 6, 0, + 5, 19, 5, 18, 7, 7, 3, 0, 7, 7, + 7, 6, 7, 6, 7, 0, 4, 4, 8, 0, + 4, 4, 4, 0, 4, 0, 4, 7, 2, 7, + 0, 2, 0, 2, 5, 2, 0, 0, 0, 4, + 17, 4, 19, 0, 3, 6, 0, 3, 0, 3, + 2, 3, 2, 0, 0, 9, 8, 0, 0, 8, + 0, 7, 0, 10, 5, 0, 3, 4, 3, 12, + 0, 0, 0, 4, 6, 6, 0, 2, 8, 3, + 8, 17, 0, 11, 2, 0, 18, 0, 0, 0, + 0, 7, 0, 9, 13, 0, 9, 0, 0, 0, + 3, 10, 0, 4, 10, 0, 0, 12, 0, 0, + 12, 0, 8, 0, 2, 9, 0, 9, 0, 0, + 0, 11, 0, 10, 11, 10, 0, 0, 0, 12, + 3, 12, 13, 0, 17, 13, 0, 0, 0, 0, + 14, 0, 8, 11, 0, 11, 0, 0, 0, 9, + 0, 0, 0, 0, 13, 0, 13, 10, 0, 0, + 5, 31, 18, 12, 17, 0, 29, 27, 0, 28, + 0, 30, 32, 0, 0, 0, 0, 11, 14, 9, + 0, 14, 15, 32, 21, 0, 23, 10, 13, 0, + 0, 0, 0, 12, 25, 26, 32, 0, 0, 5, + 5, 5, 5, 5, 5, 5, 5, 11, 0, 20, + 6, 6, 6, 6, 6, 6, 6, 6, 13, 0, + 15, 18, 18, 15, 0, 0, 7, 7, 7, 7, + 7, 7, 7, 7, 14, 0, 0, 0, 4, 4, + 4, 4, 4, 4, 4, 4, 0, 16, 0, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, + 19, 19, 0, 0, 14, 3, 3, 3, 3, 3, + 3, 3, 3, 0, 0, 0, 15, 8, 8, 8, + 8, 8, 8, 8, 8, 16, 0, 0, 16, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 0, 17, + 17, 0, 0, 0, 0, 0, 15, 0, 0, 0, + 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, + 9, 9, 10, 10, 10, 10, 10, 10, 12, 12, + 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, + 0, 16, 11, 11, 11, 11, 11, 11, 0, 0, + 0, 0, 0, 13, 13, 13, 13, 13, 13, 31, + 18, 0, 0, 33, 29, 27, 0, 28, 0, 30, + 0, 16, 0, 31, 18, 0, 0, 0, 29, 27, + 0, 28, 21, 30, 23, 19, 20, 22, 24, 25, + 26, 32, 0, 0, 0, 0, 21, 0, 23, 14, + 14, 14, 
14, 0, 31, 18, 0, 0, 0, 29, + 27, 0, 28, 31, 30, 0, 17, 0, 29, 27, + 0, 28, 0, 30, 0, 0, 0, 21, 0, 23, + 17, 0, 0, 0, 0, 0, 21, 0, 23, 0, + 0, 15, 15, 15, 15, 0, 16, 31, 18, 0, + 0, 0, 29, 27, 0, 28, 0, 30, 31, 18, + 16, 17, 0, 29, 27, 0, 28, 0, 30, 31, + 21, 0, 23, 0, 29, 27, 0, 28, 0, 30, + 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, + 0, 16, 21, 0, 23, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 17, 0, 16, 16, 0, 0, + 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, + 0, 0, 0, 9, 10, 11, 12, 13, 0, 0, + 0, 0, 0, 0, 16, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 14, 15, 19, 20, 22, 24, 25, 26, 32, + 0, 0, 0, 0, 0, 14, 15, 19, 20, 22, + 24, 25, 26, 32, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 15, 19, 20, + 22, 24, 25, 26, 32, 0, 0, 19, 20, 22, + 24, 25, 26, 32, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 19, 20, 22, 24, 25, 26, 32, 0, 0, + 0, 0, 19, 20, 22, 24, 25, 26, 32, 0, + 0, 0, 0, 0, 0, 22, 24, 25, 26, 32, +}; +static const YYINT yycheck[] = { 267, + 0, 0, 37, -1, -1, -1, -1, 42, -1, -1, + 0, 0, 47, 37, -1, -1, -1, -1, 42, 43, + -1, 45, 0, 47, -1, -1, 37, 0, -1, -1, + -1, 42, 43, -1, 45, -1, 47, 37, 38, 0, + -1, 41, 42, 43, -1, 45, -1, 47, 37, 38, + 0, 41, 41, 42, 43, -1, 45, -1, 47, -1, + 60, 0, 62, 41, 37, 38, 0, -1, 41, 42, + 43, 60, 45, 62, 47, -1, 37, 38, 0, -1, + 41, 42, 43, -1, 45, -1, 47, 60, 38, 62, + -1, 41, -1, 43, 94, 45, -1, -1, -1, 60, + 0, 62, 41, -1, 38, 94, -1, 41, -1, 43, + 60, 45, 62, -1, -1, 0, 38, -1, -1, 41, + -1, 94, -1, 0, 124, -1, 60, 33, 62, 0, + -1, -1, -1, 94, 40, 124, -1, 43, 60, 45, + 62, 41, -1, 0, 94, -1, 124, -1, -1, -1, + -1, 124, -1, 38, 0, -1, 41, -1, -1, -1, + 94, 38, -1, 124, 41, -1, -1, 38, -1, -1, + 41, -1, 94, -1, 124, 60, -1, 62, -1, -1, + -1, 38, -1, 60, 41, 62, -1, -1, -1, 60, + 124, 62, 38, -1, 94, 41, -1, -1, -1, -1, + 0, -1, 124, 60, -1, 62, -1, -1, -1, 94, + 
-1, -1, -1, -1, 60, -1, 62, 94, -1, -1, + 126, 37, 38, 94, 124, -1, 42, 43, -1, 45, + -1, 47, 267, -1, -1, -1, -1, 94, 38, 124, + -1, 41, 0, 267, 60, -1, 62, 124, 94, -1, + -1, -1, -1, 124, 265, 266, 267, -1, -1, 259, + 260, 261, 262, 263, 264, 265, 266, 124, -1, 259, + 259, 260, 261, 262, 263, 264, 265, 266, 124, -1, + 38, 259, 260, 41, -1, -1, 259, 260, 261, 262, + 263, 264, 265, 266, 94, -1, -1, -1, 259, 260, + 261, 262, 263, 264, 265, 266, -1, 0, -1, 259, + 260, 261, 262, 263, 264, 265, 266, -1, -1, -1, + 259, 260, -1, -1, 124, 259, 260, 261, 262, 263, + 264, 265, 266, -1, -1, -1, 94, 259, 260, 261, + 262, 263, 264, 265, 266, 38, -1, -1, 41, -1, + -1, 257, -1, -1, -1, -1, -1, -1, -1, 259, + 260, -1, -1, -1, -1, -1, 124, -1, -1, -1, + -1, -1, -1, -1, 259, 260, 261, 262, 263, 264, + 265, 266, 259, 260, 261, 262, 263, 264, 259, 260, + 261, 262, 263, 264, -1, -1, -1, -1, -1, -1, + -1, 94, 259, 260, 261, 262, 263, 264, -1, -1, + -1, -1, -1, 259, 260, 261, 262, 263, 264, 37, + 38, -1, -1, 41, 42, 43, -1, 45, -1, 47, + -1, 124, -1, 37, 38, -1, -1, -1, 42, 43, + -1, 45, 60, 47, 62, 261, 262, 263, 264, 265, + 266, 267, -1, -1, -1, -1, 60, -1, 62, 259, + 260, 261, 262, -1, 37, 38, -1, -1, -1, 42, + 43, -1, 45, 37, 47, -1, 94, -1, 42, 43, + -1, 45, -1, 47, -1, -1, -1, 60, -1, 62, + 94, -1, -1, -1, -1, -1, 60, -1, 62, -1, + -1, 259, 260, 261, 262, -1, 124, 37, 38, -1, + -1, -1, 42, 43, -1, 45, -1, 47, 37, 38, + 124, 94, -1, 42, 43, -1, 45, -1, 47, 37, + 60, -1, 62, -1, 42, 43, -1, 45, -1, 47, + -1, 60, -1, 62, -1, -1, -1, -1, -1, -1, + -1, 124, 60, -1, 62, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 94, -1, 259, 260, -1, -1, + -1, -1, -1, -1, -1, 94, -1, -1, -1, -1, + -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, + -1, -1, -1, -1, 124, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 259, 260, 261, 262, 263, 264, 265, 266, 267, + -1, -1, -1, -1, 
-1, 259, 260, 261, 262, 263, + 264, 265, 266, 267, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 260, 261, 262, + 263, 264, 265, 266, 267, -1, -1, 261, 262, 263, + 264, 265, 266, 267, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 261, 262, 263, 264, 265, 266, 267, -1, -1, + -1, -1, 261, 262, 263, 264, 265, 266, 267, -1, + -1, -1, -1, -1, -1, 263, 264, 265, 266, 267, +}; +#define YYFINAL 7 +#ifndef YYDEBUG +#define YYDEBUG 0 +#endif +#define YYMAXTOKEN 269 +#define YYUNDFTOKEN 273 +#define YYTRANSLATE(a) ((a) > YYMAXTOKEN ? YYUNDFTOKEN : (a)) +#if YYDEBUG +static const char *const yyname[] = { + +"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +"'!'",0,0,0,"'%'","'&'",0,"'('","')'","'*'","'+'",0,"'-'",0,"'/'",0,0,0,0,0,0,0, +0,0,0,0,0,"'<'",0,"'>'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,"'^'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0, +"'~'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,"NUMBER","ERROR","LOR","LAND","EQ","NE","LE","GE", +"LSHIFT","RSHIFT","EXPONENT","UMINUS","UPLUS",0,0,0,"illegal-symbol", +}; +static const char *const yyrule[] = { +"$accept : top", +"top : expr", +"expr : expr '+' expr", +"expr : expr '-' expr", +"expr : expr EXPONENT expr", +"expr : expr '*' expr", +"expr : expr '/' expr", +"expr : expr '%' expr", +"expr : expr LSHIFT expr", +"expr : expr RSHIFT expr", +"expr : expr '<' expr", +"expr : expr '>' expr", +"expr : expr LE expr", +"expr : expr GE expr", +"expr : expr EQ expr", +"expr : expr NE expr", +"expr : expr '&' expr", +"expr : expr '^' expr", +"expr : expr '|' expr", +"expr : expr LAND expr", +"expr : expr LOR expr", 
+"expr : '(' expr ')'", +"expr : '-' expr", +"expr : '+' expr", +"expr : '!' expr", +"expr : '~' expr", +"expr : NUMBER", + +}; +#endif + +#if YYDEBUG +int yydebug; +#endif + +int yyerrflag; +int yychar; +YYSTYPE yyval; +YYSTYPE yylval; +int yynerrs; + +/* define the initial stack-sizes */ +#ifdef YYSTACKSIZE +#undef YYMAXDEPTH +#define YYMAXDEPTH YYSTACKSIZE +#else +#ifdef YYMAXDEPTH +#define YYSTACKSIZE YYMAXDEPTH +#else +#define YYSTACKSIZE 10000 +#define YYMAXDEPTH 10000 +#endif +#endif + +#define YYINITSTACKSIZE 200 + +typedef struct { + unsigned stacksize; + YYINT *s_base; + YYINT *s_mark; + YYINT *s_last; + YYSTYPE *l_base; + YYSTYPE *l_mark; +} YYSTACKDATA; +/* variables for the parser stack */ +static YYSTACKDATA yystack; +#line 84 "usr.bin/m4/parser.y" + +#line 389 "usr.bin/m4/parser.c" + +#if YYDEBUG +#include <stdio.h> /* needed for printf */ +#endif + +#include <stdlib.h> /* needed for malloc, etc */ +#include <string.h> /* needed for memset */ + +/* allocate initial stack or double stack size, up to YYMAXDEPTH */ +static int yygrowstack(YYSTACKDATA *data) +{ + int i; + unsigned newsize; + YYINT *newss; + YYSTYPE *newvs; + + if ((newsize = data->stacksize) == 0) + newsize = YYINITSTACKSIZE; + else if (newsize >= YYMAXDEPTH) + return YYENOMEM; + else if ((newsize *= 2) > YYMAXDEPTH) + newsize = YYMAXDEPTH; + + i = (int) (data->s_mark - data->s_base); + newss = (YYINT *)realloc(data->s_base, newsize * sizeof(*newss)); + if (newss == 0) + return YYENOMEM; + + data->s_base = newss; + data->s_mark = newss + i; + + newvs = (YYSTYPE *)realloc(data->l_base, newsize * sizeof(*newvs)); + if (newvs == 0) + return YYENOMEM; + + data->l_base = newvs; + data->l_mark = newvs + i; + + data->stacksize = newsize; + data->s_last = data->s_base + newsize - 1; + return 0; +} + +#if YYPURE || defined(YY_NO_LEAKS) +static void yyfreestack(YYSTACKDATA *data) +{ + free(data->s_base); + free(data->l_base); + memset(data, 0, sizeof(*data)); +} +#else +#define yyfreestack(data) 
/* nothing */ +#endif + +#define YYABORT goto yyabort +#define YYREJECT goto yyabort +#define YYACCEPT goto yyaccept +#define YYERROR goto yyerrlab + +int +YYPARSE_DECL() +{ + int yym, yyn, yystate; +#if YYDEBUG + const char *yys; + + if ((yys = getenv("YYDEBUG")) != 0) + { + yyn = *yys; + if (yyn >= '0' && yyn <= '9') + yydebug = yyn - '0'; + } +#endif + + yym = 0; + yyn = 0; + yynerrs = 0; + yyerrflag = 0; + yychar = YYEMPTY; + yystate = 0; + +#if YYPURE + memset(&yystack, 0, sizeof(yystack)); +#endif + + if (yystack.s_base == NULL && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystack.s_mark = yystack.s_base; + yystack.l_mark = yystack.l_base; + yystate = 0; + *yystack.s_mark = 0; + +yyloop: + if ((yyn = yydefred[yystate]) != 0) goto yyreduce; + if (yychar < 0) + { + yychar = YYLEX; + if (yychar < 0) yychar = YYEOF; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + } + if (((yyn = yysindex[yystate]) != 0) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yychar) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, shifting to state %d\n", + YYPREFIX, yystate, yytable[yyn]); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystate = yytable[yyn]; + *++yystack.s_mark = yytable[yyn]; + *++yystack.l_mark = yylval; + yychar = YYEMPTY; + if (yyerrflag > 0) --yyerrflag; + goto yyloop; + } + if (((yyn = yyrindex[yystate]) != 0) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yychar) + { + yyn = yytable[yyn]; + goto yyreduce; + } + if (yyerrflag != 0) goto yyinrecovery; + + YYERROR_CALL("syntax error"); + + goto yyerrlab; /* redundant goto avoids 'unused label' warning */ +yyerrlab: + ++yynerrs; + +yyinrecovery: + if (yyerrflag < 3) + { + yyerrflag = 3; + for (;;) + { + if (((yyn = 
yysindex[*yystack.s_mark]) != 0) && (yyn += YYERRCODE) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) YYERRCODE) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, error recovery shifting\ + to state %d\n", YYPREFIX, *yystack.s_mark, yytable[yyn]); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + yystate = yytable[yyn]; + *++yystack.s_mark = yytable[yyn]; + *++yystack.l_mark = yylval; + goto yyloop; + } + else + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: error recovery discarding state %d\n", + YYPREFIX, *yystack.s_mark); +#endif + if (yystack.s_mark <= yystack.s_base) goto yyabort; + --yystack.s_mark; + --yystack.l_mark; + } + } + } + else + { + if (yychar == YYEOF) goto yyabort; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, error recovery discards token %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + yychar = YYEMPTY; + goto yyloop; + } + +yyreduce: +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, reducing by rule %d (%s)\n", + YYPREFIX, yystate, yyn, yyrule[yyn]); +#endif + yym = yylen[yyn]; + if (yym > 0) + yyval = yystack.l_mark[1-yym]; + else + memset(&yyval, 0, sizeof yyval); + + switch (yyn) + { +case 1: +#line 43 "usr.bin/m4/parser.y" + { end_result = yystack.l_mark[0]; } +break; +case 2: +#line 45 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] + yystack.l_mark[0]; } +break; +case 3: +#line 46 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] - yystack.l_mark[0]; } +break; +case 4: +#line 47 "usr.bin/m4/parser.y" + { yyval = pow(yystack.l_mark[-2], yystack.l_mark[0]); } +break; +case 5: +#line 48 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] * yystack.l_mark[0]; } +break; +case 6: +#line 49 "usr.bin/m4/parser.y" + { + if (yystack.l_mark[0] == 0) { + yyerror("division by zero"); + exit(1); + } + yyval = yystack.l_mark[-2] / yystack.l_mark[0]; + } 
+break; +case 7: +#line 56 "usr.bin/m4/parser.y" + { + if (yystack.l_mark[0] == 0) { + yyerror("modulo zero"); + exit(1); + } + yyval = yystack.l_mark[-2] % yystack.l_mark[0]; + } +break; +case 8: +#line 63 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] << yystack.l_mark[0]; } +break; +case 9: +#line 64 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] >> yystack.l_mark[0]; } +break; +case 10: +#line 65 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] < yystack.l_mark[0]; } +break; +case 11: +#line 66 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] > yystack.l_mark[0]; } +break; +case 12: +#line 67 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] <= yystack.l_mark[0]; } +break; +case 13: +#line 68 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] >= yystack.l_mark[0]; } +break; +case 14: +#line 69 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] == yystack.l_mark[0]; } +break; +case 15: +#line 70 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] != yystack.l_mark[0]; } +break; +case 16: +#line 71 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] & yystack.l_mark[0]; } +break; +case 17: +#line 72 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] ^ yystack.l_mark[0]; } +break; +case 18: +#line 73 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] | yystack.l_mark[0]; } +break; +case 19: +#line 74 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] && yystack.l_mark[0]; } +break; +case 20: +#line 75 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-2] || yystack.l_mark[0]; } +break; +case 21: +#line 76 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[-1]; } +break; +case 22: +#line 77 "usr.bin/m4/parser.y" + { yyval = -yystack.l_mark[0]; } +break; +case 23: +#line 78 "usr.bin/m4/parser.y" + { yyval = yystack.l_mark[0]; } +break; +case 24: +#line 79 "usr.bin/m4/parser.y" + { yyval = !yystack.l_mark[0]; } +break; +case 25: +#line 80 "usr.bin/m4/parser.y" + { yyval = ~yystack.l_mark[0]; } +break; +#line 700 "usr.bin/m4/parser.c" + 
} + yystack.s_mark -= yym; + yystate = *yystack.s_mark; + yystack.l_mark -= yym; + yym = yylhs[yyn]; + if (yystate == 0 && yym == 0) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state 0 to\ + state %d\n", YYPREFIX, YYFINAL); +#endif + yystate = YYFINAL; + *++yystack.s_mark = YYFINAL; + *++yystack.l_mark = yyval; + if (yychar < 0) + { + yychar = YYLEX; + if (yychar < 0) yychar = YYEOF; +#if YYDEBUG + if (yydebug) + { + if ((yys = yyname[YYTRANSLATE(yychar)]) == NULL) yys = yyname[YYUNDFTOKEN]; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, YYFINAL, yychar, yys); + } +#endif + } + if (yychar == YYEOF) goto yyaccept; + goto yyloop; + } + if (((yyn = yygindex[yym]) != 0) && (yyn += yystate) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == (YYINT) yystate) + yystate = yytable[yyn]; + else + yystate = yydgoto[yym]; +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state %d \ +to state %d\n", YYPREFIX, *yystack.s_mark, yystate); +#endif + if (yystack.s_mark >= yystack.s_last && yygrowstack(&yystack) == YYENOMEM) goto yyoverflow; + *++yystack.s_mark = (YYINT) yystate; + *++yystack.l_mark = yyval; + goto yyloop; + +yyoverflow: + YYERROR_CALL("yacc stack overflow"); + +yyabort: + yyfreestack(&yystack); + return (1); + +yyaccept: + yyfreestack(&yystack); + return (0); +} diff --git a/usr.bin/m4/parser.tab.h b/usr.bin/m4/parser.tab.h new file mode 100644 index 0000000..82c3c10 --- /dev/null +++ b/usr.bin/m4/parser.tab.h @@ -0,0 +1,13 @@ +#define NUMBER 257 +#define ERROR 258 +#define LOR 259 +#define LAND 260 +#define EQ 261 +#define NE 262 +#define LE 263 +#define GE 264 +#define LSHIFT 265 +#define RSHIFT 266 +#define EXPONENT 267 +#define UMINUS 268 +#define UPLUS 269 diff --git a/usr.bin/m4/parser.y b/usr.bin/m4/parser.y new file mode 100644 index 0000000..fedded1 --- /dev/null +++ b/usr.bin/m4/parser.y @@ -0,0 +1,84 @@ +%{ +/* $OpenBSD: parser.y,v 1.7 2012/04/12 17:00:11 espie Exp $ */ +/* + * 
Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <math.h> +#include <stdint.h> +#include <stdlib.h> +#define YYSTYPE int32_t +extern int32_t end_result; +extern int yylex(void); +extern int yyerror(const char *); +%} +%token NUMBER +%token ERROR +%left LOR +%left LAND +%left '|' +%left '^' +%left '&' +%left EQ NE +%left '<' LE '>' GE +%left LSHIFT RSHIFT +%left '+' '-' +%left '*' '/' '%' +%right EXPONENT +%right UMINUS UPLUS '!' 
'~' + +%% + +top : expr { end_result = $1; } + ; +expr : expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr EXPONENT expr { $$ = pow($1, $3); } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { + if ($3 == 0) { + yyerror("division by zero"); + exit(1); + } + $$ = $1 / $3; + } + | expr '%' expr { + if ($3 == 0) { + yyerror("modulo zero"); + exit(1); + } + $$ = $1 % $3; + } + | expr LSHIFT expr { $$ = $1 << $3; } + | expr RSHIFT expr { $$ = $1 >> $3; } + | expr '<' expr { $$ = $1 < $3; } + | expr '>' expr { $$ = $1 > $3; } + | expr LE expr { $$ = $1 <= $3; } + | expr GE expr { $$ = $1 >= $3; } + | expr EQ expr { $$ = $1 == $3; } + | expr NE expr { $$ = $1 != $3; } + | expr '&' expr { $$ = $1 & $3; } + | expr '^' expr { $$ = $1 ^ $3; } + | expr '|' expr { $$ = $1 | $3; } + | expr LAND expr { $$ = $1 && $3; } + | expr LOR expr { $$ = $1 || $3; } + | '(' expr ')' { $$ = $2; } + | '-' expr %prec UMINUS { $$ = -$2; } + | '+' expr %prec UPLUS { $$ = $2; } + | '!' expr { $$ = !$2; } + | '~' expr { $$ = ~$2; } + | NUMBER + ; +%% + diff --git a/usr.bin/m4/pathnames.h b/usr.bin/m4/pathnames.h new file mode 100644 index 0000000..85ed258 --- /dev/null +++ b/usr.bin/m4/pathnames.h @@ -0,0 +1,38 @@ +/* $OpenBSD: pathnames.h,v 1.6 2015/11/03 16:21:47 deraadt Exp $ */ +/* $NetBSD: pathnames.h,v 1.6 1995/09/29 00:27:55 cgd Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pathnames.h 8.1 (Berkeley) 6/6/93 + */ + +#define _PATH_DIVNAME "/tmp/m4.0XXXXXXXXXX" /* unix diversion files */ diff --git a/usr.bin/m4/stdd.h b/usr.bin/m4/stdd.h new file mode 100644 index 0000000..58d42b6 --- /dev/null +++ b/usr.bin/m4/stdd.h @@ -0,0 +1,55 @@ +/* $OpenBSD: stdd.h,v 1.6 2010/09/07 19:58:09 marco Exp $ */ +/* $NetBSD: stdd.h,v 1.2 1995/09/28 05:37:50 tls Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ozan Yigit at York University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)stdd.h 8.1 (Berkeley) 6/6/93 + */ + +/* + * standard defines + */ + +#define max(a,b) ((a) > (b)? (a): (b)) +#define min(a,b) ((a) < (b)? 
(a): (b)) + +#define iswhite(c) ((c) == ' ' || (c) == '\t') + +/* + * STREQ is an optimised strcmp(a,b)==0 + * STREQN is an optimised strncmp(a,b,n)==0; assumes n > 0 + */ +#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) +#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) + +#define YES 1 +#define NO 0 diff --git a/usr.bin/m4/tokenizer.c b/usr.bin/m4/tokenizer.c new file mode 100644 index 0000000..fa19fc6 --- /dev/null +++ b/usr.bin/m4/tokenizer.c @@ -0,0 +1,191 @@ +/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */ +/* + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "parser.tab.h" +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +extern void m4_warnx(const char *, ...); +extern int mimic_gnu; +extern int32_t yylval; +static const char *yypos; + +void +yy_scan_string(const char *s) +{ + yypos = s; +} + +static int32_t +number(const char *yytext, size_t yylen) +{ + long l; + + errno = 0; + l = strtol(yytext, NULL, 0); + if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) || + l > INT32_MAX || l < INT32_MIN) + m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext); + return l; +} + +static int32_t +parse_radix(const char *yytext, size_t yylen) +{ + long base; + char *next; + long l; + int d; + + l = 0; + base = strtol(yytext+2, &next, 0); + if (base > 36 || next == NULL) { + m4_warnx("error in number %.*s", (int)yylen, yytext); + } else { + next++; + while (*next != 0) { + if (*next >= '0' && *next <= '9') + d = *next - '0'; + else if (*next >= 'a' && *next <= 'z') + d = *next - 'a' + 10; + else { + assert(*next >= 'A' && *next <= 'Z'); + d = *next - 'A' + 10; + } + if (d >= base) { + m4_warnx("error in number %.*s", (int)yylen, yytext); + return 0; + } + l = base * l + d; + next++; + } + } + return l; +} + +static int +isodigit(int c) +{ + return c >= '0' && c <= '7'; +} + +int yylex(void) +{ + const char *start; + +next: + start = yypos; + switch (*yypos) { + case ' ': + case '\t': + case '\n': + ++yypos; + goto next; + case '<': + switch (yypos[1]) { + case '=': + yypos += 2; + return LE; + case '<': + yypos += 2; + return LSHIFT; + } + break; + case '>': + switch (yypos[1]) { + case '=': + yypos += 2; + return GE; + case '>': + yypos += 2; + return RSHIFT; + } + break; + case '=': + if (yypos[1] != '=') + break; + yypos += 2; + return EQ; + case '!': + if (yypos[1] != '=') + break; + yypos += 2; + return NE; + case '&': + if (yypos[1] != '&') + break; + yypos += 2; + return LAND; 
+ case '|': + if (yypos[1] != '|') + break; + yypos += 2; + return LOR; + case '*': + if (!mimic_gnu || yypos[1] != '*') + break; + yypos += 2; + return EXPONENT; + case '0': + switch (*++yypos) { + case 'x': + case 'X': + if (!isxdigit(*++yypos)) + return ERROR; + do ++yypos; + while (isxdigit(*yypos)); + break; + case 'r': + case 'R': + if (!mimic_gnu) + break; + if (!isdigit(*++yypos)) + return ERROR; + do ++yypos; + while (isdigit(*yypos)); + if (*yypos != ':') + return ERROR; + if (!isalnum(*++yypos)) + return ERROR; + do ++yypos; + while (isalnum(*yypos)); + yylval = parse_radix(start, yypos - start); + return NUMBER; + default: + do ++yypos; + while (isodigit(*yypos)); + break; + } + yylval = number(start, yypos - start); + return NUMBER; + case '\0': + return '\0'; + } + if (isdigit(*yypos)) { + do ++yypos; + while (isdigit(*yypos)); + yylval = number(start, yypos - start); + return NUMBER; + } + + return *yypos++; +} diff --git a/usr.bin/m4/trace.c b/usr.bin/m4/trace.c new file mode 100644 index 0000000..edf5887 --- /dev/null +++ b/usr.bin/m4/trace.c @@ -0,0 +1,196 @@ +/* $OpenBSD: trace.c,v 1.16 2010/09/07 19:58:09 marco Exp $ */ +/* + * Copyright (c) 2001 Marc Espie. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBSD + * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <err.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include "mdef.h" +#include "stdd.h" +#include "extern.h" + +FILE *traceout; + +#define TRACE_ARGS 1 +#define TRACE_EXPANSION 2 +#define TRACE_QUOTE 4 +#define TRACE_FILENAME 8 +#define TRACE_LINENO 16 +#define TRACE_CONT 32 +#define TRACE_ID 64 +#define TRACE_NEWFILE 128 /* not implemented yet */ +#define TRACE_INPUT 256 /* not implemented yet */ + +static unsigned int letter_to_flag(int); +static void print_header(struct input_file *); +static int frame_level(void); + + +unsigned int trace_flags = TRACE_QUOTE | TRACE_EXPANSION; + +void +trace_file(const char *name) +{ + + if (traceout && traceout != stderr) + fclose(traceout); + traceout = fopen(name, "w"); + if (!traceout) + err(1, "can't open %s", name); +} + +static unsigned int +letter_to_flag(int c) +{ + switch(c) { + case 'a': + return TRACE_ARGS; + case 'e': + return TRACE_EXPANSION; + case 'q': + return TRACE_QUOTE; + case 'c': + return TRACE_CONT; + case 'x': + return TRACE_ID; + case 'f': + return TRACE_FILENAME; + case 'l': + return TRACE_LINENO; + case 'p': + return TRACE_NEWFILE; + case 'i': + return TRACE_INPUT; + case 't': + return TRACE_ALL; + case 'V': + return ~0; + default: + return 0; + } +} + +void +set_trace_flags(const char *s) +{ + char mode = 0; + unsigned int f = 0; + + if (*s == '+' || *s == '-') + mode = *s++; + while (*s) + f |= 
letter_to_flag(*s++); + switch(mode) { + case 0: + trace_flags = f; + break; + case '+': + trace_flags |= f; + break; + case '-': + trace_flags &= ~f; + break; + } +} + +static int +frame_level() +{ + int level; + int framep; + + for (framep = fp, level = 0; framep != 0; + level++,framep = mstack[framep-3].sfra) + ; + return level; +} + +static void +print_header(struct input_file *inp) +{ + fprintf(traceout, "m4trace:"); + if (trace_flags & TRACE_FILENAME) + fprintf(traceout, "%s:", inp->name); + if (trace_flags & TRACE_LINENO) + fprintf(traceout, "%lu:", inp->lineno); + fprintf(traceout, " -%d- ", frame_level()); + if (trace_flags & TRACE_ID) + fprintf(traceout, "id %lu: ", expansion_id); +} + +size_t +trace(const char *argv[], int argc, struct input_file *inp) +{ + if (!traceout) + traceout = stderr; + print_header(inp); + if (trace_flags & TRACE_CONT) { + fprintf(traceout, "%s ...\n", argv[1]); + print_header(inp); + } + fprintf(traceout, "%s", argv[1]); + if ((trace_flags & TRACE_ARGS) && argc > 2) { + char delim[3]; + int i; + + delim[0] = LPAREN; + delim[1] = EOS; + for (i = 2; i < argc; i++) { + fprintf(traceout, "%s%s%s%s", delim, + (trace_flags & TRACE_QUOTE) ? lquote : "", + argv[i], + (trace_flags & TRACE_QUOTE) ? rquote : ""); + delim[0] = COMMA; + delim[1] = ' '; + delim[2] = EOS; + } + fprintf(traceout, "%c", RPAREN); + } + if (trace_flags & TRACE_CONT) { + fprintf(traceout, " -> ???\n"); + print_header(inp); + fprintf(traceout, argc > 2 ? 
"%s(...)" : "%s", argv[1]); + } + if (trace_flags & TRACE_EXPANSION) + return buffer_mark(); + else { + fprintf(traceout, "\n"); + return SIZE_MAX; + } +} + +void +finish_trace(size_t mark) +{ + fprintf(traceout, " -> "); + if (trace_flags & TRACE_QUOTE) + fprintf(traceout, "%s", lquote); + dump_buffer(traceout, mark); + if (trace_flags & TRACE_QUOTE) + fprintf(traceout, "%s", rquote); + fprintf(traceout, "\n"); +} diff --git a/usr.bin/mandoc/CVS/Entries b/usr.bin/mandoc/CVS/Entries new file mode 100644 index 0000000..30e23be --- /dev/null +++ b/usr.bin/mandoc/CVS/Entries @@ -0,0 +1,101 @@ +/Makefile/1.118/Fri Mar 13 00:31:04 2020// +/apropos.1/1.41/Thu Nov 22 12:32:10 2018// +/arch.c/1.11/Sat May 11 07:18:17 2019// +/att.c/1.14/Thu Dec 13 11:55:14 2018// +/cgi.c/1.110/Fri Apr 3 11:34:19 2020// +/cgi.h.example/1.6/Sat Mar 18 16:48:07 2017// +/chars.c/1.49/Thu Feb 13 16:16:03 2020// +/dba.c/1.7/Thu Feb 9 18:26:17 2017// +/dba.h/1.2/Wed Aug 17 20:46:06 2016// +/dba_array.c/1.1/Mon Aug 1 10:32:39 2016// +/dba_array.h/1.1/Mon Aug 1 10:32:39 2016// +/dba_read.c/1.4/Wed Aug 17 20:46:06 2016// +/dba_write.c/1.1/Mon Aug 1 10:32:39 2016// +/dba_write.h/1.1/Mon Aug 1 10:32:39 2016// +/dbm.c/1.5/Mon Jul 1 22:43:03 2019// +/dbm.h/1.1/Mon Aug 1 10:32:39 2016// +/dbm_map.c/1.6/Thu Feb 9 18:26:17 2017// +/dbm_map.h/1.2/Mon Jul 1 22:43:03 2019// +/eqn.c/1.47/Wed Jan 8 12:09:14 2020// +/eqn.h/1.1/Thu Dec 13 05:13:15 2018// +/eqn_html.c/1.15/Sun Mar 17 18:20:07 2019// +/eqn_parse.h/1.3/Fri Dec 14 06:33:03 2018// +/eqn_term.c/1.15/Thu Dec 13 05:13:15 2018// +/html.c/1.141/Mon Apr 20 12:59:24 2020// +/html.h/1.70/Sat Apr 18 20:28:46 2020// +/libman.h/1.61/Mon Dec 31 10:03:38 2018// +/libmandoc.h/1.64/Fri Apr 3 11:34:19 2020// +/libmdoc.h/1.88/Mon Dec 31 04:55:42 2018// +/main.c/1.251/Thu Apr 2 22:10:27 2020// +/main.h/1.25/Sun Mar 3 13:01:47 2019// +/makewhatis.8/1.14/Wed May 17 22:26:52 2017// +/man.1/1.36/Mon Feb 10 13:49:04 2020// +/man.c/1.135/Sat Jan 5 00:36:46 2019// 
+/man.cgi.8/1.22/Sun May 20 21:48:23 2018// +/man.conf.5/1.8/Mon Feb 10 14:42:03 2020// +/man.h/1.59/Thu Aug 23 19:32:03 2018// +/man_html.c/1.131/Sat Apr 4 20:23:06 2020// +/man_macro.c/1.106/Sat Jan 5 18:59:37 2019// +/man_term.c/1.188/Fri Mar 13 00:31:05 2020// +/man_validate.c/1.124/Fri Apr 24 11:58:02 2020// +/manconf.h/1.8/Thu Apr 2 22:10:27 2020// +/mandoc.1/1.167/Fri Apr 24 11:58:02 2020// +/mandoc.c/1.85/Sun Jan 19 16:16:32 2020// +/mandoc.css/1.33/Sun Jun 2 16:50:46 2019// +/mandoc.h/1.210/Fri Apr 24 11:58:02 2020// +/mandoc_aux.c/1.9/Wed Feb 7 20:04:33 2018// +/mandoc_aux.h/1.9/Mon Jun 12 18:55:42 2017// +/mandoc_msg.c/1.9/Fri Apr 24 11:58:02 2020// +/mandoc_ohash.c/1.2/Mon Oct 19 18:58:20 2015// +/mandoc_ohash.h/1.2/Sat Nov 7 13:57:55 2015// +/mandoc_parse.h/1.4/Sat Nov 9 14:39:42 2019// +/mandoc_xr.c/1.3/Sun Jul 2 21:17:12 2017// +/mandoc_xr.h/1.3/Sun Jul 2 21:17:12 2017// +/mandocdb.c/1.216/Fri Apr 3 11:34:19 2020// +/manpath.c/1.28/Mon Feb 10 14:42:03 2020// +/mansearch.c/1.65/Mon Jul 1 22:43:03 2019// +/mansearch.h/1.24/Tue Apr 30 18:48:26 2019// +/mdoc.c/1.164/Mon Apr 6 09:55:49 2020// +/mdoc.h/1.71/Sun Dec 30 00:48:47 2018// +/mdoc_argv.c/1.76/Thu Jul 11 16:56:52 2019// +/mdoc_html.c/1.215/Sun Apr 19 15:15:54 2020// +/mdoc_macro.c/1.191/Sun Jan 19 17:59:01 2020// +/mdoc_man.c/1.134/Thu Feb 27 01:25:57 2020// +/mdoc_markdown.c/1.35/Fri Apr 3 11:34:19 2020// +/mdoc_state.c/1.16/Sun Jan 19 17:59:01 2020// +/mdoc_term.c/1.279/Mon Apr 6 09:55:49 2020// +/mdoc_validate.c/1.302/Sun Apr 26 21:29:45 2020// +/msec.c/1.13/Fri Dec 14 01:17:46 2018// +/msec.in/1.6/Sat Jun 24 17:36:50 2017// +/out.c/1.51/Tue Dec 31 22:49:17 2019// +/out.h/1.25/Fri Apr 3 11:34:19 2020// +/preconv.c/1.9/Thu Dec 13 11:55:14 2018// +/predefs.in/1.4/Fri Nov 28 19:25:03 2014// +/read.c/1.190/Fri Apr 24 11:58:02 2020// +/roff.c/1.246/Wed Apr 8 11:54:14 2020// +/roff.h/1.56/Wed Apr 8 11:54:14 2020// +/roff_html.c/1.20/Tue Apr 30 15:52:42 2019// +/roff_int.h/1.17/Fri Apr 24 11:58:02 
2020// +/roff_term.c/1.19/Fri Jan 4 03:24:30 2019// +/roff_validate.c/1.19/Thu Feb 27 01:25:58 2020// +/st.c/1.13/Fri Dec 14 01:17:46 2018// +/tag.c/1.36/Sun Apr 19 16:26:11 2020// +/tag.h/1.14/Sat Apr 18 20:28:46 2020// +/tbl.c/1.27/Fri Dec 14 06:33:03 2018// +/tbl.h/1.5/Wed Dec 12 21:54:30 2018// +/tbl_data.c/1.40/Sat Jan 11 20:48:13 2020// +/tbl_html.c/1.28/Sun Mar 17 18:20:07 2019// +/tbl_int.h/1.2/Fri Dec 14 06:33:03 2018// +/tbl_layout.c/1.35/Fri Dec 14 05:17:45 2018// +/tbl_opts.c/1.16/Fri Dec 14 05:17:45 2018// +/tbl_parse.h/1.2/Fri Dec 14 06:33:03 2018// +/tbl_term.c/1.61/Sat Jan 11 16:24:33 2020// +/term.c/1.141/Mon Jun 3 20:23:39 2019// +/term.h/1.75/Fri Jan 4 03:20:44 2019// +/term_ascii.c/1.50/Fri Jul 19 21:45:37 2019// +/term_ps.c/1.55/Fri Nov 10 14:16:28 2017// +/term_tab.c/1.4/Sat Jun 17 14:55:02 2017// +/term_tag.c/1.4/Sat Apr 18 20:28:46 2020// +/term_tag.h/1.2/Thu Apr 2 22:10:27 2020// +/tree.c/1.56/Wed Apr 8 11:54:14 2020// +D diff --git a/usr.bin/mandoc/CVS/Repository b/usr.bin/mandoc/CVS/Repository new file mode 100644 index 0000000..3f4a4d0 --- /dev/null +++ b/usr.bin/mandoc/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/mandoc diff --git a/usr.bin/mandoc/CVS/Root b/usr.bin/mandoc/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/mandoc/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile new file mode 100644 index 0000000..8d6fec5 --- /dev/null +++ b/usr.bin/mandoc/Makefile @@ -0,0 +1,78 @@ +# $OpenBSD: Makefile,v 1.118 2020/03/13 00:31:04 schwarze Exp $ + +.include <bsd.own.mk> + +CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter +DPADD += ${LIBUTIL} +LDADD += -lutil -lz + +SRCS= mandoc_aux.c mandoc_ohash.c mandoc.c mandoc_msg.c mandoc_xr.c \ + arch.c chars.c msec.c preconv.c read.c tag.c +SRCS+= roff.c roff_validate.c tbl.c tbl_opts.c tbl_layout.c tbl_data.c eqn.c +SRCS+= mdoc.c mdoc_argv.c mdoc_macro.c mdoc_state.c mdoc_validate.c \ + att.c st.c +SRCS+= 
man_macro.c man.c man_validate.c +SRCS+= main.c out.c tree.c +SRCS+= term.c term_ascii.c term_ps.c term_tab.c term_tag.c +SRCS+= roff_term.c mdoc_term.c man_term.c eqn_term.c tbl_term.c +SRCS+= mdoc_man.c +SRCS+= html.c roff_html.c mdoc_html.c man_html.c eqn_html.c tbl_html.c +SRCS+= mdoc_markdown.c +SRCS+= dbm_map.c dbm.c dba_write.c dba_array.c dba.c dba_read.c +SRCS+= manpath.c mandocdb.c mansearch.c + +PROG= mandoc + +LINKS = ${BINDIR}/mandoc ${BINDIR}/apropos \ + ${BINDIR}/mandoc ${BINDIR}/help \ + ${BINDIR}/mandoc ${BINDIR}/man \ + ${BINDIR}/mandoc ${BINDIR}/whatis \ + ${BINDIR}/mandoc /usr/sbin/makewhatis \ + ${BINDIR}/mandoc /usr/libexec/makewhatis + +MAN = apropos.1 man.1 mandoc.1 man.conf.5 makewhatis.8 + +CLEANFILES += man.cgi cgi.o + +afterinstall: + install -o ${BINOWN} -g ${BINGRP} -m 444 \ + ${.CURDIR}/mandoc.css ${DESTDIR}/usr/share/misc + + +# ---------------------------------------------------------------------- +# Variables and targets to build and install man.cgi(8), +# not used during make build and make release. 
+ +# To configure, run: cp cgi.h.example cgi.h; vi cgi.h +# To build, run: make man.cgi +# To install, run: sudo make installcgi +# After that, read: man man.cgi.8 + +LIBMDOC_OBJS = mdoc_argv.o mdoc_macro.o mdoc_state.o \ + mdoc_validate.o mdoc.o att.o st.o +LIBMAN_OBJS = man.o man_macro.o man_validate.o +LIBROFF_OBJS = roff.o roff_validate.o eqn.o \ + tbl.o tbl_data.o tbl_layout.o tbl_opts.o +LIBMANDOC_OBJS = ${LIBMDOC_OBJS} ${LIBMAN_OBJS} ${LIBROFF_OBJS} \ + arch.o mandoc.o mandoc_aux.o mandoc_msg.o mandoc_ohash.o \ + mandoc_xr.o chars.o msec.o preconv.o read.o tag.o +HTML_OBJS = html.o roff_html.o mdoc_html.o man_html.o \ + tbl_html.o eqn_html.o out.o +CGI_OBJS = ${LIBMANDOC_OBJS} ${HTML_OBJS} \ + dbm_map.o dbm.o mansearch.o cgi.o + +cgi.o: cgi.h main.h manconf.h mandoc.h mandoc_aux.h mandoc_parse.h \ + mansearch.h man.h mdoc.h roff.h + +man.cgi: ${CGI_OBJS} + ${CC} ${LDFLAGS} ${STATIC} -o ${.TARGET} ${CGI_OBJS} ${LDADD} + +installcgi: man.cgi + ${INSTALL} -d -o root -g wheel -m 755 ${DESTDIR}/var/www/cgi-bin + ${INSTALL} ${INSTALL_COPY} ${INSTALL_STRIP} \ + -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ + man.cgi ${DESTDIR}/var/www/cgi-bin/man.cgi + ${INSTALL} ${INSTALL_COPY} -o root -g wheel -m 644 \ + ${.CURDIR}/mandoc.css ${DESTDIR}/var/www/htdocs/ + +.include <bsd.prog.mk> diff --git a/usr.bin/mandoc/apropos.1 b/usr.bin/mandoc/apropos.1 new file mode 100644 index 0000000..401976f --- /dev/null +++ b/usr.bin/mandoc/apropos.1 @@ -0,0 +1,510 @@ +.\" $OpenBSD: apropos.1,v 1.41 2018/11/22 12:32:10 schwarze Exp $ +.\" +.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2011,2012,2014,2017,2018 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: November 22 2018 $ +.Dt APROPOS 1 +.Os +.Sh NAME +.Nm apropos , +.Nm whatis +.Nd search manual page databases +.Sh SYNOPSIS +.Nm +.Op Fl afk +.Op Fl C Ar file +.Op Fl M Ar path +.Op Fl m Ar path +.Op Fl O Ar outkey +.Op Fl S Ar arch +.Op Fl s Ar section +.Ar expression ... +.Sh DESCRIPTION +The +.Nm apropos +and +.Nm whatis +utilities query manual page databases generated by +.Xr makewhatis 8 , +evaluating +.Ar expression +for each file in each database. +By default, they display the names, section numbers, and description lines +of all matching manuals. +.Pp +By default, +.Nm +searches for +.Xr makewhatis 8 +databases in the default paths stipulated by +.Xr man 1 +and uses case-insensitive extended regular expression matching +over manual names and descriptions +.Pq the Li \&Nm No and Li \&Nd No macro keys . +Multiple terms imply pairwise +.Fl o . +.Pp +.Nm whatis +is a synonym for +.Nm +.Fl f . +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Instead of showing only the title lines, show the complete manual pages, +just like +.Xr man 1 +.Fl a +would. +If the standard output is a terminal device and +.Fl c +is not specified, use +.Xr more 1 +to paginate them. +In +.Fl a +mode, the options +.Fl IKOTW +described in the +.Xr mandoc 1 +manual are also available. +.It Fl C Ar file +Specify an alternative configuration +.Ar file +in +.Xr man.conf 5 +format. 
+.It Fl f +Search for all words in +.Ar expression +in manual page names only. +The search is case-insensitive and matches whole words only. +In this mode, macro keys, comparison operators, and logical operators +are not available. +.It Fl k +Support the full +.Ar expression +syntax. +It is the default for +.Nm . +.It Fl M Ar path +Use the colon-separated path instead of the default list of paths +searched for +.Xr makewhatis 8 +databases. +Invalid paths, or paths without manual databases, are ignored. +.It Fl m Ar path +Prepend the colon-separated paths to the list of paths searched +for +.Xr makewhatis 8 +databases. +Invalid paths, or paths without manual databases, are ignored. +.It Fl O Ar outkey +Show the values associated with the key +.Ar outkey +instead of the manual descriptions. +.It Fl S Ar arch +Restrict the search to pages for the specified +.Xr machine 1 +architecture. +.Ar arch +is case-insensitive. +By default, pages for all architectures are shown. +.It Fl s Ar section +Restrict the search to the specified section of the manual. +By default, pages from all sections are shown. +See +.Xr man 1 +for a listing of sections. +.El +.Pp +The options +.Fl chlw +are also supported and are documented in +.Xr man 1 . +The options +.Fl fkl +are mutually exclusive and override each other. +.Pp +An +.Ar expression +consists of search terms joined by logical operators +.Fl a +.Pq and +and +.Fl o +.Pq or . +The +.Fl a +operator has precedence over +.Fl o +and both are evaluated left-to-right. +.Bl -tag -width Ds +.It \&( Ar expr No \&) +True if the subexpression +.Ar expr +is true. +.It Ar expr1 Fl a Ar expr2 +True if both +.Ar expr1 +and +.Ar expr2 +are true (logical +.Sq and ) . +.It Ar expr1 Oo Fl o Oc Ar expr2 +True if +.Ar expr1 +and/or +.Ar expr2 +evaluate to true (logical +.Sq or ) . +.It Ar term +True if +.Ar term +is satisfied. +This has syntax +.Sm off +.Oo +.Op Ar key Op , Ar key ... 
+.Pq Cm = | \(ti +.Oc +.Ar val , +.Sm on +where +.Ar key +is an +.Xr mdoc 7 +macro to query and +.Ar val +is its value. +See +.Sx Macro Keys +for a list of available keys. +Operator +.Cm = +evaluates a substring, while +.Cm \(ti +evaluates a case-sensitive extended regular expression. +.It Fl i Ar term +If +.Ar term +is a regular expression, it +is evaluated case-insensitively. +Has no effect on substring terms. +.El +.Pp +Results are sorted first according to the section number in ascending +numerical order, then by the page name in ascending +.Xr ascii 7 +alphabetical order, case-insensitive. +.Pp +Each output line is formatted as +.Pp +.D1 name[, name...](sec) \- description +.Pp +Where +.Dq name +is the manual's name, +.Dq sec +is the manual section, and +.Dq description +is the manual's short description. +If an architecture is specified for the manual, it is displayed as +.Pp +.D1 name(sec/arch) \- description +.Pp +Resulting manuals may be accessed as +.Pp +.Dl $ man \-s sec name +.Pp +If an architecture is specified in the output, use +.Pp +.Dl $ man \-s sec \-S arch name +.Ss Macro Keys +Queries evaluate over a subset of +.Xr mdoc 7 +macros indexed by +.Xr makewhatis 8 . +In addition to the macro keys listed below, the special key +.Cm any +may be used to match any available macro key. 
+.Pp +Names and description: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Nm Ta manual name +.It Li \&Nd Ta one-line manual description +.It Li arch Ta machine architecture (case-insensitive) +.It Li sec Ta manual section number +.El +.Pp +Sections and cross references: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Sh Ta section header (excluding standard sections) +.It Li \&Ss Ta subsection header +.It Li \&Xr Ta cross reference to another manual page +.It Li \&Rs Ta bibliographic reference +.El +.Pp +Semantic markup for command line utilities: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Fl Ta command line options (flags) +.It Li \&Cm Ta command modifier +.It Li \&Ar Ta command argument +.It Li \&Ic Ta internal or interactive command +.It Li \&Ev Ta environmental variable +.It Li \&Pa Ta file system path +.El +.Pp +Semantic markup for function libraries: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Lb Ta function library name +.It Li \&In Ta include file +.It Li \&Ft Ta function return type +.It Li \&Fn Ta function name +.It Li \&Fa Ta function argument type and name +.It Li \&Vt Ta variable type +.It Li \&Va Ta variable name +.It Li \&Dv Ta defined variable or preprocessor constant +.It Li \&Er Ta error constant +.It Li \&Ev Ta environmental variable +.El +.Pp +Various semantic markup: +.Bl -column "xLix" description -offset indent -compact +.It Li \&An Ta author name +.It Li \&Lk Ta hyperlink +.It Li \&Mt Ta Do mailto Dc hyperlink +.It Li \&Cd Ta kernel configuration declaration +.It Li \&Ms Ta mathematical symbol +.It Li \&Tn Ta tradename +.El +.Pp +Physical markup: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Em Ta italic font or underline +.It Li \&Sy Ta boldface font +.It Li \&Li Ta typewriter font +.El +.Pp +Text production: +.Bl -column "xLix" description -offset indent -compact +.It Li \&St Ta reference to a standards document +.It Li \&At Ta At No 
version reference +.It Li \&Bx Ta Bx No version reference +.It Li \&Bsx Ta Bsx No version reference +.It Li \&Nx Ta Nx No version reference +.It Li \&Fx Ta Fx No version reference +.It Li \&Ox Ta Ox No version reference +.It Li \&Dx Ta Dx No version reference +.El +.Pp +In general, macro keys are supposed to yield complete results without +expecting the user to consider actual macro usage. +For example, results include: +.Pp +.Bl -tag -width 3n -offset 3n -compact +.It Li \&Fa +function arguments appearing on +.Ic \&Fn +lines +.It Li \&Fn +function names marked up with +.Ic \&Fo +macros +.It Li \&In +include file names marked up with +.Ic \&Fd +macros +.It Li \&Vt +types appearing as function return types and +.It \& +types appearing in function arguments in the SYNOPSIS +.El +.Sh ENVIRONMENT +.Bl -tag -width MANPAGER +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +is used instead of the standard pagination program, +.Xr more 1 ; +see +.Xr man 1 +for details. +Only used if +.Fl a +or +.Fl l +is specified. +.It Ev MANPATH +A colon-separated list of directories to search for manual pages; see +.Xr man 1 +for details. +Overridden by +.Fl M , +ignored if +.Fl l +is specified. +.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +is used. +Only used if +.Fl a +or +.Fl l +is specified. 
+.El +.Sh FILES +.Bl -tag -width "/etc/man.conf" -compact +.It Pa mandoc.db +name of the +.Xr makewhatis 8 +keyword database +.It Pa /etc/man.conf +default +.Xr man 1 +configuration file +.El +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Search for +.Qq .cf +as a substring of manual names and descriptions: +.Pp +.Dl $ apropos =.cf +.Pp +Include matches for +.Qq .cnf +and +.Qq .conf +as well: +.Pp +.Dl $ apropos =.cf =.cnf =.conf +.Pp +Search in names and descriptions using a case-sensitive regular expression: +.Pp +.Dl $ apropos \(aq\(tiset.?[ug]id\(aq +.Pp +Search for manuals in the library section mentioning both the +.Qq optind +and the +.Qq optarg +variables: +.Pp +.Dl $ apropos \-s 3 Va=optind \-a Va=optarg +.Pp +Do exactly the same as calling +.Nm whatis +with the argument +.Qq ssh : +.Pp +.Dl $ apropos \-\- \-i \(aqNm\(ti[[:<:]]ssh[[:>:]]\(aq +.Pp +The following two invocations are equivalent: +.Pp +.D1 Li $ apropos -S Ar arch Li -s Ar section expression +.Bd -ragged -offset indent +.Li $ apropos \e( Ar expression Li \e) +.Li -a arch\(ti^( Ns Ar arch Ns Li |any)$ +.Li -a sec\(ti^ Ns Ar section Ns Li $ +.Ed +.Sh SEE ALSO +.Xr man 1 , +.Xr re_format 7 , +.Xr makewhatis 8 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification of +.Xr man 1 +.Fl k . +.Pp +All options, the +.Nm whatis +command, support for logical operators, macro keys, +substring matching, sorting of results, the environment variables +.Ev MANPAGER +and +.Ev MANPATH , +the database format, and the configuration file +are extensions to that specification. +.Sh HISTORY +Part of the functionality of +.Nm whatis +was already provided by the former +.Nm manwhere +utility in +.Bx 1 . +The +.Nm +and +.Nm whatis +utilities first appeared in +.Bx 2 . +They were rewritten from scratch for +.Ox 5.6 . 
+.Pp +The +.Fl M +option and the +.Ev MANPATH +variable first appeared in +.Bx 4.3 ; +.Fl m +in +.Bx 4.3 Reno ; +.Fl C +in +.Bx 4.4 Lite1 ; +and +.Fl S +and +.Fl s +in +.Ox 4.5 +for +.Nm +and in +.Ox 5.6 +for +.Nm whatis . +The options +.Fl acfhIKklOTWw +appeared in +.Ox 5.7 . +.Sh AUTHORS +.An -nosplit +.An Bill Joy +wrote +.Nm manwhere +in 1977 and the original +.Bx +.Nm +and +.Nm whatis +in February 1979. +The current version was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/usr.bin/mandoc/arch.c b/usr.bin/mandoc/arch.c new file mode 100644 index 0000000..68a20bb --- /dev/null +++ b/usr.bin/mandoc/arch.c @@ -0,0 +1,52 @@ +/* $OpenBSD: arch.c,v 1.11 2019/05/11 07:18:17 deraadt Exp $ */ +/* + * Copyright (c) 2017, 2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <string.h> + +#include "roff.h" + +int +arch_valid(const char *arch, enum mandoc_os os) +{ + const char *openbsd_arch[] = { + "alpha", "amd64", "arm64", "armv7", "hppa", "i386", + "landisk", "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "sparc64", NULL + }; + const char *netbsd_arch[] = { + "acorn26", "acorn32", "algor", "alpha", "amiga", + "arc", "atari", + "bebox", "cats", "cesfic", "cobalt", "dreamcast", + "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5", + "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa", + "i386", "ibmnws", "luna68k", + "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc", + "netwinder", "news68k", "newsmips", "next68k", + "pc532", "playstation2", "pmax", "pmppc", "prep", + "sandpoint", "sbmips", "sgimips", "shark", + "sparc", "sparc64", "sun2", "sun3", + "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL + }; + const char **arches[] = { NULL, netbsd_arch, openbsd_arch }; + const char **arch_p; + + if ((arch_p = arches[os]) == NULL) + return 1; + for (; *arch_p != NULL; arch_p++) + if (strcmp(*arch_p, arch) == 0) + return 1; + return 0; +} diff --git a/usr.bin/mandoc/att.c b/usr.bin/mandoc/att.c new file mode 100644 index 0000000..85c184e --- /dev/null +++ b/usr.bin/mandoc/att.c @@ -0,0 +1,47 @@ +/* $OpenBSD: att.c,v 1.14 2018/12/13 11:55:14 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <string.h> + +#include "roff.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y) + + +const char * +mdoc_a2att(const char *p) +{ + + LINE("v1", "Version\\~1 AT&T UNIX"); + LINE("v2", "Version\\~2 AT&T UNIX"); + LINE("v3", "Version\\~3 AT&T UNIX"); + LINE("v4", "Version\\~4 AT&T UNIX"); + LINE("v5", "Version\\~5 AT&T UNIX"); + LINE("v6", "Version\\~6 AT&T UNIX"); + LINE("v7", "Version\\~7 AT&T UNIX"); + LINE("32v", "Version\\~32V AT&T UNIX"); + LINE("III", "AT&T System\\~III UNIX"); + LINE("V", "AT&T System\\~V UNIX"); + LINE("V.1", "AT&T System\\~V Release\\~1 UNIX"); + LINE("V.2", "AT&T System\\~V Release\\~2 UNIX"); + LINE("V.3", "AT&T System\\~V Release\\~3 UNIX"); + LINE("V.4", "AT&T System\\~V Release\\~4 UNIX"); + + return NULL; +} diff --git a/usr.bin/mandoc/cgi.c b/usr.bin/mandoc/cgi.c new file mode 100644 index 0000000..766ac06 --- /dev/null +++ b/usr.bin/mandoc/cgi.c @@ -0,0 +1,1255 @@ +/* $OpenBSD: cgi.c,v 1.110 2020/04/03 11:34:19 schwarze Exp $ */ +/* + * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> + * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Implementation of the man.cgi(8) program. + */ +#include <sys/types.h> +#include <sys/time.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "mandoc_parse.h" +#include "main.h" +#include "manconf.h" +#include "mansearch.h" +#include "cgi.h" + +/* + * A query as passed to the search function. + */ +struct query { + char *manpath; /* desired manual directory */ + char *arch; /* architecture */ + char *sec; /* manual section */ + char *query; /* unparsed query expression */ + int equal; /* match whole names, not substrings */ +}; + +struct req { + struct query q; + char **p; /* array of available manpaths */ + size_t psz; /* number of available manpaths */ + int isquery; /* QUERY_STRING used, not PATH_INFO */ +}; + +enum focus { + FOCUS_NONE = 0, + FOCUS_QUERY +}; + +static void html_print(const char *); +static void html_putchar(char); +static int http_decode(char *); +static void http_encode(const char *); +static void parse_manpath_conf(struct req *); +static void parse_path_info(struct req *, const char *); +static void parse_query_string(struct req *, const char *); +static void pg_error_badrequest(const char *); +static void pg_error_internal(void); +static void pg_index(const struct req *); +static void pg_noresult(const struct req *, int, const char *, + const char *); +static void pg_redirect(const struct req *, const char *); +static void pg_search(const 
struct req *); +static void pg_searchres(const struct req *, + struct manpage *, size_t); +static void pg_show(struct req *, const char *); +static void resp_begin_html(int, const char *, const char *); +static void resp_begin_http(int, const char *); +static void resp_catman(const struct req *, const char *); +static void resp_copy(const char *); +static void resp_end_html(void); +static void resp_format(const struct req *, const char *); +static void resp_searchform(const struct req *, enum focus); +static void resp_show(const struct req *, const char *); +static void set_query_attr(char **, char **); +static int validate_arch(const char *); +static int validate_filename(const char *); +static int validate_manpath(const struct req *, const char *); +static int validate_urifrag(const char *); + +static const char *scriptname = SCRIPT_NAME; + +static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; +static const char *const sec_numbers[] = { + "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" +}; +static const char *const sec_names[] = { + "All Sections", + "1 - General Commands", + "2 - System Calls", + "3 - Library Functions", + "3p - Perl Library", + "4 - Device Drivers", + "5 - File Formats", + "6 - Games", + "7 - Miscellaneous Information", + "8 - System Manager\'s Manual", + "9 - Kernel Developer\'s Manual" +}; +static const int sec_MAX = sizeof(sec_names) / sizeof(char *); + +static const char *const arch_names[] = { + "amd64", "alpha", "armv7", "arm64", + "hppa", "i386", "landisk", + "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "socppc", "sparc64", + "amiga", "arc", "armish", "arm32", + "atari", "aviion", "beagle", "cats", + "hppa64", "hp300", + "ia64", "mac68k", "mvme68k", "mvme88k", + "mvmeppc", "palm", "pc532", "pegasos", + "pmax", "powerpc", "solbourne", "sparc", + "sun3", "vax", "wgrisc", "x68k", + "zaurus" +}; +static const int arch_MAX = sizeof(arch_names) / sizeof(char *); + +/* + * Print a character, escaping HTML along 
the way. + * This will pass non-ASCII straight to output: be warned! + */ +static void +html_putchar(char c) +{ + + switch (c) { + case '"': + printf("""); + break; + case '&': + printf("&"); + break; + case '>': + printf(">"); + break; + case '<': + printf("<"); + break; + default: + putchar((unsigned char)c); + break; + } +} + +/* + * Call through to html_putchar(). + * Accepts NULL strings. + */ +static void +html_print(const char *p) +{ + + if (NULL == p) + return; + while ('\0' != *p) + html_putchar(*p++); +} + +/* + * Transfer the responsibility for the allocated string *val + * to the query structure. + */ +static void +set_query_attr(char **attr, char **val) +{ + + free(*attr); + if (**val == '\0') { + *attr = NULL; + free(*val); + } else + *attr = *val; + *val = NULL; +} + +/* + * Parse the QUERY_STRING for key-value pairs + * and store the values into the query structure. + */ +static void +parse_query_string(struct req *req, const char *qs) +{ + char *key, *val; + size_t keysz, valsz; + + req->isquery = 1; + req->q.manpath = NULL; + req->q.arch = NULL; + req->q.sec = NULL; + req->q.query = NULL; + req->q.equal = 1; + + key = val = NULL; + while (*qs != '\0') { + + /* Parse one key. */ + + keysz = strcspn(qs, "=;&"); + key = mandoc_strndup(qs, keysz); + qs += keysz; + if (*qs != '=') + goto next; + + /* Parse one value. */ + + valsz = strcspn(++qs, ";&"); + val = mandoc_strndup(qs, valsz); + qs += valsz; + + /* Decode and catch encoding errors. */ + + if ( ! (http_decode(key) && http_decode(val))) + goto next; + + /* Handle key-value pairs. */ + + if ( ! strcmp(key, "query")) + set_query_attr(&req->q.query, &val); + + else if ( ! strcmp(key, "apropos")) + req->q.equal = !strcmp(val, "0"); + + else if ( ! strcmp(key, "manpath")) { +#ifdef COMPAT_OLDURI + if ( ! strncmp(val, "OpenBSD ", 8)) { + val[7] = '-'; + if ('C' == val[8]) + val[8] = 'c'; + } +#endif + set_query_attr(&req->q.manpath, &val); + } + + else if ( ! 
(strcmp(key, "sec") +#ifdef COMPAT_OLDURI + && strcmp(key, "sektion") +#endif + )) { + if ( ! strcmp(val, "0")) + *val = '\0'; + set_query_attr(&req->q.sec, &val); + } + + else if ( ! strcmp(key, "arch")) { + if ( ! strcmp(val, "default")) + *val = '\0'; + set_query_attr(&req->q.arch, &val); + } + + /* + * The key must be freed in any case. + * The val may have been handed over to the query + * structure, in which case it is now NULL. + */ +next: + free(key); + key = NULL; + free(val); + val = NULL; + + if (*qs != '\0') + qs++; + } +} + +/* + * HTTP-decode a string. The standard explanation is that this turns + * "%4e+foo" into "n foo" in the regular way. This is done in-place + * over the allocated string. + */ +static int +http_decode(char *p) +{ + char hex[3]; + char *q; + int c; + + hex[2] = '\0'; + + q = p; + for ( ; '\0' != *p; p++, q++) { + if ('%' == *p) { + if ('\0' == (hex[0] = *(p + 1))) + return 0; + if ('\0' == (hex[1] = *(p + 2))) + return 0; + if (1 != sscanf(hex, "%x", &c)) + return 0; + if ('\0' == c) + return 0; + + *q = (char)c; + p += 2; + } else + *q = '+' == *p ? 
' ' : *p; + } + + *q = '\0'; + return 1; +} + +static void +http_encode(const char *p) +{ + for (; *p != '\0'; p++) { + if (isalnum((unsigned char)*p) == 0 && + strchr("-._~", *p) == NULL) + printf("%%%2.2X", (unsigned char)*p); + else + putchar(*p); + } +} + +static void +resp_begin_http(int code, const char *msg) +{ + + if (200 != code) + printf("Status: %d %s\r\n", code, msg); + + printf("Content-Type: text/html; charset=utf-8\r\n" + "Cache-Control: no-cache\r\n" + "Content-Security-Policy: default-src 'none'; " + "style-src 'self' 'unsafe-inline'\r\n" + "Pragma: no-cache\r\n" + "\r\n"); + + fflush(stdout); +} + +static void +resp_copy(const char *filename) +{ + char buf[4096]; + ssize_t sz; + int fd; + + if ((fd = open(filename, O_RDONLY)) != -1) { + fflush(stdout); + while ((sz = read(fd, buf, sizeof(buf))) > 0) + write(STDOUT_FILENO, buf, sz); + close(fd); + } +} + +static void +resp_begin_html(int code, const char *msg, const char *file) +{ + char *cp; + + resp_begin_http(code, msg); + + printf("<!DOCTYPE html>\n" + "<html>\n" + "<head>\n" + " <meta charset=\"UTF-8\"/>\n" + " <meta name=\"viewport\"" + " content=\"width=device-width, initial-scale=1.0\">\n" + " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" + " type=\"text/css\" media=\"all\">\n" + " <title>", + CSS_DIR); + if (file != NULL) { + if ((cp = strrchr(file, '/')) != NULL) + file = cp + 1; + if ((cp = strrchr(file, '.')) != NULL) { + printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); + } else + printf("%s - ", file); + } + printf("%s</title>\n" + "</head>\n" + "<body>\n", + CUSTOMIZE_TITLE); + + resp_copy(MAN_DIR "/header.html"); +} + +static void +resp_end_html(void) +{ + + resp_copy(MAN_DIR "/footer.html"); + + puts("</body>\n" + "</html>"); +} + +static void +resp_searchform(const struct req *req, enum focus focus) +{ + int i; + + printf("<form action=\"/%s\" method=\"get\" " + "autocomplete=\"off\" autocapitalize=\"none\">\n" + " <fieldset>\n" + " <legend>Manual Page Search 
Parameters</legend>\n", + scriptname); + + /* Write query input box. */ + + printf(" <input type=\"search\" name=\"query\" value=\""); + if (req->q.query != NULL) + html_print(req->q.query); + printf( "\" size=\"40\""); + if (focus == FOCUS_QUERY) + printf(" autofocus"); + puts(">"); + + /* Write submission buttons. */ + + printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" + "man</button>\n" + " <button type=\"submit\" name=\"apropos\" value=\"1\">" + "apropos</button>\n" + " <br/>\n"); + + /* Write section selector. */ + + puts(" <select name=\"sec\">"); + for (i = 0; i < sec_MAX; i++) { + printf(" <option value=\"%s\"", sec_numbers[i]); + if (NULL != req->q.sec && + 0 == strcmp(sec_numbers[i], req->q.sec)) + printf(" selected=\"selected\""); + printf(">%s</option>\n", sec_names[i]); + } + puts(" </select>"); + + /* Write architecture selector. */ + + printf( " <select name=\"arch\">\n" + " <option value=\"default\""); + if (NULL == req->q.arch) + printf(" selected=\"selected\""); + puts(">All Architectures</option>"); + for (i = 0; i < arch_MAX; i++) { + printf(" <option"); + if (NULL != req->q.arch && + 0 == strcmp(arch_names[i], req->q.arch)) + printf(" selected=\"selected\""); + printf(">%s</option>\n", arch_names[i]); + } + puts(" </select>"); + + /* Write manpath selector. */ + + if (req->psz > 1) { + puts(" <select name=\"manpath\">"); + for (i = 0; i < (int)req->psz; i++) { + printf(" <option"); + if (strcmp(req->q.manpath, req->p[i]) == 0) + printf(" selected=\"selected\""); + printf(">"); + html_print(req->p[i]); + puts("</option>"); + } + puts(" </select>"); + } + + puts(" </fieldset>\n" + "</form>"); +} + +static int +validate_urifrag(const char *frag) +{ + + while ('\0' != *frag) { + if ( ! (isalnum((unsigned char)*frag) || + '-' == *frag || '.' 
== *frag || + '/' == *frag || '_' == *frag)) + return 0; + frag++; + } + return 1; +} + +static int +validate_manpath(const struct req *req, const char* manpath) +{ + size_t i; + + for (i = 0; i < req->psz; i++) + if ( ! strcmp(manpath, req->p[i])) + return 1; + + return 0; +} + +static int +validate_arch(const char *arch) +{ + int i; + + for (i = 0; i < arch_MAX; i++) + if (strcmp(arch, arch_names[i]) == 0) + return 1; + + return 0; +} + +static int +validate_filename(const char *file) +{ + + if ('.' == file[0] && '/' == file[1]) + file += 2; + + return ! (strstr(file, "../") || strstr(file, "/..") || + (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); +} + +static void +pg_index(const struct req *req) +{ + + resp_begin_html(200, NULL, NULL); + resp_searchform(req, FOCUS_QUERY); + printf("<p>\n" + "This web interface is documented in the\n" + "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" + "manual, and the\n" + "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" + "manual explains the query syntax.\n" + "</p>\n", + scriptname, *scriptname == '\0' ? "" : "/", + scriptname, *scriptname == '\0' ? 
"" : "/"); + resp_end_html(); +} + +static void +pg_noresult(const struct req *req, int code, const char *http_msg, + const char *user_msg) +{ + resp_begin_html(code, http_msg, NULL); + resp_searchform(req, FOCUS_QUERY); + puts("<p>"); + puts(user_msg); + puts("</p>"); + resp_end_html(); +} + +static void +pg_error_badrequest(const char *msg) +{ + + resp_begin_html(400, "Bad Request", NULL); + puts("<h1>Bad Request</h1>\n" + "<p>\n"); + puts(msg); + printf("Try again from the\n" + "<a href=\"/%s\">main page</a>.\n" + "</p>", scriptname); + resp_end_html(); +} + +static void +pg_error_internal(void) +{ + resp_begin_html(500, "Internal Server Error", NULL); + puts("<p>Internal Server Error</p>"); + resp_end_html(); +} + +static void +pg_redirect(const struct req *req, const char *name) +{ + printf("Status: 303 See Other\r\n" + "Location: /"); + if (*scriptname != '\0') + printf("%s/", scriptname); + if (strcmp(req->q.manpath, req->p[0])) + printf("%s/", req->q.manpath); + if (req->q.arch != NULL) + printf("%s/", req->q.arch); + http_encode(name); + if (req->q.sec != NULL) { + putchar('.'); + http_encode(req->q.sec); + } + printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); +} + +static void +pg_searchres(const struct req *req, struct manpage *r, size_t sz) +{ + char *arch, *archend; + const char *sec; + size_t i, iuse; + int archprio, archpriouse; + int prio, priouse; + + for (i = 0; i < sz; i++) { + if (validate_filename(r[i].file)) + continue; + warnx("invalid filename %s in %s database", + r[i].file, req->q.manpath); + pg_error_internal(); + return; + } + + if (req->isquery && sz == 1) { + /* + * If we have just one result, then jump there now + * without any delay. 
+ */ + printf("Status: 303 See Other\r\n" + "Location: /"); + if (*scriptname != '\0') + printf("%s/", scriptname); + if (strcmp(req->q.manpath, req->p[0])) + printf("%s/", req->q.manpath); + printf("%s\r\n" + "Content-Type: text/html; charset=utf-8\r\n\r\n", + r[0].file); + return; + } + + /* + * In man(1) mode, show one of the pages + * even if more than one is found. + */ + + iuse = 0; + if (req->q.equal || sz == 1) { + priouse = 20; + archpriouse = 3; + for (i = 0; i < sz; i++) { + sec = r[i].file; + sec += strcspn(sec, "123456789"); + if (sec[0] == '\0') + continue; + prio = sec_prios[sec[0] - '1']; + if (sec[1] != '/') + prio += 10; + if (req->q.arch == NULL) { + archprio = + ((arch = strchr(sec + 1, '/')) + == NULL) ? 3 : + ((archend = strchr(arch + 1, '/')) + == NULL) ? 0 : + strncmp(arch, "amd64/", + archend - arch) ? 2 : 1; + if (archprio < archpriouse) { + archpriouse = archprio; + priouse = prio; + iuse = i; + continue; + } + if (archprio > archpriouse) + continue; + } + if (prio >= priouse) + continue; + priouse = prio; + iuse = i; + } + resp_begin_html(200, NULL, r[iuse].file); + } else + resp_begin_html(200, NULL, NULL); + + resp_searchform(req, + req->q.equal || sz == 1 ? 
FOCUS_NONE : FOCUS_QUERY); + + if (sz > 1) { + puts("<table class=\"results\">"); + for (i = 0; i < sz; i++) { + printf(" <tr>\n" + " <td>" + "<a class=\"Xr\" href=\"/"); + if (*scriptname != '\0') + printf("%s/", scriptname); + if (strcmp(req->q.manpath, req->p[0])) + printf("%s/", req->q.manpath); + printf("%s\">", r[i].file); + html_print(r[i].names); + printf("</a></td>\n" + " <td><span class=\"Nd\">"); + html_print(r[i].output); + puts("</span></td>\n" + " </tr>"); + } + puts("</table>"); + } + + if (req->q.equal || sz == 1) { + puts("<hr>"); + resp_show(req, r[iuse].file); + } + + resp_end_html(); +} + +static void +resp_catman(const struct req *req, const char *file) +{ + FILE *f; + char *p; + size_t sz; + ssize_t len; + int i; + int italic, bold; + + if ((f = fopen(file, "r")) == NULL) { + puts("<p>You specified an invalid manual file.</p>"); + return; + } + + puts("<div class=\"catman\">\n" + "<pre>"); + + p = NULL; + sz = 0; + + while ((len = getline(&p, &sz, f)) != -1) { + bold = italic = 0; + for (i = 0; i < len - 1; i++) { + /* + * This means that the catpage is out of state. + * Ignore it and keep going (although the + * catpage is bogus). + */ + + if ('\b' == p[i] || '\n' == p[i]) + continue; + + /* + * Print a regular character. + * Close out any bold/italic scopes. + * If we're in back-space mode, make sure we'll + * have something to enter when we backspace. + */ + + if ('\b' != p[i + 1]) { + if (italic) + printf("</i>"); + if (bold) + printf("</b>"); + italic = bold = 0; + html_putchar(p[i]); + continue; + } else if (i + 2 >= len) + continue; + + /* Italic mode. */ + + if ('_' == p[i]) { + if (bold) + printf("</b>"); + if ( ! italic) + printf("<i>"); + bold = 0; + italic = 1; + i += 2; + html_putchar(p[i]); + continue; + } + + /* + * Handle funny behaviour troff-isms. + * These grok'd from the original man2html.c. 
+ */ + + if (('+' == p[i] && 'o' == p[i + 2]) || + ('o' == p[i] && '+' == p[i + 2]) || + ('|' == p[i] && '=' == p[i + 2]) || + ('=' == p[i] && '|' == p[i + 2]) || + ('*' == p[i] && '=' == p[i + 2]) || + ('=' == p[i] && '*' == p[i + 2]) || + ('*' == p[i] && '|' == p[i + 2]) || + ('|' == p[i] && '*' == p[i + 2])) { + if (italic) + printf("</i>"); + if (bold) + printf("</b>"); + italic = bold = 0; + putchar('*'); + i += 2; + continue; + } else if (('|' == p[i] && '-' == p[i + 2]) || + ('-' == p[i] && '|' == p[i + 1]) || + ('+' == p[i] && '-' == p[i + 1]) || + ('-' == p[i] && '+' == p[i + 1]) || + ('+' == p[i] && '|' == p[i + 1]) || + ('|' == p[i] && '+' == p[i + 1])) { + if (italic) + printf("</i>"); + if (bold) + printf("</b>"); + italic = bold = 0; + putchar('+'); + i += 2; + continue; + } + + /* Bold mode. */ + + if (italic) + printf("</i>"); + if ( ! bold) + printf("<b>"); + bold = 1; + italic = 0; + i += 2; + html_putchar(p[i]); + } + + /* + * Clean up the last character. + * We can get to a newline; don't print that. + */ + + if (italic) + printf("</i>"); + if (bold) + printf("</b>"); + + if (i == len - 1 && p[i] != '\n') + html_putchar(p[i]); + + putchar('\n'); + } + free(p); + + puts("</pre>\n" + "</div>"); + + fclose(f); +} + +static void +resp_format(const struct req *req, const char *file) +{ + struct manoutput conf; + struct mparse *mp; + struct roff_meta *meta; + void *vp; + int fd; + int usepath; + + if (-1 == (fd = open(file, O_RDONLY, 0))) { + puts("<p>You specified an invalid manual file.</p>"); + return; + } + + mchars_alloc(); + mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | + MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); + mparse_readfd(mp, fd, file); + close(fd); + meta = mparse_result(mp); + + memset(&conf, 0, sizeof(conf)); + conf.fragment = 1; + conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); + usepath = strcmp(req->q.manpath, req->p[0]); + mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", + scriptname, *scriptname == '\0' ? 
"" : "/", + usepath ? req->q.manpath : "", usepath ? "/" : ""); + + vp = html_alloc(&conf); + if (meta->macroset == MACROSET_MDOC) + html_mdoc(vp, meta); + else + html_man(vp, meta); + + html_free(vp); + mparse_free(mp); + mchars_free(); + free(conf.man); + free(conf.style); +} + +static void +resp_show(const struct req *req, const char *file) +{ + + if ('.' == file[0] && '/' == file[1]) + file += 2; + + if ('c' == *file) + resp_catman(req, file); + else + resp_format(req, file); +} + +static void +pg_show(struct req *req, const char *fullpath) +{ + char *manpath; + const char *file; + + if ((file = strchr(fullpath, '/')) == NULL) { + pg_error_badrequest( + "You did not specify a page to show."); + return; + } + manpath = mandoc_strndup(fullpath, file - fullpath); + file++; + + if ( ! validate_manpath(req, manpath)) { + pg_error_badrequest( + "You specified an invalid manpath."); + free(manpath); + return; + } + + /* + * Begin by chdir()ing into the manpath. + * This way we can pick up the database files, which are + * relative to the manpath root. + */ + + if (chdir(manpath) == -1) { + warn("chdir %s", manpath); + pg_error_internal(); + free(manpath); + return; + } + free(manpath); + + if ( ! validate_filename(file)) { + pg_error_badrequest( + "You specified an invalid manual file."); + return; + } + + resp_begin_html(200, NULL, file); + resp_searchform(req, FOCUS_NONE); + resp_show(req, file); + resp_end_html(); +} + +static void +pg_search(const struct req *req) +{ + struct mansearch search; + struct manpaths paths; + struct manpage *res; + char **argv; + char *query, *rp, *wp; + size_t ressz; + int argc; + + /* + * Begin by chdir()ing into the root of the manpath. + * This way we can pick up the database files, which are + * relative to the manpath root. 
+ */ + + if (chdir(req->q.manpath) == -1) { + warn("chdir %s", req->q.manpath); + pg_error_internal(); + return; + } + + search.arch = req->q.arch; + search.sec = req->q.sec; + search.outkey = "Nd"; + search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; + search.firstmatch = 1; + + paths.sz = 1; + paths.paths = mandoc_malloc(sizeof(char *)); + paths.paths[0] = mandoc_strdup("."); + + /* + * Break apart at spaces with backslash-escaping. + */ + + argc = 0; + argv = NULL; + rp = query = mandoc_strdup(req->q.query); + for (;;) { + while (isspace((unsigned char)*rp)) + rp++; + if (*rp == '\0') + break; + argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); + argv[argc++] = wp = rp; + for (;;) { + if (isspace((unsigned char)*rp)) { + *wp = '\0'; + rp++; + break; + } + if (rp[0] == '\\' && rp[1] != '\0') + rp++; + if (wp != rp) + *wp = *rp; + if (*rp == '\0') + break; + wp++; + rp++; + } + } + + res = NULL; + ressz = 0; + if (req->isquery && req->q.equal && argc == 1) + pg_redirect(req, argv[0]); + else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) + pg_noresult(req, 400, "Bad Request", + "You entered an invalid query."); + else if (ressz == 0) + pg_noresult(req, 404, "Not Found", "No results found."); + else + pg_searchres(req, res, ressz); + + free(query); + mansearch_free(res, ressz); + free(paths.paths[0]); + free(paths.paths); +} + +int +main(void) +{ + struct req req; + struct itimerval itimer; + const char *path; + const char *querystring; + int i; + + /* + * The "rpath" pledge could be revoked after mparse_readfd() + * if the file desciptor to "/footer.html" would be opened + * up front, but it's probably not worth the complication + * of the code it would cause: it would require scattering + * pledge() calls in multiple low-level resp_*() functions. + */ + + if (pledge("stdio rpath", NULL) == -1) { + warn("pledge"); + pg_error_internal(); + return EXIT_FAILURE; + } + + /* Poor man's ReDoS mitigation. 
*/ + + itimer.it_value.tv_sec = 2; + itimer.it_value.tv_usec = 0; + itimer.it_interval.tv_sec = 2; + itimer.it_interval.tv_usec = 0; + if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { + warn("setitimer"); + pg_error_internal(); + return EXIT_FAILURE; + } + + /* + * First we change directory into the MAN_DIR so that + * subsequent scanning for manpath directories is rooted + * relative to the same position. + */ + + if (chdir(MAN_DIR) == -1) { + warn("MAN_DIR: %s", MAN_DIR); + pg_error_internal(); + return EXIT_FAILURE; + } + + memset(&req, 0, sizeof(struct req)); + req.q.equal = 1; + parse_manpath_conf(&req); + + /* Parse the path info and the query string. */ + + if ((path = getenv("PATH_INFO")) == NULL) + path = ""; + else if (*path == '/') + path++; + + if (*path != '\0') { + parse_path_info(&req, path); + if (req.q.manpath == NULL || req.q.sec == NULL || + *req.q.query == '\0' || access(path, F_OK) == -1) + path = ""; + } else if ((querystring = getenv("QUERY_STRING")) != NULL) + parse_query_string(&req, querystring); + + /* Validate parsed data and add defaults. */ + + if (req.q.manpath == NULL) + req.q.manpath = mandoc_strdup(req.p[0]); + else if ( ! validate_manpath(&req, req.q.manpath)) { + pg_error_badrequest( + "You specified an invalid manpath."); + return EXIT_FAILURE; + } + + if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { + pg_error_badrequest( + "You specified an invalid architecture."); + return EXIT_FAILURE; + } + + /* Dispatch to the three different pages. */ + + if ('\0' != *path) + pg_show(&req, path); + else if (NULL != req.q.query) + pg_search(&req); + else + pg_index(&req); + + free(req.q.manpath); + free(req.q.arch); + free(req.q.sec); + free(req.q.query); + for (i = 0; i < (int)req.psz; i++) + free(req.p[i]); + free(req.p); + return EXIT_SUCCESS; +} + +/* + * Translate PATH_INFO to a query. 
+ */ +static void +parse_path_info(struct req *req, const char *path) +{ + const char *name, *sec, *end; + + req->isquery = 0; + req->q.equal = 1; + req->q.manpath = NULL; + req->q.arch = NULL; + + /* Mandatory manual page name. */ + if ((name = strrchr(path, '/')) == NULL) + name = path; + else + name++; + + /* Optional trailing section. */ + sec = strrchr(name, '.'); + if (sec != NULL && isdigit((unsigned char)*++sec)) { + req->q.query = mandoc_strndup(name, sec - name - 1); + req->q.sec = mandoc_strdup(sec); + } else { + req->q.query = mandoc_strdup(name); + req->q.sec = NULL; + } + + /* Handle the case of name[.section] only. */ + if (name == path) + return; + + /* Optional manpath. */ + end = strchr(path, '/'); + req->q.manpath = mandoc_strndup(path, end - path); + if (validate_manpath(req, req->q.manpath)) { + path = end + 1; + if (name == path) + return; + } else { + free(req->q.manpath); + req->q.manpath = NULL; + } + + /* Optional section. */ + if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { + path += 3; + end = strchr(path, '/'); + free(req->q.sec); + req->q.sec = mandoc_strndup(path, end - path); + path = end + 1; + if (name == path) + return; + } + + /* Optional architecture. */ + end = strchr(path, '/'); + if (end + 1 != name) { + pg_error_badrequest( + "You specified too many directory components."); + exit(EXIT_FAILURE); + } + req->q.arch = mandoc_strndup(path, end - path); + if (validate_arch(req->q.arch) == 0) { + pg_error_badrequest( + "You specified an invalid directory component."); + exit(EXIT_FAILURE); + } +} + +/* + * Scan for indexable paths. 
+ */ +static void +parse_manpath_conf(struct req *req) +{ + FILE *fp; + char *dp; + size_t dpsz; + ssize_t len; + + if ((fp = fopen("manpath.conf", "r")) == NULL) { + warn("%s/manpath.conf", MAN_DIR); + pg_error_internal(); + exit(EXIT_FAILURE); + } + + dp = NULL; + dpsz = 0; + + while ((len = getline(&dp, &dpsz, fp)) != -1) { + if (dp[len - 1] == '\n') + dp[--len] = '\0'; + req->p = mandoc_realloc(req->p, + (req->psz + 1) * sizeof(char *)); + if ( ! validate_urifrag(dp)) { + warnx("%s/manpath.conf contains " + "unsafe path \"%s\"", MAN_DIR, dp); + pg_error_internal(); + exit(EXIT_FAILURE); + } + if (strchr(dp, '/') != NULL) { + warnx("%s/manpath.conf contains " + "path with slash \"%s\"", MAN_DIR, dp); + pg_error_internal(); + exit(EXIT_FAILURE); + } + req->p[req->psz++] = dp; + dp = NULL; + dpsz = 0; + } + free(dp); + + if (req->p == NULL) { + warnx("%s/manpath.conf is empty", MAN_DIR); + pg_error_internal(); + exit(EXIT_FAILURE); + } +} diff --git a/usr.bin/mandoc/cgi.h.example b/usr.bin/mandoc/cgi.h.example new file mode 100644 index 0000000..2ccbe25 --- /dev/null +++ b/usr.bin/mandoc/cgi.h.example @@ -0,0 +1,7 @@ +/* Example compile-time configuration file for man.cgi(8). */ + +#define SCRIPT_NAME "cgi-bin/man.cgi" +#define MAN_DIR "/man" +#define CSS_DIR "" +#define CUSTOMIZE_TITLE "Manual pages with mandoc" +#define COMPAT_OLDURI Yes diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c new file mode 100644 index 0000000..d091ab2 --- /dev/null +++ b/usr.bin/mandoc/chars.c @@ -0,0 +1,506 @@ +/* $OpenBSD: chars.c,v 1.49 2020/02/13 16:16:03 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020 + * Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "libmandoc.h" + +struct ln { + const char roffcode[16]; + const char *ascii; + int unicode; +}; + +/* Special break control characters. */ +static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; +static const char ascii_break[2] = { ASCII_BREAK, '\0' }; + +static struct ln lines[] = { + + /* Spacing. */ + { " ", ascii_nbrsp, 0x00a0 }, + { "~", ascii_nbrsp, 0x00a0 }, + { "0", ascii_nbrsp, 0x00a0 }, + { ":", ascii_break, 0 }, + + /* Lines. */ + { "ba", "|", 0x007c }, + { "br", "|", 0x2502 }, + { "ul", "_", 0x005f }, + { "_", "_", 0x005f }, + { "ru", "_", 0x005f }, + { "rn", "-", 0x203e }, + { "bb", "|", 0x00a6 }, + { "sl", "/", 0x002f }, + { "rs", "\\", 0x005c }, + + /* Text markers. 
*/ + { "ci", "O", 0x25cb }, + { "bu", "+\bo", 0x2022 }, + { "dd", "<**>", 0x2021 }, + { "dg", "<*>", 0x2020 }, + { "lz", "<>", 0x25ca }, + { "sq", "[]", 0x25a1 }, + { "ps", "<paragraph>", 0x00b6 }, + { "sc", "<section>", 0x00a7 }, + { "lh", "<=", 0x261c }, + { "rh", "=>", 0x261e }, + { "at", "@", 0x0040 }, + { "sh", "#", 0x0023 }, + { "CR", "<cr>", 0x21b5 }, + { "OK", "\\/", 0x2713 }, + { "CL", "C", 0x2663 }, + { "SP", "S", 0x2660 }, + { "HE", "H", 0x2665 }, + { "DI", "D", 0x2666 }, + + /* Legal symbols. */ + { "co", "(C)", 0x00a9 }, + { "rg", "(R)", 0x00ae }, + { "tm", "tm", 0x2122 }, + + /* Punctuation. */ + { "em", "--", 0x2014 }, + { "en", "-", 0x2013 }, + { "hy", "-", 0x2010 }, + { "e", "\\", 0x005c }, + { ".", ".", 0x002e }, + { "r!", "!", 0x00a1 }, + { "r?", "?", 0x00bf }, + + /* Quotes. */ + { "Bq", ",,", 0x201e }, + { "bq", ",", 0x201a }, + { "lq", "\"", 0x201c }, + { "rq", "\"", 0x201d }, + { "Lq", "\"", 0x201c }, + { "Rq", "\"", 0x201d }, + { "oq", "`", 0x2018 }, + { "cq", "\'", 0x2019 }, + { "aq", "\'", 0x0027 }, + { "dq", "\"", 0x0022 }, + { "Fo", "<<", 0x00ab }, + { "Fc", ">>", 0x00bb }, + { "fo", "<", 0x2039 }, + { "fc", ">", 0x203a }, + + /* Brackets. 
*/ + { "lB", "[", 0x005b }, + { "rB", "]", 0x005d }, + { "lC", "{", 0x007b }, + { "rC", "}", 0x007d }, + { "la", "<", 0x27e8 }, + { "ra", ">", 0x27e9 }, + { "bv", "|", 0x23aa }, + { "braceex", "|", 0x23aa }, + { "bracketlefttp", "|", 0x23a1 }, + { "bracketleftbt", "|", 0x23a3 }, + { "bracketleftex", "|", 0x23a2 }, + { "bracketrighttp", "|", 0x23a4 }, + { "bracketrightbt", "|", 0x23a6 }, + { "bracketrightex", "|", 0x23a5 }, + { "lt", ",-", 0x23a7 }, + { "bracelefttp", ",-", 0x23a7 }, + { "lk", "{", 0x23a8 }, + { "braceleftmid", "{", 0x23a8 }, + { "lb", "`-", 0x23a9 }, + { "braceleftbt", "`-", 0x23a9 }, + { "braceleftex", "|", 0x23aa }, + { "rt", "-.", 0x23ab }, + { "bracerighttp", "-.", 0x23ab }, + { "rk", "}", 0x23ac }, + { "bracerightmid", "}", 0x23ac }, + { "rb", "-\'", 0x23ad }, + { "bracerightbt", "-\'", 0x23ad }, + { "bracerightex", "|", 0x23aa }, + { "parenlefttp", "/", 0x239b }, + { "parenleftbt", "\\", 0x239d }, + { "parenleftex", "|", 0x239c }, + { "parenrighttp", "\\", 0x239e }, + { "parenrightbt", "/", 0x23a0 }, + { "parenrightex", "|", 0x239f }, + + /* Arrows and lines. */ + { "<-", "<-", 0x2190 }, + { "->", "->", 0x2192 }, + { "<>", "<->", 0x2194 }, + { "da", "|\bv", 0x2193 }, + { "ua", "|\b^", 0x2191 }, + { "va", "^v", 0x2195 }, + { "lA", "<=", 0x21d0 }, + { "rA", "=>", 0x21d2 }, + { "hA", "<=>", 0x21d4 }, + { "uA", "=\b^", 0x21d1 }, + { "dA", "=\bv", 0x21d3 }, + { "vA", "^=v", 0x21d5 }, + { "an", "-", 0x23af }, + + /* Logic. */ + { "AN", "^", 0x2227 }, + { "OR", "v", 0x2228 }, + { "no", "~", 0x00ac }, + { "tno", "~", 0x00ac }, + { "te", "<there\037exists>", 0x2203 }, + { "fa", "<for\037all>", 0x2200 }, + { "st", "<such\037that>", 0x220b }, + { "tf", "<therefore>", 0x2234 }, + { "3d", "<therefore>", 0x2234 }, + { "or", "|", 0x007c }, + + /* Mathematicals. 
*/ + { "pl", "+", 0x002b }, + { "mi", "-", 0x2212 }, + { "-", "-", 0x002d }, + { "-+", "-+", 0x2213 }, + { "+-", "+-", 0x00b1 }, + { "t+-", "+-", 0x00b1 }, + { "pc", ".", 0x00b7 }, + { "md", ".", 0x22c5 }, + { "mu", "x", 0x00d7 }, + { "tmu", "x", 0x00d7 }, + { "c*", "O\bx", 0x2297 }, + { "c+", "O\b+", 0x2295 }, + { "di", "/", 0x00f7 }, + { "tdi", "/", 0x00f7 }, + { "f/", "/", 0x2044 }, + { "**", "*", 0x2217 }, + { "<=", "<=", 0x2264 }, + { ">=", ">=", 0x2265 }, + { "<<", "<<", 0x226a }, + { ">>", ">>", 0x226b }, + { "eq", "=", 0x003d }, + { "!=", "!=", 0x2260 }, + { "==", "==", 0x2261 }, + { "ne", "!==", 0x2262 }, + { "ap", "~", 0x223c }, + { "|=", "-~", 0x2243 }, + { "=~", "=~", 0x2245 }, + { "~~", "~~", 0x2248 }, + { "~=", "~=", 0x2248 }, + { "pt", "<proportional\037to>", 0x221d }, + { "es", "{}", 0x2205 }, + { "mo", "<element\037of>", 0x2208 }, + { "nm", "<not\037element\037of>", 0x2209 }, + { "sb", "<proper\037subset>", 0x2282 }, + { "nb", "<not\037subset>", 0x2284 }, + { "sp", "<proper\037superset>", 0x2283 }, + { "nc", "<not\037superset>", 0x2285 }, + { "ib", "<subset\037or\037equal>", 0x2286 }, + { "ip", "<superset\037or\037equal>", 0x2287 }, + { "ca", "<intersection>", 0x2229 }, + { "cu", "<union>", 0x222a }, + { "/_", "<angle>", 0x2220 }, + { "pp", "<perpendicular>", 0x22a5 }, + { "is", "<integral>", 0x222b }, + { "integral", "<integral>", 0x222b }, + { "sum", "<sum>", 0x2211 }, + { "product", "<product>", 0x220f }, + { "coproduct", "<coproduct>", 0x2210 }, + { "gr", "<nabla>", 0x2207 }, + { "sr", "<sqrt>", 0x221a }, + { "sqrt", "<sqrt>", 0x221a }, + { "lc", "|~", 0x2308 }, + { "rc", "~|", 0x2309 }, + { "lf", "|_", 0x230a }, + { "rf", "_|", 0x230b }, + { "if", "<infinity>", 0x221e }, + { "Ah", "<Aleph>", 0x2135 }, + { "Im", "<Im>", 0x2111 }, + { "Re", "<Re>", 0x211c }, + { "wp", "p", 0x2118 }, + { "pd", "<del>", 0x2202 }, + { "-h", "/h", 0x210f }, + { "hbar", "/h", 0x210f }, + { "12", "1/2", 0x00bd }, + { "14", "1/4", 0x00bc }, + { "34", "3/4", 0x00be }, + 
{ "18", "1/8", 0x215B }, + { "38", "3/8", 0x215C }, + { "58", "5/8", 0x215D }, + { "78", "7/8", 0x215E }, + { "S1", "^1", 0x00B9 }, + { "S2", "^2", 0x00B2 }, + { "S3", "^3", 0x00B3 }, + + /* Ligatures. */ + { "ff", "ff", 0xfb00 }, + { "fi", "fi", 0xfb01 }, + { "fl", "fl", 0xfb02 }, + { "Fi", "ffi", 0xfb03 }, + { "Fl", "ffl", 0xfb04 }, + { "AE", "AE", 0x00c6 }, + { "ae", "ae", 0x00e6 }, + { "OE", "OE", 0x0152 }, + { "oe", "oe", 0x0153 }, + { "ss", "ss", 0x00df }, + { "IJ", "IJ", 0x0132 }, + { "ij", "ij", 0x0133 }, + + /* Accents. */ + { "a\"", "\"", 0x02dd }, + { "a-", "-", 0x00af }, + { "a.", ".", 0x02d9 }, + { "a^", "^", 0x005e }, + { "aa", "\'", 0x00b4 }, + { "\'", "\'", 0x00b4 }, + { "ga", "`", 0x0060 }, + { "`", "`", 0x0060 }, + { "ab", "'\b`", 0x02d8 }, + { "ac", ",", 0x00b8 }, + { "ad", "\"", 0x00a8 }, + { "ah", "v", 0x02c7 }, + { "ao", "o", 0x02da }, + { "a~", "~", 0x007e }, + { "ho", ",", 0x02db }, + { "ha", "^", 0x005e }, + { "ti", "~", 0x007e }, + { "u02DC", "~", 0x02dc }, + + /* Accented letters. 
*/ + { "'A", "'\bA", 0x00c1 }, + { "'E", "'\bE", 0x00c9 }, + { "'I", "'\bI", 0x00cd }, + { "'O", "'\bO", 0x00d3 }, + { "'U", "'\bU", 0x00da }, + { "'Y", "'\bY", 0x00dd }, + { "'a", "'\ba", 0x00e1 }, + { "'e", "'\be", 0x00e9 }, + { "'i", "'\bi", 0x00ed }, + { "'o", "'\bo", 0x00f3 }, + { "'u", "'\bu", 0x00fa }, + { "'y", "'\by", 0x00fd }, + { "`A", "`\bA", 0x00c0 }, + { "`E", "`\bE", 0x00c8 }, + { "`I", "`\bI", 0x00cc }, + { "`O", "`\bO", 0x00d2 }, + { "`U", "`\bU", 0x00d9 }, + { "`a", "`\ba", 0x00e0 }, + { "`e", "`\be", 0x00e8 }, + { "`i", "`\bi", 0x00ec }, + { "`o", "`\bo", 0x00f2 }, + { "`u", "`\bu", 0x00f9 }, + { "~A", "~\bA", 0x00c3 }, + { "~N", "~\bN", 0x00d1 }, + { "~O", "~\bO", 0x00d5 }, + { "~a", "~\ba", 0x00e3 }, + { "~n", "~\bn", 0x00f1 }, + { "~o", "~\bo", 0x00f5 }, + { ":A", "\"\bA", 0x00c4 }, + { ":E", "\"\bE", 0x00cb }, + { ":I", "\"\bI", 0x00cf }, + { ":O", "\"\bO", 0x00d6 }, + { ":U", "\"\bU", 0x00dc }, + { ":a", "\"\ba", 0x00e4 }, + { ":e", "\"\be", 0x00eb }, + { ":i", "\"\bi", 0x00ef }, + { ":o", "\"\bo", 0x00f6 }, + { ":u", "\"\bu", 0x00fc }, + { ":y", "\"\by", 0x00ff }, + { "^A", "^\bA", 0x00c2 }, + { "^E", "^\bE", 0x00ca }, + { "^I", "^\bI", 0x00ce }, + { "^O", "^\bO", 0x00d4 }, + { "^U", "^\bU", 0x00db }, + { "^a", "^\ba", 0x00e2 }, + { "^e", "^\be", 0x00ea }, + { "^i", "^\bi", 0x00ee }, + { "^o", "^\bo", 0x00f4 }, + { "^u", "^\bu", 0x00fb }, + { ",C", ",\bC", 0x00c7 }, + { ",c", ",\bc", 0x00e7 }, + { "/L", "/\bL", 0x0141 }, + { "/l", "/\bl", 0x0142 }, + { "/O", "/\bO", 0x00d8 }, + { "/o", "/\bo", 0x00f8 }, + { "oA", "o\bA", 0x00c5 }, + { "oa", "o\ba", 0x00e5 }, + + /* Special letters. */ + { "-D", "Dh", 0x00d0 }, + { "Sd", "dh", 0x00f0 }, + { "TP", "Th", 0x00de }, + { "Tp", "th", 0x00fe }, + { ".i", "i", 0x0131 }, + { ".j", "j", 0x0237 }, + + /* Currency. 
*/ + { "Do", "$", 0x0024 }, + { "ct", "/\bc", 0x00a2 }, + { "Eu", "EUR", 0x20ac }, + { "eu", "EUR", 0x20ac }, + { "Ye", "=\bY", 0x00a5 }, + { "Po", "-\bL", 0x00a3 }, + { "Cs", "o\bx", 0x00a4 }, + { "Fn", ",\bf", 0x0192 }, + + /* Units. */ + { "de", "<degree>", 0x00b0 }, + { "%0", "<permille>", 0x2030 }, + { "fm", "\'", 0x2032 }, + { "sd", "''", 0x2033 }, + { "mc", "<micro>", 0x00b5 }, + { "Of", "_\ba", 0x00aa }, + { "Om", "_\bo", 0x00ba }, + + /* Greek characters. */ + { "*A", "A", 0x0391 }, + { "*B", "B", 0x0392 }, + { "*G", "<Gamma>", 0x0393 }, + { "*D", "<Delta>", 0x0394 }, + { "*E", "E", 0x0395 }, + { "*Z", "Z", 0x0396 }, + { "*Y", "H", 0x0397 }, + { "*H", "<Theta>", 0x0398 }, + { "*I", "I", 0x0399 }, + { "*K", "K", 0x039a }, + { "*L", "<Lambda>", 0x039b }, + { "*M", "M", 0x039c }, + { "*N", "N", 0x039d }, + { "*C", "<Xi>", 0x039e }, + { "*O", "O", 0x039f }, + { "*P", "<Pi>", 0x03a0 }, + { "*R", "P", 0x03a1 }, + { "*S", "<Sigma>", 0x03a3 }, + { "*T", "T", 0x03a4 }, + { "*U", "Y", 0x03a5 }, + { "*F", "<Phi>", 0x03a6 }, + { "*X", "X", 0x03a7 }, + { "*Q", "<Psi>", 0x03a8 }, + { "*W", "<Omega>", 0x03a9 }, + { "*a", "<alpha>", 0x03b1 }, + { "*b", "<beta>", 0x03b2 }, + { "*g", "<gamma>", 0x03b3 }, + { "*d", "<delta>", 0x03b4 }, + { "*e", "<epsilon>", 0x03b5 }, + { "*z", "<zeta>", 0x03b6 }, + { "*y", "<eta>", 0x03b7 }, + { "*h", "<theta>", 0x03b8 }, + { "*i", "<iota>", 0x03b9 }, + { "*k", "<kappa>", 0x03ba }, + { "*l", "<lambda>", 0x03bb }, + { "*m", "<mu>", 0x03bc }, + { "*n", "<nu>", 0x03bd }, + { "*c", "<xi>", 0x03be }, + { "*o", "o", 0x03bf }, + { "*p", "<pi>", 0x03c0 }, + { "*r", "<rho>", 0x03c1 }, + { "*s", "<sigma>", 0x03c3 }, + { "*t", "<tau>", 0x03c4 }, + { "*u", "<upsilon>", 0x03c5 }, + { "*f", "<phi>", 0x03d5 }, + { "*x", "<chi>", 0x03c7 }, + { "*q", "<psi>", 0x03c8 }, + { "*w", "<omega>", 0x03c9 }, + { "+h", "<theta>", 0x03d1 }, + { "+f", "<phi>", 0x03c6 }, + { "+p", "<pi>", 0x03d6 }, + { "+e", "<epsilon>", 0x03f5 }, + { "ts", "<sigma>", 0x03c2 }, +}; + 
+static struct ohash mchars; + + +void +mchars_free(void) +{ + + ohash_delete(&mchars); +} + +void +mchars_alloc(void) +{ + size_t i; + unsigned int slot; + + mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode)); + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) { + slot = ohash_qlookup(&mchars, lines[i].roffcode); + assert(ohash_find(&mchars, slot) == NULL); + ohash_insert(&mchars, slot, lines + i); + } +} + +int +mchars_spec2cp(const char *p, size_t sz) +{ + const struct ln *ln; + const char *end; + + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); + return ln != NULL ? ln->unicode : -1; +} + +int +mchars_num2char(const char *p, size_t sz) +{ + int i; + + i = mandoc_strntoi(p, sz, 10); + return i >= 0 && i < 256 ? i : -1; +} + +int +mchars_num2uc(const char *p, size_t sz) +{ + int i; + + i = mandoc_strntoi(p, sz, 16); + assert(i >= 0 && i <= 0x10FFFF); + return i; +} + +const char * +mchars_spec2str(const char *p, size_t sz, size_t *rsz) +{ + const struct ln *ln; + const char *end; + + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); + if (ln == NULL) + return NULL; + + *rsz = strlen(ln->ascii); + return ln->ascii; +} + +const char * +mchars_uc2str(int uc) +{ + size_t i; + + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) + if (uc == lines[i].unicode) + return lines[i].ascii; + return "<?>"; +} diff --git a/usr.bin/mandoc/dba.c b/usr.bin/mandoc/dba.c new file mode 100644 index 0000000..36d4bb7 --- /dev/null +++ b/usr.bin/mandoc/dba.c @@ -0,0 +1,501 @@ +/* $OpenBSD: dba.c,v 1.7 2017/02/09 18:26:17 schwarze Exp $ */ +/* + * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Allocation-based version of the mandoc database, for read-write access. + * The interface is defined in "dba.h". + */ +#include <sys/cdefs.h> +#include <sys/types.h> +#include <endian.h> +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mansearch.h" +#include "dba_write.h" +#include "dba_array.h" +#include "dba.h" + +struct macro_entry { + struct dba_array *pages; + char value[]; +}; + +static void *prepend(const char *, char); +static void dba_pages_write(struct dba_array *); +static int compare_names(const void *, const void *); +static int compare_strings(const void *, const void *); + +static struct macro_entry + *get_macro_entry(struct ohash *, const char *, int32_t); +static void dba_macros_write(struct dba_array *); +static void dba_macro_write(struct ohash *); +static int compare_entries(const void *, const void *); + + +/*** top-level functions **********************************************/ + +struct dba * +dba_new(int32_t npages) +{ + struct dba *dba; + struct ohash *macro; + int32_t im; + + dba = mandoc_malloc(sizeof(*dba)); + dba->pages = dba_array_new(npages, DBA_GROW); + dba->macros = dba_array_new(MACRO_MAX, 0); + for (im = 0; im < MACRO_MAX; im++) { + macro = mandoc_malloc(sizeof(*macro)); + mandoc_ohash_init(macro, 4, + offsetof(struct macro_entry, value)); + dba_array_set(dba->macros, im, 
macro); + } + return dba; +} + +void +dba_free(struct dba *dba) +{ + struct dba_array *page; + struct ohash *macro; + struct macro_entry *entry; + unsigned int slot; + + dba_array_FOREACH(dba->macros, macro) { + for (entry = ohash_first(macro, &slot); entry != NULL; + entry = ohash_next(macro, &slot)) { + dba_array_free(entry->pages); + free(entry); + } + ohash_delete(macro); + free(macro); + } + dba_array_free(dba->macros); + + dba_array_undel(dba->pages); + dba_array_FOREACH(dba->pages, page) { + dba_array_free(dba_array_get(page, DBP_NAME)); + dba_array_free(dba_array_get(page, DBP_SECT)); + dba_array_free(dba_array_get(page, DBP_ARCH)); + free(dba_array_get(page, DBP_DESC)); + dba_array_free(dba_array_get(page, DBP_FILE)); + dba_array_free(page); + } + dba_array_free(dba->pages); + + free(dba); +} + +/* + * Write the complete mandoc database to disk; the format is: + * - One integer each for magic and version. + * - One pointer each to the macros table and to the final magic. + * - The pages table. + * - The macros table. + * - And at the very end, the magic integer again. + */ +int +dba_write(const char *fname, struct dba *dba) +{ + int save_errno; + int32_t pos_end, pos_macros, pos_macros_ptr; + + if (dba_open(fname) == -1) + return -1; + dba_int_write(MANDOCDB_MAGIC); + dba_int_write(MANDOCDB_VERSION); + pos_macros_ptr = dba_skip(1, 2); + dba_pages_write(dba->pages); + pos_macros = dba_tell(); + dba_macros_write(dba->macros); + pos_end = dba_tell(); + dba_int_write(MANDOCDB_MAGIC); + dba_seek(pos_macros_ptr); + dba_int_write(pos_macros); + dba_int_write(pos_end); + if (dba_close() == -1) { + save_errno = errno; + unlink(fname); + errno = save_errno; + return -1; + } + return 0; +} + + +/*** functions for handling pages *************************************/ + +/* + * Create a new page and append it to the pages table. 
+ */ +struct dba_array * +dba_page_new(struct dba_array *pages, const char *arch, + const char *desc, const char *file, enum form form) +{ + struct dba_array *page, *entry; + + page = dba_array_new(DBP_MAX, 0); + entry = dba_array_new(1, DBA_STR | DBA_GROW); + dba_array_add(page, entry); + entry = dba_array_new(1, DBA_STR | DBA_GROW); + dba_array_add(page, entry); + if (arch != NULL && *arch != '\0') { + entry = dba_array_new(1, DBA_STR | DBA_GROW); + dba_array_add(entry, (void *)arch); + } else + entry = NULL; + dba_array_add(page, entry); + dba_array_add(page, mandoc_strdup(desc)); + entry = dba_array_new(1, DBA_STR | DBA_GROW); + dba_array_add(entry, prepend(file, form)); + dba_array_add(page, entry); + dba_array_add(pages, page); + return page; +} + +/* + * Add a section, architecture, or file name to an existing page. + * Passing the NULL pointer for the architecture makes the page MI. + * In that case, any earlier or later architectures are ignored. + */ +void +dba_page_add(struct dba_array *page, int32_t ie, const char *str) +{ + struct dba_array *entries; + char *entry; + + entries = dba_array_get(page, ie); + if (ie == DBP_ARCH) { + if (entries == NULL) + return; + if (str == NULL || *str == '\0') { + dba_array_free(entries); + dba_array_set(page, DBP_ARCH, NULL); + return; + } + } + if (*str == '\0') + return; + dba_array_FOREACH(entries, entry) { + if (ie == DBP_FILE && *entry < ' ') + entry++; + if (strcmp(entry, str) == 0) + return; + } + dba_array_add(entries, (void *)str); +} + +/* + * Add an additional name to an existing page. 
+ */ +void +dba_page_alias(struct dba_array *page, const char *name, uint64_t mask) +{ + struct dba_array *entries; + char *entry; + char maskbyte; + + if (*name == '\0') + return; + maskbyte = mask & NAME_MASK; + entries = dba_array_get(page, DBP_NAME); + dba_array_FOREACH(entries, entry) { + if (strcmp(entry + 1, name) == 0) { + *entry |= maskbyte; + return; + } + } + dba_array_add(entries, prepend(name, maskbyte)); +} + +/* + * Return a pointer to a temporary copy of instr with inbyte prepended. + */ +static void * +prepend(const char *instr, char inbyte) +{ + static char *outstr = NULL; + static size_t outlen = 0; + size_t newlen; + + newlen = strlen(instr) + 1; + if (newlen > outlen) { + outstr = mandoc_realloc(outstr, newlen + 1); + outlen = newlen; + } + *outstr = inbyte; + memcpy(outstr + 1, instr, newlen); + return outstr; +} + +/* + * Write the pages table to disk; the format is: + * - One integer containing the number of pages. + * - For each page, five pointers to the names, sections, + * architectures, description, and file names of the page. + * MI pages write 0 instead of the architecture pointer. + * - One list each for names, sections, architectures, descriptions and + * file names. The description for each page ends with a NUL byte. + * For all the other lists, each string ends with a NUL byte, + * and the last string for a page ends with two NUL bytes. + * - To assure alignment of following integers, + * the end is padded with NUL bytes up to a multiple of four bytes. 
+ */ +static void +dba_pages_write(struct dba_array *pages) +{ + struct dba_array *page, *entry; + int32_t pos_pages, pos_end; + + pos_pages = dba_array_writelen(pages, 5); + dba_array_FOREACH(pages, page) { + dba_array_setpos(page, DBP_NAME, dba_tell()); + entry = dba_array_get(page, DBP_NAME); + dba_array_sort(entry, compare_names); + dba_array_writelst(entry); + } + dba_array_FOREACH(pages, page) { + dba_array_setpos(page, DBP_SECT, dba_tell()); + entry = dba_array_get(page, DBP_SECT); + dba_array_sort(entry, compare_strings); + dba_array_writelst(entry); + } + dba_array_FOREACH(pages, page) { + if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) { + dba_array_setpos(page, DBP_ARCH, dba_tell()); + dba_array_sort(entry, compare_strings); + dba_array_writelst(entry); + } else + dba_array_setpos(page, DBP_ARCH, 0); + } + dba_array_FOREACH(pages, page) { + dba_array_setpos(page, DBP_DESC, dba_tell()); + dba_str_write(dba_array_get(page, DBP_DESC)); + } + dba_array_FOREACH(pages, page) { + dba_array_setpos(page, DBP_FILE, dba_tell()); + dba_array_writelst(dba_array_get(page, DBP_FILE)); + } + pos_end = dba_align(); + dba_seek(pos_pages); + dba_array_FOREACH(pages, page) + dba_array_writepos(page); + dba_seek(pos_end); +} + +static int +compare_names(const void *vp1, const void *vp2) +{ + const char *cp1, *cp2; + int diff; + + cp1 = *(const char * const *)vp1; + cp2 = *(const char * const *)vp2; + return (diff = *cp2 - *cp1) ? diff : + strcasecmp(cp1 + 1, cp2 + 1); +} + +static int +compare_strings(const void *vp1, const void *vp2) +{ + const char *cp1, *cp2; + + cp1 = *(const char * const *)vp1; + cp2 = *(const char * const *)vp2; + return strcmp(cp1, cp2); +} + +/*** functions for handling macros ************************************/ + +/* + * In the hash table for a single macro, look up an entry by + * the macro value or add an empty one if it doesn't exist yet. 
+ */ +static struct macro_entry * +get_macro_entry(struct ohash *macro, const char *value, int32_t np) +{ + struct macro_entry *entry; + size_t len; + unsigned int slot; + + slot = ohash_qlookup(macro, value); + if ((entry = ohash_find(macro, slot)) == NULL) { + len = strlen(value) + 1; + entry = mandoc_malloc(sizeof(*entry) + len); + memcpy(&entry->value, value, len); + entry->pages = dba_array_new(np, DBA_GROW); + ohash_insert(macro, slot, entry); + } + return entry; +} + +/* + * In addition to get_macro_entry(), add multiple page references, + * converting them from the on-disk format (byte offsets in the file) + * to page pointers in memory. + */ +void +dba_macro_new(struct dba *dba, int32_t im, const char *value, + const int32_t *pp) +{ + struct macro_entry *entry; + const int32_t *ip; + int32_t np; + + np = 0; + for (ip = pp; *ip; ip++) + np++; + + entry = get_macro_entry(dba_array_get(dba->macros, im), value, np); + for (ip = pp; *ip; ip++) + dba_array_add(entry->pages, dba_array_get(dba->pages, + be32toh(*ip) / 5 / sizeof(*ip) - 1)); +} + +/* + * In addition to get_macro_entry(), add one page reference, + * directly taking the in-memory page pointer as an argument. + */ +void +dba_macro_add(struct dba_array *macros, int32_t im, const char *value, + struct dba_array *page) +{ + struct macro_entry *entry; + + if (*value == '\0') + return; + entry = get_macro_entry(dba_array_get(macros, im), value, 1); + dba_array_add(entry->pages, page); +} + +/* + * Write the macros table to disk; the format is: + * - The number of macro tables (actually, MACRO_MAX). + * - That number of pointers to the individual macro tables. + * - The individual macro tables. 
+ */ +static void +dba_macros_write(struct dba_array *macros) +{ + struct ohash *macro; + int32_t im, pos_macros, pos_end; + + pos_macros = dba_array_writelen(macros, 1); + im = 0; + dba_array_FOREACH(macros, macro) { + dba_array_setpos(macros, im++, dba_tell()); + dba_macro_write(macro); + } + pos_end = dba_tell(); + dba_seek(pos_macros); + dba_array_writepos(macros); + dba_seek(pos_end); +} + +/* + * Write one individual macro table to disk; the format is: + * - The number of entries in the table. + * - For each entry, two pointers, the first one to the value + * and the second one to the list of pages. + * - A list of values, each ending in a NUL byte. + * - To assure alignment of following integers, + * padding with NUL bytes up to a multiple of four bytes. + * - A list of pointers to pages, each list ending in a 0 integer. + */ +static void +dba_macro_write(struct ohash *macro) +{ + struct macro_entry **entries, *entry; + struct dba_array *page; + int32_t *kpos, *dpos; + unsigned int ie, ne, slot; + int use; + int32_t addr, pos_macro, pos_end; + + /* Temporary storage for filtering and sorting. */ + + ne = ohash_entries(macro); + entries = mandoc_reallocarray(NULL, ne, sizeof(*entries)); + kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos)); + dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos)); + + /* Build a list of non-empty entries and sort it. */ + + ne = 0; + for (entry = ohash_first(macro, &slot); entry != NULL; + entry = ohash_next(macro, &slot)) { + use = 0; + dba_array_FOREACH(entry->pages, page) + if (dba_array_getpos(page)) + use = 1; + if (use) + entries[ne++] = entry; + } + qsort(entries, ne, sizeof(*entries), compare_entries); + + /* Number of entries, and space for the pointer pairs. */ + + dba_int_write(ne); + pos_macro = dba_skip(2, ne); + + /* String table. */ + + for (ie = 0; ie < ne; ie++) { + kpos[ie] = dba_tell(); + dba_str_write(entries[ie]->value); + } + dba_align(); + + /* Pages table. 
*/ + + for (ie = 0; ie < ne; ie++) { + dpos[ie] = dba_tell(); + dba_array_FOREACH(entries[ie]->pages, page) + if ((addr = dba_array_getpos(page))) + dba_int_write(addr); + dba_int_write(0); + } + pos_end = dba_tell(); + + /* Fill in the pointer pairs. */ + + dba_seek(pos_macro); + for (ie = 0; ie < ne; ie++) { + dba_int_write(kpos[ie]); + dba_int_write(dpos[ie]); + } + dba_seek(pos_end); + + free(entries); + free(kpos); + free(dpos); +} + +static int +compare_entries(const void *vp1, const void *vp2) +{ + const struct macro_entry *ep1, *ep2; + + ep1 = *(const struct macro_entry * const *)vp1; + ep2 = *(const struct macro_entry * const *)vp2; + return strcmp(ep1->value, ep2->value); +} diff --git a/usr.bin/mandoc/dba.h b/usr.bin/mandoc/dba.h new file mode 100644 index 0000000..7787958 --- /dev/null +++ b/usr.bin/mandoc/dba.h @@ -0,0 +1,50 @@ +/* $OpenBSD: dba.h,v 1.2 2016/08/17 20:46:06 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Public interface of the allocation-based version + * of the mandoc database, for read-write access. + * To be used by dba.c, dba_read.c, and makewhatis(8). 
+ */ + +#define DBP_NAME 0 +#define DBP_SECT 1 +#define DBP_ARCH 2 +#define DBP_DESC 3 +#define DBP_FILE 4 +#define DBP_MAX 5 + +struct dba_array; + +struct dba { + struct dba_array *pages; + struct dba_array *macros; +}; + + +struct dba *dba_new(int32_t); +void dba_free(struct dba *); +struct dba *dba_read(const char *); +int dba_write(const char *, struct dba *); + +struct dba_array *dba_page_new(struct dba_array *, const char *, + const char *, const char *, enum form); +void dba_page_add(struct dba_array *, int32_t, const char *); +void dba_page_alias(struct dba_array *, const char *, uint64_t); + +void dba_macro_new(struct dba *, int32_t, + const char *, const int32_t *); +void dba_macro_add(struct dba_array *, int32_t, + const char *, struct dba_array *); diff --git a/usr.bin/mandoc/dba_array.c b/usr.bin/mandoc/dba_array.c new file mode 100644 index 0000000..dd08a32 --- /dev/null +++ b/usr.bin/mandoc/dba_array.c @@ -0,0 +1,188 @@ +/* $OpenBSD: dba_array.c,v 1.1 2016/08/01 10:32:39 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Allocation-based arrays for the mandoc database, for read-write access. + * The interface is defined in "dba_array.h". 
+ */ +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "dba_write.h" +#include "dba_array.h" + +struct dba_array { + void **ep; /* Array of entries. */ + int32_t *em; /* Array of map positions. */ + int flags; + int32_t ea; /* Entries allocated. */ + int32_t eu; /* Entries used (including deleted). */ + int32_t ed; /* Entries deleted. */ + int32_t ec; /* Currently active entry. */ + int32_t pos; /* Map position of this array. */ +}; + + +struct dba_array * +dba_array_new(int32_t ea, int flags) +{ + struct dba_array *array; + + assert(ea > 0); + array = mandoc_malloc(sizeof(*array)); + array->ep = mandoc_reallocarray(NULL, ea, sizeof(*array->ep)); + array->em = mandoc_reallocarray(NULL, ea, sizeof(*array->em)); + array->ea = ea; + array->eu = 0; + array->ed = 0; + array->ec = 0; + array->flags = flags; + array->pos = 0; + return array; +} + +void +dba_array_free(struct dba_array *array) +{ + int32_t ie; + + if (array == NULL) + return; + if (array->flags & DBA_STR) + for (ie = 0; ie < array->eu; ie++) + free(array->ep[ie]); + free(array->ep); + free(array->em); + free(array); +} + +void +dba_array_set(struct dba_array *array, int32_t ie, void *entry) +{ + assert(ie >= 0); + assert(ie < array->ea); + assert(ie <= array->eu); + if (ie == array->eu) + array->eu++; + if (array->flags & DBA_STR) + entry = mandoc_strdup(entry); + array->ep[ie] = entry; + array->em[ie] = 0; +} + +void +dba_array_add(struct dba_array *array, void *entry) +{ + if (array->eu == array->ea) { + assert(array->flags & DBA_GROW); + array->ep = mandoc_reallocarray(array->ep, + 2, sizeof(*array->ep) * array->ea); + array->em = mandoc_reallocarray(array->em, + 2, sizeof(*array->em) * array->ea); + array->ea *= 2; + } + dba_array_set(array, array->eu, entry); +} + +void * +dba_array_get(struct dba_array *array, int32_t ie) +{ + if (ie < 0 || ie >= array->eu || array->em[ie] == -1) + return NULL; + return array->ep[ie]; +} + +void 
+dba_array_start(struct dba_array *array) +{ + array->ec = array->eu; +} + +void * +dba_array_next(struct dba_array *array) +{ + if (array->ec < array->eu) + array->ec++; + else + array->ec = 0; + while (array->ec < array->eu && array->em[array->ec] == -1) + array->ec++; + return array->ec < array->eu ? array->ep[array->ec] : NULL; +} + +void +dba_array_del(struct dba_array *array) +{ + if (array->ec < array->eu && array->em[array->ec] != -1) { + array->em[array->ec] = -1; + array->ed++; + } +} + +void +dba_array_undel(struct dba_array *array) +{ + memset(array->em, 0, sizeof(*array->em) * array->eu); +} + +void +dba_array_setpos(struct dba_array *array, int32_t ie, int32_t pos) +{ + array->em[ie] = pos; +} + +int32_t +dba_array_getpos(struct dba_array *array) +{ + return array->pos; +} + +void +dba_array_sort(struct dba_array *array, dba_compare_func func) +{ + assert(array->ed == 0); + qsort(array->ep, array->eu, sizeof(*array->ep), func); +} + +int32_t +dba_array_writelen(struct dba_array *array, int32_t nmemb) +{ + dba_int_write(array->eu - array->ed); + return dba_skip(nmemb, array->eu - array->ed); +} + +void +dba_array_writepos(struct dba_array *array) +{ + int32_t ie; + + array->pos = dba_tell(); + for (ie = 0; ie < array->eu; ie++) + if (array->em[ie] != -1) + dba_int_write(array->em[ie]); +} + +void +dba_array_writelst(struct dba_array *array) +{ + const char *str; + + dba_array_FOREACH(array, str) + dba_str_write(str); + dba_char_write('\0'); +} diff --git a/usr.bin/mandoc/dba_array.h b/usr.bin/mandoc/dba_array.h new file mode 100644 index 0000000..167f68f --- /dev/null +++ b/usr.bin/mandoc/dba_array.h @@ -0,0 +1,47 @@ +/* $OpenBSD: dba_array.h,v 1.1 2016/08/01 10:32:39 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear 
in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Public interface for allocation-based arrays + * for the mandoc database, for read-write access. + * To be used by dba*.c and by makewhatis(8). + */ + +struct dba_array; + +#define DBA_STR 0x01 /* Map contains strings, not pointers. */ +#define DBA_GROW 0x02 /* Allow the array to grow. */ + +#define dba_array_FOREACH(a, e) \ + dba_array_start(a); \ + while (((e) = dba_array_next(a)) != NULL) + +typedef int dba_compare_func(const void *, const void *); + +struct dba_array *dba_array_new(int32_t, int); +void dba_array_free(struct dba_array *); +void dba_array_set(struct dba_array *, int32_t, void *); +void dba_array_add(struct dba_array *, void *); +void *dba_array_get(struct dba_array *, int32_t); +void dba_array_start(struct dba_array *); +void *dba_array_next(struct dba_array *); +void dba_array_del(struct dba_array *); +void dba_array_undel(struct dba_array *); +void dba_array_setpos(struct dba_array *, int32_t, int32_t); +int32_t dba_array_getpos(struct dba_array *); +void dba_array_sort(struct dba_array *, dba_compare_func); +int32_t dba_array_writelen(struct dba_array *, int32_t); +void dba_array_writepos(struct dba_array *); +void dba_array_writelst(struct dba_array *); diff --git a/usr.bin/mandoc/dba_read.c b/usr.bin/mandoc/dba_read.c new file mode 100644 index 0000000..3ab4a39 --- /dev/null +++ b/usr.bin/mandoc/dba_read.c @@ -0,0 +1,72 @@ +/* $OpenBSD: dba_read.c,v 1.4 2016/08/17 20:46:06 schwarze Exp $ */ +/* + * Copyright 
(c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Function to read the mandoc database from disk into RAM, + * such that data can be added or removed. + * The interface is defined in "dba.h". + * This file is separate from dba.c because this also uses "dbm.h". + */ +#include <regex.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mansearch.h" +#include "dba_array.h" +#include "dba.h" +#include "dbm.h" + + +struct dba * +dba_read(const char *fname) +{ + struct dba *dba; + struct dba_array *page; + struct dbm_page *pdata; + struct dbm_macro *mdata; + const char *cp; + int32_t im, ip, iv, npages; + + if (dbm_open(fname) == -1) + return NULL; + npages = dbm_page_count(); + dba = dba_new(npages < 128 ? 
128 : npages); + for (ip = 0; ip < npages; ip++) { + pdata = dbm_page_get(ip); + page = dba_page_new(dba->pages, pdata->arch, + pdata->desc, pdata->file + 1, *pdata->file); + for (cp = pdata->name; *cp != '\0'; cp = strchr(cp, '\0') + 1) + dba_page_add(page, DBP_NAME, cp); + for (cp = pdata->sect; *cp != '\0'; cp = strchr(cp, '\0') + 1) + dba_page_add(page, DBP_SECT, cp); + if ((cp = pdata->arch) != NULL) + while (*(cp = strchr(cp, '\0') + 1) != '\0') + dba_page_add(page, DBP_ARCH, cp); + cp = pdata->file; + while (*(cp = strchr(cp, '\0') + 1) != '\0') + dba_page_add(page, DBP_FILE, cp); + } + for (im = 0; im < MACRO_MAX; im++) { + for (iv = 0; iv < dbm_macro_count(im); iv++) { + mdata = dbm_macro_get(im, iv); + dba_macro_new(dba, im, mdata->value, mdata->pp); + } + } + dbm_close(); + return dba; +} diff --git a/usr.bin/mandoc/dba_write.c b/usr.bin/mandoc/dba_write.c new file mode 100644 index 0000000..ef15dbe --- /dev/null +++ b/usr.bin/mandoc/dba_write.c @@ -0,0 +1,117 @@ +/* $OpenBSD: dba_write.c,v 1.1 2016/08/01 10:32:39 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Low-level functions for serializing allocation-based data to disk. + * The interface is defined in "dba_write.h". 
+ */ +#include <assert.h> +#include <endian.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> + +#include "dba_write.h" + +static FILE *ofp; + + +int +dba_open(const char *fname) +{ + ofp = fopen(fname, "w"); + return ofp == NULL ? -1 : 0; +} + +int +dba_close(void) +{ + return fclose(ofp) == EOF ? -1 : 0; +} + +int32_t +dba_tell(void) +{ + long pos; + + if ((pos = ftell(ofp)) == -1) + err(1, "ftell"); + if (pos >= INT32_MAX) { + errno = EOVERFLOW; + err(1, "ftell = %ld", pos); + } + return pos; +} + +void +dba_seek(int32_t pos) +{ + if (fseek(ofp, pos, SEEK_SET) == -1) + err(1, "fseek(%d)", pos); +} + +int32_t +dba_align(void) +{ + int32_t pos; + + pos = dba_tell(); + while (pos & 3) { + dba_char_write('\0'); + pos++; + } + return pos; +} + +int32_t +dba_skip(int32_t nmemb, int32_t sz) +{ + const int32_t out[5] = {0, 0, 0, 0, 0}; + int32_t i, pos; + + assert(sz >= 0); + assert(nmemb > 0); + assert(nmemb <= 5); + pos = dba_tell(); + for (i = 0; i < sz; i++) + if (nmemb - fwrite(&out, sizeof(out[0]), nmemb, ofp)) + err(1, "fwrite"); + return pos; +} + +void +dba_char_write(int c) +{ + if (putc(c, ofp) == EOF) + err(1, "fputc"); +} + +void +dba_str_write(const char *str) +{ + if (fputs(str, ofp) == EOF) + err(1, "fputs"); + dba_char_write('\0'); +} + +void +dba_int_write(int32_t i) +{ + i = htobe32(i); + if (fwrite(&i, sizeof(i), 1, ofp) != 1) + err(1, "fwrite"); +} diff --git a/usr.bin/mandoc/dba_write.h b/usr.bin/mandoc/dba_write.h new file mode 100644 index 0000000..bbbaa5e --- /dev/null +++ b/usr.bin/mandoc/dba_write.h @@ -0,0 +1,30 @@ +/* $OpenBSD: dba_write.h,v 1.1 2016/08/01 10:32:39 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interface to low-level functions + * for serializing allocation-based data to disk. + * For use by dba_array.c and dba.c only. + */ + +int dba_open(const char *); +int dba_close(void); +int32_t dba_tell(void); +void dba_seek(int32_t); +int32_t dba_align(void); +int32_t dba_skip(int32_t, int32_t); +void dba_char_write(int); +void dba_str_write(const char *); +void dba_int_write(int32_t); diff --git a/usr.bin/mandoc/dbm.c b/usr.bin/mandoc/dbm.c new file mode 100644 index 0000000..261321e --- /dev/null +++ b/usr.bin/mandoc/dbm.c @@ -0,0 +1,474 @@ +/* $OpenBSD: dbm.c,v 1.5 2019/07/01 22:43:03 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * Map-based version of the mandoc database, for read-only access. + * The interface is defined in "dbm.h". + */ +#include <assert.h> +#include <endian.h> +#include <err.h> +#include <errno.h> +#include <regex.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mansearch.h" +#include "dbm_map.h" +#include "dbm.h" + +#ifndef EFTYPE +#define EFTYPE 79 +#endif + +struct macro { + int32_t value; + int32_t pages; +}; + +struct page { + int32_t name; + int32_t sect; + int32_t arch; + int32_t desc; + int32_t file; +}; + +enum iter { + ITER_NONE = 0, + ITER_NAME, + ITER_SECT, + ITER_ARCH, + ITER_DESC, + ITER_MACRO +}; + +static struct macro *macros[MACRO_MAX]; +static int32_t nvals[MACRO_MAX]; +static struct page *pages; +static int32_t npages; +static enum iter iteration; + +static struct dbm_res page_bytitle(enum iter, const struct dbm_match *); +static struct dbm_res page_byarch(const struct dbm_match *); +static struct dbm_res page_bymacro(int32_t, const struct dbm_match *); +static char *macro_bypage(int32_t, int32_t); + + +/*** top level functions **********************************************/ + +/* + * Open a disk-based mandoc database for read-only access. + * Map the pages and macros[] arrays. + * Return 0 on success. Return -1 and set errno on failure. 
+ */ +int +dbm_open(const char *fname) +{ + const int32_t *mp, *ep; + int32_t im; + + if (dbm_map(fname) == -1) + return -1; + + if ((npages = be32toh(*dbm_getint(4))) < 0) { + warnx("dbm_open(%s): Invalid number of pages: %d", + fname, npages); + goto fail; + } + pages = (struct page *)dbm_getint(5); + + if ((mp = dbm_get(*dbm_getint(2))) == NULL) { + warnx("dbm_open(%s): Invalid offset of macros array", fname); + goto fail; + } + if (be32toh(*mp) != MACRO_MAX) { + warnx("dbm_open(%s): Invalid number of macros: %d", + fname, be32toh(*mp)); + goto fail; + } + for (im = 0; im < MACRO_MAX; im++) { + if ((ep = dbm_get(*++mp)) == NULL) { + warnx("dbm_open(%s): Invalid offset of macro %d", + fname, im); + goto fail; + } + nvals[im] = be32toh(*ep); + macros[im] = (struct macro *)++ep; + } + return 0; + +fail: + dbm_unmap(); + errno = EFTYPE; + return -1; +} + +void +dbm_close(void) +{ + dbm_unmap(); +} + + +/*** functions for handling pages *************************************/ + +int32_t +dbm_page_count(void) +{ + return npages; +} + +/* + * Give the caller pointers to the data for one manual page. + */ +struct dbm_page * +dbm_page_get(int32_t ip) +{ + static struct dbm_page res; + + assert(ip >= 0); + assert(ip < npages); + res.name = dbm_get(pages[ip].name); + if (res.name == NULL) + res.name = "(NULL)\0"; + res.sect = dbm_get(pages[ip].sect); + if (res.sect == NULL) + res.sect = "(NULL)\0"; + res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL; + res.desc = dbm_get(pages[ip].desc); + if (res.desc == NULL) + res.desc = "(NULL)"; + res.file = dbm_get(pages[ip].file); + if (res.file == NULL) + res.file = " (NULL)\0"; + res.addr = dbm_addr(pages + ip); + return &res; +} + +/* + * Functions to start filtered iterations over manual pages. 
+ */ +void +dbm_page_byname(const struct dbm_match *match) +{ + assert(match != NULL); + page_bytitle(ITER_NAME, match); +} + +void +dbm_page_bysect(const struct dbm_match *match) +{ + assert(match != NULL); + page_bytitle(ITER_SECT, match); +} + +void +dbm_page_byarch(const struct dbm_match *match) +{ + assert(match != NULL); + page_byarch(match); +} + +void +dbm_page_bydesc(const struct dbm_match *match) +{ + assert(match != NULL); + page_bytitle(ITER_DESC, match); +} + +void +dbm_page_bymacro(int32_t im, const struct dbm_match *match) +{ + assert(im >= 0); + assert(im < MACRO_MAX); + assert(match != NULL); + page_bymacro(im, match); +} + +/* + * Return the number of the next manual page in the current iteration. + */ +struct dbm_res +dbm_page_next(void) +{ + struct dbm_res res = {-1, 0}; + + switch(iteration) { + case ITER_NONE: + return res; + case ITER_ARCH: + return page_byarch(NULL); + case ITER_MACRO: + return page_bymacro(0, NULL); + default: + return page_bytitle(iteration, NULL); + } +} + +/* + * Functions implementing the iteration over manual pages. + */ +static struct dbm_res +page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match) +{ + static const struct dbm_match *match; + static const char *cp; + static int32_t ip; + struct dbm_res res = {-1, 0}; + + assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC || + arg_iter == ITER_SECT); + + /* Initialize for a new iteration. */ + + if (arg_match != NULL) { + iteration = arg_iter; + match = arg_match; + switch (iteration) { + case ITER_NAME: + cp = dbm_get(pages[0].name); + break; + case ITER_SECT: + cp = dbm_get(pages[0].sect); + break; + case ITER_DESC: + cp = dbm_get(pages[0].desc); + break; + default: + abort(); + } + if (cp == NULL) { + iteration = ITER_NONE; + match = NULL; + cp = NULL; + ip = npages; + } else + ip = 0; + return res; + } + + /* Search for a name. 
*/ + + while (ip < npages) { + if (iteration == ITER_NAME) + cp++; + if (dbm_match(match, cp)) + break; + cp = strchr(cp, '\0') + 1; + if (iteration == ITER_DESC) + ip++; + else if (*cp == '\0') { + cp++; + ip++; + } + } + + /* Reached the end without a match. */ + + if (ip == npages) { + iteration = ITER_NONE; + match = NULL; + cp = NULL; + return res; + } + + /* Found a match; save the quality for later retrieval. */ + + res.page = ip; + res.bits = iteration == ITER_NAME ? cp[-1] : 0; + + /* Skip the remaining names of this page. */ + + if (++ip < npages) { + do { + cp++; + } while (cp[-1] != '\0' || + (iteration != ITER_DESC && cp[-2] != '\0')); + } + return res; +} + +static struct dbm_res +page_byarch(const struct dbm_match *arg_match) +{ + static const struct dbm_match *match; + struct dbm_res res = {-1, 0}; + static int32_t ip; + const char *cp; + + /* Initialize for a new iteration. */ + + if (arg_match != NULL) { + iteration = ITER_ARCH; + match = arg_match; + ip = 0; + return res; + } + + /* Search for an architecture. */ + + for ( ; ip < npages; ip++) + if (pages[ip].arch) + for (cp = dbm_get(pages[ip].arch); + *cp != '\0'; + cp = strchr(cp, '\0') + 1) + if (dbm_match(match, cp)) { + res.page = ip++; + return res; + } + + /* Reached the end without a match. */ + + iteration = ITER_NONE; + match = NULL; + return res; +} + +static struct dbm_res +page_bymacro(int32_t arg_im, const struct dbm_match *arg_match) +{ + static const struct dbm_match *match; + static const int32_t *pp; + static const char *cp; + static int32_t im, iv; + struct dbm_res res = {-1, 0}; + + assert(im >= 0); + assert(im < MACRO_MAX); + + /* Initialize for a new iteration. */ + + if (arg_match != NULL) { + iteration = ITER_MACRO; + match = arg_match; + im = arg_im; + cp = nvals[im] ? dbm_get(macros[im]->value) : NULL; + pp = NULL; + iv = -1; + return res; + } + if (iteration != ITER_MACRO) + return res; + + /* Find the next matching macro value. 
*/ + + while (pp == NULL || *pp == 0) { + if (++iv == nvals[im]) { + iteration = ITER_NONE; + return res; + } + if (iv) + cp = strchr(cp, '\0') + 1; + if (dbm_match(match, cp)) + pp = dbm_get(macros[im][iv].pages); + } + + /* Found a matching page. */ + + res.page = (struct page *)dbm_get(*pp++) - pages; + return res; +} + + +/*** functions for handling macros ************************************/ + +int32_t +dbm_macro_count(int32_t im) +{ + assert(im >= 0); + assert(im < MACRO_MAX); + return nvals[im]; +} + +/* + * Give the caller pointers to the data for value number iv of macro im. + * The result is kept in static storage and overwritten by the next call. + */ +struct dbm_macro * +dbm_macro_get(int32_t im, int32_t iv) +{ + static struct dbm_macro macro; + + assert(im >= 0); + assert(im < MACRO_MAX); + assert(iv >= 0); + assert(iv < nvals[im]); + macro.value = dbm_get(macros[im][iv].value); + macro.pp = dbm_get(macros[im][iv].pages); + return &macro; +} + +/* + * Filtered iteration over macro entries. + */ +void +dbm_macro_bypage(int32_t im, int32_t ip) +{ + assert(im >= 0); + assert(im < MACRO_MAX); + assert(ip != 0); + macro_bypage(im, ip); +} + +char * +dbm_macro_next(void) +{ + return macro_bypage(MACRO_MAX, 0); +} + +static char * +macro_bypage(int32_t arg_im, int32_t arg_ip) +{ + static const int32_t *pp; + static int32_t im, ip, iv; + + /* Initialize for a new iteration. */ + + if (arg_im < MACRO_MAX && arg_ip != 0) { + im = arg_im; + ip = arg_ip; + pp = dbm_get(macros[im]->pages); + iv = 0; + return NULL; + } + if (im >= MACRO_MAX) + return NULL; + + /* Search for the next value. */ + + while (iv < nvals[im]) { + if (*pp == ip) + break; + if (*pp == 0) + iv++; + pp++; + } + + /* Reached the end without a match. */ + + if (iv == nvals[im]) { + im = MACRO_MAX; + ip = 0; + pp = NULL; + return NULL; + } + + /* Found a match; skip the remaining pages of this entry. 
*/ + + if (++iv < nvals[im]) + while (*pp++ != 0) + continue; + + return dbm_get(macros[im][iv - 1].value); +} diff --git a/usr.bin/mandoc/dbm.h b/usr.bin/mandoc/dbm.h new file mode 100644 index 0000000..0f12ee1 --- /dev/null +++ b/usr.bin/mandoc/dbm.h @@ -0,0 +1,68 @@ +/* $OpenBSD: dbm.h,v 1.1 2016/08/01 10:32:39 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Public interface for the map-based version + * of the mandoc database, for read-only access. + * To be used by dbm*.c, dba_read.c, and man(1) and apropos(1). 
+ */ + +enum dbm_mtype { + DBM_EXACT = 0, + DBM_SUB, + DBM_REGEX +}; + +struct dbm_match { + regex_t *re; + const char *str; + enum dbm_mtype type; +}; + +struct dbm_res { + int32_t page; + int32_t bits; +}; + +struct dbm_page { + const char *name; + const char *sect; + const char *arch; + const char *desc; + const char *file; + int32_t addr; +}; + +struct dbm_macro { + const char *value; + const int32_t *pp; +}; + +int dbm_open(const char *); +void dbm_close(void); + +int32_t dbm_page_count(void); +struct dbm_page *dbm_page_get(int32_t); +void dbm_page_byname(const struct dbm_match *); +void dbm_page_bysect(const struct dbm_match *); +void dbm_page_byarch(const struct dbm_match *); +void dbm_page_bydesc(const struct dbm_match *); +void dbm_page_bymacro(int32_t, const struct dbm_match *); +struct dbm_res dbm_page_next(void); + +int32_t dbm_macro_count(int32_t); +struct dbm_macro *dbm_macro_get(int32_t, int32_t); +void dbm_macro_bypage(int32_t, int32_t); +char *dbm_macro_next(void); diff --git a/usr.bin/mandoc/dbm_map.c b/usr.bin/mandoc/dbm_map.c new file mode 100644 index 0000000..72b1220 --- /dev/null +++ b/usr.bin/mandoc/dbm_map.c @@ -0,0 +1,188 @@ +/* $OpenBSD: dbm_map.c,v 1.6 2017/02/09 18:26:17 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Low-level routines for the map-based version + * of the mandoc database, for read-only access. + * The interface is defined in "dbm_map.h". + */ +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <endian.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <regex.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mansearch.h" +#include "dbm_map.h" +#include "dbm.h" + +#ifndef EFTYPE +#define EFTYPE 79 +#endif + +static struct stat st; +static char *dbm_base; +static int ifd; +static int32_t max_offset; + +/* + * Open a disk-based database for read-only access. + * Validate the file format as far as it is not mandoc-specific. + * Return 0 on success. Return -1 and set errno on failure. 
+ */ +int +dbm_map(const char *fname) +{ + int save_errno; + int mapped = 0; + const int32_t *magic; + + if ((ifd = open(fname, O_RDONLY)) == -1) + return -1; + if (fstat(ifd, &st) == -1) + goto fail; + if (st.st_size < 5) { + warnx("dbm_map(%s): File too short", fname); + errno = EFTYPE; + goto fail; + } + if (st.st_size > INT32_MAX) { + errno = EFBIG; + goto fail; + } + if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, + ifd, 0)) == MAP_FAILED) + goto fail; + mapped = 1; + magic = dbm_getint(0); + if (be32toh(*magic) != MANDOCDB_MAGIC) { + if (strncmp(dbm_base, "SQLite format 3", 15)) + warnx("dbm_map(%s): " + "Bad initial magic %x (expected %x)", + fname, be32toh(*magic), MANDOCDB_MAGIC); + else + warnx("dbm_map(%s): " + "Obsolete format based on SQLite 3", + fname); + errno = EFTYPE; + goto fail; + } + magic = dbm_getint(1); + if (be32toh(*magic) != MANDOCDB_VERSION) { + warnx("dbm_map(%s): Bad version number %d (expected %d)", + fname, be32toh(*magic), MANDOCDB_VERSION); + errno = EFTYPE; + goto fail; + } + max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t); + if (st.st_size != max_offset) { + warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)", + fname, (long long)st.st_size, max_offset); + errno = EFTYPE; + goto fail; + } + if ((magic = dbm_get(*dbm_getint(3))) == NULL) { + errno = EFTYPE; + goto fail; + } + if (be32toh(*magic) != MANDOCDB_MAGIC) { + warnx("dbm_map(%s): Bad final magic %x (expected %x)", + fname, be32toh(*magic), MANDOCDB_MAGIC); + errno = EFTYPE; + goto fail; + } + return 0; + +fail: + save_errno = errno; + /* Do not leak the mapping when the file is rejected after mmap(2). */ + if (mapped) { + munmap(dbm_base, st.st_size); + dbm_base = (char *)-1; + } + close(ifd); + errno = save_errno; + return -1; +} + +void +dbm_unmap(void) +{ + if (munmap(dbm_base, st.st_size) == -1) + warn("dbm_unmap: munmap"); + if (close(ifd) == -1) + warn("dbm_unmap: close"); + dbm_base = (char *)-1; +} + +/* + * Take a raw integer as it was read from the database. + * Interpret it as an offset into the database file + * and return a pointer to that place in the file. 
+ */ +void * +dbm_get(int32_t offset) +{ + offset = be32toh(offset); + if (offset < 0) { + warnx("dbm_get: Database corrupt: offset %d", offset); + return NULL; + } + if (offset >= max_offset) { + warnx("dbm_get: Database corrupt: offset %d > %d", + offset, max_offset); + return NULL; + } + return dbm_base + offset; +} + +/* + * Assume the database starts with some integers. + * Assume they are numbered starting from 0, increasing. + * Get a pointer to one with the number "offset". + */ +int32_t * +dbm_getint(int32_t offset) +{ + return (int32_t *)dbm_base + offset; +} + +/* + * The reverse of dbm_get(). + * Take pointer into the database file + * and convert it to the raw integer + * that would be used to refer to that place in the file. + */ +int32_t +dbm_addr(const void *p) +{ + return htobe32((const char *)p - dbm_base); +} + +int +dbm_match(const struct dbm_match *match, const char *str) +{ + switch (match->type) { + case DBM_EXACT: + return strcmp(str, match->str) == 0; + case DBM_SUB: + return strcasestr(str, match->str) != NULL; + case DBM_REGEX: + return regexec(match->re, str, 0, NULL, 0) == 0; + default: + abort(); + } +} diff --git a/usr.bin/mandoc/dbm_map.h b/usr.bin/mandoc/dbm_map.h new file mode 100644 index 0000000..94ee82f --- /dev/null +++ b/usr.bin/mandoc/dbm_map.h @@ -0,0 +1,29 @@ +/* $OpenBSD: dbm_map.h,v 1.2 2019/07/01 22:43:03 schwarze Exp $ */ +/* + * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Private interface for low-level routines for the map-based version + * of the mandoc database, for read-only access. + * To be used by dbm*.c only. + */ + +struct dbm_match; + +int dbm_map(const char *); +void dbm_unmap(void); +void *dbm_get(int32_t); +int32_t *dbm_getint(int32_t); +int32_t dbm_addr(const void *); +int dbm_match(const struct dbm_match *, const char *); diff --git a/usr.bin/mandoc/eqn.c b/usr.bin/mandoc/eqn.c new file mode 100644 index 0000000..ad32067 --- /dev/null +++ b/usr.bin/mandoc/eqn.c @@ -0,0 +1,1130 @@ +/* $OpenBSD: eqn.c,v 1.47 2020/01/08 12:09:14 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "eqn.h" +#include "libmandoc.h" +#include "eqn_parse.h" + +#define EQN_NEST_MAX 128 /* maximum nesting of defines */ +#define STRNEQ(p1, sz1, p2, sz2) \ + ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) + +enum eqn_tok { + EQN_TOK_DYAD = 0, + EQN_TOK_VEC, + EQN_TOK_UNDER, + EQN_TOK_BAR, + EQN_TOK_TILDE, + EQN_TOK_HAT, + EQN_TOK_DOT, + EQN_TOK_DOTDOT, + EQN_TOK_FWD, + EQN_TOK_BACK, + EQN_TOK_DOWN, + EQN_TOK_UP, + EQN_TOK_FAT, + EQN_TOK_ROMAN, + EQN_TOK_ITALIC, + EQN_TOK_BOLD, + EQN_TOK_SIZE, + EQN_TOK_SUB, + EQN_TOK_SUP, + EQN_TOK_SQRT, + EQN_TOK_OVER, + EQN_TOK_FROM, + EQN_TOK_TO, + EQN_TOK_BRACE_OPEN, + EQN_TOK_BRACE_CLOSE, + EQN_TOK_GSIZE, + EQN_TOK_GFONT, + EQN_TOK_MARK, + EQN_TOK_LINEUP, + EQN_TOK_LEFT, + EQN_TOK_RIGHT, + EQN_TOK_PILE, + EQN_TOK_LPILE, + EQN_TOK_RPILE, + EQN_TOK_CPILE, + EQN_TOK_MATRIX, + EQN_TOK_CCOL, + EQN_TOK_LCOL, + EQN_TOK_RCOL, + EQN_TOK_DELIM, + EQN_TOK_DEFINE, + EQN_TOK_TDEFINE, + EQN_TOK_NDEFINE, + EQN_TOK_UNDEF, + EQN_TOK_ABOVE, + EQN_TOK__MAX, + EQN_TOK_FUNC, + EQN_TOK_QUOTED, + EQN_TOK_SYM, + EQN_TOK_EOF +}; + +static const char *eqn_toks[EQN_TOK__MAX] = { + "dyad", /* EQN_TOK_DYAD */ + "vec", /* EQN_TOK_VEC */ + "under", /* EQN_TOK_UNDER */ + "bar", /* EQN_TOK_BAR */ + "tilde", /* EQN_TOK_TILDE */ + "hat", /* EQN_TOK_HAT */ + "dot", /* EQN_TOK_DOT */ + "dotdot", /* EQN_TOK_DOTDOT */ + "fwd", /* EQN_TOK_FWD * */ + "back", /* EQN_TOK_BACK */ + "down", /* EQN_TOK_DOWN */ + "up", /* EQN_TOK_UP */ + "fat", /* EQN_TOK_FAT */ + "roman", /* EQN_TOK_ROMAN */ + "italic", /* EQN_TOK_ITALIC */ + "bold", /* EQN_TOK_BOLD */ + "size", /* EQN_TOK_SIZE */ + "sub", /* EQN_TOK_SUB */ + "sup", /* EQN_TOK_SUP */ + "sqrt", /* EQN_TOK_SQRT */ + "over", /* EQN_TOK_OVER */ + "from", /* EQN_TOK_FROM */ + 
"to", /* EQN_TOK_TO */ + "{", /* EQN_TOK_BRACE_OPEN */ + "}", /* EQN_TOK_BRACE_CLOSE */ + "gsize", /* EQN_TOK_GSIZE */ + "gfont", /* EQN_TOK_GFONT */ + "mark", /* EQN_TOK_MARK */ + "lineup", /* EQN_TOK_LINEUP */ + "left", /* EQN_TOK_LEFT */ + "right", /* EQN_TOK_RIGHT */ + "pile", /* EQN_TOK_PILE */ + "lpile", /* EQN_TOK_LPILE */ + "rpile", /* EQN_TOK_RPILE */ + "cpile", /* EQN_TOK_CPILE */ + "matrix", /* EQN_TOK_MATRIX */ + "ccol", /* EQN_TOK_CCOL */ + "lcol", /* EQN_TOK_LCOL */ + "rcol", /* EQN_TOK_RCOL */ + "delim", /* EQN_TOK_DELIM */ + "define", /* EQN_TOK_DEFINE */ + "tdefine", /* EQN_TOK_TDEFINE */ + "ndefine", /* EQN_TOK_NDEFINE */ + "undef", /* EQN_TOK_UNDEF */ + "above", /* EQN_TOK_ABOVE */ +}; + +static const char *const eqn_func[] = { + "acos", "acsc", "and", "arc", "asec", "asin", "atan", + "cos", "cosh", "coth", "csc", "det", "exp", "for", + "if", "lim", "ln", "log", "max", "min", + "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", +}; + +enum eqn_symt { + EQNSYM_alpha = 0, + EQNSYM_beta, + EQNSYM_chi, + EQNSYM_delta, + EQNSYM_epsilon, + EQNSYM_eta, + EQNSYM_gamma, + EQNSYM_iota, + EQNSYM_kappa, + EQNSYM_lambda, + EQNSYM_mu, + EQNSYM_nu, + EQNSYM_omega, + EQNSYM_omicron, + EQNSYM_phi, + EQNSYM_pi, + EQNSYM_ps, + EQNSYM_rho, + EQNSYM_sigma, + EQNSYM_tau, + EQNSYM_theta, + EQNSYM_upsilon, + EQNSYM_xi, + EQNSYM_zeta, + EQNSYM_DELTA, + EQNSYM_GAMMA, + EQNSYM_LAMBDA, + EQNSYM_OMEGA, + EQNSYM_PHI, + EQNSYM_PI, + EQNSYM_PSI, + EQNSYM_SIGMA, + EQNSYM_THETA, + EQNSYM_UPSILON, + EQNSYM_XI, + EQNSYM_inter, + EQNSYM_union, + EQNSYM_prod, + EQNSYM_int, + EQNSYM_sum, + EQNSYM_grad, + EQNSYM_del, + EQNSYM_times, + EQNSYM_cdot, + EQNSYM_nothing, + EQNSYM_approx, + EQNSYM_prime, + EQNSYM_half, + EQNSYM_partial, + EQNSYM_inf, + EQNSYM_muchgreat, + EQNSYM_muchless, + EQNSYM_larrow, + EQNSYM_rarrow, + EQNSYM_pm, + EQNSYM_nequal, + EQNSYM_equiv, + EQNSYM_lessequal, + EQNSYM_moreequal, + EQNSYM_minus, + EQNSYM__MAX +}; + +struct eqnsym { + const char *str; + const char 
*sym; +}; + +static const struct eqnsym eqnsyms[EQNSYM__MAX] = { + { "alpha", "*a" }, /* EQNSYM_alpha */ + { "beta", "*b" }, /* EQNSYM_beta */ + { "chi", "*x" }, /* EQNSYM_chi */ + { "delta", "*d" }, /* EQNSYM_delta */ + { "epsilon", "*e" }, /* EQNSYM_epsilon */ + { "eta", "*y" }, /* EQNSYM_eta */ + { "gamma", "*g" }, /* EQNSYM_gamma */ + { "iota", "*i" }, /* EQNSYM_iota */ + { "kappa", "*k" }, /* EQNSYM_kappa */ + { "lambda", "*l" }, /* EQNSYM_lambda */ + { "mu", "*m" }, /* EQNSYM_mu */ + { "nu", "*n" }, /* EQNSYM_nu */ + { "omega", "*w" }, /* EQNSYM_omega */ + { "omicron", "*o" }, /* EQNSYM_omicron */ + { "phi", "*f" }, /* EQNSYM_phi */ + { "pi", "*p" }, /* EQNSYM_pi */ + { "psi", "*q" }, /* EQNSYM_psi */ + { "rho", "*r" }, /* EQNSYM_rho */ + { "sigma", "*s" }, /* EQNSYM_sigma */ + { "tau", "*t" }, /* EQNSYM_tau */ + { "theta", "*h" }, /* EQNSYM_theta */ + { "upsilon", "*u" }, /* EQNSYM_upsilon */ + { "xi", "*c" }, /* EQNSYM_xi */ + { "zeta", "*z" }, /* EQNSYM_zeta */ + { "DELTA", "*D" }, /* EQNSYM_DELTA */ + { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ + { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ + { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ + { "PHI", "*F" }, /* EQNSYM_PHI */ + { "PI", "*P" }, /* EQNSYM_PI */ + { "PSI", "*Q" }, /* EQNSYM_PSI */ + { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ + { "THETA", "*H" }, /* EQNSYM_THETA */ + { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ + { "XI", "*C" }, /* EQNSYM_XI */ + { "inter", "ca" }, /* EQNSYM_inter */ + { "union", "cu" }, /* EQNSYM_union */ + { "prod", "product" }, /* EQNSYM_prod */ + { "int", "integral" }, /* EQNSYM_int */ + { "sum", "sum" }, /* EQNSYM_sum */ + { "grad", "gr" }, /* EQNSYM_grad */ + { "del", "gr" }, /* EQNSYM_del */ + { "times", "mu" }, /* EQNSYM_times */ + { "cdot", "pc" }, /* EQNSYM_cdot */ + { "nothing", "&" }, /* EQNSYM_nothing */ + { "approx", "~~" }, /* EQNSYM_approx */ + { "prime", "fm" }, /* EQNSYM_prime */ + { "half", "12" }, /* EQNSYM_half */ + { "partial", "pd" }, /* EQNSYM_partial */ + { "inf", "if" }, /* 
EQNSYM_inf */ + { ">>", ">>" }, /* EQNSYM_muchgreat */ + { "<<", "<<" }, /* EQNSYM_muchless */ + { "<-", "<-" }, /* EQNSYM_larrow */ + { "->", "->" }, /* EQNSYM_rarrow */ + { "+-", "+-" }, /* EQNSYM_pm */ + { "!=", "!=" }, /* EQNSYM_nequal */ + { "==", "==" }, /* EQNSYM_equiv */ + { "<=", "<=" }, /* EQNSYM_lessequal */ + { ">=", ">=" }, /* EQNSYM_moreequal */ + { "-", "mi" }, /* EQNSYM_minus */ +}; + +enum parse_mode { + MODE_QUOTED, + MODE_NOSUB, + MODE_SUB, + MODE_TOK +}; + +struct eqn_def { + char *key; + size_t keysz; + char *val; + size_t valsz; +}; + +static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); +static struct eqn_box *eqn_box_makebinary(struct eqn_node *, + struct eqn_box *); +static void eqn_def(struct eqn_node *); +static struct eqn_def *eqn_def_find(struct eqn_node *); +static void eqn_delim(struct eqn_node *); +static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); +static void eqn_undef(struct eqn_node *); + + +struct eqn_node * +eqn_alloc(void) +{ + struct eqn_node *ep; + + ep = mandoc_calloc(1, sizeof(*ep)); + ep->gsize = EQN_DEFSIZE; + return ep; +} + +void +eqn_reset(struct eqn_node *ep) +{ + free(ep->data); + ep->data = ep->start = ep->end = NULL; + ep->sz = ep->toksz = 0; +} + +void +eqn_read(struct eqn_node *ep, const char *p) +{ + char *cp; + + if (ep->data == NULL) { + ep->sz = strlen(p); + ep->data = mandoc_strdup(p); + } else { + ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); + free(ep->data); + ep->data = cp; + } + ep->sz += 1; +} + +/* + * Find the key "key" of the given size within our eqn-defined values. + */ +static struct eqn_def * +eqn_def_find(struct eqn_node *ep) +{ + int i; + + for (i = 0; i < (int)ep->defsz; i++) + if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, + ep->defs[i].keysz, ep->start, ep->toksz)) + return &ep->defs[i]; + + return NULL; +} + +/* + * Parse a token from the input text. 
The modes are: + * MODE_QUOTED: Use *ep->start as the delimiter; the token ends + * before its next occurence. Do not interpret the token in any + * way and return EQN_TOK_QUOTED. All other modes behave like + * MODE_QUOTED when *ep->start is '"'. + * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; + * otherwise, it ends before the next whitespace or brace. + * Do not interpret the token and return EQN_TOK__MAX. + * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an + * alias created with define. If it is an alias, replace it with + * its string value and reparse. + * MODE_TOK: Like MODE_SUB, but also check the token against the list + * of tokens, and if there is a match, return that token. Otherwise, + * if the token matches a symbol, return EQN_TOK_SYM; if it matches + * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for + * a token match, *ep->start is set to an allocated string that the + * caller is expected to free. + * All modes skip whitespace following the end of the token. + */ +static enum eqn_tok +eqn_next(struct eqn_node *ep, enum parse_mode mode) +{ + static int last_len, lim; + + struct eqn_def *def; + size_t start; + int diff, i, quoted; + enum eqn_tok tok; + + /* + * Reset the recursion counter after advancing + * beyond the end of the previous substitution. + */ + if (ep->end - ep->data >= last_len) + lim = 0; + + ep->start = ep->end; + quoted = mode == MODE_QUOTED; + for (;;) { + switch (*ep->start) { + case '\0': + ep->toksz = 0; + return EQN_TOK_EOF; + case '"': + quoted = 1; + break; + case ' ': + case '\t': + case '~': + case '^': + if (quoted) + break; + ep->start++; + continue; + default: + break; + } + if (quoted) { + ep->end = strchr(ep->start + 1, *ep->start); + ep->start++; /* Skip opening quote. 
*/ + if (ep->end == NULL) { + mandoc_msg(MANDOCERR_ARG_QUOTE, + ep->node->line, ep->node->pos, NULL); + ep->end = strchr(ep->start, '\0'); + } + } else { + ep->end = ep->start + 1; + if (*ep->start != '{' && *ep->start != '}') + ep->end += strcspn(ep->end, " ^~\"{}\t"); + } + ep->toksz = ep->end - ep->start; + if (quoted && *ep->end != '\0') + ep->end++; /* Skip closing quote. */ + while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) + ep->end++; + if (quoted) /* Cannot return, may have to strndup. */ + break; + if (mode == MODE_NOSUB) + return EQN_TOK__MAX; + if ((def = eqn_def_find(ep)) == NULL) + break; + if (++lim > EQN_NEST_MAX) { + mandoc_msg(MANDOCERR_ROFFLOOP, + ep->node->line, ep->node->pos, NULL); + return EQN_TOK_EOF; + } + + /* Replace a defined name with its string value. */ + if ((diff = def->valsz - ep->toksz) > 0) { + start = ep->start - ep->data; + ep->sz += diff; + ep->data = mandoc_realloc(ep->data, ep->sz + 1); + ep->start = ep->data + start; + } + if (diff) + memmove(ep->start + def->valsz, ep->start + ep->toksz, + strlen(ep->start + ep->toksz) + 1); + memcpy(ep->start, def->val, def->valsz); + last_len = ep->start - ep->data + def->valsz; + } + if (mode != MODE_TOK) + return quoted ? 
EQN_TOK_QUOTED : EQN_TOK__MAX; + if (quoted) { + ep->start = mandoc_strndup(ep->start, ep->toksz); + return EQN_TOK_QUOTED; + } + for (tok = 0; tok < EQN_TOK__MAX; tok++) + if (STRNEQ(ep->start, ep->toksz, + eqn_toks[tok], strlen(eqn_toks[tok]))) + return tok; + + for (i = 0; i < EQNSYM__MAX; i++) { + if (STRNEQ(ep->start, ep->toksz, + eqnsyms[i].str, strlen(eqnsyms[i].str))) { + mandoc_asprintf(&ep->start, + "\\[%s]", eqnsyms[i].sym); + return EQN_TOK_SYM; + } + } + ep->start = mandoc_strndup(ep->start, ep->toksz); + for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) + if (STRNEQ(ep->start, ep->toksz, + eqn_func[i], strlen(eqn_func[i]))) + return EQN_TOK_FUNC; + return EQN_TOK__MAX; +} + +void +eqn_box_free(struct eqn_box *bp) +{ + if (bp == NULL) + return; + + if (bp->first) + eqn_box_free(bp->first); + if (bp->next) + eqn_box_free(bp->next); + + free(bp->text); + free(bp->left); + free(bp->right); + free(bp->top); + free(bp->bottom); + free(bp); +} + +struct eqn_box * +eqn_box_new(void) +{ + struct eqn_box *bp; + + bp = mandoc_calloc(1, sizeof(*bp)); + bp->expectargs = UINT_MAX; + return bp; +} + +/* + * Allocate a box as the last child of the parent node. + */ +static struct eqn_box * +eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) +{ + struct eqn_box *bp; + + bp = eqn_box_new(); + bp->parent = parent; + bp->parent->args++; + bp->font = bp->parent->font; + bp->size = ep->gsize; + + if (NULL != parent->first) { + parent->last->next = bp; + bp->prev = parent->last; + } else + parent->first = bp; + + parent->last = bp; + return bp; +} + +/* + * Reparent the current last node (of the current parent) under a new + * EQN_SUBEXPR as the first element. + * Then return the new parent. + * The new EQN_SUBEXPR will have a two-child limit. 
+ */ +static struct eqn_box * +eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) +{ + struct eqn_box *b, *newb; + + assert(NULL != parent->last); + b = parent->last; + if (parent->last == parent->first) + parent->first = NULL; + parent->args--; + parent->last = b->prev; + b->prev = NULL; + newb = eqn_box_alloc(ep, parent); + newb->type = EQN_SUBEXPR; + newb->expectargs = 2; + newb->args = 1; + newb->first = newb->last = b; + newb->first->next = NULL; + b->parent = newb; + return newb; +} + +/* + * Parse the "delim" control statement. + */ +static void +eqn_delim(struct eqn_node *ep) +{ + if (ep->end[0] == '\0' || ep->end[1] == '\0') { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, "delim"); + if (ep->end[0] != '\0') + ep->end++; + } else if (strncmp(ep->end, "off", 3) == 0) { + ep->delim = 0; + ep->end += 3; + } else if (strncmp(ep->end, "on", 2) == 0) { + if (ep->odelim && ep->cdelim) + ep->delim = 1; + ep->end += 2; + } else { + ep->odelim = *ep->end++; + ep->cdelim = *ep->end++; + ep->delim = 1; + } +} + +/* + * Undefine a previously-defined string. + */ +static void +eqn_undef(struct eqn_node *ep) +{ + struct eqn_def *def; + + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, "undef"); + return; + } + if ((def = eqn_def_find(ep)) == NULL) + return; + free(def->key); + free(def->val); + def->key = def->val = NULL; + def->keysz = def->valsz = 0; +} + +static void +eqn_def(struct eqn_node *ep) +{ + struct eqn_def *def; + int i; + + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, "define"); + return; + } + + /* + * Search for a key that already exists. + * Create a new key if none is found. + */ + if ((def = eqn_def_find(ep)) == NULL) { + /* Find holes in string array. 
*/ + for (i = 0; i < (int)ep->defsz; i++) + if (0 == ep->defs[i].keysz) + break; + + if (i == (int)ep->defsz) { + ep->defsz++; + ep->defs = mandoc_reallocarray(ep->defs, + ep->defsz, sizeof(struct eqn_def)); + ep->defs[i].key = ep->defs[i].val = NULL; + } + + def = ep->defs + i; + free(def->key); + def->key = mandoc_strndup(ep->start, ep->toksz); + def->keysz = ep->toksz; + } + + if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, "define %s", def->key); + free(def->key); + free(def->val); + def->key = def->val = NULL; + def->keysz = def->valsz = 0; + return; + } + free(def->val); + def->val = mandoc_strndup(ep->start, ep->toksz); + def->valsz = ep->toksz; +} + +void +eqn_parse(struct eqn_node *ep) +{ + struct eqn_box *cur, *nbox, *parent, *split; + const char *cp, *cpn; + char *p; + enum eqn_tok tok; + enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; + int size; + + parent = ep->node->eqn; + assert(parent != NULL); + + /* + * Empty equation. + * Do not add it to the high-level syntax tree. + */ + + if (ep->data == NULL) + return; + + ep->start = ep->end = ep->data; + +next_tok: + tok = eqn_next(ep, MODE_TOK); + switch (tok) { + case EQN_TOK_UNDEF: + eqn_undef(ep); + break; + case EQN_TOK_NDEFINE: + case EQN_TOK_DEFINE: + eqn_def(ep); + break; + case EQN_TOK_TDEFINE: + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || + eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, "tdefine"); + break; + case EQN_TOK_DELIM: + eqn_delim(ep); + break; + case EQN_TOK_GFONT: + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + case EQN_TOK_MARK: + case EQN_TOK_LINEUP: + /* Ignore these. 
*/ + break; + case EQN_TOK_DYAD: + case EQN_TOK_VEC: + case EQN_TOK_UNDER: + case EQN_TOK_BAR: + case EQN_TOK_TILDE: + case EQN_TOK_HAT: + case EQN_TOK_DOT: + case EQN_TOK_DOTDOT: + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + parent = eqn_box_makebinary(ep, parent); + parent->type = EQN_LIST; + parent->expectargs = 1; + parent->font = EQNFONT_ROMAN; + switch (tok) { + case EQN_TOK_DOTDOT: + parent->top = mandoc_strdup("\\[ad]"); + break; + case EQN_TOK_VEC: + parent->top = mandoc_strdup("\\[->]"); + break; + case EQN_TOK_DYAD: + parent->top = mandoc_strdup("\\[<>]"); + break; + case EQN_TOK_TILDE: + parent->top = mandoc_strdup("\\[a~]"); + break; + case EQN_TOK_UNDER: + parent->bottom = mandoc_strdup("\\[ul]"); + break; + case EQN_TOK_BAR: + parent->top = mandoc_strdup("\\[rn]"); + break; + case EQN_TOK_DOT: + parent->top = mandoc_strdup("\\[a.]"); + break; + case EQN_TOK_HAT: + parent->top = mandoc_strdup("\\[ha]"); + break; + default: + abort(); + } + parent = parent->parent; + break; + case EQN_TOK_FWD: + case EQN_TOK_BACK: + case EQN_TOK_DOWN: + case EQN_TOK_UP: + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + case EQN_TOK_FAT: + case EQN_TOK_ROMAN: + case EQN_TOK_ITALIC: + case EQN_TOK_BOLD: + while (parent->args == parent->expectargs) + parent = parent->parent; + /* + * These values apply to the next word or sequence of + * words; thus, we mark that we'll have a child with + * exactly one of those. 
+ */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + parent->expectargs = 1; + switch (tok) { + case EQN_TOK_FAT: + parent->font = EQNFONT_FAT; + break; + case EQN_TOK_ROMAN: + parent->font = EQNFONT_ROMAN; + break; + case EQN_TOK_ITALIC: + parent->font = EQNFONT_ITALIC; + break; + case EQN_TOK_BOLD: + parent->font = EQNFONT_BOLD; + break; + default: + abort(); + } + break; + case EQN_TOK_SIZE: + case EQN_TOK_GSIZE: + /* Accept two values: integral size and a single. */ + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + } + size = mandoc_strntoi(ep->start, ep->toksz, 10); + if (-1 == size) { + mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + } + if (EQN_TOK_GSIZE == tok) { + ep->gsize = size; + break; + } + while (parent->args == parent->expectargs) + parent = parent->parent; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + parent->expectargs = 1; + parent->size = size; + break; + case EQN_TOK_FROM: + case EQN_TOK_TO: + case EQN_TOK_SUB: + case EQN_TOK_SUP: + /* + * We have a left-right-associative expression. + * Repivot under a positional node, open a child scope + * and keep on reading. 
+ */ + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + while (parent->expectargs == 1 && parent->args == 1) + parent = parent->parent; + if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->pos == EQNPOS_SUB || + cur->pos == EQNPOS_SUP || + cur->pos == EQNPOS_SUBSUP || + cur->pos == EQNPOS_SQRT || + cur->pos == EQNPOS_OVER) + break; + if (cur != NULL) + parent = cur->parent; + } + if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { + parent->expectargs = 3; + parent->pos = EQNPOS_SUBSUP; + break; + } + if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { + parent->expectargs = 3; + parent->pos = EQNPOS_FROMTO; + break; + } + parent = eqn_box_makebinary(ep, parent); + switch (tok) { + case EQN_TOK_FROM: + parent->pos = EQNPOS_FROM; + break; + case EQN_TOK_TO: + parent->pos = EQNPOS_TO; + break; + case EQN_TOK_SUP: + parent->pos = EQNPOS_SUP; + break; + case EQN_TOK_SUB: + parent->pos = EQNPOS_SUB; + break; + default: + abort(); + } + break; + case EQN_TOK_SQRT: + while (parent->args == parent->expectargs) + parent = parent->parent; + /* + * Accept a left-right-associative set of arguments just + * like sub and sup and friends but without rebalancing + * under a pivot. + */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_SUBEXPR; + parent->pos = EQNPOS_SQRT; + parent->expectargs = 1; + break; + case EQN_TOK_OVER: + /* + * We have a right-left-associative fraction. + * Close out anything that's currently open, then + * rebalance and continue reading. 
+ */ + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + while (parent->args == parent->expectargs) + parent = parent->parent; + while (EQN_SUBEXPR == parent->type) + parent = parent->parent; + parent = eqn_box_makebinary(ep, parent); + parent->pos = EQNPOS_OVER; + break; + case EQN_TOK_RIGHT: + case EQN_TOK_BRACE_CLOSE: + /* + * Close out the existing brace. + * FIXME: this is a shitty sentinel: we should really + * have a native EQN_BRACE type or whatnot. + */ + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_LIST && + cur->expectargs > 1 && + (tok == EQN_TOK_BRACE_CLOSE || + cur->left != NULL)) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + } + parent = cur; + if (EQN_TOK_RIGHT == tok) { + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->node->line, ep->node->pos, + "%s", eqn_toks[tok]); + break; + } + /* Handling depends on right/left. */ + if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) + parent->right = mandoc_strdup("\\[rc]"); + else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) + parent->right = mandoc_strdup("\\[rf]"); + else + parent->right = + mandoc_strndup(ep->start, ep->toksz); + } + parent = parent->parent; + if (tok == EQN_TOK_BRACE_CLOSE && + (parent->type == EQN_PILE || + parent->type == EQN_MATRIX)) + parent = parent->parent; + /* Close out any "singleton" lists. */ + while (parent->type == EQN_LIST && + parent->expectargs == 1 && + parent->args == 1) + parent = parent->parent; + break; + case EQN_TOK_BRACE_OPEN: + case EQN_TOK_LEFT: + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more + * (just like with the text node). 
+ */ + while (parent->args == parent->expectargs) + parent = parent->parent; + if (EQN_TOK_LEFT == tok && + eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + if (EQN_TOK_LEFT == tok) { + if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) + parent->left = mandoc_strdup("\\[lc]"); + else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) + parent->left = mandoc_strdup("\\[lf]"); + else + parent->left = + mandoc_strndup(ep->start, ep->toksz); + } + break; + case EQN_TOK_PILE: + case EQN_TOK_LPILE: + case EQN_TOK_RPILE: + case EQN_TOK_CPILE: + case EQN_TOK_CCOL: + case EQN_TOK_LCOL: + case EQN_TOK_RCOL: + while (parent->args == parent->expectargs) + parent = parent->parent; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_PILE; + parent->expectargs = 1; + break; + case EQN_TOK_ABOVE: + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_PILE) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, + ep->node->pos, "%s", eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, cur); + parent->type = EQN_LIST; + break; + case EQN_TOK_MATRIX: + while (parent->args == parent->expectargs) + parent = parent->parent; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_MATRIX; + parent->expectargs = 1; + break; + case EQN_TOK_EOF: + return; + case EQN_TOK__MAX: + case EQN_TOK_FUNC: + case EQN_TOK_QUOTED: + case EQN_TOK_SYM: + p = ep->start; + assert(p != NULL); + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more. 
+ */ + while (parent->args == parent->expectargs) + parent = parent->parent; + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = p; + switch (tok) { + case EQN_TOK_FUNC: + cur->font = EQNFONT_ROMAN; + break; + case EQN_TOK_QUOTED: + if (cur->font == EQNFONT_NONE) + cur->font = EQNFONT_ITALIC; + break; + case EQN_TOK_SYM: + break; + default: + if (cur->font != EQNFONT_NONE || *p == '\0') + break; + cpn = p - 1; + ccln = CCL_LET; + split = NULL; + for (;;) { + /* Advance to next character. */ + cp = cpn++; + ccl = ccln; + ccln = isalpha((unsigned char)*cpn) ? CCL_LET : + isdigit((unsigned char)*cpn) || + (*cpn == '.' && (ccl == CCL_DIG || + isdigit((unsigned char)cpn[1]))) ? + CCL_DIG : CCL_PUN; + /* No boundary before first character. */ + if (cp < p) + continue; + cur->font = ccl == CCL_LET ? + EQNFONT_ITALIC : EQNFONT_ROMAN; + if (*cp == '\\') + mandoc_escape(&cpn, NULL, NULL); + /* No boundary after last character. */ + if (*cpn == '\0') + break; + if (ccln == ccl && *cp != ',' && *cpn != ',') + continue; + /* Boundary found, split the text. */ + if (parent->args == parent->expectargs) { + /* Remove the text from the tree. */ + if (cur->prev == NULL) + parent->first = cur->next; + else + cur->prev->next = NULL; + parent->last = cur->prev; + parent->args--; + /* Set up a list instead. */ + split = eqn_box_alloc(ep, parent); + split->type = EQN_LIST; + /* Insert the word into the list. */ + split->first = split->last = cur; + cur->parent = split; + cur->prev = NULL; + parent = split; + } + /* Append a new text box. */ + nbox = eqn_box_alloc(ep, parent); + nbox->type = EQN_TEXT; + nbox->text = mandoc_strdup(cpn); + /* Truncate the old box. */ + p = mandoc_strndup(cur->text, + cpn - cur->text); + free(cur->text); + cur->text = p; + /* Setup to process the new box. 
*/ + cur = nbox; + p = nbox->text; + cpn = p - 1; + ccln = CCL_LET; + } + if (split != NULL) + parent = split->parent; + break; + } + break; + default: + abort(); + } + goto next_tok; +} + +void +eqn_free(struct eqn_node *p) +{ + int i; + + if (p == NULL) + return; + + for (i = 0; i < (int)p->defsz; i++) { + free(p->defs[i].key); + free(p->defs[i].val); + } + + free(p->data); + free(p->defs); + free(p); +} diff --git a/usr.bin/mandoc/eqn.h b/usr.bin/mandoc/eqn.h new file mode 100644 index 0000000..0eff4a4 --- /dev/null +++ b/usr.bin/mandoc/eqn.h @@ -0,0 +1,72 @@ +/* $OpenBSD: eqn.h,v 1.1 2018/12/13 05:13:15 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Public data types for eqn(7) syntax trees. + */ + +enum eqn_boxt { + EQN_TEXT, /* Text, e.g. number, variable, operator, ... */ + EQN_SUBEXPR, /* Nested eqn(7) subexpression. */ + EQN_LIST, /* List, for example in braces. */ + EQN_PILE, /* Vertical pile. */ + EQN_MATRIX /* List of columns. 
*/ +}; + +enum eqn_fontt { + EQNFONT_NONE = 0, + EQNFONT_ROMAN, + EQNFONT_BOLD, + EQNFONT_FAT, + EQNFONT_ITALIC, + EQNFONT__MAX +}; + +enum eqn_post { + EQNPOS_NONE = 0, + EQNPOS_SUP, + EQNPOS_SUBSUP, + EQNPOS_SUB, + EQNPOS_TO, + EQNPOS_FROM, + EQNPOS_FROMTO, + EQNPOS_OVER, + EQNPOS_SQRT, + EQNPOS__MAX +}; + + /* + * A "box" is a parsed mathematical expression as defined by the eqn.7 + * grammar. + */ +struct eqn_box { + struct eqn_box *parent; + struct eqn_box *prev; + struct eqn_box *next; + struct eqn_box *first; /* First child node. */ + struct eqn_box *last; /* Last child node. */ + char *text; /* Text (or NULL). */ + char *left; /* Left-hand fence. */ + char *right; /* Right-hand fence. */ + char *top; /* Symbol above. */ + char *bottom; /* Symbol below. */ + size_t expectargs; /* Maximal number of arguments. */ + size_t args; /* Actual number of arguments. */ + int size; /* Font size. */ +#define EQN_DEFSIZE INT_MIN + enum eqn_boxt type; /* Type of node. */ + enum eqn_fontt font; /* Font in this box. */ + enum eqn_post pos; /* Position of the next box. */ +}; diff --git a/usr.bin/mandoc/eqn_html.c b/usr.bin/mandoc/eqn_html.c new file mode 100644 index 0000000..049bbf9 --- /dev/null +++ b/usr.bin/mandoc/eqn_html.c @@ -0,0 +1,244 @@ +/* $OpenBSD: eqn_html.c,v 1.15 2019/03/17 18:20:07 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "eqn.h" +#include "out.h" +#include "html.h" + +static void +eqn_box(struct html *p, const struct eqn_box *bp) +{ + struct tag *post, *row, *cell, *t; + const struct eqn_box *child, *parent; + const char *cp; + size_t i, j, rows; + enum htmltag tag; + enum eqn_fontt font; + + if (NULL == bp) + return; + + post = NULL; + + /* + * Special handling for a matrix, which is presented to us in + * column order, but must be printed in row-order. + */ + if (EQN_MATRIX == bp->type) { + if (NULL == bp->first) + goto out; + if (bp->first->type != EQN_LIST || + bp->first->expectargs == 1) { + eqn_box(p, bp->first); + goto out; + } + if (NULL == (parent = bp->first->first)) + goto out; + /* Estimate the number of rows, first. */ + if (NULL == (child = parent->first)) + goto out; + for (rows = 0; NULL != child; rows++) + child = child->next; + /* Print row-by-row. */ + post = print_otag(p, TAG_MTABLE, ""); + for (i = 0; i < rows; i++) { + parent = bp->first->first; + row = print_otag(p, TAG_MTR, ""); + while (NULL != parent) { + child = parent->first; + for (j = 0; j < i; j++) { + if (NULL == child) + break; + child = child->next; + } + cell = print_otag(p, TAG_MTD, ""); + /* + * If we have no data for this + * particular cell, then print a + * placeholder and continue--don't puke. 
+ */ + if (NULL != child) + eqn_box(p, child->first); + print_tagq(p, cell); + parent = parent->next; + } + print_tagq(p, row); + } + goto out; + } + + switch (bp->pos) { + case EQNPOS_TO: + post = print_otag(p, TAG_MOVER, ""); + break; + case EQNPOS_SUP: + post = print_otag(p, TAG_MSUP, ""); + break; + case EQNPOS_FROM: + post = print_otag(p, TAG_MUNDER, ""); + break; + case EQNPOS_SUB: + post = print_otag(p, TAG_MSUB, ""); + break; + case EQNPOS_OVER: + post = print_otag(p, TAG_MFRAC, ""); + break; + case EQNPOS_FROMTO: + post = print_otag(p, TAG_MUNDEROVER, ""); + break; + case EQNPOS_SUBSUP: + post = print_otag(p, TAG_MSUBSUP, ""); + break; + case EQNPOS_SQRT: + post = print_otag(p, TAG_MSQRT, ""); + break; + default: + break; + } + + if (bp->top || bp->bottom) { + assert(NULL == post); + if (bp->top && NULL == bp->bottom) + post = print_otag(p, TAG_MOVER, ""); + else if (bp->top && bp->bottom) + post = print_otag(p, TAG_MUNDEROVER, ""); + else if (bp->bottom) + post = print_otag(p, TAG_MUNDER, ""); + } + + if (EQN_PILE == bp->type) { + assert(NULL == post); + if (bp->first != NULL && + bp->first->type == EQN_LIST && + bp->first->expectargs > 1) + post = print_otag(p, TAG_MTABLE, ""); + } else if (bp->type == EQN_LIST && bp->expectargs > 1 && + bp->parent && bp->parent->type == EQN_PILE) { + assert(NULL == post); + post = print_otag(p, TAG_MTR, ""); + print_otag(p, TAG_MTD, ""); + } + + if (bp->text != NULL) { + assert(post == NULL); + tag = TAG_MI; + cp = bp->text; + if (isdigit((unsigned char)cp[0]) || + (cp[0] == '.' && isdigit((unsigned char)cp[1]))) { + tag = TAG_MN; + while (*++cp != '\0') { + if (*cp != '.' 
&& + isdigit((unsigned char)*cp) == 0) { + tag = TAG_MI; + break; + } + } + } else if (*cp != '\0' && isalpha((unsigned char)*cp) == 0) { + tag = TAG_MO; + while (*cp != '\0') { + if (cp[0] == '\\' && cp[1] != '\0') { + cp++; + mandoc_escape(&cp, NULL, NULL); + } else if (isalnum((unsigned char)*cp)) { + tag = TAG_MI; + break; + } else + cp++; + } + } + font = bp->font; + if (bp->text[0] != '\0' && + (((tag == TAG_MN || tag == TAG_MO) && + font == EQNFONT_ROMAN) || + (tag == TAG_MI && font == (bp->text[1] == '\0' ? + EQNFONT_ITALIC : EQNFONT_ROMAN)))) + font = EQNFONT_NONE; + switch (font) { + case EQNFONT_NONE: + post = print_otag(p, tag, ""); + break; + case EQNFONT_ROMAN: + post = print_otag(p, tag, "?", "fontstyle", "normal"); + break; + case EQNFONT_BOLD: + case EQNFONT_FAT: + post = print_otag(p, tag, "?", "fontweight", "bold"); + break; + case EQNFONT_ITALIC: + post = print_otag(p, tag, "?", "fontstyle", "italic"); + break; + default: + abort(); + } + print_text(p, bp->text); + } else if (NULL == post) { + if (NULL != bp->left || NULL != bp->right) + post = print_otag(p, TAG_MFENCED, "??", + "open", bp->left == NULL ? "" : bp->left, + "close", bp->right == NULL ? 
"" : bp->right); + if (NULL == post) + post = print_otag(p, TAG_MROW, ""); + else + print_otag(p, TAG_MROW, ""); + } + + eqn_box(p, bp->first); + +out: + if (NULL != bp->bottom) { + t = print_otag(p, TAG_MO, ""); + print_text(p, bp->bottom); + print_tagq(p, t); + } + if (NULL != bp->top) { + t = print_otag(p, TAG_MO, ""); + print_text(p, bp->top); + print_tagq(p, t); + } + + if (NULL != post) + print_tagq(p, post); + + eqn_box(p, bp->next); +} + +void +print_eqn(struct html *p, const struct eqn_box *bp) +{ + struct tag *t; + + if (bp->first == NULL) + return; + + t = print_otag(p, TAG_MATH, "c", "eqn"); + + p->flags |= HTML_NONOSPACE; + eqn_box(p, bp); + p->flags &= ~HTML_NONOSPACE; + + print_tagq(p, t); +} diff --git a/usr.bin/mandoc/eqn_parse.h b/usr.bin/mandoc/eqn_parse.h new file mode 100644 index 0000000..0a8e619 --- /dev/null +++ b/usr.bin/mandoc/eqn_parse.h @@ -0,0 +1,48 @@ +/* $OpenBSD: eqn_parse.h,v 1.3 2018/12/14 06:33:03 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * External interface of the eqn(7) parser. + * For use in the roff(7) and eqn(7) parsers only. 
+ */ + +struct roff_node; +struct eqn_box; +struct eqn_def; + +struct eqn_node { + struct roff_node *node; /* Syntax tree of this equation. */ + struct eqn_def *defs; /* Array of definitions. */ + char *data; /* Source code of this equation. */ + char *start; /* First byte of the current token. */ + char *end; /* First byte of the next token. */ + size_t defsz; /* Number of definitions. */ + size_t sz; /* Length of the source code. */ + size_t toksz; /* Length of the current token. */ + int gsize; /* Default point size. */ + int delim; /* In-line delimiters enabled. */ + char odelim; /* In-line opening delimiter. */ + char cdelim; /* In-line closing delimiter. */ +}; + + +struct eqn_node *eqn_alloc(void); +struct eqn_box *eqn_box_new(void); +void eqn_box_free(struct eqn_box *); +void eqn_free(struct eqn_node *); +void eqn_parse(struct eqn_node *); +void eqn_read(struct eqn_node *, const char *); +void eqn_reset(struct eqn_node *); diff --git a/usr.bin/mandoc/eqn_term.c b/usr.bin/mandoc/eqn_term.c new file mode 100644 index 0000000..7bb1aae --- /dev/null +++ b/usr.bin/mandoc/eqn_term.c @@ -0,0 +1,172 @@ +/* $OpenBSD: eqn_term.c,v 1.15 2018/12/13 05:13:15 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "eqn.h" +#include "out.h" +#include "term.h" + +static const enum termfont fontmap[EQNFONT__MAX] = { + TERMFONT_NONE, /* EQNFONT_NONE */ + TERMFONT_NONE, /* EQNFONT_ROMAN */ + TERMFONT_BOLD, /* EQNFONT_BOLD */ + TERMFONT_BOLD, /* EQNFONT_FAT */ + TERMFONT_UNDER /* EQNFONT_ITALIC */ +}; + +static void eqn_box(struct termp *, const struct eqn_box *); + + +void +term_eqn(struct termp *p, const struct eqn_box *bp) +{ + + eqn_box(p, bp); + p->flags &= ~TERMP_NOSPACE; +} + +static void +eqn_box(struct termp *p, const struct eqn_box *bp) +{ + const struct eqn_box *child; + const char *cp; + int delim; + + /* Delimiters around this box? */ + + if ((bp->type == EQN_LIST && bp->expectargs > 1) || + (bp->type == EQN_PILE && (bp->prev || bp->next)) || + (bp->parent != NULL && (bp->parent->pos == EQNPOS_SQRT || + /* Diacritic followed by ^ or _. */ + ((bp->top != NULL || bp->bottom != NULL) && + bp->parent->type == EQN_SUBEXPR && + bp->parent->pos != EQNPOS_OVER && bp->next != NULL) || + /* Nested over, sub, sup, from, to. 
*/ + (bp->type == EQN_SUBEXPR && bp->pos != EQNPOS_SQRT && + ((bp->parent->type == EQN_LIST && bp->expectargs == 1) || + (bp->parent->type == EQN_SUBEXPR && + bp->pos != EQNPOS_SQRT)))))) { + if ((bp->parent->type == EQN_SUBEXPR && bp->prev != NULL) || + (bp->type == EQN_LIST && + bp->first != NULL && + bp->first->type != EQN_PILE && + bp->first->type != EQN_MATRIX && + bp->prev != NULL && + (bp->prev->type == EQN_LIST || + (bp->prev->type == EQN_TEXT && + (*bp->prev->text == '\\' || + isalpha((unsigned char)*bp->prev->text)))))) + p->flags |= TERMP_NOSPACE; + term_word(p, bp->left != NULL ? bp->left : "("); + p->flags |= TERMP_NOSPACE; + delim = 1; + } else + delim = 0; + + /* Handle Fonts and text. */ + + if (bp->font != EQNFONT_NONE) + term_fontpush(p, fontmap[(int)bp->font]); + + if (bp->text != NULL) { + if (strchr("!\"'),.:;?]}", *bp->text) != NULL) + p->flags |= TERMP_NOSPACE; + term_word(p, bp->text); + if ((cp = strchr(bp->text, '\0')) > bp->text && + (strchr("\"'([{", cp[-1]) != NULL || + (bp->prev == NULL && (cp[-1] == '-' || + (cp >= bp->text + 5 && + strcmp(cp - 5, "\\[mi]") == 0))))) + p->flags |= TERMP_NOSPACE; + } + + /* Special box types. */ + + if (bp->pos == EQNPOS_SQRT) { + term_word(p, "\\(sr"); + if (bp->first != NULL) { + p->flags |= TERMP_NOSPACE; + eqn_box(p, bp->first); + } + } else if (bp->type == EQN_SUBEXPR) { + child = bp->first; + eqn_box(p, child); + p->flags |= TERMP_NOSPACE; + term_word(p, bp->pos == EQNPOS_OVER ? "/" : + (bp->pos == EQNPOS_SUP || + bp->pos == EQNPOS_TO) ? 
"^" : "_"); + child = child->next; + if (child != NULL) { + p->flags |= TERMP_NOSPACE; + eqn_box(p, child); + if (bp->pos == EQNPOS_FROMTO || + bp->pos == EQNPOS_SUBSUP) { + p->flags |= TERMP_NOSPACE; + term_word(p, "^"); + p->flags |= TERMP_NOSPACE; + child = child->next; + if (child != NULL) + eqn_box(p, child); + } + } + } else { + child = bp->first; + if (bp->type == EQN_MATRIX && + child != NULL && + child->type == EQN_LIST && + child->expectargs > 1) + child = child->first; + while (child != NULL) { + eqn_box(p, + bp->type == EQN_PILE && + child->type == EQN_LIST && + child->expectargs > 1 && + child->args == 1 ? + child->first : child); + child = child->next; + } + } + + /* Handle Fonts and diacritics. */ + + if (bp->font != EQNFONT_NONE) + term_fontpop(p); + if (bp->top != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, bp->top); + } + if (bp->bottom != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, "_"); + } + + /* Right delimiter after this box? */ + + if (delim) { + p->flags |= TERMP_NOSPACE; + term_word(p, bp->right != NULL ? bp->right : ")"); + if (bp->parent->type == EQN_SUBEXPR && bp->next != NULL) + p->flags |= TERMP_NOSPACE; + } +} diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c new file mode 100644 index 0000000..0225e66 --- /dev/null +++ b/usr.bin/mandoc/html.c @@ -0,0 +1,1085 @@ +/* $OpenBSD: html.c,v 1.141 2020/04/20 12:59:24 schwarze Exp $ */ +/* + * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Common functions for mandoc(1) HTML formatters. + * For use by individual formatters and by the main program. + */ +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc.h" +#include "roff.h" +#include "out.h" +#include "html.h" +#include "manconf.h" +#include "main.h" + +struct htmldata { + const char *name; + int flags; +#define HTML_INPHRASE (1 << 0) /* Can appear in phrasing context. */ +#define HTML_TOPHRASE (1 << 1) /* Establishes phrasing context. */ +#define HTML_NOSTACK (1 << 2) /* Does not have an end tag. */ +#define HTML_NLBEFORE (1 << 3) /* Output line break before opening. */ +#define HTML_NLBEGIN (1 << 4) /* Output line break after opening. */ +#define HTML_NLEND (1 << 5) /* Output line break before closing. */ +#define HTML_NLAFTER (1 << 6) /* Output line break after closing. */ +#define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) +#define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) +#define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) +#define HTML_INDENT (1 << 7) /* Indent content by two spaces. */ +#define HTML_NOINDENT (1 << 8) /* Exception: never indent content. 
*/ +}; + +static const struct htmldata htmltags[TAG_MAX] = { + {"html", HTML_NLALL}, + {"head", HTML_NLALL | HTML_INDENT}, + {"meta", HTML_NOSTACK | HTML_NLALL}, + {"link", HTML_NOSTACK | HTML_NLALL}, + {"style", HTML_NLALL | HTML_INDENT}, + {"title", HTML_NLAROUND}, + {"body", HTML_NLALL}, + {"div", HTML_NLAROUND}, + {"section", HTML_NLALL}, + {"table", HTML_NLALL | HTML_INDENT}, + {"tr", HTML_NLALL | HTML_INDENT}, + {"td", HTML_NLAROUND}, + {"li", HTML_NLAROUND | HTML_INDENT}, + {"ul", HTML_NLALL | HTML_INDENT}, + {"ol", HTML_NLALL | HTML_INDENT}, + {"dl", HTML_NLALL | HTML_INDENT}, + {"dt", HTML_NLAROUND}, + {"dd", HTML_NLAROUND | HTML_INDENT}, + {"h1", HTML_TOPHRASE | HTML_NLAROUND}, + {"h2", HTML_TOPHRASE | HTML_NLAROUND}, + {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT}, + {"pre", HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT}, + {"a", HTML_INPHRASE | HTML_TOPHRASE}, + {"b", HTML_INPHRASE | HTML_TOPHRASE}, + {"cite", HTML_INPHRASE | HTML_TOPHRASE}, + {"code", HTML_INPHRASE | HTML_TOPHRASE}, + {"i", HTML_INPHRASE | HTML_TOPHRASE}, + {"small", HTML_INPHRASE | HTML_TOPHRASE}, + {"span", HTML_INPHRASE | HTML_TOPHRASE}, + {"var", HTML_INPHRASE | HTML_TOPHRASE}, + {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL}, + {"mark", HTML_INPHRASE }, + {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT}, + {"mrow", 0}, + {"mi", 0}, + {"mn", 0}, + {"mo", 0}, + {"msup", 0}, + {"msub", 0}, + {"msubsup", 0}, + {"mfrac", 0}, + {"msqrt", 0}, + {"mfenced", 0}, + {"mtable", 0}, + {"mtr", 0}, + {"mtd", 0}, + {"munderover", 0}, + {"munder", 0}, + {"mover", 0}, +}; + +/* Avoid duplicate HTML id= attributes. */ + +struct id_entry { + int ord; /* Ordinal number of the latest occurrence. */ + char id[]; /* The id= attribute without any ordinal suffix. 
*/ +}; +static struct ohash id_unique; + +static void html_reset_internal(struct html *); +static void print_byte(struct html *, char); +static void print_endword(struct html *); +static void print_indent(struct html *); +static void print_word(struct html *, const char *); + +static void print_ctag(struct html *, struct tag *); +static int print_escape(struct html *, char); +static int print_encode(struct html *, const char *, const char *, int); +static void print_href(struct html *, const char *, const char *, int); +static void print_metaf(struct html *); + + +void * +html_alloc(const struct manoutput *outopts) +{ + struct html *h; + + h = mandoc_calloc(1, sizeof(struct html)); + + h->tag = NULL; + h->style = outopts->style; + if ((h->base_man1 = outopts->man) == NULL) + h->base_man2 = NULL; + else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL) + *h->base_man2++ = '\0'; + h->base_includes = outopts->includes; + if (outopts->fragment) + h->oflags |= HTML_FRAGMENT; + if (outopts->toc) + h->oflags |= HTML_TOC; + + mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); + + return h; +} + +static void +html_reset_internal(struct html *h) +{ + struct tag *tag; + struct id_entry *entry; + unsigned int slot; + + while ((tag = h->tag) != NULL) { + h->tag = tag->next; + free(tag); + } + entry = ohash_first(&id_unique, &slot); + while (entry != NULL) { + free(entry); + entry = ohash_next(&id_unique, &slot); + } + ohash_delete(&id_unique); +} + +void +html_reset(void *p) +{ + html_reset_internal(p); + mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id)); +} + +void +html_free(void *p) +{ + html_reset_internal(p); + free(p); +} + +void +print_gen_head(struct html *h) +{ + struct tag *t; + + print_otag(h, TAG_META, "?", "charset", "utf-8"); + if (h->style != NULL) { + print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet", + h->style, "type", "text/css", "media", "all"); + return; + } + + /* + * Print a minimal embedded style sheet. 
+ */ + + t = print_otag(h, TAG_STYLE, ""); + print_text(h, "table.head, table.foot { width: 100%; }"); + print_endline(h); + print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }"); + print_endline(h); + print_text(h, "td.head-vol { text-align: center; }"); + print_endline(h); + print_text(h, ".Nd, .Bf, .Op { display: inline; }"); + print_endline(h); + print_text(h, ".Pa, .Ad { font-style: italic; }"); + print_endline(h); + print_text(h, ".Ms { font-weight: bold; }"); + print_endline(h); + print_text(h, ".Bl-diag "); + print_byte(h, '>'); + print_text(h, " dt { font-weight: bold; }"); + print_endline(h); + print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd " + "{ font-weight: bold; font-family: inherit; }"); + print_tagq(h, t); +} + +int +html_setfont(struct html *h, enum mandoc_esc font) +{ + switch (font) { + case ESCAPE_FONTPREV: + font = h->metal; + break; + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTBI: + case ESCAPE_FONTCW: + case ESCAPE_FONTROMAN: + break; + case ESCAPE_FONT: + font = ESCAPE_FONTROMAN; + break; + default: + return 0; + } + h->metal = h->metac; + h->metac = font; + return 1; +} + +static void +print_metaf(struct html *h) +{ + if (h->metaf) { + print_tagq(h, h->metaf); + h->metaf = NULL; + } + switch (h->metac) { + case ESCAPE_FONTITALIC: + h->metaf = print_otag(h, TAG_I, ""); + break; + case ESCAPE_FONTBOLD: + h->metaf = print_otag(h, TAG_B, ""); + break; + case ESCAPE_FONTBI: + h->metaf = print_otag(h, TAG_B, ""); + print_otag(h, TAG_I, ""); + break; + case ESCAPE_FONTCW: + h->metaf = print_otag(h, TAG_SPAN, "c", "Li"); + break; + default: + break; + } +} + +void +html_close_paragraph(struct html *h) +{ + struct tag *this, *next; + int flags; + + this = h->tag; + for (;;) { + next = this->next; + flags = htmltags[this->tag].flags; + if (flags & (HTML_INPHRASE | HTML_TOPHRASE)) + print_ctag(h, this); + if ((flags & HTML_INPHRASE) == 0) + break; + this = next; + } +} + +/* + * ROFF_nf switches to 
no-fill mode, ROFF_fi to fill mode. + * TOKEN_NONE does not switch. The old mode is returned. + */ +enum roff_tok +html_fillmode(struct html *h, enum roff_tok want) +{ + struct tag *t; + enum roff_tok had; + + for (t = h->tag; t != NULL; t = t->next) + if (t->tag == TAG_PRE) + break; + + had = t == NULL ? ROFF_fi : ROFF_nf; + + if (want != had) { + switch (want) { + case ROFF_fi: + print_tagq(h, t); + break; + case ROFF_nf: + html_close_paragraph(h); + print_otag(h, TAG_PRE, ""); + break; + case TOKEN_NONE: + break; + default: + abort(); + } + } + return had; +} + +/* + * Allocate a string to be used for the "id=" attribute of an HTML + * element and/or as a segment identifier for a URI in an <a> element. + * The function may fail and return NULL if the node lacks text data + * to create the attribute from. + * The caller is responsible for free(3)ing the returned string. + * + * If the "unique" argument is non-zero, the "id_unique" ohash table + * is used for de-duplication. If the "unique" argument is 1, + * it is the first time the function is called for this tag and + * location, so if an ordinal suffix is needed, it is incremented. + * If the "unique" argument is 2, it is the second time the function + * is called for this tag and location, so the ordinal suffix + * remains unchanged. 
+ */ +char * +html_make_id(const struct roff_node *n, int unique) +{ + const struct roff_node *nch; + struct id_entry *entry; + char *buf, *cp; + size_t len; + unsigned int slot; + + if (n->tag != NULL) + buf = mandoc_strdup(n->tag); + else { + switch (n->tok) { + case MDOC_Sh: + case MDOC_Ss: + case MDOC_Sx: + case MAN_SH: + case MAN_SS: + for (nch = n->child; nch != NULL; nch = nch->next) + if (nch->type != ROFFT_TEXT) + return NULL; + buf = NULL; + deroff(&buf, n); + if (buf == NULL) + return NULL; + break; + default: + if (n->child == NULL || n->child->type != ROFFT_TEXT) + return NULL; + buf = mandoc_strdup(n->child->string); + break; + } + } + + /* + * In ID attributes, only use ASCII characters that are + * permitted in URL-fragment strings according to the + * explicit list at: + * https://url.spec.whatwg.org/#url-fragment-string + * In addition, reserve '~' for ordinal suffixes. + */ + + for (cp = buf; *cp != '\0'; cp++) + if (isalnum((unsigned char)*cp) == 0 && + strchr("!$&'()*+,-./:;=?@_", *cp) == NULL) + *cp = '_'; + + if (unique == 0) + return buf; + + /* Avoid duplicate HTML id= attributes. 
*/ + + slot = ohash_qlookup(&id_unique, buf); + if ((entry = ohash_find(&id_unique, slot)) == NULL) { + len = strlen(buf) + 1; + entry = mandoc_malloc(sizeof(*entry) + len); + entry->ord = 1; + memcpy(entry->id, buf, len); + ohash_insert(&id_unique, slot, entry); + } else if (unique == 1) + entry->ord++; + + if (entry->ord > 1) { + cp = buf; + mandoc_asprintf(&buf, "%s~%d", cp, entry->ord); + free(cp); + } + return buf; +} + +static int +print_escape(struct html *h, char c) +{ + + switch (c) { + case '<': + print_word(h, "&lt;"); + break; + case '>': + print_word(h, "&gt;"); + break; + case '&': + print_word(h, "&amp;"); + break; + case '"': + print_word(h, "&quot;"); + break; + case ASCII_NBRSP: + print_word(h, "&nbsp;"); + break; + case ASCII_HYPH: + print_byte(h, '-'); + break; + case ASCII_BREAK: + break; + default: + return 0; + } + return 1; +} + +static int +print_encode(struct html *h, const char *p, const char *pend, int norecurse) +{ + char numbuf[16]; + const char *seq; + size_t sz; + int c, len, breakline, nospace; + enum mandoc_esc esc; + static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', + ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; + + if (pend == NULL) + pend = strchr(p, '\0'); + + breakline = 0; + nospace = 0; + + while (p < pend) { + if (HTML_SKIPCHAR & h->flags && '\\' != *p) { + h->flags &= ~HTML_SKIPCHAR; + p++; + continue; + } + + for (sz = strcspn(p, rejs); sz-- && p < pend; p++) + print_byte(h, *p); + + if (breakline && + (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { + print_otag(h, TAG_BR, ""); + breakline = 0; + while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) + p++; + continue; + } + + if (p >= pend) + break; + + if (*p == ' ') { + print_endword(h); + p++; + continue; + } + + if (print_escape(h, *p++)) + continue; + + esc = mandoc_escape(&p, &seq, &len); + switch (esc) { + case ESCAPE_FONT: + case ESCAPE_FONTPREV: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBI: + case ESCAPE_FONTCW: + case ESCAPE_FONTROMAN: + if (0 
== norecurse) { + h->flags |= HTML_NOSPACE; + if (html_setfont(h, esc)) + print_metaf(h); + h->flags &= ~HTML_NOSPACE; + } + continue; + case ESCAPE_SKIPCHAR: + h->flags |= HTML_SKIPCHAR; + continue; + case ESCAPE_ERROR: + continue; + default: + break; + } + + if (h->flags & HTML_SKIPCHAR) { + h->flags &= ~HTML_SKIPCHAR; + continue; + } + + switch (esc) { + case ESCAPE_UNICODE: + /* Skip past "u" header. */ + c = mchars_num2uc(seq + 1, len - 1); + break; + case ESCAPE_NUMBERED: + c = mchars_num2char(seq, len); + if (c < 0) + continue; + break; + case ESCAPE_SPECIAL: + c = mchars_spec2cp(seq, len); + if (c <= 0) + continue; + break; + case ESCAPE_UNDEF: + c = *seq; + break; + case ESCAPE_DEVICE: + print_word(h, "html"); + continue; + case ESCAPE_BREAK: + breakline = 1; + continue; + case ESCAPE_NOSPACE: + if ('\0' == *p) + nospace = 1; + continue; + case ESCAPE_OVERSTRIKE: + if (len == 0) + continue; + c = seq[len - 1]; + break; + default: + continue; + } + if ((c < 0x20 && c != 0x09) || + (c > 0x7E && c < 0xA0)) + c = 0xFFFD; + if (c > 0x7E) { + (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); + print_word(h, numbuf); + } else if (print_escape(h, c) == 0) + print_byte(h, c); + } + + return nospace; +} + +static void +print_href(struct html *h, const char *name, const char *sec, int man) +{ + struct stat sb; + const char *p, *pp; + char *filename; + + if (man) { + pp = h->base_man1; + if (h->base_man2 != NULL) { + mandoc_asprintf(&filename, "%s.%s", name, sec); + if (stat(filename, &sb) == -1) + pp = h->base_man2; + free(filename); + } + } else + pp = h->base_includes; + + while ((p = strchr(pp, '%')) != NULL) { + print_encode(h, pp, p, 1); + if (man && p[1] == 'S') { + if (sec == NULL) + print_byte(h, '1'); + else + print_encode(h, sec, NULL, 1); + } else if ((man && p[1] == 'N') || + (man == 0 && p[1] == 'I')) + print_encode(h, name, NULL, 1); + else + print_encode(h, p, p + 2, 1); + pp = p + 2; + } + if (*pp != '\0') + print_encode(h, pp, NULL, 1); +} + 
+struct tag * +print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) +{ + va_list ap; + struct tag *t; + const char *attr; + char *arg1, *arg2; + int style_written, tflags; + + tflags = htmltags[tag].flags; + + /* Flow content is not allowed in phrasing context. */ + + if ((tflags & HTML_INPHRASE) == 0) { + for (t = h->tag; t != NULL; t = t->next) { + if (t->closed) + continue; + assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0); + break; + } + + /* + * Always wrap phrasing elements in a paragraph + * unless already contained in some flow container; + * never put them directly into a section. + */ + + } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION) + print_otag(h, TAG_P, "c", "Pp"); + + /* Push this tag onto the stack of open scopes. */ + + if ((tflags & HTML_NOSTACK) == 0) { + t = mandoc_malloc(sizeof(struct tag)); + t->tag = tag; + t->next = h->tag; + t->refcnt = 0; + t->closed = 0; + h->tag = t; + } else + t = NULL; + + if (tflags & HTML_NLBEFORE) + print_endline(h); + if (h->col == 0) + print_indent(h); + else if ((h->flags & HTML_NOSPACE) == 0) { + if (h->flags & HTML_KEEP) + print_word(h, " "); + else { + if (h->flags & HTML_PREKEEP) + h->flags |= HTML_KEEP; + print_endword(h); + } + } + + if ( ! (h->flags & HTML_NONOSPACE)) + h->flags &= ~HTML_NOSPACE; + else + h->flags |= HTML_NOSPACE; + + /* Print out the tag name and attributes. */ + + print_byte(h, '<'); + print_word(h, htmltags[tag].name); + + va_start(ap, fmt); + + while (*fmt != '\0' && *fmt != 's') { + + /* Parse attributes and arguments. */ + + arg1 = va_arg(ap, char *); + arg2 = NULL; + switch (*fmt++) { + case 'c': + attr = "class"; + break; + case 'h': + attr = "href"; + break; + case 'i': + attr = "id"; + break; + case '?': + attr = arg1; + arg1 = va_arg(ap, char *); + break; + default: + abort(); + } + if (*fmt == 'M') + arg2 = va_arg(ap, char *); + if (arg1 == NULL) + continue; + + /* Print the attributes. 
*/ + + print_byte(h, ' '); + print_word(h, attr); + print_byte(h, '='); + print_byte(h, '"'); + switch (*fmt) { + case 'I': + print_href(h, arg1, NULL, 0); + fmt++; + break; + case 'M': + print_href(h, arg1, arg2, 1); + fmt++; + break; + case 'R': + print_byte(h, '#'); + print_encode(h, arg1, NULL, 1); + fmt++; + break; + default: + print_encode(h, arg1, NULL, 1); + break; + } + print_byte(h, '"'); + } + + style_written = 0; + while (*fmt++ == 's') { + arg1 = va_arg(ap, char *); + arg2 = va_arg(ap, char *); + if (arg2 == NULL) + continue; + print_byte(h, ' '); + if (style_written == 0) { + print_word(h, "style=\""); + style_written = 1; + } + print_word(h, arg1); + print_byte(h, ':'); + print_byte(h, ' '); + print_word(h, arg2); + print_byte(h, ';'); + } + if (style_written) + print_byte(h, '"'); + + va_end(ap); + + /* Accommodate for "well-formed" singleton escaping. */ + + if (htmltags[tag].flags & HTML_NOSTACK) + print_byte(h, '/'); + + print_byte(h, '>'); + + if (tflags & HTML_NLBEGIN) + print_endline(h); + else + h->flags |= HTML_NOSPACE; + + if (tflags & HTML_INDENT) + h->indent++; + if (tflags & HTML_NOINDENT) + h->noindent++; + + return t; +} + +/* + * Print an element with an optional "id=" attribute. + * If the element has phrasing content and an "id=" attribute, + * also add a permalink: outside if it can be in phrasing context, + * inside otherwise. + */ +struct tag * +print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, + struct roff_node *n) +{ + struct roff_node *nch; + struct tag *ret, *t; + char *id, *href; + + ret = NULL; + id = href = NULL; + if (n->flags & NODE_ID) + id = html_make_id(n, 1); + if (n->flags & NODE_HREF) + href = id == NULL ? 
html_make_id(n, 2) : id; + if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE) + ret = print_otag(h, TAG_A, "chR", "permalink", href); + t = print_otag(h, elemtype, "ci", cattr, id); + if (ret == NULL) { + ret = t; + if (href != NULL && (nch = n->child) != NULL) { + /* man(7) is safe, it tags phrasing content only. */ + if (n->tok > MDOC_MAX || + htmltags[elemtype].flags & HTML_TOPHRASE) + nch = NULL; + else /* For mdoc(7), beware of nested blocks. */ + while (nch != NULL && nch->type == ROFFT_TEXT) + nch = nch->next; + if (nch == NULL) + print_otag(h, TAG_A, "chR", "permalink", href); + } + } + free(id); + if (id == NULL) + free(href); + return ret; +} + +static void +print_ctag(struct html *h, struct tag *tag) +{ + int tflags; + + if (tag->closed == 0) { + tag->closed = 1; + if (tag == h->metaf) + h->metaf = NULL; + if (tag == h->tblt) + h->tblt = NULL; + + tflags = htmltags[tag->tag].flags; + if (tflags & HTML_INDENT) + h->indent--; + if (tflags & HTML_NOINDENT) + h->noindent--; + if (tflags & HTML_NLEND) + print_endline(h); + print_indent(h); + print_byte(h, '<'); + print_byte(h, '/'); + print_word(h, htmltags[tag->tag].name); + print_byte(h, '>'); + if (tflags & HTML_NLAFTER) + print_endline(h); + } + if (tag->refcnt == 0) { + h->tag = tag->next; + free(tag); + } +} + +void +print_gen_decls(struct html *h) +{ + print_word(h, "<!DOCTYPE html>"); + print_endline(h); +} + +void +print_gen_comment(struct html *h, struct roff_node *n) +{ + int wantblank; + + print_word(h, "<!-- This is an automatically generated file." 
+ " Do not edit."); + h->indent = 1; + wantblank = 0; + while (n != NULL && n->type == ROFFT_COMMENT) { + if (strstr(n->string, "-->") == NULL && + (wantblank || *n->string != '\0')) { + print_endline(h); + print_indent(h); + print_word(h, n->string); + wantblank = *n->string != '\0'; + } + n = n->next; + } + if (wantblank) + print_endline(h); + print_word(h, " -->"); + print_endline(h); + h->indent = 0; +} + +void +print_text(struct html *h, const char *word) +{ + print_tagged_text(h, word, NULL); +} + +void +print_tagged_text(struct html *h, const char *word, struct roff_node *n) +{ + struct tag *t; + char *href; + + /* + * Always wrap text in a paragraph unless already contained in + * some flow container; never put it directly into a section. + */ + + if (h->tag->tag == TAG_SECTION) + print_otag(h, TAG_P, "c", "Pp"); + + /* Output whitespace before this text? */ + + if (h->col && (h->flags & HTML_NOSPACE) == 0) { + if ( ! (HTML_KEEP & h->flags)) { + if (HTML_PREKEEP & h->flags) + h->flags |= HTML_KEEP; + print_endword(h); + } else + print_word(h, " "); + } + + /* + * Optionally switch fonts, optionally write a permalink, then + * print the text, optionally surrounded by HTML whitespace. + */ + + assert(h->metaf == NULL); + print_metaf(h); + print_indent(h); + + if (n != NULL && (href = html_make_id(n, 2)) != NULL) { + t = print_otag(h, TAG_A, "chR", "permalink", href); + free(href); + } else + t = NULL; + + if ( ! print_encode(h, word, NULL, 0)) { + if ( ! (h->flags & HTML_NONOSPACE)) + h->flags &= ~HTML_NOSPACE; + h->flags &= ~HTML_NONEWLINE; + } else + h->flags |= HTML_NOSPACE | HTML_NONEWLINE; + + if (h->metaf != NULL) { + print_tagq(h, h->metaf); + h->metaf = NULL; + } else if (t != NULL) + print_tagq(h, t); + + h->flags &= ~HTML_IGNDELIM; +} + +void +print_tagq(struct html *h, const struct tag *until) +{ + struct tag *this, *next; + + for (this = h->tag; this != NULL; this = next) { + next = this == until ? 
NULL : this->next; + print_ctag(h, this); + } +} + +/* + * Close out all open elements up to but excluding suntil. + * Note that a paragraph just inside stays open together with it + * because paragraphs include subsequent phrasing content. + */ +void +print_stagq(struct html *h, const struct tag *suntil) +{ + struct tag *this, *next; + + for (this = h->tag; this != NULL; this = next) { + next = this->next; + if (this == suntil || (next == suntil && + (this->tag == TAG_P || this->tag == TAG_PRE))) + break; + print_ctag(h, this); + } +} + + +/*********************************************************************** + * Low level output functions. + * They implement line breaking using a short static buffer. + ***********************************************************************/ + +/* + * Buffer one HTML output byte. + * If the buffer is full, flush and deactivate it and start a new line. + * If the buffer is inactive, print directly. + */ +static void +print_byte(struct html *h, char c) +{ + if ((h->flags & HTML_BUFFER) == 0) { + putchar(c); + h->col++; + return; + } + + if (h->col + h->bufcol < sizeof(h->buf)) { + h->buf[h->bufcol++] = c; + return; + } + + putchar('\n'); + h->col = 0; + print_indent(h); + putchar(' '); + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + putchar(c); + h->col = (h->indent + 1) * 2 + h->bufcol + 1; + h->bufcol = 0; + h->flags &= ~HTML_BUFFER; +} + +/* + * If something was printed on the current output line, end it. + * Not to be called right after print_indent(). + */ +void +print_endline(struct html *h) +{ + if (h->col == 0) + return; + + if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->bufcol = 0; + } + putchar('\n'); + h->col = 0; + h->flags |= HTML_NOSPACE; + h->flags &= ~HTML_BUFFER; +} + +/* + * Flush the HTML output buffer. + * If it is inactive, activate it. 
+ */ +static void +print_endword(struct html *h) +{ + if (h->noindent) { + print_byte(h, ' '); + return; + } + + if ((h->flags & HTML_BUFFER) == 0) { + h->col++; + h->flags |= HTML_BUFFER; + } else if (h->bufcol) { + putchar(' '); + fwrite(h->buf, h->bufcol, 1, stdout); + h->col += h->bufcol + 1; + } + h->bufcol = 0; +} + +/* + * If at the beginning of a new output line, + * perform indentation and mark the line as containing output. + * Make sure to really produce some output right afterwards, + * but do not use print_otag() for producing it. + */ +static void +print_indent(struct html *h) +{ + size_t i; + + if (h->col || h->noindent) + return; + + h->col = h->indent * 2; + for (i = 0; i < h->col; i++) + putchar(' '); +} + +/* + * Print or buffer some characters + * depending on the current HTML output buffer state. + */ +static void +print_word(struct html *h, const char *cp) +{ + while (*cp != '\0') + print_byte(h, *cp++); +} diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h new file mode 100644 index 0000000..dcce339 --- /dev/null +++ b/usr.bin/mandoc/html.h @@ -0,0 +1,141 @@ +/* $OpenBSD: html.h,v 1.70 2020/04/18 20:28:46 schwarze Exp $ */ +/* + * Copyright (c) 2017, 2018, 2019, 2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interfaces for mandoc(1) HTML formatters. + * For use by the individual HTML formatters only. + */ + +enum htmltag { + TAG_HTML, + TAG_HEAD, + TAG_META, + TAG_LINK, + TAG_STYLE, + TAG_TITLE, + TAG_BODY, + TAG_DIV, + TAG_SECTION, + TAG_TABLE, + TAG_TR, + TAG_TD, + TAG_LI, + TAG_UL, + TAG_OL, + TAG_DL, + TAG_DT, + TAG_DD, + TAG_H1, + TAG_H2, + TAG_P, + TAG_PRE, + TAG_A, + TAG_B, + TAG_CITE, + TAG_CODE, + TAG_I, + TAG_SMALL, + TAG_SPAN, + TAG_VAR, + TAG_BR, + TAG_MARK, + TAG_MATH, + TAG_MROW, + TAG_MI, + TAG_MN, + TAG_MO, + TAG_MSUP, + TAG_MSUB, + TAG_MSUBSUP, + TAG_MFRAC, + TAG_MSQRT, + TAG_MFENCED, + TAG_MTABLE, + TAG_MTR, + TAG_MTD, + TAG_MUNDEROVER, + TAG_MUNDER, + TAG_MOVER, + TAG_MAX +}; + +struct tag { + struct tag *next; + int refcnt; + int closed; + enum htmltag tag; +}; + +struct html { + int flags; +#define HTML_NOSPACE (1 << 0) /* suppress next space */ +#define HTML_IGNDELIM (1 << 1) +#define HTML_KEEP (1 << 2) +#define HTML_PREKEEP (1 << 3) +#define HTML_NONOSPACE (1 << 4) /* never add spaces */ +#define HTML_SKIPCHAR (1 << 6) /* skip the next character */ +#define HTML_NOSPLIT (1 << 7) /* do not break line before .An */ +#define HTML_SPLIT (1 << 8) /* break line before .An */ +#define HTML_NONEWLINE (1 << 9) /* No line break in nofill mode. */ +#define HTML_BUFFER (1 << 10) /* Collect a word to see if it fits. */ +#define HTML_TOCDONE (1 << 11) /* The TOC was already written. 
*/ + size_t indent; /* current output indentation level */ + int noindent; /* indent disabled by <pre> */ + size_t col; /* current output byte position */ + size_t bufcol; /* current buf byte position */ + char buf[80]; /* output buffer */ + struct tag *tag; /* last open tag */ + struct rofftbl tbl; /* current table */ + struct tag *tblt; /* current open table scope */ + char *base_man1; /* bases for manpage href */ + char *base_man2; + char *base_includes; /* base for include href */ + char *style; /* style-sheet URI */ + struct tag *metaf; /* current open font scope */ + enum mandoc_esc metal; /* last used font */ + enum mandoc_esc metac; /* current font mode */ + int oflags; /* output options */ +#define HTML_FRAGMENT (1 << 0) /* don't emit HTML/HEAD/BODY */ +#define HTML_TOC (1 << 1) /* emit a table of contents */ +}; + + +struct roff_node; +struct tbl_span; +struct eqn_box; + +void roff_html_pre(struct html *, const struct roff_node *); + +void print_gen_comment(struct html *, struct roff_node *); +void print_gen_decls(struct html *); +void print_gen_head(struct html *); +struct tag *print_otag(struct html *, enum htmltag, const char *, ...); +struct tag *print_otag_id(struct html *, enum htmltag, const char *, + struct roff_node *); +void print_tagq(struct html *, const struct tag *); +void print_stagq(struct html *, const struct tag *); +void print_tagged_text(struct html *, const char *, + struct roff_node *); +void print_text(struct html *, const char *); +void print_tblclose(struct html *); +void print_tbl(struct html *, const struct tbl_span *); +void print_eqn(struct html *, const struct eqn_box *); +void print_endline(struct html *); + +void html_close_paragraph(struct html *); +enum roff_tok html_fillmode(struct html *, enum roff_tok); +char *html_make_id(const struct roff_node *, int); +int html_setfont(struct html *, enum mandoc_esc); diff --git a/usr.bin/mandoc/libman.h b/usr.bin/mandoc/libman.h new file mode 100644 index 0000000..daffd76 --- 
/dev/null +++ b/usr.bin/mandoc/libman.h @@ -0,0 +1,42 @@ +/* $OpenBSD: libman.h,v 1.61 2018/12/31 10:03:38 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct roff_node; +struct roff_man; + +#define MACRO_PROT_ARGS struct roff_man *man, \ + enum roff_tok tok, \ + int line, \ + int ppos, \ + int *pos, \ + char *buf + +struct man_macro { + void (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_BSCOPED (1 << 0) /* Optional next-line block scope. */ +#define MAN_ESCOPED (1 << 1) /* Optional next-line element scope. */ +#define MAN_NSCOPED (1 << 2) /* Allowed in next-line element scope. */ +#define MAN_XSCOPE (1 << 3) /* Exit next-line block scope. */ +#define MAN_JOIN (1 << 4) /* Join arguments together. 
*/ +}; + +const struct man_macro *man_macro(enum roff_tok); + +void man_descope(struct roff_man *, int, int, char *); +void man_unscope(struct roff_man *, const struct roff_node *); diff --git a/usr.bin/mandoc/libmandoc.h b/usr.bin/mandoc/libmandoc.h new file mode 100644 index 0000000..b291631 --- /dev/null +++ b/usr.bin/mandoc/libmandoc.h @@ -0,0 +1,85 @@ +/* $OpenBSD: libmandoc.h,v 1.64 2020/04/03 11:34:19 schwarze Exp $ */ +/* + * Copyright (c) 2013-2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interfaces for parser utilities needed by multiple parsers + * and the top-level functions to call the mdoc, man, and roff parsers. + */ + +/* + * Return codes passed from the roff parser to the main parser. + */ + +/* Main instruction: what to do with the returned line. */ +#define ROFF_IGN 0x000 /* Don't do anything with it. */ +#define ROFF_CONT 0x001 /* Give it to the high-level parser. */ +#define ROFF_RERUN 0x002 /* Re-run the roff parser with an offset. */ +#define ROFF_REPARSE 0x004 /* Recursively run the main parser on it. */ +#define ROFF_SO 0x008 /* Include the named file. 
*/ +#define ROFF_MASK 0x00f /* Only one of these bits should be set. */ + +/* Options for further parsing, to be OR'ed with the above. */ +#define ROFF_APPEND 0x010 /* Append the next line to this one. */ +#define ROFF_USERCALL 0x020 /* Start execution of a new macro. */ +#define ROFF_USERRET 0x040 /* Abort execution of the current macro. */ +#define ROFF_WHILE 0x100 /* Start a new .while loop. */ +#define ROFF_LOOPCONT 0x200 /* Iterate the current .while loop. */ +#define ROFF_LOOPEXIT 0x400 /* Exit the current .while loop. */ +#define ROFF_LOOPMASK 0xf00 + + +struct buf { + char *buf; + size_t sz; + struct buf *next; +}; + + +struct roff; +struct roff_man; +struct roff_node; + +char *mandoc_normdate(struct roff_node *, struct roff_node *); +int mandoc_eos(const char *, size_t); +int mandoc_strntoi(const char *, size_t, int); +const char *mandoc_a2msec(const char*); + +int mdoc_parseln(struct roff_man *, int, char *, int); +void mdoc_endparse(struct roff_man *); + +int man_parseln(struct roff_man *, int, char *, int); +void man_endparse(struct roff_man *); + +int preconv_cue(const struct buf *, size_t); +int preconv_encode(const struct buf *, size_t *, + struct buf *, size_t *, int *); + +void roff_free(struct roff *); +struct roff *roff_alloc(int); +void roff_reset(struct roff *); +void roff_man_free(struct roff_man *); +struct roff_man *roff_man_alloc(struct roff *, const char *, int); +void roff_man_reset(struct roff_man *); +int roff_parseln(struct roff *, int, struct buf *, int *); +void roff_userret(struct roff *); +void roff_endparse(struct roff *); +void roff_setreg(struct roff *, const char *, int, char); +int roff_getreg(struct roff *, const char *); +char *roff_strdup(const struct roff *, const char *); +char *roff_getarg(struct roff *, char **, int, int *); +int roff_getcontrol(const struct roff *, + const char *, int *); +int roff_getformat(const struct roff *); diff --git a/usr.bin/mandoc/libmdoc.h b/usr.bin/mandoc/libmdoc.h new file mode 100644 
index 0000000..4cdea31 --- /dev/null +++ b/usr.bin/mandoc/libmdoc.h @@ -0,0 +1,86 @@ +/* $OpenBSD: libmdoc.h,v 1.88 2018/12/31 04:55:42 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013,2014,2015,2017,2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct roff_node; +struct roff_man; +struct mdoc_arg; + +#define MACRO_PROT_ARGS struct roff_man *mdoc, \ + enum roff_tok tok, \ + int line, \ + int ppos, \ + int *pos, \ + char *buf + +struct mdoc_macro { + void (*fp)(MACRO_PROT_ARGS); + int flags; +#define MDOC_CALLABLE (1 << 0) +#define MDOC_PARSED (1 << 1) +#define MDOC_EXPLICIT (1 << 2) +#define MDOC_PROLOGUE (1 << 3) +#define MDOC_IGNDELIM (1 << 4) +#define MDOC_JOIN (1 << 5) +}; + +enum margserr { + ARGS_ERROR, + ARGS_EOLN, /* end-of-line */ + ARGS_WORD, /* normal word */ + ARGS_ALLOC, /* normal word from roff_getarg() */ + ARGS_PUNCT, /* series of punctuation */ + ARGS_PHRASE /* Bl -column phrase */ +}; + +/* + * A punctuation delimiter is opening, closing, or "middle mark" + * punctuation. These govern spacing. 
+ * Opening punctuation (e.g., the opening parenthesis) suppresses the + * following space; closing punctuation (e.g., the closing parenthesis) + * suppresses the leading space; middle punctuation (e.g., the vertical + * bar) can do either. The middle punctuation delimiter bends the rules + * depending on usage. + */ +enum mdelim { + DELIM_NONE = 0, + DELIM_OPEN, + DELIM_MIDDLE, + DELIM_CLOSE, + DELIM_MAX +}; + +const struct mdoc_macro *mdoc_macro(enum roff_tok); + +void mdoc_elem_alloc(struct roff_man *, int, int, + enum roff_tok, struct mdoc_arg *); +struct roff_node *mdoc_block_alloc(struct roff_man *, int, int, + enum roff_tok, struct mdoc_arg *); +void mdoc_tail_alloc(struct roff_man *, int, int, + enum roff_tok); +struct roff_node *mdoc_endbody_alloc(struct roff_man *, int, int, + enum roff_tok, struct roff_node *); +void mdoc_state(struct roff_man *, struct roff_node *); +const char *mdoc_a2arch(const char *); +const char *mdoc_a2att(const char *); +enum roff_sec mdoc_a2sec(const char *); +const char *mdoc_a2st(const char *); +void mdoc_argv(struct roff_man *, int, enum roff_tok, + struct mdoc_arg **, int *, char *); +enum margserr mdoc_args(struct roff_man *, int, + int *, char *, enum roff_tok, char **); +enum mdelim mdoc_isdelim(const char *); diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c new file mode 100644 index 0000000..6f2174a --- /dev/null +++ b/usr.bin/mandoc/main.c @@ -0,0 +1,1255 @@ +/* $OpenBSD: main.c,v 1.251 2020/04/02 22:10:27 schwarze Exp $ */ +/* + * Copyright (c) 2010-2012, 2014-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Main program for mandoc(1), man(1), apropos(1), whatis(1), and help(1). + */ +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/param.h> /* MACHINE */ +#include <sys/stat.h> +#include <sys/wait.h> + +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <glob.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <termios.h> +#include <time.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "mandoc_xr.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "mandoc_parse.h" +#include "tag.h" +#include "term_tag.h" +#include "main.h" +#include "manconf.h" +#include "mansearch.h" + +#define BINM_APROPOS "apropos" +#define BINM_MAN "man" +#define BINM_MAKEWHATIS "makewhatis" +#define BINM_WHATIS "whatis" +#define OSENUM MANDOC_OS_OPENBSD + +enum outmode { + OUTMODE_DEF = 0, + OUTMODE_FLN, + OUTMODE_LST, + OUTMODE_ALL, + OUTMODE_ONE +}; + +enum outt { + OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ + OUTT_TREE, /* -Ttree */ + OUTT_MAN, /* -Tman */ + OUTT_HTML, /* -Thtml */ + OUTT_MARKDOWN, /* -Tmarkdown */ + OUTT_LINT, /* -Tlint */ + OUTT_PS, /* -Tps */ + OUTT_PDF /* -Tpdf */ +}; + +struct outstate { + struct tag_files *tag_files; /* Tagging state variables. 
*/ + void *outdata; /* data for output */ + int use_pager; + int wstop; /* stop after a file with a warning */ + int had_output; /* Some output was generated. */ + enum outt outtype; /* which output to use */ +}; + + +int mandocdb(int, char *[]); + +static void check_xr(void); +static int fs_lookup(const struct manpaths *, + size_t ipath, const char *, + const char *, const char *, + struct manpage **, size_t *); +static int fs_search(const struct mansearch *, + const struct manpaths *, const char *, + struct manpage **, size_t *); +static void glob_esc(char **, const char *, const char *); +static void outdata_alloc(struct outstate *, struct manoutput *); +static void parse(struct mparse *, int, const char *, + struct outstate *, struct manoutput *); +static void passthrough(int, int); +static void process_onefile(struct mparse *, struct manpage *, + int, struct outstate *, struct manconf *); +static void run_pager(struct tag_files *, char *); +static pid_t spawn_pager(struct tag_files *, char *); +static void usage(enum argmode) __attribute__((__noreturn__)); +static int woptions(char *, enum mandoc_os *, int *); + +static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; +static char help_arg[] = "help"; +static char *help_argv[] = {help_arg, NULL}; + + +int +main(int argc, char *argv[]) +{ + struct manconf conf; /* Manpaths and output options. */ + struct outstate outst; /* Output state. */ + struct winsize ws; /* Result of ioctl(TIOCGWINSZ). */ + struct mansearch search; /* Search options. */ + struct manpage *res; /* Complete list of search results. */ + struct manpage *resn; /* Search results for one name. */ + struct mparse *mp; /* Opaque parser object. */ + const char *conf_file; /* -C: alternate config file. */ + const char *os_s; /* -I: Operating system for display. */ + const char *progname, *sec; + char *defpaths; /* -M: override manpaths. */ + char *auxpaths; /* -m: additional manpaths. */ + char *oarg; /* -O: output option string. 
*/ + char *tagarg; /* -O tag: default value. */ + unsigned char *uc; + size_t ressz; /* Number of elements in res[]. */ + size_t resnsz; /* Number of elements in resn[]. */ + size_t i, ib, ssz; + int options; /* Parser options. */ + int show_usage; /* Invalid argument: give up. */ + int prio, best_prio; + int startdir; + int c; + enum mandoc_os os_e; /* Check base system conventions. */ + enum outmode outmode; /* According to command line. */ + + progname = getprogname(); + mandoc_msg_setoutfile(stderr); + if (strncmp(progname, "mandocdb", 8) == 0 || + strcmp(progname, BINM_MAKEWHATIS) == 0) + return mandocdb(argc, argv); + + if (pledge("stdio rpath tmppath tty proc exec", NULL) == -1) { + mandoc_msg(MANDOCERR_PLEDGE, 0, 0, "%s", strerror(errno)); + return mandoc_msg_getrc(); + } + + /* Search options. */ + + memset(&conf, 0, sizeof(conf)); + conf_file = NULL; + defpaths = auxpaths = NULL; + + memset(&search, 0, sizeof(struct mansearch)); + search.outkey = "Nd"; + oarg = NULL; + + if (strcmp(progname, BINM_MAN) == 0) + search.argmode = ARG_NAME; + else if (strcmp(progname, BINM_APROPOS) == 0) + search.argmode = ARG_EXPR; + else if (strcmp(progname, BINM_WHATIS) == 0) + search.argmode = ARG_WORD; + else if (strncmp(progname, "help", 4) == 0) + search.argmode = ARG_NAME; + else + search.argmode = ARG_FILE; + + /* Parser options. */ + + options = MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1; + os_e = MANDOC_OS_OTHER; + os_s = NULL; + + /* Formatter options. 
*/ + + memset(&outst, 0, sizeof(outst)); + outst.tag_files = NULL; + outst.outtype = OUTT_LOCALE; + outst.use_pager = 1; + + show_usage = 0; + outmode = OUTMODE_DEF; + + while ((c = getopt(argc, argv, + "aC:cfhI:iK:klM:m:O:S:s:T:VW:w")) != -1) { + if (c == 'i' && search.argmode == ARG_EXPR) { + optind--; + break; + } + switch (c) { + case 'a': + outmode = OUTMODE_ALL; + break; + case 'C': + conf_file = optarg; + break; + case 'c': + outst.use_pager = 0; + break; + case 'f': + search.argmode = ARG_WORD; + break; + case 'h': + conf.output.synopsisonly = 1; + outst.use_pager = 0; + outmode = OUTMODE_ALL; + break; + case 'I': + if (strncmp(optarg, "os=", 3) != 0) { + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, + "-I %s", optarg); + return mandoc_msg_getrc(); + } + if (os_s != NULL) { + mandoc_msg(MANDOCERR_BADARG_DUPE, 0, 0, + "-I %s", optarg); + return mandoc_msg_getrc(); + } + os_s = optarg + 3; + break; + case 'K': + options &= ~(MPARSE_UTF8 | MPARSE_LATIN1); + if (strcmp(optarg, "utf-8") == 0) + options |= MPARSE_UTF8; + else if (strcmp(optarg, "iso-8859-1") == 0) + options |= MPARSE_LATIN1; + else if (strcmp(optarg, "us-ascii") != 0) { + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, + "-K %s", optarg); + return mandoc_msg_getrc(); + } + break; + case 'k': + search.argmode = ARG_EXPR; + break; + case 'l': + search.argmode = ARG_FILE; + outmode = OUTMODE_ALL; + break; + case 'M': + defpaths = optarg; + break; + case 'm': + auxpaths = optarg; + break; + case 'O': + oarg = optarg; + break; + case 'S': + search.arch = optarg; + break; + case 's': + search.sec = optarg; + break; + case 'T': + if (strcmp(optarg, "ascii") == 0) + outst.outtype = OUTT_ASCII; + else if (strcmp(optarg, "lint") == 0) { + outst.outtype = OUTT_LINT; + mandoc_msg_setoutfile(stdout); + mandoc_msg_setmin(MANDOCERR_BASE); + } else if (strcmp(optarg, "tree") == 0) + outst.outtype = OUTT_TREE; + else if (strcmp(optarg, "man") == 0) + outst.outtype = OUTT_MAN; + else if (strcmp(optarg, "html") == 0) + 
outst.outtype = OUTT_HTML; + else if (strcmp(optarg, "markdown") == 0) + outst.outtype = OUTT_MARKDOWN; + else if (strcmp(optarg, "utf8") == 0) + outst.outtype = OUTT_UTF8; + else if (strcmp(optarg, "locale") == 0) + outst.outtype = OUTT_LOCALE; + else if (strcmp(optarg, "ps") == 0) + outst.outtype = OUTT_PS; + else if (strcmp(optarg, "pdf") == 0) + outst.outtype = OUTT_PDF; + else { + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, + "-T %s", optarg); + return mandoc_msg_getrc(); + } + break; + case 'W': + if (woptions(optarg, &os_e, &outst.wstop) == -1) + return mandoc_msg_getrc(); + break; + case 'w': + outmode = OUTMODE_FLN; + break; + default: + show_usage = 1; + break; + } + } + + if (show_usage) + usage(search.argmode); + + /* Postprocess options. */ + + switch (outmode) { + case OUTMODE_DEF: + switch (search.argmode) { + case ARG_FILE: + outmode = OUTMODE_ALL; + outst.use_pager = 0; + break; + case ARG_NAME: + outmode = OUTMODE_ONE; + break; + default: + outmode = OUTMODE_LST; + break; + } + break; + case OUTMODE_FLN: + if (search.argmode == ARG_FILE) + outmode = OUTMODE_ALL; + break; + case OUTMODE_ALL: + break; + case OUTMODE_LST: + case OUTMODE_ONE: + abort(); + } + + if (oarg != NULL) { + if (outmode == OUTMODE_LST) + search.outkey = oarg; + else { + while (oarg != NULL) { + if (manconf_output(&conf.output, + strsep(&oarg, ","), 0) == -1) + return mandoc_msg_getrc(); + } + } + } + + if (outst.outtype != OUTT_TREE || conf.output.noval == 0) + options |= MPARSE_VALIDATE; + + if (outmode == OUTMODE_FLN || + outmode == OUTMODE_LST || + !isatty(STDOUT_FILENO)) + outst.use_pager = 0; + + if (outst.use_pager && + (conf.output.width == 0 || conf.output.indent == 0) && + ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) != -1 && + ws.ws_col > 1) { + if (conf.output.width == 0 && ws.ws_col < 79) + conf.output.width = ws.ws_col - 1; + if (conf.output.indent == 0 && ws.ws_col < 66) + conf.output.indent = 3; + } + + if (outst.use_pager == 0) { + if (pledge("stdio rpath", NULL) == -1) { 
+ mandoc_msg(MANDOCERR_PLEDGE, 0, 0, + "%s", strerror(errno)); + return mandoc_msg_getrc(); + } + } + + /* Parse arguments. */ + + if (argc > 0) { + argc -= optind; + argv += optind; + } + + /* + * Quirks for help(1) and man(1), + * in particular for a section argument without -s. + */ + + if (search.argmode == ARG_NAME) { + if (*progname == 'h') { + if (argc == 0) { + argv = help_argv; + argc = 1; + } + } else if (argc > 1 && + ((uc = (unsigned char *)argv[0]) != NULL) && + ((isdigit(uc[0]) && (uc[1] == '\0' || + isalpha(uc[1]))) || + (uc[0] == 'n' && uc[1] == '\0'))) { + search.sec = (char *)uc; + argv++; + argc--; + } + if (search.arch == NULL) + search.arch = getenv("MACHINE"); +#ifdef MACHINE + if (search.arch == NULL) + search.arch = MACHINE; +#endif + if (outmode == OUTMODE_ONE) + search.firstmatch = 1; + } + + /* + * Use the first argument for -O tag in addition to + * using it as a search term for man(1) or apropos(1). + */ + + if (conf.output.tag != NULL && *conf.output.tag == '\0') { + tagarg = argc > 0 && search.argmode == ARG_EXPR ? + strchr(*argv, '=') : NULL; + conf.output.tag = tagarg == NULL ? *argv : tagarg + 1; + } + + /* Read the configuration file. */ + + if (search.argmode != ARG_FILE) + manconf_parse(&conf, conf_file, defpaths, auxpaths); + + /* man(1): Resolve each name individually. 
*/ + + if (search.argmode == ARG_NAME) { + if (argc < 1) { + if (outmode != OUTMODE_FLN) + usage(ARG_NAME); + if (conf.manpath.sz == 0) { + warnx("The manpath is empty."); + mandoc_msg_setrc(MANDOCLEVEL_BADARG); + } else { + for (i = 0; i + 1 < conf.manpath.sz; i++) + printf("%s:", conf.manpath.paths[i]); + printf("%s\n", conf.manpath.paths[i]); + } + manconf_free(&conf); + return (int)mandoc_msg_getrc(); + } + for (res = NULL, ressz = 0; argc > 0; argc--, argv++) { + (void)mansearch(&search, &conf.manpath, + 1, argv, &resn, &resnsz); + if (resnsz == 0) + (void)fs_search(&search, &conf.manpath, + *argv, &resn, &resnsz); + if (resnsz == 0 && strchr(*argv, '/') == NULL) { + if (search.arch != NULL && + arch_valid(search.arch, OSENUM) == 0) + warnx("Unknown architecture \"%s\".", + search.arch); + else if (search.sec != NULL) + warnx("No entry for %s in " + "section %s of the manual.", + *argv, search.sec); + else + warnx("No entry for %s in " + "the manual.", *argv); + mandoc_msg_setrc(MANDOCLEVEL_BADARG); + continue; + } + if (resnsz == 0) { + if (access(*argv, R_OK) == -1) { + mandoc_msg_setinfilename(*argv); + mandoc_msg(MANDOCERR_BADARG_BAD, + 0, 0, "%s", strerror(errno)); + mandoc_msg_setinfilename(NULL); + continue; + } + resnsz = 1; + resn = mandoc_calloc(resnsz, sizeof(*res)); + resn->file = mandoc_strdup(*argv); + resn->ipath = SIZE_MAX; + resn->form = FORM_SRC; + } + if (outmode != OUTMODE_ONE || resnsz == 1) { + res = mandoc_reallocarray(res, + ressz + resnsz, sizeof(*res)); + memcpy(res + ressz, resn, + sizeof(*resn) * resnsz); + ressz += resnsz; + continue; + } + + /* Search for the best section. */ + + best_prio = 40; + for (ib = i = 0; i < resnsz; i++) { + sec = resn[i].file; + sec += strcspn(sec, "123456789"); + if (sec[0] == '\0') + continue; /* No section at all. 
*/ + prio = sec_prios[sec[0] - '1']; + if (search.sec != NULL) { + ssz = strlen(search.sec); + if (strncmp(sec, search.sec, ssz) == 0) + sec += ssz; + } else + sec++; /* Prefer without suffix. */ + if (*sec != '/') + prio += 10; /* Wrong dir name. */ + if (search.sec != NULL && + (strlen(sec) <= ssz + 3 || + strcmp(sec + strlen(sec) - ssz, + search.sec) != 0)) + prio += 20; /* Wrong file ext. */ + if (prio >= best_prio) + continue; + best_prio = prio; + ib = i; + } + res = mandoc_reallocarray(res, ressz + 1, + sizeof(*res)); + memcpy(res + ressz++, resn + ib, sizeof(*resn)); + } + + /* apropos(1), whatis(1): Process the full search expression. */ + + } else if (search.argmode != ARG_FILE) { + if (mansearch(&search, &conf.manpath, + argc, argv, &res, &ressz) == 0) + usage(search.argmode); + + if (ressz == 0) { + warnx("nothing appropriate"); + mandoc_msg_setrc(MANDOCLEVEL_BADARG); + goto out; + } + + /* mandoc(1): Take command line arguments as file names. */ + + } else { + ressz = argc > 0 ? argc : 1; + res = mandoc_calloc(ressz, sizeof(*res)); + for (i = 0; i < ressz; i++) { + if (argc > 0) + res[i].file = mandoc_strdup(argv[i]); + res[i].ipath = SIZE_MAX; + res[i].form = FORM_SRC; + } + } + + switch (outmode) { + case OUTMODE_FLN: + for (i = 0; i < ressz; i++) + puts(res[i].file); + goto out; + case OUTMODE_LST: + for (i = 0; i < ressz; i++) + printf("%s - %s\n", res[i].names, + res[i].output == NULL ? "" : + res[i].output); + goto out; + default: + break; + } + + if (search.argmode == ARG_FILE && auxpaths != NULL) { + if (strcmp(auxpaths, "doc") == 0) + options |= MPARSE_MDOC; + else if (strcmp(auxpaths, "an") == 0) + options |= MPARSE_MAN; + } + + mchars_alloc(); + mp = mparse_alloc(options, os_e, os_s); + + /* + * Remember the original working directory, if possible. + * This will be needed if some names on the command line + * are page names and some are relative file names. 
+ * Do not error out if the current directory is not + * readable: Maybe it won't be needed after all. + */ + startdir = open(".", O_RDONLY | O_DIRECTORY); + for (i = 0; i < ressz; i++) { + process_onefile(mp, res + i, startdir, &outst, &conf); + if (outst.wstop && mandoc_msg_getrc() != MANDOCLEVEL_OK) + break; + } + if (startdir != -1) { + (void)fchdir(startdir); + close(startdir); + } + if (conf.output.tag != NULL && conf.output.tag_found == 0) { + mandoc_msg(MANDOCERR_TAG, 0, 0, "%s", conf.output.tag); + conf.output.tag = NULL; + } + if (outst.outdata != NULL) { + switch (outst.outtype) { + case OUTT_HTML: + html_free(outst.outdata); + break; + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_ASCII: + ascii_free(outst.outdata); + break; + case OUTT_PDF: + case OUTT_PS: + pspdf_free(outst.outdata); + break; + default: + break; + } + } + mandoc_xr_free(); + mparse_free(mp); + mchars_free(); + +out: + mansearch_free(res, ressz); + if (search.argmode != ARG_FILE) + manconf_free(&conf); + + if (outst.tag_files != NULL) { + if (term_tag_close() != -1) + run_pager(outst.tag_files, conf.output.tag); + term_tag_unlink(); + } else if (outst.had_output && outst.outtype != OUTT_LINT) + mandoc_msg_summary(); + + return (int)mandoc_msg_getrc(); +} + +static void +usage(enum argmode argmode) +{ + switch (argmode) { + case ARG_FILE: + fputs("usage: mandoc [-ac] [-I os=name] " + "[-K encoding] [-mdoc | -man] [-O options]\n" + "\t [-T output] [-W level] [file ...]\n", stderr); + break; + case ARG_NAME: + fputs("usage: man [-acfhklw] [-C file] [-M path] " + "[-m path] [-S subsection]\n" + "\t [[-s] section] name ...\n", stderr); + break; + case ARG_WORD: + fputs("usage: whatis [-afk] [-C file] " + "[-M path] [-m path] [-O outkey] [-S arch]\n" + "\t [-s section] name ...\n", stderr); + break; + case ARG_EXPR: + fputs("usage: apropos [-afk] [-C file] " + "[-M path] [-m path] [-O outkey] [-S arch]\n" + "\t [-s section] expression ...\n", stderr); + break; + } + 
exit((int)MANDOCLEVEL_BADARG); +} + +static void +glob_esc(char **dst, const char *src, const char *suffix) +{ + while (*src != '\0') { + if (strchr("*?[", *src) != NULL) + *(*dst)++ = '\\'; + *(*dst)++ = *src++; + } + while (*suffix != '\0') + *(*dst)++ = *suffix++; +} + +static int +fs_lookup(const struct manpaths *paths, size_t ipath, + const char *sec, const char *arch, const char *name, + struct manpage **res, size_t *ressz) +{ + struct stat sb; + glob_t globinfo; + struct manpage *page; + char *file, *cp; + int globres; + enum form form; + + const char *const slman = "/man"; + const char *const slash = "/"; + const char *const sglob = ".[01-9]*"; + + form = FORM_SRC; + mandoc_asprintf(&file, "%s/man%s/%s.%s", + paths->paths[ipath], sec, name, sec); + if (stat(file, &sb) != -1) + goto found; + free(file); + + mandoc_asprintf(&file, "%s/cat%s/%s.0", + paths->paths[ipath], sec, name); + if (stat(file, &sb) != -1) { + form = FORM_CAT; + goto found; + } + free(file); + + if (arch != NULL) { + mandoc_asprintf(&file, "%s/man%s/%s/%s.%s", + paths->paths[ipath], sec, arch, name, sec); + if (stat(file, &sb) != -1) + goto found; + free(file); + } + + cp = file = mandoc_malloc(strlen(paths->paths[ipath]) * 2 + + strlen(slman) + strlen(sec) * 2 + strlen(slash) + + strlen(name) * 2 + strlen(sglob) + 1); + glob_esc(&cp, paths->paths[ipath], slman); + glob_esc(&cp, sec, slash); + glob_esc(&cp, name, sglob); + *cp = '\0'; + globres = glob(file, 0, NULL, &globinfo); + if (globres != 0 && globres != GLOB_NOMATCH) + mandoc_msg(MANDOCERR_GLOB, 0, 0, + "%s: %s", file, strerror(errno)); + free(file); + if (globres == 0) + file = mandoc_strdup(*globinfo.gl_pathv); + globfree(&globinfo); + if (globres == 0) { + if (stat(file, &sb) != -1) + goto found; + free(file); + } + if (res != NULL || ipath + 1 != paths->sz) + return -1; + + mandoc_asprintf(&file, "%s.%s", name, sec); + globres = stat(file, &sb); + free(file); + return globres; + +found: + warnx("outdated mandoc.db lacks %s(%s) 
entry, run %s %s", + name, sec, BINM_MAKEWHATIS, paths->paths[ipath]); + if (res == NULL) { + free(file); + return 0; + } + *res = mandoc_reallocarray(*res, ++*ressz, sizeof(**res)); + page = *res + (*ressz - 1); + page->file = file; + page->names = NULL; + page->output = NULL; + page->bits = NAME_FILE & NAME_MASK; + page->ipath = ipath; + page->sec = (*sec >= '1' && *sec <= '9') ? *sec - '1' + 1 : 10; + page->form = form; + return 0; +} + +static int +fs_search(const struct mansearch *cfg, const struct manpaths *paths, + const char *name, struct manpage **res, size_t *ressz) +{ + const char *const sections[] = + {"1", "8", "6", "2", "3", "5", "7", "4", "9", "3p"}; + const size_t nsec = sizeof(sections)/sizeof(sections[0]); + + size_t ipath, isec; + + assert(cfg->argmode == ARG_NAME); + if (res != NULL) + *res = NULL; + *ressz = 0; + for (ipath = 0; ipath < paths->sz; ipath++) { + if (cfg->sec != NULL) { + if (fs_lookup(paths, ipath, cfg->sec, cfg->arch, + name, res, ressz) != -1 && cfg->firstmatch) + return 0; + } else { + for (isec = 0; isec < nsec; isec++) + if (fs_lookup(paths, ipath, sections[isec], + cfg->arch, name, res, ressz) != -1 && + cfg->firstmatch) + return 0; + } + } + return -1; +} + +static void +process_onefile(struct mparse *mp, struct manpage *resp, int startdir, + struct outstate *outst, struct manconf *conf) +{ + int fd; + + /* + * Changing directories is not needed in ARG_FILE mode. + * Do it on a best-effort basis. Even in case of + * failure, some functionality may still work. + */ + if (resp->ipath != SIZE_MAX) + (void)chdir(conf->manpath.paths[resp->ipath]); + else if (startdir != -1) + (void)fchdir(startdir); + + mandoc_msg_setinfilename(resp->file); + if (resp->file != NULL) { + if ((fd = mparse_open(mp, resp->file)) == -1) { + mandoc_msg(resp->ipath == SIZE_MAX ? 
+ MANDOCERR_BADARG_BAD : MANDOCERR_OPEN, + 0, 0, "%s", strerror(errno)); + mandoc_msg_setinfilename(NULL); + return; + } + } else + fd = STDIN_FILENO; + + if (outst->use_pager) { + outst->use_pager = 0; + outst->tag_files = term_tag_init(); + } + if (outst->had_output && outst->outtype <= OUTT_UTF8) { + if (outst->outdata == NULL) + outdata_alloc(outst, &conf->output); + terminal_sepline(outst->outdata); + } + + if (resp->form == FORM_SRC) + parse(mp, fd, resp->file, outst, &conf->output); + else { + passthrough(fd, conf->output.synopsisonly); + outst->had_output = 1; + } + + if (ferror(stdout)) { + if (outst->tag_files != NULL) { + mandoc_msg(MANDOCERR_WRITE, 0, 0, "%s: %s", + outst->tag_files->ofn, strerror(errno)); + term_tag_unlink(); + outst->tag_files = NULL; + } else + mandoc_msg(MANDOCERR_WRITE, 0, 0, "%s", + strerror(errno)); + } + mandoc_msg_setinfilename(NULL); +} + +static void +parse(struct mparse *mp, int fd, const char *file, + struct outstate *outst, struct manoutput *outconf) +{ + static int previous; + struct roff_meta *meta; + + assert(fd >= 0); + if (file == NULL) + file = "<stdin>"; + + if (previous) + mparse_reset(mp); + else + previous = 1; + + mparse_readfd(mp, fd, file); + if (fd != STDIN_FILENO) + close(fd); + + /* + * With -Wstop and warnings or errors of at least the requested + * level, do not produce output. + */ + + if (outst->wstop && mandoc_msg_getrc() != MANDOCLEVEL_OK) + return; + + if (outst->outdata == NULL) + outdata_alloc(outst, outconf); + else if (outst->outtype == OUTT_HTML) + html_reset(outst); + + mandoc_xr_reset(); + meta = mparse_result(mp); + + /* Execute the out device, if it exists. 
*/ + + outst->had_output = 1; + if (meta->macroset == MACROSET_MDOC) { + switch (outst->outtype) { + case OUTT_HTML: + html_mdoc(outst->outdata, meta); + break; + case OUTT_TREE: + tree_mdoc(outst->outdata, meta); + break; + case OUTT_MAN: + man_mdoc(outst->outdata, meta); + break; + case OUTT_PDF: + case OUTT_ASCII: + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_PS: + terminal_mdoc(outst->outdata, meta); + break; + case OUTT_MARKDOWN: + markdown_mdoc(outst->outdata, meta); + break; + default: + break; + } + } + if (meta->macroset == MACROSET_MAN) { + switch (outst->outtype) { + case OUTT_HTML: + html_man(outst->outdata, meta); + break; + case OUTT_TREE: + tree_man(outst->outdata, meta); + break; + case OUTT_MAN: + mparse_copy(mp); + break; + case OUTT_PDF: + case OUTT_ASCII: + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_PS: + terminal_man(outst->outdata, meta); + break; + default: + break; + } + } + if (outconf->tag != NULL && outconf->tag_found == 0 && + tag_exists(outconf->tag)) + outconf->tag_found = 1; + if (mandoc_msg_getmin() < MANDOCERR_STYLE) + check_xr(); +} + +static void +check_xr(void) +{ + static struct manpaths paths; + struct mansearch search; + struct mandoc_xr *xr; + size_t sz; + + if (paths.sz == 0) + manpath_base(&paths); + + for (xr = mandoc_xr_get(); xr != NULL; xr = xr->next) { + if (xr->line == -1) + continue; + search.arch = NULL; + search.sec = xr->sec; + search.outkey = NULL; + search.argmode = ARG_NAME; + search.firstmatch = 1; + if (mansearch(&search, &paths, 1, &xr->name, NULL, &sz)) + continue; + if (fs_search(&search, &paths, xr->name, NULL, &sz) != -1) + continue; + if (xr->count == 1) + mandoc_msg(MANDOCERR_XR_BAD, xr->line, + xr->pos + 1, "Xr %s %s", xr->name, xr->sec); + else + mandoc_msg(MANDOCERR_XR_BAD, xr->line, + xr->pos + 1, "Xr %s %s (%d times)", + xr->name, xr->sec, xr->count); + } +} + +static void +outdata_alloc(struct outstate *outst, struct manoutput *outconf) +{ + switch (outst->outtype) { + case OUTT_HTML: + 
outst->outdata = html_alloc(outconf); + break; + case OUTT_UTF8: + outst->outdata = utf8_alloc(outconf); + break; + case OUTT_LOCALE: + outst->outdata = locale_alloc(outconf); + break; + case OUTT_ASCII: + outst->outdata = ascii_alloc(outconf); + break; + case OUTT_PDF: + outst->outdata = pdf_alloc(outconf); + break; + case OUTT_PS: + outst->outdata = ps_alloc(outconf); + break; + default: + break; + } +} + +static void +passthrough(int fd, int synopsis_only) +{ + const char synb[] = "S\bSY\bYN\bNO\bOP\bPS\bSI\bIS\bS"; + const char synr[] = "SYNOPSIS"; + + FILE *stream; + char *line, *cp; + size_t linesz; + ssize_t len, written; + int lno, print; + + stream = NULL; + line = NULL; + linesz = 0; + + if (fflush(stdout) == EOF) { + mandoc_msg(MANDOCERR_FFLUSH, 0, 0, "%s", strerror(errno)); + goto done; + } + if ((stream = fdopen(fd, "r")) == NULL) { + close(fd); + mandoc_msg(MANDOCERR_FDOPEN, 0, 0, "%s", strerror(errno)); + goto done; + } + + lno = print = 0; + while ((len = getline(&line, &linesz, stream)) != -1) { + lno++; + cp = line; + if (synopsis_only) { + if (print) { + if ( ! 
isspace((unsigned char)*cp)) + goto done; + while (isspace((unsigned char)*cp)) { + cp++; + len--; + } + } else { + if (strcmp(cp, synb) == 0 || + strcmp(cp, synr) == 0) + print = 1; + continue; + } + } + for (; len > 0; len -= written) { + if ((written = write(STDOUT_FILENO, cp, len)) == -1) { + mandoc_msg(MANDOCERR_WRITE, 0, 0, + "%s", strerror(errno)); + goto done; + } + } + } + if (ferror(stream)) + mandoc_msg(MANDOCERR_GETLINE, lno, 0, "%s", strerror(errno)); + +done: + free(line); + if (stream != NULL) + fclose(stream); +} + +static int +woptions(char *arg, enum mandoc_os *os_e, int *wstop) +{ + char *v, *o; + const char *toks[11]; + + toks[0] = "stop"; + toks[1] = "all"; + toks[2] = "base"; + toks[3] = "style"; + toks[4] = "warning"; + toks[5] = "error"; + toks[6] = "unsupp"; + toks[7] = "fatal"; + toks[8] = "openbsd"; + toks[9] = "netbsd"; + toks[10] = NULL; + + while (*arg) { + o = arg; + switch (getsubopt(&arg, (char * const *)toks, &v)) { + case 0: + *wstop = 1; + break; + case 1: + case 2: + mandoc_msg_setmin(MANDOCERR_BASE); + break; + case 3: + mandoc_msg_setmin(MANDOCERR_STYLE); + break; + case 4: + mandoc_msg_setmin(MANDOCERR_WARNING); + break; + case 5: + mandoc_msg_setmin(MANDOCERR_ERROR); + break; + case 6: + mandoc_msg_setmin(MANDOCERR_UNSUPP); + break; + case 7: + mandoc_msg_setmin(MANDOCERR_BADARG); + break; + case 8: + mandoc_msg_setmin(MANDOCERR_BASE); + *os_e = MANDOC_OS_OPENBSD; + break; + case 9: + mandoc_msg_setmin(MANDOCERR_BASE); + *os_e = MANDOC_OS_NETBSD; + break; + default: + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, "-W %s", o); + return -1; + } + } + return 0; +} + +/* + * Wait until moved to the foreground, + * then fork the pager and wait for the user to close it. + */ +static void +run_pager(struct tag_files *tag_files, char *tag_target) +{ + int signum, status; + pid_t man_pgid, tc_pgid; + pid_t pager_pid, wait_pid; + + man_pgid = getpgid(0); + tag_files->tcpgid = man_pgid == getpid() ? 
getpgid(getppid()) : + man_pgid; + pager_pid = 0; + signum = SIGSTOP; + + for (;;) { + /* Stop here until moved to the foreground. */ + + tc_pgid = tcgetpgrp(STDOUT_FILENO); + if (tc_pgid != man_pgid) { + if (tc_pgid == pager_pid) { + (void)tcsetpgrp(STDOUT_FILENO, man_pgid); + if (signum == SIGTTIN) + continue; + } else + tag_files->tcpgid = tc_pgid; + kill(0, signum); + continue; + } + + /* Once in the foreground, activate the pager. */ + + if (pager_pid) { + (void)tcsetpgrp(STDOUT_FILENO, pager_pid); + kill(pager_pid, SIGCONT); + } else + pager_pid = spawn_pager(tag_files, tag_target); + + /* Wait for the pager to stop or exit. */ + + while ((wait_pid = waitpid(pager_pid, &status, + WUNTRACED)) == -1 && errno == EINTR) + continue; + + if (wait_pid == -1) { + mandoc_msg(MANDOCERR_WAIT, 0, 0, + "%s", strerror(errno)); + break; + } + if (!WIFSTOPPED(status)) + break; + + signum = WSTOPSIG(status); + } +} + +static pid_t +spawn_pager(struct tag_files *tag_files, char *tag_target) +{ + const struct timespec timeout = { 0, 100000000 }; /* 0.1s */ +#define MAX_PAGER_ARGS 16 + char *argv[MAX_PAGER_ARGS]; + const char *pager; + char *cp; + int argc, use_ofn; + pid_t pager_pid; + + assert(tag_files->ofd == -1); + assert(tag_files->tfs == NULL); + + pager = getenv("MANPAGER"); + if (pager == NULL || *pager == '\0') + pager = getenv("PAGER"); + if (pager == NULL || *pager == '\0') + pager = "more -s"; + cp = mandoc_strdup(pager); + + /* + * Parse the pager command into words. + * Intentionally do not do anything fancy here. + */ + + argc = 0; + while (argc + 5 < MAX_PAGER_ARGS) { + argv[argc++] = cp; + cp = strchr(cp, ' '); + if (cp == NULL) + break; + *cp++ = '\0'; + while (*cp == ' ') + cp++; + if (*cp == '\0') + break; + } + + /* For more(1) and less(1), use the tag file. 
*/ + + use_ofn = 1; + if (use_ofn) + argv[argc++] = tag_files->ofn; + argv[argc] = NULL; + + switch (pager_pid = fork()) { + case -1: + mandoc_msg(MANDOCERR_FORK, 0, 0, "%s", strerror(errno)); + exit(mandoc_msg_getrc()); + case 0: + break; + default: + (void)setpgid(pager_pid, 0); + (void)tcsetpgrp(STDOUT_FILENO, pager_pid); + if (pledge("stdio rpath tmppath tty proc", NULL) == -1) { + mandoc_msg(MANDOCERR_PLEDGE, 0, 0, + "%s", strerror(errno)); + exit(mandoc_msg_getrc()); + } + tag_files->pager_pid = pager_pid; + return pager_pid; + } + + /* + * The child process becomes the pager. + * Do not start it before controlling the terminal. + */ + + while (tcgetpgrp(STDOUT_FILENO) != getpid()) + nanosleep(&timeout, NULL); + + execvp(argv[0], argv); + mandoc_msg(MANDOCERR_EXEC, 0, 0, "%s: %s", argv[0], strerror(errno)); + _exit(mandoc_msg_getrc()); +} diff --git a/usr.bin/mandoc/main.h b/usr.bin/mandoc/main.h new file mode 100644 index 0000000..ee8e10d --- /dev/null +++ b/usr.bin/mandoc/main.h @@ -0,0 +1,53 @@ +/* $OpenBSD: main.h,v 1.25 2019/03/03 13:01:47 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +struct roff_meta; +struct manoutput; + +/* + * Definitions for main.c-visible output device functions, e.g., -Thtml + * and -Tascii. Note that ascii_alloc() is named as such in + * anticipation of latin1_alloc() and so on, all of which map into the + * terminal output routines with different character settings. + */ + +void *html_alloc(const struct manoutput *); +void html_mdoc(void *, const struct roff_meta *); +void html_man(void *, const struct roff_meta *); +void html_reset(void *); +void html_free(void *); + +void tree_mdoc(void *, const struct roff_meta *); +void tree_man(void *, const struct roff_meta *); + +void man_mdoc(void *, const struct roff_meta *); + +void *locale_alloc(const struct manoutput *); +void *utf8_alloc(const struct manoutput *); +void *ascii_alloc(const struct manoutput *); +void ascii_free(void *); + +void *pdf_alloc(const struct manoutput *); +void *ps_alloc(const struct manoutput *); +void pspdf_free(void *); + +void terminal_mdoc(void *, const struct roff_meta *); +void terminal_man(void *, const struct roff_meta *); +void terminal_sepline(void *); + +void markdown_mdoc(void *, const struct roff_meta *); diff --git a/usr.bin/mandoc/makewhatis.8 b/usr.bin/mandoc/makewhatis.8 new file mode 100644 index 0000000..c42bcc4 --- /dev/null +++ b/usr.bin/mandoc/makewhatis.8 @@ -0,0 +1,228 @@ +.\" $OpenBSD: makewhatis.8,v 1.14 2017/05/17 22:26:52 schwarze Exp $ +.\" +.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2011, 2012, 2014, 2017 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 17 2017 $ +.Dt MAKEWHATIS 8 +.Os +.Sh NAME +.Nm makewhatis +.Nd index UNIX manuals +.Sh SYNOPSIS +.Nm +.Op Fl aDnpQ +.Op Fl T Cm utf8 +.Op Fl C Ar file +.Nm +.Op Fl aDnpQ +.Op Fl T Cm utf8 +.Ar dir ... +.Nm +.Op Fl DnpQ +.Op Fl T Cm utf8 +.Fl d Ar dir +.Op Ar +.Nm +.Op Fl Dnp +.Op Fl T Cm utf8 +.Fl u Ar dir +.Op Ar +.Nm +.Op Fl DQ +.Fl t Ar +.Sh DESCRIPTION +The +.Nm +utility extracts keywords from +.Ux +manuals and indexes them in a database for fast retrieval by +.Xr apropos 1 , +.Xr whatis 1 , +and +.Xr man 1 Ns 's +.Fl k +option. +.Pp +By default, +.Nm +creates a database in each +.Ar dir +using the files +.Sm off +.Sy man Ar section Li / +.Op Ar arch Li / +.Ar title . section +.Sm on +and +.Sm off +.Sy cat Ar section Li / +.Op Ar arch Li / +.Ar title . Sy 0 +.Sm on +in that directory. +Existing databases are replaced. +If a directory contains no manual pages, no database is created in that +directory. +If +.Ar dir +is not provided, +.Nm +uses the default paths stipulated by +.Xr man.conf 5 . +.Pp +The arguments are as follows: +.Bl -tag -width "-C file" +.It Fl a +Use all directories and files found below +.Ar dir ... . +.It Fl C Ar file +Specify an alternative configuration +.Ar file +in +.Xr man.conf 5 +format. +.It Fl D +Display all files added or removed to the index. +With a second +.Fl D , +also show all keywords added for each file. +.It Fl d Ar dir +Merge (remove and re-add) +.Ar +to the database in +.Ar dir . +.It Fl n +Do not create or modify any database; scan and parse only, +and print manual page names and descriptions to standard output. 
+.It Fl p +Print warnings about potential problems with manual pages +to the standard error output. +.It Fl Q +Quickly build reduced-size databases +by reading only the NAME sections of manuals. +The resulting databases will usually contain names and descriptions only. +.It Fl T Cm utf8 +Use UTF-8 encoding instead of ASCII for strings stored in the databases. +.It Fl t Ar +Check the given +.Ar files +for potential problems. +Implies +.Fl a , +.Fl n , +and +.Fl p . +All diagnostic messages are printed to the standard output; +the standard error output is not used. +.It Fl u Ar dir +Remove +.Ar +from the database in +.Ar dir . +If that causes the database to become empty, also delete the database file. +.El +.Pp +If fatal parse errors are encountered while parsing, the offending file +is printed to stderr, omitted from the index, and the parse continues +with the next input file. +.Sh ENVIRONMENT +.Bl -tag -width MANPATH +.It Ev MANPATH +A colon-separated list of directories to create databases in. +Ignored if a +.Ar dir +argument or the +.Fl t +option is specified. +.El +.Sh FILES +.Bl -tag -width Ds +.It Pa mandoc.db +A database of manpages relative to the directory of the file. +This file is portable across architectures and systems, so long as the +manpage hierarchy it indexes does not change. +.It Pa /etc/man.conf +The default +.Xr man 1 +configuration file. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +The output databases are corrupt and should be removed. 
+.El +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr whatis 1 , +.Xr man.conf 5 +.Sh HISTORY +A +.Nm +utility first appeared in +.Bx 2 . +It was rewritten in +.Xr perl 1 +for +.Ox 2.7 +and in C for +.Ox 5.6 . +.Pp +The +.Ar dir +argument first appeared in +.Nx 1.0 ; +the options +.Fl dpt +in +.Ox 2.7 ; +the option +.Fl u +in +.Ox 3.4 ; +and the options +.Fl aCDnQT +in +.Ox 5.6 . +.Sh AUTHORS +.An -nosplit +.An Bill Joy +wrote the original +.Bx +.Nm +in February 1979, +.An Marc Espie +started the Perl version in 2000, +and the current version of +.Nm +was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/usr.bin/mandoc/man.1 b/usr.bin/mandoc/man.1 new file mode 100644 index 0000000..2d4d33a --- /dev/null +++ b/usr.bin/mandoc/man.1 @@ -0,0 +1,431 @@ +.\" $OpenBSD: man.1,v 1.36 2020/02/10 13:49:04 schwarze Exp $ +.\" +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" Copyright (c) 2003, 2007, 2008, 2014 Jason McIntyre <jmc@openbsd.org> +.\" Copyright (c) 2010, 2011, 2014-2020 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)man.1 8.2 (Berkeley) 1/2/94 +.\" +.Dd $Mdocdate: February 10 2020 $ +.Dt MAN 1 +.Os +.Sh NAME +.Nm man +.Nd display manual pages +.Sh SYNOPSIS +.Nm man +.Op Fl acfhklw +.Op Fl C Ar file +.Op Fl M Ar path +.Op Fl m Ar path +.Op Fl S Ar subsection +.Op Oo Fl s Oc Ar section +.Ar name ... +.Sh DESCRIPTION +The +.Nm +utility +displays the +manual page entitled +.Ar name . +Pages may be selected according to +a specific category +.Pq Ar section +or +machine architecture +.Pq Ar subsection . +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Display all matching manual pages. +.It Fl C Ar file +Use the specified +.Ar file +instead of the default configuration file. +This permits users to configure their own manual environment. +See +.Xr man.conf 5 +for a description of the contents of this file. +.It Fl c +Copy the manual page to the standard output instead of using +.Xr more 1 +to paginate it. +This is done by default if the standard output is not a terminal device. +.Pp +When using +.Fl c , +most terminal devices are unable to show the markup. +To print the output of +.Nm +to the terminal with markup but without using a pager, pipe it to +.Xr ul 1 . 
+To remove the markup, pipe the output to +.Xr col 1 +.Fl b +instead. +.It Fl f +A synonym for +.Xr whatis 1 . +It searches for +.Ar name +in manual page names and displays the header lines from all matching pages. +The search is case insensitive and matches whole words only. +.It Fl h +Display only the SYNOPSIS lines of the requested manual pages. +Implies +.Fl a +and +.Fl c . +.It Fl k +A synonym for +.Xr apropos 1 . +Instead of +.Ar name , +an expression can be provided using the syntax described in the +.Xr apropos 1 +manual. +By default, it displays the header lines of all matching pages. +.It Fl l +A synonym for +.Xr mandoc 1 . +The +.Ar name +arguments are interpreted as filenames. +No search is done and +.Ar file , +.Ar path , +.Ar section , +.Ar subsection , +and +.Fl w +are ignored. +This option implies +.Fl a . +.It Fl M Ar path +Override the list of directories to search for manual pages. +The supplied +.Ar path +must be a colon +.Pq Ql \&: +separated list of directories. +This option also overrides the environment variable +.Ev MANPATH +and any directories specified in the +.Xr man.conf 5 +file. +.It Fl m Ar path +Augment the list of directories to search for manual pages. +The supplied +.Ar path +must be a colon +.Pq Ql \&: +separated list of directories. +These directories will be searched before those specified using the +.Fl M +option, the +.Ev MANPATH +environment variable, the +.Xr man.conf 5 +file, or the default directories. +.It Fl S Ar subsection +Only show pages for the specified +.Xr machine 1 +architecture. +.Ar subsection +is case insensitive. +.Pp +By default manual pages for all architectures are installed. +Therefore this option can be used to view pages for one +architecture whilst using another. +.Pp +This option overrides the +.Ev MACHINE +environment variable. +.It Oo Fl s Oc Ar section +Only select manuals from the specified +.Ar section . 
+The currently available sections are: +.Pp +.Bl -tag -width "localXXX" -offset indent -compact +.It 1 +General commands +.Pq tools and utilities . +.It 2 +System calls and error numbers. +.It 3 +Library functions. +.It 3p +.Xr perl 1 +programmer's reference guide. +.It 4 +Device drivers. +.It 5 +File formats. +.It 6 +Games. +.It 7 +Miscellaneous information. +.It 8 +System maintenance and operation commands. +.It 9 +Kernel internals. +.El +.It Fl w +List the pathnames of all matching manual pages instead of displaying +any of them. +If no +.Ar name +is given, list the directories that would be searched. +.El +.Pp +The options +.Fl IKOTW +are also supported and are documented in +.Xr mandoc 1 . +The options +.Fl fkl +are mutually exclusive and override each other. +.Pp +The search starts with the +.Fl m +argument if provided, then continues with the +.Fl M +argument, the +.Ev MANPATH +variable, the +.Ic manpath +entries in the +.Xr man.conf 5 +file, or with +.Pa /usr/share/man : Ns Pa /usr/X11R6/man : Ns Pa /usr/local/man +by default. +Within each of these, directories are searched in the order provided. +Within each directory, the search proceeds according to the following +list of sections: 1, 8, 6, 2, 3, 5, 7, 4, 9, 3p. +The first match found is shown. +.Pp +The +.Xr mandoc.db 5 +database is used for looking up manual page entries. +In cases where the database is absent, outdated, or corrupt, +.Nm +falls back to looking for files called +.Ar name . Ns Ar section . +If both a formatted and an unformatted version of the same manual page, +for example +.Pa cat1/foo.0 +and +.Pa man1/foo.1 , +exist in the same directory, only the unformatted version is used. +The database is kept up to date with +.Xr makewhatis 8 , +which is run by the +.Xr weekly 8 +maintenance script. +.Pp +Guidelines for writing +man pages can be found in +.Xr mdoc 7 . 
+.Sh ENVIRONMENT +.Bl -tag -width MANPATHX +.It Ev MACHINE +As some manual pages are intended only for specific architectures, +.Nm +searches any subdirectories, +with the same name as the current architecture, +in every directory which it searches. +Machine specific areas are checked before general areas. +The current machine type may be overridden by setting the environment +variable +.Ev MACHINE +to the name of a specific architecture, +or with the +.Fl S +option. +.Ev MACHINE +is case insensitive. +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +is used instead of the standard pagination program, +.Xr more 1 . +If +.Xr less 1 +is used, the interactive +.Ic :t +command can be used to go to the definitions of various terms, for +example command line options, command modifiers, internal commands, +environment variables, function names, preprocessor macros, +.Xr errno 2 +values, and some other emphasized words. +Some terms may have defining text at more than one place. +In that case, the +.Xr less 1 +interactive commands +.Ic t +and +.Ic T +can be used to move to the next and to the previous place providing +information about the term last searched for with +.Ic :t . +The +.Fl O Cm tag Ns Op = Ns Ar term +option documented in the +.Xr mandoc 1 +manual opens a manual page at the definition of a specific +.Ar term +rather than at the beginning. +.It Ev MANPATH +Override the standard search path which is either specified in +.Xr man.conf 5 +or the default path. +The format of +.Ev MANPATH +is a colon +.Pq Ql \&: +separated list of directories. +Invalid directories are ignored. +Overridden by +.Fl M , +ignored if +.Fl l +is specified. +.Pp +If +.Ev MANPATH +begins with a colon, it is appended to the standard path; +if it ends with a colon, it is prepended to the standard path; +or if it contains two adjacent colons, +the standard path is inserted between the colons. 
+.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +is used. +.El +.Sh FILES +.Bl -tag -width /etc/man.conf -compact +.It Pa /etc/man.conf +default +.Nm +configuration file +.El +.Sh EXIT STATUS +.Ex -std man +See +.Xr mandoc 1 +for details. +.Sh EXAMPLES +Format a page for pasting extracts into an email message \(em +avoid printing any UTF-8 characters, reduce the width to ease +quoting in replies, and remove markup: +.Pp +.Dl $ man -T ascii -O width=65 pledge | col -b +.Pp +Read a typeset page in a PDF viewer: +.Pp +.Dl $ MANPAGER=mupdf man -T pdf lpd +.Sh SEE ALSO +.Xr apropos 1 , +.Xr col 1 , +.Xr mandoc 1 , +.Xr ul 1 , +.Xr whereis 1 , +.Xr man.conf 5 , +.Xr mdoc 7 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl aCcfhIKlMmOSsTWw , +as well as the environment variables +.Ev MACHINE , +.Ev MANPAGER , +and +.Ev MANPATH , +are extensions to that specification. +.Sh HISTORY +A +.Nm +command first appeared in +.At v2 . +.Pp +The +.Fl w +option first appeared in +.At v7 ; +.Fl f +and +.Fl k +in +.Bx 4 ; +.Fl M +in +.Bx 4.3 ; +.Fl a +in +.Bx 4.3 Tahoe ; +.Fl c +and +.Fl m +in +.Bx 4.3 Reno ; +.Fl h +in +.Bx 4.3 Net/2 ; +.Fl C +in +.Nx 1.0 ; +.Fl s +and +.Fl S +in +.Ox 2.3 ; +and +.Fl I , +.Fl K , +.Fl l , +.Fl O , +and +.Fl W +in +.Ox 5.7 . +The +.Fl T +option first appeared in +.At III +and was also added in +.Ox 5.7 . 
diff --git a/usr.bin/mandoc/man.c b/usr.bin/mandoc/man.c new file mode 100644 index 0000000..934f2b3 --- /dev/null +++ b/usr.bin/mandoc/man.c @@ -0,0 +1,343 @@ +/* $OpenBSD: man.c,v 1.135 2019/01/05 00:36:46 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" + +static char *man_hasc(char *); +static int man_ptext(struct roff_man *, int, char *, int); +static int man_pmacro(struct roff_man *, int, char *, int); + + +int +man_parseln(struct roff_man *man, int ln, char *buf, int offs) +{ + + if (man->last->type != ROFFT_EQN || ln > man->last->line) + man->flags |= MAN_NEWLINE; + + return roff_getcontrol(man->roff, buf, &offs) ? 
+ man_pmacro(man, ln, buf, offs) : + man_ptext(man, ln, buf, offs); +} + +/* + * If the string ends with \c, return a pointer to the backslash. + * Otherwise, return NULL. + */ +static char * +man_hasc(char *start) +{ + char *cp, *ep; + + ep = strchr(start, '\0') - 2; + if (ep < start || ep[0] != '\\' || ep[1] != 'c') + return NULL; + for (cp = ep; cp > start; cp--) + if (cp[-1] != '\\') + break; + return (ep - cp) % 2 ? NULL : ep; +} + +void +man_descope(struct roff_man *man, int line, int offs, char *start) +{ + /* Trailing \c keeps next-line scope open. */ + + if (start != NULL && man_hasc(start) != NULL) + return; + + /* + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scopes (if applicable), + * then close out the block scope (also if applicable). + */ + + if (man->flags & MAN_ELINE) { + while (man->last->parent->type != ROFFT_ROOT && + man_macro(man->last->parent->tok)->flags & MAN_ESCOPED) + man_unscope(man, man->last->parent); + man->flags &= ~MAN_ELINE; + } + if ( ! (man->flags & MAN_BLINE)) + return; + man_unscope(man, man->last->parent); + roff_body_alloc(man, line, offs, man->last->tok); + man->flags &= ~(MAN_BLINE | ROFF_NONOFILL); +} + +static int +man_ptext(struct roff_man *man, int line, char *buf, int offs) +{ + int i; + char *ep; + + /* In no-fill mode, whitespace is preserved on text lines. */ + + if (man->flags & ROFF_NOFILL) { + roff_word_alloc(man, line, offs, buf + offs); + man_descope(man, line, offs, buf + offs); + return 1; + } + + for (i = offs; buf[i] == ' '; i++) + /* Skip leading whitespace. */ ; + + /* + * Blank lines are ignored in next line scope + * and right after headings and cancel preceding \c, + * but add a single vertical space elsewhere. 
+ */ + + if (buf[i] == '\0') { + if (man->flags & (MAN_ELINE | MAN_BLINE)) { + mandoc_msg(MANDOCERR_BLK_BLANK, line, 0, NULL); + return 1; + } + if (man->last->tok == MAN_SH || man->last->tok == MAN_SS) + return 1; + if (man->last->type == ROFFT_TEXT && + ((ep = man_hasc(man->last->string)) != NULL)) { + *ep = '\0'; + return 1; + } + roff_elem_alloc(man, line, offs, ROFF_sp); + man->next = ROFF_NEXT_SIBLING; + return 1; + } + + /* + * Warn if the last un-escaped character is whitespace. Then + * strip away the remaining spaces (tabs stay!). + */ + + i = (int)strlen(buf); + assert(i); + + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { + if (i > 1 && '\\' != buf[i - 2]) + mandoc_msg(MANDOCERR_SPACE_EOL, line, i - 1, NULL); + + for (--i; i && ' ' == buf[i]; i--) + /* Spin back to non-space. */ ; + + /* Jump ahead of escaped whitespace. */ + i += '\\' == buf[i] ? 2 : 1; + + buf[i] = '\0'; + } + roff_word_alloc(man, line, offs, buf + offs); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(i); + if (mandoc_eos(buf, (size_t)i)) + man->last->flags |= NODE_EOS; + + man_descope(man, line, offs, buf + offs); + return 1; +} + +static int +man_pmacro(struct roff_man *man, int ln, char *buf, int offs) +{ + struct roff_node *n; + const char *cp; + size_t sz; + enum roff_tok tok; + int ppos; + int bline; + + /* Determine the line macro. */ + + ppos = offs; + tok = TOKEN_NONE; + for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) + offs++; + if (sz > 0 && sz < 4) + tok = roffhash_find(man->manmac, buf + ppos, sz); + if (tok == TOKEN_NONE) { + mandoc_msg(MANDOCERR_MACRO, ln, ppos, "%s", buf + ppos - 1); + return 1; + } + + /* Skip a leading escape sequence or tab. 
*/ + + switch (buf[offs]) { + case '\\': + cp = buf + offs + 1; + mandoc_escape(&cp, NULL, NULL); + offs = cp - buf; + break; + case '\t': + offs++; + break; + default: + break; + } + + /* Jump to the next non-whitespace word. */ + + while (buf[offs] == ' ') + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if (buf[offs] == '\0' && buf[offs - 1] == ' ') + mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL); + + /* + * Some macros break next-line scopes; otherwise, remember + * whether we are in next-line scope for a block head. + */ + + man_breakscope(man, tok); + bline = man->flags & MAN_BLINE; + + /* + * If the line in next-line scope ends with \c, keep the + * next-line scope open for the subsequent input line. + * That is not at all portable, only groff >= 1.22.4 + * does it, but *if* this weird idiom occurs in a manual + * page, that's very likely what the author intended. + */ + + if (bline && man_hasc(buf + offs)) + bline = 0; + + /* Call to handler... */ + + (*man_macro(tok)->fp)(man, tok, ln, ppos, &offs, buf); + + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (man->quick && tok == MAN_SH) { + n = man->last; + if (n->type == ROFFT_BODY && + strcmp(n->prev->child->string, "NAME")) + return 2; + } + + /* + * If we are in a next-line scope for a block head, + * close it out now and switch to the body, + * unless the next-line scope is allowed to continue. + */ + + if (bline == 0 || + (man->flags & MAN_BLINE) == 0 || + man->flags & MAN_ELINE || + man_macro(tok)->flags & MAN_NSCOPED) + return 1; + + man_unscope(man, man->last->parent); + roff_body_alloc(man, ln, ppos, man->last->tok); + man->flags &= ~(MAN_BLINE | ROFF_NONOFILL); + return 1; +} + +void +man_breakscope(struct roff_man *man, int tok) +{ + struct roff_node *n; + + /* + * An element next line scope is open, + * and the new macro is not allowed inside elements. 
+ * Delete the element that is being broken. + */ + + if (man->flags & MAN_ELINE && (tok < MAN_TH || + (man_macro(tok)->flags & MAN_NSCOPED) == 0)) { + n = man->last; + if (n->type == ROFFT_TEXT) + n = n->parent; + if (n->tok < MAN_TH || + (man_macro(n->tok)->flags & (MAN_NSCOPED | MAN_ESCOPED)) + == MAN_NSCOPED) + n = n->parent; + + mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos, + "%s breaks %s", roff_name[tok], roff_name[n->tok]); + + roff_node_delete(man, n); + man->flags &= ~MAN_ELINE; + } + + /* + * Weird special case: + * Switching fill mode closes section headers. + */ + + if (man->flags & MAN_BLINE && + (tok == ROFF_nf || tok == ROFF_fi) && + (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) { + n = man->last; + man_unscope(man, n); + roff_body_alloc(man, n->line, n->pos, n->tok); + man->flags &= ~(MAN_BLINE | ROFF_NONOFILL); + } + + /* + * A block header next line scope is open, + * and the new macro is not allowed inside block headers. + * Delete the block that is being broken. 
+ */ + + if (man->flags & MAN_BLINE && tok != ROFF_nf && tok != ROFF_fi && + (tok < MAN_TH || man_macro(tok)->flags & MAN_XSCOPE)) { + n = man->last; + if (n->type == ROFFT_TEXT) + n = n->parent; + if (n->tok < MAN_TH || + (man_macro(n->tok)->flags & MAN_XSCOPE) == 0) + n = n->parent; + + assert(n->type == ROFFT_HEAD); + n = n->parent; + assert(n->type == ROFFT_BLOCK); + assert(man_macro(n->tok)->flags & MAN_BSCOPED); + + mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos, + "%s breaks %s", roff_name[tok], roff_name[n->tok]); + + roff_node_delete(man, n); + man->flags &= ~(MAN_BLINE | ROFF_NONOFILL); + } +} diff --git a/usr.bin/mandoc/man.cgi.8 b/usr.bin/mandoc/man.cgi.8 new file mode 100644 index 0000000..a524f58 --- /dev/null +++ b/usr.bin/mandoc/man.cgi.8 @@ -0,0 +1,426 @@ +.\" $OpenBSD: man.cgi.8,v 1.22 2018/05/20 21:48:23 schwarze Exp $ +.\" +.\" Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: May 20 2018 $ +.Dt MAN.CGI 8 +.Os +.Sh NAME +.Nm man.cgi +.Nd CGI program to search and display manual pages +.Sh DESCRIPTION +The +.Nm +CGI program searches for manual pages on a WWW server +and displays them to HTTP clients, +providing functionality equivalent to the +.Xr man 1 +and +.Xr apropos 1 +utilities. +It can use multiple manual trees in parallel. +.Ss HTML search interface +At the top of each generated HTML page, +.Nm +displays a search form containing these elements: +.Bl -enum +.It +An input box for search queries, expecting +either a name of a manual page or an +.Ar expression +using the syntax described in the +.Xr apropos 1 +manual; filling this in is required for each search. +.Pp +The expression is broken into words at whitespace. +Whitespace characters and backslashes can be escaped +by prepending a backslash. +The effect of prepending a backslash to another character is undefined; +in the current implementation, it has no effect. +.It +A +.Xr man 1 +submit button. +The string in the input box is interpreted as the name of a manual page. +.It +An +.Xr apropos 1 +submit button. +The string in the input box is interpreted as a search +.Ar expression . +.It +A dropdown menu to optionally select a manual section. +If one is provided, it has the same effect as the +.Xr man 1 +and +.Xr apropos 1 +.Fl s +option. +Otherwise, pages from all sections are shown. +.It +A dropdown menu to optionally select an architecture. +If one is provided, it has the same effect as the +.Xr man 1 +and +.Xr apropos 1 +.Fl S +option. +By default, pages for all architectures are shown. +.It +A dropdown menu to select a manual tree. +If the configuration file +.Pa /var/www/man/manpath.conf +contains only one manpath, the dropdown menu is not shown. +By default, the first manpath given in the file is used. +.El +.Ss Program output +The +.Nm +program generates five kinds of output pages: +.Bl -tag -width Ds +.It The index page. 
+This is returned when calling +.Nm +without +.Ev PATH_INFO +and without a +.Ev QUERY_STRING . +It serves as a starting point for using the program +and shows the search form only. +.It A list page. +Lists are returned when searches match more than one manual page. +The first column shows the names and section numbers of manuals +as clickable links. +The second column shows the one-line descriptions of the manuals. +For +.Xr man 1 +style searches, the content of the first manual page follows the list. +.It A manual page. +This output format is used when a search matches exactly one +manual page, or when a link on a list page or an +.Ic \&Xr +link on another manual page is followed. +.It A no-result page. +This is shown when a search request returns no results - +either because it violates the query syntax, or because +the search does not match any manual pages. +.It \&An error page. +This cannot happen by merely clicking the +.Dq Search +button, but only by manually entering an invalid URI. +It does not show the search form, but only an error message +and a link back to the index page. +.El +.Ss Setup +For each manual tree, create one first-level subdirectory below +.Pa /var/www/man . +The name of one of these directories is called a +.Dq manpath +in the context of +.Nm . +Create a single ASCII text file +.Pa /var/www/man/manpath.conf +containing the names of these directories, one per line. +The directory given first is used as the default manpath. +.Pp +Inside each of these directories, use the same directory and file +structure as found below +.Pa /usr/share/man , +that is, second-level subdirectories +.Pa /var/www/man/*/man1 , /var/www/man/*/man2 +etc. containing source +.Xr mdoc 7 +and +.Xr man 7 +manuals with file name extensions matching the section numbers, +second-level subdirectories +.Pa /var/www/man/*/cat1 , /var/www/man/*/cat2 +etc. 
containing preformatted manuals with the file name extension +.Sq 0 , +and optional third-level subdirectories for architectures. +Use +.Xr makewhatis 8 +to create a +.Xr mandoc.db 5 +database inside each manpath. +.Pp +Configure your web server to execute CGI programs located in +.Pa /cgi-bin . +When using +.Ox +.Xr httpd 8 , +the +.Xr slowcgi 8 +proxy daemon is needed to translate FastCGI requests to plain old CGI. +.Pp +To compile +.Nm , +first copy +.Pa cgi.h.example +to +.Pa cgi.h +and edit it according to your needs. +It contains the following compile-time definitions: +.Bl -tag -width Ds +.It Dv COMPAT_OLDURI +Only useful for running on www.openbsd.org to deal with old URIs containing +.Qq "manpath=OpenBSD " +where the blank character has to be translated to a hyphen. +When compiling for other sites, this definition can be deleted. +.It Dv CSS_DIR +An optional file system path to the directory containing the file +.Pa mandoc.css , +to be specified relative to the server's document root, +and to be specified without a trailing slash. +When empty, the CSS file is assumed to be in the document root. +Otherwise, a leading slash is needed. +This is used in generated HTML code. +.It Dv CUSTOMIZE_TITLE +An ASCII string to be used for the HTML <TITLE> element. +.It Dv MAN_DIR +A file system path to the +.Nm +data directory relative to the web server +.Xr chroot 2 +directory, to be specified with a leading slash and without a trailing slash. +It needs to have at least one component; the root directory cannot be used +for this purpose. +The files +.Pa manpath.conf , +.Pa header.html , +and +.Pa footer.html +are looked up in this directory. +It is also prepended to the manpath when opening +.Xr mandoc.db 5 +and manual page files. +.It Dv SCRIPT_NAME +The initial component of URIs, to be specified without leading +and trailing slashes. +It can be empty. 
+.El +.Pp +After editing +.Pa cgi.h , +run +.Pp +.Dl make man.cgi +.Pp +and copy the resulting binary to the proper location, +for example using the command: +.Pp +.Dl make installcgi +.Pp +In addition to that, make sure the default manpath contains the files +.Pa man1/apropos.1 +and +.Pa man8/man.cgi.8 , +or the documentation links at the bottom of the index page will not work. +.Ss URI interface +.Nm +uniform resource identifiers are not needed for interactive use, +but can be useful for deep linking. +They consist of: +.Bl -enum +.It +The +.Cm http:// +or +.Cm https:// +protocol specifier. +.It +The host name. +.It +The +.Dv SCRIPT_NAME , +preceded by a slash unless empty. +.It +To show a single page, a slash, the manpath, another slash, +and the name of the requested file, for example +.Pa /OpenBSD-current/man1/mandoc.1 . +This can be abbreviated according to the following syntax: +.Sm off +.Op / Ar manpath +.Op / Cm man Ar sec +.Op / Ar arch +.Pf / Ar name Op \&. Ar sec +.Sm on +.It +For searches, a query string starting with a question mark +and consisting of +.Ar key Ns = Ns Ar value +pairs, separated by ampersands, for example +.Pa ?manpath=OpenBSD-current&query=mandoc . +Supported keys are +.Cm manpath , +.Cm query , +.Cm sec , +.Cm arch , +corresponding to +.Xr apropos 1 +.Fl M , +.Ar expression , +.Fl s , +.Fl S , +respectively, and +.Cm apropos , +which is a boolean parameter to select or deselect the +.Xr apropos 1 +query mode. +For backward compatibility with the traditional +.Nm , +.Cm sektion +is supported as an alias for +.Cm sec . +.El +.Ss Restricted character set +For security reasons, in particular to prevent cross site scripting +attacks, some strings used by +.Nm +can only contain the following characters: +.Pp +.Bl -dash -compact -offset indent +.It +lower case and upper case ASCII letters +.It +the ten decimal digits +.It +the dash +.Pq Sq - +.It +the dot +.Pq Sq \&. 
+.It +the slash +.Pq Sq / +.It +the underscore +.Pq Sq _ +.El +.Pp +In particular, this applies to all manpaths and architecture names. +.Sh ENVIRONMENT +The web server may pass the following CGI variables to +.Nm : +.Bl -tag -width Ds +.It Ev SCRIPT_NAME +The initial part of the URI passed from the client to the server, +starting after the server's host name and ending before +.Ev PATH_INFO . +This is ignored by +.Nm . +When constructing URIs for links and redirections, the +.Dv SCRIPT_NAME +preprocessor constant is used instead. +.It Ev PATH_INFO +The final part of the URI path passed from the client to the server, +starting after the +.Ev SCRIPT_NAME +and ending before the +.Ev QUERY_STRING . +It is used by the +.Cm show +page to acquire the manpath and filename it needs. +.It Ev QUERY_STRING +The HTTP query string passed from the client to the server. +It is the final part of the URI, after the question mark. +It is used by the +.Cm search +page to acquire the named parameters it needs. +.El +.Sh FILES +.Bl -tag -width Ds +.It Pa /var/www +Default web server +.Xr chroot 2 +directory. +All the following paths are specified relative to this directory. +.It Pa /cgi-bin/man.cgi +The usual file system path to the +.Nm +program inside the web server +.Xr chroot 2 +directory. +A different name can be chosen, but in any case, it needs to be configured in +.Xr httpd.conf 5 . +.It Pa /htdocs +The file system path to the server document root directory +relative to the server +.Xr chroot 2 +directory. +This is part of the web server configuration and not specific to +.Nm . +.It Pa /htdocs/mandoc.css +A style sheet for +.Xr mandoc 1 +HTML styling, referenced from each generated HTML page. +.It Pa /man +Default +.Nm +data directory containing all the manual trees. +Can be overridden by +.Dv MAN_DIR . +.It Pa /man/manpath.conf +The list of available manpaths, one per line. 
+If any of the lines in this file contains a slash +.Pq Sq / +or any character not contained in the +.Sx Restricted character set , +.Nm +reports an internal server error and exits without doing anything. +.It Pa /man/header.html +An optional file containing static HTML code to be inserted right +after opening the <BODY> element. +.It Pa /man/footer.html +An optional file containing static HTML code to be inserted right +before closing the <BODY> element. +.It Pa /man/OpenBSD-current/man1/mandoc.1 +An example +.Xr mdoc 7 +source file located below the +.Dq OpenBSD-current +manpath. +.El +.Sh COMPATIBILITY +The +.Nm +CGI program is call-compatible with queries from the traditional +.Pa man.cgi +script by Wolfram Schneider. +However, the output looks quite different. +.Sh SEE ALSO +.Xr apropos 1 , +.Xr mandoc.db 5 , +.Xr makewhatis 8 , +.Xr slowcgi 8 +.Sh HISTORY +A version of +.Nm +based on +.Xr mandoc 1 +first appeared in mdocml-1.12.1 (March 2012). +The current +.Xr mandoc.db 5 +database format first appeared in +.Ox 6.1 . +.Sh AUTHORS +.An -nosplit +The +.Nm +program was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and is maintained by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org , +who also designed and implemented the database format. diff --git a/usr.bin/mandoc/man.conf.5 b/usr.bin/mandoc/man.conf.5 new file mode 100644 index 0000000..2ba8cde --- /dev/null +++ b/usr.bin/mandoc/man.conf.5 @@ -0,0 +1,133 @@ +.\" $OpenBSD: man.conf.5,v 1.8 2020/02/10 14:42:03 schwarze Exp $ +.\" +.\" Copyright (c) 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: February 10 2020 $ +.Dt MAN.CONF 5 +.Os +.Sh NAME +.Nm man.conf +.Nd configuration file for man +.Sh DESCRIPTION +This is the configuration file +for the +.Xr man 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 +utilities. +Its presence, and all directives, are optional. +.Pp +This file is an ASCII text file. +Leading whitespace on lines, lines starting with +.Sq # , +and blank lines are ignored. +Words are separated by whitespace. +The first word on each line is the name of a configuration directive. +.Pp +The following directives are supported: +.Bl -tag -width Ds +.It Ic manpath Ar path +Override the default search +.Ar path +for +.Xr man 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 . +It can be used multiple times to specify multiple paths, +with the order determining the manual page search order. +.Pp +Each path is a tree containing subdirectories +whose names consist of the strings +.Sq man +and/or +.Sq cat +followed by the names of sections, usually single digits. +The former are supposed to contain unformatted manual pages in +.Xr mdoc 7 +and/or +.Xr man 7 +format; file names should end with the name of the section +preceded by a dot. +The latter should contain preformatted manual pages; +file names should end with +.Ql .0 . +.Pp +Creating a +.Xr mandoc.db 5 +database with +.Xr makewhatis 8 +in each directory configured with +.Ic manpath +is recommended and necessary for +.Xr apropos 1 +to work, and also for +.Xr man 1 +on operating systems like +.Ox +that install each manual page with only one file name in the file system, +even if it documents multiple utilities or functions. 
+.It Ic output Ar option Op Ar value +Configure the default value of an output option. +These directives are overridden by the +.Fl O +command line options of the same names. +For details, see the +.Xr mandoc 1 +manual. +.Pp +.Bl -column fragment integer "ascii, utf8" -compact +.It Ar option Ta Ar value Ta used by Fl T Ta purpose +.It Ta Ta Ta +.It Ic fragment Ta none Ta Cm html Ta print only body +.It Ic includes Ta string Ta Cm html Ta path to header files +.It Ic indent Ta integer Ta Cm ascii , utf8 Ta left margin +.It Ic man Ta string Ta Cm html Ta path for \&Xr links +.It Ic paper Ta string Ta Cm ps , pdf Ta paper size +.It Ic style Ta string Ta Cm html Ta CSS file +.It Ic toc Ta none Ta Cm html Ta print table of contents +.It Ic width Ta integer Ta Cm ascii , utf8 Ta right margin +.El +.El +.Sh FILES +.Bl -tag -width /etc/examples/man.conf -compact +.It Pa /etc/man.conf +.It Pa /etc/examples/man.conf +.El +.Sh EXAMPLES +The following configuration file reproduces the defaults: +installing it is equivalent to not having a +.Nm +file at all. +.Bd -literal -offset indent +manpath /usr/share/man +manpath /usr/X11R6/man +manpath /usr/local/man +.Ed +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr makewhatis 8 +.Sh HISTORY +A relatively complicated +.Nm +file format first appeared in +.Bx 4.3 Reno . +For +.Ox 5.8 , +it was redesigned from scratch, aiming for simplicity. 
+.Sh AUTHORS +.An Ingo Schwarze Aq Mt schwarze@openbsd.org diff --git a/usr.bin/mandoc/man.h b/usr.bin/mandoc/man.h new file mode 100644 index 0000000..eda8c6c --- /dev/null +++ b/usr.bin/mandoc/man.h @@ -0,0 +1,21 @@ +/* $OpenBSD: man.h,v 1.59 2018/08/23 19:32:03 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct roff_man; + +void man_validate(struct roff_man *); diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c new file mode 100644 index 0000000..a2a1b4d --- /dev/null +++ b/usr.bin/mandoc/man_html.c @@ -0,0 +1,640 @@ +/* $OpenBSD: man_html.c,v 1.131 2020/04/04 20:23:06 schwarze Exp $ */ +/* + * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * HTML formatter for man(7) used by mandoc(1). + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "out.h" +#include "html.h" +#include "main.h" + +#define MAN_ARGS const struct roff_meta *man, \ + struct roff_node *n, \ + struct html *h + +struct man_html_act { + int (*pre)(MAN_ARGS); + int (*post)(MAN_ARGS); +}; + +static void print_man_head(const struct roff_meta *, + struct html *); +static void print_man_nodelist(MAN_ARGS); +static void print_man_node(MAN_ARGS); +static char list_continues(const struct roff_node *, + const struct roff_node *); +static int man_B_pre(MAN_ARGS); +static int man_IP_pre(MAN_ARGS); +static int man_I_pre(MAN_ARGS); +static int man_OP_pre(MAN_ARGS); +static int man_PP_pre(MAN_ARGS); +static int man_RS_pre(MAN_ARGS); +static int man_SH_pre(MAN_ARGS); +static int man_SM_pre(MAN_ARGS); +static int man_SY_pre(MAN_ARGS); +static int man_UR_pre(MAN_ARGS); +static int man_abort_pre(MAN_ARGS); +static int man_alt_pre(MAN_ARGS); +static int man_ign_pre(MAN_ARGS); +static int man_in_pre(MAN_ARGS); +static void man_root_post(const struct roff_meta *, + struct html *); +static void man_root_pre(const struct roff_meta *, + struct html *); + +static const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = { + { NULL, NULL }, /* TH */ + { man_SH_pre, NULL }, 
/* SH */ + { man_SH_pre, NULL }, /* SS */ + { man_IP_pre, NULL }, /* TP */ + { man_IP_pre, NULL }, /* TQ */ + { man_abort_pre, NULL }, /* LP */ + { man_PP_pre, NULL }, /* PP */ + { man_abort_pre, NULL }, /* P */ + { man_IP_pre, NULL }, /* IP */ + { man_PP_pre, NULL }, /* HP */ + { man_SM_pre, NULL }, /* SM */ + { man_SM_pre, NULL }, /* SB */ + { man_alt_pre, NULL }, /* BI */ + { man_alt_pre, NULL }, /* IB */ + { man_alt_pre, NULL }, /* BR */ + { man_alt_pre, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { man_B_pre, NULL }, /* B */ + { man_I_pre, NULL }, /* I */ + { man_alt_pre, NULL }, /* IR */ + { man_alt_pre, NULL }, /* RI */ + { NULL, NULL }, /* RE */ + { man_RS_pre, NULL }, /* RS */ + { man_ign_pre, NULL }, /* DT */ + { man_ign_pre, NULL }, /* UC */ + { man_ign_pre, NULL }, /* PD */ + { man_ign_pre, NULL }, /* AT */ + { man_in_pre, NULL }, /* in */ + { man_SY_pre, NULL }, /* SY */ + { NULL, NULL }, /* YS */ + { man_OP_pre, NULL }, /* OP */ + { NULL, NULL }, /* EX */ + { NULL, NULL }, /* EE */ + { man_UR_pre, NULL }, /* UR */ + { NULL, NULL }, /* UE */ + { man_UR_pre, NULL }, /* MT */ + { NULL, NULL }, /* ME */ +}; + + +void +html_man(void *arg, const struct roff_meta *man) +{ + struct html *h; + struct roff_node *n; + struct tag *t; + + h = (struct html *)arg; + n = man->first->child; + + if ((h->oflags & HTML_FRAGMENT) == 0) { + print_gen_decls(h); + print_otag(h, TAG_HTML, ""); + if (n != NULL && n->type == ROFFT_COMMENT) + print_gen_comment(h, n); + t = print_otag(h, TAG_HEAD, ""); + print_man_head(man, h); + print_tagq(h, t); + print_otag(h, TAG_BODY, ""); + } + + man_root_pre(man, h); + t = print_otag(h, TAG_DIV, "c", "manual-text"); + print_man_nodelist(man, n, h); + print_tagq(h, t); + man_root_post(man, h); + print_tagq(h, NULL); +} + +static void +print_man_head(const struct roff_meta *man, struct html *h) +{ + char *cp; + + print_gen_head(h); + mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec); + print_otag(h, TAG_TITLE, ""); + print_text(h, cp); 
+ free(cp); +} + +static void +print_man_nodelist(MAN_ARGS) +{ + while (n != NULL) { + print_man_node(man, n, h); + n = n->next; + } +} + +static void +print_man_node(MAN_ARGS) +{ + struct tag *t; + int child; + + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) + return; + + html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi); + + child = 1; + switch (n->type) { + case ROFFT_TEXT: + if (*n->string == '\0') { + print_endline(h); + return; + } + if (*n->string == ' ' && n->flags & NODE_LINE && + (h->flags & HTML_NONEWLINE) == 0) + print_otag(h, TAG_BR, ""); + else if (n->flags & NODE_DELIMC) + h->flags |= HTML_NOSPACE; + t = h->tag; + t->refcnt++; + print_text(h, n->string); + break; + case ROFFT_EQN: + t = h->tag; + t->refcnt++; + print_eqn(h, n->eqn); + break; + case ROFFT_TBL: + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out scope of font prior to opening a macro + * scope. + */ + if (h->metac != ESCAPE_FONTROMAN) { + h->metal = h->metac; + h->metac = ESCAPE_FONTROMAN; + } + + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. This will be reopened on the + * next table element. + */ + if (h->tblt != NULL) + print_tblclose(h); + t = h->tag; + t->refcnt++; + if (n->tok < ROFF_MAX) { + roff_html_pre(h, n); + t->refcnt--; + print_stagq(h, t); + return; + } + assert(n->tok >= MAN_TH && n->tok < MAN_MAX); + if (man_html_acts[n->tok - MAN_TH].pre != NULL) + child = (*man_html_acts[n->tok - MAN_TH].pre)(man, + n, h); + break; + } + + if (child && n->child != NULL) + print_man_nodelist(man, n->child, h); + + /* This will automatically close out any font scope. 
*/ + t->refcnt--; + if (n->type == ROFFT_BLOCK && + (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) { + t = h->tag; + while (t->tag != TAG_DL && t->tag != TAG_UL) + t = t->next; + /* + * Close the list if no further item of the same type + * follows; otherwise, close the item only. + */ + if (list_continues(n, roff_node_next(n)) == '\0') { + print_tagq(h, t); + t = NULL; + } + } + if (t != NULL) + print_stagq(h, t); + + if (n->flags & NODE_NOFILL && n->tok != MAN_YS && + (n->next != NULL && n->next->flags & NODE_LINE)) { + /* In .nf = <pre>, print even empty lines. */ + h->col++; + print_endline(h); + } +} + +static void +man_root_pre(const struct roff_meta *man, struct html *h) +{ + struct tag *t, *tt; + char *title; + + assert(man->title); + assert(man->msec); + mandoc_asprintf(&title, "%s(%s)", man->title, man->msec); + + t = print_otag(h, TAG_TABLE, "c", "head"); + tt = print_otag(h, TAG_TR, ""); + + print_otag(h, TAG_TD, "c", "head-ltitle"); + print_text(h, title); + print_stagq(h, tt); + + print_otag(h, TAG_TD, "c", "head-vol"); + if (man->vol != NULL) + print_text(h, man->vol); + print_stagq(h, tt); + + print_otag(h, TAG_TD, "c", "head-rtitle"); + print_text(h, title); + print_tagq(h, t); + free(title); +} + +static void +man_root_post(const struct roff_meta *man, struct html *h) +{ + struct tag *t, *tt; + + t = print_otag(h, TAG_TABLE, "c", "foot"); + tt = print_otag(h, TAG_TR, ""); + + print_otag(h, TAG_TD, "c", "foot-date"); + print_text(h, man->date); + print_stagq(h, tt); + + print_otag(h, TAG_TD, "c", "foot-os"); + if (man->os != NULL) + print_text(h, man->os); + print_tagq(h, t); +} + +static int +man_SH_pre(MAN_ARGS) +{ + const char *class; + enum htmltag tag; + + if (n->tok == MAN_SH) { + tag = TAG_H1; + class = "Sh"; + } else { + tag = TAG_H2; + class = "Ss"; + } + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + print_otag(h, TAG_SECTION, "c", class); + break; + case ROFFT_HEAD: + print_otag_id(h, tag, class, n); + 
break; + case ROFFT_BODY: + break; + default: + abort(); + } + return 1; +} + +static int +man_alt_pre(MAN_ARGS) +{ + const struct roff_node *nn; + struct tag *t; + int i; + enum htmltag fp; + + for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) { + switch (n->tok) { + case MAN_BI: + fp = i % 2 ? TAG_I : TAG_B; + break; + case MAN_IB: + fp = i % 2 ? TAG_B : TAG_I; + break; + case MAN_RI: + fp = i % 2 ? TAG_I : TAG_MAX; + break; + case MAN_IR: + fp = i % 2 ? TAG_MAX : TAG_I; + break; + case MAN_BR: + fp = i % 2 ? TAG_MAX : TAG_B; + break; + case MAN_RB: + fp = i % 2 ? TAG_B : TAG_MAX; + break; + default: + abort(); + } + + if (i) + h->flags |= HTML_NOSPACE; + + if (fp != TAG_MAX) + t = print_otag(h, fp, ""); + + print_text(h, nn->string); + + if (fp != TAG_MAX) + print_tagq(h, t); + } + return 0; +} + +static int +man_SM_pre(MAN_ARGS) +{ + print_otag(h, TAG_SMALL, ""); + if (n->tok == MAN_SB) + print_otag(h, TAG_B, ""); + return 1; +} + +static int +man_PP_pre(MAN_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->child != NULL && + (n->child->flags & NODE_NOFILL) == 0) + print_otag(h, TAG_P, "c", + n->tok == MAN_PP ? "Pp" : "Pp HP"); + break; + default: + abort(); + } + return 1; +} + +static char +list_continues(const struct roff_node *n1, const struct roff_node *n2) +{ + const char *s1, *s2; + char c1, c2; + + if (n1 == NULL || n1->type != ROFFT_BLOCK || + n2 == NULL || n2->type != ROFFT_BLOCK) + return '\0'; + if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) && + (n2->tok == MAN_TP || n2->tok == MAN_TQ)) + return ' '; + if (n1->tok != MAN_IP || n2->tok != MAN_IP) + return '\0'; + n1 = n1->head->child; + n2 = n2->head->child; + s1 = n1 == NULL ? "" : n1->string; + s2 = n2 == NULL ? "" : n2->string; + c1 = strcmp(s1, "*") == 0 ? '*' : + strcmp(s1, "\\-") == 0 ? '-' : + strcmp(s1, "\\(bu") == 0 ? 'b' : ' '; + c2 = strcmp(s2, "*") == 0 ? '*' : + strcmp(s2, "\\-") == 0 ? 
'-' : + strcmp(s2, "\\(bu") == 0 ? 'b' : ' '; + return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1; +} + +static int +man_IP_pre(MAN_ARGS) +{ + struct roff_node *nn; + const char *list_class; + enum htmltag list_elem, body_elem; + char list_type; + + nn = n->type == ROFFT_BLOCK ? n : n->parent; + list_type = list_continues(roff_node_prev(nn), nn); + if (list_type == '\0') { + /* Start a new list. */ + list_type = list_continues(nn, roff_node_next(nn)); + if (list_type == '\0') + list_type = ' '; + switch (list_type) { + case ' ': + list_class = "Bl-tag"; + list_elem = TAG_DL; + break; + case '*': + list_class = "Bl-bullet"; + list_elem = TAG_UL; + break; + case '-': + list_class = "Bl-dash"; + list_elem = TAG_UL; + break; + default: + abort(); + } + } else { + /* Continue a list that was started earlier. */ + list_class = NULL; + list_elem = TAG_MAX; + } + body_elem = list_type == ' ' ? TAG_DD : TAG_LI; + + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + if (list_elem != TAG_MAX) + print_otag(h, list_elem, "c", list_class); + return 1; + case ROFFT_HEAD: + if (body_elem == TAG_LI) + return 0; + print_otag_id(h, TAG_DT, NULL, n); + break; + case ROFFT_BODY: + print_otag(h, body_elem, ""); + return 1; + default: + abort(); + } + switch(n->tok) { + case MAN_IP: /* Only print the first header element. */ + if (n->child != NULL) + print_man_node(man, n->child, h); + break; + case MAN_TP: /* Only print next-line header elements. 
*/ + case MAN_TQ: + nn = n->child; + while (nn != NULL && (NODE_LINE & nn->flags) == 0) + nn = nn->next; + while (nn != NULL) { + print_man_node(man, nn, h); + nn = nn->next; + } + break; + default: + abort(); + } + return 0; +} + +static int +man_OP_pre(MAN_ARGS) +{ + struct tag *tt; + + print_text(h, "["); + h->flags |= HTML_NOSPACE; + tt = print_otag(h, TAG_SPAN, "c", "Op"); + + if ((n = n->child) != NULL) { + print_otag(h, TAG_B, ""); + print_text(h, n->string); + } + + print_stagq(h, tt); + + if (n != NULL && n->next != NULL) { + print_otag(h, TAG_I, ""); + print_text(h, n->next->string); + } + + print_stagq(h, tt); + h->flags |= HTML_NOSPACE; + print_text(h, "]"); + return 0; +} + +static int +man_B_pre(MAN_ARGS) +{ + print_otag(h, TAG_B, ""); + return 1; +} + +static int +man_I_pre(MAN_ARGS) +{ + print_otag(h, TAG_I, ""); + return 1; +} + +static int +man_in_pre(MAN_ARGS) +{ + print_otag(h, TAG_BR, ""); + return 0; +} + +static int +man_ign_pre(MAN_ARGS) +{ + return 0; +} + +static int +man_RS_pre(MAN_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + print_otag(h, TAG_DIV, "c", "Bd-indent"); + break; + default: + abort(); + } + return 1; +} + +static int +man_SY_pre(MAN_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + print_otag(h, TAG_TABLE, "c", "Nm"); + print_otag(h, TAG_TR, ""); + break; + case ROFFT_HEAD: + print_otag(h, TAG_TD, ""); + print_otag(h, TAG_CODE, "c", "Nm"); + break; + case ROFFT_BODY: + print_otag(h, TAG_TD, ""); + break; + default: + abort(); + } + return 1; +} + +static int +man_UR_pre(MAN_ARGS) +{ + char *cp; + + n = n->child; + assert(n->type == ROFFT_HEAD); + if (n->child != NULL) { + assert(n->child->type == ROFFT_TEXT); + if (n->tok == MAN_MT) { + mandoc_asprintf(&cp, "mailto:%s", n->child->string); + print_otag(h, TAG_A, "ch", "Mt", cp); + free(cp); + } else + print_otag(h, TAG_A, "ch", "Lk", n->child->string); + } 
+ + assert(n->next->type == ROFFT_BODY); + if (n->next->child != NULL) + n = n->next; + + print_man_nodelist(man, n->child, h); + return 0; +} + +static int +man_abort_pre(MAN_ARGS) +{ + abort(); +} diff --git a/usr.bin/mandoc/man_macro.c b/usr.bin/mandoc/man_macro.c new file mode 100644 index 0000000..dc0bcfd --- /dev/null +++ b/usr.bin/mandoc/man_macro.c @@ -0,0 +1,466 @@ +/* $OpenBSD: man_macro.c,v 1.106 2019/01/05 18:59:37 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" + +static void blk_close(MACRO_PROT_ARGS); +static void blk_exp(MACRO_PROT_ARGS); +static void blk_imp(MACRO_PROT_ARGS); +static void in_line_eoln(MACRO_PROT_ARGS); +static int man_args(struct roff_man *, int, + int *, char *, char **); +static void rew_scope(struct roff_man *, enum roff_tok); + +static const struct man_macro man_macros[MAN_MAX - MAN_TH] = { + { in_line_eoln, MAN_XSCOPE }, /* TH */ + { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SH */ + { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SS */ + { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TP */ + { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TQ */ + { blk_imp, MAN_XSCOPE }, /* LP */ + { blk_imp, MAN_XSCOPE }, /* PP */ + { blk_imp, MAN_XSCOPE }, /* P */ + { blk_imp, MAN_XSCOPE }, /* IP */ + { blk_imp, MAN_XSCOPE }, /* HP */ + { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SM */ + { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* R */ + { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* B */ + { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { blk_close, MAN_XSCOPE }, /* RE */ + { blk_exp, MAN_XSCOPE }, /* RS */ + { in_line_eoln, 0 }, /* DT */ + { in_line_eoln, 0 }, /* UC */ + { in_line_eoln, MAN_NSCOPED }, /* PD */ + { in_line_eoln, 0 }, /* AT */ + { in_line_eoln, MAN_NSCOPED }, /* in */ + { blk_imp, MAN_XSCOPE }, /* SY */ + { blk_close, MAN_XSCOPE }, /* YS */ + { in_line_eoln, 0 }, /* OP */ + { in_line_eoln, MAN_XSCOPE }, /* EX */ + { in_line_eoln, MAN_XSCOPE }, 
/* EE */ + { blk_exp, MAN_XSCOPE }, /* UR */ + { blk_close, MAN_XSCOPE }, /* UE */ + { blk_exp, MAN_XSCOPE }, /* MT */ + { blk_close, MAN_XSCOPE }, /* ME */ +}; + + +const struct man_macro * +man_macro(enum roff_tok tok) +{ + assert(tok >= MAN_TH && tok <= MAN_MAX); + return man_macros + (tok - MAN_TH); +} + +void +man_unscope(struct roff_man *man, const struct roff_node *to) +{ + struct roff_node *n; + + to = to->parent; + n = man->last; + while (n != to) { + + /* Reached the end of the document? */ + + if (to == NULL && ! (n->flags & NODE_VALID)) { + if (man->flags & (MAN_BLINE | MAN_ELINE) && + man_macro(n->tok)->flags & + (MAN_BSCOPED | MAN_NSCOPED)) { + mandoc_msg(MANDOCERR_BLK_LINE, + n->line, n->pos, + "EOF breaks %s", roff_name[n->tok]); + if (man->flags & MAN_ELINE) + man->flags &= ~MAN_ELINE; + else { + assert(n->type == ROFFT_HEAD); + n = n->parent; + man->flags &= ~MAN_BLINE; + } + man->last = n; + n = n->parent; + roff_node_delete(man, man->last); + continue; + } + if (n->type == ROFFT_BLOCK && + man_macro(n->tok)->fp == blk_exp) + mandoc_msg(MANDOCERR_BLK_NOEND, + n->line, n->pos, "%s", + roff_name[n->tok]); + } + + /* + * We might delete the man->last node + * in the post-validation phase. + * Save a pointer to the parent such that + * we know where to continue the iteration. + */ + + man->last = n; + n = n->parent; + man->last->flags |= NODE_VALID; + } + + /* + * If we ended up at the parent of the node we were + * supposed to rewind to, that means the target node + * got deleted, so add the next node we parse as a child + * of the parent instead of as a sibling of the target. + */ + + man->next = (man->last == to) ? + ROFF_NEXT_CHILD : ROFF_NEXT_SIBLING; +} + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. 
+ */
+static void
+rew_scope(struct roff_man *man, enum roff_tok tok)
+{
+	struct roff_node *n;
+
+	/* Preserve empty paragraphs before RS. */
+
+	n = man->last;
+	if (tok == MAN_RS && n->child == NULL &&
+	    (n->tok == MAN_P || n->tok == MAN_PP || n->tok == MAN_LP))
+		return;
+
+	for (;;) {
+		if (n->type == ROFFT_ROOT)
+			return;
+		/* Already closed nodes are merely skipped. */
+		if (n->flags & NODE_VALID) {
+			n = n->parent;
+			continue;
+		}
+		if (n->type != ROFFT_BLOCK) {
+			if (n->parent->type == ROFFT_ROOT) {
+				man_unscope(man, n);
+				return;
+			} else {
+				n = n->parent;
+				continue;
+			}
+		}
+		/*
+		 * Stop at a block that the new macro must not close:
+		 * only SH closes an SH, and only SH or SS close
+		 * an SS or an explicit (blk_exp) block.
+		 */
+		if (tok != MAN_SH && (n->tok == MAN_SH ||
+		    (tok != MAN_SS && (n->tok == MAN_SS ||
+		    man_macro(n->tok)->fp == blk_exp))))
+			return;
+		man_unscope(man, n);
+		n = man->last;
+	}
+}
+
+
+/*
+ * Close out a generic explicit macro.
+ * RE closes RS, YS closes SY, UE closes UR, and ME closes MT.
+ * For RE, an optional numeric argument selects the nesting level
+ * to return to.
+ */
+void
+blk_close(MACRO_PROT_ARGS)
+{
+	enum roff_tok		 ctok, ntok;
+	const struct roff_node	*nn;
+	char			*p, *ep;
+	int			 cline, cpos, la, nrew, target;
+
+	/* nrew: how many matching open blocks to ascend through. */
+	nrew = 1;
+	switch (tok) {
+	case MAN_RE:
+		ntok = MAN_RS;
+		la = *pos;
+		if ( ! man_args(man, line, pos, buf, &p))
+			break;
+		/* Count the open RS blocks to get 1 + nesting depth. */
+		for (nn = man->last->parent; nn; nn = nn->parent)
+			if (nn->tok == ntok && nn->type == ROFFT_BLOCK)
+				nrew++;
+		target = strtol(p, &ep, 10);
+		if (*ep != '\0')
+			mandoc_msg(MANDOCERR_ARG_EXCESS, line,
+			    la + (buf[la] == '"') + (int)(ep - p),
+			    "RE ... %s", ep);
+		free(p);
+		if (target == 0)
+			target = 1;
+		nrew -= target;
+		if (nrew < 1) {
+			mandoc_msg(MANDOCERR_RE_NOTOPEN,
+			    line, ppos, "RE %d", target);
+			return;
+		}
+		break;
+	case MAN_YS:
+		ntok = MAN_SY;
+		break;
+	case MAN_UE:
+		ntok = MAN_UR;
+		break;
+	case MAN_ME:
+		ntok = MAN_MT;
+		break;
+	default:
+		abort();
+	}
+
+	/* Find the nrew-th enclosing block of the matching type. */
+	for (nn = man->last->parent; nn; nn = nn->parent)
+		if (nn->tok == ntok && nn->type == ROFFT_BLOCK && ! --nrew)
+			break;
+
+	/* No matching block is open: report it; for RE, insert a br. */
+	if (nn == NULL) {
+		mandoc_msg(MANDOCERR_BLK_NOTOPEN,
+		    line, ppos, "%s", roff_name[tok]);
+		rew_scope(man, MAN_PP);
+		if (tok == MAN_RE) {
+			roff_elem_alloc(man, line, ppos, ROFF_br);
+			man->last->flags |= NODE_LINE |
+			    NODE_VALID | NODE_ENDED;
+			man->next = ROFF_NEXT_SIBLING;
+		}
+		return;
+	}
+
+	/* Remember the last node; man_unscope() changes man->last. */
+	cline = man->last->line;
+	cpos = man->last->pos;
+	ctok = man->last->tok;
+	man_unscope(man, nn);
+
+	/* Take back the margin increment that blk_exp() applied for RS. */
+	if (tok == MAN_RE && nn->head->aux > 0)
+		roff_setreg(man->roff, "an-margin", nn->head->aux, '-');
+
+	/* Trailing text. */
+
+	if (buf[*pos] != '\0') {
+		roff_word_alloc(man, line, ppos, buf + *pos);
+		man->last->flags |= NODE_DELIMC;
+		if (mandoc_eos(man->last->string, strlen(man->last->string)))
+			man->last->flags |= NODE_EOS;
+	}
+
+	/* Move a trailing paragraph behind the block. */
+
+	if (ctok == MAN_LP || ctok == MAN_PP || ctok == MAN_P) {
+		*pos = strlen(buf);
+		blk_imp(man, ctok, cline, cpos, pos, buf);
+	}
+
+	/* Synopsis blocks need an explicit end marker for spacing. */
+
+	if (tok == MAN_YS && man->last == nn) {
+		roff_elem_alloc(man, line, ppos, tok);
+		man_unscope(man, man->last);
+	}
+}
+
+/*
+ * Parse an explicit-block macro (RS, UR, and MT in man_macros[]):
+ * allocate the block and its head, take at most one argument
+ * into the head, complain about anything after it, then close
+ * the head and open the body.
+ */
+void
+blk_exp(MACRO_PROT_ARGS)
+{
+	struct roff_node *head;
+	char		*p;
+	int		 la;
+
+	if (tok == MAN_RS) {
+		rew_scope(man, tok);
+		man->flags |= ROFF_NONOFILL;
+	}
+	roff_block_alloc(man, line, ppos, tok);
+	head = roff_head_alloc(man, line, ppos, tok);
+
+	la = *pos;
+	if (man_args(man, line, pos, buf, &p)) {
+		roff_word_alloc(man, line, la, p);
+		if (tok == MAN_RS) {
+			/*
+			 * Track the indentation in the an-margin register;
+			 * head->aux keeps the increment so that blk_close()
+			 * can subtract it again on RE.
+			 */
+			if (roff_getreg(man->roff, "an-margin") == 0)
+				roff_setreg(man->roff, "an-margin",
+				    7 * 24, '=');
+			if ((head->aux = strtod(p, NULL) * 24.0) > 0)
+				roff_setreg(man->roff, "an-margin",
+				    head->aux, '+');
+		}
+		free(p);
+	}
+
+	if (buf[*pos] != '\0')
+		mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
+		    "%s ... %s", roff_name[tok], buf + *pos);
+
+	man_unscope(man, head);
+	roff_body_alloc(man, line, ppos, tok);
+	man->flags &= ~ROFF_NONOFILL;
+}
+
+/*
+ * Parse an implicit-block macro. These contain a ROFFT_HEAD and a
+ * ROFFT_BODY contained within a ROFFT_BLOCK. Rules for closing out other
+ * scopes, such as `SH' closing out an `SS', are defined in the rew
+ * routines.
+ */
+void
+blk_imp(MACRO_PROT_ARGS)
+{
+	int		 la;
+	char		*p;
+	struct roff_node *n;
+
+	rew_scope(man, tok);
+	man->flags |= ROFF_NONOFILL;
+	if (tok == MAN_SH || tok == MAN_SS)
+		man->flags &= ~ROFF_NOFILL;
+	roff_block_alloc(man, line, ppos, tok);
+	n = roff_head_alloc(man, line, ppos, tok);
+
+	/* Add line arguments. */
+
+	for (;;) {
+		la = *pos;
+		if ( ! man_args(man, line, pos, buf, &p))
+			break;
+		roff_word_alloc(man, line, la, p);
+		free(p);
+	}
+
+	/*
+	 * For macros having optional next-line scope,
+	 * keep the head open if there were no arguments.
+	 * For `TP' and `TQ', always keep the head open.
+	 */
+
+	if (man_macro(tok)->flags & MAN_BSCOPED &&
+	    (tok == MAN_TP || tok == MAN_TQ || n == man->last)) {
+		man->flags |= MAN_BLINE;
+		return;
+	}
+
+	/* Close out the head and open the body. */
+
+	man_unscope(man, n);
+	roff_body_alloc(man, line, ppos, tok);
+	man->flags &= ~ROFF_NONOFILL;
+}
+
+/*
+ * Parse an in-line macro: allocate an element node and attach the
+ * arguments on the input line as text children.  An MAN_ESCOPED
+ * macro given without arguments stays open (MAN_ELINE) to take
+ * its content from the next input line instead.
+ */
+void
+in_line_eoln(MACRO_PROT_ARGS)
+{
+	int		 la;
+	char		*p;
+	struct roff_node *n;
+
+	roff_elem_alloc(man, line, ppos, tok);
+	n = man->last;
+
+	if (tok == MAN_EX)
+		man->flags |= ROFF_NOFILL;
+	else if (tok == MAN_EE)
+		man->flags &= ~ROFF_NOFILL;
+
+	for (;;) {
+		/* PD takes one argument only; report the rest. */
+		if (buf[*pos] != '\0' && man->last != n && tok == MAN_PD) {
+			mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
+			    "%s ... %s", roff_name[tok], buf + *pos);
+			break;
+		}
+		la = *pos;
+		if ( ! man_args(man, line, pos, buf, &p))
+			break;
+		/* MAN_JOIN macros collect all arguments in one text node. */
+		if (man_macro(tok)->flags & MAN_JOIN &&
+		    man->last->type == ROFFT_TEXT)
+			roff_word_append(man, p);
+		else
+			roff_word_alloc(man, line, la, p);
+		free(p);
+	}
+
+	/*
+	 * Append NODE_EOS in case the last snipped argument
+	 * ends with a dot, e.g. `.IR syslog (3).'
+	 */
+
+	if (n != man->last &&
+	    mandoc_eos(man->last->string, strlen(man->last->string)))
+		man->last->flags |= NODE_EOS;
+
+	/*
+	 * If no arguments are specified and this is MAN_ESCOPED (i.e.,
+	 * next-line scoped), then set our mode to indicate that we're
+	 * waiting for terms to load into our context.
+	 */
+
+	if (n == man->last && man_macro(tok)->flags & MAN_ESCOPED) {
+		man->flags |= MAN_ELINE;
+		return;
+	}
+
+	assert(man->last->type != ROFFT_ROOT);
+	man->next = ROFF_NEXT_SIBLING;
+
+	/* Rewind our element scope. */
+
+	for ( ; man->last; man->last = man->last->parent) {
+		man->last->flags |= NODE_VALID;
+		if (man->last == n)
+			break;
+	}
+
+	/* Rewind next-line scoped ancestors, if any. */
+
+	if (man_macro(tok)->flags & MAN_ESCOPED)
+		man_descope(man, line, ppos, NULL);
+}
+
+/*
+ * End of input: close every scope that is still open, starting
+ * from man->last and going all the way up past man->meta.first.
+ */
+void
+man_endparse(struct roff_man *man)
+{
+	man_unscope(man, man->meta.first);
+}
+
+/*
+ * Fetch the next macro argument starting at buf[*pos].
+ * Returns 0 at the end of the line.  Otherwise returns 1 and
+ * stores the result of roff_getarg() in *v; every caller in this
+ * file passes that pointer to free() when done with it.
+ * Must not be called with *pos == 0 or with *pos at a blank.
+ */
+static int
+man_args(struct roff_man *man, int line, int *pos, char *buf, char **v)
+{
+	char	 *start;
+
+	assert(*pos);
+	*v = start = buf + *pos;
+	assert(' ' != *start);
+
+	if ('\0' == *start)
+		return 0;
+
+	*v = roff_getarg(man->roff, v, line, pos);
+	return 1;
+}
diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c
new file mode 100644
index 0000000..3bf25ad
--- /dev/null
+++ b/usr.bin/mandoc/man_term.c
@@ -0,0 +1,1149 @@
+/* $OpenBSD: man_term.c,v 1.188 2020/03/13 00:31:05 schwarze Exp $ */
+/*
+ * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Plain text formatter for man(7), used by mandoc(1)
+ * for ASCII, UTF-8, PostScript, and PDF output.
+ */
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc_aux.h"
+#include "mandoc.h"
+#include "roff.h"
+#include "man.h"
+#include "out.h"
+#include "term.h"
+#include "term_tag.h"
+#include "main.h"
+
+#define	MAXMARGINS	  64 /* maximum number of indented scopes */
+
+/*
+ * man(7) specific formatter state, passed to every handler
+ * alongside the generic struct termp.
+ */
+struct	mtermp {
+	int		  lmargin[MAXMARGINS]; /* margins (incl. vis. page) */
+	int		  lmargincur; /* index of current margin */
+	int		  lmarginsz; /* actual number of nested margins */
+	size_t		  offset; /* default offset to visible page */
+	int		  pardist; /* vert. space before par., unit: [v] */
+};
+
+/* Common parameter list of all pre- and post-handlers. */
+#define	DECL_ARGS	  struct termp *p, \
+			  struct mtermp *mt, \
+			  struct roff_node *n, \
+			  const struct roff_meta *meta
+
+/*
+ * Formatting actions for one macro.  print_man_node() calls pre
+ * before the children and post after them; children are only
+ * formatted when pre returns non-zero.  Either may be NULL.
+ */
+struct	man_term_act {
+	int		(*pre)(DECL_ARGS);
+	void		(*post)(DECL_ARGS);
+	int		  flags;
+#define	MAN_NOTEXT	 (1 << 0) /* Never has text children. */
+};
+
+static	void		  print_man_nodelist(DECL_ARGS);
+static	void		  print_man_node(DECL_ARGS);
+static	void		  print_man_head(struct termp *,
+				const struct roff_meta *);
+static	void		  print_man_foot(struct termp *,
+				const struct roff_meta *);
+static	void		  print_bvspace(struct termp *,
+				struct roff_node *, int);
+
+static	int		  pre_B(DECL_ARGS);
+static	int		  pre_DT(DECL_ARGS);
+static	int		  pre_HP(DECL_ARGS);
+static	int		  pre_I(DECL_ARGS);
+static	int		  pre_IP(DECL_ARGS);
+static	int		  pre_OP(DECL_ARGS);
+static	int		  pre_PD(DECL_ARGS);
+static	int		  pre_PP(DECL_ARGS);
+static	int		  pre_RS(DECL_ARGS);
+static	int		  pre_SH(DECL_ARGS);
+static	int		  pre_SS(DECL_ARGS);
+static	int		  pre_SY(DECL_ARGS);
+static	int		  pre_TP(DECL_ARGS);
+static	int		  pre_UR(DECL_ARGS);
+static	int		  pre_abort(DECL_ARGS);
+static	int		  pre_alternate(DECL_ARGS);
+static	int		  pre_ign(DECL_ARGS);
+static	int		  pre_in(DECL_ARGS);
+static	int		  pre_literal(DECL_ARGS);
+
+static	void		  post_IP(DECL_ARGS);
+static	void		  post_HP(DECL_ARGS);
+static	void		  post_RS(DECL_ARGS);
+static	void		  post_SH(DECL_ARGS);
+static void post_SY(DECL_ARGS); +static void post_TP(DECL_ARGS); +static void post_UR(DECL_ARGS); + +static const struct man_term_act man_term_acts[MAN_MAX - MAN_TH] = { + { NULL, NULL, 0 }, /* TH */ + { pre_SH, post_SH, 0 }, /* SH */ + { pre_SS, post_SH, 0 }, /* SS */ + { pre_TP, post_TP, 0 }, /* TP */ + { pre_TP, post_TP, 0 }, /* TQ */ + { pre_abort, NULL, 0 }, /* LP */ + { pre_PP, NULL, 0 }, /* PP */ + { pre_abort, NULL, 0 }, /* P */ + { pre_IP, post_IP, 0 }, /* IP */ + { pre_HP, post_HP, 0 }, /* HP */ + { NULL, NULL, 0 }, /* SM */ + { pre_B, NULL, 0 }, /* SB */ + { pre_alternate, NULL, 0 }, /* BI */ + { pre_alternate, NULL, 0 }, /* IB */ + { pre_alternate, NULL, 0 }, /* BR */ + { pre_alternate, NULL, 0 }, /* RB */ + { NULL, NULL, 0 }, /* R */ + { pre_B, NULL, 0 }, /* B */ + { pre_I, NULL, 0 }, /* I */ + { pre_alternate, NULL, 0 }, /* IR */ + { pre_alternate, NULL, 0 }, /* RI */ + { NULL, NULL, 0 }, /* RE */ + { pre_RS, post_RS, 0 }, /* RS */ + { pre_DT, NULL, 0 }, /* DT */ + { pre_ign, NULL, MAN_NOTEXT }, /* UC */ + { pre_PD, NULL, MAN_NOTEXT }, /* PD */ + { pre_ign, NULL, 0 }, /* AT */ + { pre_in, NULL, MAN_NOTEXT }, /* in */ + { pre_SY, post_SY, 0 }, /* SY */ + { NULL, NULL, 0 }, /* YS */ + { pre_OP, NULL, 0 }, /* OP */ + { pre_literal, NULL, 0 }, /* EX */ + { pre_literal, NULL, 0 }, /* EE */ + { pre_UR, post_UR, 0 }, /* UR */ + { NULL, NULL, 0 }, /* UE */ + { pre_UR, post_UR, 0 }, /* MT */ + { NULL, NULL, 0 }, /* ME */ +}; +static const struct man_term_act *man_term_act(enum roff_tok); + + +static const struct man_term_act * +man_term_act(enum roff_tok tok) +{ + assert(tok >= MAN_TH && tok <= MAN_MAX); + return man_term_acts + (tok - MAN_TH); +} + +void +terminal_man(void *arg, const struct roff_meta *man) +{ + struct mtermp mt; + struct termp *p; + struct roff_node *n, *nc, *nn; + size_t save_defindent; + + p = (struct termp *)arg; + save_defindent = p->defindent; + if (p->synopsisonly == 0 && p->defindent == 0) + p->defindent = 7; + p->tcol->rmargin = 
p->maxrmargin = p->defrmargin; + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + + memset(&mt, 0, sizeof(mt)); + mt.lmargin[mt.lmargincur] = term_len(p, p->defindent); + mt.offset = term_len(p, p->defindent); + mt.pardist = 1; + + n = man->first->child; + if (p->synopsisonly) { + for (nn = NULL; n != NULL; n = n->next) { + if (n->tok != MAN_SH) + continue; + nc = n->child->child; + if (nc->type != ROFFT_TEXT) + continue; + if (strcmp(nc->string, "SYNOPSIS") == 0) + break; + if (nn == NULL && strcmp(nc->string, "NAME") == 0) + nn = n; + } + if (n == NULL) + n = nn; + p->flags |= TERMP_NOSPACE; + if (n != NULL && (n = n->child->next->child) != NULL) + print_man_nodelist(p, &mt, n, man); + term_newln(p); + } else { + term_begin(p, print_man_head, print_man_foot, man); + p->flags |= TERMP_NOSPACE; + if (n != NULL) + print_man_nodelist(p, &mt, n, man); + term_end(p); + } + p->defindent = save_defindent; +} + +/* + * Printing leading vertical space before a block. + * This is used for the paragraph macros. + * The rules are pretty simple, since there's very little nesting going + * on here. Basically, if we're the first within another block (SS/SH), + * then don't emit vertical space. If we are (RS), then do. If not the + * first, print it. + */ +static void +print_bvspace(struct termp *p, struct roff_node *n, int pardist) +{ + struct roff_node *nch; + int i; + + term_newln(p); + + if (n->body != NULL && + (nch = roff_node_child(n->body)) != NULL && + nch->type == ROFFT_TBL) + return; + + if (n->parent->tok != MAN_RS && roff_node_prev(n) == NULL) + return; + + for (i = 0; i < pardist; i++) + term_vspace(p); +} + + +static int +pre_abort(DECL_ARGS) +{ + abort(); +} + +static int +pre_ign(DECL_ARGS) +{ + return 0; +} + +static int +pre_I(DECL_ARGS) +{ + term_fontrepl(p, TERMFONT_UNDER); + return 1; +} + +static int +pre_literal(DECL_ARGS) +{ + term_newln(p); + + /* + * Unlike .IP and .TP, .HP does not have a HEAD. 
+ * So in case a second call to term_flushln() is needed, + * indentation has to be set up explicitly. + */ + if (n->parent->tok == MAN_HP && p->tcol->rmargin < p->maxrmargin) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + p->flags |= TERMP_NOSPACE; + } + return 0; +} + +static int +pre_PD(DECL_ARGS) +{ + struct roffsu su; + + n = n->child; + if (n == NULL) { + mt->pardist = 1; + return 0; + } + assert(n->type == ROFFT_TEXT); + if (a2roffsu(n->string, &su, SCALE_VS) != NULL) + mt->pardist = term_vspan(p, &su); + return 0; +} + +static int +pre_alternate(DECL_ARGS) +{ + enum termfont font[2]; + struct roff_node *nn; + int i; + + switch (n->tok) { + case MAN_RB: + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_BOLD; + break; + case MAN_RI: + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_UNDER; + break; + case MAN_BR: + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_NONE; + break; + case MAN_BI: + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_UNDER; + break; + case MAN_IR: + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_NONE; + break; + case MAN_IB: + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_BOLD; + break; + default: + abort(); + } + for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i = 1 - i) { + term_fontrepl(p, font[i]); + assert(nn->type == ROFFT_TEXT); + term_word(p, nn->string); + if (nn->flags & NODE_EOS) + p->flags |= TERMP_SENTENCE; + if (nn->next != NULL) + p->flags |= TERMP_NOSPACE; + } + return 0; +} + +static int +pre_B(DECL_ARGS) +{ + term_fontrepl(p, TERMFONT_BOLD); + return 1; +} + +static int +pre_OP(DECL_ARGS) +{ + term_word(p, "["); + p->flags |= TERMP_KEEP | TERMP_NOSPACE; + + if ((n = n->child) != NULL) { + term_fontrepl(p, TERMFONT_BOLD); + term_word(p, n->string); + } + if (n != NULL && n->next != NULL) { + term_fontrepl(p, TERMFONT_UNDER); + term_word(p, n->next->string); + } + term_fontrepl(p, TERMFONT_NONE); + p->flags &= ~TERMP_KEEP; + 
p->flags |= TERMP_NOSPACE; + term_word(p, "]"); + return 0; +} + +static int +pre_in(DECL_ARGS) +{ + struct roffsu su; + const char *cp; + size_t v; + int less; + + term_newln(p); + + if (n->child == NULL) { + p->tcol->offset = mt->offset; + return 0; + } + + cp = n->child->string; + less = 0; + + if (*cp == '-') + less = -1; + else if (*cp == '+') + less = 1; + else + cp--; + + if (a2roffsu(++cp, &su, SCALE_EN) == NULL) + return 0; + + v = term_hen(p, &su); + + if (less < 0) + p->tcol->offset -= p->tcol->offset > v ? v : p->tcol->offset; + else if (less > 0) + p->tcol->offset += v; + else + p->tcol->offset = v; + if (p->tcol->offset > SHRT_MAX) + p->tcol->offset = term_len(p, p->defindent); + + return 0; +} + +static int +pre_DT(DECL_ARGS) +{ + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + return 0; +} + +static int +pre_HP(DECL_ARGS) +{ + struct roffsu su; + const struct roff_node *nn; + int len; + + switch (n->type) { + case ROFFT_BLOCK: + print_bvspace(p, n, mt->pardist); + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + + if (n->child == NULL) + return 0; + + if ((n->child->flags & NODE_NOFILL) == 0) { + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 2; + } + + /* Calculate offset. */ + + if ((nn = n->parent->head->child) != NULL && + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; + return 1; +} + +static void +post_HP(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + case ROFFT_HEAD: + break; + case ROFFT_BODY: + term_newln(p); + + /* + * Compatibility with a groff bug. 
+ * The .HP macro uses the undocumented .tag request + * which causes a line break and cancels no-space + * mode even if there isn't any output. + */ + + if (n->child == NULL) + term_vspace(p); + + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + p->trailspace = 0; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; + break; + default: + abort(); + } +} + +static int +pre_PP(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + print_bvspace(p, n, mt->pardist); + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + p->tcol->offset = mt->offset; + break; + default: + abort(); + } + return 1; +} + +static int +pre_IP(DECL_ARGS) +{ + struct roffsu su; + const struct roff_node *nn; + int len; + + switch (n->type) { + case ROFFT_BLOCK: + print_bvspace(p, n, mt->pardist); + return 1; + case ROFFT_HEAD: + p->flags |= TERMP_NOBREAK; + p->trailspace = 1; + break; + case ROFFT_BODY: + p->flags |= TERMP_NOSPACE; + break; + default: + abort(); + } + + /* Calculate the offset from the optional second argument. 
*/ + if ((nn = n->parent->head->child) != NULL && + (nn = nn->next) != NULL && + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + switch (n->type) { + case ROFFT_HEAD: + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; + if (n->child != NULL) + print_man_node(p, mt, n->child, meta); + return 0; + case ROFFT_BODY: + p->tcol->offset = mt->offset + len; + p->tcol->rmargin = p->maxrmargin; + break; + default: + abort(); + } + return 1; +} + +static void +post_IP(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + p->tcol->rmargin = p->maxrmargin; + break; + case ROFFT_BODY: + term_newln(p); + p->tcol->offset = mt->offset; + break; + default: + abort(); + } +} + +static int +pre_TP(DECL_ARGS) +{ + struct roffsu su; + struct roff_node *nn; + int len; + + switch (n->type) { + case ROFFT_BLOCK: + if (n->tok == MAN_TP) + print_bvspace(p, n, mt->pardist); + return 1; + case ROFFT_HEAD: + p->flags |= TERMP_NOBREAK | TERMP_BRTRSP; + p->trailspace = 1; + break; + case ROFFT_BODY: + p->flags |= TERMP_NOSPACE; + break; + default: + abort(); + } + + /* Calculate offset. */ + + if ((nn = n->parent->head->child) != NULL && + nn->string != NULL && ! (NODE_LINE & nn->flags) && + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + switch (n->type) { + case ROFFT_HEAD: + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; + + /* Don't print same-line elements. 
*/ + nn = n->child; + while (nn != NULL && (nn->flags & NODE_LINE) == 0) + nn = nn->next; + + while (nn != NULL) { + print_man_node(p, mt, nn, meta); + nn = nn->next; + } + return 0; + case ROFFT_BODY: + p->tcol->offset = mt->offset + len; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP); + break; + default: + abort(); + } + return 1; +} + +static void +post_TP(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + term_flushln(p); + break; + case ROFFT_BODY: + term_newln(p); + p->tcol->offset = mt->offset; + break; + default: + abort(); + } +} + +static int +pre_SS(DECL_ARGS) +{ + int i; + + switch (n->type) { + case ROFFT_BLOCK: + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + + /* + * No vertical space before the first subsection + * and after an empty subsection. + */ + + if ((n = roff_node_prev(n)) == NULL || + (n->tok == MAN_SS && roff_node_child(n->body) == NULL)) + break; + + for (i = 0; i < mt->pardist; i++) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontrepl(p, TERMFONT_BOLD); + p->tcol->offset = term_len(p, 3); + p->tcol->rmargin = mt->offset; + p->trailspace = mt->offset; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + break; + case ROFFT_BODY: + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + break; + default: + break; + } + return 1; +} + +static int +pre_SH(DECL_ARGS) +{ + int i; + + switch (n->type) { + case ROFFT_BLOCK: + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + + /* + * No vertical space before the first section + * and after an empty section. 
+ */ + + if ((n = roff_node_prev(n)) == NULL || + (n->tok == MAN_SH && roff_node_child(n->body) == NULL)) + break; + + for (i = 0; i < mt->pardist; i++) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontrepl(p, TERMFONT_BOLD); + p->tcol->offset = 0; + p->tcol->rmargin = mt->offset; + p->trailspace = mt->offset; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + break; + case ROFFT_BODY: + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + break; + default: + abort(); + } + return 1; +} + +static void +post_SH(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + case ROFFT_BODY: + term_newln(p); + break; + default: + abort(); + } +} + +static int +pre_RS(DECL_ARGS) +{ + struct roffsu su; + + switch (n->type) { + case ROFFT_BLOCK: + term_newln(p); + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + + n = n->parent->head; + n->aux = SHRT_MAX + 1; + if (n->child == NULL) + n->aux = mt->lmargin[mt->lmargincur]; + else if (a2roffsu(n->child->string, &su, SCALE_EN) != NULL) + n->aux = term_hen(p, &su); + if (n->aux < 0 && (size_t)(-n->aux) > mt->offset) + n->aux = -mt->offset; + else if (n->aux > SHRT_MAX) + n->aux = term_len(p, p->defindent); + + mt->offset += n->aux; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; + + if (++mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; + + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + return 1; +} + +static void +post_RS(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + case ROFFT_HEAD: + return; + case ROFFT_BODY: + break; + default: + abort(); + } + term_newln(p); + mt->offset -= n->parent->head->aux; + p->tcol->offset = mt->offset; + if (--mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; +} + +static int +pre_SY(DECL_ARGS) +{ + const struct roff_node *nn; + int len; + + switch (n->type) { + case 
ROFFT_BLOCK: + if ((nn = roff_node_prev(n)) == NULL || nn->tok != MAN_SY) + print_bvspace(p, n, mt->pardist); + return 1; + case ROFFT_HEAD: + case ROFFT_BODY: + break; + default: + abort(); + } + + nn = n->parent->head->child; + len = nn == NULL ? 1 : term_strlen(p, nn->string) + 1; + + switch (n->type) { + case ROFFT_HEAD: + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; + if (n->next->child == NULL || + (n->next->child->flags & NODE_NOFILL) == 0) + p->flags |= TERMP_NOBREAK; + term_fontrepl(p, TERMFONT_BOLD); + break; + case ROFFT_BODY: + mt->lmargin[mt->lmargincur] = len; + p->tcol->offset = mt->offset + len; + p->tcol->rmargin = p->maxrmargin; + p->flags |= TERMP_NOSPACE; + break; + default: + abort(); + } + return 1; +} + +static void +post_SY(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + break; + case ROFFT_BODY: + term_newln(p); + p->tcol->offset = mt->offset; + break; + default: + abort(); + } +} + +static int +pre_UR(DECL_ARGS) +{ + return n->type != ROFFT_HEAD; +} + +static void +post_UR(DECL_ARGS) +{ + if (n->type != ROFFT_BLOCK) + return; + + term_word(p, "<"); + p->flags |= TERMP_NOSPACE; + + if (n->child->child != NULL) + print_man_node(p, mt, n->child->child, meta); + + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); +} + +static void +print_man_node(DECL_ARGS) +{ + const struct man_term_act *act; + int c; + + if (n->flags & NODE_ID) + term_tag_write(n, p->line); + + switch (n->type) { + case ROFFT_TEXT: + /* + * If we have a blank line, output a vertical space. + * If we have a space as the first character, break + * before printing the line's data. 
+ */ + if (*n->string == '\0') { + if (p->flags & TERMP_NONEWLINE) + term_newln(p); + else + term_vspace(p); + return; + } else if (*n->string == ' ' && n->flags & NODE_LINE && + (p->flags & TERMP_NONEWLINE) == 0) + term_newln(p); + else if (n->flags & NODE_DELIMC) + p->flags |= TERMP_NOSPACE; + + term_word(p, n->string); + goto out; + case ROFFT_COMMENT: + return; + case ROFFT_EQN: + if ( ! (n->flags & NODE_LINE)) + p->flags |= TERMP_NOSPACE; + term_eqn(p, n->eqn); + if (n->next != NULL && ! (n->next->flags & NODE_LINE)) + p->flags |= TERMP_NOSPACE; + return; + case ROFFT_TBL: + if (p->tbl.cols == NULL) + term_vspace(p); + term_tbl(p, n->span); + return; + default: + break; + } + + if (n->tok < ROFF_MAX) { + roff_term_pre(p, n); + return; + } + + act = man_term_act(n->tok); + if ((act->flags & MAN_NOTEXT) == 0 && n->tok != MAN_SM) + term_fontrepl(p, TERMFONT_NONE); + + c = 1; + if (act->pre != NULL) + c = (*act->pre)(p, mt, n, meta); + + if (c && n->child != NULL) + print_man_nodelist(p, mt, n->child, meta); + + if (act->post != NULL) + (*act->post)(p, mt, n, meta); + if ((act->flags & MAN_NOTEXT) == 0 && n->tok != MAN_SM) + term_fontrepl(p, TERMFONT_NONE); + +out: + /* + * If we're in a literal context, make sure that words + * together on the same line stay together. This is a + * POST-printing call, so we check the NEXT word. Since + * -man doesn't have nested macros, we don't need to be + * more specific than this. + */ + if (n->flags & NODE_NOFILL && + ! 
(p->flags & (TERMP_NOBREAK | TERMP_NONEWLINE)) && + (n->next == NULL || n->next->flags & NODE_LINE)) { + p->flags |= TERMP_BRNEVER | TERMP_NOSPACE; + if (n->string != NULL && *n->string != '\0') + term_flushln(p); + else + term_newln(p); + p->flags &= ~TERMP_BRNEVER; + if (p->tcol->rmargin < p->maxrmargin && + n->parent->tok == MAN_HP) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + } + } + if (n->flags & NODE_EOS) + p->flags |= TERMP_SENTENCE; +} + +static void +print_man_nodelist(DECL_ARGS) +{ + while (n != NULL) { + print_man_node(p, mt, n, meta); + n = n->next; + } +} + +static void +print_man_foot(struct termp *p, const struct roff_meta *meta) +{ + char *title; + size_t datelen, titlen; + + assert(meta->title); + assert(meta->msec); + assert(meta->date); + + term_fontrepl(p, TERMFONT_NONE); + + if (meta->hasbody) + term_vspace(p); + + /* + * Temporary, undocumented option to imitate mdoc(7) output. + * In the bottom right corner, use the operating system + * instead of the title. + */ + + if ( ! p->mdocstyle) { + if (meta->hasbody) { + term_vspace(p); + term_vspace(p); + } + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + } else if (meta->os != NULL) { + title = mandoc_strdup(meta->os); + } else { + title = mandoc_strdup(""); + } + datelen = term_strlen(p, meta->date); + + /* Bottom left corner: operating system. */ + + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + p->trailspace = 1; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin > datelen ? + (p->maxrmargin + term_len(p, 1) - datelen) / 2 : 0; + + if (meta->os) + term_word(p, meta->os); + term_flushln(p); + + /* At the bottom in the middle: manual date. */ + + p->tcol->offset = p->tcol->rmargin; + titlen = term_strlen(p, title); + p->tcol->rmargin = p->maxrmargin > titlen ? + p->maxrmargin - titlen : 0; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->date); + term_flushln(p); + + /* Bottom right corner: manual title and section. 
*/ + + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + p->trailspace = 0; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + + term_word(p, title); + term_flushln(p); + + /* + * Reset the terminal state for more output after the footer: + * Some output modes, in particular PostScript and PDF, print + * the header and the footer into a buffer such that it can be + * reused for multiple output pages, then go on to format the + * main text. + */ + + p->tcol->offset = 0; + p->flags = 0; + + free(title); +} + +static void +print_man_head(struct termp *p, const struct roff_meta *meta) +{ + const char *volume; + char *title; + size_t vollen, titlen; + + assert(meta->title); + assert(meta->msec); + + volume = NULL == meta->vol ? "" : meta->vol; + vollen = term_strlen(p, volume); + + /* Top left corner: manual title and section. */ + + mandoc_asprintf(&title, "%s(%s)", meta->title, meta->msec); + titlen = term_strlen(p, title); + + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->trailspace = 1; + p->tcol->offset = 0; + p->tcol->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + (p->maxrmargin - vollen + term_len(p, 1)) / 2 : + vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; + + term_word(p, title); + term_flushln(p); + + /* At the top in the middle: manual volume. */ + + p->flags |= TERMP_NOSPACE; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + vollen + titlen < + p->maxrmargin ? p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, volume); + term_flushln(p); + + /* Top right corner: title and section, again. 
*/ + + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + if (p->tcol->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; + + /* + * Groff prints three blank lines before the content. + * Do the same, except in the temporary, undocumented + * mode imitating mdoc(7) output. + */ + + term_vspace(p); + if ( ! p->mdocstyle) { + term_vspace(p); + term_vspace(p); + } + free(title); +} diff --git a/usr.bin/mandoc/man_validate.c b/usr.bin/mandoc/man_validate.c new file mode 100644 index 0000000..49aa390 --- /dev/null +++ b/usr.bin/mandoc/man_validate.c @@ -0,0 +1,656 @@ +/* $OpenBSD: man_validate.c,v 1.124 2020/04/24 11:58:02 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Validation module for man(7) syntax trees used by mandoc(1). 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" +#include "tag.h" + +#define CHKARGS struct roff_man *man, struct roff_node *n + +typedef void (*v_check)(CHKARGS); + +static void check_abort(CHKARGS) __attribute__((__noreturn__)); +static void check_par(CHKARGS); +static void check_part(CHKARGS); +static void check_root(CHKARGS); +static void check_tag(struct roff_node *, struct roff_node *); +static void check_text(CHKARGS); + +static void post_AT(CHKARGS); +static void post_EE(CHKARGS); +static void post_EX(CHKARGS); +static void post_IP(CHKARGS); +static void post_OP(CHKARGS); +static void post_SH(CHKARGS); +static void post_TH(CHKARGS); +static void post_TP(CHKARGS); +static void post_UC(CHKARGS); +static void post_UR(CHKARGS); +static void post_in(CHKARGS); + +static const v_check man_valids[MAN_MAX - MAN_TH] = { + post_TH, /* TH */ + post_SH, /* SH */ + post_SH, /* SS */ + post_TP, /* TP */ + post_TP, /* TQ */ + check_abort,/* LP */ + check_par, /* PP */ + check_abort,/* P */ + post_IP, /* IP */ + NULL, /* HP */ + NULL, /* SM */ + NULL, /* SB */ + NULL, /* BI */ + NULL, /* IB */ + NULL, /* BR */ + NULL, /* RB */ + NULL, /* R */ + NULL, /* B */ + NULL, /* I */ + NULL, /* IR */ + NULL, /* RI */ + NULL, /* RE */ + check_part, /* RS */ + NULL, /* DT */ + post_UC, /* UC */ + NULL, /* PD */ + post_AT, /* AT */ + post_in, /* in */ + NULL, /* SY */ + NULL, /* YS */ + post_OP, /* OP */ + post_EX, /* EX */ + post_EE, /* EE */ + post_UR, /* UR */ + NULL, /* UE */ + post_UR, /* MT */ + NULL, /* ME */ +}; + + +/* Validate the subtree rooted at man->last. 
*/ +void +man_validate(struct roff_man *man) +{ + struct roff_node *n; + const v_check *cp; + + /* + * Translate obsolete macros such that later code + * does not need to look for them. + */ + + n = man->last; + switch (n->tok) { + case MAN_LP: + case MAN_P: + n->tok = MAN_PP; + break; + default: + break; + } + + /* + * Iterate over all children, recursing into each one + * in turn, depth-first. + */ + + man->last = man->last->child; + while (man->last != NULL) { + man_validate(man); + if (man->last == n) + man->last = man->last->child; + else + man->last = man->last->next; + } + + /* Finally validate the macro itself. */ + + man->last = n; + man->next = ROFF_NEXT_SIBLING; + switch (n->type) { + case ROFFT_TEXT: + check_text(man, n); + break; + case ROFFT_ROOT: + check_root(man, n); + break; + case ROFFT_COMMENT: + case ROFFT_EQN: + case ROFFT_TBL: + break; + default: + if (n->tok < ROFF_MAX) { + roff_validate(man); + break; + } + assert(n->tok >= MAN_TH && n->tok < MAN_MAX); + cp = man_valids + (n->tok - MAN_TH); + if (*cp) + (*cp)(man, n); + if (man->last == n) + n->flags |= NODE_VALID; + break; + } +} + +static void +check_root(CHKARGS) +{ + assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); + + if (n->last == NULL || n->last->type == ROFFT_COMMENT) + mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); + else + man->meta.hasbody = 1; + + if (NULL == man->meta.title) { + mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); + + /* + * If a title hasn't been set, do so now (by + * implication, date and section also aren't set). + */ + + man->meta.title = mandoc_strdup(""); + man->meta.msec = mandoc_strdup(""); + man->meta.date = mandoc_normdate(NULL, NULL); + } + + if (man->meta.os_e && + (man->meta.rcsids & (1 << man->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, + man->meta.os_e == MANDOC_OS_OPENBSD ? 
+ "(OpenBSD)" : "(NetBSD)"); +} + +static void +check_abort(CHKARGS) +{ + abort(); +} + +/* + * Skip leading whitespace, dashes, backslashes, and font escapes, + * then create a tag if the first following byte is a letter. + * Priority is high unless whitespace is present. + */ +static void +check_tag(struct roff_node *n, struct roff_node *nt) +{ + const char *cp, *arg; + int prio, sz; + + if (nt == NULL || nt->type != ROFFT_TEXT) + return; + + cp = nt->string; + prio = TAG_STRONG; + for (;;) { + switch (*cp) { + case ' ': + case '\t': + prio = TAG_WEAK; + /* FALLTHROUGH */ + case '-': + cp++; + break; + case '\\': + cp++; + switch (mandoc_escape(&cp, &arg, &sz)) { + case ESCAPE_FONT: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBI: + case ESCAPE_FONTROMAN: + case ESCAPE_FONTCW: + case ESCAPE_FONTPREV: + case ESCAPE_IGNORE: + break; + case ESCAPE_SPECIAL: + if (sz != 1) + return; + switch (*arg) { + case '-': + case 'e': + break; + default: + return; + } + break; + default: + return; + } + break; + default: + if (isalpha((unsigned char)*cp)) + tag_put(cp, prio, n); + return; + } + } +} + +static void +check_text(CHKARGS) +{ + char *cp, *p; + + if (n->flags & NODE_NOFILL) + return; + + cp = n->string; + for (p = cp; NULL != (p = strchr(p, '\t')); p++) + mandoc_msg(MANDOCERR_FI_TAB, + n->line, n->pos + (int)(p - cp), NULL); +} + +static void +post_EE(CHKARGS) +{ + if ((n->flags & NODE_NOFILL) == 0) + mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); +} + +static void +post_EX(CHKARGS) +{ + if (n->flags & NODE_NOFILL) + mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); +} + +static void +post_OP(CHKARGS) +{ + + if (n->child == NULL) + mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); + else if (n->child->next != NULL && n->child->next->next != NULL) { + n = n->child->next->next; + mandoc_msg(MANDOCERR_ARG_EXCESS, + n->line, n->pos, "OP ... 
%s", n->string); + } +} + +static void +post_SH(CHKARGS) +{ + struct roff_node *nc; + char *cp, *tag; + + nc = n->child; + switch (n->type) { + case ROFFT_HEAD: + tag = NULL; + deroff(&tag, n); + if (tag != NULL) { + for (cp = tag; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + if (nc != NULL && nc->type == ROFFT_TEXT && + strcmp(nc->string, tag) == 0) + tag_put(NULL, TAG_WEAK, n); + else + tag_put(tag, TAG_FALLBACK, n); + free(tag); + } + return; + case ROFFT_BODY: + if (nc != NULL) + break; + return; + default: + return; + } + + if (nc->tok == MAN_PP && nc->body->child != NULL) { + while (nc->body->last != NULL) { + man->next = ROFF_NEXT_CHILD; + roff_node_relink(man, nc->body->last); + man->last = n; + } + } + + if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { + mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, + "%s after %s", roff_name[nc->tok], roff_name[n->tok]); + roff_node_delete(man, nc); + } + + /* + * Trailing PP is empty, so it is deleted by check_par(). + * Trailing sp is significant. 
+ */ + + if ((nc = n->last) != NULL && nc->tok == ROFF_br) { + mandoc_msg(MANDOCERR_PAR_SKIP, + nc->line, nc->pos, "%s at the end of %s", + roff_name[nc->tok], roff_name[n->tok]); + roff_node_delete(man, nc); + } +} + +static void +post_UR(CHKARGS) +{ + if (n->type == ROFFT_HEAD && n->child == NULL) + mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, + "%s", roff_name[n->tok]); + check_part(man, n); +} + +static void +check_part(CHKARGS) +{ + + if (n->type == ROFFT_BODY && n->child == NULL) + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +static void +check_par(CHKARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + if (n->body->child == NULL) + roff_node_delete(man, n); + break; + case ROFFT_BODY: + if (n->child != NULL && + (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { + mandoc_msg(MANDOCERR_PAR_SKIP, + n->child->line, n->child->pos, + "%s after %s", roff_name[n->child->tok], + roff_name[n->tok]); + roff_node_delete(man, n->child); + } + if (n->child == NULL) + mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, + "%s empty", roff_name[n->tok]); + break; + case ROFFT_HEAD: + if (n->child != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, + n->line, n->pos, "%s %s%s", + roff_name[n->tok], n->child->string, + n->child->next != NULL ? " ..." : ""); + break; + default: + break; + } +} + +static void +post_IP(CHKARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + if (n->head->child == NULL && n->body->child == NULL) + roff_node_delete(man, n); + break; + case ROFFT_HEAD: + check_tag(n, n->child); + break; + case ROFFT_BODY: + if (n->parent->head->child == NULL && n->child == NULL) + mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, + "%s empty", roff_name[n->tok]); + break; + default: + break; + } +} + +/* + * The first next-line element in the head is the tag. + * If that's a font macro, use its first child instead. 
+ */ +static void +post_TP(CHKARGS) +{ + struct roff_node *nt; + + if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) + return; + + while ((nt->flags & NODE_LINE) == 0) + if ((nt = nt->next) == NULL) + return; + + switch (nt->tok) { + case MAN_B: + case MAN_BI: + case MAN_BR: + case MAN_I: + case MAN_IB: + case MAN_IR: + nt = nt->child; + break; + default: + break; + } + check_tag(n, nt); +} + +static void +post_TH(CHKARGS) +{ + struct roff_node *nb; + const char *p; + + free(man->meta.title); + free(man->meta.vol); + free(man->meta.os); + free(man->meta.msec); + free(man->meta.date); + + man->meta.title = man->meta.vol = man->meta.date = + man->meta.msec = man->meta.os = NULL; + + nb = n; + + /* ->TITLE<- MSEC DATE OS VOL */ + + n = n->child; + if (n != NULL && n->string != NULL) { + for (p = n->string; *p != '\0'; p++) { + /* Only warn about this once... */ + if (isalpha((unsigned char)*p) && + ! isupper((unsigned char)*p)) { + mandoc_msg(MANDOCERR_TITLE_CASE, n->line, + n->pos + (int)(p - n->string), + "TH %s", n->string); + break; + } + } + man->meta.title = mandoc_strdup(n->string); + } else { + man->meta.title = mandoc_strdup(""); + mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); + } + + /* TITLE ->MSEC<- DATE OS VOL */ + + if (n != NULL) + n = n->next; + if (n != NULL && n->string != NULL) { + man->meta.msec = mandoc_strdup(n->string); + if (man->filesec != '\0' && + man->filesec != *n->string && + *n->string >= '1' && *n->string <= '9') + mandoc_msg(MANDOCERR_MSEC_FILE, n->line, n->pos, + "*.%c vs TH ... 
%c", man->filesec, *n->string); + } else { + man->meta.msec = mandoc_strdup(""); + mandoc_msg(MANDOCERR_MSEC_MISSING, + nb->line, nb->pos, "TH %s", man->meta.title); + } + + /* TITLE MSEC ->DATE<- OS VOL */ + + if (n != NULL) + n = n->next; + if (man->quick && n != NULL) + man->meta.date = mandoc_strdup(""); + else + man->meta.date = mandoc_normdate(n, nb); + + /* TITLE MSEC DATE ->OS<- VOL */ + + if (n && (n = n->next)) + man->meta.os = mandoc_strdup(n->string); + else if (man->os_s != NULL) + man->meta.os = mandoc_strdup(man->os_s); + if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { + if (strstr(man->meta.os, "OpenBSD") != NULL) + man->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(man->meta.os, "NetBSD") != NULL) + man->meta.os_e = MANDOC_OS_NETBSD; + } + + /* TITLE MSEC DATE OS ->VOL<- */ + /* If missing, use the default VOL name for MSEC. */ + + if (n && (n = n->next)) + man->meta.vol = mandoc_strdup(n->string); + else if ('\0' != man->meta.msec[0] && + (NULL != (p = mandoc_a2msec(man->meta.msec)))) + man->meta.vol = mandoc_strdup(p); + + if (n != NULL && (n = n->next) != NULL) + mandoc_msg(MANDOCERR_ARG_EXCESS, + n->line, n->pos, "TH ... %s", n->string); + + /* + * Remove the `TH' node after we've processed it for our + * meta-data. 
+ */ + roff_node_delete(man, man->last); +} + +static void +post_UC(CHKARGS) +{ + static const char * const bsd_versions[] = { + "3rd Berkeley Distribution", + "4th Berkeley Distribution", + "4.2 Berkeley Distribution", + "4.3 Berkeley Distribution", + "4.4 Berkeley Distribution", + }; + + const char *p, *s; + + n = n->child; + + if (n == NULL || n->type != ROFFT_TEXT) + p = bsd_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = bsd_versions[0]; + else if (0 == strcmp(s, "4")) + p = bsd_versions[1]; + else if (0 == strcmp(s, "5")) + p = bsd_versions[2]; + else if (0 == strcmp(s, "6")) + p = bsd_versions[3]; + else if (0 == strcmp(s, "7")) + p = bsd_versions[4]; + else + p = bsd_versions[0]; + } + + free(man->meta.os); + man->meta.os = mandoc_strdup(p); +} + +static void +post_AT(CHKARGS) +{ + static const char * const unix_versions[] = { + "7th Edition", + "System III", + "System V", + "System V Release 2", + }; + + struct roff_node *nn; + const char *p, *s; + + n = n->child; + + if (n == NULL || n->type != ROFFT_TEXT) + p = unix_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = unix_versions[0]; + else if (0 == strcmp(s, "4")) + p = unix_versions[1]; + else if (0 == strcmp(s, "5")) { + nn = n->next; + if (nn != NULL && + nn->type == ROFFT_TEXT && + nn->string[0] != '\0') + p = unix_versions[3]; + else + p = unix_versions[2]; + } else + p = unix_versions[0]; + } + + free(man->meta.os); + man->meta.os = mandoc_strdup(p); +} + +static void +post_in(CHKARGS) +{ + char *s; + + if (n->parent->tok != MAN_TP || + n->parent->type != ROFFT_HEAD || + n->child == NULL || + *n->child->string == '+' || + *n->child->string == '-') + return; + mandoc_asprintf(&s, "+%s", n->child->string); + free(n->child->string); + n->child->string = s; +} diff --git a/usr.bin/mandoc/manconf.h b/usr.bin/mandoc/manconf.h new file mode 100644 index 0000000..4cc623f --- /dev/null +++ b/usr.bin/mandoc/manconf.h @@ -0,0 +1,56 @@ +/* $OpenBSD: manconf.h,v 1.8 
2020/04/02 22:10:27 schwarze Exp $ */ +/* + * Copyright (c) 2011,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Public interface to man(1) configuration management. + * For use by the main program and by the formatters. + */ + +/* List of unique, absolute paths to manual trees. */ + +struct manpaths { + char **paths; + size_t sz; +}; + +/* Data from -O options and man.conf(5) output directives. 
*/ + +struct manoutput { + char *includes; + char *man; + char *paper; + char *style; + char *tag; + size_t indent; + size_t width; + int fragment; + int mdoc; + int noval; + int synopsisonly; + int tag_found; + int toc; +}; + +struct manconf { + struct manoutput output; + struct manpaths manpath; +}; + + +void manconf_parse(struct manconf *, const char *, char *, char *); +int manconf_output(struct manoutput *, const char *, int); +void manconf_free(struct manconf *); +void manpath_base(struct manpaths *); diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1 new file mode 100644 index 0000000..a5ea382 --- /dev/null +++ b/usr.bin/mandoc/mandoc.1 @@ -0,0 +1,2336 @@ +.\" $OpenBSD: mandoc.1,v 1.167 2020/04/24 11:58:02 schwarze Exp $ +.\" +.\" Copyright (c) 2012, 2014-2020 Ingo Schwarze <schwarze@openbsd.org> +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: April 24 2020 $ +.Dt MANDOC 1 +.Os +.Sh NAME +.Nm mandoc +.Nd format manual pages +.Sh SYNOPSIS +.Nm mandoc +.Op Fl ac +.Op Fl I Cm os Ns = Ns Ar name +.Op Fl K Ar encoding +.Op Fl mdoc | man +.Op Fl O Ar options +.Op Fl T Ar output +.Op Fl W Ar level +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility formats manual pages for display. +.Pp +By default, +.Nm +reads +.Xr mdoc 7 +or +.Xr man 7 +text from stdin and produces +.Fl T Cm locale +output. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +If the standard output is a terminal device and +.Fl c +is not specified, use +.Xr more 1 +to paginate the output, just like +.Xr man 1 +would. +.It Fl c +Copy the formatted manual pages to the standard output without using +.Xr more 1 +to paginate them. +This is the default. +It can be specified to override +.Fl a . +.It Fl I Cm os Ns = Ns Ar name +Override the default operating system +.Ar name +for the +.Xr mdoc 7 +.Ic \&Os +and for the +.Xr man 7 +.Ic \&TH +macro. +.It Fl K Ar encoding +Specify the input encoding. +The supported +.Ar encoding +arguments are +.Cm us-ascii , +.Cm iso-8859-1 , +and +.Cm utf-8 . +If not specified, autodetection uses the first match in the following +list: +.Bl -enum +.It +If the first three bytes of the input file are the UTF-8 byte order +mark (BOM, 0xefbbbf), input is interpreted as +.Cm utf-8 . +.It +If the first or second line of the input file matches the +.Sy emacs +mode line format +.Pp +.D1 .\e" -*- Oo ...; Oc coding: Ar encoding ; No -*- +.Pp +then input is interpreted according to +.Ar encoding . +.It +If the first non-ASCII byte in the file introduces a valid UTF-8 +sequence, input is interpreted as +.Cm utf-8 . +.It +Otherwise, input is interpreted as +.Cm iso-8859-1 . +.El +.It Fl mdoc | man +With +.Fl mdoc , +all input files are interpreted as +.Xr mdoc 7 . +With +.Fl man , +all input files are interpreted as +.Xr man 7 . 
+By default, the input language is automatically detected for each file: +if the first macro is +.Ic \&Dd +or +.Ic \&Dt , +the +.Xr mdoc 7 +parser is used; otherwise, the +.Xr man 7 +parser is used. +With other arguments, +.Fl m +is silently ignored. +.It Fl O Ar options +Comma-separated output options. +See the descriptions of the individual output formats for supported +.Ar options . +.It Fl T Ar output +Select the output format. +Supported values for the +.Ar output +argument are +.Cm ascii , +.Cm html , +the default of +.Cm locale , +.Cm man , +.Cm markdown , +.Cm pdf , +.Cm ps , +.Cm tree , +and +.Cm utf8 . +.Pp +The special +.Fl T Cm lint +mode only parses the input and produces no output. +It implies +.Fl W Cm all +and redirects parser messages, which usually appear on standard +error output, to standard output. +.It Fl W Ar level +Specify the minimum message +.Ar level +to be reported on the standard error output and to affect the exit status. +The +.Ar level +can be +.Cm base , +.Cm style , +.Cm warning , +.Cm error , +or +.Cm unsupp . +The +.Cm base +level automatically derives the operating system from the contents of the +.Ic \&Os +macro, from the +.Fl Ios +command line option, or from the +.Xr uname 3 +return value. +The levels +.Cm openbsd +and +.Cm netbsd +are variants of +.Cm base +that bypass autodetection and request validation of base system +conventions for a particular operating system. +The level +.Cm all +is an alias for +.Cm base . +By default, +.Nm +is silent. +See +.Sx EXIT STATUS +and +.Sx DIAGNOSTICS +for details. +.Pp +The special option +.Fl W Cm stop +tells +.Nm +to exit after parsing a file that causes warnings or errors of at least +the requested level. +No formatted output will be produced from that file. +If both a +.Ar level +and +.Cm stop +are requested, they can be joined with a comma, for example +.Fl W Cm error , Ns Cm stop . +.It Ar file +Read from the given input file. 
+If multiple files are specified, they are processed in the given order. +If unspecified, +.Nm +reads from standard input. +.El +.Pp +The options +.Fl fhklw +are also supported and are documented in +.Xr man 1 . +In +.Fl f +and +.Fl k +mode, +.Nm +also supports the options +.Fl CMmOSs +described in the +.Xr apropos 1 +manual. +The options +.Fl fkl +are mutually exclusive and override each other. +.Ss ASCII Output +Use +.Fl T Cm ascii +to force text output in 7-bit ASCII character encoding documented in the +.Xr ascii 7 +manual page, ignoring the +.Xr locale 1 +set in the environment. +.Pp +Font styles are applied by using back-spaced encoding such that an +underlined character +.Sq c +is rendered as +.Sq _ Ns \e[bs] Ns c , +where +.Sq \e[bs] +is the back-space character number 8. +Emboldened characters are rendered as +.Sq c Ns \e[bs] Ns c . +This markup is typically converted to appropriate terminal sequences by +the pager or +.Xr ul 1 . +To remove the markup, pipe the output to +.Xr col 1 +.Fl b +instead. +.Pp +The special characters documented in +.Xr mandoc_char 7 +are rendered best-effort in an ASCII equivalent. +In particular, opening and closing +.Sq single quotes +are represented as characters number 0x60 and 0x27, respectively, +which agrees with all ASCII standards from 1965 to the latest +revision (2012) and which matches the traditional way in which +.Xr roff 7 +formatters represent single quotes in ASCII output. +This correct ASCII rendering may look strange with modern +Unicode-compatible fonts because contrary to ASCII, Unicode uses +the code point U+0060 for the grave accent only, never for an opening +quote. +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm indent Ns = Ns Ar indent +The left margin for normal text is set to +.Ar indent +blank characters instead of the default of five for +.Xr mdoc 7 +and seven for +.Xr man 7 . 
+Increasing this is not recommended; it may result in degraded formatting, +for example overfull lines or ugly line breaks. +When output is to a pager on a terminal that is less than 66 columns +wide, the default is reduced to three columns. +.It Cm mdoc +Format +.Xr man 7 +input files in +.Xr mdoc 7 +output style. +Specifically, this suppresses the two additional blank lines near the +top and the bottom of each page, and it implies +.Fl O Cm indent Ns =5 . +One useful application is for checking that +.Fl T Cm man +output formats in the same way as the +.Xr mdoc 7 +source it was generated from. +.It Cm tag Ns Op = Ns Ar term +If the formatted manual page is opened in a pager, +go to the definition of the +.Ar term +rather than showing the manual page from the beginning. +If no +.Ar term +is specified, reuse the first command line argument that is not a +.Ar section +number. +If that argument is in +.Xr apropos 1 +.Ar key Ns = Ns Ar val +format, only the +.Ar val +is used rather than the argument as a whole. +This is useful for commands like +.Ql man -akO tag Ic=ulimit +to search for a keyword and jump right to its definition +in the matching manual pages. +.It Cm width Ns = Ns Ar width +The output width is set to +.Ar width +instead of the default of 78. +When output is to a pager on a terminal that is less than 79 columns +wide, the default is reduced to one less than the terminal width. +In any case, lines that are output in literal mode are never wrapped +and may exceed the output width. +.El +.Ss HTML Output +Output produced by +.Fl T Cm html +conforms to HTML5 using optional self-closing tags. +Default styles use only CSS1. +Equations rendered from +.Xr eqn 7 +blocks use MathML. +.Pp +The file +.Pa /usr/share/misc/mandoc.css +documents style-sheet classes available for customising output. 
+If a style-sheet is not specified with +.Fl O Cm style , +.Fl T Cm html +defaults to simple output (via an embedded style-sheet) +readable in any graphical or text-based web +browser. +.Pp +Non-ASCII characters are rendered +as hexadecimal Unicode character references. +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm fragment +Omit the <!DOCTYPE> declaration and the <html>, <head>, and <body> +elements and only emit the subtree below the <body> element. +The +.Cm style +argument will be ignored. +This is useful when embedding manual content within existing documents. +.It Cm includes Ns = Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../src/%I.html , +is used as a template for linked header files (usually via the +.Ic \&In +macro). +Instances of +.Sq \&%I +are replaced with the include filename. +The default is not to present a +hyperlink. +.It Cm man Ns = Ns Ar fmt Ns Op ; Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../html%S/%N.%S.html , +is used as a template for linked manuals (usually via the +.Ic \&Xr +macro). +Instances of +.Sq \&%N +and +.Sq %S +are replaced with the linked manual's name and section, respectively. +If no section is included, section 1 is assumed. +The default is not to +present a hyperlink. +If two formats are given and a file +.Ar %N.%S +exists in the current directory, the first format is used; +otherwise, the second format is used. +.It Cm style Ns = Ns Ar style.css +The file +.Ar style.css +is used for an external style-sheet. +This must be a valid absolute or +relative URI. +.It Cm toc +If an input file contains at least two non-standard sections, +print a table of contents near the beginning of the output. +.El +.Ss Locale Output +By default, +.Nm +automatically selects UTF-8 or ASCII output according to the current +.Xr locale 1 . 
+If any of the environment variables +.Ev LC_ALL , +.Ev LC_CTYPE , +or +.Ev LANG +are set and the first one that is set +selects the UTF-8 character encoding, it produces +.Sx UTF-8 Output ; +otherwise, it falls back to +.Sx ASCII Output . +This output mode can also be selected explicitly with +.Fl T Cm locale . +.Ss Man Output +Use +.Fl T Cm man +to translate +.Xr mdoc 7 +input into +.Xr man 7 +output format. +This is useful for distributing manual sources to legacy systems +lacking +.Xr mdoc 7 +formatters. +Embedded +.Xr eqn 7 +and +.Xr tbl 7 +code is not supported. +.Pp +If the input format of a file is +.Xr man 7 , +the input is copied to the output, expanding any +.Xr roff 7 +.Ic so +requests. +The parser is also run, and as usual, the +.Fl W +level controls which +.Sx DIAGNOSTICS +are displayed before copying the input to the output. +.Ss Markdown Output +Use +.Fl T Cm markdown +to translate +.Xr mdoc 7 +input to the markdown format conforming to +.Lk http://daringfireball.net/projects/markdown/syntax.text\ + "John Gruber's 2004 specification" . +The output also almost conforms to the +.Lk http://commonmark.org/ CommonMark +specification. +.Pp +The character set used for the markdown output is ASCII. +Non-ASCII characters are encoded as HTML entities. +Since that is not possible in literal font contexts, because these +are rendered as code spans and code blocks in the markdown output, +non-ASCII characters are transliterated to ASCII approximations in +these contexts. +.Pp +Markdown is a very weak markup language, so all semantic markup is +lost, and even part of the presentational markup may be lost. +Do not use this as an intermediate step in converting to HTML; +instead, use +.Fl T Cm html +directly. +.Pp +The +.Xr man 7 , +.Xr tbl 7 , +and +.Xr eqn 7 +input languages are not supported by +.Fl T Cm markdown +output mode. +.Ss PDF Output +PDF-1.1 output may be generated by +.Fl T Cm pdf . +See +.Sx PostScript Output +for +.Fl O +arguments and defaults. 
+.Ss PostScript Output +PostScript +.Qq Adobe-3.0 +Level-2 pages may be generated by +.Fl T Cm ps . +Output pages default to letter sized and are rendered in the Times font +family, 11-point. +Margins are calculated as 1/9 the page length and width. +Line-height is 1.4m. +.Pp +Special characters are rendered as in +.Sx ASCII Output . +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm paper Ns = Ns Ar name +The paper size +.Ar name +may be one of +.Ar a3 , +.Ar a4 , +.Ar a5 , +.Ar legal , +or +.Ar letter . +You may also manually specify dimensions as +.Ar NNxNN , +width by height in millimetres. +If an unknown value is encountered, +.Ar letter +is used. +.El +.Ss UTF-8 Output +Use +.Fl T Cm utf8 +to force text output in UTF-8 multi-byte character encoding, +ignoring the +.Xr locale 1 +settings in the environment. +See +.Sx ASCII Output +regarding font styles and +.Fl O +arguments. +.Pp +On operating systems lacking locale or wide character support, and +on those where the internal character representation is not UCS-4, +.Nm +always falls back to +.Sx ASCII Output . +.Ss Syntax tree output +Use +.Fl T Cm tree +to show a human readable representation of the syntax tree. +It is useful for debugging the source code of manual pages. +The exact format is subject to change, so don't write parsers for it. +.Pp +The first paragraph shows meta data found in the +.Xr mdoc 7 +prologue, on the +.Xr man 7 +.Ic \&TH +line, or the fallbacks used. +.Pp +In the tree dump, each output line shows one syntax tree node. +Child nodes are indented with respect to their parent node. +The columns are: +.Pp +.Bl -enum -compact +.It +For macro nodes, the macro name; for text and +.Xr tbl 7 +nodes, the content. +There is a special format for +.Xr eqn 7 +nodes. +.It +Node type (text, elem, block, head, body, body-end, tail, tbl, eqn). +.It +Flags: +.Bl -dash -compact +.It +An opening parenthesis if the node is an opening delimiter. 
+.It +An asterisk if the node starts a new input line. +.It +The input line number (starting at one). +.It +A colon. +.It +The input column number (starting at one). +.It +A closing parenthesis if the node is a closing delimiter. +.It +A full stop if the node ends a sentence. +.It +BROKEN if the node is a block broken by another block. +.It +NOSRC if the node is not in the input file, +but automatically generated from macros. +.It +NOPRT if the node is not supposed to generate output +for any output format. +.El +.El +.Pp +The following +.Fl O +argument is accepted: +.Bl -tag -width Ds +.It Cm noval +Skip validation and show the unvalidated syntax tree. +This can help to find out whether a given behaviour is caused by +the parser or by the validator. +Meta data is not available in this case. +.El +.Sh ENVIRONMENT +.Bl -tag -width MANPAGER +.It Ev LC_CTYPE +The character encoding +.Xr locale 1 . +When +.Sx Locale Output +is selected, it decides whether to use ASCII or UTF-8 output format. +It never affects the interpretation of input files. +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +is used instead of the standard pagination program, +.Xr more 1 ; +see +.Xr man 1 +for details. +Only used if +.Fl a +or +.Fl l +is specified. +.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +is used. +Only used if +.Fl a +or +.Fl l +is specified. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values, controlled by the message +.Ar level +associated with the +.Fl W +option: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No base system convention violations, style suggestions, warnings, +or errors occurred, or those that did were ignored because they +were lower than the requested +.Ar level . 
+.It 1 +At least one base system convention violation or style suggestion +occurred, but no warning or error, and +.Fl W Cm base +or +.Fl W Cm style +was specified. +.It 2 +At least one warning occurred, but no error, and +.Fl W Cm warning +or a lower +.Ar level +was requested. +.It 3 +At least one parsing error occurred, +but no unsupported feature was encountered, and +.Fl W Cm error +or a lower +.Ar level +was requested. +.It 4 +At least one unsupported feature was encountered, and +.Fl W Cm unsupp +or a lower +.Ar level +was requested. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example exhaustion +of memory, file descriptors, or process table entries. +Such errors may cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +.El +.Pp +Note that selecting +.Fl T Cm lint +output mode implies +.Fl W Cm all . +.Sh EXAMPLES +To page manuals to the terminal: +.Pp +.Dl $ mandoc -l mandoc.1 man.1 apropos.1 makewhatis.8 +.Pp +To produce HTML manuals with +.Pa /usr/share/misc/mandoc.css +as the style-sheet: +.Pp +.Dl $ mandoc \-T html -O style=/usr/share/misc/mandoc.css mdoc.7 > mdoc.7.html +.Pp +To check over a large set of manuals: +.Pp +.Dl $ mandoc \-T lint \(gafind /usr/src -name \e*\e.[1-9]\(ga +.Pp +To produce a series of PostScript manuals for A4 paper: +.Pp +.Dl $ mandoc \-T ps \-O paper=a4 mdoc.7 man.7 > manuals.ps +.Pp +Convert a modern +.Xr mdoc 7 +manual to the older +.Xr man 7 +format, for use on systems lacking an +.Xr mdoc 7 +parser: +.Pp +.Dl $ mandoc \-T man foo.mdoc > foo.man +.Sh DIAGNOSTICS +Messages displayed by +.Nm +follow this format: +.Bd -ragged -offset indent +.Nm : +.Ar file : Ns Ar line : Ns Ar column : level : message : macro arguments +.Pq Ar os +.Ed +.Pp +The first three fields identify the +.Ar file +name, +.Ar line +number, and +.Ar column +number of the input file where the message was triggered. 
+The line and column numbers start at 1. +Both are omitted for messages referring to an input file as a whole. +All +.Ar level +and +.Ar message +strings are explained below. +The name of the +.Ar macro +triggering the message and its +.Ar arguments +are omitted where meaningless. +The +.Ar os +operating system specifier is omitted for messages that are relevant +for all operating systems. +Fatal messages about invalid command line arguments +or operating system errors, for example when memory is exhausted, +may also omit the +.Ar file +and +.Ar level +fields. +.Pp +Message levels have the following meanings: +.Bl -tag -width "warning" +.It Cm syserr +An operating system error occurred. +There isn't necessarily anything wrong with the input files. +Output may all the same be missing or incomplete. +.It Cm badarg +Invalid command line arguments were specified. +No input files have been read and no output is produced. +.It Cm unsupp +An input file uses unsupported low-level +.Xr roff 7 +features. +The output may be incomplete and/or misformatted, +so using GNU troff instead of +.Nm +to process the file may be preferable. +.It Cm error +Indicates a risk of information loss or severe misformatting, +in most cases caused by serious syntax errors. +.It Cm warning +Indicates a risk that the information shown or its formatting +may mismatch the author's intent in minor ways. +Additionally, syntax errors are classified at least as warnings, +even if they do not usually cause misformatting. +.It Cm style +An input file uses dubious or discouraged style. +This is not a complaint about the syntax, and probably neither +formatting nor portability are in danger. +While great care is taken to avoid false positives on the higher +message levels, the +.Cm style +level tries to reduce the probability that issues go unnoticed, +so it may occasionally issue bogus suggestions. 
+Please use your good judgement to decide whether any particular +.Cm style +suggestion really justifies a change to the input file. +.It Cm base +A convention used in the base system of a specific operating system +is not adhered to. +These are not markup mistakes, and neither the quality of formatting +nor portability are in danger. +Messages of the +.Cm base +level are printed with the more intuitive +.Cm style +.Ar level +tag. +.El +.Pp +Messages of the +.Cm base , +.Cm style , +.Cm warning , +.Cm error , +and +.Cm unsupp +levels are hidden unless their level, or a lower level, is requested using a +.Fl W +option or +.Fl T Cm lint +output mode. +.Pp +As indicated below, all +.Cm base +and some +.Cm style +checks are only performed if a specific operating system name occurs +in the arguments of the +.Fl W +command line option, of the +.Ic \&Os +macro, of the +.Fl Ios +command line option, or, if neither are present, in the return value +of the +.Xr uname 3 +function. +.Ss Conventions for base system manuals +.Bl -ohang +.It Sy "Mdocdate found" +.Pq mdoc , Nx +The +.Ic \&Dd +macro uses CVS +.Ic Mdocdate +keyword substitution, which is not supported by the +.Nx +base system. +Consider using the conventional +.Dq "Month dd, yyyy" +format instead. +.It Sy "Mdocdate missing" +.Pq mdoc , Ox +The +.Ic \&Dd +macro does not use CVS +.Ic Mdocdate +keyword substitution, but using it is conventionally expected in the +.Ox +base system. +.It Sy "unknown architecture" +.Pq mdoc , Ox , Nx +The third argument of the +.Ic \&Dt +macro does not match any of the architectures this operating system +is running on. +.It Sy "operating system explicitly specified" +.Pq mdoc , Ox , Nx +The +.Ic \&Os +macro has an argument. +In the base system, it is conventionally left blank. 
+.It Sy "RCS id missing" +.Pq Ox , Nx +The manual page lacks the comment line with the RCS identifier +generated by CVS +.Ic OpenBSD +or +.Ic NetBSD +keyword substitution as conventionally used in these operating systems. +.It Sy "referenced manual not found" +.Pq mdoc +An +.Ic \&Xr +macro references a manual page that is not found in the base system. +The path to look for base system manuals is configurable at compile +time and defaults to +.Pa /usr/share/man : /usr/X11R6/man . +.El +.Ss Style suggestions +.Bl -ohang +.It Sy "legacy man(7) date format" +.Pq mdoc +The +.Ic \&Dd +macro uses the legacy +.Xr man 7 +date format +.Dq yyyy-mm-dd . +Consider using the conventional +.Xr mdoc 7 +date format +.Dq "Month dd, yyyy" +instead. +.It Sy "normalizing date format to" : No ... +.Pq mdoc , man +The +.Ic \&Dd +or +.Ic \&TH +macro provides an abbreviated month name or a day number with a +leading zero. +In the formatted output, the month name is written out in full +and the leading zero is omitted. +.It Sy "lower case character in document title" +.Pq mdoc , man +The title is still used as given in the +.Ic \&Dt +or +.Ic \&TH +macro. +.It Sy "duplicate RCS id" +A single manual page contains two copies of the RCS identifier for +the same operating system. +Consider deleting the later instance and moving the first one up +to the top of the page. +.It Sy "possible typo in section name" +.Pq mdoc +Fuzzy string matching revealed that the argument of an +.Ic \&Sh +macro is similar, but not identical to a standard section name. +.It Sy "unterminated quoted argument" +.Pq roff +Macro arguments can be enclosed in double quote characters +such that space characters and macro names contained in the quoted +argument need not be escaped. +The closing quote of the last argument of a macro can be omitted. +However, omitting it is not recommended because it makes the code +harder to read. +.It Sy "useless macro" +.Pq mdoc +A +.Ic \&Bt , +.Ic \&Tn , +or +.Ic \&Ud +macro was found. 
+Simply delete it: it serves no useful purpose. +.It Sy "consider using OS macro" +.Pq mdoc +A string was found in plain text or in a +.Ic \&Bx +macro that could be represented using +.Ic \&Ox , +.Ic \&Nx , +.Ic \&Fx , +or +.Ic \&Dx . +.It Sy "errnos out of order" +.Pq mdoc, Nx +The +.Ic \&Er +items in a +.Ic \&Bl +list are not in alphabetical order. +.It Sy "duplicate errno" +.Pq mdoc, Nx +A +.Ic \&Bl +list contains two consecutive +.Ic \&It +entries describing the same +.Ic \&Er +number. +.It Sy "trailing delimiter" +.Pq mdoc +The last argument of an +.Ic \&Ex , \&Fo , \&Nd , \&Nm , \&Os , \&Sh , \&Ss , \&St , +or +.Ic \&Sx +macro ends with a trailing delimiter. +This is usually bad style and often indicates typos. +Most likely, the delimiter can be removed. +.It Sy "no blank before trailing delimiter" +.Pq mdoc +The last argument of a macro that supports trailing delimiter +arguments is longer than one byte and ends with a trailing delimiter. +Consider inserting a blank such that the delimiter becomes a separate +argument, thus moving it out of the scope of the macro. +.It Sy "fill mode already enabled, skipping" +.Pq man +A +.Ic \&fi +request occurs even though the document is still in fill mode, +or already switched back to fill mode. +It has no effect. +.It Sy "fill mode already disabled, skipping" +.Pq man +An +.Ic \&nf +request occurs even though the document already switched to no-fill mode +and did not switch back to fill mode yet. +It has no effect. +.It Sy "verbatim \(dq--\(dq, maybe consider using \e(em" +.Pq mdoc +Even though the ASCII output device renders an em-dash as +.Qq \-\- , +that is not a good way to write it in an input file +because it renders poorly on all other output devices. +.It Sy "function name without markup" +.Pq mdoc +A word followed by an empty pair of parentheses occurs on a text line. +Consider using an +.Ic \&Fn +or +.Ic \&Xr +macro. 
+.It Sy "whitespace at end of input line" +.Pq mdoc , man , roff +Whitespace at the end of input lines is almost never semantically +significant \(em but in the odd case where it might be, it is +extremely confusing when reviewing and maintaining documents. +.It Sy "bad comment style" +.Pq roff +Comment lines start with a dot, a backslash, and a double-quote character. +The +.Nm +utility treats the line as a comment line even without the backslash, +but leaving out the backslash might not be portable. +.El +.Ss Warnings related to the document prologue +.Bl -ohang +.It Sy "missing manual title, using UNTITLED" +.Pq mdoc +A +.Ic \&Dt +macro has no arguments, or there is no +.Ic \&Dt +macro before the first non-prologue macro. +.It Sy "missing manual title, using \(dq\(dq" +.Pq man +There is no +.Ic \&TH +macro, or it has no arguments. +.It Sy "missing manual section, using \(dq\(dq" +.Pq mdoc , man +A +.Ic \&Dt +or +.Ic \&TH +macro lacks the mandatory section argument. +.It Sy "unknown manual section" +.Pq mdoc +The section number in a +.Ic \&Dt +line is invalid, but still used. +.It Sy "filename/section mismatch" +.Pq mdoc , man +The name of the input file being processed is known and its file +name extension starts with a non-zero digit, but the +.Ic \&Dt +or +.Ic \&TH +macro contains a +.Ar section +argument that starts with a different non-zero digit. +The +.Ar section +argument is used as provided anyway. +Consider checking whether the file name or the argument need a correction. +.It Sy "missing date, using \(dq\(dq" +.Pq mdoc, man +The document was parsed as +.Xr mdoc 7 +and it has no +.Ic \&Dd +macro, or the +.Ic \&Dd +macro has no arguments or only empty arguments; +or the document was parsed as +.Xr man 7 +and it has no +.Ic \&TH +macro, or the +.Ic \&TH +macro has less than three arguments or its third argument is empty. 
+.It Sy "cannot parse date, using it verbatim" +.Pq mdoc , man +The date given in a +.Ic \&Dd +or +.Ic \&TH +macro does not follow the conventional format. +.It Sy "date in the future, using it anyway" +.Pq mdoc , man +The date given in a +.Ic \&Dd +or +.Ic \&TH +macro is more than a day ahead of the current system +.Xr time 3 . +.It Sy "missing Os macro, using \(dq\(dq" +.Pq mdoc +The default or current system is not shown in this case. +.It Sy "late prologue macro" +.Pq mdoc +A +.Ic \&Dd +or +.Ic \&Os +macro occurs after some non-prologue macro, but still takes effect. +.It Sy "prologue macros out of order" +.Pq mdoc +The prologue macros are not given in the conventional order +.Ic \&Dd , +.Ic \&Dt , +.Ic \&Os . +All three macros are used even when given in another order. +.El +.Ss Warnings regarding document structure +.Bl -ohang +.It Sy ".so is fragile, better use ln(1)" +.Pq roff +Including files only works when the parser program runs with the correct +current working directory. +.It Sy "no document body" +.Pq mdoc , man +The document body contains neither text nor macros. +An empty document is shown, consisting only of a header and a footer line. +.It Sy "content before first section header" +.Pq mdoc , man +Some macros or text precede the first +.Ic \&Sh +or +.Ic \&SH +section header. +The offending macros and text are parsed and added to the top level +of the syntax tree, outside any section block. +.It Sy "first section is not NAME" +.Pq mdoc +The argument of the first +.Ic \&Sh +macro is not +.Sq NAME . +This may confuse +.Xr makewhatis 8 +and +.Xr apropos 1 . +.It Sy "NAME section without Nm before Nd" +.Pq mdoc +The NAME section does not contain any +.Ic \&Nm +child macro before the first +.Ic \&Nd +macro. +.It Sy "NAME section without description" +.Pq mdoc +The NAME section lacks the mandatory +.Ic \&Nd +child macro. 
+.It Sy "description not at the end of NAME" +.Pq mdoc +The NAME section does contain an +.Ic \&Nd +child macro, but other content follows it. +.It Sy "bad NAME section content" +.Pq mdoc +The NAME section contains plain text or macros other than +.Ic \&Nm +and +.Ic \&Nd . +.It Sy "missing comma before name" +.Pq mdoc +The NAME section contains an +.Ic \&Nm +macro that is neither the first one nor preceded by a comma. +.It Sy "missing description line, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Nd +macro lacks the required argument. +The title line of the manual will end after the dash. +.It Sy "description line outside NAME section" +.Pq mdoc +An +.Ic \&Nd +macro appears outside the NAME section. +The arguments are printed anyway and the following text is used for +.Xr apropos 1 , +but none of that behaviour is portable. +.It Sy "sections out of conventional order" +.Pq mdoc +A standard section occurs after another section it usually precedes. +All section titles are used as given, +and the order of sections is not changed. +.It Sy "duplicate section title" +.Pq mdoc +The same standard section title occurs more than once. +.It Sy "unexpected section" +.Pq mdoc +A standard section header occurs in a section of the manual +where it normally isn't useful. +.It Sy "cross reference to self" +.Pq mdoc +An +.Ic \&Xr +macro refers to a name and section matching the section of the present +manual page and a name mentioned in an +.Ic \&Nm +macro in the NAME or SYNOPSIS section, or in an +.Ic \&Fn +or +.Ic \&Fo +macro in the SYNOPSIS. +Consider using +.Ic \&Nm +or +.Ic \&Fn +instead of +.Ic \&Xr . +.It Sy "unusual Xr order" +.Pq mdoc +In the SEE ALSO section, an +.Ic \&Xr +macro with a lower section number follows one with a higher number, +or two +.Ic \&Xr +macros referring to the same section are out of alphabetical order. 
+.It Sy "unusual Xr punctuation" +.Pq mdoc +In the SEE ALSO section, punctuation between two +.Ic \&Xr +macros differs from a single comma, or there is trailing punctuation +after the last +.Ic \&Xr +macro. +.It Sy "AUTHORS section without An macro" +.Pq mdoc +An AUTHORS section contains no +.Ic \&An +macros, or only empty ones. +Probably, there are author names lacking markup. +.El +.Ss "Warnings related to macros and nesting" +.Bl -ohang +.It Sy "obsolete macro" +.Pq mdoc +See the +.Xr mdoc 7 +manual for replacements. +.It Sy "macro neither callable nor escaped" +.Pq mdoc +The name of a macro that is not callable appears on a macro line. +It is printed verbatim. +If the intention is to call it, move it to its own input line; +otherwise, escape it by prepending +.Sq \e& . +.It Sy "skipping paragraph macro" +In +.Xr mdoc 7 +documents, this happens +.Bl -dash -compact +.It +at the beginning and end of sections and subsections +.It +right before non-compact lists and displays +.It +at the end of items in non-column, non-compact lists +.It +and for multiple consecutive paragraph macros. +.El +In +.Xr man 7 +documents, it happens +.Bl -dash -compact +.It +for empty +.Ic \&P , +.Ic \&PP , +and +.Ic \&LP +macros +.It +for +.Ic \&IP +macros having neither head nor body arguments +.It +for +.Ic \&br +or +.Ic \&sp +right after +.Ic \&SH +or +.Ic \&SS +.El +.It Sy "moving paragraph macro out of list" +.Pq mdoc +A list item in a +.Ic \&Bl +list contains a trailing paragraph macro. +The paragraph macro is moved after the end of the list. +.It Sy "skipping no-space macro" +.Pq mdoc +An input line begins with an +.Ic \&Ns +macro, or the next argument after an +.Ic \&Ns +macro is an isolated closing delimiter. +The macro is ignored. +.It Sy "blocks badly nested" +.Pq mdoc +If two blocks intersect, one should completely contain the other. 
+Otherwise, rendered output is likely to look strange in any output +format, and rendering in SGML-based output formats is likely to be +outright wrong because such languages do not support badly nested +blocks at all. +Typical examples of badly nested blocks are +.Qq Ic \&Ao \&Bo \&Ac \&Bc +and +.Qq Ic \&Ao \&Bq \&Ac . +In these examples, +.Ic \&Ac +breaks +.Ic \&Bo +and +.Ic \&Bq , +respectively. +.It Sy "nested displays are not portable" +.Pq mdoc +A +.Ic \&Bd , +.Ic \&D1 , +or +.Ic \&Dl +display occurs nested inside another +.Ic \&Bd +display. +This works with +.Nm , +but fails with most other implementations. +.It Sy "moving content out of list" +.Pq mdoc +A +.Ic \&Bl +list block contains text or macros before the first +.Ic \&It +macro. +The offending children are moved before the beginning of the list. +.It Sy "first macro on line" +Inside a +.Ic \&Bl Fl column +list, a +.Ic \&Ta +macro occurs as the first macro on a line, which is not portable. +.It Sy "line scope broken" +.Pq man +While parsing the next-line scope of the previous macro, +another macro is found that prematurely terminates the previous one. +The previous, interrupted macro is deleted from the parse tree. +.El +.Ss "Warnings related to missing arguments" +.Bl -ohang +.It Sy "skipping empty request" +.Pq roff , eqn +The macro name is missing from a macro definition request, +or an +.Xr eqn 7 +control statement or operation keyword lacks its required argument. +.It Sy "conditional request controls empty scope" +.Pq roff +A conditional request is only useful if any of the following +follows it on the same logical input line: +.Bl -dash -compact +.It +The +.Sq \e{ +keyword to open a multi-line scope. +.It +A request or macro or some text, resulting in a single-line scope. +.It +The immediate end of the logical line without any intervening whitespace, +resulting in next-line scope. 
+.El +Here, a conditional request is followed by trailing whitespace only, +and there is no other content on its logical input line. +Note that it doesn't matter whether the logical input line is split +across multiple physical input lines using +.Sq \e +line continuation characters. +This is one of the rare cases +where trailing whitespace is syntactically significant. +The conditional request controls a scope containing whitespace only, +so it is unlikely to have a significant effect, +except that it may control a following +.Ic \&el +clause. +.It Sy "skipping empty macro" +.Pq mdoc +The indicated macro has no arguments and hence no effect. +.It Sy "empty block" +.Pq mdoc , man +A +.Ic \&Bd , +.Ic \&Bk , +.Ic \&Bl , +.Ic \&D1 , +.Ic \&Dl , +.Ic \&MT , +.Ic \&RS , +or +.Ic \&UR +block contains nothing in its body and will produce no output. +.It Sy "empty argument, using 0n" +.Pq mdoc +The required width is missing after +.Ic \&Bd +or +.Ic \&Bl +.Fl offset +or +.Fl width . +.It Sy "missing display type, using -ragged" +.Pq mdoc +The +.Ic \&Bd +macro is invoked without the required display type. +.It Sy "list type is not the first argument" +.Pq mdoc +In a +.Ic \&Bl +macro, at least one other argument precedes the type argument. +The +.Nm +utility copes with any argument order, but some other +.Xr mdoc 7 +implementations do not. +.It Sy "missing -width in -tag list, using 8n" +.Pq mdoc +Every +.Ic \&Bl +macro having the +.Fl tag +argument requires +.Fl width , +too. +.It Sy "missing utility name, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Ex Fl std +macro is called without an argument before +.Ic \&Nm +has first been called with an argument. +.It Sy "missing function name, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Fo +macro is called without an argument. +No function name is printed. +.It Sy "empty head in list item" +.Pq mdoc +In a +.Ic \&Bl +.Fl diag , +.Fl hang , +.Fl inset , +.Fl ohang , +or +.Fl tag +list, an +.Ic \&It +macro lacks the required argument. 
+The item head is left empty. +.It Sy "empty list item" +.Pq mdoc +In a +.Ic \&Bl +.Fl bullet , +.Fl dash , +.Fl enum , +or +.Fl hyphen +list, an +.Ic \&It +block is empty. +An empty list item is shown. +.It Sy "missing argument, using next line" +.Pq mdoc +An +.Ic \&It +macro in a +.Ic \&Bl Fl column +list has no arguments. +While +.Nm +uses the text or macros of the following line, if any, for the cell, +other formatters may misformat the list. +.It Sy "missing font type, using \efR" +.Pq mdoc +A +.Ic \&Bf +macro has no argument. +It switches to the default font. +.It Sy "unknown font type, using \efR" +.Pq mdoc +The +.Ic \&Bf +argument is invalid. +The default font is used instead. +.It Sy "nothing follows prefix" +.Pq mdoc +A +.Ic \&Pf +macro has no argument, or only one argument and no macro follows +on the same input line. +This defeats its purpose; in particular, spacing is not suppressed +before the text or macros following on the next input line. +.It Sy "empty reference block" +.Pq mdoc +An +.Ic \&Rs +macro is immediately followed by an +.Ic \&Re +macro on the next input line. +Such an empty block does not produce any output. +.It Sy "missing section argument" +.Pq mdoc +An +.Ic \&Xr +macro lacks its second, section number argument. +The first argument, i.e. the name, is printed, but without subsequent +parentheses. +.It Sy "missing -std argument, adding it" +.Pq mdoc +An +.Ic \&Ex +or +.Ic \&Rv +macro lacks the required +.Fl std +argument. +The +.Nm +utility assumes +.Fl std +even when it is not specified, but other implementations may not. +.It Sy "missing option string, using \(dq\(dq" +.Pq man +The +.Ic \&OP +macro is invoked without any argument. +An empty pair of square brackets is shown. +.It Sy "missing resource identifier, using \(dq\(dq" +.Pq man +The +.Ic \&MT +or +.Ic \&UR +macro is invoked without any argument. +An empty pair of angle brackets is shown. 
+.It Sy "missing eqn box, using \(dq\(dq" +.Pq eqn +A diacritic mark or a binary operator is found, +but there is nothing to the left of it. +An empty box is inserted. +.El +.Ss "Warnings related to bad macro arguments" +.Bl -ohang +.It Sy "duplicate argument" +.Pq mdoc +A +.Ic \&Bd +or +.Ic \&Bl +macro has more than one +.Fl compact , +more than one +.Fl offset , +or more than one +.Fl width +argument. +All but the last instances of these arguments are ignored. +.It Sy "skipping duplicate argument" +.Pq mdoc +An +.Ic \&An +macro has more than one +.Fl split +or +.Fl nosplit +argument. +All but the first of these arguments are ignored. +.It Sy "skipping duplicate display type" +.Pq mdoc +A +.Ic \&Bd +macro has more than one type argument; the first one is used. +.It Sy "skipping duplicate list type" +.Pq mdoc +A +.Ic \&Bl +macro has more than one type argument; the first one is used. +.It Sy "skipping -width argument" +.Pq mdoc +A +.Ic \&Bl +.Fl column , +.Fl diag , +.Fl ohang , +.Fl inset , +or +.Fl item +list has a +.Fl width +argument. +That has no effect. +.It Sy "wrong number of cells" +In a line of a +.Ic \&Bl Fl column +list, the number of tabs or +.Ic \&Ta +macros is less than the number expected from the list header line +or exceeds the expected number by more than one. +Missing cells remain empty, and all cells exceeding the number of +columns are joined into one single cell. +.It Sy "unknown AT&T UNIX version" +.Pq mdoc +An +.Ic \&At +macro has an invalid argument. +It is used verbatim, with +.Qq "AT&T UNIX " +prefixed to it. +.It Sy "comma in function argument" +.Pq mdoc +An argument of an +.Ic \&Fa +or +.Ic \&Fn +macro contains a comma; it should probably be split into two arguments. +.It Sy "parenthesis in function name" +.Pq mdoc +The first argument of an +.Ic \&Fo +or +.Ic \&Fn +macro contains an opening or closing parenthesis; that's probably wrong, +parentheses are added automatically. 
+.It Sy "unknown library name" +.Pq mdoc, not on Ox +An +.Ic \&Lb +macro has an unknown name argument and will be rendered as +.Qq library Dq Ar name . +.It Sy "invalid content in Rs block" +.Pq mdoc +An +.Ic \&Rs +block contains plain text or non-% macros. +The bogus content is left in the syntax tree. +Formatting may be poor. +.It Sy "invalid Boolean argument" +.Pq mdoc +An +.Ic \&Sm +macro has an argument other than +.Cm on +or +.Cm off . +The invalid argument is moved out of the macro, which leaves the macro +empty, causing it to toggle the spacing mode. +.It Sy "argument contains two font escapes" +.Pq roff +The second argument of a +.Ic char +request contains more than one font escape sequence. +A wrong font may remain active after using the character. +.It Sy "unknown font, skipping request" +.Pq man , tbl +A +.Xr roff 7 +.Ic \&ft +request or a +.Xr tbl 7 +.Ic \&f +layout modifier has an unknown +.Ar font +argument. +.It Sy "odd number of characters in request" +.Pq roff +A +.Ic \&tr +request contains an odd number of characters. +The last character is mapped to the blank character. +.El +.Ss "Warnings related to plain text" +.Bl -ohang +.It Sy "blank line in fill mode, using .sp" +.Pq mdoc +The meaning of blank input lines is only well-defined in non-fill mode: +In fill mode, line breaks of text input lines are not supposed to be +significant. +However, for compatibility with groff, blank lines in fill mode +are formatted like +.Ic \&sp +requests. +To request a paragraph break, use +.Ic \&Pp +instead of a blank line. +.It Sy "tab in filled text" +.Pq mdoc , man +The meaning of tab characters is only well-defined in non-fill mode: +In fill mode, whitespace is not supposed to be significant +on text input lines. +As an implementation dependent choice, tab characters on text lines +are passed through to the formatters in any case. 
+Given that the text before the tab character will be filled, +it is hard to predict which tab stop position the tab will advance to. +.It Sy "new sentence, new line" +.Pq mdoc +A new sentence starts in the middle of a text line. +Start it on a new input line to help formatters produce correct spacing. +.It Sy "invalid escape sequence" +.Pq roff +An escape sequence has an invalid opening argument delimiter, lacks the +closing argument delimiter, the argument is of an invalid form, or it is +a character escape sequence with an invalid name. +If the argument is incomplete, +.Ic \e* +and +.Ic \en +expand to an empty string, +.Ic \eB +to the digit +.Sq 0 , +and +.Ic \ew +to the length of the incomplete argument. +All other invalid escape sequences are ignored. +.It Sy "undefined escape, printing literally" +.Pq roff +In an escape sequence, the first character +right after the leading backslash is invalid. +That character is printed literally, +which is equivalent to ignoring the backslash. +.It Sy "undefined string, using \(dq\(dq" +.Pq roff +If a string is used without being defined before, +its value is implicitly set to the empty string. +However, defining strings explicitly before use +keeps the code more readable. +.El +.Ss "Warnings related to tables" +.Bl -ohang +.It Sy "tbl line starts with span" +.Pq tbl +The first cell in a table layout line is a horizontal span +.Pq Sq Cm s . +Data provided for this cell is ignored, and nothing is printed in the cell. +.It Sy "tbl column starts with span" +.Pq tbl +The first line of a table layout specification +requests a vertical span +.Pq Sq Cm ^ . +Data provided for this cell is ignored, and nothing is printed in the cell. +.It Sy "skipping vertical bar in tbl layout" +.Pq tbl +A table layout specification contains more than two consecutive vertical bars. +A double bar is printed, all additional bars are discarded. 
+.El +.Ss "Errors related to tables" +.Bl -ohang +.It Sy "non-alphabetic character in tbl options" +.Pq tbl +The table options line contains a character other than a letter, +blank, or comma where the beginning of an option name is expected. +The character is ignored. +.It Sy "skipping unknown tbl option" +.Pq tbl +The table options line contains a string of letters that does not +match any known option name. +The word is ignored. +.It Sy "missing tbl option argument" +.Pq tbl +A table option that requires an argument is not followed by an +opening parenthesis, or the opening parenthesis is immediately +followed by a closing parenthesis. +The option is ignored. +.It Sy "wrong tbl option argument size" +.Pq tbl +A table option argument contains an invalid number of characters. +Both the option and the argument are ignored. +.It Sy "empty tbl layout" +.Pq tbl +A table layout specification is completely empty, +specifying zero lines and zero columns. +As a fallback, a single left-justified column is used. +.It Sy "invalid character in tbl layout" +.Pq tbl +A table layout specification contains a character that can neither +be interpreted as a layout key character nor as a layout modifier, +or a modifier precedes the first key. +The invalid character is discarded. +.It Sy "unmatched parenthesis in tbl layout" +.Pq tbl +A table layout specification contains an opening parenthesis, +but no matching closing parenthesis. +The rest of the input line, starting from the parenthesis, has no effect. +.It Sy "tbl without any data cells" +.Pq tbl +A table does not contain any data cells. +It will probably produce no output. +.It Sy "ignoring data in spanned tbl cell" +.Pq tbl +A table cell is marked as a horizontal span +.Pq Sq Cm s +or vertical span +.Pq Sq Cm ^ +in the table layout, but it contains data. +The data is ignored. +.It Sy "ignoring extra tbl data cells" +.Pq tbl +A data line contains more cells than the corresponding layout line. 
+The data in the extra cells is ignored. +.It Sy "data block open at end of tbl" +.Pq tbl +A data block is opened with +.Cm T{ , +but never closed with a matching +.Cm T} . +The remaining data lines of the table are all put into one cell, +and any remaining cells stay empty. +.El +.Ss "Errors related to roff, mdoc, and man code" +.Bl -ohang +.It Sy "duplicate prologue macro" +.Pq mdoc +One of the prologue macros occurs more than once. +The last instance overrides all previous ones. +.It Sy "skipping late title macro" +.Pq mdoc +The +.Ic \&Dt +macro appears after the first non-prologue macro. +Traditional formatters cannot handle this because +they write the page header before parsing the document body. +Even though this technical restriction does not apply to +.Nm , +traditional semantics is preserved. +The late macro is discarded including its arguments. +.It Sy "input stack limit exceeded, infinite loop?" +.Pq roff +Explicit recursion limits are implemented for the following features, +in order to prevent infinite loops: +.Bl -dash -compact +.It +expansion of nested escape sequences +including expansion of strings and number registers, +.It +expansion of nested user-defined macros, +.It +and +.Ic \&so +file inclusion. +.El +When a limit is hit, the output is incorrect, typically losing +some content, but the parser can continue. +.It Sy "skipping bad character" +.Pq mdoc , man , roff +The input file contains a byte that is not a printable +.Xr ascii 7 +character. +The message mentions the character number. +The offending byte is replaced with a question mark +.Pq Sq \&? . +Consider editing the input file to replace the byte with an ASCII +transliteration of the intended character. +.It Sy "skipping unknown macro" +.Pq mdoc , man , roff +The first identifier on a request or macro line is neither recognized as a +.Xr roff 7 +request, nor as a user-defined macro, nor, respectively, as an +.Xr mdoc 7 +or +.Xr man 7 +macro. +It may be mistyped or unsupported. 
+The request or macro is discarded including its arguments. +.It Sy "skipping request outside macro" +.Pq roff +A +.Ic shift +or +.Ic return +request occurs outside any macro definition and has no effect. +.It Sy "skipping insecure request" +.Pq roff +An input file attempted to run a shell command +or to read or write an external file. +Such attempts are denied for security reasons. +.It Sy "skipping item outside list" +.Pq mdoc , eqn +An +.Ic \&It +macro occurs outside any +.Ic \&Bl +list, or an +.Xr eqn 7 +.Ic above +delimiter occurs outside any pile. +It is discarded including its arguments. +.It Sy "skipping column outside column list" +.Pq mdoc +A +.Ic \&Ta +macro occurs outside any +.Ic \&Bl Fl column +block. +It is discarded including its arguments. +.It Sy "skipping end of block that is not open" +.Pq mdoc , man , eqn , tbl , roff +Various syntax elements can only be used to explicitly close blocks +that have previously been opened. +An +.Xr mdoc 7 +block closing macro, a +.Xr man 7 +.Ic \&ME , \&RE +or +.Ic \&UE +macro, an +.Xr eqn 7 +right delimiter or closing brace, or the end of an equation, table, or +.Xr roff 7 +conditional request is encountered but no matching block is open. +The offending request or macro is discarded. +.It Sy "fewer RS blocks open, skipping" +.Pq man +The +.Ic \&RE +macro is invoked with an argument, but fewer than the specified number of +.Ic \&RS +blocks are open. +The +.Ic \&RE +macro is discarded. +.It Sy "inserting missing end of block" +.Pq mdoc , tbl +Various +.Xr mdoc 7 +macros as well as tables require explicit closing by dedicated macros. +A block that doesn't support bad nesting +ends before all of its children are properly closed. +The open child nodes are closed implicitly.
+.It Sy "appending missing end of block" +.Pq mdoc , man , eqn , tbl , roff +At the end of the document, an explicit +.Xr mdoc 7 +block, a +.Xr man 7 +next-line scope or +.Ic \&MT , \&RS +or +.Ic \&UR +block, an equation, table, or +.Xr roff 7 +conditional or ignore block is still open. +The open block is closed implicitly. +.It Sy "escaped character not allowed in a name" +.Pq roff +Macro, string and register identifiers consist of printable, +non-whitespace ASCII characters. +Escape sequences and characters and strings expressed in terms of them +cannot form part of a name. +The first argument of an +.Ic \&am , +.Ic \&as , +.Ic \&de , +.Ic \&ds , +.Ic \&nr , +or +.Ic \&rr +request, or any argument of an +.Ic \&rm +request, or the name of a request or user-defined macro being called, +is terminated by an escape sequence. +In the cases of +.Ic \&as , +.Ic \&ds , +and +.Ic \&nr , +the request has no effect at all. +In the cases of +.Ic \&am , +.Ic \&de , +.Ic \&rr , +and +.Ic \&rm , +what was parsed up to this point is used as the arguments to the request, +and the rest of the input line is discarded including the escape sequence. +When parsing for a request or a user-defined macro name to be called, +only the escape sequence is discarded. +The characters preceding it are used as the request or macro name, +the characters following it are used as the arguments to the request or macro. +.It Sy "using macro argument outside macro" +.Pq roff +The escape sequence \e$ occurs outside any macro definition +and expands to the empty string. +.It Sy "argument number is not numeric" +.Pq roff +The argument of the escape sequence \e$ is not a digit; +the escape sequence expands to the empty string. +.It Sy "NOT IMPLEMENTED: Bd -file" +.Pq mdoc +For security reasons, the +.Ic \&Bd +macro does not support the +.Fl file +argument.
+By requesting the inclusion of a sensitive file, a malicious document +might otherwise trick a privileged user into inadvertently displaying +the file on the screen, revealing the file content to bystanders. +The argument is ignored including the file name following it. +.It Sy "skipping display without arguments" +.Pq mdoc +A +.Ic \&Bd +block macro does not have any arguments. +The block is discarded, and the block content is displayed in +whatever mode was active before the block. +.It Sy "missing list type, using -item" +.Pq mdoc +A +.Ic \&Bl +macro fails to specify the list type. +.It Sy "argument is not numeric, using 1" +.Pq roff +The argument of a +.Ic \&ce +request is not a number. +.It Sy "argument is not a character" +.Pq roff +The first argument of a +.Ic char +request is neither a single ASCII character +nor a single character escape sequence. +The request is ignored including all its arguments. +.It Sy "missing manual name, using \(dq\(dq" +.Pq mdoc +The first call to +.Ic \&Nm , +or any call in the NAME section, lacks the required argument. +.It Sy "uname(3) system call failed, using UNKNOWN" +.Pq mdoc +The +.Ic \&Os +macro is called without arguments, and the +.Xr uname 3 +system call failed. +As a workaround, +.Nm +can be compiled with +.Sm off +.Fl D Cm OSNAME=\(dq\e\(dq Ar string Cm \e\(dq\(dq . +.Sm on +.It Sy "unknown standard specifier" +.Pq mdoc +An +.Ic \&St +macro has an unknown argument and is discarded. +.It Sy "skipping request without numeric argument" +.Pq roff , eqn +An +.Ic \&it +request or an +.Xr eqn 7 +.Ic \&size +or +.Ic \&gsize +statement has a non-numeric or negative argument or no argument at all. +The invalid request or statement is ignored. +.It Sy "excessive shift" +.Pq roff +The argument of a +.Ic shift +request is larger than the number of arguments of the macro that is +currently being executed. +All macro arguments are deleted and \en(.$ is set to zero. 
+.It Sy "NOT IMPLEMENTED: .so with absolute path or \(dq..\(dq" +.Pq roff +For security reasons, +.Nm +allows +.Ic \&so +file inclusion requests only with relative paths +and only without ascending to any parent directory. +By requesting the inclusion of a sensitive file, a malicious document +might otherwise trick a privileged user into inadvertently displaying +the file on the screen, revealing the file content to bystanders. +.Nm +only shows the path as it appears behind +.Ic \&so . +.It Sy ".so request failed" +.Pq roff +Servicing a +.Ic \&so +request requires reading an external file, but the file could not be +opened. +.Nm +only shows the path as it appears behind +.Ic \&so . +.It Sy "skipping all arguments" +.Pq mdoc , man , eqn , roff +An +.Xr mdoc 7 +.Ic \&Bt , +.Ic \&Ed , +.Ic \&Ef , +.Ic \&Ek , +.Ic \&El , +.Ic \&Lp , +.Ic \&Pp , +.Ic \&Re , +.Ic \&Rs , +or +.Ic \&Ud +macro, an +.Ic \&It +macro in a list that doesn't support item heads, a +.Xr man 7 +.Ic \&LP , +.Ic \&P , +or +.Ic \&PP +macro, an +.Xr eqn 7 +.Ic \&EQ +or +.Ic \&EN +macro, or a +.Xr roff 7 +.Ic \&br , +.Ic \&fi , +or +.Ic \&nf +request or +.Sq \&.. +block closing request is invoked with at least one argument. +All arguments are ignored. +.It Sy "skipping excess arguments" +.Pq mdoc , man , roff +A macro or request is invoked with too many arguments: +.Bl -dash -offset 2n -width 2n -compact +.It +.Ic \&Fo , +.Ic \&MT , +.Ic \&PD , +.Ic \&RS , +.Ic \&UR , +.Ic \&ft , +or +.Ic \&sp +with more than one argument +.It +.Ic \&An +with another argument after +.Fl split +or +.Fl nosplit +.It +.Ic \&RE +with more than one argument or with a non-integer argument +.It +.Ic \&OP +or a request of the +.Ic \&de +family with more than two arguments +.It +.Ic \&Dt +with more than three arguments +.It +.Ic \&TH +with more than five arguments +.It +.Ic \&Bd , +.Ic \&Bk , +or +.Ic \&Bl +with invalid arguments +.El +The excess arguments are ignored.
+.El +.Ss Unsupported features +.Bl -ohang +.It Sy "input too large" +.Pq mdoc , man +Currently, +.Nm +cannot handle input files larger than its arbitrary size limit +of 2^31 bytes (2 Gigabytes). +Since useful manuals are always small, this is not a problem in practice. +Parsing is aborted as soon as the condition is detected. +.It Sy "unsupported control character" +.Pq roff +An ASCII control character supported by other +.Xr roff 7 +implementations but not by +.Nm +was found in an input file. +It is replaced by a question mark. +.It Sy "unsupported escape sequence" +.Pq roff +An input file contains an escape sequence supported by GNU troff +or Heirloom troff but not by +.Nm , +and it is likely that this will cause information loss +or considerable misformatting. +.It Sy "unsupported roff request" +.Pq roff +An input file contains a +.Xr roff 7 +request supported by GNU troff or Heirloom troff but not by +.Nm , +and it is likely that this will cause information loss +or considerable misformatting. +.It Sy "eqn delim option in tbl" +.Pq eqn , tbl +The options line of a table defines equation delimiters. +Any equation source code contained in the table will be printed unformatted. +.It Sy "unsupported table layout modifier" +.Pq tbl +A table layout specification contains an +.Sq Cm m +modifier. +The modifier is discarded. +.It Sy "ignoring macro in table" +.Pq tbl , mdoc , man +A table contains an invocation of an +.Xr mdoc 7 +or +.Xr man 7 +macro or of an undefined macro. +The macro is ignored, and its arguments are handled +as if they were a text line. +.El +.Ss Bad command line arguments +.Bl -ohang +.It Sy "bad command line argument" +The argument following one of the +.Fl IKMmOTW +command line options is invalid, or a +.Ar file +given as a command line argument cannot be opened. +.It Sy "duplicate command line argument" +The +.Fl I +command line option was specified twice. 
+.It Sy "option has a superfluous value" +An argument to the +.Fl O +option has a value but does not accept one. +.It Sy "missing option value" +An argument to the +.Fl O +option has no value but requires one. +.It Sy "bad option value" +An argument to the +.Fl O +.Cm indent +or +.Cm width +option has an invalid value. +.It Sy "duplicate option value" +The same +.Fl O +option is specified more than once. +.It Sy "no such tag" +The +.Fl O Cm tag +option was specified but the tag was not found in any of the displayed +manual pages. +.El +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr eqn 7 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 , +.Xr tbl 7 +.Sh HISTORY +The +.Nm +utility first appeared in +.Ox 4.8 . +The option +.Fl I +appeared in +.Ox 5.2 , +and +.Fl aCcfhKklMSsw +in +.Ox 5.7 . +.Sh AUTHORS +.An -nosplit +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and is maintained by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c new file mode 100644 index 0000000..0b2d301 --- /dev/null +++ b/usr.bin/mandoc/mandoc.c @@ -0,0 +1,657 @@ +/* $OpenBSD: mandoc.c,v 1.85 2020/01/19 16:16:32 schwarze Exp $ */ +/* + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "libmandoc.h" +#include "roff_int.h" + +static int a2time(time_t *, const char *, const char *); +static char *time2a(time_t); + + +enum mandoc_esc +mandoc_font(const char *cp, int sz) +{ + switch (sz) { + case 0: + return ESCAPE_FONTPREV; + case 1: + switch (cp[0]) { + case 'B': + case '3': + return ESCAPE_FONTBOLD; + case 'I': + case '2': + return ESCAPE_FONTITALIC; + case 'P': + return ESCAPE_FONTPREV; + case 'R': + case '1': + return ESCAPE_FONTROMAN; + case '4': + return ESCAPE_FONTBI; + default: + return ESCAPE_ERROR; + } + case 2: + switch (cp[0]) { + case 'B': + switch (cp[1]) { + case 'I': + return ESCAPE_FONTBI; + default: + return ESCAPE_ERROR; + } + case 'C': + switch (cp[1]) { + case 'B': + return ESCAPE_FONTBOLD; + case 'I': + return ESCAPE_FONTITALIC; + case 'R': + case 'W': + return ESCAPE_FONTCW; + default: + return ESCAPE_ERROR; + } + default: + return ESCAPE_ERROR; + } + default: + return ESCAPE_ERROR; + } +} + +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + const char *local_start; + int local_sz, c, i; + char term; + enum mandoc_esc gly; + + /* + * When the caller doesn't provide return storage, + * use local storage. + */ + + if (NULL == start) + start = &local_start; + if (NULL == sz) + sz = &local_sz; + + /* + * Treat "\E" just like "\"; + * it only makes a difference in copy mode. 
+ */ + + if (**end == 'E') + ++*end; + + /* + * Beyond the backslash, at least one input character + * is part of the escape sequence. With one exception + * (see below), that character won't be returned. + */ + + gly = ESCAPE_ERROR; + *start = ++*end; + *sz = 0; + term = '\0'; + + switch ((*start)[-1]) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case '(': + gly = ESCAPE_SPECIAL; + *sz = 2; + break; + case '[': + if (**start == ' ') { + ++*end; + return ESCAPE_ERROR; + } + gly = ESCAPE_SPECIAL; + term = ']'; + break; + case 'C': + if ('\'' != **start) + return ESCAPE_ERROR; + *start = ++*end; + gly = ESCAPE_SPECIAL; + term = '\''; + break; + + /* + * Escapes taking no arguments at all. + */ + case '!': + case '?': + return ESCAPE_UNSUPP; + case '%': + case '&': + case ')': + case ',': + case '/': + case '^': + case 'a': + case 'd': + case 'r': + case 't': + case 'u': + case '{': + case '|': + case '}': + return ESCAPE_IGNORE; + case 'c': + return ESCAPE_NOSPACE; + case 'p': + return ESCAPE_BREAK; + + /* + * The \z escape is supposed to output the following + * character without advancing the cursor position. + * Since we are mostly dealing with terminal mode, + * let us just skip the next character. + */ + case 'z': + return ESCAPE_SKIPCHAR; + + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. + */ + case 'F': + case 'f': + case 'g': + case 'k': + case 'M': + case 'm': + case 'n': + case 'O': + case 'V': + case 'Y': + gly = (*start)[-1] == 'f' ? ESCAPE_FONT : ESCAPE_IGNORE; + switch (**start) { + case '(': + if ((*start)[-1] == 'O') + gly = ESCAPE_ERROR; + *start = ++*end; + *sz = 2; + break; + case '[': + if ((*start)[-1] == 'O') + gly = (*start)[1] == '5' ? 
+ ESCAPE_UNSUPP : ESCAPE_ERROR; + *start = ++*end; + term = ']'; + break; + default: + if ((*start)[-1] == 'O') { + switch (**start) { + case '0': + gly = ESCAPE_UNSUPP; + break; + case '1': + case '2': + case '3': + case '4': + break; + default: + gly = ESCAPE_ERROR; + break; + } + } + *sz = 1; + break; + } + break; + case '*': + if (strncmp(*start, "(.T", 3) != 0) + abort(); + gly = ESCAPE_DEVICE; + *start = ++*end; + *sz = 2; + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + * The \B and \w escapes are handled in roff.c, roff_res(). + */ + case 'A': + case 'b': + case 'D': + case 'R': + case 'X': + case 'Z': + gly = ESCAPE_IGNORE; + /* FALLTHROUGH */ + case 'o': + if (**start == '\0') + return ESCAPE_ERROR; + if (gly == ESCAPE_ERROR) + gly = ESCAPE_OVERSTRIKE; + term = **start; + *start = ++*end; + break; + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case 'h': + case 'H': + case 'L': + case 'l': + case 'S': + case 'v': + case 'x': + if (strchr(" %&()*+-./0123456789:<=>", **start)) { + if ('\0' != **start) + ++*end; + return ESCAPE_ERROR; + } + switch ((*start)[-1]) { + case 'h': + gly = ESCAPE_HORIZ; + break; + case 'l': + gly = ESCAPE_HLINE; + break; + default: + gly = ESCAPE_IGNORE; + break; + } + term = **start; + *start = ++*end; + break; + + /* + * Special handling for the numbered character escape. + * XXX Do any other escapes need similar handling? + */ + case 'N': + if ('\0' == **start) + return ESCAPE_ERROR; + (*end)++; + if (isdigit((unsigned char)**start)) { + *sz = 1; + return ESCAPE_IGNORE; + } + (*start)++; + while (isdigit((unsigned char)**end)) + (*end)++; + *sz = *end - *start; + if ('\0' != **end) + (*end)++; + return ESCAPE_NUMBERED; + + /* + * Sizes get a special category of their own. + */ + case 's': + gly = ESCAPE_IGNORE; + + /* See +/- counts as a sign. 
*/ + if ('+' == **end || '-' == **end || ASCII_HYPH == **end) + *start = ++*end; + + switch (**end) { + case '(': + *start = ++*end; + *sz = 2; + break; + case '[': + *start = ++*end; + term = ']'; + break; + case '\'': + *start = ++*end; + term = '\''; + break; + case '3': + case '2': + case '1': + *sz = (*end)[-1] == 's' && + isdigit((unsigned char)(*end)[1]) ? 2 : 1; + break; + default: + *sz = 1; + break; + } + + break; + + /* + * Several special characters can be encoded as + * one-byte escape sequences without using \[]. + */ + case ' ': + case '\'': + case '-': + case '.': + case '0': + case ':': + case '_': + case '`': + case 'e': + case '~': + gly = ESCAPE_SPECIAL; + /* FALLTHROUGH */ + default: + if (gly == ESCAPE_ERROR) + gly = ESCAPE_UNDEF; + *start = --*end; + *sz = 1; + break; + } + + /* + * Read up to the terminating character, + * paying attention to nested escapes. + */ + + if ('\0' != term) { + while (**end != term) { + switch (**end) { + case '\0': + return ESCAPE_ERROR; + case '\\': + (*end)++; + if (ESCAPE_ERROR == + mandoc_escape(end, NULL, NULL)) + return ESCAPE_ERROR; + break; + default: + (*end)++; + break; + } + } + *sz = (*end)++ - *start; + + /* + * The file chars.c only provides one common list + * of character names, but \[-] == \- is the only + * one of the characters with one-byte names that + * allows enclosing the name in brackets. + */ + if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-') + return ESCAPE_ERROR; + } else { + assert(*sz > 0); + if ((size_t)*sz > strlen(*start)) + return ESCAPE_ERROR; + *end += *sz; + } + + /* Run post-processors. 
*/ + + switch (gly) { + case ESCAPE_FONT: + gly = mandoc_font(*start, *sz); + break; + case ESCAPE_SPECIAL: + if (**start == 'c') { + if (*sz < 6 || *sz > 7 || + strncmp(*start, "char", 4) != 0 || + (int)strspn(*start + 4, "0123456789") + 4 < *sz) + break; + c = 0; + for (i = 4; i < *sz; i++) + c = 10 * c + ((*start)[i] - '0'); + if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) + break; + *start += 4; + *sz -= 4; + gly = ESCAPE_NUMBERED; + break; + } + + /* + * Unicode escapes are defined in groff as \[u0000] + * to \[u10FFFF], where the contained value must be + * a valid Unicode codepoint. Here, however, only + * check the length and range. + */ + if (**start != 'u' || *sz < 5 || *sz > 7) + break; + if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) + break; + if (*sz == 6 && (*start)[1] == '0') + break; + if (*sz == 5 && (*start)[1] == 'D' && + strchr("89ABCDEF", (*start)[2]) != NULL) + break; + if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") + + 1 == *sz) + gly = ESCAPE_UNICODE; + break; + default: + break; + } + + return gly; +} + +static int +a2time(time_t *t, const char *fmt, const char *p) +{ + struct tm tm; + char *pp; + + memset(&tm, 0, sizeof(struct tm)); + + pp = strptime(p, fmt, &tm); + if (NULL != pp && '\0' == *pp) { + *t = mktime(&tm); + return 1; + } + + return 0; +} + +static char * +time2a(time_t t) +{ + struct tm *tm; + char *buf, *p; + size_t ssz; + int isz; + + buf = NULL; + tm = localtime(&t); + if (tm == NULL) + goto fail; + + /* + * Reserve space: + * up to 9 characters for the month (September) + blank + * up to 2 characters for the day + comma + blank + * 4 characters for the year and a terminating '\0' + */ + + p = buf = mandoc_malloc(10 + 4 + 4 + 1); + + if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) + goto fail; + p += (int)ssz; + + /* + * The output format is just "%d" here, not "%2d" or "%02d". + * That's also the reason why we can't just format the + * date as a whole with "%B %e, %Y" or "%B %d, %Y". 
+ * Besides, the present approach is less prone to buffer + * overflows, in case anybody should ever introduce the bug + * of looking at LC_TIME. + */ + + isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); + if (isz < 0 || isz > 4) + goto fail; + p += isz; + + if (strftime(p, 4 + 1, "%Y", tm) == 0) + goto fail; + return buf; + +fail: + free(buf); + return mandoc_strdup(""); +} + +char * +mandoc_normdate(struct roff_node *nch, struct roff_node *nbl) +{ + char *cp; + time_t t; + + /* No date specified. */ + + if (nch == NULL) { + if (nbl == NULL) + mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL); + else + mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line, + nbl->pos, "%s", roff_name[nbl->tok]); + return mandoc_strdup(""); + } + if (*nch->string == '\0') { + mandoc_msg(MANDOCERR_DATE_MISSING, nch->line, + nch->pos, "%s", roff_name[nbl->tok]); + return mandoc_strdup(""); + } + if (strcmp(nch->string, "$" "Mdocdate$") == 0) + return time2a(time(NULL)); + + /* Valid mdoc(7) date format. */ + + if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) || + a2time(&t, "%b %d, %Y", nch->string)) { + cp = time2a(t); + if (t > time(NULL) + 86400) + mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, + nch->pos, "%s %s", roff_name[nbl->tok], cp); + else if (*nch->string != '$' && + strcmp(nch->string, cp) != 0) + mandoc_msg(MANDOCERR_DATE_NORM, nch->line, + nch->pos, "%s %s", roff_name[nbl->tok], cp); + return cp; + } + + /* In man(7), do not warn about the legacy format. */ + + if (a2time(&t, "%Y-%m-%d", nch->string) == 0) + mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos, + "%s %s", roff_name[nbl->tok], nch->string); + else if (t > time(NULL) + 86400) + mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos, + "%s %s", roff_name[nbl->tok], nch->string); + else if (nbl->tok == MDOC_Dd) + mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos, + "Dd %s", nch->string); + + /* Use any non-mdoc(7) date verbatim. 
*/ + + return mandoc_strdup(nch->string); +} + +int +mandoc_eos(const char *p, size_t sz) +{ + const char *q; + int enclosed, found; + + if (0 == sz) + return 0; + + /* + * End-of-sentence recognition must include situations where + * some symbols, such as `)', allow prior EOS punctuation to + * propagate outward. + */ + + enclosed = found = 0; + for (q = p + (int)sz - 1; q >= p; q--) { + switch (*q) { + case '\"': + case '\'': + case ']': + case ')': + if (0 == found) + enclosed = 1; + break; + case '.': + case '!': + case '?': + found = 1; + break; + default: + return found && + (!enclosed || isalnum((unsigned char)*q)); + } + } + + return found && !enclosed; +} + +/* + * Convert at most 31 bytes of p to an int, clamping to the int range. + * Return -1 if sz > 31 or the bytes are not one complete number. + */ +int +mandoc_strntoi(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return -1; + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return -1; + + if (v > INT_MAX) + v = INT_MAX; + if (v < INT_MIN) + v = INT_MIN; + + return (int)v; +} diff --git a/usr.bin/mandoc/mandoc.css b/usr.bin/mandoc/mandoc.css new file mode 100644 index 0000000..3c4ea18 --- /dev/null +++ b/usr.bin/mandoc/mandoc.css @@ -0,0 +1,360 @@ +/* $OpenBSD: mandoc.css,v 1.33 2019/06/02 16:50:46 schwarze Exp $ */ +/* + * Standard style sheet for mandoc(1) -Thtml and man.cgi(8). + * + * Written by Ingo Schwarze <schwarze@openbsd.org>. + * I place this file into the public domain. + * Permission to use, copy, modify, and distribute it for any purpose + * with or without fee is hereby granted, without any conditions. + */ + +/* Global defaults.
*/ + +html { max-width: 65em; + --bg: #FFFFFF; + --fg: #000000; } +body { background: var(--bg); + color: var(--fg); + font-family: Helvetica,Arial,sans-serif; } +h1 { font-size: 110%; } +table { margin-top: 0em; + margin-bottom: 0em; + border-collapse: collapse; } +/* Some browsers set border-color in a browser style for tbody, + * but not for table, resulting in inconsistent border styling. */ +tbody { border-color: inherit; } +tr { border-color: inherit; } +td { vertical-align: top; + padding-left: 0.2em; + padding-right: 0.2em; + border-color: inherit; } +ul, ol, dl { margin-top: 0em; + margin-bottom: 0em; } +li, dt { margin-top: 1em; } + +.permalink { border-bottom: thin dotted; + color: inherit; + font: inherit; + text-decoration: inherit; } +* { clear: both } + +/* Search form and search results. */ + +fieldset { border: thin solid silver; + border-radius: 1em; + text-align: center; } +input[name=expr] { + width: 25%; } + +table.results { margin-top: 1em; + margin-left: 2em; + font-size: smaller; } + +/* Header and footer lines. */ + +table.head { width: 100%; + border-bottom: 1px dotted #808080; + margin-bottom: 1em; + font-size: smaller; } +td.head-vol { text-align: center; } +td.head-rtitle { + text-align: right; } + +table.foot { width: 100%; + border-top: 1px dotted #808080; + margin-top: 1em; + font-size: smaller; } +td.foot-os { text-align: right; } + +/* Sections and paragraphs. */ + +.manual-text { + margin-left: 3.8em; } +.Nd { } +section.Sh { } +h1.Sh { margin-top: 1.2em; + margin-bottom: 0.6em; + margin-left: -3.2em; } +section.Ss { } +h2.Ss { margin-top: 1.2em; + margin-bottom: 0.6em; + margin-left: -1.2em; + font-size: 105%; } +.Pp { margin: 0.6em 0em; } +.Sx { } +.Xr { } + +/* Displays and lists. 
*/ + +.Bd { } +.Bd-indent { margin-left: 3.8em; } + +.Bl-bullet { list-style-type: disc; + padding-left: 1em; } +.Bl-bullet > li { } +.Bl-dash { list-style-type: none; + padding-left: 0em; } +.Bl-dash > li:before { + content: "\2014 "; } +.Bl-item { list-style-type: none; + padding-left: 0em; } +.Bl-item > li { } +.Bl-compact > li { + margin-top: 0em; } + +.Bl-enum { padding-left: 2em; } +.Bl-enum > li { } +.Bl-compact > li { + margin-top: 0em; } + +.Bl-diag { } +.Bl-diag > dt { + font-style: normal; + font-weight: bold; } +.Bl-diag > dd { + margin-left: 0em; } +.Bl-hang { } +.Bl-hang > dt { } +.Bl-hang > dd { + margin-left: 5.5em; } +.Bl-inset { } +.Bl-inset > dt { } +.Bl-inset > dd { + margin-left: 0em; } +.Bl-ohang { } +.Bl-ohang > dt { } +.Bl-ohang > dd { + margin-left: 0em; } +.Bl-tag { margin-top: 0.6em; + margin-left: 5.5em; } +.Bl-tag > dt { + float: left; + margin-top: 0em; + margin-left: -5.5em; + padding-right: 0.5em; + vertical-align: top; } +.Bl-tag > dd { + clear: right; + width: 100%; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0.6em; + vertical-align: top; + overflow: auto; } +.Bl-compact { margin-top: 0em; } +.Bl-compact > dd { + margin-bottom: 0em; } +.Bl-compact > dt { + margin-top: 0em; } + +.Bl-column { } +.Bl-column > tbody > tr { } +.Bl-column > tbody > tr > td { + margin-top: 1em; } +.Bl-compact > tbody > tr > td { + margin-top: 0em; } + +.Rs { font-style: normal; + font-weight: normal; } +.RsA { } +.RsB { font-style: italic; + font-weight: normal; } +.RsC { } +.RsD { } +.RsI { font-style: italic; + font-weight: normal; } +.RsJ { font-style: italic; + font-weight: normal; } +.RsN { } +.RsO { } +.RsP { } +.RsQ { } +.RsR { } +.RsT { text-decoration: underline; } +.RsU { } +.RsV { } + +.eqn { } +.tbl td { vertical-align: middle; } + +.HP { margin-left: 3.8em; + text-indent: -3.8em; } + +/* Semantic markup for command line utilities. 
*/ + +table.Nm { } +code.Nm { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Fl { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Cm { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Ar { font-style: italic; + font-weight: normal; } +.Op { display: inline; } +.Ic { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Ev { font-style: normal; + font-weight: normal; + font-family: monospace; } +.Pa { font-style: italic; + font-weight: normal; } + +/* Semantic markup for function libraries. */ + +.Lb { } +code.In { font-style: normal; + font-weight: bold; + font-family: inherit; } +a.In { } +.Fd { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Ft { font-style: italic; + font-weight: normal; } +.Fn { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Fa { font-style: italic; + font-weight: normal; } +.Vt { font-style: italic; + font-weight: normal; } +.Va { font-style: italic; + font-weight: normal; } +.Dv { font-style: normal; + font-weight: normal; + font-family: monospace; } +.Er { font-style: normal; + font-weight: normal; + font-family: monospace; } + +/* Various semantic markup. */ + +.An { } +.Lk { } +.Mt { } +.Cd { font-style: normal; + font-weight: bold; + font-family: inherit; } +.Ad { font-style: italic; + font-weight: normal; } +.Ms { font-style: normal; + font-weight: bold; } +.St { } +.Ux { } + +/* Physical markup. */ + +.Bf { display: inline; } +.No { font-style: normal; + font-weight: normal; } +.Em { font-style: italic; + font-weight: normal; } +.Sy { font-style: normal; + font-weight: bold; } +.Li { font-style: normal; + font-weight: normal; + font-family: monospace; } + +/* Tooltip support. 
*/ + +h1.Sh, h2.Ss { position: relative; } +.An, .Ar, .Cd, .Cm, .Dv, .Em, .Er, .Ev, .Fa, .Fd, .Fl, .Fn, .Ft, +.Ic, code.In, .Lb, .Lk, .Ms, .Mt, .Nd, code.Nm, .Pa, .Rs, +.St, .Sx, .Sy, .Va, .Vt, .Xr { + display: inline-block; + position: relative; } + +.An::before { content: "An"; } +.Ar::before { content: "Ar"; } +.Cd::before { content: "Cd"; } +.Cm::before { content: "Cm"; } +.Dv::before { content: "Dv"; } +.Em::before { content: "Em"; } +.Er::before { content: "Er"; } +.Ev::before { content: "Ev"; } +.Fa::before { content: "Fa"; } +.Fd::before { content: "Fd"; } +.Fl::before { content: "Fl"; } +.Fn::before { content: "Fn"; } +.Ft::before { content: "Ft"; } +.Ic::before { content: "Ic"; } +code.In::before { content: "In"; } +.Lb::before { content: "Lb"; } +.Lk::before { content: "Lk"; } +.Ms::before { content: "Ms"; } +.Mt::before { content: "Mt"; } +.Nd::before { content: "Nd"; } +code.Nm::before { content: "Nm"; } +.Pa::before { content: "Pa"; } +.Rs::before { content: "Rs"; } +h1.Sh::before { content: "Sh"; } +h2.Ss::before { content: "Ss"; } +.St::before { content: "St"; } +.Sx::before { content: "Sx"; } +.Sy::before { content: "Sy"; } +.Va::before { content: "Va"; } +.Vt::before { content: "Vt"; } +.Xr::before { content: "Xr"; } + +.An::before, .Ar::before, .Cd::before, .Cm::before, +.Dv::before, .Em::before, .Er::before, .Ev::before, +.Fa::before, .Fd::before, .Fl::before, .Fn::before, .Ft::before, +.Ic::before, code.In::before, .Lb::before, .Lk::before, +.Ms::before, .Mt::before, .Nd::before, code.Nm::before, +.Pa::before, .Rs::before, +h1.Sh::before, h2.Ss::before, .St::before, .Sx::before, .Sy::before, +.Va::before, .Vt::before, .Xr::before { + opacity: 0; + transition: .15s ease opacity; + pointer-events: none; + position: absolute; + bottom: 100%; + box-shadow: 0 0 .35em var(--fg); + padding: .15em .25em; + white-space: nowrap; + font-family: Helvetica,Arial,sans-serif; + font-style: normal; + font-weight: bold; + background: var(--bg); + color: 
var(--fg); } +.An:hover::before, .Ar:hover::before, .Cd:hover::before, .Cm:hover::before, +.Dv:hover::before, .Em:hover::before, .Er:hover::before, .Ev:hover::before, +.Fa:hover::before, .Fd:hover::before, .Fl:hover::before, .Fn:hover::before, +.Ft:hover::before, .Ic:hover::before, code.In:hover::before, +.Lb:hover::before, .Lk:hover::before, .Ms:hover::before, .Mt:hover::before, +.Nd:hover::before, code.Nm:hover::before, .Pa:hover::before, +.Rs:hover::before, h1.Sh:hover::before, h2.Ss:hover::before, .St:hover::before, +.Sx:hover::before, .Sy:hover::before, .Va:hover::before, .Vt:hover::before, +.Xr:hover::before { + opacity: 1; + pointer-events: inherit; } + +/* Overrides to avoid excessive margins on small devices. */ + +@media (max-width: 37.5em) { +.manual-text { + margin-left: 0.5em; } +h1.Sh, h2.Ss { margin-left: 0em; } +.Bd-indent { margin-left: 2em; } +.Bl-hang > dd { + margin-left: 2em; } +.Bl-tag { margin-left: 2em; } +.Bl-tag > dt { + margin-left: -2em; } +.HP { margin-left: 2em; + text-indent: -2em; } +} + +/* Overrides for a dark color scheme for accessibility. */ + +@media (prefers-color-scheme: dark) { +html { --bg: #1E1F21; + --fg: #EEEFF1; } +:link { color: #BAD7FF; } +:visited { color: #F6BAFF; } +} diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h new file mode 100644 index 0000000..3a48c89 --- /dev/null +++ b/usr.bin/mandoc/mandoc.h @@ -0,0 +1,322 @@ +/* $OpenBSD: mandoc.h,v 1.210 2020/04/24 11:58:02 schwarze Exp $ */ +/* + * Copyright (c) 2012-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Error handling, escape sequence, and character utilities. + * Can be used by all code in the mandoc package. + */ + +#define ASCII_NBRSP 31 /* non-breaking space */ +#define ASCII_HYPH 30 /* breakable hyphen */ +#define ASCII_BREAK 29 /* breakable zero-width space */ + +/* + * Status level. This refers to both internal status (i.e., whilst + * running, when warnings/errors are reported) and an indicator of a + * threshold of when to halt (when said internal state exceeds the + * threshold). + */ +enum mandoclevel { + MANDOCLEVEL_OK = 0, + MANDOCLEVEL_STYLE, /* style suggestions */ + MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */ + MANDOCLEVEL_ERROR, /* input has been thrown away */ + MANDOCLEVEL_UNSUPP, /* input needs unimplemented features */ + MANDOCLEVEL_BADARG, /* bad argument in invocation */ + MANDOCLEVEL_SYSERR, /* system error */ + MANDOCLEVEL_MAX +}; + +/* + * All possible things that can go wrong within a parse, be it libroff, + * libmdoc, or libman. + */ +enum mandocerr { + MANDOCERR_OK, + + MANDOCERR_BASE, /* ===== start of base system conventions ===== */ + + MANDOCERR_MDOCDATE, /* Mdocdate found: Dd ... */ + MANDOCERR_MDOCDATE_MISSING, /* Mdocdate missing: Dd ... */ + MANDOCERR_ARCH_BAD, /* unknown architecture: Dt ... arch */ + MANDOCERR_OS_ARG, /* operating system explicitly specified: Os ... 
*/ + MANDOCERR_RCS_MISSING, /* RCS id missing */ + MANDOCERR_XR_BAD, /* referenced manual not found: Xr name sec */ + + MANDOCERR_STYLE, /* ===== start of style suggestions ===== */ + + MANDOCERR_DATE_LEGACY, /* legacy man(7) date format: Dd ... */ + MANDOCERR_DATE_NORM, /* normalizing date format to: ... */ + MANDOCERR_TITLE_CASE, /* lower case character in document title */ + MANDOCERR_RCS_REP, /* duplicate RCS id: ... */ + MANDOCERR_SEC_TYPO, /* possible typo in section name: Sh ... */ + MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */ + MANDOCERR_MACRO_USELESS, /* useless macro: macro */ + MANDOCERR_BX, /* consider using OS macro: macro */ + MANDOCERR_ER_ORDER, /* errnos out of order: Er ... */ + MANDOCERR_ER_REP, /* duplicate errno: Er ... */ + MANDOCERR_DELIM, /* trailing delimiter: macro ... */ + MANDOCERR_DELIM_NB, /* no blank before trailing delimiter: macro ... */ + MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */ + MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */ + MANDOCERR_DASHDASH, /* verbatim "--", maybe consider using \(em */ + MANDOCERR_FUNC, /* function name without markup: name() */ + MANDOCERR_SPACE_EOL, /* whitespace at end of input line */ + MANDOCERR_COMMENT_BAD, /* bad comment style */ + + MANDOCERR_WARNING, /* ===== start of warnings ===== */ + + /* related to the prologue */ + MANDOCERR_DT_NOTITLE, /* missing manual title, using UNTITLED: line */ + MANDOCERR_TH_NOTITLE, /* missing manual title, using "": [macro] */ + MANDOCERR_MSEC_MISSING, /* missing manual section, using "": macro */ + MANDOCERR_MSEC_BAD, /* unknown manual section: Dt ... section */ + MANDOCERR_MSEC_FILE, /* filename/section mismatch: ... 
*/ + MANDOCERR_DATE_MISSING, /* missing date, using "": [macro] */ + MANDOCERR_DATE_BAD, /* cannot parse date, using it verbatim: date */ + MANDOCERR_DATE_FUTURE, /* date in the future, using it anyway: date */ + MANDOCERR_OS_MISSING, /* missing Os macro, using "" */ + MANDOCERR_PROLOG_LATE, /* late prologue macro: macro */ + MANDOCERR_PROLOG_ORDER, /* prologue macros out of order: macros */ + + /* related to document structure */ + MANDOCERR_SO, /* .so is fragile, better use ln(1): so path */ + MANDOCERR_DOC_EMPTY, /* no document body */ + MANDOCERR_SEC_BEFORE, /* content before first section header: macro */ + MANDOCERR_NAMESEC_FIRST, /* first section is not NAME: Sh title */ + MANDOCERR_NAMESEC_NONM, /* NAME section without Nm before Nd */ + MANDOCERR_NAMESEC_NOND, /* NAME section without description */ + MANDOCERR_NAMESEC_ND, /* description not at the end of NAME */ + MANDOCERR_NAMESEC_BAD, /* bad NAME section content: macro */ + MANDOCERR_NAMESEC_PUNCT, /* missing comma before name: Nm name */ + MANDOCERR_ND_EMPTY, /* missing description line, using "" */ + MANDOCERR_ND_LATE, /* description line outside NAME section */ + MANDOCERR_SEC_ORDER, /* sections out of conventional order: Sh title */ + MANDOCERR_SEC_REP, /* duplicate section title: Sh title */ + MANDOCERR_SEC_MSEC, /* unexpected section: Sh title for ... only */ + MANDOCERR_XR_SELF, /* cross reference to self: Xr name sec */ + MANDOCERR_XR_ORDER, /* unusual Xr order: ... after ... */ + MANDOCERR_XR_PUNCT, /* unusual Xr punctuation: ... after ... */ + MANDOCERR_AN_MISSING, /* AUTHORS section without An macro */ + + /* related to macros and nesting */ + MANDOCERR_MACRO_OBS, /* obsolete macro: macro */ + MANDOCERR_MACRO_CALL, /* macro neither callable nor escaped: macro */ + MANDOCERR_PAR_SKIP, /* skipping paragraph macro: macro ... 
*/ + MANDOCERR_PAR_MOVE, /* moving paragraph macro out of list: macro */ + MANDOCERR_NS_SKIP, /* skipping no-space macro */ + MANDOCERR_BLK_NEST, /* blocks badly nested: macro ... */ + MANDOCERR_BD_NEST, /* nested displays are not portable: macro ... */ + MANDOCERR_BL_MOVE, /* moving content out of list: macro */ + MANDOCERR_TA_LINE, /* first macro on line: Ta */ + MANDOCERR_BLK_LINE, /* line scope broken: macro breaks macro */ + MANDOCERR_BLK_BLANK, /* skipping blank line in line scope */ + + /* related to missing arguments */ + MANDOCERR_REQ_EMPTY, /* skipping empty request: request */ + MANDOCERR_COND_EMPTY, /* conditional request controls empty scope */ + MANDOCERR_MACRO_EMPTY, /* skipping empty macro: macro */ + MANDOCERR_BLK_EMPTY, /* empty block: macro */ + MANDOCERR_ARG_EMPTY, /* empty argument, using 0n: macro arg */ + MANDOCERR_BD_NOTYPE, /* missing display type, using -ragged: Bd */ + MANDOCERR_BL_LATETYPE, /* list type is not the first argument: Bl arg */ + MANDOCERR_BL_NOWIDTH, /* missing -width in -tag list, using 6n */ + MANDOCERR_EX_NONAME, /* missing utility name, using "": Ex */ + MANDOCERR_FO_NOHEAD, /* missing function name, using "": Fo */ + MANDOCERR_IT_NOHEAD, /* empty head in list item: Bl -type It */ + MANDOCERR_IT_NOBODY, /* empty list item: Bl -type It */ + MANDOCERR_IT_NOARG, /* missing argument, using next line: Bl -c It */ + MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */ + MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */ + MANDOCERR_PF_SKIP, /* nothing follows prefix: Pf arg */ + MANDOCERR_RS_EMPTY, /* empty reference block: Rs */ + MANDOCERR_XR_NOSEC, /* missing section argument: Xr arg */ + MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */ + MANDOCERR_OP_EMPTY, /* missing option string, using "": OP */ + MANDOCERR_UR_NOHEAD, /* missing resource identifier, using "": UR */ + MANDOCERR_EQN_NOBOX, /* missing eqn box, using "": op */ + + /* related to bad arguments */ + MANDOCERR_ARG_REP, /* 
duplicate argument: macro arg */ + MANDOCERR_AN_REP, /* skipping duplicate argument: An -arg */ + MANDOCERR_BD_REP, /* skipping duplicate display type: Bd -type */ + MANDOCERR_BL_REP, /* skipping duplicate list type: Bl -type */ + MANDOCERR_BL_SKIPW, /* skipping -width argument: Bl -type */ + MANDOCERR_BL_COL, /* wrong number of cells */ + MANDOCERR_AT_BAD, /* unknown AT&T UNIX version: At version */ + MANDOCERR_FA_COMMA, /* comma in function argument: arg */ + MANDOCERR_FN_PAREN, /* parenthesis in function name: arg */ + MANDOCERR_LB_BAD, /* unknown library name: Lb ... */ + MANDOCERR_RS_BAD, /* invalid content in Rs block: macro */ + MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */ + MANDOCERR_CHAR_FONT, /* argument contains two font escapes */ + MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */ + MANDOCERR_TR_ODD, /* odd number of characters in request: tr char */ + + /* related to plain text */ + MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */ + MANDOCERR_FI_TAB, /* tab in filled text */ + MANDOCERR_EOS, /* new sentence, new line */ + MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */ + MANDOCERR_ESC_UNDEF, /* undefined escape, printing literally: char */ + MANDOCERR_STR_UNDEF, /* undefined string, using "": name */ + + /* related to tables */ + MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */ + MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */ + MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */ + + MANDOCERR_ERROR, /* ===== start of errors ===== */ + + /* related to tables */ + MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */ + MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */ + MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument: option */ + MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size: option */ + MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */ + MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */ + 
MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */ + MANDOCERR_TBLDATA_NONE, /* tbl without any data cells */ + MANDOCERR_TBLDATA_SPAN, /* ignoring data in spanned tbl cell: data */ + MANDOCERR_TBLDATA_EXTRA, /* ignoring extra tbl data cells: data */ + MANDOCERR_TBLDATA_BLK, /* data block open at end of tbl: macro */ + + /* related to document structure and macros */ + MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */ + MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */ + MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */ + MANDOCERR_CHAR_BAD, /* skipping bad character: number */ + MANDOCERR_MACRO, /* skipping unknown macro: macro */ + MANDOCERR_REQ_NOMAC, /* skipping request outside macro: ... */ + MANDOCERR_REQ_INSEC, /* skipping insecure request: request */ + MANDOCERR_IT_STRAY, /* skipping item outside list: It ... */ + MANDOCERR_TA_STRAY, /* skipping column outside column list: Ta */ + MANDOCERR_BLK_NOTOPEN, /* skipping end of block that is not open */ + MANDOCERR_RE_NOTOPEN, /* fewer RS blocks open, skipping: RE arg */ + MANDOCERR_BLK_BROKEN, /* inserting missing end of block: macro ... */ + MANDOCERR_BLK_NOEND, /* appending missing end of block: macro */ + + /* related to request and macro arguments */ + MANDOCERR_NAMESC, /* escaped character not allowed in a name: name */ + MANDOCERR_ARG_UNDEF, /* using macro argument outside macro */ + MANDOCERR_ARG_NONUM, /* argument number is not numeric */ + MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */ + MANDOCERR_BD_NOARG, /* skipping display without arguments: Bd */ + MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */ + MANDOCERR_CE_NONUM, /* argument is not numeric, using 1: ce ... */ + MANDOCERR_CHAR_ARG, /* argument is not a character: char ... 
*/ + MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */ + MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */ + MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */ + MANDOCERR_IT_NONUM, /* skipping request without numeric argument */ + MANDOCERR_SHIFT, /* excessive shift: ..., but max is ... */ + MANDOCERR_SO_PATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */ + MANDOCERR_SO_FAIL, /* .so request failed */ + MANDOCERR_TG_SPC, /* skipping tag containing whitespace: tag */ + MANDOCERR_ARG_SKIP, /* skipping all arguments: macro args */ + MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */ + MANDOCERR_DIVZERO, /* divide by zero */ + + MANDOCERR_UNSUPP, /* ===== start of unsupported features ===== */ + + MANDOCERR_TOOLARGE, /* input too large */ + MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */ + MANDOCERR_ESC_UNSUPP, /* unsupported escape sequence: escape */ + MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */ + MANDOCERR_WHILE_NEST, /* nested .while loops */ + MANDOCERR_WHILE_OUTOF, /* end of scope with open .while loop */ + MANDOCERR_WHILE_INTO, /* end of .while loop in inner scope */ + MANDOCERR_WHILE_FAIL, /* cannot continue this .while loop */ + MANDOCERR_TBLOPT_EQN, /* eqn delim option in tbl: arg */ + MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */ + MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */ + + MANDOCERR_BADARG, /* ===== start of bad invocations ===== */ + + MANDOCERR_BADARG_BAD, /* bad argument */ + MANDOCERR_BADARG_DUPE, /* duplicate argument */ + MANDOCERR_BADVAL, /* does not take a value */ + MANDOCERR_BADVAL_MISS, /* missing argument value */ + MANDOCERR_BADVAL_BAD, /* bad argument value */ + MANDOCERR_BADVAL_DUPE, /* duplicate argument value */ + MANDOCERR_TAG, /* no such tag */ + + MANDOCERR_SYSERR, /* ===== start of system errors ===== */ + + MANDOCERR_DUP, + MANDOCERR_EXEC, + MANDOCERR_FDOPEN, + MANDOCERR_FFLUSH, + 
MANDOCERR_FORK, + MANDOCERR_FSTAT, + MANDOCERR_GETLINE, + MANDOCERR_GLOB, + MANDOCERR_GZCLOSE, + MANDOCERR_GZDOPEN, + MANDOCERR_MKSTEMP, + MANDOCERR_OPEN, + MANDOCERR_PLEDGE, + MANDOCERR_READ, + MANDOCERR_WAIT, + MANDOCERR_WRITE, + + MANDOCERR_MAX +}; + +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! unparsable escape */ + ESCAPE_UNSUPP, /* unsupported escape; ignore it */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_UNDEF, /* undefined escape; print literal character */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTBI, /* bold italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTCW, /* constant width font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_DEVICE, /* print the output device name */ + ESCAPE_BREAK, /* break the output line */ + ESCAPE_NOSPACE, /* suppress space if the last on a line */ + ESCAPE_HORIZ, /* horizontal movement */ + ESCAPE_HLINE, /* horizontal line drawing */ + ESCAPE_SKIPCHAR, /* skip the next character */ + ESCAPE_OVERSTRIKE /* overstrike all chars in the argument */ +}; + + +enum mandoc_esc mandoc_font(const char *, int); +enum mandoc_esc mandoc_escape(const char **, const char **, int *); +void mandoc_msg_setoutfile(FILE *); +const char *mandoc_msg_getinfilename(void); +void mandoc_msg_setinfilename(const char *); +enum mandocerr mandoc_msg_getmin(void); +void mandoc_msg_setmin(enum mandocerr); +enum mandoclevel mandoc_msg_getrc(void); +void mandoc_msg_setrc(enum mandoclevel); +void mandoc_msg(enum mandocerr, int, int, const char *, ...) 
+ __attribute__((__format__ (__printf__, 4, 5))); +void mandoc_msg_summary(void); +void mchars_alloc(void); +void mchars_free(void); +int mchars_num2char(const char *, size_t); +const char *mchars_uc2str(int); +int mchars_num2uc(const char *, size_t); +int mchars_spec2cp(const char *, size_t); +const char *mchars_spec2str(const char *, size_t, size_t *); diff --git a/usr.bin/mandoc/mandoc_aux.c b/usr.bin/mandoc/mandoc_aux.c new file mode 100644 index 0000000..7c23ecf --- /dev/null +++ b/usr.bin/mandoc/mandoc_aux.c @@ -0,0 +1,113 @@ +/* $OpenBSD: mandoc_aux.c,v 1.9 2018/02/07 20:04:33 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <err.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" + +int +mandoc_asprintf(char **dest, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vasprintf(dest, fmt, ap); + va_end(ap); + + if (ret == -1) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ret; +} + +void * +mandoc_calloc(size_t num, size_t size) +{ + void *ptr; + + ptr = calloc(num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_malloc(size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_realloc(void *ptr, size_t size) +{ + ptr = realloc(ptr, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_reallocarray(void *ptr, size_t num, size_t size) +{ + ptr = reallocarray(ptr, num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_recallocarray(void *ptr, size_t oldnum, size_t num, size_t size) +{ + ptr = recallocarray(ptr, oldnum, num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +char * +mandoc_strdup(const char *ptr) +{ + char *p; + + p = strdup(ptr); + if (p == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return p; +} + +char * +mandoc_strndup(const char *ptr, size_t sz) +{ + char *p; + + p = strndup(ptr, sz); + if (p == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return p; +} diff --git a/usr.bin/mandoc/mandoc_aux.h b/usr.bin/mandoc/mandoc_aux.h new file mode 100644 index 0000000..f535d85 --- /dev/null +++ b/usr.bin/mandoc/mandoc_aux.h @@ -0,0 +1,27 @@ +/* $OpenBSD: mandoc_aux.h,v 1.9 2017/06/12 18:55:42 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +int mandoc_asprintf(char **, const char *, ...) + __attribute__((__format__ (__printf__, 2, 3))); +void *mandoc_calloc(size_t, size_t); +void *mandoc_malloc(size_t); +void *mandoc_realloc(void *, size_t); +void *mandoc_reallocarray(void *, size_t, size_t); +void *mandoc_recallocarray(void *, size_t, size_t, size_t); +char *mandoc_strdup(const char *); +char *mandoc_strndup(const char *, size_t); diff --git a/usr.bin/mandoc/mandoc_msg.c b/usr.bin/mandoc/mandoc_msg.c new file mode 100644 index 0000000..a9334ae --- /dev/null +++ b/usr.bin/mandoc/mandoc_msg.c @@ -0,0 +1,368 @@ +/* $OpenBSD: mandoc_msg.c,v 1.9 2020/04/24 11:58:02 schwarze Exp $ */ +/* + * Copyright (c) 2014-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Implementation of warning and error messages for mandoc(1). + */ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mandoc.h" + +static const enum mandocerr lowest_type[MANDOCLEVEL_MAX] = { + MANDOCERR_OK, + MANDOCERR_OK, + MANDOCERR_WARNING, + MANDOCERR_ERROR, + MANDOCERR_UNSUPP, + MANDOCERR_BADARG, + MANDOCERR_SYSERR +}; + +static const char *const level_name[MANDOCLEVEL_MAX] = { + "SUCCESS", + "STYLE", + "WARNING", + "ERROR", + "UNSUPP", + "BADARG", + "SYSERR" +}; + +static const char *const type_message[MANDOCERR_MAX] = { + "ok", + + "base system convention", + + "Mdocdate found", + "Mdocdate missing", + "unknown architecture", + "operating system explicitly specified", + "RCS id missing", + "referenced manual not found", + + "generic style suggestion", + + "legacy man(7) date format", + "normalizing date format to", + "lower case character in document title", + "duplicate RCS id", + "possible typo in section name", + "unterminated quoted argument", + "useless macro", + "consider using OS macro", + "errnos out of order", + "duplicate errno", + "trailing delimiter", + "no blank before trailing delimiter", + "fill mode already enabled, skipping", + "fill mode already disabled, skipping", + "verbatim \"--\", maybe consider using \\(em", + "function name without markup", + "whitespace at end of input line", + "bad comment style", + + "generic warning", + + /* related to the prologue */ + "missing manual title, using UNTITLED", + "missing manual title, using \"\"", + "missing manual section, using \"\"", + "unknown manual section", + "filename/section mismatch", + "missing date, using \"\"", + "cannot 
parse date, using it verbatim", + "date in the future, using it anyway", + "missing Os macro, using \"\"", + "late prologue macro", + "prologue macros out of order", + + /* related to document structure */ + ".so is fragile, better use ln(1)", + "no document body", + "content before first section header", + "first section is not \"NAME\"", + "NAME section without Nm before Nd", + "NAME section without description", + "description not at the end of NAME", + "bad NAME section content", + "missing comma before name", + "missing description line, using \"\"", + "description line outside NAME section", + "sections out of conventional order", + "duplicate section title", + "unexpected section", + "cross reference to self", + "unusual Xr order", + "unusual Xr punctuation", + "AUTHORS section without An macro", + + /* related to macros and nesting */ + "obsolete macro", + "macro neither callable nor escaped", + "skipping paragraph macro", + "moving paragraph macro out of list", + "skipping no-space macro", + "blocks badly nested", + "nested displays are not portable", + "moving content out of list", + "first macro on line", + "line scope broken", + "skipping blank line in line scope", + + /* related to missing macro arguments */ + "skipping empty request", + "conditional request controls empty scope", + "skipping empty macro", + "empty block", + "empty argument, using 0n", + "missing display type, using -ragged", + "list type is not the first argument", + "missing -width in -tag list, using 6n", + "missing utility name, using \"\"", + "missing function name, using \"\"", + "empty head in list item", + "empty list item", + "missing argument, using next line", + "missing font type, using \\fR", + "unknown font type, using \\fR", + "nothing follows prefix", + "empty reference block", + "missing section argument", + "missing -std argument, adding it", + "missing option string, using \"\"", + "missing resource identifier, using \"\"", + "missing eqn box, using \"\"", + + /* 
related to bad macro arguments */ + "duplicate argument", + "skipping duplicate argument", + "skipping duplicate display type", + "skipping duplicate list type", + "skipping -width argument", + "wrong number of cells", + "unknown AT&T UNIX version", + "comma in function argument", + "parenthesis in function name", + "unknown library name", + "invalid content in Rs block", + "invalid Boolean argument", + "argument contains two font escapes", + "unknown font, skipping request", + "odd number of characters in request", + + /* related to plain text */ + "blank line in fill mode, using .sp", + "tab in filled text", + "new sentence, new line", + "invalid escape sequence", + "undefined escape, printing literally", + "undefined string, using \"\"", + + /* related to tables */ + "tbl line starts with span", + "tbl column starts with span", + "skipping vertical bar in tbl layout", + + "generic error", + + /* related to tables */ + "non-alphabetic character in tbl options", + "skipping unknown tbl option", + "missing tbl option argument", + "wrong tbl option argument size", + "empty tbl layout", + "invalid character in tbl layout", + "unmatched parenthesis in tbl layout", + "tbl without any data cells", + "ignoring data in spanned tbl cell", + "ignoring extra tbl data cells", + "data block open at end of tbl", + + /* related to document structure and macros */ + "duplicate prologue macro", + "skipping late title macro", + "input stack limit exceeded, infinite loop?", + "skipping bad character", + "skipping unknown macro", + "ignoring request outside macro", + "skipping insecure request", + "skipping item outside list", + "skipping column outside column list", + "skipping end of block that is not open", + "fewer RS blocks open, skipping", + "inserting missing end of block", + "appending missing end of block", + + /* related to request and macro arguments */ + "escaped character not allowed in a name", + "using macro argument outside macro", + "argument number is not numeric", 
+ "NOT IMPLEMENTED: Bd -file", + "skipping display without arguments", + "missing list type, using -item", + "argument is not numeric, using 1", + "argument is not a character", + "missing manual name, using \"\"", + "uname(3) system call failed, using UNKNOWN", + "unknown standard specifier", + "skipping request without numeric argument", + "excessive shift", + "NOT IMPLEMENTED: .so with absolute path or \"..\"", + ".so request failed", + "skipping tag containing whitespace", + "skipping all arguments", + "skipping excess arguments", + "divide by zero", + + "unsupported feature", + "input too large", + "unsupported control character", + "unsupported escape sequence", + "unsupported roff request", + "nested .while loops", + "end of scope with open .while loop", + "end of .while loop in inner scope", + "cannot continue this .while loop", + "eqn delim option in tbl", + "unsupported tbl layout modifier", + "ignoring macro in table", + + /* bad command line arguments */ + NULL, + "bad command line argument", + "duplicate command line argument", + "option has a superfluous value", + "missing option value", + "bad option value", + "duplicate option value", + "no such tag", + + /* system errors */ + NULL, + "dup", + "exec", + "fdopen", + "fflush", + "fork", + "fstat", + "getline", + "glob", + "gzclose", + "gzdopen", + "mkstemp", + "open", + "pledge", + "read", + "wait", + "write", +}; + +static FILE *fileptr = NULL; +static const char *filename = NULL; +static enum mandocerr min_type = MANDOCERR_BADARG; +static enum mandoclevel rc = MANDOCLEVEL_OK; + + +void +mandoc_msg_setoutfile(FILE *fp) +{ + fileptr = fp; +} + +const char * +mandoc_msg_getinfilename(void) +{ + return filename; +} + +void +mandoc_msg_setinfilename(const char *fn) +{ + filename = fn; +} + +enum mandocerr +mandoc_msg_getmin(void) +{ + return min_type; +} + +void +mandoc_msg_setmin(enum mandocerr t) +{ + min_type = t; +} + +enum mandoclevel +mandoc_msg_getrc(void) +{ + return rc; +} + +void 
+mandoc_msg_setrc(enum mandoclevel level) +{ + if (rc < level) + rc = level; +} + +void +mandoc_msg(enum mandocerr t, int line, int col, const char *fmt, ...) +{ + va_list ap; + enum mandoclevel level; + + if (t < min_type) + return; + + level = MANDOCLEVEL_SYSERR; + while (t < lowest_type[level]) + level--; + mandoc_msg_setrc(level); + + if (fileptr == NULL) + return; + + fprintf(fileptr, "%s:", getprogname()); + if (filename != NULL) + fprintf(fileptr, " %s:", filename); + + if (line > 0) + fprintf(fileptr, "%d:%d:", line, col + 1); + + fprintf(fileptr, " %s", level_name[level]); + if (type_message[t] != NULL) + fprintf(fileptr, ": %s", type_message[t]); + + if (fmt != NULL) { + fprintf(fileptr, ": "); + va_start(ap, fmt); + vfprintf(fileptr, fmt, ap); + va_end(ap); + } + fputc('\n', fileptr); +} + +void +mandoc_msg_summary(void) +{ + if (fileptr != NULL && rc != MANDOCLEVEL_OK) + fprintf(fileptr, + "%s: see above the output for %s messages\n", + getprogname(), level_name[rc]); +} diff --git a/usr.bin/mandoc/mandoc_ohash.c b/usr.bin/mandoc/mandoc_ohash.c new file mode 100644 index 0000000..9557caf --- /dev/null +++ b/usr.bin/mandoc/mandoc_ohash.c @@ -0,0 +1,64 @@ +/* $OpenBSD: mandoc_ohash.c,v 1.2 2015/10/19 18:58:20 schwarze Exp $ */ +/* + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/cdefs.h> +#include <sys/types.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" + +static void *hash_alloc(size_t, void *); +static void *hash_calloc(size_t, size_t, void *); +static void hash_free(void *, void *); + + +void +mandoc_ohash_init(struct ohash *h, unsigned int sz, ptrdiff_t ko) +{ + struct ohash_info info; + + info.alloc = hash_alloc; + info.calloc = hash_calloc; + info.free = hash_free; + info.data = NULL; + info.key_offset = ko; + + ohash_init(h, sz, &info); +} + +static void * +hash_alloc(size_t sz, void *arg) +{ + + return mandoc_malloc(sz); +} + +static void * +hash_calloc(size_t n, size_t sz, void *arg) +{ + + return mandoc_calloc(n, sz); +} + +static void +hash_free(void *p, void *arg) +{ + + free(p); +} diff --git a/usr.bin/mandoc/mandoc_ohash.h b/usr.bin/mandoc/mandoc_ohash.h new file mode 100644 index 0000000..0ea2d7a --- /dev/null +++ b/usr.bin/mandoc/mandoc_ohash.h @@ -0,0 +1,19 @@ +/* $OpenBSD: mandoc_ohash.h,v 1.2 2015/11/07 13:57:55 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <ohash.h> + +void mandoc_ohash_init(struct ohash *, unsigned int, ptrdiff_t); diff --git a/usr.bin/mandoc/mandoc_parse.h b/usr.bin/mandoc/mandoc_parse.h new file mode 100644 index 0000000..1821c1e --- /dev/null +++ b/usr.bin/mandoc/mandoc_parse.h @@ -0,0 +1,44 @@ +/* $OpenBSD: mandoc_parse.h,v 1.4 2019/11/09 14:39:42 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014,2015,2016,2017,2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Top level parser interface. For use in the main program + * and in the main parser, but not in formatters. + */ + +/* + * Parse options. 
+ */ +#define MPARSE_MDOC (1 << 0) /* assume -mdoc */ +#define MPARSE_MAN (1 << 1) /* assume -man */ +#define MPARSE_SO (1 << 2) /* honour .so requests */ +#define MPARSE_QUICK (1 << 3) /* abort the parse early */ +#define MPARSE_UTF8 (1 << 4) /* accept UTF-8 input */ +#define MPARSE_LATIN1 (1 << 5) /* accept ISO-LATIN-1 input */ +#define MPARSE_VALIDATE (1 << 6) /* call validation functions */ +#define MPARSE_COMMENT (1 << 7) /* save comments in the tree */ + + +struct roff_meta; +struct mparse; + +struct mparse *mparse_alloc(int, enum mandoc_os, const char *); +void mparse_copy(const struct mparse *); +void mparse_free(struct mparse *); +int mparse_open(struct mparse *, const char *); +void mparse_readfd(struct mparse *, int, const char *); +void mparse_reset(struct mparse *); +struct roff_meta *mparse_result(struct mparse *); diff --git a/usr.bin/mandoc/mandoc_xr.c b/usr.bin/mandoc/mandoc_xr.c new file mode 100644 index 0000000..3f79da3 --- /dev/null +++ b/usr.bin/mandoc/mandoc_xr.c @@ -0,0 +1,122 @@ +/* $OpenBSD: mandoc_xr.c,v 1.3 2017/07/02 21:17:12 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc_xr.h" + +static struct ohash *xr_hash = NULL; +static struct mandoc_xr *xr_first = NULL; +static struct mandoc_xr *xr_last = NULL; + +static void mandoc_xr_clear(void); + + +static void +mandoc_xr_clear(void) +{ + struct mandoc_xr *xr; + unsigned int slot; + + if (xr_hash == NULL) + return; + for (xr = ohash_first(xr_hash, &slot); xr != NULL; + xr = ohash_next(xr_hash, &slot)) + free(xr); + ohash_delete(xr_hash); +} + +void +mandoc_xr_reset(void) +{ + if (xr_hash == NULL) + xr_hash = mandoc_malloc(sizeof(*xr_hash)); + else + mandoc_xr_clear(); + mandoc_ohash_init(xr_hash, 5, + offsetof(struct mandoc_xr, hashkey)); + xr_first = xr_last = NULL; +} + +int +mandoc_xr_add(const char *sec, const char *name, int line, int pos) +{ + struct mandoc_xr *xr, *oxr; + const char *pend; + size_t ssz, nsz, tsz; + unsigned int slot; + int ret; + uint32_t hv; + + if (xr_hash == NULL) + return 0; + + ssz = strlen(sec) + 1; + nsz = strlen(name) + 1; + tsz = ssz + nsz; + xr = mandoc_malloc(sizeof(*xr) + tsz); + xr->next = NULL; + xr->sec = xr->hashkey; + xr->name = xr->hashkey + ssz; + xr->line = line; + xr->pos = pos; + xr->count = 1; + memcpy(xr->sec, sec, ssz); + memcpy(xr->name, name, nsz); + + pend = xr->hashkey + tsz; + hv = ohash_interval(xr->hashkey, &pend); + slot = ohash_lookup_memory(xr_hash, xr->hashkey, tsz, hv); + if ((oxr = ohash_find(xr_hash, slot)) == NULL) { + ohash_insert(xr_hash, slot, xr); + if (xr_first == NULL) + xr_first = xr; + else + xr_last->next = xr; + xr_last = xr; + return 0; + } + + oxr->count++; + ret = (oxr->line == -1) ^ (xr->line == -1); + if (xr->line == -1) + oxr->line = -1; + free(xr); + return ret; +} + +struct mandoc_xr * +mandoc_xr_get(void) +{ + return xr_first; +} + +void +mandoc_xr_free(void) +{ + 
mandoc_xr_clear(); + free(xr_hash); + xr_hash = NULL; +} diff --git a/usr.bin/mandoc/mandoc_xr.h b/usr.bin/mandoc/mandoc_xr.h new file mode 100644 index 0000000..708f502 --- /dev/null +++ b/usr.bin/mandoc/mandoc_xr.h @@ -0,0 +1,31 @@ +/* $OpenBSD: mandoc_xr.h,v 1.3 2017/07/02 21:17:12 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct mandoc_xr { + struct mandoc_xr *next; + char *sec; + char *name; + int line; /* Or -1 for this page's own names. 
*/ + int pos; + int count; + char hashkey[]; +}; + +void mandoc_xr_reset(void); +int mandoc_xr_add(const char *, const char *, int, int); +struct mandoc_xr *mandoc_xr_get(void); +void mandoc_xr_free(void); diff --git a/usr.bin/mandoc/mandocdb.c b/usr.bin/mandoc/mandocdb.c new file mode 100644 index 0000000..6e7efae --- /dev/null +++ b/usr.bin/mandoc/mandocdb.c @@ -0,0 +1,2386 @@ +/* $OpenBSD: mandocdb.c,v 1.216 2020/04/03 11:34:19 schwarze Exp $ */ +/* + * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2016 Ed Maste <emaste@freebsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Implementation of the makewhatis(8) program. 
+ */ +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <fts.h> +#include <limits.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "mandoc_parse.h" +#include "manconf.h" +#include "mansearch.h" +#include "dba_array.h" +#include "dba.h" + +extern const char *const mansearch_keynames[]; + +enum op { + OP_DEFAULT = 0, /* new dbs from dir list or default config */ + OP_CONFFILE, /* new databases from custom config file */ + OP_UPDATE, /* delete/add entries in existing database */ + OP_DELETE, /* delete entries from existing database */ + OP_TEST /* change no databases, report potential problems */ +}; + +struct str { + const struct mpage *mpage; /* if set, the owning parse */ + uint64_t mask; /* bitmask in sequence */ + char key[]; /* rendered text */ +}; + +struct inodev { + ino_t st_ino; + dev_t st_dev; +}; + +struct mpage { + struct inodev inodev; /* used for hashing routine */ + struct dba_array *dba; + char *sec; /* section from file content */ + char *arch; /* architecture from file content */ + char *title; /* title from file content */ + char *desc; /* description from file content */ + struct mpage *next; /* singly linked list */ + struct mlink *mlinks; /* singly linked list */ + int name_head_done; + enum form form; /* format from file content */ +}; + +struct mlink { + char file[PATH_MAX]; /* filename rel. 
to manpath */ + char *dsec; /* section from directory */ + char *arch; /* architecture from directory */ + char *name; /* name from file name (not empty) */ + char *fsec; /* section from file name suffix */ + struct mlink *next; /* singly linked list */ + struct mpage *mpage; /* parent */ + int gzip; /* filename has a .gz suffix */ + enum form dform; /* format from directory */ + enum form fform; /* format from file name suffix */ +}; + +typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, + const struct roff_node *); + +struct mdoc_handler { + mdoc_fp fp; /* optional handler */ + uint64_t mask; /* set unless handler returns 0 */ + int taboo; /* node flags that must not be set */ +}; + + +int mandocdb(int, char *[]); + +static void dbadd(struct dba *, struct mpage *); +static void dbadd_mlink(const struct mlink *); +static void dbprune(struct dba *); +static void dbwrite(struct dba *); +static void filescan(const char *); +static int fts_compare(const FTSENT **, const FTSENT **); +static void mlink_add(struct mlink *, const struct stat *); +static void mlink_check(struct mpage *, struct mlink *); +static void mlink_free(struct mlink *); +static void mlinks_undupe(struct mpage *); +static void mpages_free(void); +static void mpages_merge(struct dba *, struct mparse *); +static void parse_cat(struct mpage *, int); +static void parse_man(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void parse_mdoc(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_head(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Fa(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void parse_mdoc_fname(struct mpage *, const struct roff_node *); +static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, + const struct roff_node *); 
+static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void putkey(const struct mpage *, char *, uint64_t); +static void putkeys(const struct mpage *, char *, size_t, uint64_t); +static void putmdockey(const struct mpage *, + const struct roff_node *, uint64_t, int); +static int render_string(char **, size_t *); +static void say(const char *, const char *, ...) + __attribute__((__format__ (__printf__, 2, 3))); +static int set_basedir(const char *, int); +static int treescan(void); +static size_t utf8(unsigned int, char [7]); + +static int nodb; /* no database changes */ +static int mparse_options; /* abort the parse early */ +static int use_all; /* use all found files */ +static int debug; /* print what we're doing */ +static int warnings; /* warn about crap */ +static int write_utf8; /* write UTF-8 output; else ASCII */ +static int exitcode; /* to be returned by main */ +static enum op op; /* operational mode */ +static char basedir[PATH_MAX]; /* current base directory */ +static size_t basedir_len; /* strlen(basedir) */ +static struct mpage *mpage_head; /* list of distinct manual pages */ +static struct ohash mpages; /* table of distinct manual pages */ +static struct ohash mlinks; /* table of directory entries */ +static struct ohash names; /* table of all names */ +static struct ohash strings; /* table of all strings */ +static uint64_t name_mask; + +static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = { + { NULL, 0, 
NODE_NOPRT }, /* Dd */ + { NULL, 0, NODE_NOPRT }, /* Dt */ + { NULL, 0, NODE_NOPRT }, /* Os */ + { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */ + { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */ + { NULL, 0, 0 }, /* Pp */ + { NULL, 0, 0 }, /* D1 */ + { NULL, 0, 0 }, /* Dl */ + { NULL, 0, 0 }, /* Bd */ + { NULL, 0, 0 }, /* Ed */ + { NULL, 0, 0 }, /* Bl */ + { NULL, 0, 0 }, /* El */ + { NULL, 0, 0 }, /* It */ + { NULL, 0, 0 }, /* Ad */ + { NULL, TYPE_An, 0 }, /* An */ + { NULL, 0, 0 }, /* Ap */ + { NULL, TYPE_Ar, 0 }, /* Ar */ + { NULL, TYPE_Cd, 0 }, /* Cd */ + { NULL, TYPE_Cm, 0 }, /* Cm */ + { NULL, TYPE_Dv, 0 }, /* Dv */ + { NULL, TYPE_Er, 0 }, /* Er */ + { NULL, TYPE_Ev, 0 }, /* Ev */ + { NULL, 0, 0 }, /* Ex */ + { parse_mdoc_Fa, 0, 0 }, /* Fa */ + { parse_mdoc_Fd, 0, 0 }, /* Fd */ + { NULL, TYPE_Fl, 0 }, /* Fl */ + { parse_mdoc_Fn, 0, 0 }, /* Fn */ + { NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */ + { NULL, TYPE_Ic, 0 }, /* Ic */ + { NULL, TYPE_In, 0 }, /* In */ + { NULL, TYPE_Li, 0 }, /* Li */ + { parse_mdoc_Nd, 0, 0 }, /* Nd */ + { parse_mdoc_Nm, 0, 0 }, /* Nm */ + { NULL, 0, 0 }, /* Op */ + { NULL, 0, 0 }, /* Ot */ + { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */ + { NULL, 0, 0 }, /* Rv */ + { NULL, TYPE_St, 0 }, /* St */ + { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */ + { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */ + { parse_mdoc_Xr, 0, 0 }, /* Xr */ + { NULL, 0, 0 }, /* %A */ + { NULL, 0, 0 }, /* %B */ + { NULL, 0, 0 }, /* %D */ + { NULL, 0, 0 }, /* %I */ + { NULL, 0, 0 }, /* %J */ + { NULL, 0, 0 }, /* %N */ + { NULL, 0, 0 }, /* %O */ + { NULL, 0, 0 }, /* %P */ + { NULL, 0, 0 }, /* %R */ + { NULL, 0, 0 }, /* %T */ + { NULL, 0, 0 }, /* %V */ + { NULL, 0, 0 }, /* Ac */ + { NULL, 0, 0 }, /* Ao */ + { NULL, 0, 0 }, /* Aq */ + { NULL, TYPE_At, 0 }, /* At */ + { NULL, 0, 0 }, /* Bc */ + { NULL, 0, 0 }, /* Bf */ + { NULL, 0, 0 }, /* Bo */ + { NULL, 0, 0 }, /* Bq */ + { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */ + { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */ + { NULL, 0, 0 }, /* Db */ + { NULL, 0, 0 }, /* Dc */ 
+ { NULL, 0, 0 }, /* Do */ + { NULL, 0, 0 }, /* Dq */ + { NULL, 0, 0 }, /* Ec */ + { NULL, 0, 0 }, /* Ef */ + { NULL, TYPE_Em, 0 }, /* Em */ + { NULL, 0, 0 }, /* Eo */ + { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */ + { NULL, TYPE_Ms, 0 }, /* Ms */ + { NULL, 0, 0 }, /* No */ + { NULL, 0, 0 }, /* Ns */ + { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */ + { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */ + { NULL, 0, 0 }, /* Pc */ + { NULL, 0, 0 }, /* Pf */ + { NULL, 0, 0 }, /* Po */ + { NULL, 0, 0 }, /* Pq */ + { NULL, 0, 0 }, /* Qc */ + { NULL, 0, 0 }, /* Ql */ + { NULL, 0, 0 }, /* Qo */ + { NULL, 0, 0 }, /* Qq */ + { NULL, 0, 0 }, /* Re */ + { NULL, 0, 0 }, /* Rs */ + { NULL, 0, 0 }, /* Sc */ + { NULL, 0, 0 }, /* So */ + { NULL, 0, 0 }, /* Sq */ + { NULL, 0, 0 }, /* Sm */ + { NULL, 0, 0 }, /* Sx */ + { NULL, TYPE_Sy, 0 }, /* Sy */ + { NULL, TYPE_Tn, 0 }, /* Tn */ + { NULL, 0, NODE_NOSRC }, /* Ux */ + { NULL, 0, 0 }, /* Xc */ + { NULL, 0, 0 }, /* Xo */ + { parse_mdoc_Fo, 0, 0 }, /* Fo */ + { NULL, 0, 0 }, /* Fc */ + { NULL, 0, 0 }, /* Oo */ + { NULL, 0, 0 }, /* Oc */ + { NULL, 0, 0 }, /* Bk */ + { NULL, 0, 0 }, /* Ek */ + { NULL, 0, 0 }, /* Bt */ + { NULL, 0, 0 }, /* Hf */ + { NULL, 0, 0 }, /* Fr */ + { NULL, 0, 0 }, /* Ud */ + { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */ + { NULL, 0, 0 }, /* Lp */ + { NULL, TYPE_Lk, 0 }, /* Lk */ + { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */ + { NULL, 0, 0 }, /* Brq */ + { NULL, 0, 0 }, /* Bro */ + { NULL, 0, 0 }, /* Brc */ + { NULL, 0, 0 }, /* %C */ + { NULL, 0, 0 }, /* Es */ + { NULL, 0, 0 }, /* En */ + { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ + { NULL, 0, 0 }, /* %Q */ + { NULL, 0, 0 }, /* %U */ + { NULL, 0, 0 }, /* Ta */ +}; + + +int +mandocdb(int argc, char *argv[]) +{ + struct manconf conf; + struct mparse *mp; + struct dba *dba; + const char *path_arg, *progname; + size_t j, sz; + int ch, i; + + if (pledge("stdio rpath wpath cpath", NULL) == -1) { + warn("pledge"); + return (int)MANDOCLEVEL_SYSERR; + } + + memset(&conf, 0, sizeof(conf)); + + /* + * We accept 
a few different invocations. + * The CHECKOP macro makes sure that invocation styles don't + * clobber each other. + */ +#define CHECKOP(_op, _ch) do \ + if ((_op) != OP_DEFAULT) { \ + warnx("-%c: Conflicting option", (_ch)); \ + goto usage; \ + } while (/*CONSTCOND*/0) + + mparse_options = MPARSE_VALIDATE; + path_arg = NULL; + op = OP_DEFAULT; + + while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1) + switch (ch) { + case 'a': + use_all = 1; + break; + case 'C': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_CONFFILE; + break; + case 'D': + debug++; + break; + case 'd': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_UPDATE; + break; + case 'n': + nodb = 1; + break; + case 'p': + warnings = 1; + break; + case 'Q': + mparse_options |= MPARSE_QUICK; + break; + case 'T': + if (strcmp(optarg, "utf8") != 0) { + warnx("-T%s: Unsupported output format", + optarg); + goto usage; + } + write_utf8 = 1; + break; + case 't': + CHECKOP(op, ch); + dup2(STDOUT_FILENO, STDERR_FILENO); + op = OP_TEST; + nodb = warnings = 1; + break; + case 'u': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_DELETE; + break; + case 'v': + /* Compatibility with espie@'s makewhatis. */ + break; + default: + goto usage; + } + + argc -= optind; + argv += optind; + + if (nodb) { + if (pledge("stdio rpath", NULL) == -1) { + warn("pledge"); + return (int)MANDOCLEVEL_SYSERR; + } + } + + if (op == OP_CONFFILE && argc > 0) { + warnx("-C: Too many arguments"); + goto usage; + } + + exitcode = (int)MANDOCLEVEL_OK; + mchars_alloc(); + mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL); + mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); + mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); + + if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) { + + /* + * Most of these deal with a specific directory. + * Jump into that directory first. + */ + if (op != OP_TEST && set_basedir(path_arg, 1) == 0) + goto out; + + dba = nodb ? 
dba_new(128) : dba_read(MANDOC_DB); + if (dba != NULL) { + /* + * The existing database is usable. Process + * all files specified on the command-line. + */ + use_all = 1; + for (i = 0; i < argc; i++) + filescan(argv[i]); + if (nodb == 0) + dbprune(dba); + } else { + /* Database missing or corrupt. */ + if (op != OP_UPDATE || errno != ENOENT) + say(MANDOC_DB, "%s: Automatically recreating" + " from scratch", strerror(errno)); + exitcode = (int)MANDOCLEVEL_OK; + op = OP_DEFAULT; + if (treescan() == 0) + goto out; + dba = dba_new(128); + } + if (op != OP_DELETE) + mpages_merge(dba, mp); + if (nodb == 0) + dbwrite(dba); + dba_free(dba); + } else { + /* + * If we have arguments, use them as our manpaths. + * If we don't, use man.conf(5). + */ + if (argc > 0) { + conf.manpath.paths = mandoc_reallocarray(NULL, + argc, sizeof(char *)); + conf.manpath.sz = (size_t)argc; + for (i = 0; i < argc; i++) + conf.manpath.paths[i] = mandoc_strdup(argv[i]); + } else + manconf_parse(&conf, path_arg, NULL, NULL); + + if (conf.manpath.sz == 0) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "Empty manpath"); + } + + /* + * First scan the tree rooted at a base directory, then + * build a new database and finally move it into place. + * Ignore zero-length directories and strip trailing + * slashes. 
+ */ + for (j = 0; j < conf.manpath.sz; j++) { + sz = strlen(conf.manpath.paths[j]); + if (sz && conf.manpath.paths[j][sz - 1] == '/') + conf.manpath.paths[j][--sz] = '\0'; + if (sz == 0) + continue; + + if (j) { + mandoc_ohash_init(&mpages, 6, + offsetof(struct mpage, inodev)); + mandoc_ohash_init(&mlinks, 6, + offsetof(struct mlink, file)); + } + + if (set_basedir(conf.manpath.paths[j], argc > 0) == 0) + continue; + if (treescan() == 0) + continue; + dba = dba_new(128); + mpages_merge(dba, mp); + if (nodb == 0) + dbwrite(dba); + dba_free(dba); + + if (j + 1 < conf.manpath.sz) { + mpages_free(); + ohash_delete(&mpages); + ohash_delete(&mlinks); + } + } + } +out: + manconf_free(&conf); + mparse_free(mp); + mchars_free(); + mpages_free(); + ohash_delete(&mpages); + ohash_delete(&mlinks); + return exitcode; +usage: + progname = getprogname(); + fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" + " %s [-aDnpQ] [-Tutf8] dir ...\n" + " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" + " %s [-Dnp] -u dir [file ...]\n" + " %s [-Q] -t file ...\n", + progname, progname, progname, progname, progname); + + return (int)MANDOCLEVEL_BADARG; +} + +/* + * To get a singly linked list in alpha order while inserting entries + * at the beginning, process directory entries in reverse alpha order. + */ +static int +fts_compare(const FTSENT **a, const FTSENT **b) +{ + return -strcmp((*a)->fts_name, (*b)->fts_name); +} + +/* + * Scan a directory tree rooted at "basedir" for manpages. + * We use fts(), scanning directory parts along the way for clues to our + * section and architecture. + * + * If use_all has been specified, grok all files. + * If not, sanitise paths to the following: + * + * [./]man*[/<arch>]/<name>.<section> + * or + * [./]cat<section>[/<arch>]/<name>.0 + * + * TODO: accommodate for multi-language directories. 
+ */ +static int +treescan(void) +{ + char buf[PATH_MAX]; + FTS *f; + FTSENT *ff; + struct mlink *mlink; + int gzip; + enum form dform; + char *dsec, *arch, *fsec, *cp; + const char *path; + const char *argv[2]; + + argv[0] = "."; + argv[1] = NULL; + + f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR, + fts_compare); + if (f == NULL) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&fts_open"); + return 0; + } + + dsec = arch = NULL; + dform = FORM_NONE; + + while ((ff = fts_read(f)) != NULL) { + path = ff->fts_path + 2; + switch (ff->fts_info) { + + /* + * Symbolic links require various sanity checks, + * then get handled just like regular files. + */ + case FTS_SL: + if (realpath(path, buf) == NULL) { + if (warnings) + say(path, "&realpath"); + continue; + } + if (strncmp(buf, basedir, basedir_len) != 0) { + if (warnings) say("", + "%s: outside base directory", buf); + continue; + } + /* Use logical inode to avoid mpages dupe. */ + if (stat(path, ff->fts_statp) == -1) { + if (warnings) + say(path, "&stat"); + continue; + } + /* FALLTHROUGH */ + + /* + * If we're a regular file, add an mlink by using the + * stored directory data and handling the filename. + */ + case FTS_F: + if ( ! strcmp(path, MANDOC_DB)) + continue; + if ( ! use_all && ff->fts_level < 2) { + if (warnings) + say(path, "Extraneous file"); + continue; + } + gzip = 0; + fsec = NULL; + while (fsec == NULL) { + fsec = strrchr(ff->fts_name, '.'); + if (fsec == NULL || strcmp(fsec+1, "gz")) + break; + gzip = 1; + *fsec = '\0'; + fsec = NULL; + } + if (fsec == NULL) { + if ( ! use_all) { + if (warnings) + say(path, + "No filename suffix"); + continue; + } + } else if ( ! strcmp(++fsec, "html")) { + if (warnings) + say(path, "Skip html"); + continue; + } else if ( ! strcmp(fsec, "ps")) { + if (warnings) + say(path, "Skip ps"); + continue; + } else if ( ! strcmp(fsec, "pdf")) { + if (warnings) + say(path, "Skip pdf"); + continue; + } else if ( ! 
use_all && + ((dform == FORM_SRC && + strncmp(fsec, dsec, strlen(dsec))) || + (dform == FORM_CAT && strcmp(fsec, "0")))) { + if (warnings) + say(path, "Wrong filename suffix"); + continue; + } else + fsec[-1] = '\0'; + + mlink = mandoc_calloc(1, sizeof(struct mlink)); + if (strlcpy(mlink->file, path, + sizeof(mlink->file)) >= + sizeof(mlink->file)) { + say(path, "Filename too long"); + free(mlink); + continue; + } + mlink->dform = dform; + mlink->dsec = dsec; + mlink->arch = arch; + mlink->name = ff->fts_name; + mlink->fsec = fsec; + mlink->gzip = gzip; + mlink_add(mlink, ff->fts_statp); + continue; + + case FTS_D: + case FTS_DP: + break; + + default: + if (warnings) + say(path, "Not a regular file"); + continue; + } + + switch (ff->fts_level) { + case 0: + /* Ignore the root directory. */ + break; + case 1: + /* + * This might contain manX/ or catX/. + * Try to infer this from the name. + * If we're not in use_all, enforce it. + */ + cp = ff->fts_name; + if (ff->fts_info == FTS_DP) { + dform = FORM_NONE; + dsec = NULL; + break; + } + + if ( ! strncmp(cp, "man", 3)) { + dform = FORM_SRC; + dsec = cp + 3; + } else if ( ! strncmp(cp, "cat", 3)) { + dform = FORM_CAT; + dsec = cp + 3; + } else { + dform = FORM_NONE; + dsec = NULL; + } + + if (dsec != NULL || use_all) + break; + + if (warnings) + say(path, "Unknown directory part"); + fts_set(f, ff, FTS_SKIP); + break; + case 2: + /* + * Possibly our architecture. + * If we're descending, keep tabs on it. + */ + if (ff->fts_info != FTS_DP && dsec != NULL) + arch = ff->fts_name; + else + arch = NULL; + break; + default: + if (ff->fts_info == FTS_DP || use_all) + break; + if (warnings) + say(path, "Extraneous directory part"); + fts_set(f, ff, FTS_SKIP); + break; + } + } + + fts_close(f); + return 1; +} + +/* + * Add a file to the mlinks table. + * Do not verify that it's a "valid" looking manpage (we'll do that + * later). 
+ * + * Try to infer the manual section, architecture, and page name from the + * path, assuming it looks like + * + * [./]man*[/<arch>]/<name>.<section> + * or + * [./]cat<section>[/<arch>]/<name>.0 + * + * See treescan() for the fts(3) version of this. + */ +static void +filescan(const char *infile) +{ + struct stat st; + struct mlink *mlink; + char *linkfile, *p, *realdir, *start, *usefile; + size_t realdir_len; + + assert(use_all); + + if (strncmp(infile, "./", 2) == 0) + infile += 2; + + /* + * We have to do lstat(2) before realpath(3) loses + * the information whether this is a symbolic link. + * We need to know that because for symbolic links, + * we want to use the orginal file name, while for + * regular files, we want to use the real path. + */ + if (lstat(infile, &st) == -1) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "&lstat"); + return; + } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "Not a regular file"); + return; + } + + /* + * We have to resolve the file name to the real path + * in any case for the base directory check. + */ + if ((usefile = realpath(infile, NULL)) == NULL) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "&realpath"); + return; + } + + if (op == OP_TEST) + start = usefile; + else if (strncmp(usefile, basedir, basedir_len) == 0) + start = usefile + basedir_len; + else { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "%s: outside base directory", infile); + free(usefile); + return; + } + + /* + * Now we are sure the file is inside our tree. + * If it is a symbolic link, ignore the real path + * and use the original name. + */ + do { + if (S_ISLNK(st.st_mode) == 0) + break; + + /* + * Some implementations of realpath(3) may succeed + * even if the target of the link does not exist, + * so check again for extra safety. 
+ */ + if (stat(usefile, &st) == -1) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "&stat"); + free(usefile); + return; + } + linkfile = mandoc_strdup(infile); + if (op == OP_TEST) { + free(usefile); + start = usefile = linkfile; + break; + } + if (strncmp(infile, basedir, basedir_len) == 0) { + free(usefile); + usefile = linkfile; + start = usefile + basedir_len; + break; + } + + /* + * This symbolic link points into the basedir + * from the outside. Let's see whether any of + * the parent directories resolve to the basedir. + */ + p = strchr(linkfile, '\0'); + do { + while (*--p != '/') + continue; + *p = '\0'; + if ((realdir = realpath(linkfile, NULL)) == NULL) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(infile, "&realpath"); + free(linkfile); + free(usefile); + return; + } + realdir_len = strlen(realdir) + 1; + free(realdir); + *p = '/'; + } while (realdir_len > basedir_len); + + /* + * If one of the directories resolves to the basedir, + * use the rest of the original name. + * Otherwise, the best we can do + * is to use the filename pointed to. + */ + if (realdir_len == basedir_len) { + free(usefile); + usefile = linkfile; + start = p + 1; + } else { + free(linkfile); + start = usefile + basedir_len; + } + } while (/* CONSTCOND */ 0); + + mlink = mandoc_calloc(1, sizeof(struct mlink)); + mlink->dform = FORM_NONE; + if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= + sizeof(mlink->file)) { + say(start, "Filename too long"); + free(mlink); + free(usefile); + return; + } + + /* + * In test mode or when the original name is absolute + * but outside our tree, guess the base directory. + */ + + if (op == OP_TEST || (start == usefile && *start == '/')) { + if (strncmp(usefile, "man/", 4) == 0) + start = usefile + 4; + else if ((start = strstr(usefile, "/man/")) != NULL) + start += 5; + else + start = usefile; + } + + /* + * First try to guess our directory structure. + * If we find a separator, try to look for man* or cat*. 
+ * If we find one of these and what's underneath is a directory, + * assume it's an architecture. + */ + if ((p = strchr(start, '/')) != NULL) { + *p++ = '\0'; + if (strncmp(start, "man", 3) == 0) { + mlink->dform = FORM_SRC; + mlink->dsec = start + 3; + } else if (strncmp(start, "cat", 3) == 0) { + mlink->dform = FORM_CAT; + mlink->dsec = start + 3; + } + + start = p; + if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) { + *p++ = '\0'; + mlink->arch = start; + start = p; + } + } + + /* + * Now check the file suffix. + * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. + */ + p = strrchr(start, '\0'); + while (p-- > start && *p != '/' && *p != '.') + continue; + + if (*p == '.') { + *p++ = '\0'; + mlink->fsec = p; + } + + /* + * Now try to parse the name. + * Use the filename portion of the path. + */ + mlink->name = start; + if ((p = strrchr(start, '/')) != NULL) { + mlink->name = p + 1; + *p = '\0'; + } + mlink_add(mlink, &st); + free(usefile); +} + +static void +mlink_add(struct mlink *mlink, const struct stat *st) +{ + struct inodev inodev; + struct mpage *mpage; + unsigned int slot; + + assert(NULL != mlink->file); + + mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); + mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); + mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); + mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); + + if ('0' == *mlink->fsec) { + free(mlink->fsec); + mlink->fsec = mandoc_strdup(mlink->dsec); + mlink->fform = FORM_CAT; + } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) + mlink->fform = FORM_SRC; + else + mlink->fform = FORM_NONE; + + slot = ohash_qlookup(&mlinks, mlink->file); + assert(NULL == ohash_find(&mlinks, slot)); + ohash_insert(&mlinks, slot, mlink); + + memset(&inodev, 0, sizeof(inodev)); /* Clear padding. 
*/ + inodev.st_ino = st->st_ino; + inodev.st_dev = st->st_dev; + slot = ohash_lookup_memory(&mpages, (char *)&inodev, + sizeof(struct inodev), inodev.st_ino); + mpage = ohash_find(&mpages, slot); + if (NULL == mpage) { + mpage = mandoc_calloc(1, sizeof(struct mpage)); + mpage->inodev.st_ino = inodev.st_ino; + mpage->inodev.st_dev = inodev.st_dev; + mpage->form = FORM_NONE; + mpage->next = mpage_head; + mpage_head = mpage; + ohash_insert(&mpages, slot, mpage); + } else + mlink->next = mpage->mlinks; + mpage->mlinks = mlink; + mlink->mpage = mpage; +} + +static void +mlink_free(struct mlink *mlink) +{ + + free(mlink->dsec); + free(mlink->arch); + free(mlink->name); + free(mlink->fsec); + free(mlink); +} + +static void +mpages_free(void) +{ + struct mpage *mpage; + struct mlink *mlink; + + while ((mpage = mpage_head) != NULL) { + while ((mlink = mpage->mlinks) != NULL) { + mpage->mlinks = mlink->next; + mlink_free(mlink); + } + mpage_head = mpage->next; + free(mpage->sec); + free(mpage->arch); + free(mpage->title); + free(mpage->desc); + free(mpage); + } +} + +/* + * For each mlink to the mpage, check whether the path looks like + * it is formatted, and if it does, check whether a source manual + * exists by the same name, ignoring the suffix. + * If both conditions hold, drop the mlink. 
+ */ +static void +mlinks_undupe(struct mpage *mpage) +{ + char buf[PATH_MAX]; + struct mlink **prev; + struct mlink *mlink; + char *bufp; + + mpage->form = FORM_CAT; + prev = &mpage->mlinks; + while (NULL != (mlink = *prev)) { + if (FORM_CAT != mlink->dform) { + mpage->form = FORM_NONE; + goto nextlink; + } + (void)strlcpy(buf, mlink->file, sizeof(buf)); + bufp = strstr(buf, "cat"); + assert(NULL != bufp); + memcpy(bufp, "man", 3); + if (NULL != (bufp = strrchr(buf, '.'))) + *++bufp = '\0'; + (void)strlcat(buf, mlink->dsec, sizeof(buf)); + if (NULL == ohash_find(&mlinks, + ohash_qlookup(&mlinks, buf))) + goto nextlink; + if (warnings) + say(mlink->file, "Man source exists: %s", buf); + if (use_all) + goto nextlink; + *prev = mlink->next; + mlink_free(mlink); + continue; +nextlink: + prev = &(*prev)->next; + } +} + +static void +mlink_check(struct mpage *mpage, struct mlink *mlink) +{ + struct str *str; + unsigned int slot; + + /* + * Check whether the manual section given in a file + * agrees with the directory where the file is located. + * Some manuals have suffixes like (3p) on their + * section number either inside the file or in the + * directory name, some are linked into more than one + * section, like encrypt(1) = makekey(8). + */ + + if (FORM_SRC == mpage->form && + strcasecmp(mpage->sec, mlink->dsec)) + say(mlink->file, "Section \"%s\" manual in %s directory", + mpage->sec, mlink->dsec); + + /* + * Manual page directories exist for each kernel + * architecture as returned by machine(1). + * However, many manuals only depend on the + * application architecture as returned by arch(1). + * For example, some (2/ARM) manuals are shared + * across the "armish" and "zaurus" kernel + * architectures. + * A few manuals are even shared across completely + * different architectures, for example fdformat(1) + * on amd64, i386, and sparc64. 
+ */ + + if (strcasecmp(mpage->arch, mlink->arch)) + say(mlink->file, "Architecture \"%s\" manual in " + "\"%s\" directory", mpage->arch, mlink->arch); + + /* + * XXX + * parse_cat() doesn't set NAME_TITLE yet. + */ + + if (FORM_CAT == mpage->form) + return; + + /* + * Check whether this mlink + * appears as a name in the NAME section. + */ + + slot = ohash_qlookup(&names, mlink->name); + str = ohash_find(&names, slot); + assert(NULL != str); + if ( ! (NAME_TITLE & str->mask)) + say(mlink->file, "Name missing in NAME section"); +} + +/* + * Run through the files in the global vector "mpages" + * and add them to the database specified in "basedir". + * + * This handles the parsing scheme itself, using the cues of directory + * and filename to determine whether the file is parsable or not. + */ +static void +mpages_merge(struct dba *dba, struct mparse *mp) +{ + struct mpage *mpage, *mpage_dest; + struct mlink *mlink, *mlink_dest; + struct roff_meta *meta; + char *cp; + int fd; + + for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) { + mlinks_undupe(mpage); + if ((mlink = mpage->mlinks) == NULL) + continue; + + name_mask = NAME_MASK; + mandoc_ohash_init(&names, 4, offsetof(struct str, key)); + mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); + mparse_reset(mp); + meta = NULL; + + if ((fd = mparse_open(mp, mlink->file)) == -1) { + say(mlink->file, "&open"); + goto nextpage; + } + + /* + * Interpret the file as mdoc(7) or man(7) source + * code, unless it is known to be formatted. 
+ */ + if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { + mparse_readfd(mp, fd, mlink->file); + close(fd); + fd = -1; + meta = mparse_result(mp); + } + + if (meta != NULL && meta->sodest != NULL) { + mlink_dest = ohash_find(&mlinks, + ohash_qlookup(&mlinks, meta->sodest)); + if (mlink_dest == NULL) { + mandoc_asprintf(&cp, "%s.gz", meta->sodest); + mlink_dest = ohash_find(&mlinks, + ohash_qlookup(&mlinks, cp)); + free(cp); + } + if (mlink_dest != NULL) { + + /* The .so target exists. */ + + mpage_dest = mlink_dest->mpage; + while (1) { + mlink->mpage = mpage_dest; + + /* + * If the target was already + * processed, add the links + * to the database now. + * Otherwise, this will + * happen when we come + * to the target. + */ + + if (mpage_dest->dba != NULL) + dbadd_mlink(mlink); + + if (mlink->next == NULL) + break; + mlink = mlink->next; + } + + /* Move all links to the target. */ + + mlink->next = mlink_dest->next; + mlink_dest->next = mpage->mlinks; + mpage->mlinks = NULL; + goto nextpage; + } + meta->macroset = MACROSET_NONE; + } + if (meta != NULL && meta->macroset == MACROSET_MDOC) { + mpage->form = FORM_SRC; + mpage->sec = meta->msec; + mpage->sec = mandoc_strdup( + mpage->sec == NULL ? "" : mpage->sec); + mpage->arch = meta->arch; + mpage->arch = mandoc_strdup( + mpage->arch == NULL ? 
"" : mpage->arch); + mpage->title = mandoc_strdup(meta->title); + } else if (meta != NULL && meta->macroset == MACROSET_MAN) { + if (*meta->msec != '\0' || *meta->title != '\0') { + mpage->form = FORM_SRC; + mpage->sec = mandoc_strdup(meta->msec); + mpage->arch = mandoc_strdup(mlink->arch); + mpage->title = mandoc_strdup(meta->title); + } else + meta = NULL; + } + + assert(mpage->desc == NULL); + if (meta == NULL || meta->sodest != NULL) { + mpage->sec = mandoc_strdup(mlink->dsec); + mpage->arch = mandoc_strdup(mlink->arch); + mpage->title = mandoc_strdup(mlink->name); + if (meta == NULL) { + mpage->form = FORM_CAT; + parse_cat(mpage, fd); + } else + mpage->form = FORM_SRC; + } else if (meta->macroset == MACROSET_MDOC) + parse_mdoc(mpage, meta, meta->first); + else + parse_man(mpage, meta, meta->first); + if (mpage->desc == NULL) { + mpage->desc = mandoc_strdup(mlink->name); + if (warnings) + say(mlink->file, "No one-line description, " + "using filename \"%s\"", mlink->name); + } + + for (mlink = mpage->mlinks; + mlink != NULL; + mlink = mlink->next) { + putkey(mpage, mlink->name, NAME_FILE); + if (warnings && !use_all) + mlink_check(mpage, mlink); + } + + dbadd(dba, mpage); + +nextpage: + ohash_delete(&strings); + ohash_delete(&names); + } +} + +static void +parse_cat(struct mpage *mpage, int fd) +{ + FILE *stream; + struct mlink *mlink; + char *line, *p, *title, *sec; + size_t linesz, plen, titlesz; + ssize_t len; + int offs; + + mlink = mpage->mlinks; + stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r"); + if (stream == NULL) { + if (fd != -1) + close(fd); + if (warnings) + say(mlink->file, "&fopen"); + return; + } + + line = NULL; + linesz = 0; + + /* Parse the section number from the header line. 
*/ + + while (getline(&line, &linesz, stream) != -1) { + if (*line == '\n') + continue; + if ((sec = strchr(line, '(')) == NULL) + break; + if ((p = strchr(++sec, ')')) == NULL) + break; + free(mpage->sec); + mpage->sec = mandoc_strndup(sec, p - sec); + if (warnings && *mlink->dsec != '\0' && + strcasecmp(mpage->sec, mlink->dsec)) + say(mlink->file, + "Section \"%s\" manual in %s directory", + mpage->sec, mlink->dsec); + break; + } + + /* Skip to first blank line. */ + + while (line == NULL || *line != '\n') + if (getline(&line, &linesz, stream) == -1) + break; + + /* + * Assume the first line that is not indented + * is the first section header. Skip to it. + */ + + while (getline(&line, &linesz, stream) != -1) + if (*line != '\n' && *line != ' ') + break; + + /* + * Read up until the next section into a buffer. + * Strip the leading and trailing newline from each read line, + * appending a trailing space. + * Ignore empty (whitespace-only) lines. + */ + + titlesz = 0; + title = NULL; + + while ((len = getline(&line, &linesz, stream)) != -1) { + if (*line != ' ') + break; + offs = 0; + while (isspace((unsigned char)line[offs])) + offs++; + if (line[offs] == '\0') + continue; + title = mandoc_realloc(title, titlesz + len - offs); + memcpy(title + titlesz, line + offs, len - offs); + titlesz += len - offs; + title[titlesz - 1] = ' '; + } + free(line); + + /* + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. + */ + + if (NULL == title || '\0' == *title) { + if (warnings) + say(mlink->file, "Cannot find NAME section"); + fclose(stream); + free(title); + return; + } + + title[titlesz - 1] = '\0'; + + /* + * Skip to the first dash. + * Use the remaining line as the description (no more than 70 + * bytes). + */ + + if (NULL != (p = strstr(title, "- "))) { + for (p += 2; ' ' == *p || '\b' == *p; p++) + /* Skip to next word. 
*/ ; + } else { + if (warnings) + say(mlink->file, "No dash in title line, " + "reusing \"%s\" as one-line description", title); + p = title; + } + + plen = strlen(p); + + /* Strip backspace-encoding from line. */ + + while (NULL != (line = memchr(p, '\b', plen))) { + len = line - p; + if (0 == len) { + memmove(line, line + 1, plen--); + continue; + } + memmove(line - 1, line + 1, plen - len); + plen -= 2; + } + + /* + * Cut off excessive one-line descriptions. + * Bad pages are not worth better heuristics. + */ + + mpage->desc = mandoc_strndup(p, 150); + fclose(stream); + free(title); +} + +/* + * Put a type/word pair into the word database for this particular file. + */ +static void +putkey(const struct mpage *mpage, char *value, uint64_t type) +{ + putkeys(mpage, value, strlen(value), type); +} + +/* + * Grok all nodes at or below a certain mdoc node into putkey(). + */ +static void +putmdockey(const struct mpage *mpage, + const struct roff_node *n, uint64_t m, int taboo) +{ + + for ( ; NULL != n; n = n->next) { + if (n->flags & taboo) + continue; + if (NULL != n->child) + putmdockey(mpage, n->child, m, taboo); + if (n->type == ROFFT_TEXT) + putkey(mpage, n->string, m); + } +} + +static void +parse_man(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + const struct roff_node *head, *body; + char *start, *title; + char byte; + size_t sz; + + if (n == NULL) + return; + + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. + */ + + if (n->type == ROFFT_BODY && n->tok == MAN_SH) { + body = n; + if ((head = body->parent->head) != NULL && + (head = head->child) != NULL && + head->next == NULL && + head->type == ROFFT_TEXT && + strcmp(head->string, "NAME") == 0 && + body->child != NULL) { + + /* + * Suck the entire NAME section into memory. 
+ * Yes, we might run away. + * But too many manuals have big, spread-out + * NAME sections over many lines. + */ + + title = NULL; + deroff(&title, body); + if (NULL == title) + return; + + /* + * Go through a special heuristic dance here. + * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + start = title; + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[sz]) + break; + + byte = start[sz]; + start[sz] = '\0'; + + /* + * Assume a stray trailing comma in the + * name list if a name begins with a dash. + */ + + if ('-' == start[0] || + ('\\' == start[0] && '-' == start[1])) + break; + + putkey(mpage, start, NAME_TITLE); + if ( ! (mpage->name_head_done || + strcasecmp(start, meta->title))) { + putkey(mpage, start, NAME_HEAD); + mpage->name_head_done = 1; + } + + if (' ' == byte) { + start += sz + 1; + break; + } + + assert(',' == byte); + start += sz + 1; + while (' ' == *start) + start++; + } + + if (start == title) { + putkey(mpage, start, NAME_TITLE); + if ( ! (mpage->name_head_done || + strcasecmp(start, meta->title))) { + putkey(mpage, start, NAME_HEAD); + mpage->name_head_done = 1; + } + free(title); + return; + } + + while (isspace((unsigned char)*start)) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-\\-", 4)) + start += 4; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; + + while (' ' == *start) + start++; + + /* + * Cut off excessive one-line descriptions. + * Bad pages are not worth better heuristics. 
+ */ + + mpage->desc = mandoc_strndup(start, 150); + free(title); + return; + } + } + + for (n = n->child; n; n = n->next) { + if (NULL != mpage->desc) + break; + parse_man(mpage, meta, n); + } +} + +static void +parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + const struct mdoc_handler *handler; + + for (n = n->child; n != NULL; n = n->next) { + if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) + continue; + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + handler = mdoc_handlers + (n->tok - MDOC_Dd); + if (n->flags & handler->taboo) + continue; + + switch (n->type) { + case ROFFT_ELEM: + case ROFFT_BLOCK: + case ROFFT_HEAD: + case ROFFT_BODY: + case ROFFT_TAIL: + if (handler->fp != NULL && + (*handler->fp)(mpage, meta, n) == 0) + break; + if (handler->mask) + putmdockey(mpage, n->child, + handler->mask, handler->taboo); + break; + default: + continue; + } + if (NULL != n->child) + parse_mdoc(mpage, meta, n); + } +} + +static int +parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + uint64_t mask; + + mask = TYPE_Fa; + if (n->sec == SEC_SYNOPSIS) + mask |= TYPE_Vt; + + putmdockey(mpage, n->child, mask, 0); + return 0; +} + +static int +parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *start, *end; + size_t sz; + + if (SEC_SYNOPSIS != n->sec || + NULL == (n = n->child) || + n->type != ROFFT_TEXT) + return 0; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, e.g., #define). + */ + + if (strcmp("#include", n->string)) + return 0; + + if ((n = n->next) == NULL || n->type != ROFFT_TEXT) + return 0; + + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. 
+ */ + + start = n->string; + if ('<' == *start || '"' == *start) + start++; + + if (0 == (sz = strlen(start))) + return 0; + + end = &start[(int)sz - 1]; + if ('>' == *end || '"' == *end) + end--; + + if (end > start) + putkeys(mpage, start, end - start + 1, TYPE_In); + return 0; +} + +static void +parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) +{ + char *cp; + size_t sz; + + if (n->type != ROFFT_TEXT) + return; + + /* Skip function pointer punctuation. */ + + cp = n->string; + while (*cp == '(' || *cp == '*') + cp++; + sz = strcspn(cp, "()"); + + putkeys(mpage, cp, sz, TYPE_Fn); + if (n->sec == SEC_SYNOPSIS) + putkeys(mpage, cp, sz, NAME_SYN); +} + +static int +parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + uint64_t mask; + + if (n->child == NULL) + return 0; + + parse_mdoc_fname(mpage, n->child); + + n = n->child->next; + if (n != NULL && n->type == ROFFT_TEXT) { + mask = TYPE_Fa; + if (n->sec == SEC_SYNOPSIS) + mask |= TYPE_Vt; + putmdockey(mpage, n, mask, 0); + } + + return 0; +} + +static int +parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (n->type != ROFFT_HEAD) + return 1; + + if (n->child != NULL) + parse_mdoc_fname(mpage, n->child); + + return 0; +} + +static int +parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *cp; + + if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) + return 0; + + if (n->child != NULL && + n->child->next == NULL && + n->child->type == ROFFT_TEXT) + return 1; + + cp = NULL; + deroff(&cp, n); + if (cp != NULL) { + putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || + n->type == ROFFT_BODY ? 
TYPE_Va : 0)); + free(cp); + } + + return 0; +} + +static int +parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *cp; + + if (NULL == (n = n->child)) + return 0; + + if (NULL == n->next) { + putkey(mpage, n->string, TYPE_Xr); + return 0; + } + + mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); + putkey(mpage, cp, TYPE_Xr); + free(cp); + return 0; +} + +static int +parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (n->type == ROFFT_BODY) + deroff(&mpage->desc, n); + return 0; +} + +static int +parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (SEC_NAME == n->sec) + putmdockey(mpage, n->child, NAME_TITLE, 0); + else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { + if (n->child == NULL) + putkey(mpage, meta->name, NAME_SYN); + else + putmdockey(mpage, n->child, NAME_SYN, 0); + } + if ( ! (mpage->name_head_done || + n->child == NULL || n->child->string == NULL || + strcasecmp(n->child->string, meta->title))) { + putkey(mpage, n->child->string, NAME_HEAD); + mpage->name_head_done = 1; + } + return 0; +} + +static int +parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; +} + +static int +parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + return n->type == ROFFT_HEAD; +} + +/* + * Add a string to the hash table for the current manual. + * Each string has a bitmask telling which macros it belongs to. + * When we finish the manual, we'll dump the table. 
+ */ +static void +putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) +{ + struct ohash *htab; + struct str *s; + const char *end; + unsigned int slot; + int i, mustfree; + + if (0 == sz) + return; + + mustfree = render_string(&cp, &sz); + + if (TYPE_Nm & v) { + htab = &names; + v &= name_mask; + if (v & NAME_FIRST) + name_mask &= ~NAME_FIRST; + if (debug > 1) + say(mpage->mlinks->file, + "Adding name %*s, bits=0x%llx", (int)sz, cp, + (unsigned long long)v); + } else { + htab = &strings; + if (debug > 1) + for (i = 0; i < KEY_MAX; i++) + if ((uint64_t)1 << i & v) + say(mpage->mlinks->file, + "Adding key %s=%*s", + mansearch_keynames[i], (int)sz, cp); + } + + end = cp + sz; + slot = ohash_qlookupi(htab, cp, &end); + s = ohash_find(htab, slot); + + if (NULL != s && mpage == s->mpage) { + s->mask |= v; + return; + } else if (NULL == s) { + s = mandoc_calloc(1, sizeof(struct str) + sz + 1); + memcpy(s->key, cp, sz); + ohash_insert(htab, slot, s); + } + s->mpage = mpage; + s->mask = v; + + if (mustfree) + free(cp); +} + +/* + * Take a Unicode codepoint and produce its UTF-8 encoding. + * This isn't the best way to do this, but it works. + * The magic numbers are from the UTF-8 packaging. + * They're not as scary as they seem: read the UTF-8 spec for details. 
+ */ +static size_t +utf8(unsigned int cp, char out[7]) +{ + size_t rc; + + rc = 0; + if (cp <= 0x0000007F) { + rc = 1; + out[0] = (char)cp; + } else if (cp <= 0x000007FF) { + rc = 2; + out[0] = (cp >> 6 & 31) | 192; + out[1] = (cp & 63) | 128; + } else if (cp <= 0x0000FFFF) { + rc = 3; + out[0] = (cp >> 12 & 15) | 224; + out[1] = (cp >> 6 & 63) | 128; + out[2] = (cp & 63) | 128; + } else if (cp <= 0x001FFFFF) { + rc = 4; + out[0] = (cp >> 18 & 7) | 240; + out[1] = (cp >> 12 & 63) | 128; + out[2] = (cp >> 6 & 63) | 128; + out[3] = (cp & 63) | 128; + } else if (cp <= 0x03FFFFFF) { + rc = 5; + out[0] = (cp >> 24 & 3) | 248; + out[1] = (cp >> 18 & 63) | 128; + out[2] = (cp >> 12 & 63) | 128; + out[3] = (cp >> 6 & 63) | 128; + out[4] = (cp & 63) | 128; + } else if (cp <= 0x7FFFFFFF) { + rc = 6; + out[0] = (cp >> 30 & 1) | 252; + out[1] = (cp >> 24 & 63) | 128; + out[2] = (cp >> 18 & 63) | 128; + out[3] = (cp >> 12 & 63) | 128; + out[4] = (cp >> 6 & 63) | 128; + out[5] = (cp & 63) | 128; + } else + return 0; + + out[rc] = '\0'; + return rc; +} + +/* + * If the string contains escape sequences, + * replace it with an allocated rendering and return 1, + * such that the caller can free it after use. + * Otherwise, do nothing and return 0. + */ +static int +render_string(char **public, size_t *psz) +{ + const char *src, *scp, *addcp, *seq; + char *dst; + size_t ssz, dsz, addsz; + char utfbuf[7], res[6]; + int seqlen, unicode; + + res[0] = '\\'; + res[1] = '\t'; + res[2] = ASCII_NBRSP; + res[3] = ASCII_HYPH; + res[4] = ASCII_BREAK; + res[5] = '\0'; + + src = scp = *public; + ssz = *psz; + dst = NULL; + dsz = 0; + + while (scp < src + *psz) { + + /* Leave normal characters unchanged. */ + + if (strchr(res, *scp) == NULL) { + if (dst != NULL) + dst[dsz++] = *scp; + scp++; + continue; + } + + /* + * Found something that requires replacing, + * make sure we have a destination buffer. 
+ */ + + if (dst == NULL) { + dst = mandoc_malloc(ssz + 1); + dsz = scp - src; + memcpy(dst, src, dsz); + } + + /* Handle single-char special characters. */ + + switch (*scp) { + case '\\': + break; + case '\t': + case ASCII_NBRSP: + dst[dsz++] = ' '; + scp++; + continue; + case ASCII_HYPH: + dst[dsz++] = '-'; + /* FALLTHROUGH */ + case ASCII_BREAK: + scp++; + continue; + default: + abort(); + } + + /* + * Found an escape sequence. + * Read past the slash, then parse it. + * Ignore everything except characters. + */ + + scp++; + if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) + continue; + + /* + * Render the special character + * as either UTF-8 or ASCII. + */ + + if (write_utf8) { + unicode = mchars_spec2cp(seq, seqlen); + if (unicode <= 0) + continue; + addsz = utf8(unicode, utfbuf); + if (addsz == 0) + continue; + addcp = utfbuf; + } else { + addcp = mchars_spec2str(seq, seqlen, &addsz); + if (addcp == NULL) + continue; + if (*addcp == ASCII_NBRSP) { + addcp = " "; + addsz = 1; + } + } + + /* Copy the rendered glyph into the stream. */ + + ssz += addsz; + dst = mandoc_realloc(dst, ssz + 1); + memcpy(dst + dsz, addcp, addsz); + dsz += addsz; + } + if (dst != NULL) { + *public = dst; + *psz = dsz; + } + + /* Trim trailing whitespace and NUL-terminate. */ + + while (*psz > 0 && (*public)[*psz - 1] == ' ') + --*psz; + if (dst != NULL) { + (*public)[*psz] = '\0'; + return 1; + } else + return 0; +} + +static void +dbadd_mlink(const struct mlink *mlink) +{ + dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE); + dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec); + dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec); + dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch); + dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file); +} + +/* + * Flush the current page's terms (and their bits) into the database. + * Also, handle escape sequences at the last possible moment. 
+ */ +static void +dbadd(struct dba *dba, struct mpage *mpage) +{ + struct mlink *mlink; + struct str *key; + char *cp; + uint64_t mask; + size_t i; + unsigned int slot; + int mustfree; + + mlink = mpage->mlinks; + + if (nodb) { + for (key = ohash_first(&names, &slot); NULL != key; + key = ohash_next(&names, &slot)) + free(key); + for (key = ohash_first(&strings, &slot); NULL != key; + key = ohash_next(&strings, &slot)) + free(key); + if (0 == debug) + return; + while (NULL != mlink) { + fputs(mlink->name, stdout); + if (NULL == mlink->next || + strcmp(mlink->dsec, mlink->next->dsec) || + strcmp(mlink->fsec, mlink->next->fsec) || + strcmp(mlink->arch, mlink->next->arch)) { + putchar('('); + if ('\0' == *mlink->dsec) + fputs(mlink->fsec, stdout); + else + fputs(mlink->dsec, stdout); + if ('\0' != *mlink->arch) + printf("/%s", mlink->arch); + putchar(')'); + } + mlink = mlink->next; + if (NULL != mlink) + fputs(", ", stdout); + } + printf(" - %s\n", mpage->desc); + return; + } + + if (debug) + say(mlink->file, "Adding to database"); + + cp = mpage->desc; + i = strlen(cp); + mustfree = render_string(&cp, &i); + mpage->dba = dba_page_new(dba->pages, + *mpage->arch == '\0' ? 
mlink->arch : mpage->arch, + cp, mlink->file, mpage->form); + if (mustfree) + free(cp); + dba_page_add(mpage->dba, DBP_SECT, mpage->sec); + + while (mlink != NULL) { + dbadd_mlink(mlink); + mlink = mlink->next; + } + + for (key = ohash_first(&names, &slot); NULL != key; + key = ohash_next(&names, &slot)) { + assert(key->mpage == mpage); + dba_page_alias(mpage->dba, key->key, key->mask); + free(key); + } + for (key = ohash_first(&strings, &slot); NULL != key; + key = ohash_next(&strings, &slot)) { + assert(key->mpage == mpage); + i = 0; + for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) { + if (key->mask & mask) + dba_macro_add(dba->macros, i, + key->key, mpage->dba); + i++; + } + free(key); + } +} + +static void +dbprune(struct dba *dba) +{ + struct dba_array *page, *files; + char *file; + + dba_array_FOREACH(dba->pages, page) { + files = dba_array_get(page, DBP_FILE); + dba_array_FOREACH(files, file) { + if (*file < ' ') + file++; + if (ohash_find(&mlinks, ohash_qlookup(&mlinks, + file)) != NULL) { + if (debug) + say(file, "Deleting from database"); + dba_array_del(dba->pages); + break; + } + } + } +} + +/* + * Write the database from memory to disk. + */ +static void +dbwrite(struct dba *dba) +{ + struct stat sb1, sb2; + char tfn[33], *cp1, *cp2; + off_t i; + int fd1, fd2; + + /* + * Do not write empty databases, and delete existing ones + * when makewhatis -u causes them to become empty. + */ + + dba_array_start(dba->pages); + if (dba_array_next(dba->pages) == NULL) { + if (unlink(MANDOC_DB) == -1 && errno != ENOENT) + say(MANDOC_DB, "&unlink"); + return; + } + + /* + * Build the database in a temporary file, + * then atomically move it into place. 
+ */ + + if (dba_write(MANDOC_DB "~", dba) != -1) { + if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "&rename"); + unlink(MANDOC_DB "~"); + } + return; + } + + /* + * We lack write permission and cannot replace the database + * file, but let's at least check whether the data changed. + */ + + (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); + if (mkdtemp(tfn) == NULL) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&%s", tfn); + return; + } + cp1 = cp2 = MAP_FAILED; + fd1 = fd2 = -1; + (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn)); + if (dba_write(tfn, dba) == -1) { + say(tfn, "&dba_write"); + goto err; + } + if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) { + say(MANDOC_DB, "&open"); + goto err; + } + if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) { + say(tfn, "&open"); + goto err; + } + if (fstat(fd1, &sb1) == -1) { + say(MANDOC_DB, "&fstat"); + goto err; + } + if (fstat(fd2, &sb2) == -1) { + say(tfn, "&fstat"); + goto err; + } + if (sb1.st_size != sb2.st_size) + goto err; + if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE, + fd1, 0)) == MAP_FAILED) { + say(MANDOC_DB, "&mmap"); + goto err; + } + if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE, + fd2, 0)) == MAP_FAILED) { + say(tfn, "&mmap"); + goto err; + } + for (i = 0; i < sb1.st_size; i++) + if (cp1[i] != cp2[i]) + goto err; + goto out; + +err: + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "Data changed, but cannot replace database"); + +out: + if (cp1 != MAP_FAILED) + munmap(cp1, sb1.st_size); + if (cp2 != MAP_FAILED) + munmap(cp2, sb2.st_size); + if (fd1 != -1) + close(fd1); + if (fd2 != -1) + close(fd2); + unlink(tfn); + *strrchr(tfn, '/') = '\0'; + rmdir(tfn); +} + +static int +set_basedir(const char *targetdir, int report_baddir) +{ + static char startdir[PATH_MAX]; + static int getcwd_status; /* 1 = ok, 2 = failure */ + static int chdir_status; /* 1 = changed directory */ + + /* + * Remember the original working 
directory, if possible. + * This will be needed if the second or a later directory + * on the command line is given as a relative path. + * Do not error out if the current directory is not + * searchable: Maybe it won't be needed after all. + */ + if (getcwd_status == 0) { + if (getcwd(startdir, sizeof(startdir)) == NULL) { + getcwd_status = 2; + (void)strlcpy(startdir, strerror(errno), + sizeof(startdir)); + } else + getcwd_status = 1; + } + + /* + * We are leaving the old base directory. + * Do not use it any longer, not even for messages. + */ + *basedir = '\0'; + basedir_len = 0; + + /* + * If and only if the directory was changed earlier and + * the next directory to process is given as a relative path, + * first go back, or bail out if that is impossible. + */ + if (chdir_status && *targetdir != '/') { + if (getcwd_status == 2) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "getcwd: %s", startdir); + return 0; + } + if (chdir(startdir) == -1) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&chdir %s", startdir); + return 0; + } + } + + /* + * Always resolve basedir to the canonicalized absolute + * pathname and append a trailing slash, such that + * we can reliably check whether files are inside. 
+ */ + if (realpath(targetdir, basedir) == NULL) { + if (report_baddir || errno != ENOENT) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "&%s: realpath", targetdir); + } + *basedir = '\0'; + return 0; + } else if (chdir(basedir) == -1) { + if (report_baddir || errno != ENOENT) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "&chdir"); + } + *basedir = '\0'; + return 0; + } + chdir_status = 1; + basedir_len = strlen(basedir); + if (basedir[basedir_len - 1] != '/') { + if (basedir_len >= PATH_MAX - 1) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "Filename too long"); + *basedir = '\0'; + basedir_len = 0; + return 0; + } + basedir[basedir_len++] = '/'; + basedir[basedir_len] = '\0'; + } + return 1; +} + +static void +say(const char *file, const char *format, ...) +{ + va_list ap; + int use_errno; + + if (*basedir != '\0') + fprintf(stderr, "%s", basedir); + if (*basedir != '\0' && *file != '\0') + fputc('/', stderr); + if (*file != '\0') + fprintf(stderr, "%s", file); + + use_errno = 1; + if (format != NULL) { + switch (*format) { + case '&': + format++; + break; + case '\0': + format = NULL; + break; + default: + use_errno = 0; + break; + } + } + if (format != NULL) { + if (*basedir != '\0' || *file != '\0') + fputs(": ", stderr); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + } + if (use_errno) { + if (*basedir != '\0' || *file != '\0' || format != NULL) + fputs(": ", stderr); + perror(NULL); + } else + fputc('\n', stderr); +} diff --git a/usr.bin/mandoc/manpath.c b/usr.bin/mandoc/manpath.c new file mode 100644 index 0000000..dcd12a0 --- /dev/null +++ b/usr.bin/mandoc/manpath.c @@ -0,0 +1,342 @@ +/* $OpenBSD: manpath.c,v 1.28 2020/02/10 14:42:03 schwarze Exp $ */ +/* + * Copyright (c) 2011,2014,2015,2017-2019 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, 
provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "manconf.h" + +#define MAN_CONF_FILE "/etc/man.conf" +#define MANPATH_BASE "/usr/share/man:/usr/X11R6/man" +#define MANPATH_DEFAULT "/usr/share/man:/usr/X11R6/man:/usr/local/man" + +static void manconf_file(struct manconf *, const char *); +static void manpath_add(struct manpaths *, const char *, char); +static void manpath_parseline(struct manpaths *, char *, char); + + +void +manconf_parse(struct manconf *conf, const char *file, + char *defp, char *auxp) +{ + char *insert; + + /* Always prepend -m. */ + manpath_parseline(&conf->manpath, auxp, 'm'); + + /* If -M is given, it overrides everything else. */ + if (NULL != defp) { + manpath_parseline(&conf->manpath, defp, 'M'); + return; + } + + /* MANPATH and man.conf(5) cooperate. */ + defp = getenv("MANPATH"); + if (NULL == file) + file = MAN_CONF_FILE; + + /* No MANPATH; use man.conf(5) only. */ + if (NULL == defp || '\0' == defp[0]) { + manconf_file(conf, file); + return; + } + + /* Prepend man.conf(5) to MANPATH. */ + if (':' == defp[0]) { + manconf_file(conf, file); + manpath_parseline(&conf->manpath, defp, '\0'); + return; + } + + /* Append man.conf(5) to MANPATH. 
*/ + if (':' == defp[strlen(defp) - 1]) { + manpath_parseline(&conf->manpath, defp, '\0'); + manconf_file(conf, file); + return; + } + + /* Insert man.conf(5) into MANPATH. */ + insert = strstr(defp, "::"); + if (NULL != insert) { + *insert++ = '\0'; + manpath_parseline(&conf->manpath, defp, '\0'); + manconf_file(conf, file); + manpath_parseline(&conf->manpath, insert + 1, '\0'); + return; + } + + /* MANPATH overrides man.conf(5) completely. */ + manpath_parseline(&conf->manpath, defp, '\0'); +} + +void +manpath_base(struct manpaths *dirs) +{ + char path_base[] = MANPATH_BASE; + manpath_parseline(dirs, path_base, '\0'); +} + +/* + * Parse a FULL pathname from a colon-separated list of arrays. + */ +static void +manpath_parseline(struct manpaths *dirs, char *path, char option) +{ + char *dir; + + if (NULL == path) + return; + + for (dir = strtok(path, ":"); dir; dir = strtok(NULL, ":")) + manpath_add(dirs, dir, option); +} + +/* + * Add a directory to the array, ignoring bad directories. + * Grow the array one-by-one for simplicity's sake. 
+ */ +static void +manpath_add(struct manpaths *dirs, const char *dir, char option) +{ + char buf[PATH_MAX]; + struct stat sb; + char *cp; + size_t i; + + if ((cp = realpath(dir, buf)) == NULL) + goto fail; + + for (i = 0; i < dirs->sz; i++) + if (strcmp(dirs->paths[i], dir) == 0) + return; + + if (stat(cp, &sb) == -1) + goto fail; + + dirs->paths = mandoc_reallocarray(dirs->paths, + dirs->sz + 1, sizeof(*dirs->paths)); + dirs->paths[dirs->sz++] = mandoc_strdup(cp); + return; + +fail: + if (option != '\0') + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, + "-%c %s: %s", option, dir, strerror(errno)); +} + +void +manconf_free(struct manconf *conf) +{ + size_t i; + + for (i = 0; i < conf->manpath.sz; i++) + free(conf->manpath.paths[i]); + + free(conf->manpath.paths); + free(conf->output.includes); + free(conf->output.man); + free(conf->output.paper); + free(conf->output.style); +} + +static void +manconf_file(struct manconf *conf, const char *file) +{ + const char *const toks[] = { "manpath", "output" }; + char manpath_default[] = MANPATH_DEFAULT; + + FILE *stream; + char *line, *cp, *ep; + size_t linesz, tok, toklen; + ssize_t linelen; + + if ((stream = fopen(file, "r")) == NULL) + goto out; + + line = NULL; + linesz = 0; + + while ((linelen = getline(&line, &linesz, stream)) != -1) { + cp = line; + ep = cp + linelen - 1; + while (ep > cp && isspace((unsigned char)*ep)) + *ep-- = '\0'; + while (isspace((unsigned char)*cp)) + cp++; + if (cp == ep || *cp == '#') + continue; + + for (tok = 0; tok < sizeof(toks)/sizeof(toks[0]); tok++) { + toklen = strlen(toks[tok]); + if (cp + toklen < ep && + isspace((unsigned char)cp[toklen]) && + strncmp(cp, toks[tok], toklen) == 0) { + cp += toklen; + while (isspace((unsigned char)*cp)) + cp++; + break; + } + } + + switch (tok) { + case 0: /* manpath */ + manpath_add(&conf->manpath, cp, '\0'); + *manpath_default = '\0'; + break; + case 1: /* output */ + manconf_output(&conf->output, cp, 1); + break; + default: + break; + } + } + 
free(line); + fclose(stream); + +out: + if (*manpath_default != '\0') + manpath_parseline(&conf->manpath, manpath_default, '\0'); +} + +int +manconf_output(struct manoutput *conf, const char *cp, int fromfile) +{ + const char *const toks[] = { + "includes", "man", "paper", "style", "indent", "width", + "tag", "fragment", "mdoc", "noval", "toc" + }; + const size_t ntoks = sizeof(toks) / sizeof(toks[0]); + + const char *errstr; + char *oldval; + size_t len, tok; + + for (tok = 0; tok < ntoks; tok++) { + len = strlen(toks[tok]); + if (strncmp(cp, toks[tok], len) == 0 && + strchr(" = ", cp[len]) != NULL) { + cp += len; + if (*cp == '=') + cp++; + while (isspace((unsigned char)*cp)) + cp++; + break; + } + } + + if (tok < 6 && *cp == '\0') { + mandoc_msg(MANDOCERR_BADVAL_MISS, 0, 0, "-O %s=?", toks[tok]); + return -1; + } + if (tok > 6 && tok < ntoks && *cp != '\0') { + mandoc_msg(MANDOCERR_BADVAL, 0, 0, "-O %s=%s", toks[tok], cp); + return -1; + } + + switch (tok) { + case 0: + if (conf->includes != NULL) { + oldval = mandoc_strdup(conf->includes); + break; + } + conf->includes = mandoc_strdup(cp); + return 0; + case 1: + if (conf->man != NULL) { + oldval = mandoc_strdup(conf->man); + break; + } + conf->man = mandoc_strdup(cp); + return 0; + case 2: + if (conf->paper != NULL) { + oldval = mandoc_strdup(conf->paper); + break; + } + conf->paper = mandoc_strdup(cp); + return 0; + case 3: + if (conf->style != NULL) { + oldval = mandoc_strdup(conf->style); + break; + } + conf->style = mandoc_strdup(cp); + return 0; + case 4: + if (conf->indent) { + mandoc_asprintf(&oldval, "%zu", conf->indent); + break; + } + conf->indent = strtonum(cp, 0, 1000, &errstr); + if (errstr == NULL) + return 0; + mandoc_msg(MANDOCERR_BADVAL_BAD, 0, 0, + "-O indent=%s is %s", cp, errstr); + return -1; + case 5: + if (conf->width) { + mandoc_asprintf(&oldval, "%zu", conf->width); + break; + } + conf->width = strtonum(cp, 1, 1000, &errstr); + if (errstr == NULL) + return 0; + 
mandoc_msg(MANDOCERR_BADVAL_BAD, 0, 0, + "-O width=%s is %s", cp, errstr); + return -1; + case 6: + if (conf->tag != NULL) { + oldval = mandoc_strdup(conf->tag); + break; + } + conf->tag = mandoc_strdup(cp); + return 0; + case 7: + conf->fragment = 1; + return 0; + case 8: + conf->mdoc = 1; + return 0; + case 9: + conf->noval = 1; + return 0; + case 10: + conf->toc = 1; + return 0; + default: + mandoc_msg(MANDOCERR_BADARG_BAD, 0, 0, "-O %s", cp); + return -1; + } + if (fromfile) { + free(oldval); + return 0; + } else { + mandoc_msg(MANDOCERR_BADVAL_DUPE, 0, 0, + "-O %s=%s: already set to %s", toks[tok], cp, oldval); + free(oldval); + return -1; + } +} diff --git a/usr.bin/mandoc/mansearch.c b/usr.bin/mandoc/mansearch.c new file mode 100644 index 0000000..7cb3468 --- /dev/null +++ b/usr.bin/mandoc/mansearch.c @@ -0,0 +1,842 @@ +/* $OpenBSD: mansearch.c,v 1.65 2019/07/01 22:43:03 schwarze Exp $ */ +/* + * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013-2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/cdefs.h> +#include <sys/mman.h> +#include <sys/types.h> + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <glob.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "manconf.h" +#include "mansearch.h" +#include "dbm.h" + +struct expr { + /* Used for terms: */ + struct dbm_match match; /* Match type and expression. */ + uint64_t bits; /* Type mask. */ + /* Used for OR and AND groups: */ + struct expr *next; /* Next child in the parent group. */ + struct expr *child; /* First child in this group. */ + enum { EXPR_TERM, EXPR_OR, EXPR_AND } type; +}; + +const char *const mansearch_keynames[KEY_MAX] = { + "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn", + "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft", + "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox", + "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk", + "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd" +}; + + +static struct ohash *manmerge(struct expr *, struct ohash *); +static struct ohash *manmerge_term(struct expr *, struct ohash *); +static struct ohash *manmerge_or(struct expr *, struct ohash *); +static struct ohash *manmerge_and(struct expr *, struct ohash *); +static char *buildnames(const struct dbm_page *); +static char *buildoutput(size_t, struct dbm_page *); +static size_t lstlen(const char *, size_t); +static void lstcat(char *, size_t *, const char *, const char *); +static int lstmatch(const char *, const char *); +static struct expr *exprcomp(const struct mansearch *, + int, char *[], int *); +static struct expr *expr_and(const struct mansearch *, + int, char *[], int *); +static struct expr *exprterm(const struct mansearch *, + int, char *[], int *); +static void exprfree(struct expr *); +static int manpage_compare(const void *, const void *); + + +int 
+mansearch(const struct mansearch *search, + const struct manpaths *paths, + int argc, char *argv[], + struct manpage **res, size_t *sz) +{ + char buf[PATH_MAX]; + struct dbm_res *rp; + struct expr *e; + struct dbm_page *page; + struct manpage *mpage; + struct ohash *htab; + size_t cur, i, maxres, outkey; + unsigned int slot; + int argi, chdir_status, getcwd_status, im; + + argi = 0; + if ((e = exprcomp(search, argc, argv, &argi)) == NULL) { + *sz = 0; + return 0; + } + + cur = maxres = 0; + if (res != NULL) + *res = NULL; + + outkey = KEY_Nd; + if (search->outkey != NULL) + for (im = 0; im < KEY_MAX; im++) + if (0 == strcasecmp(search->outkey, + mansearch_keynames[im])) { + outkey = im; + break; + } + + /* + * Remember the original working directory, if possible. + * This will be needed if the second or a later directory + * is given as a relative path. + * Do not error out if the current directory is not + * searchable: Maybe it won't be needed after all. + */ + + if (getcwd(buf, PATH_MAX) == NULL) { + getcwd_status = 0; + (void)strlcpy(buf, strerror(errno), sizeof(buf)); + } else + getcwd_status = 1; + + /* + * Loop over the directories (containing databases) for us to + * search. + * Don't let missing/bad databases/directories phase us. + * In each, try to open the resident database and, if it opens, + * scan it for our match expression. + */ + + chdir_status = 0; + for (i = 0; i < paths->sz; i++) { + if (chdir_status && paths->paths[i][0] != '/') { + if ( ! 
getcwd_status) { + warnx("%s: getcwd: %s", paths->paths[i], buf); + continue; + } else if (chdir(buf) == -1) { + warn("%s", buf); + continue; + } + } + if (chdir(paths->paths[i]) == -1) { + warn("%s", paths->paths[i]); + continue; + } + chdir_status = 1; + + if (dbm_open(MANDOC_DB) == -1) { + if (errno != ENOENT) + warn("%s/%s", paths->paths[i], MANDOC_DB); + continue; + } + + if ((htab = manmerge(e, NULL)) == NULL) { + dbm_close(); + continue; + } + + for (rp = ohash_first(htab, &slot); rp != NULL; + rp = ohash_next(htab, &slot)) { + page = dbm_page_get(rp->page); + + if (lstmatch(search->sec, page->sect) == 0 || + lstmatch(search->arch, page->arch) == 0 || + (search->argmode == ARG_NAME && + rp->bits <= (int32_t)(NAME_SYN & NAME_MASK))) + continue; + + if (res == NULL) { + cur = 1; + break; + } + if (cur + 1 > maxres) { + maxres += 1024; + *res = mandoc_reallocarray(*res, + maxres, sizeof(**res)); + } + mpage = *res + cur; + mandoc_asprintf(&mpage->file, "%s/%s", + paths->paths[i], page->file + 1); + if (access(chdir_status ? page->file + 1 : + mpage->file, R_OK) == -1) { + warn("%s", mpage->file); + warnx("outdated mandoc.db contains " + "bogus %s entry, run makewhatis %s", + page->file + 1, paths->paths[i]); + free(mpage->file); + free(rp); + continue; + } + mpage->names = buildnames(page); + mpage->output = buildoutput(outkey, page); + mpage->bits = search->firstmatch ? rp->bits : 0; + mpage->ipath = i; + mpage->sec = *page->sect - '0'; + if (mpage->sec < 0 || mpage->sec > 9) + mpage->sec = 10; + mpage->form = *page->file; + free(rp); + cur++; + } + ohash_delete(htab); + free(htab); + dbm_close(); + + /* + * In man(1) mode, prefer matches in earlier trees + * over matches in later trees. 
+ */ + + if (cur && search->firstmatch) + break; + } + if (res != NULL) + qsort(*res, cur, sizeof(struct manpage), manpage_compare); + if (chdir_status && getcwd_status && chdir(buf) == -1) + warn("%s", buf); + exprfree(e); + *sz = cur; + return res != NULL || cur; +} + +/* + * Merge the results for the expression tree rooted at e + * into the the result list htab. + */ +static struct ohash * +manmerge(struct expr *e, struct ohash *htab) +{ + switch (e->type) { + case EXPR_TERM: + return manmerge_term(e, htab); + case EXPR_OR: + return manmerge_or(e->child, htab); + case EXPR_AND: + return manmerge_and(e->child, htab); + default: + abort(); + } +} + +static struct ohash * +manmerge_term(struct expr *e, struct ohash *htab) +{ + struct dbm_res res, *rp; + uint64_t ib; + unsigned int slot; + int im; + + if (htab == NULL) { + htab = mandoc_malloc(sizeof(*htab)); + mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page)); + } + + for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) { + if ((e->bits & ib) == 0) + continue; + + switch (ib) { + case TYPE_arch: + dbm_page_byarch(&e->match); + break; + case TYPE_sec: + dbm_page_bysect(&e->match); + break; + case TYPE_Nm: + dbm_page_byname(&e->match); + break; + case TYPE_Nd: + dbm_page_bydesc(&e->match); + break; + default: + dbm_page_bymacro(im - 2, &e->match); + break; + } + + /* + * When hashing for deduplication, use the unique + * page ID itself instead of a hash function; + * that is quite efficient. 
/*
 * Evaluate an AND group, starting at its first child e, and merge
 * the pages matching all children into htab.
 * If htab is NULL, the table holding the intersection is returned
 * directly; otherwise, entries whose page is not yet in htab are
 * moved into htab, the others are freed, and htab is returned.
 */
static struct ohash *
manmerge_and(struct expr *e, struct ohash *htab)
{
	struct ohash	*hand, *h1, *h2;
	struct dbm_res	*res;
	unsigned int	 slot1, slot2;

	/* Evaluate the first term of the AND clause. */

	hand = manmerge(e, NULL);

	while ((e = e->next) != NULL) {

		/*
		 * Evaluate the next term and prepare for ANDing.
		 * h1 is the table with fewer entries, which gets
		 * iterated; h2 is the other one, which is only probed.
		 */

		h2 = manmerge(e, NULL);
		if (ohash_entries(h2) < ohash_entries(hand)) {
			h1 = h2;
			h2 = hand;
		} else
			h1 = hand;
		hand = mandoc_malloc(sizeof(*hand));
		mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));

		/*
		 * Keep all pages that are in both result sets.
		 * Each entry of h1 is either freed or handed over
		 * to the new table, so none remain owned by h1.
		 */

		for (res = ohash_first(h1, &slot1); res != NULL;
		    res = ohash_next(h1, &slot1)) {
			if (ohash_find(h2, ohash_lookup_memory(h2,
			    (char *)res, sizeof(res->page),
			    res->page)) == NULL)
				free(res);
			else
				ohash_insert(hand, ohash_lookup_memory(hand,
				    (char *)res, sizeof(res->page),
				    res->page), res);
		}

		/*
		 * Discard the merged results.
		 * All entries of h2 are freed here; of h1, only
		 * the table itself is left to release.
		 */

		for (res = ohash_first(h2, &slot2); res != NULL;
		    res = ohash_next(h2, &slot2))
			free(res);
		ohash_delete(h2);
		free(h2);
		ohash_delete(h1);
		free(h1);
	}

	/* Merge the result of the AND into htab. */

	if (htab == NULL)
		return hand;

	for (res = ohash_first(hand, &slot1); res != NULL;
	    res = ohash_next(hand, &slot1)) {
		slot2 = ohash_lookup_memory(htab,
		    (char *)res, sizeof(res->page), res->page);
		if (ohash_find(htab, slot2) == NULL)
			ohash_insert(htab, slot2, res);
		else
			free(res);
	}

	/* Discard the merged result. */

	ohash_delete(hand);
	free(hand);
	return htab;
}
0 : 1 + lstlen(page->arch, 2)) + 2; + buf = mandoc_malloc(sz); + i = 0; + lstcat(buf, &i, page->name, ", "); + buf[i++] = '('; + lstcat(buf, &i, page->sect, ", "); + if (page->arch != NULL) { + buf[i++] = '/'; + lstcat(buf, &i, page->arch, ", "); + } + buf[i++] = ')'; + buf[i++] = '\0'; + assert(i == sz); + return buf; +} + +/* + * Count the buffer space needed to print the NUL-terminated + * list of NUL-terminated strings, when printing sep separator + * characters between strings. + */ +static size_t +lstlen(const char *cp, size_t sep) +{ + size_t sz; + + for (sz = 0; *cp != '\0'; cp++) { + + /* Skip names appearing only in the SYNOPSIS. */ + if (*cp <= (char)(NAME_SYN & NAME_MASK)) { + while (*cp != '\0') + cp++; + continue; + } + + /* Skip name class markers. */ + if (*cp < ' ') + cp++; + + /* Print a separator before each but the first string. */ + if (sz) + sz += sep; + + /* Copy one string. */ + while (*cp != '\0') { + sz++; + cp++; + } + } + return sz; +} + +/* + * Print the NUL-terminated list of NUL-terminated strings + * into the buffer, seperating strings with sep. + */ +static void +lstcat(char *buf, size_t *i, const char *cp, const char *sep) +{ + const char *s; + size_t i_start; + + for (i_start = *i; *cp != '\0'; cp++) { + + /* Skip names appearing only in the SYNOPSIS. */ + if (*cp <= (char)(NAME_SYN & NAME_MASK)) { + while (*cp != '\0') + cp++; + continue; + } + + /* Skip name class markers. */ + if (*cp < ' ') + cp++; + + /* Print a separator before each but the first string. */ + if (*i > i_start) { + s = sep; + while (*s != '\0') + buf[(*i)++] = *s++; + } + + /* Copy one string. */ + while (*cp != '\0') + buf[(*i)++] = *cp++; + } + +} + +/* + * Return 1 if the string *want occurs in any of the strings + * in the NUL-terminated string list *have, or 0 otherwise. + * If either argument is NULL or empty, assume no filtering + * is desired and return 1. 
/*
 * Return 1 if the string *want occurs, ignoring case, in any of
 * the strings in the NUL-terminated string list *have, or 0 otherwise.
 * If want or have is NULL, or if the list is empty, assume that
 * no filtering is desired and return 1.
 */
static int
lstmatch(const char *want, const char *have)
{
	const char	*entry;

	if (want == NULL || have == NULL || *have == '\0')
		return 1;

	/* Each entry ends with a NUL; an empty entry ends the list. */
	for (entry = have; *entry != '\0'; entry += strlen(entry) + 1)
		if (strcasestr(entry, want) != NULL)
			return 1;
	return 0;
}
+ */ +static struct expr * +exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi) +{ + struct expr *parent, *child; + int needterm, nested; + + if ((nested = *argi) == argc) + return NULL; + needterm = 1; + parent = child = NULL; + while (*argi < argc) { + if (strcmp(")", argv[*argi]) == 0) { + if (needterm) + warnx("missing term " + "before closing parenthesis"); + needterm = 0; + if (nested) + break; + warnx("ignoring unmatched right parenthesis"); + ++*argi; + continue; + } + if (strcmp("-o", argv[*argi]) == 0) { + if (needterm) { + if (*argi > 0) + warnx("ignoring -o after %s", + argv[*argi - 1]); + else + warnx("ignoring initial -o"); + } + needterm = 1; + ++*argi; + continue; + } + needterm = 0; + if (child == NULL) { + child = expr_and(search, argc, argv, argi); + continue; + } + if (parent == NULL) { + parent = mandoc_calloc(1, sizeof(*parent)); + parent->type = EXPR_OR; + parent->next = NULL; + parent->child = child; + } + child->next = expr_and(search, argc, argv, argi); + child = child->next; + } + if (needterm && *argi) + warnx("ignoring trailing %s", argv[*argi - 1]); + return parent == NULL ? 
child : parent; +} + +static struct expr * +expr_and(const struct mansearch *search, int argc, char *argv[], int *argi) +{ + struct expr *parent, *child; + int needterm; + + needterm = 1; + parent = child = NULL; + while (*argi < argc) { + if (strcmp(")", argv[*argi]) == 0) { + if (needterm) + warnx("missing term " + "before closing parenthesis"); + needterm = 0; + break; + } + if (strcmp("-o", argv[*argi]) == 0) + break; + if (strcmp("-a", argv[*argi]) == 0) { + if (needterm) { + if (*argi > 0) + warnx("ignoring -a after %s", + argv[*argi - 1]); + else + warnx("ignoring initial -a"); + } + needterm = 1; + ++*argi; + continue; + } + if (needterm == 0) + break; + if (child == NULL) { + child = exprterm(search, argc, argv, argi); + if (child != NULL) + needterm = 0; + continue; + } + needterm = 0; + if (parent == NULL) { + parent = mandoc_calloc(1, sizeof(*parent)); + parent->type = EXPR_AND; + parent->next = NULL; + parent->child = child; + } + child->next = exprterm(search, argc, argv, argi); + if (child->next != NULL) { + child = child->next; + needterm = 0; + } + } + if (needterm && *argi) + warnx("ignoring trailing %s", argv[*argi - 1]); + return parent == NULL ? 
child : parent; +} + +static struct expr * +exprterm(const struct mansearch *search, int argc, char *argv[], int *argi) +{ + char errbuf[BUFSIZ]; + struct expr *e; + char *key, *val; + uint64_t iterbit; + int cs, i, irc; + + if (strcmp("(", argv[*argi]) == 0) { + ++*argi; + e = exprcomp(search, argc, argv, argi); + if (*argi < argc) { + assert(strcmp(")", argv[*argi]) == 0); + ++*argi; + } else + warnx("unclosed parenthesis"); + return e; + } + + if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) { + cs = 0; + ++*argi; + } else + cs = 1; + + e = mandoc_calloc(1, sizeof(*e)); + e->type = EXPR_TERM; + e->bits = 0; + e->next = NULL; + e->child = NULL; + + if (search->argmode == ARG_NAME) { + e->bits = TYPE_Nm; + e->match.type = DBM_EXACT; + e->match.str = argv[(*argi)++]; + return e; + } + + /* + * Separate macro keys from search string. + * If needed, request regular expression handling. + */ + + if (search->argmode == ARG_WORD) { + e->bits = TYPE_Nm; + e->match.type = DBM_REGEX; + mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]); + cs = 0; + } else if ((val = strpbrk(argv[*argi], "=~")) == NULL) { + e->bits = TYPE_Nm | TYPE_Nd; + e->match.type = DBM_REGEX; + val = argv[*argi]; + cs = 0; + } else { + if (val == argv[*argi]) + e->bits = TYPE_Nm | TYPE_Nd; + if (*val == '=') { + e->match.type = DBM_SUB; + e->match.str = val + 1; + } else + e->match.type = DBM_REGEX; + *val++ = '\0'; + if (strstr(argv[*argi], "arch") != NULL) + cs = 0; + } + + /* Compile regular expressions. */ + + if (e->match.type == DBM_REGEX) { + e->match.re = mandoc_malloc(sizeof(*e->match.re)); + irc = regcomp(e->match.re, val, + REG_EXTENDED | REG_NOSUB | (cs ? 
0 : REG_ICASE)); + if (irc) { + regerror(irc, e->match.re, errbuf, sizeof(errbuf)); + warnx("regcomp /%s/: %s", val, errbuf); + } + if (search->argmode == ARG_WORD) + free(val); + if (irc) { + free(e->match.re); + free(e); + ++*argi; + return NULL; + } + } + + if (e->bits) { + ++*argi; + return e; + } + + /* + * Parse out all possible fields. + * If the field doesn't resolve, bail. + */ + + while (NULL != (key = strsep(&argv[*argi], ","))) { + if ('\0' == *key) + continue; + for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) { + if (0 == strcasecmp(key, mansearch_keynames[i])) { + e->bits |= iterbit; + break; + } + } + if (i == KEY_MAX) { + if (strcasecmp(key, "any")) + warnx("treating unknown key " + "\"%s\" as \"any\"", key); + e->bits |= ~0ULL; + } + } + + ++*argi; + return e; +} + +static void +exprfree(struct expr *e) +{ + if (e->next != NULL) + exprfree(e->next); + if (e->child != NULL) + exprfree(e->child); + free(e); +} diff --git a/usr.bin/mandoc/mansearch.h b/usr.bin/mandoc/mansearch.h new file mode 100644 index 0000000..c2efe7c --- /dev/null +++ b/usr.bin/mandoc/mansearch.h @@ -0,0 +1,118 @@ +/* $OpenBSD: mansearch.h,v 1.24 2019/04/30 18:48:26 schwarze Exp $ */ +/* + * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define MANDOC_DB "mandoc.db" +#define MANDOCDB_MAGIC 0x3a7d0cdb +#define MANDOCDB_VERSION 1 + +#define MACRO_MAX 36 +#define KEY_arch 0 +#define KEY_sec 1 +#define KEY_Nm 38 +#define KEY_Nd 39 +#define KEY_MAX 40 + +#define TYPE_arch 0x0000000000000001ULL +#define TYPE_sec 0x0000000000000002ULL +#define TYPE_Xr 0x0000000000000004ULL +#define TYPE_Ar 0x0000000000000008ULL +#define TYPE_Fa 0x0000000000000010ULL +#define TYPE_Fl 0x0000000000000020ULL +#define TYPE_Dv 0x0000000000000040ULL +#define TYPE_Fn 0x0000000000000080ULL +#define TYPE_Ic 0x0000000000000100ULL +#define TYPE_Pa 0x0000000000000200ULL +#define TYPE_Cm 0x0000000000000400ULL +#define TYPE_Li 0x0000000000000800ULL +#define TYPE_Em 0x0000000000001000ULL +#define TYPE_Cd 0x0000000000002000ULL +#define TYPE_Va 0x0000000000004000ULL +#define TYPE_Ft 0x0000000000008000ULL +#define TYPE_Tn 0x0000000000010000ULL +#define TYPE_Er 0x0000000000020000ULL +#define TYPE_Ev 0x0000000000040000ULL +#define TYPE_Sy 0x0000000000080000ULL +#define TYPE_Sh 0x0000000000100000ULL +#define TYPE_In 0x0000000000200000ULL +#define TYPE_Ss 0x0000000000400000ULL +#define TYPE_Ox 0x0000000000800000ULL +#define TYPE_An 0x0000000001000000ULL +#define TYPE_Mt 0x0000000002000000ULL +#define TYPE_St 0x0000000004000000ULL +#define TYPE_Bx 0x0000000008000000ULL +#define TYPE_At 0x0000000010000000ULL +#define TYPE_Nx 0x0000000020000000ULL +#define TYPE_Fx 0x0000000040000000ULL +#define TYPE_Lk 0x0000000080000000ULL +#define TYPE_Ms 0x0000000100000000ULL +#define TYPE_Bsx 0x0000000200000000ULL +#define TYPE_Dx 0x0000000400000000ULL +#define TYPE_Rs 0x0000000800000000ULL +#define TYPE_Vt 
/*
 * One search result, as filled in by mansearch().
 * The three strings are allocated and get released
 * by mansearch_free().
 */
struct manpage {
	char		*file; /* manpath directory, '/', file name below it */
	char		*names; /* a list of names with sections */
	char		*output; /* user-defined additional output */
	uint64_t	 bits; /* name type mask, 0 unless firstmatch is set */
	size_t		 ipath; /* number of the manpath */
	int		 sec; /* section number, 10 means invalid */
	enum form	 form; /* taken from the first byte of the db file name */
};
any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Top level and utility functions of the mdoc(7) parser for mandoc(1). + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +const char *const __mdoc_argnames[MDOC_ARG_MAX] = { + "split", "nosplit", "ragged", + "unfilled", "literal", "file", + "offset", "bullet", "dash", + "hyphen", "item", "enum", + "tag", "diag", "hang", + "ohang", "inset", "column", + "width", "compact", "std", + "filled", "words", "emphasis", + "symbolic", "nested", "centered" +}; +const char * const *mdoc_argnames = __mdoc_argnames; + +static int mdoc_ptext(struct roff_man *, int, char *, int); +static int mdoc_pmacro(struct roff_man *, int, char *, int); + + +/* + * Main parse routine. Parses a single line -- really just hands off to + * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 
+ */ +int +mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) +{ + + if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) + mdoc->flags |= MDOC_NEWLINE; + + /* + * Let the roff nS register switch SYNOPSIS mode early, + * such that the parser knows at all times + * whether this mode is on or off. + * Note that this mode is also switched by the Sh macro. + */ + if (roff_getreg(mdoc->roff, "nS")) + mdoc->flags |= MDOC_SYNOPSIS; + else + mdoc->flags &= ~MDOC_SYNOPSIS; + + return roff_getcontrol(mdoc->roff, buf, &offs) ? + mdoc_pmacro(mdoc, ln, buf, offs) : + mdoc_ptext(mdoc, ln, buf, offs); +} + +void +mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; +} + +struct roff_node * +mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, + enum roff_tok tok, struct roff_node *body) +{ + struct roff_node *p; + + body->flags |= NODE_ENDED; + body->parent->flags |= NODE_ENDED; + p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); + p->body = body; + p->norm = body->norm; + p->end = ENDBODY_SPACE; + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_SIBLING; + return p; +} + +struct roff_node * +mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, + enum roff_tok tok, struct mdoc_arg *args) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case MDOC_Bd: + case MDOC_Bf: + case MDOC_Bl: + case MDOC_En: + case MDOC_Rs: + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; + return p; +} + +void +mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, + enum roff_tok tok, struct mdoc_arg *args) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, 
tok); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case MDOC_An: + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; +} + +/* + * Parse free-form text, that is, a line that does not begin with the + * control character. + */ +static int +mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) +{ + struct roff_node *n; + const char *cp, *sp; + char *c, *ws, *end; + + n = mdoc->last; + + /* + * If a column list contains plain text, assume an implicit item + * macro. This can happen one or more times at the beginning + * of such a list, intermixed with non-It mdoc macros and with + * nodes generated on the roff level, for example by tbl. + */ + + if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && + n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || + (n->parent != NULL && n->parent->tok == MDOC_Bl && + n->parent->norm->Bl.type == LIST_column)) { + mdoc->flags |= MDOC_FREECOL; + (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, + line, offs, &offs, buf); + return 1; + } + + /* + * Search for the beginning of unescaped trailing whitespace (ws) + * and for the first character not to be output (end). + */ + + /* FIXME: replace with strcspn(). */ + ws = NULL; + for (c = end = buf + offs; *c; c++) { + switch (*c) { + case ' ': + if (NULL == ws) + ws = c; + continue; + case '\t': + /* + * Always warn about trailing tabs, + * even outside literal context, + * where they should be put on the next line. + */ + if (NULL == ws) + ws = c; + /* + * Strip trailing tabs in literal context only; + * outside, they affect the next line. + */ + if (mdoc->flags & ROFF_NOFILL) + continue; + break; + case '\\': + /* Skip the escaped character, too, if any. 
*/ + if (c[1]) + c++; + /* FALLTHROUGH */ + default: + ws = NULL; + break; + } + end = c + 1; + } + *end = '\0'; + + if (ws) + mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL); + + /* + * Blank lines are allowed in no-fill mode + * and cancel preceding \c, + * but add a single vertical space elsewhere. + */ + + if (buf[offs] == '\0' && (mdoc->flags & ROFF_NOFILL) == 0) { + switch (mdoc->last->type) { + case ROFFT_TEXT: + sp = mdoc->last->string; + cp = end = strchr(sp, '\0') - 2; + if (cp < sp || cp[0] != '\\' || cp[1] != 'c') + break; + while (cp > sp && cp[-1] == '\\') + cp--; + if ((end - cp) % 2) + break; + *end = '\0'; + return 1; + default: + break; + } + mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL); + roff_elem_alloc(mdoc, line, offs, ROFF_sp); + mdoc->last->flags |= NODE_VALID | NODE_ENDED; + mdoc->next = ROFF_NEXT_SIBLING; + return 1; + } + + roff_word_alloc(mdoc, line, offs, buf+offs); + + if (mdoc->flags & ROFF_NOFILL) + return 1; + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(buf < end); + + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) + mdoc->last->flags |= NODE_EOS; + + for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { + if (c - buf < offs + 2) + continue; + if (end - c < 3) + break; + if (c[1] != ' ' || + isalnum((unsigned char)c[-2]) == 0 || + isalnum((unsigned char)c[-1]) == 0 || + (c[-2] == 'n' && c[-1] == 'c') || + (c[-2] == 'v' && c[-1] == 's')) + continue; + c += 2; + if (*c == ' ') + c++; + if (*c == ' ') + c++; + if (isupper((unsigned char)(*c))) + mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL); + } + + return 1; +} + +/* + * Parse a macro line, that is, a line beginning with the control + * character. 
+ */ +static int +mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) +{ + struct roff_node *n; + const char *cp; + size_t sz; + enum roff_tok tok; + int sv; + + /* Determine the line macro. */ + + sv = offs; + tok = TOKEN_NONE; + for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) + offs++; + if (sz == 2 || sz == 3) + tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); + if (tok == TOKEN_NONE) { + mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1); + return 1; + } + + /* Skip a leading escape sequence or tab. */ + + switch (buf[offs]) { + case '\\': + cp = buf + offs + 1; + mandoc_escape(&cp, NULL, NULL); + offs = cp - buf; + break; + case '\t': + offs++; + break; + default: + break; + } + + /* Jump to the next non-whitespace word. */ + + while (buf[offs] == ' ') + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[offs] && ' ' == buf[offs - 1]) + mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL); + + /* + * If an initial or transparent macro or a list invocation, + * divert directly into macro processing. + */ + + n = mdoc->last; + if (n == NULL || tok == MDOC_It || tok == MDOC_El || + roff_tok_transparent(tok)) { + (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); + return 1; + } + + /* + * If a column list contains a non-It macro, assume an implicit + * item macro. This can happen one or more times at the + * beginning of such a list, intermixed with text lines and + * with nodes generated on the roff level, for example by tbl. + */ + + if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && + n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || + (n->parent != NULL && n->parent->tok == MDOC_Bl && + n->parent->norm->Bl.type == LIST_column)) { + mdoc->flags |= MDOC_FREECOL; + (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf); + return 1; + } + + /* Normal processing of a macro. 
*/ + + (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf); + + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (mdoc->quick && MDOC_Sh == tok && + SEC_NAME != mdoc->last->sec) + return 2; + + return 1; +} + +enum mdelim +mdoc_isdelim(const char *p) +{ + + if ('\0' == p[0]) + return DELIM_NONE; + + if ('\0' == p[1]) + switch (p[0]) { + case '(': + case '[': + return DELIM_OPEN; + case '|': + return DELIM_MIDDLE; + case '.': + case ',': + case ';': + case ':': + case '?': + case '!': + case ')': + case ']': + return DELIM_CLOSE; + default: + return DELIM_NONE; + } + + if ('\\' != p[0]) + return DELIM_NONE; + + if (0 == strcmp(p + 1, ".")) + return DELIM_CLOSE; + if (0 == strcmp(p + 1, "fR|\\fP")) + return DELIM_MIDDLE; + + return DELIM_NONE; +} diff --git a/usr.bin/mandoc/mdoc.h b/usr.bin/mandoc/mdoc.h new file mode 100644 index 0000000..aa4a5ec --- /dev/null +++ b/usr.bin/mandoc/mdoc.h @@ -0,0 +1,158 @@ +/* $OpenBSD: mdoc.h,v 1.71 2018/12/30 00:48:47 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +struct roff_node; +struct roff_man; + +enum mdocargt { + MDOC_Split, /* -split */ + MDOC_Nosplit, /* -nosplit */ + MDOC_Ragged, /* -ragged */ + MDOC_Unfilled, /* -unfilled */ + MDOC_Literal, /* -literal */ + MDOC_File, /* -file */ + MDOC_Offset, /* -offset */ + MDOC_Bullet, /* -bullet */ + MDOC_Dash, /* -dash */ + MDOC_Hyphen, /* -hyphen */ + MDOC_Item, /* -item */ + MDOC_Enum, /* -enum */ + MDOC_Tag, /* -tag */ + MDOC_Diag, /* -diag */ + MDOC_Hang, /* -hang */ + MDOC_Ohang, /* -ohang */ + MDOC_Inset, /* -inset */ + MDOC_Column, /* -column */ + MDOC_Width, /* -width */ + MDOC_Compact, /* -compact */ + MDOC_Std, /* -std */ + MDOC_Filled, /* -filled */ + MDOC_Words, /* -words */ + MDOC_Emphasis, /* -emphasis */ + MDOC_Symbolic, /* -symbolic */ + MDOC_Nested, /* -nested */ + MDOC_Centred, /* -centered */ + MDOC_ARG_MAX +}; + +/* + * An argument to a macro (multiple values = `-column xxx yyy'). + */ +struct mdoc_argv { + enum mdocargt arg; /* type of argument */ + int line; + int pos; + size_t sz; /* elements in "value" */ + char **value; /* argument strings */ +}; + +/* + * Reference-counted macro arguments. These are refcounted because + * blocks have multiple instances of the same arguments spread across + * the HEAD, BODY, TAIL, and BLOCK node types. 
+ */ +struct mdoc_arg { + size_t argc; + struct mdoc_argv *argv; + unsigned int refcnt; +}; + +enum mdoc_list { + LIST__NONE = 0, + LIST_bullet, /* -bullet */ + LIST_column, /* -column */ + LIST_dash, /* -dash */ + LIST_diag, /* -diag */ + LIST_enum, /* -enum */ + LIST_hang, /* -hang */ + LIST_hyphen, /* -hyphen */ + LIST_inset, /* -inset */ + LIST_item, /* -item */ + LIST_ohang, /* -ohang */ + LIST_tag, /* -tag */ + LIST_MAX +}; + +enum mdoc_disp { + DISP__NONE = 0, + DISP_centered, /* -centered */ + DISP_ragged, /* -ragged */ + DISP_unfilled, /* -unfilled */ + DISP_filled, /* -filled */ + DISP_literal /* -literal */ +}; + +enum mdoc_auth { + AUTH__NONE = 0, + AUTH_split, /* -split */ + AUTH_nosplit /* -nosplit */ +}; + +enum mdoc_font { + FONT__NONE = 0, + FONT_Em, /* Em, -emphasis */ + FONT_Li, /* Li, -literal */ + FONT_Sy /* Sy, -symbolic */ +}; + +struct mdoc_bd { + const char *offs; /* -offset */ + enum mdoc_disp type; /* -ragged, etc. */ + int comp; /* -compact */ +}; + +struct mdoc_bl { + const char *width; /* -width */ + const char *offs; /* -offset */ + enum mdoc_list type; /* -tag, -enum, etc. */ + int comp; /* -compact */ + size_t ncols; /* -column arg count */ + const char **cols; /* -column val ptr */ + int count; /* -enum counter */ +}; + +struct mdoc_bf { + enum mdoc_font font; /* font */ +}; + +struct mdoc_an { + enum mdoc_auth auth; /* -split, etc. */ +}; + +struct mdoc_rs { + int quote_T; /* whether to quote %T */ +}; + +/* + * Consists of normalised node arguments. These should be used instead + * of iterating through the mdoc_arg pointers of a node: defaults are + * provided, etc. + */ +union mdoc_data { + struct mdoc_an An; + struct mdoc_bd Bd; + struct mdoc_bf Bf; + struct mdoc_bl Bl; + struct roff_node *Es; + struct mdoc_rs Rs; +}; + +/* Names of macro args. Index is enum mdocargt. 
*/ +extern const char *const *mdoc_argnames; + +void mdoc_validate(struct roff_man *); diff --git a/usr.bin/mandoc/mdoc_argv.c b/usr.bin/mandoc/mdoc_argv.c new file mode 100644 index 0000000..bacac34 --- /dev/null +++ b/usr.bin/mandoc/mdoc_argv.c @@ -0,0 +1,680 @@ +/* $OpenBSD: mdoc_argv.c,v 1.76 2019/07/11 16:56:52 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012, 2014-2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +#define MULTI_STEP 5 /* pre-allocate argument values */ +#define DELIMSZ 6 /* max possible size of a delimiter */ + +enum argsflag { + ARGSFL_NONE = 0, + ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ + ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ +}; + +enum argvflag { + ARGV_NONE, /* no args to flag (e.g., -split) */ + ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ + ARGV_MULTI /* multiple args (e.g., -column xxx yyy) */ +}; + +struct mdocarg { + enum argsflag flags; + const enum mdocargt *argvs; +}; + +static void argn_free(struct mdoc_arg *, int); +static enum margserr args(struct roff_man *, int, int *, + char *, enum argsflag, char **); +static int args_checkpunct(const char *, int); +static void argv_multi(struct roff_man *, int, + struct mdoc_argv *, int *, char *); +static void argv_single(struct roff_man *, int, + struct mdoc_argv *, int *, char *); + +static const enum argvflag argvflags[MDOC_ARG_MAX] = { + ARGV_NONE, /* MDOC_Split */ + ARGV_NONE, /* MDOC_Nosplit */ + ARGV_NONE, /* MDOC_Ragged */ + ARGV_NONE, /* MDOC_Unfilled */ + ARGV_NONE, /* MDOC_Literal */ + ARGV_SINGLE, /* MDOC_File */ + ARGV_SINGLE, /* MDOC_Offset */ + ARGV_NONE, /* MDOC_Bullet */ + ARGV_NONE, /* MDOC_Dash */ + ARGV_NONE, /* MDOC_Hyphen */ + ARGV_NONE, /* MDOC_Item */ + ARGV_NONE, /* MDOC_Enum */ + ARGV_NONE, /* MDOC_Tag */ + ARGV_NONE, /* MDOC_Diag */ + ARGV_NONE, /* MDOC_Hang */ + ARGV_NONE, /* MDOC_Ohang */ + ARGV_NONE, /* MDOC_Inset */ + ARGV_MULTI, /* MDOC_Column */ + ARGV_SINGLE, /* MDOC_Width */ + ARGV_NONE, /* MDOC_Compact */ + ARGV_NONE, /* MDOC_Std */ + ARGV_NONE, /* MDOC_Filled */ + ARGV_NONE, /* MDOC_Words */ + ARGV_NONE, /* MDOC_Emphasis */ + ARGV_NONE, /* MDOC_Symbolic */ + 
ARGV_NONE /* MDOC_Nested; MDOC_Centred is left zero-initialised, i.e. ARGV_NONE */ +}; + +static const enum mdocargt args_Ex[] = { + MDOC_Std, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_An[] = { + MDOC_Split, + MDOC_Nosplit, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bd[] = { + MDOC_Ragged, + MDOC_Unfilled, + MDOC_Filled, + MDOC_Literal, + MDOC_File, + MDOC_Offset, + MDOC_Compact, + MDOC_Centred, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bf[] = { + MDOC_Emphasis, + MDOC_Literal, + MDOC_Symbolic, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bk[] = { + MDOC_Words, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bl[] = { + MDOC_Bullet, + MDOC_Dash, + MDOC_Hyphen, + MDOC_Item, + MDOC_Enum, + MDOC_Tag, + MDOC_Diag, + MDOC_Hang, + MDOC_Ohang, + MDOC_Inset, + MDOC_Column, + MDOC_Width, + MDOC_Offset, + MDOC_Compact, + MDOC_Nested, + MDOC_ARG_MAX +}; + +static const struct mdocarg mdocargs[MDOC_MAX - MDOC_Dd] = { + { ARGSFL_NONE, NULL }, /* Dd */ + { ARGSFL_NONE, NULL }, /* Dt */ + { ARGSFL_NONE, NULL }, /* Os */ + { ARGSFL_NONE, NULL }, /* Sh */ + { ARGSFL_NONE, NULL }, /* Ss */ + { ARGSFL_NONE, NULL }, /* Pp */ + { ARGSFL_DELIM, NULL }, /* D1 */ + { ARGSFL_DELIM, NULL }, /* Dl */ + { ARGSFL_NONE, args_Bd }, /* Bd */ + { ARGSFL_NONE, NULL }, /* Ed */ + { ARGSFL_NONE, args_Bl }, /* Bl */ + { ARGSFL_NONE, NULL }, /* El */ + { ARGSFL_NONE, NULL }, /* It */ + { ARGSFL_DELIM, NULL }, /* Ad */ + { ARGSFL_DELIM, args_An }, /* An */ + { ARGSFL_DELIM, NULL }, /* Ap */ + { ARGSFL_DELIM, NULL }, /* Ar */ + { ARGSFL_DELIM, NULL }, /* Cd */ + { ARGSFL_DELIM, NULL }, /* Cm */ + { ARGSFL_DELIM, NULL }, /* Dv */ + { ARGSFL_DELIM, NULL }, /* Er */ + { ARGSFL_DELIM, NULL }, /* Ev */ + { ARGSFL_NONE, args_Ex }, /* Ex */ + { ARGSFL_DELIM, NULL }, /* Fa */ + { ARGSFL_NONE, NULL }, /* Fd */ + { ARGSFL_DELIM, NULL }, /* Fl */ + { ARGSFL_DELIM, NULL }, /* Fn */ + { ARGSFL_DELIM, NULL }, /* Ft */ + { ARGSFL_DELIM, NULL }, /* Ic */ + { ARGSFL_DELIM, NULL }, /* In */ + { ARGSFL_DELIM, NULL }, /* Li */ 
+ { ARGSFL_NONE, NULL }, /* Nd */ + { ARGSFL_DELIM, NULL }, /* Nm */ + { ARGSFL_DELIM, NULL }, /* Op */ + { ARGSFL_DELIM, NULL }, /* Ot */ + { ARGSFL_DELIM, NULL }, /* Pa */ + { ARGSFL_NONE, args_Ex }, /* Rv */ + { ARGSFL_DELIM, NULL }, /* St */ + { ARGSFL_DELIM, NULL }, /* Va */ + { ARGSFL_DELIM, NULL }, /* Vt */ + { ARGSFL_DELIM, NULL }, /* Xr */ + { ARGSFL_NONE, NULL }, /* %A */ + { ARGSFL_NONE, NULL }, /* %B */ + { ARGSFL_NONE, NULL }, /* %D */ + { ARGSFL_NONE, NULL }, /* %I */ + { ARGSFL_NONE, NULL }, /* %J */ + { ARGSFL_NONE, NULL }, /* %N */ + { ARGSFL_NONE, NULL }, /* %O */ + { ARGSFL_NONE, NULL }, /* %P */ + { ARGSFL_NONE, NULL }, /* %R */ + { ARGSFL_NONE, NULL }, /* %T */ + { ARGSFL_NONE, NULL }, /* %V */ + { ARGSFL_DELIM, NULL }, /* Ac */ + { ARGSFL_NONE, NULL }, /* Ao */ + { ARGSFL_DELIM, NULL }, /* Aq */ + { ARGSFL_DELIM, NULL }, /* At */ + { ARGSFL_DELIM, NULL }, /* Bc */ + { ARGSFL_NONE, args_Bf }, /* Bf */ + { ARGSFL_NONE, NULL }, /* Bo */ + { ARGSFL_DELIM, NULL }, /* Bq */ + { ARGSFL_DELIM, NULL }, /* Bsx */ + { ARGSFL_DELIM, NULL }, /* Bx */ + { ARGSFL_NONE, NULL }, /* Db */ + { ARGSFL_DELIM, NULL }, /* Dc */ + { ARGSFL_NONE, NULL }, /* Do */ + { ARGSFL_DELIM, NULL }, /* Dq */ + { ARGSFL_DELIM, NULL }, /* Ec */ + { ARGSFL_NONE, NULL }, /* Ef */ + { ARGSFL_DELIM, NULL }, /* Em */ + { ARGSFL_NONE, NULL }, /* Eo */ + { ARGSFL_DELIM, NULL }, /* Fx */ + { ARGSFL_DELIM, NULL }, /* Ms */ + { ARGSFL_DELIM, NULL }, /* No */ + { ARGSFL_DELIM, NULL }, /* Ns */ + { ARGSFL_DELIM, NULL }, /* Nx */ + { ARGSFL_DELIM, NULL }, /* Ox */ + { ARGSFL_DELIM, NULL }, /* Pc */ + { ARGSFL_DELIM, NULL }, /* Pf */ + { ARGSFL_NONE, NULL }, /* Po */ + { ARGSFL_DELIM, NULL }, /* Pq */ + { ARGSFL_DELIM, NULL }, /* Qc */ + { ARGSFL_DELIM, NULL }, /* Ql */ + { ARGSFL_NONE, NULL }, /* Qo */ + { ARGSFL_DELIM, NULL }, /* Qq */ + { ARGSFL_NONE, NULL }, /* Re */ + { ARGSFL_NONE, NULL }, /* Rs */ + { ARGSFL_DELIM, NULL }, /* Sc */ + { ARGSFL_NONE, NULL }, /* So */ + { ARGSFL_DELIM, NULL 
}, /* Sq */ + { ARGSFL_NONE, NULL }, /* Sm */ + { ARGSFL_DELIM, NULL }, /* Sx */ + { ARGSFL_DELIM, NULL }, /* Sy */ + { ARGSFL_DELIM, NULL }, /* Tn */ + { ARGSFL_DELIM, NULL }, /* Ux */ + { ARGSFL_DELIM, NULL }, /* Xc */ + { ARGSFL_NONE, NULL }, /* Xo */ + { ARGSFL_NONE, NULL }, /* Fo */ + { ARGSFL_DELIM, NULL }, /* Fc */ + { ARGSFL_NONE, NULL }, /* Oo */ + { ARGSFL_DELIM, NULL }, /* Oc */ + { ARGSFL_NONE, args_Bk }, /* Bk */ + { ARGSFL_NONE, NULL }, /* Ek */ + { ARGSFL_NONE, NULL }, /* Bt */ + { ARGSFL_NONE, NULL }, /* Hf */ + { ARGSFL_DELIM, NULL }, /* Fr */ + { ARGSFL_NONE, NULL }, /* Ud */ + { ARGSFL_DELIM, NULL }, /* Lb */ + { ARGSFL_NONE, NULL }, /* Lp */ + { ARGSFL_DELIM, NULL }, /* Lk */ + { ARGSFL_DELIM, NULL }, /* Mt */ + { ARGSFL_DELIM, NULL }, /* Brq */ + { ARGSFL_NONE, NULL }, /* Bro */ + { ARGSFL_DELIM, NULL }, /* Brc */ + { ARGSFL_NONE, NULL }, /* %C */ + { ARGSFL_NONE, NULL }, /* Es */ + { ARGSFL_DELIM, NULL }, /* En */ + { ARGSFL_DELIM, NULL }, /* Dx */ + { ARGSFL_NONE, NULL }, /* %Q */ + { ARGSFL_NONE, NULL }, /* %U */ + { ARGSFL_NONE, NULL }, /* Ta */ +}; + + +/* + * Parse flags and their arguments from the input line. + * These come in the form -flag [argument ...]. + * Some flags take no argument, some one, some multiple. + */ +void +mdoc_argv(struct roff_man *mdoc, int line, enum roff_tok tok, + struct mdoc_arg **reta, int *pos, char *buf) +{ + struct mdoc_argv tmpv; + struct mdoc_argv **retv; + const enum mdocargt *argtable; + char *argname; + int ipos, retc; + char savechar; + + *reta = NULL; + + /* Which flags does this macro support? */ + + assert(tok >= MDOC_Dd && tok < MDOC_MAX); + argtable = mdocargs[tok - MDOC_Dd].argvs; + if (argtable == NULL) + return; + + /* Loop over the flags on the input line. */ + + ipos = *pos; + while (buf[ipos] == '-') { + + /* Seek to the first unescaped space. 
*/ + + for (argname = buf + ++ipos; buf[ipos] != '\0'; ipos++) + if (buf[ipos] == ' ' && buf[ipos - 1] != '\\') + break; + + /* + * We want to nil-terminate the word to look it up. + * But we may not have a flag, in which case we need + * to restore the line as-is. So keep around the + * stray byte, which we'll reset upon exiting. + */ + + if ((savechar = buf[ipos]) != '\0') + buf[ipos++] = '\0'; + + /* + * Now look up the word as a flag. Use temporary + * storage that we'll copy into the node's flags. + */ + + while ((tmpv.arg = *argtable++) != MDOC_ARG_MAX) + if ( ! strcmp(argname, mdoc_argnames[tmpv.arg])) + break; + + /* If it isn't a flag, restore the saved byte. */ + + if (tmpv.arg == MDOC_ARG_MAX) { + if (savechar != '\0') + buf[ipos - 1] = savechar; + break; + } + + /* Read to the next word (the first argument). */ + + while (buf[ipos] == ' ') + ipos++; + + /* Parse the arguments of the flag. */ + + tmpv.line = line; + tmpv.pos = *pos; + tmpv.sz = 0; + tmpv.value = NULL; + + switch (argvflags[tmpv.arg]) { + case ARGV_SINGLE: + argv_single(mdoc, line, &tmpv, &ipos, buf); + break; + case ARGV_MULTI: + argv_multi(mdoc, line, &tmpv, &ipos, buf); + break; + case ARGV_NONE: + break; + } + + /* Append to the return values. */ + + if (*reta == NULL) + *reta = mandoc_calloc(1, sizeof(**reta)); + + retc = ++(*reta)->argc; + retv = &(*reta)->argv; + *retv = mandoc_reallocarray(*retv, retc, sizeof(**retv)); + memcpy(*retv + retc - 1, &tmpv, sizeof(**retv)); + + /* Prepare for parsing the next flag. 
*/ + + *pos = ipos; + argtable = mdocargs[tok - MDOC_Dd].argvs; + } +} + +void +mdoc_argv_free(struct mdoc_arg *p) +{ + int i; + + if (NULL == p) + return; + + if (p->refcnt) { + --(p->refcnt); + if (p->refcnt) + return; + } + assert(p->argc); + + for (i = (int)p->argc - 1; i >= 0; i--) + argn_free(p, i); + + free(p->argv); + free(p); +} + +static void +argn_free(struct mdoc_arg *p, int iarg) +{ + struct mdoc_argv *arg; + int j; + + arg = &p->argv[iarg]; + + if (arg->sz && arg->value) { + for (j = (int)arg->sz - 1; j >= 0; j--) + free(arg->value[j]); + free(arg->value); + } + + for (--p->argc; iarg < (int)p->argc; iarg++) + p->argv[iarg] = p->argv[iarg+1]; +} + +enum margserr +mdoc_args(struct roff_man *mdoc, int line, int *pos, + char *buf, enum roff_tok tok, char **v) +{ + struct roff_node *n; + enum argsflag fl; + + fl = tok == TOKEN_NONE ? ARGSFL_NONE : mdocargs[tok - MDOC_Dd].flags; + + /* + * We know that we're in an `It', so it's reasonable to expect + * us to be sitting in a `Bl'. Someday this may not be the case + * (if we allow random `It's sitting out there), so provide a + * safe fall-back into the default behaviour. + */ + + if (tok == MDOC_It) { + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->tok != MDOC_Bl) + continue; + if (n->norm->Bl.type == LIST_column) + fl = ARGSFL_TABSEP; + break; + } + } + + return args(mdoc, line, pos, buf, fl, v); +} + +static enum margserr +args(struct roff_man *mdoc, int line, int *pos, + char *buf, enum argsflag fl, char **v) +{ + char *p; + char *v_local; + int pairs; + + if (buf[*pos] == '\0') { + if (mdoc->flags & MDOC_PHRASELIT && + ! (mdoc->flags & MDOC_PHRASE)) { + mandoc_msg(MANDOCERR_ARG_QUOTE, line, *pos, NULL); + mdoc->flags &= ~MDOC_PHRASELIT; + } + mdoc->flags &= ~MDOC_PHRASEQL; + return ARGS_EOLN; + } + + if (v == NULL) + v = &v_local; + *v = buf + *pos; + + if (fl == ARGSFL_DELIM && args_checkpunct(buf, *pos)) + return ARGS_PUNCT; + + /* + * Tabs in `It' lines in `Bl -column' can't be escaped. 
+ * Phrases are reparsed for `Ta' and other macros later. + */ + + if (fl == ARGSFL_TABSEP) { + if ((p = strchr(*v, '\t')) != NULL) { + + /* + * Words right before and right after + * tab characters are not parsed, + * unless there is a blank in between. + */ + + if (p > buf && p[-1] != ' ') + mdoc->flags |= MDOC_PHRASEQL; + if (p[1] != ' ') + mdoc->flags |= MDOC_PHRASEQN; + + /* + * One or more blanks after a tab cause + * one leading blank in the next column. + * So skip all but one of them. + */ + + *pos += (int)(p - *v) + 1; + while (buf[*pos] == ' ' && buf[*pos + 1] == ' ') + (*pos)++; + + /* + * A tab at the end of an input line + * switches to the next column. + */ + + if (buf[*pos] == '\0' || buf[*pos + 1] == '\0') + mdoc->flags |= MDOC_PHRASEQN; + } else { + p = strchr(*v, '\0'); + if (p[-1] == ' ') + mandoc_msg(MANDOCERR_SPACE_EOL, + line, *pos, NULL); + *pos += (int)(p - *v); + } + + /* Skip any trailing blank characters. */ + while (p > *v && p[-1] == ' ' && + (p - 1 == *v || p[-2] != '\\')) + p--; + *p = '\0'; + + return ARGS_PHRASE; + } + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT preceded by a double-quote. + * NUL-terminate the literal in place. + * Collapse pairs of quotes inside quoted literals. + * Whitespace is NOT involved in literal termination. + */ + + if (mdoc->flags & MDOC_PHRASELIT || + (mdoc->flags & MDOC_PHRASE && buf[*pos] == '\"')) { + if ((mdoc->flags & MDOC_PHRASELIT) == 0) { + *v = &buf[++(*pos)]; + mdoc->flags |= MDOC_PHRASELIT; + } + pairs = 0; + for ( ; buf[*pos]; (*pos)++) { + /* Move following text left after quoted quotes. */ + if (pairs) + buf[*pos - pairs] = buf[*pos]; + if ('\"' != buf[*pos]) + continue; + /* Unquoted quotes end quoted args. */ + if ('\"' != buf[*pos + 1]) + break; + /* Quoted quotes collapse. */ + pairs++; + (*pos)++; + } + if (pairs) + buf[*pos - pairs] = '\0'; + + if (buf[*pos] == '\0') { + if ( ! 
(mdoc->flags & MDOC_PHRASE)) + mandoc_msg(MANDOCERR_ARG_QUOTE, + line, *pos, NULL); + return ARGS_WORD; + } + + mdoc->flags &= ~MDOC_PHRASELIT; + buf[(*pos)++] = '\0'; + + if ('\0' == buf[*pos]) + return ARGS_WORD; + + while (' ' == buf[*pos]) + (*pos)++; + + if ('\0' == buf[*pos]) + mandoc_msg(MANDOCERR_SPACE_EOL, line, *pos, NULL); + + return ARGS_WORD; + } + + p = &buf[*pos]; + *v = roff_getarg(mdoc->roff, &p, line, pos); + if (v == &v_local) + free(*v); + + /* + * After parsing the last word in this phrase, + * tell lookup() whether or not to interpret it. + */ + + if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) { + mdoc->flags &= ~MDOC_PHRASEQL; + mdoc->flags |= MDOC_PHRASEQF; + } + return ARGS_ALLOC; +} + +/* + * Check if the string consists only of space-separated closing + * delimiters. This is a bit of a dance: the first must be a close + * delimiter, but it may be followed by middle delimiters. Arbitrary + * whitespace may separate these tokens. + */ +static int +args_checkpunct(const char *buf, int i) +{ + int j; + char dbuf[DELIMSZ]; + enum mdelim d; + + /* First token must be a close-delimiter. */ + + for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) + dbuf[j] = buf[i]; + + if (DELIMSZ == j) + return 0; + + dbuf[j] = '\0'; + if (DELIM_CLOSE != mdoc_isdelim(dbuf)) + return 0; + + while (' ' == buf[i]) + i++; + + /* Remaining must NOT be open/none. 
*/ + + while (buf[i]) { + j = 0; + while (buf[i] && ' ' != buf[i] && j < DELIMSZ) + dbuf[j++] = buf[i++]; + + if (DELIMSZ == j) + return 0; + + dbuf[j] = '\0'; + d = mdoc_isdelim(dbuf); + if (DELIM_NONE == d || DELIM_OPEN == d) + return 0; + + while (' ' == buf[i]) + i++; + } + + return '\0' == buf[i]; +} + +static void +argv_multi(struct roff_man *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + for (v->sz = 0; ; v->sz++) { + if (buf[*pos] == '-') + break; + ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); + if (ac == ARGS_EOLN) + break; + + if (v->sz % MULTI_STEP == 0) + v->value = mandoc_reallocarray(v->value, + v->sz + MULTI_STEP, sizeof(char *)); + + if (ac != ARGS_ALLOC) + p = mandoc_strdup(p); + v->value[(int)v->sz] = p; + } +} + +static void +argv_single(struct roff_man *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); + if (ac == ARGS_EOLN) + return; + + if (ac != ARGS_ALLOC) + p = mandoc_strdup(p); + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = p; +} diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c new file mode 100644 index 0000000..fcff005 --- /dev/null +++ b/usr.bin/mandoc/mdoc_html.c @@ -0,0 +1,1758 @@ +/* $OpenBSD: mdoc_html.c,v 1.215 2020/04/19 15:15:54 schwarze Exp $ */ +/* + * Copyright (c) 2014-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * HTML formatter for mdoc(7) used by mandoc(1). + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "out.h" +#include "html.h" +#include "main.h" + +#define MDOC_ARGS const struct roff_meta *meta, \ + struct roff_node *n, \ + struct html *h + +#ifndef MIN +#define MIN(a,b) ((/*CONSTCOND*/(a)<(b))?(a):(b)) +#endif + +struct mdoc_html_act { + int (*pre)(MDOC_ARGS); + void (*post)(MDOC_ARGS); +}; + +static void print_mdoc_head(const struct roff_meta *, + struct html *); +static void print_mdoc_node(MDOC_ARGS); +static void print_mdoc_nodelist(MDOC_ARGS); +static void synopsis_pre(struct html *, struct roff_node *); + +static void mdoc_root_post(const struct roff_meta *, + struct html *); +static int mdoc_root_pre(const struct roff_meta *, + struct html *); + +static void mdoc__x_post(MDOC_ARGS); +static int mdoc__x_pre(MDOC_ARGS); +static int mdoc_abort_pre(MDOC_ARGS); +static int mdoc_ad_pre(MDOC_ARGS); +static int mdoc_an_pre(MDOC_ARGS); +static int mdoc_ap_pre(MDOC_ARGS); +static int mdoc_ar_pre(MDOC_ARGS); +static int mdoc_bd_pre(MDOC_ARGS); +static int mdoc_bf_pre(MDOC_ARGS); +static void mdoc_bk_post(MDOC_ARGS); +static int mdoc_bk_pre(MDOC_ARGS); +static int mdoc_bl_pre(MDOC_ARGS); +static int mdoc_cd_pre(MDOC_ARGS); +static int mdoc_code_pre(MDOC_ARGS); +static int mdoc_d1_pre(MDOC_ARGS); +static int mdoc_fa_pre(MDOC_ARGS); +static int mdoc_fd_pre(MDOC_ARGS); +static int mdoc_fl_pre(MDOC_ARGS); +static int mdoc_fn_pre(MDOC_ARGS); 
+static int mdoc_ft_pre(MDOC_ARGS); +static int mdoc_em_pre(MDOC_ARGS); +static void mdoc_eo_post(MDOC_ARGS); +static int mdoc_eo_pre(MDOC_ARGS); +static int mdoc_ex_pre(MDOC_ARGS); +static void mdoc_fo_post(MDOC_ARGS); +static int mdoc_fo_pre(MDOC_ARGS); +static int mdoc_igndelim_pre(MDOC_ARGS); +static int mdoc_in_pre(MDOC_ARGS); +static int mdoc_it_pre(MDOC_ARGS); +static int mdoc_lb_pre(MDOC_ARGS); +static int mdoc_lk_pre(MDOC_ARGS); +static int mdoc_mt_pre(MDOC_ARGS); +static int mdoc_nd_pre(MDOC_ARGS); +static int mdoc_nm_pre(MDOC_ARGS); +static int mdoc_no_pre(MDOC_ARGS); +static int mdoc_ns_pre(MDOC_ARGS); +static int mdoc_pa_pre(MDOC_ARGS); +static void mdoc_pf_post(MDOC_ARGS); +static int mdoc_pp_pre(MDOC_ARGS); +static void mdoc_quote_post(MDOC_ARGS); +static int mdoc_quote_pre(MDOC_ARGS); +static int mdoc_rs_pre(MDOC_ARGS); +static int mdoc_sh_pre(MDOC_ARGS); +static int mdoc_skip_pre(MDOC_ARGS); +static int mdoc_sm_pre(MDOC_ARGS); +static int mdoc_ss_pre(MDOC_ARGS); +static int mdoc_st_pre(MDOC_ARGS); +static int mdoc_sx_pre(MDOC_ARGS); +static int mdoc_sy_pre(MDOC_ARGS); +static int mdoc_tg_pre(MDOC_ARGS); +static int mdoc_va_pre(MDOC_ARGS); +static int mdoc_vt_pre(MDOC_ARGS); +static int mdoc_xr_pre(MDOC_ARGS); +static int mdoc_xx_pre(MDOC_ARGS); + +static const struct mdoc_html_act mdoc_html_acts[MDOC_MAX - MDOC_Dd] = { + {NULL, NULL}, /* Dd */ + {NULL, NULL}, /* Dt */ + {NULL, NULL}, /* Os */ + {mdoc_sh_pre, NULL }, /* Sh */ + {mdoc_ss_pre, NULL }, /* Ss */ + {mdoc_pp_pre, NULL}, /* Pp */ + {mdoc_d1_pre, NULL}, /* D1 */ + {mdoc_d1_pre, NULL}, /* Dl */ + {mdoc_bd_pre, NULL}, /* Bd */ + {NULL, NULL}, /* Ed */ + {mdoc_bl_pre, NULL}, /* Bl */ + {NULL, NULL}, /* El */ + {mdoc_it_pre, NULL}, /* It */ + {mdoc_ad_pre, NULL}, /* Ad */ + {mdoc_an_pre, NULL}, /* An */ + {mdoc_ap_pre, NULL}, /* Ap */ + {mdoc_ar_pre, NULL}, /* Ar */ + {mdoc_cd_pre, NULL}, /* Cd */ + {mdoc_code_pre, NULL}, /* Cm */ + {mdoc_code_pre, NULL}, /* Dv */ + {mdoc_code_pre, NULL}, /* Er 
*/ + {mdoc_code_pre, NULL}, /* Ev */ + {mdoc_ex_pre, NULL}, /* Ex */ + {mdoc_fa_pre, NULL}, /* Fa */ + {mdoc_fd_pre, NULL}, /* Fd */ + {mdoc_fl_pre, NULL}, /* Fl */ + {mdoc_fn_pre, NULL}, /* Fn */ + {mdoc_ft_pre, NULL}, /* Ft */ + {mdoc_code_pre, NULL}, /* Ic */ + {mdoc_in_pre, NULL}, /* In */ + {mdoc_code_pre, NULL}, /* Li */ + {mdoc_nd_pre, NULL}, /* Nd */ + {mdoc_nm_pre, NULL}, /* Nm */ + {mdoc_quote_pre, mdoc_quote_post}, /* Op */ + {mdoc_abort_pre, NULL}, /* Ot */ + {mdoc_pa_pre, NULL}, /* Pa */ + {mdoc_ex_pre, NULL}, /* Rv */ + {mdoc_st_pre, NULL}, /* St */ + {mdoc_va_pre, NULL}, /* Va */ + {mdoc_vt_pre, NULL}, /* Vt */ + {mdoc_xr_pre, NULL}, /* Xr */ + {mdoc__x_pre, mdoc__x_post}, /* %A */ + {mdoc__x_pre, mdoc__x_post}, /* %B */ + {mdoc__x_pre, mdoc__x_post}, /* %D */ + {mdoc__x_pre, mdoc__x_post}, /* %I */ + {mdoc__x_pre, mdoc__x_post}, /* %J */ + {mdoc__x_pre, mdoc__x_post}, /* %N */ + {mdoc__x_pre, mdoc__x_post}, /* %O */ + {mdoc__x_pre, mdoc__x_post}, /* %P */ + {mdoc__x_pre, mdoc__x_post}, /* %R */ + {mdoc__x_pre, mdoc__x_post}, /* %T */ + {mdoc__x_pre, mdoc__x_post}, /* %V */ + {NULL, NULL}, /* Ac */ + {mdoc_quote_pre, mdoc_quote_post}, /* Ao */ + {mdoc_quote_pre, mdoc_quote_post}, /* Aq */ + {mdoc_xx_pre, NULL}, /* At */ + {NULL, NULL}, /* Bc */ + {mdoc_bf_pre, NULL}, /* Bf */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bo */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bq */ + {mdoc_xx_pre, NULL}, /* Bsx */ + {mdoc_xx_pre, NULL}, /* Bx */ + {mdoc_skip_pre, NULL}, /* Db */ + {NULL, NULL}, /* Dc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Do */ + {mdoc_quote_pre, mdoc_quote_post}, /* Dq */ + {NULL, NULL}, /* Ec */ /* FIXME: no space */ + {NULL, NULL}, /* Ef */ + {mdoc_em_pre, NULL}, /* Em */ + {mdoc_eo_pre, mdoc_eo_post}, /* Eo */ + {mdoc_xx_pre, NULL}, /* Fx */ + {mdoc_no_pre, NULL}, /* Ms */ + {mdoc_no_pre, NULL}, /* No */ + {mdoc_ns_pre, NULL}, /* Ns */ + {mdoc_xx_pre, NULL}, /* Nx */ + {mdoc_xx_pre, NULL}, /* Ox */ + {NULL, NULL}, /* Pc */ + 
{mdoc_igndelim_pre, mdoc_pf_post}, /* Pf */ + {mdoc_quote_pre, mdoc_quote_post}, /* Po */ + {mdoc_quote_pre, mdoc_quote_post}, /* Pq */ + {NULL, NULL}, /* Qc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Ql */ + {mdoc_quote_pre, mdoc_quote_post}, /* Qo */ + {mdoc_quote_pre, mdoc_quote_post}, /* Qq */ + {NULL, NULL}, /* Re */ + {mdoc_rs_pre, NULL}, /* Rs */ + {NULL, NULL}, /* Sc */ + {mdoc_quote_pre, mdoc_quote_post}, /* So */ + {mdoc_quote_pre, mdoc_quote_post}, /* Sq */ + {mdoc_sm_pre, NULL}, /* Sm */ + {mdoc_sx_pre, NULL}, /* Sx */ + {mdoc_sy_pre, NULL}, /* Sy */ + {NULL, NULL}, /* Tn */ + {mdoc_xx_pre, NULL}, /* Ux */ + {NULL, NULL}, /* Xc */ + {NULL, NULL}, /* Xo */ + {mdoc_fo_pre, mdoc_fo_post}, /* Fo */ + {NULL, NULL}, /* Fc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Oo */ + {NULL, NULL}, /* Oc */ + {mdoc_bk_pre, mdoc_bk_post}, /* Bk */ + {NULL, NULL}, /* Ek */ + {NULL, NULL}, /* Bt */ + {NULL, NULL}, /* Hf */ + {mdoc_em_pre, NULL}, /* Fr */ + {NULL, NULL}, /* Ud */ + {mdoc_lb_pre, NULL}, /* Lb */ + {mdoc_abort_pre, NULL}, /* Lp */ + {mdoc_lk_pre, NULL}, /* Lk */ + {mdoc_mt_pre, NULL}, /* Mt */ + {mdoc_quote_pre, mdoc_quote_post}, /* Brq */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bro */ + {NULL, NULL}, /* Brc */ + {mdoc__x_pre, mdoc__x_post}, /* %C */ + {mdoc_skip_pre, NULL}, /* Es */ + {mdoc_quote_pre, mdoc_quote_post}, /* En */ + {mdoc_xx_pre, NULL}, /* Dx */ + {mdoc__x_pre, mdoc__x_post}, /* %Q */ + {mdoc__x_pre, mdoc__x_post}, /* %U */ + {NULL, NULL}, /* Ta */ + {mdoc_tg_pre, NULL}, /* Tg */ +}; + + +/* + * See the same function in mdoc_term.c for documentation. 
+ */ +static void +synopsis_pre(struct html *h, struct roff_node *n) +{ + struct roff_node *np; + + if ((n->flags & NODE_SYNPRETTY) == 0 || + (np = roff_node_prev(n)) == NULL) + return; + + if (np->tok == n->tok && + MDOC_Fo != n->tok && + MDOC_Ft != n->tok && + MDOC_Fn != n->tok) { + print_otag(h, TAG_BR, ""); + return; + } + + switch (np->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + break; + case MDOC_Ft: + if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) + break; + /* FALLTHROUGH */ + default: + print_otag(h, TAG_BR, ""); + return; + } + html_close_paragraph(h); + print_otag(h, TAG_P, "c", "Pp"); +} + +void +html_mdoc(void *arg, const struct roff_meta *mdoc) +{ + struct html *h; + struct roff_node *n; + struct tag *t; + + h = (struct html *)arg; + n = mdoc->first->child; + + if ((h->oflags & HTML_FRAGMENT) == 0) { + print_gen_decls(h); + print_otag(h, TAG_HTML, ""); + if (n != NULL && n->type == ROFFT_COMMENT) + print_gen_comment(h, n); + t = print_otag(h, TAG_HEAD, ""); + print_mdoc_head(mdoc, h); + print_tagq(h, t); + print_otag(h, TAG_BODY, ""); + } + + mdoc_root_pre(mdoc, h); + t = print_otag(h, TAG_DIV, "c", "manual-text"); + print_mdoc_nodelist(mdoc, n, h); + print_tagq(h, t); + mdoc_root_post(mdoc, h); + print_tagq(h, NULL); +} + +static void +print_mdoc_head(const struct roff_meta *meta, struct html *h) +{ + char *cp; + + print_gen_head(h); + + if (meta->arch != NULL && meta->msec != NULL) + mandoc_asprintf(&cp, "%s(%s) (%s)", meta->title, + meta->msec, meta->arch); + else if (meta->msec != NULL) + mandoc_asprintf(&cp, "%s(%s)", meta->title, meta->msec); + else if (meta->arch != NULL) + mandoc_asprintf(&cp, "%s (%s)", meta->title, meta->arch); + else + cp = mandoc_strdup(meta->title); + + print_otag(h, TAG_TITLE, ""); + print_text(h, cp); + free(cp); +} + +static void +print_mdoc_nodelist(MDOC_ARGS) +{ + + while (n != NULL) { + print_mdoc_node(meta, n, h); + n = n->next; + } +} + +static void 
+print_mdoc_node(MDOC_ARGS) +{ + struct tag *t; + int child; + + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) + return; + + if (n->flags & NODE_NOFILL) { + html_fillmode(h, ROFF_nf); + if (n->flags & NODE_LINE) + print_endline(h); + } else + html_fillmode(h, ROFF_fi); + + child = 1; + n->flags &= ~NODE_ENDED; + switch (n->type) { + case ROFFT_TEXT: + if (n->flags & NODE_LINE) { + switch (*n->string) { + case '\0': + h->col = 1; + print_endline(h); + return; + case ' ': + if ((h->flags & HTML_NONEWLINE) == 0 && + (n->flags & NODE_NOFILL) == 0) + print_otag(h, TAG_BR, ""); + break; + default: + break; + } + } + t = h->tag; + t->refcnt++; + if (n->flags & NODE_DELIMC) + h->flags |= HTML_NOSPACE; + if (n->flags & NODE_HREF) + print_tagged_text(h, n->string, n); + else + print_text(h, n->string); + if (n->flags & NODE_DELIMO) + h->flags |= HTML_NOSPACE; + break; + case ROFFT_EQN: + t = h->tag; + t->refcnt++; + print_eqn(h, n->eqn); + break; + case ROFFT_TBL: + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. This will be reopened on the + * next table element. 
+ */ + if (h->tblt != NULL) + print_tblclose(h); + assert(h->tblt == NULL); + t = h->tag; + t->refcnt++; + if (n->tok < ROFF_MAX) { + roff_html_pre(h, n); + t->refcnt--; + print_stagq(h, t); + return; + } + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + if (mdoc_html_acts[n->tok - MDOC_Dd].pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + child = (*mdoc_html_acts[n->tok - MDOC_Dd].pre)(meta, + n, h); + break; + } + + if (h->flags & HTML_KEEP && n->flags & NODE_LINE) { + h->flags &= ~HTML_KEEP; + h->flags |= HTML_PREKEEP; + } + + if (child && n->child != NULL) + print_mdoc_nodelist(meta, n->child, h); + + t->refcnt--; + print_stagq(h, t); + + switch (n->type) { + case ROFFT_TEXT: + case ROFFT_EQN: + break; + default: + if (mdoc_html_acts[n->tok - MDOC_Dd].post == NULL || + n->flags & NODE_ENDED) + break; + (*mdoc_html_acts[n->tok - MDOC_Dd].post)(meta, n, h); + if (n->end != ENDBODY_NOT) + n->body->flags |= NODE_ENDED; + break; + } +} + +static void +mdoc_root_post(const struct roff_meta *meta, struct html *h) +{ + struct tag *t, *tt; + + t = print_otag(h, TAG_TABLE, "c", "foot"); + tt = print_otag(h, TAG_TR, ""); + + print_otag(h, TAG_TD, "c", "foot-date"); + print_text(h, meta->date); + print_stagq(h, tt); + + print_otag(h, TAG_TD, "c", "foot-os"); + print_text(h, meta->os); + print_tagq(h, t); +} + +static int +mdoc_root_pre(const struct roff_meta *meta, struct html *h) +{ + struct tag *t, *tt; + char *volume, *title; + + if (NULL == meta->arch) + volume = mandoc_strdup(meta->vol); + else + mandoc_asprintf(&volume, "%s (%s)", + meta->vol, meta->arch); + + if (NULL == meta->msec) + title = mandoc_strdup(meta->title); + else + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + + t = print_otag(h, TAG_TABLE, "c", "head"); + tt = print_otag(h, TAG_TR, ""); + + print_otag(h, TAG_TD, "c", "head-ltitle"); + print_text(h, title); + print_stagq(h, tt); + + print_otag(h, TAG_TD, "c", "head-vol"); + print_text(h, volume); + print_stagq(h, tt); 
+ + print_otag(h, TAG_TD, "c", "head-rtitle"); + print_text(h, title); + print_tagq(h, t); + + free(title); + free(volume); + return 1; +} + +static int +mdoc_code_pre(MDOC_ARGS) +{ + print_otag_id(h, TAG_CODE, roff_name[n->tok], n); + return 1; +} + +static int +mdoc_sh_pre(MDOC_ARGS) +{ + struct roff_node *sn, *subn; + struct tag *t, *tsec, *tsub; + char *id; + int sc; + + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + if ((h->oflags & HTML_TOC) == 0 || + h->flags & HTML_TOCDONE || + n->sec <= SEC_SYNOPSIS) { + print_otag(h, TAG_SECTION, "c", "Sh"); + break; + } + h->flags |= HTML_TOCDONE; + sc = 0; + for (sn = n->next; sn != NULL; sn = sn->next) + if (sn->sec == SEC_CUSTOM) + if (++sc == 2) + break; + if (sc < 2) + break; + t = print_otag(h, TAG_H1, "c", "Sh"); + print_text(h, "TABLE OF CONTENTS"); + print_tagq(h, t); + t = print_otag(h, TAG_UL, "c", "Bl-compact"); + for (sn = n; sn != NULL; sn = sn->next) { + tsec = print_otag(h, TAG_LI, ""); + id = html_make_id(sn->head, 0); + tsub = print_otag(h, TAG_A, "hR", id); + free(id); + print_mdoc_nodelist(meta, sn->head->child, h); + print_tagq(h, tsub); + tsub = NULL; + for (subn = sn->body->child; subn != NULL; + subn = subn->next) { + if (subn->tok != MDOC_Ss) + continue; + id = html_make_id(subn->head, 0); + if (id == NULL) + continue; + if (tsub == NULL) + print_otag(h, TAG_UL, + "c", "Bl-compact"); + tsub = print_otag(h, TAG_LI, ""); + print_otag(h, TAG_A, "hR", id); + free(id); + print_mdoc_nodelist(meta, + subn->head->child, h); + print_tagq(h, tsub); + } + print_tagq(h, tsec); + } + print_tagq(h, t); + print_otag(h, TAG_SECTION, "c", "Sh"); + break; + case ROFFT_HEAD: + print_otag_id(h, TAG_H1, "Sh", n); + break; + case ROFFT_BODY: + if (n->sec == SEC_AUTHORS) + h->flags &= ~(HTML_SPLIT|HTML_NOSPLIT); + break; + default: + break; + } + return 1; +} + +static int +mdoc_ss_pre(MDOC_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + print_otag(h, TAG_SECTION, 
"c", "Ss"); + break; + case ROFFT_HEAD: + print_otag_id(h, TAG_H2, "Ss", n); + break; + case ROFFT_BODY: + break; + default: + abort(); + } + return 1; +} + +static int +mdoc_fl_pre(MDOC_ARGS) +{ + struct roff_node *nn; + + print_otag_id(h, TAG_CODE, "Fl", n); + print_text(h, "\\-"); + if (n->child != NULL || + ((nn = roff_node_next(n)) != NULL && + nn->type != ROFFT_TEXT && + (nn->flags & NODE_LINE) == 0)) + h->flags |= HTML_NOSPACE; + + return 1; +} + +static int +mdoc_nd_pre(MDOC_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + print_text(h, "\\(em"); + print_otag(h, TAG_SPAN, "c", "Nd"); + return 1; +} + +static int +mdoc_nm_pre(MDOC_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + print_otag(h, TAG_TD, ""); + /* FALLTHROUGH */ + case ROFFT_ELEM: + print_otag(h, TAG_CODE, "c", "Nm"); + return 1; + case ROFFT_BODY: + print_otag(h, TAG_TD, ""); + return 1; + default: + abort(); + } + html_close_paragraph(h); + synopsis_pre(h, n); + print_otag(h, TAG_TABLE, "c", "Nm"); + print_otag(h, TAG_TR, ""); + return 1; +} + +static int +mdoc_xr_pre(MDOC_ARGS) +{ + if (NULL == n->child) + return 0; + + if (h->base_man1) + print_otag(h, TAG_A, "chM", "Xr", + n->child->string, n->child->next == NULL ? + NULL : n->child->next->string); + else + print_otag(h, TAG_A, "c", "Xr"); + + n = n->child; + print_text(h, n->string); + + if (NULL == (n = n->next)) + return 0; + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + return 0; +} + +static int +mdoc_tg_pre(MDOC_ARGS) +{ + char *id; + + if ((id = html_make_id(n, 1)) != NULL) { + print_tagq(h, print_otag(h, TAG_MARK, "i", id)); + free(id); + } + return 0; +} + +static int +mdoc_ns_pre(MDOC_ARGS) +{ + + if ( ! 
(NODE_LINE & n->flags)) + h->flags |= HTML_NOSPACE; + return 1; +} + +static int +mdoc_ar_pre(MDOC_ARGS) +{ + print_otag(h, TAG_VAR, "c", "Ar"); + return 1; +} + +static int +mdoc_xx_pre(MDOC_ARGS) +{ + print_otag(h, TAG_SPAN, "c", "Ux"); + return 1; +} + +static int +mdoc_it_pre(MDOC_ARGS) +{ + const struct roff_node *bl; + enum mdoc_list type; + + bl = n->parent; + while (bl->tok != MDOC_Bl) + bl = bl->parent; + type = bl->norm->Bl.type; + + switch (type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_item: + case LIST_enum: + switch (n->type) { + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + print_otag_id(h, TAG_LI, NULL, n); + break; + default: + break; + } + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + switch (n->type) { + case ROFFT_HEAD: + print_otag_id(h, TAG_DT, NULL, n); + break; + case ROFFT_BODY: + print_otag(h, TAG_DD, ""); + break; + default: + break; + } + break; + case LIST_tag: + switch (n->type) { + case ROFFT_HEAD: + print_otag_id(h, TAG_DT, NULL, n); + break; + case ROFFT_BODY: + if (n->child == NULL) { + print_otag(h, TAG_DD, "s", "width", "auto"); + print_text(h, "\\ "); + } else + print_otag(h, TAG_DD, ""); + break; + default: + break; + } + break; + case LIST_column: + switch (n->type) { + case ROFFT_HEAD: + break; + case ROFFT_BODY: + print_otag(h, TAG_TD, ""); + break; + default: + print_otag_id(h, TAG_TR, NULL, n); + } + default: + break; + } + + return 1; +} + +static int +mdoc_bl_pre(MDOC_ARGS) +{ + char cattr[32]; + struct mdoc_bl *bl; + enum htmltag elemtype; + + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + return 1; + default: + abort(); + } + + bl = &n->norm->Bl; + switch (bl->type) { + case LIST_bullet: + elemtype = TAG_UL; + (void)strlcpy(cattr, "Bl-bullet", sizeof(cattr)); + break; + case LIST_dash: + case LIST_hyphen: + elemtype = TAG_UL; + (void)strlcpy(cattr, "Bl-dash", 
sizeof(cattr)); + break; + case LIST_item: + elemtype = TAG_UL; + (void)strlcpy(cattr, "Bl-item", sizeof(cattr)); + break; + case LIST_enum: + elemtype = TAG_OL; + (void)strlcpy(cattr, "Bl-enum", sizeof(cattr)); + break; + case LIST_diag: + elemtype = TAG_DL; + (void)strlcpy(cattr, "Bl-diag", sizeof(cattr)); + break; + case LIST_hang: + elemtype = TAG_DL; + (void)strlcpy(cattr, "Bl-hang", sizeof(cattr)); + break; + case LIST_inset: + elemtype = TAG_DL; + (void)strlcpy(cattr, "Bl-inset", sizeof(cattr)); + break; + case LIST_ohang: + elemtype = TAG_DL; + (void)strlcpy(cattr, "Bl-ohang", sizeof(cattr)); + break; + case LIST_tag: + if (bl->offs) + print_otag(h, TAG_DIV, "c", "Bd-indent"); + print_otag_id(h, TAG_DL, + bl->comp ? "Bl-tag Bl-compact" : "Bl-tag", n->body); + return 1; + case LIST_column: + elemtype = TAG_TABLE; + (void)strlcpy(cattr, "Bl-column", sizeof(cattr)); + break; + default: + abort(); + } + if (bl->offs != NULL) + (void)strlcat(cattr, " Bd-indent", sizeof(cattr)); + if (bl->comp) + (void)strlcat(cattr, " Bl-compact", sizeof(cattr)); + print_otag_id(h, elemtype, cattr, n->body); + return 1; +} + +static int +mdoc_ex_pre(MDOC_ARGS) +{ + if (roff_node_prev(n) != NULL) + print_otag(h, TAG_BR, ""); + return 1; +} + +static int +mdoc_st_pre(MDOC_ARGS) +{ + print_otag(h, TAG_SPAN, "c", "St"); + return 1; +} + +static int +mdoc_em_pre(MDOC_ARGS) +{ + print_otag_id(h, TAG_I, "Em", n); + return 1; +} + +static int +mdoc_d1_pre(MDOC_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + print_otag_id(h, TAG_DIV, "Bd Bd-indent", n); + if (n->tok == MDOC_Dl) + print_otag(h, TAG_CODE, "c", "Li"); + return 1; +} + +static int +mdoc_sx_pre(MDOC_ARGS) +{ + char *id; + + id = html_make_id(n, 0); + print_otag(h, TAG_A, "chR", "Sx", id); + free(id); + return 1; +} + +static int +mdoc_bd_pre(MDOC_ARGS) +{ + char buf[16]; + struct roff_node *nn; + int 
comp; + + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + + /* Handle preceding whitespace. */ + + comp = n->norm->Bd.comp; + for (nn = n; nn != NULL && comp == 0; nn = nn->parent) { + if (nn->type != ROFFT_BLOCK) + continue; + if (nn->tok == MDOC_Sh || nn->tok == MDOC_Ss) + comp = 1; + if (roff_node_prev(nn) != NULL) + break; + } + (void)strlcpy(buf, "Bd", sizeof(buf)); + if (comp == 0) + (void)strlcat(buf, " Pp", sizeof(buf)); + + /* Handle the -offset argument. */ + + if (n->norm->Bd.offs != NULL && + strcmp(n->norm->Bd.offs, "left") != 0) + (void)strlcat(buf, " Bd-indent", sizeof(buf)); + + print_otag_id(h, TAG_DIV, buf, n); + return 1; +} + +static int +mdoc_pa_pre(MDOC_ARGS) +{ + print_otag(h, TAG_SPAN, "c", "Pa"); + return 1; +} + +static int +mdoc_ad_pre(MDOC_ARGS) +{ + print_otag(h, TAG_SPAN, "c", "Ad"); + return 1; +} + +static int +mdoc_an_pre(MDOC_ARGS) +{ + if (n->norm->An.auth == AUTH_split) { + h->flags &= ~HTML_NOSPLIT; + h->flags |= HTML_SPLIT; + return 0; + } + if (n->norm->An.auth == AUTH_nosplit) { + h->flags &= ~HTML_SPLIT; + h->flags |= HTML_NOSPLIT; + return 0; + } + + if (h->flags & HTML_SPLIT) + print_otag(h, TAG_BR, ""); + + if (n->sec == SEC_AUTHORS && ! 
(h->flags & HTML_NOSPLIT)) + h->flags |= HTML_SPLIT; + + print_otag(h, TAG_SPAN, "c", "An"); + return 1; +} + +static int +mdoc_cd_pre(MDOC_ARGS) +{ + synopsis_pre(h, n); + print_otag(h, TAG_CODE, "c", "Cd"); + return 1; +} + +static int +mdoc_fa_pre(MDOC_ARGS) +{ + const struct roff_node *nn; + struct tag *t; + + if (n->parent->tok != MDOC_Fo) { + print_otag(h, TAG_VAR, "c", "Fa"); + return 1; + } + for (nn = n->child; nn != NULL; nn = nn->next) { + t = print_otag(h, TAG_VAR, "c", "Fa"); + print_text(h, nn->string); + print_tagq(h, t); + if (nn->next != NULL) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + if (n->child != NULL && + (nn = roff_node_next(n)) != NULL && + nn->tok == MDOC_Fa) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + return 0; +} + +static int +mdoc_fd_pre(MDOC_ARGS) +{ + struct tag *t; + char *buf, *cp; + + synopsis_pre(h, n); + + if (NULL == (n = n->child)) + return 0; + + assert(n->type == ROFFT_TEXT); + + if (strcmp(n->string, "#include")) { + print_otag(h, TAG_CODE, "c", "Fd"); + return 1; + } + + print_otag(h, TAG_CODE, "c", "In"); + print_text(h, n->string); + + if (NULL != (n = n->next)) { + assert(n->type == ROFFT_TEXT); + + if (h->base_includes) { + cp = n->string; + if (*cp == '<' || *cp == '"') + cp++; + buf = mandoc_strdup(cp); + cp = strchr(buf, '\0') - 1; + if (cp >= buf && (*cp == '>' || *cp == '"')) + *cp = '\0'; + t = print_otag(h, TAG_A, "chI", "In", buf); + free(buf); + } else + t = print_otag(h, TAG_A, "c", "In"); + + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + for ( ; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + print_text(h, n->string); + } + + return 0; +} + +static int +mdoc_vt_pre(MDOC_ARGS) +{ + if (n->type == ROFFT_BLOCK) { + synopsis_pre(h, n); + return 1; + } else if (n->type == ROFFT_ELEM) { + synopsis_pre(h, n); + } else if (n->type == ROFFT_HEAD) + return 0; + + print_otag(h, TAG_VAR, "c", "Vt"); + return 1; +} + +static int +mdoc_ft_pre(MDOC_ARGS) +{ + 
synopsis_pre(h, n); + print_otag(h, TAG_VAR, "c", "Ft"); + return 1; +} + +static int +mdoc_fn_pre(MDOC_ARGS) +{ + struct tag *t; + char nbuf[BUFSIZ]; + const char *sp, *ep; + int sz, pretty; + + pretty = NODE_SYNPRETTY & n->flags; + synopsis_pre(h, n); + + /* Split apart into type and name. */ + assert(n->child->string); + sp = n->child->string; + + ep = strchr(sp, ' '); + if (NULL != ep) { + t = print_otag(h, TAG_VAR, "c", "Ft"); + + while (ep) { + sz = MIN((int)(ep - sp), BUFSIZ - 1); + (void)memcpy(nbuf, sp, (size_t)sz); + nbuf[sz] = '\0'; + print_text(h, nbuf); + sp = ++ep; + ep = strchr(sp, ' '); + } + print_tagq(h, t); + } + + t = print_otag_id(h, TAG_CODE, "Fn", n); + + if (sp) + print_text(h, sp); + + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + + for (n = n->child->next; n; n = n->next) { + if (NODE_SYNPRETTY & n->flags) + t = print_otag(h, TAG_VAR, "cs", "Fa", + "white-space", "nowrap"); + else + t = print_otag(h, TAG_VAR, "c", "Fa"); + print_text(h, n->string); + print_tagq(h, t); + if (n->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + + if (pretty) { + h->flags |= HTML_NOSPACE; + print_text(h, ";"); + } + + return 0; +} + +static int +mdoc_sm_pre(MDOC_ARGS) +{ + + if (NULL == n->child) + h->flags ^= HTML_NONOSPACE; + else if (0 == strcmp("on", n->child->string)) + h->flags &= ~HTML_NONOSPACE; + else + h->flags |= HTML_NONOSPACE; + + if ( ! (HTML_NONOSPACE & h->flags)) + h->flags &= ~HTML_NOSPACE; + + return 0; +} + +static int +mdoc_skip_pre(MDOC_ARGS) +{ + + return 0; +} + +static int +mdoc_pp_pre(MDOC_ARGS) +{ + char *id; + + if (n->flags & NODE_NOFILL) { + print_endline(h); + if (n->flags & NODE_ID) + mdoc_tg_pre(meta, n, h); + else { + h->col = 1; + print_endline(h); + } + } else { + html_close_paragraph(h); + id = n->flags & NODE_ID ? 
html_make_id(n, 1) : NULL; + print_otag(h, TAG_P, "ci", "Pp", id); + free(id); + } + return 0; +} + +static int +mdoc_lk_pre(MDOC_ARGS) +{ + const struct roff_node *link, *descr, *punct; + struct tag *t; + + if ((link = n->child) == NULL) + return 0; + + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link target and link text. */ + descr = link->next; + if (descr == punct) + descr = link; /* no text */ + t = print_otag(h, TAG_A, "ch", "Lk", link->string); + do { + if (descr->flags & (NODE_DELIMC | NODE_DELIMO)) + h->flags |= HTML_NOSPACE; + print_text(h, descr->string); + descr = descr->next; + } while (descr != punct); + print_tagq(h, t); + + /* Trailing punctuation. */ + while (punct != NULL) { + h->flags |= HTML_NOSPACE; + print_text(h, punct->string); + punct = punct->next; + } + return 0; +} + +static int +mdoc_mt_pre(MDOC_ARGS) +{ + struct tag *t; + char *cp; + + for (n = n->child; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + mandoc_asprintf(&cp, "mailto:%s", n->string); + t = print_otag(h, TAG_A, "ch", "Mt", cp); + print_text(h, n->string); + print_tagq(h, t); + free(cp); + } + return 0; +} + +static int +mdoc_fo_pre(MDOC_ARGS) +{ + struct tag *t; + + switch (n->type) { + case ROFFT_BLOCK: + synopsis_pre(h, n); + return 1; + case ROFFT_HEAD: + if (n->child != NULL) { + t = print_otag_id(h, TAG_CODE, "Fn", n); + print_text(h, n->child->string); + print_tagq(h, t); + } + return 0; + case ROFFT_BODY: + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + return 1; + default: + abort(); + } +} + +static void +mdoc_fo_post(MDOC_ARGS) +{ + if (n->type != ROFFT_BODY) + return; + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + h->flags |= HTML_NOSPACE; + print_text(h, ";"); +} + +static int +mdoc_in_pre(MDOC_ARGS) +{ + struct tag *t; + + synopsis_pre(h, n); + print_otag(h, TAG_CODE, "c", "In"); + + /* + * The 
first argument of the `In' gets special treatment as + * being a linked value. Subsequent values are printed + * afterward. groff does similarly. This also handles the case + * of no children. + */ + + if (NODE_SYNPRETTY & n->flags && NODE_LINE & n->flags) + print_text(h, "#include"); + + print_text(h, "<"); + h->flags |= HTML_NOSPACE; + + if (NULL != (n = n->child)) { + assert(n->type == ROFFT_TEXT); + + if (h->base_includes) + t = print_otag(h, TAG_A, "chI", "In", n->string); + else + t = print_otag(h, TAG_A, "c", "In"); + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + h->flags |= HTML_NOSPACE; + print_text(h, ">"); + + for ( ; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + print_text(h, n->string); + } + return 0; +} + +static int +mdoc_va_pre(MDOC_ARGS) +{ + print_otag(h, TAG_VAR, "c", "Va"); + return 1; +} + +static int +mdoc_ap_pre(MDOC_ARGS) +{ + h->flags |= HTML_NOSPACE; + print_text(h, "\\(aq"); + h->flags |= HTML_NOSPACE; + return 1; +} + +static int +mdoc_bf_pre(MDOC_ARGS) +{ + const char *cattr; + + switch (n->type) { + case ROFFT_BLOCK: + html_close_paragraph(h); + return 1; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + abort(); + } + + if (FONT_Em == n->norm->Bf.font) + cattr = "Bf Em"; + else if (FONT_Sy == n->norm->Bf.font) + cattr = "Bf Sy"; + else if (FONT_Li == n->norm->Bf.font) + cattr = "Bf Li"; + else + cattr = "Bf No"; + + /* Cannot use TAG_SPAN because it may contain blocks. */ + print_otag(h, TAG_DIV, "c", cattr); + return 1; +} + +static int +mdoc_igndelim_pre(MDOC_ARGS) +{ + h->flags |= HTML_IGNDELIM; + return 1; +} + +static void +mdoc_pf_post(MDOC_ARGS) +{ + if ( ! 
(n->next == NULL || n->next->flags & NODE_LINE)) + h->flags |= HTML_NOSPACE; +} + +static int +mdoc_rs_pre(MDOC_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + if (n->sec == SEC_SEE_ALSO) + html_close_paragraph(h); + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->sec == SEC_SEE_ALSO) + print_otag(h, TAG_P, "c", "Pp"); + print_otag(h, TAG_CITE, "c", "Rs"); + break; + default: + abort(); + } + return 1; +} + +static int +mdoc_no_pre(MDOC_ARGS) +{ + print_otag_id(h, TAG_SPAN, roff_name[n->tok], n); + return 1; +} + +static int +mdoc_sy_pre(MDOC_ARGS) +{ + print_otag_id(h, TAG_B, "Sy", n); + return 1; +} + +static int +mdoc_lb_pre(MDOC_ARGS) +{ + if (n->sec == SEC_LIBRARY && + n->flags & NODE_LINE && + roff_node_prev(n) != NULL) + print_otag(h, TAG_BR, ""); + + print_otag(h, TAG_SPAN, "c", "Lb"); + return 1; +} + +static int +mdoc__x_pre(MDOC_ARGS) +{ + struct roff_node *nn; + const char *cattr; + enum htmltag t; + + t = TAG_SPAN; + + switch (n->tok) { + case MDOC__A: + cattr = "RsA"; + if ((nn = roff_node_prev(n)) != NULL && nn->tok == MDOC__A && + ((nn = roff_node_next(n)) == NULL || nn->tok != MDOC__A)) + print_text(h, "and"); + break; + case MDOC__B: + t = TAG_I; + cattr = "RsB"; + break; + case MDOC__C: + cattr = "RsC"; + break; + case MDOC__D: + cattr = "RsD"; + break; + case MDOC__I: + t = TAG_I; + cattr = "RsI"; + break; + case MDOC__J: + t = TAG_I; + cattr = "RsJ"; + break; + case MDOC__N: + cattr = "RsN"; + break; + case MDOC__O: + cattr = "RsO"; + break; + case MDOC__P: + cattr = "RsP"; + break; + case MDOC__Q: + cattr = "RsQ"; + break; + case MDOC__R: + cattr = "RsR"; + break; + case MDOC__T: + cattr = "RsT"; + break; + case MDOC__U: + print_otag(h, TAG_A, "ch", "RsU", n->child->string); + return 1; + case MDOC__V: + cattr = "RsV"; + break; + default: + abort(); + } + + print_otag(h, t, "c", cattr); + return 1; +} + +static void +mdoc__x_post(MDOC_ARGS) +{ + struct roff_node *nn; + + if (n->tok == MDOC__A && + (nn = roff_node_next(n)) 
!= NULL && nn->tok == MDOC__A && + ((nn = roff_node_next(nn)) == NULL || nn->tok != MDOC__A) && + ((nn = roff_node_prev(n)) == NULL || nn->tok != MDOC__A)) + return; + + /* TODO: %U */ + + if (n->parent == NULL || n->parent->tok != MDOC_Rs) + return; + + h->flags |= HTML_NOSPACE; + print_text(h, roff_node_next(n) ? "," : "."); +} + +static int +mdoc_bk_pre(MDOC_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->parent->args != NULL || n->prev->child == NULL) + h->flags |= HTML_PREKEEP; + break; + default: + abort(); + } + + return 1; +} + +static void +mdoc_bk_post(MDOC_ARGS) +{ + + if (n->type == ROFFT_BODY) + h->flags &= ~(HTML_KEEP | HTML_PREKEEP); +} + +static int +mdoc_quote_pre(MDOC_ARGS) +{ + if (n->type != ROFFT_BODY) + return 1; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + print_text(h, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? "<" : "\\(la"); + break; + case MDOC_Bro: + case MDOC_Brq: + print_text(h, "\\(lC"); + break; + case MDOC_Bo: + case MDOC_Bq: + print_text(h, "\\(lB"); + break; + case MDOC_Oo: + case MDOC_Op: + print_text(h, "\\(lB"); + /* + * Give up on semantic markup for now. + * We cannot use TAG_SPAN because .Oo may contain blocks. + * We cannot use TAG_DIV because we might be in a + * phrasing context (like .Dl or .Pp); we cannot + * close out a .Pp at this point either because + * that would break the line. 
+ */ + /* XXX print_otag(h, TAG_???, "c", "Op"); */ + break; + case MDOC_En: + if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + print_text(h, n->norm->Es->child->string); + break; + case MDOC_Do: + case MDOC_Dq: + print_text(h, "\\(lq"); + break; + case MDOC_Qo: + case MDOC_Qq: + print_text(h, "\""); + break; + case MDOC_Po: + case MDOC_Pq: + print_text(h, "("); + break; + case MDOC_Ql: + print_text(h, "\\(oq"); + h->flags |= HTML_NOSPACE; + print_otag(h, TAG_CODE, "c", "Li"); + break; + case MDOC_So: + case MDOC_Sq: + print_text(h, "\\(oq"); + break; + default: + abort(); + } + + h->flags |= HTML_NOSPACE; + return 1; +} + +static void +mdoc_quote_post(MDOC_ARGS) +{ + + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return; + + h->flags |= HTML_NOSPACE; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + print_text(h, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? ">" : "\\(ra"); + break; + case MDOC_Bro: + case MDOC_Brq: + print_text(h, "\\(rC"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + print_text(h, "\\(rB"); + break; + case MDOC_En: + if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + h->flags &= ~HTML_NOSPACE; + else + print_text(h, n->norm->Es->child->next->string); + break; + case MDOC_Do: + case MDOC_Dq: + print_text(h, "\\(rq"); + break; + case MDOC_Qo: + case MDOC_Qq: + print_text(h, "\""); + break; + case MDOC_Po: + case MDOC_Pq: + print_text(h, ")"); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + print_text(h, "\\(cq"); + break; + default: + abort(); + } +} + +static int +mdoc_eo_pre(MDOC_ARGS) +{ + + if (n->type != ROFFT_BODY) + return 1; + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + print_text(h, "\\&"); + else if (n->end != ENDBODY_NOT ? 
n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + h->flags |= HTML_NOSPACE; + return 1; +} + +static void +mdoc_eo_post(MDOC_ARGS) +{ + int body, tail; + + if (n->type != ROFFT_BODY) + return; + + if (n->end != ENDBODY_NOT) { + h->flags &= ~HTML_NOSPACE; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + h->flags |= HTML_NOSPACE; + else if ( ! tail) + h->flags &= ~HTML_NOSPACE; +} + +static int +mdoc_abort_pre(MDOC_ARGS) +{ + abort(); +} diff --git a/usr.bin/mandoc/mdoc_macro.c b/usr.bin/mandoc/mdoc_macro.c new file mode 100644 index 0000000..a8aa3c9 --- /dev/null +++ b/usr.bin/mandoc/mdoc_macro.c @@ -0,0 +1,1598 @@ +/* $OpenBSD: mdoc_macro.c,v 1.191 2020/01/19 17:59:01 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +static void blk_full(MACRO_PROT_ARGS); +static void blk_exp_close(MACRO_PROT_ARGS); +static void blk_part_exp(MACRO_PROT_ARGS); +static void blk_part_imp(MACRO_PROT_ARGS); +static void ctx_synopsis(MACRO_PROT_ARGS); +static void in_line_eoln(MACRO_PROT_ARGS); +static void in_line_argn(MACRO_PROT_ARGS); +static void in_line(MACRO_PROT_ARGS); +static void phrase_ta(MACRO_PROT_ARGS); + +static void append_delims(struct roff_man *, int, int *, char *); +static void dword(struct roff_man *, int, int, const char *, + enum mdelim, int); +static int find_pending(struct roff_man *, enum roff_tok, + int, int, struct roff_node *); +static int lookup(struct roff_man *, int, int, int, const char *); +static int macro_or_word(MACRO_PROT_ARGS, char *, int); +static void break_intermediate(struct roff_node *, + struct roff_node *); +static int parse_rest(struct roff_man *, enum roff_tok, + int, int *, char *); +static enum roff_tok rew_alt(enum roff_tok); +static void rew_elem(struct roff_man *, enum roff_tok); +static void rew_last(struct roff_man *, const struct roff_node *); +static void rew_pending(struct roff_man *, + const struct roff_node *); + +static const struct mdoc_macro mdoc_macros[MDOC_MAX - MDOC_Dd] = { + { in_line_eoln, MDOC_PROLOGUE | MDOC_JOIN }, /* Dd */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ + { in_line_eoln, MDOC_PROLOGUE }, /* Os */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* Sh */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* Ss */ + { in_line_eoln, 0 }, /* Pp */ + { blk_part_imp, MDOC_PARSED | MDOC_JOIN }, /* D1 */ + { blk_part_imp, MDOC_PARSED | MDOC_JOIN }, /* Dl */ + { blk_full, MDOC_EXPLICIT }, /* Bd */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ed */ + { 
blk_full, MDOC_EXPLICIT }, /* Bl */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* El */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* It */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* An */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | + MDOC_IGNDELIM | MDOC_JOIN }, /* Ap */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Cd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Er */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */ + { in_line_eoln, 0 }, /* Ex */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */ + { in_line_eoln, 0 }, /* Fd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ft */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ic */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* In */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Li */ + { blk_full, MDOC_JOIN }, /* Nd */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Op */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ot */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */ + { in_line_eoln, 0 }, /* Rv */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* St */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Va */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */ + { in_line_eoln, MDOC_JOIN }, /* %A */ + { in_line_eoln, MDOC_JOIN }, /* %B */ + { in_line_eoln, MDOC_JOIN }, /* %D */ + { in_line_eoln, MDOC_JOIN }, /* %I */ + { in_line_eoln, MDOC_JOIN }, /* %J */ + { in_line_eoln, 0 }, /* %N */ + { in_line_eoln, MDOC_JOIN }, /* %O */ + { in_line_eoln, 0 }, /* %P */ + { in_line_eoln, MDOC_JOIN }, /* 
%R */ + { in_line_eoln, MDOC_JOIN }, /* %T */ + { in_line_eoln, 0 }, /* %V */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Ac */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Ao */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Aq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* At */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Bc */ + { blk_full, MDOC_EXPLICIT }, /* Bf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Bo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Bq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bsx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bx */ + { in_line_eoln, 0 }, /* Db */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Dc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Do */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Dq */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Ec */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ef */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Em */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Fx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ms */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* No */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | + MDOC_IGNDELIM | MDOC_JOIN }, /* Ns */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Nx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ox */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Pc */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Pf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Po */ + { blk_part_imp, MDOC_CALLABLE 
| MDOC_PARSED | MDOC_JOIN }, /* Pq */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Qc */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ql */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Qo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Qq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Re */ + { blk_full, MDOC_EXPLICIT }, /* Rs */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Sc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* So */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sq */ + { in_line_argn, 0 }, /* Sm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sy */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ux */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */ + { blk_full, MDOC_EXPLICIT | MDOC_CALLABLE }, /* Fo */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Fc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Oo */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Oc */ + { blk_full, MDOC_EXPLICIT }, /* Bk */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ek */ + { in_line_eoln, 0 }, /* Bt */ + { in_line_eoln, 0 }, /* Hf */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fr */ + { in_line_eoln, 0 }, /* Ud */ + { in_line, 0 }, /* Lb */ + { in_line_eoln, 0 }, /* Lp */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Lk */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Mt */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Brq */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + 
MDOC_EXPLICIT | MDOC_JOIN }, /* Bro */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Brc */ + { in_line_eoln, MDOC_JOIN }, /* %C */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Es */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* En */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ + { in_line_eoln, MDOC_JOIN }, /* %Q */ + { in_line_eoln, 0 }, /* %U */ + { phrase_ta, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ta */ + { in_line_eoln, 0 }, /* Tg */ +}; + + +const struct mdoc_macro * +mdoc_macro(enum roff_tok tok) +{ + assert(tok >= MDOC_Dd && tok < MDOC_MAX); + return mdoc_macros + (tok - MDOC_Dd); +} + +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. + */ +void +mdoc_endparse(struct roff_man *mdoc) +{ + struct roff_node *n; + + /* Scan for open explicit scopes. */ + + n = mdoc->last->flags & NODE_VALID ? + mdoc->last->parent : mdoc->last; + + for ( ; n; n = n->parent) + if (n->type == ROFFT_BLOCK && + mdoc_macro(n->tok)->flags & MDOC_EXPLICIT) + mandoc_msg(MANDOCERR_BLK_NOEND, + n->line, n->pos, "%s", roff_name[n->tok]); + + /* Rewind to the first. */ + + rew_last(mdoc, mdoc->meta.first); +} + +/* + * Look up the macro at *p called by "from", + * or as a line macro if from == TOKEN_NONE. + */ +static int +lookup(struct roff_man *mdoc, int from, int line, int ppos, const char *p) +{ + enum roff_tok res; + + if (mdoc->flags & MDOC_PHRASEQF) { + mdoc->flags &= ~MDOC_PHRASEQF; + return TOKEN_NONE; + } + if (from == TOKEN_NONE || mdoc_macro(from)->flags & MDOC_PARSED) { + res = roffhash_find(mdoc->mdocmac, p, 0); + if (res != TOKEN_NONE) { + if (mdoc_macro(res)->flags & MDOC_CALLABLE) + return res; + mandoc_msg(MANDOCERR_MACRO_CALL, line, ppos, "%s", p); + } + } + return TOKEN_NONE; +} + +/* + * Rewind up to and including a specific node. 
+ */ +static void +rew_last(struct roff_man *mdoc, const struct roff_node *to) +{ + + if (to->flags & NODE_VALID) + return; + + while (mdoc->last != to) { + mdoc_state(mdoc, mdoc->last); + mdoc->last->flags |= NODE_VALID | NODE_ENDED; + mdoc->last = mdoc->last->parent; + } + mdoc_state(mdoc, mdoc->last); + mdoc->last->flags |= NODE_VALID | NODE_ENDED; + mdoc->next = ROFF_NEXT_SIBLING; +} + +/* + * Rewind up to a specific block, including all blocks that broke it. + */ +static void +rew_pending(struct roff_man *mdoc, const struct roff_node *n) +{ + + for (;;) { + rew_last(mdoc, n); + + if (mdoc->last == n) { + switch (n->type) { + case ROFFT_HEAD: + roff_body_alloc(mdoc, n->line, n->pos, + n->tok); + if (n->tok == MDOC_Ss) + mdoc->flags &= ~ROFF_NONOFILL; + break; + case ROFFT_BLOCK: + break; + default: + return; + } + if ( ! (n->flags & NODE_BROKEN)) + return; + } else + n = mdoc->last; + + for (;;) { + if ((n = n->parent) == NULL) + return; + + if (n->type == ROFFT_BLOCK || + n->type == ROFFT_HEAD) { + if (n->flags & NODE_ENDED) + break; + else + return; + } + } + } +} + +/* + * For a block closing macro, return the corresponding opening one. + * Otherwise, return the macro itself. 
+ */ +static enum roff_tok +rew_alt(enum roff_tok tok) +{ + switch (tok) { + case MDOC_Ac: + return MDOC_Ao; + case MDOC_Bc: + return MDOC_Bo; + case MDOC_Brc: + return MDOC_Bro; + case MDOC_Dc: + return MDOC_Do; + case MDOC_Ec: + return MDOC_Eo; + case MDOC_Ed: + return MDOC_Bd; + case MDOC_Ef: + return MDOC_Bf; + case MDOC_Ek: + return MDOC_Bk; + case MDOC_El: + return MDOC_Bl; + case MDOC_Fc: + return MDOC_Fo; + case MDOC_Oc: + return MDOC_Oo; + case MDOC_Pc: + return MDOC_Po; + case MDOC_Qc: + return MDOC_Qo; + case MDOC_Re: + return MDOC_Rs; + case MDOC_Sc: + return MDOC_So; + case MDOC_Xc: + return MDOC_Xo; + default: + return tok; + } +} + +static void +rew_elem(struct roff_man *mdoc, enum roff_tok tok) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->type != ROFFT_ELEM) + n = n->parent; + assert(n->type == ROFFT_ELEM); + assert(tok == n->tok); + rew_last(mdoc, n); +} + +static void +break_intermediate(struct roff_node *n, struct roff_node *breaker) +{ + if (n != breaker && + n->type != ROFFT_BLOCK && n->type != ROFFT_HEAD && + (n->type != ROFFT_BODY || n->end != ENDBODY_NOT)) + n = n->parent; + while (n != breaker) { + if ( ! (n->flags & NODE_VALID)) + n->flags |= NODE_BROKEN; + n = n->parent; + } +} + +/* + * If there is an open sub-block of the target requiring + * explicit close-out, postpone closing out the target until + * the rew_pending() call closing out the sub-block. + */ +static int +find_pending(struct roff_man *mdoc, enum roff_tok tok, int line, int ppos, + struct roff_node *target) +{ + struct roff_node *n; + int irc; + + if (target->flags & NODE_VALID) + return 0; + + irc = 0; + for (n = mdoc->last; n != NULL && n != target; n = n->parent) { + if (n->flags & NODE_ENDED) + continue; + if (n->type == ROFFT_BLOCK && + mdoc_macro(n->tok)->flags & MDOC_EXPLICIT) { + irc = 1; + break_intermediate(mdoc->last, target); + if (target->type == ROFFT_HEAD) + target->flags |= NODE_ENDED; + else if ( ! 
(target->flags & NODE_ENDED)) { + mandoc_msg(MANDOCERR_BLK_NEST, + line, ppos, "%s breaks %s", + roff_name[tok], roff_name[n->tok]); + mdoc_endbody_alloc(mdoc, line, ppos, + tok, target); + } + } + } + return irc; +} + +/* + * Allocate a word and check whether it's punctuation or not. + * Punctuation consists of those tokens found in mdoc_isdelim(). + */ +static void +dword(struct roff_man *mdoc, int line, int col, const char *p, + enum mdelim d, int may_append) +{ + + if (d == DELIM_MAX) + d = mdoc_isdelim(p); + + if (may_append && + ! (mdoc->flags & (MDOC_SYNOPSIS | MDOC_KEEP | MDOC_SMOFF)) && + d == DELIM_NONE && mdoc->last->type == ROFFT_TEXT && + mdoc_isdelim(mdoc->last->string) == DELIM_NONE) { + roff_word_append(mdoc, p); + return; + } + + roff_word_alloc(mdoc, line, col, p); + + /* + * If the word consists of a bare delimiter, + * flag the new node accordingly, + * unless doing so was vetoed by the invoking macro. + * Always clear the veto, it is only valid for one word. + */ + + if (d == DELIM_OPEN) + mdoc->last->flags |= NODE_DELIMO; + else if (d == DELIM_CLOSE && + ! (mdoc->flags & MDOC_NODELIMC) && + mdoc->last->parent->tok != MDOC_Fd) + mdoc->last->flags |= NODE_DELIMC; + mdoc->flags &= ~MDOC_NODELIMC; +} + +static void +append_delims(struct roff_man *mdoc, int line, int *pos, char *buf) +{ + char *p; + int la; + enum margserr ac; + + if (buf[*pos] == '\0') + return; + + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, TOKEN_NONE, &p); + if (ac == ARGS_EOLN) + break; + dword(mdoc, line, la, p, DELIM_MAX, 1); + + /* + * If we encounter end-of-sentence symbols, then trigger + * the double-space. + * + * XXX: it's easy to allow this to propagate outward to + * the last symbol, such that `. )' will cause the + * correct double-spacing. However, (1) groff isn't + * smart enough to do this and (2) it would require + * knowing which symbols break this behaviour, for + * example, `. ;' shouldn't propagate the double-space. 
+ */ + + if (mandoc_eos(p, strlen(p))) + mdoc->last->flags |= NODE_EOS; + if (ac == ARGS_ALLOC) + free(p); + } +} + +/* + * Parse one word. + * If it is a macro, call it and return 1. + * Otherwise, allocate it and return 0. + */ +static int +macro_or_word(MACRO_PROT_ARGS, char *p, int parsed) +{ + int ntok; + + ntok = buf[ppos] == '"' || parsed == 0 || + mdoc->flags & MDOC_PHRASELIT ? TOKEN_NONE : + lookup(mdoc, tok, line, ppos, p); + + if (ntok == TOKEN_NONE) { + dword(mdoc, line, ppos, p, DELIM_MAX, tok == TOKEN_NONE || + mdoc_macro(tok)->flags & MDOC_JOIN); + return 0; + } else { + if (tok != TOKEN_NONE && + mdoc_macro(tok)->fp == in_line_eoln) + rew_elem(mdoc, tok); + (*mdoc_macro(ntok)->fp)(mdoc, ntok, line, ppos, pos, buf); + if (tok == TOKEN_NONE) + append_delims(mdoc, line, pos, buf); + return 1; + } +} + +/* + * Close out block partial/full explicit. + */ +static void +blk_exp_close(MACRO_PROT_ARGS) +{ + struct roff_node *body; /* Our own body. */ + struct roff_node *endbody; /* Our own end marker. */ + struct roff_node *itblk; /* An It block starting later. */ + struct roff_node *later; /* A sub-block starting later. */ + struct roff_node *n; /* Search back to our block. */ + struct roff_node *target; /* For find_pending(). */ + + int j, lastarg, maxargs, nl, pending; + enum margserr ac; + enum roff_tok atok, ntok; + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + switch (tok) { + case MDOC_Ec: + maxargs = 1; + break; + case MDOC_Ek: + mdoc->flags &= ~MDOC_KEEP; + /* FALLTHROUGH */ + default: + maxargs = 0; + break; + } + + /* Search backwards for the beginning of our own body. */ + + atok = rew_alt(tok); + body = NULL; + for (n = mdoc->last; n; n = n->parent) { + if (n->flags & NODE_ENDED || n->tok != atok || + n->type != ROFFT_BODY || n->end != ENDBODY_NOT) + continue; + body = n; + break; + } + + /* + * Search backwards for beginnings of blocks, + * both of our own and of pending sub-blocks. 
+ */ + + endbody = itblk = later = NULL; + for (n = mdoc->last; n; n = n->parent) { + if (n->flags & NODE_ENDED) + continue; + + /* + * Mismatching end macros can never break anything + * and we only care about the breaking of BLOCKs. + */ + + if (body == NULL || n->type != ROFFT_BLOCK) + continue; + + /* + * SYNOPSIS name blocks can not be broken themselves, + * but they do get broken together with a broken child. + */ + + if (n->tok == MDOC_Nm) { + if (later != NULL) + n->flags |= NODE_BROKEN | NODE_ENDED; + continue; + } + + if (n->tok == MDOC_It) { + itblk = n; + continue; + } + + if (atok == n->tok) { + + /* + * Found the start of our own block. + * When there is no pending sub block, + * just proceed to closing out. + */ + + if (later == NULL || + (tok == MDOC_El && itblk == NULL)) + break; + + /* + * When there is a pending sub block, postpone + * closing out the current block until the + * rew_pending() closing out the sub-block. + * Mark the place where the formatting - but not + * the scope - of the current block ends. + */ + + mandoc_msg(MANDOCERR_BLK_NEST, + line, ppos, "%s breaks %s", + roff_name[atok], roff_name[later->tok]); + + endbody = mdoc_endbody_alloc(mdoc, line, ppos, + atok, body); + + if (tok == MDOC_El) + itblk->flags |= NODE_ENDED | NODE_BROKEN; + + /* + * If a block closing macro taking arguments + * breaks another block, put the arguments + * into the end marker. + */ + + if (maxargs) + mdoc->next = ROFF_NEXT_CHILD; + break; + } + + /* + * Explicit blocks close out description lines, but + * even those can get broken together with a child. + */ + + if (n->tok == MDOC_Nd) { + if (later != NULL) + n->flags |= NODE_BROKEN | NODE_ENDED; + else + rew_last(mdoc, n); + continue; + } + + /* Breaking an open sub block. 
*/ + + break_intermediate(mdoc->last, body); + n->flags |= NODE_BROKEN; + if (later == NULL) + later = n; + } + + if (body == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, line, ppos, + "%s", roff_name[tok]); + if (maxargs && endbody == NULL) { + /* + * Stray .Ec without previous .Eo: + * Break the output line, keep the arguments. + */ + roff_elem_alloc(mdoc, line, ppos, ROFF_br); + rew_elem(mdoc, ROFF_br); + } + } else if (endbody == NULL) { + rew_last(mdoc, body); + if (maxargs) + mdoc_tail_alloc(mdoc, line, ppos, atok); + } + + if ((mdoc_macro(tok)->flags & MDOC_PARSED) == 0) { + if (buf[*pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, line, ppos, + "%s %s", roff_name[tok], buf + *pos); + if (endbody == NULL && n != NULL) + rew_pending(mdoc, n); + + /* + * Restore the fill mode that was set before the display. + * This needs to be done here rather than during validation + * such that subsequent nodes get the right flags. + */ + + if (tok == MDOC_Ed && body != NULL) { + if (body->flags & NODE_NOFILL) + mdoc->flags |= ROFF_NOFILL; + else + mdoc->flags &= ~ROFF_NOFILL; + } + return; + } + + if (endbody != NULL) + n = endbody; + + ntok = TOKEN_NONE; + for (j = 0; ; j++) { + lastarg = *pos; + + if (j == maxargs && n != NULL) + rew_last(mdoc, n); + + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) + break; + + ntok = lookup(mdoc, tok, line, lastarg, p); + + if (ntok == TOKEN_NONE) { + dword(mdoc, line, lastarg, p, DELIM_MAX, + mdoc_macro(tok)->flags & MDOC_JOIN); + if (ac == ARGS_ALLOC) + free(p); + continue; + } + if (ac == ARGS_ALLOC) + free(p); + + if (n != NULL) + rew_last(mdoc, n); + mdoc->flags &= ~MDOC_NEWLINE; + (*mdoc_macro(ntok)->fp)(mdoc, ntok, line, lastarg, pos, buf); + break; + } + + if (n != NULL) { + pending = 0; + if (ntok != TOKEN_NONE && n->flags & NODE_BROKEN) { + target = n; + do + target = target->parent; + while ( ! 
(target->flags & NODE_ENDED)); + pending = find_pending(mdoc, ntok, line, ppos, target); + } + if ( ! pending) + rew_pending(mdoc, n); + } + if (nl) + append_delims(mdoc, line, pos, buf); +} + +static void +in_line(MACRO_PROT_ARGS) +{ + int la, scope, cnt, firstarg, mayopen, nc, nl; + enum roff_tok ntok; + enum margserr ac; + enum mdelim d; + struct mdoc_arg *arg; + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * Whether we allow ignored elements (those without content, + * usually because of reserved words) to squeak by. + */ + + switch (tok) { + case MDOC_An: + case MDOC_Ar: + case MDOC_Fl: + case MDOC_Mt: + case MDOC_Nm: + case MDOC_Pa: + nc = 1; + break; + default: + nc = 0; + break; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + + d = DELIM_NONE; + firstarg = 1; + mayopen = 1; + for (cnt = scope = 0;; ) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + + /* + * At the end of a macro line, + * opening delimiters do not suppress spacing. + */ + + if (ac == ARGS_EOLN) { + if (d == DELIM_OPEN) + mdoc->last->flags &= ~NODE_DELIMO; + break; + } + + /* + * The rest of the macro line is only punctuation, + * to be handled by append_delims(). + * If there were no other arguments, + * do not allow the first one to suppress spacing, + * even if it turns out to be a closing one. + */ + + if (ac == ARGS_PUNCT) { + if (cnt == 0 && (nc == 0 || tok == MDOC_An)) + mdoc->flags |= MDOC_NODELIMC; + break; + } + + ntok = (tok == MDOC_Fn && !cnt) ? + TOKEN_NONE : lookup(mdoc, tok, line, la, p); + + /* + * In this case, we've located a submacro and must + * execute it. Close out scope, if open. If no + * elements have been generated, either create one (nc) + * or raise a warning. + */ + + if (ntok != TOKEN_NONE) { + if (scope) + rew_elem(mdoc, tok); + if (nc && ! cnt) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + rew_last(mdoc, mdoc->last); + } else if ( ! nc && ! 
cnt) { + mdoc_argv_free(arg); + mandoc_msg(MANDOCERR_MACRO_EMPTY, + line, ppos, "%s", roff_name[tok]); + } + (*mdoc_macro(ntok)->fp)(mdoc, ntok, + line, la, pos, buf); + if (nl) + append_delims(mdoc, line, pos, buf); + if (ac == ARGS_ALLOC) + free(p); + return; + } + + /* + * Handle punctuation. Set up our scope, if a word; + * rewind the scope, if a delimiter; then append the word. + */ + + if ((d = mdoc_isdelim(p)) != DELIM_NONE) { + /* + * If we encounter closing punctuation, no word + * has been emitted, no scope is open, and we're + * allowed to have an empty element, then start + * a new scope. + */ + if ((d == DELIM_CLOSE || + (d == DELIM_MIDDLE && tok == MDOC_Fl)) && + !cnt && !scope && nc && mayopen) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + scope = 1; + cnt++; + if (tok == MDOC_Nm) + mayopen = 0; + } + /* + * Close out our scope, if one is open, before + * any punctuation. + */ + if (scope && tok != MDOC_Lk) { + rew_elem(mdoc, tok); + scope = 0; + if (tok == MDOC_Fn) + mayopen = 0; + } + } else if (mayopen && !scope) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + scope = 1; + cnt++; + } + + dword(mdoc, line, la, p, d, + mdoc_macro(tok)->flags & MDOC_JOIN); + + if (ac == ARGS_ALLOC) + free(p); + + /* + * If the first argument is a closing delimiter, + * do not suppress spacing before it. + */ + + if (firstarg && d == DELIM_CLOSE && !nc) + mdoc->last->flags &= ~NODE_DELIMC; + firstarg = 0; + + /* + * `Fl' macros have their scope re-opened with each new + * word so that the `-' can be added to each one without + * having to parse out spaces. + */ + if (scope && tok == MDOC_Fl) { + rew_elem(mdoc, tok); + scope = 0; + } + } + + if (scope && tok != MDOC_Lk) { + rew_elem(mdoc, tok); + scope = 0; + } + + /* + * If no elements have been collected and we're allowed to have + * empties (nc), open a scope and close it out. Otherwise, + * raise a warning. + */ + + if ( ! 
cnt) { + if (nc) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + rew_last(mdoc, mdoc->last); + } else { + mdoc_argv_free(arg); + mandoc_msg(MANDOCERR_MACRO_EMPTY, + line, ppos, "%s", roff_name[tok]); + } + } + if (nl) + append_delims(mdoc, line, pos, buf); + if (scope) + rew_elem(mdoc, tok); +} + +static void +blk_full(MACRO_PROT_ARGS) +{ + struct mdoc_arg *arg; + struct roff_node *blk; /* Our own or a broken block. */ + struct roff_node *head; /* Our own head. */ + struct roff_node *body; /* Our own body. */ + struct roff_node *n; + char *p; + size_t iarg; + int done, la, nl, parsed; + enum margserr ac, lac; + + nl = MDOC_NEWLINE & mdoc->flags; + + if (buf[*pos] == '\0' && (tok == MDOC_Sh || tok == MDOC_Ss)) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, + line, ppos, "%s", roff_name[tok]); + return; + } + + if ((mdoc_macro(tok)->flags & MDOC_EXPLICIT) == 0) { + + /* Here, tok is one of Sh Ss Nm Nd It. */ + + blk = NULL; + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->flags & NODE_ENDED) { + if ( ! (n->flags & NODE_VALID)) + n->flags |= NODE_BROKEN; + continue; + } + if (n->type != ROFFT_BLOCK) + continue; + + if (tok == MDOC_It && n->tok == MDOC_Bl) { + if (blk != NULL) { + mandoc_msg(MANDOCERR_BLK_BROKEN, + line, ppos, "It breaks %s", + roff_name[blk->tok]); + rew_pending(mdoc, blk); + } + break; + } + + if (mdoc_macro(n->tok)->flags & MDOC_EXPLICIT) { + switch (tok) { + case MDOC_Sh: + case MDOC_Ss: + mandoc_msg(MANDOCERR_BLK_BROKEN, + line, ppos, + "%s breaks %s", roff_name[tok], + roff_name[n->tok]); + rew_pending(mdoc, n); + n = mdoc->last; + continue; + case MDOC_It: + /* Delay in case it's astray. */ + blk = n; + continue; + default: + break; + } + break; + } + + /* Here, n is one of Sh Ss Nm Nd It. */ + + if (tok != MDOC_Sh && (n->tok == MDOC_Sh || + (tok != MDOC_Ss && (n->tok == MDOC_Ss || + (tok != MDOC_It && n->tok == MDOC_It))))) + break; + + /* Item breaking an explicit block. 
*/ + + if (blk != NULL) { + mandoc_msg(MANDOCERR_BLK_BROKEN, line, ppos, + "It breaks %s", roff_name[blk->tok]); + rew_pending(mdoc, blk); + blk = NULL; + } + + /* Close out prior implicit scopes. */ + + rew_pending(mdoc, n); + } + + /* Skip items outside lists. */ + + if (tok == MDOC_It && (n == NULL || n->tok != MDOC_Bl)) { + mandoc_msg(MANDOCERR_IT_STRAY, + line, ppos, "It %s", buf + *pos); + roff_elem_alloc(mdoc, line, ppos, ROFF_br); + rew_elem(mdoc, ROFF_br); + return; + } + } + + /* + * This routine accommodates implicitly- and explicitly-scoped + * macro openings. Implicit ones first close out prior scope + * (seen above). Delay opening the head until necessary to + * allow leading punctuation to print. Special consideration + * for `It -column', which has phrase-part syntax instead of + * regular child nodes. + */ + + switch (tok) { + case MDOC_Sh: + mdoc->flags &= ~ROFF_NOFILL; + break; + case MDOC_Ss: + mdoc->flags |= ROFF_NONOFILL; + break; + default: + break; + } + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + blk = mdoc_block_alloc(mdoc, line, ppos, tok, arg); + head = body = NULL; + + /* + * Exception: Heads of `It' macros in `-diag' lists are not + * parsed, even though `It' macros in general are parsed. + */ + + parsed = tok != MDOC_It || + mdoc->last->parent->tok != MDOC_Bl || + mdoc->last->parent->norm->Bl.type != LIST_diag; + + /* + * The `Nd' macro has all arguments in its body: it's a hybrid + * of block partial-explicit and full-implicit. Stupid. + */ + + if (tok == MDOC_Nd) { + head = roff_head_alloc(mdoc, line, ppos, tok); + rew_last(mdoc, head); + body = roff_body_alloc(mdoc, line, ppos, tok); + } + + if (tok == MDOC_Bk) + mdoc->flags |= MDOC_KEEP; + + ac = ARGS_EOLN; + for (;;) { + + /* + * If we are right after a tab character, + * do not parse the first word for macros. 
+ */ + + if (mdoc->flags & MDOC_PHRASEQN) { + mdoc->flags &= ~MDOC_PHRASEQN; + mdoc->flags |= MDOC_PHRASEQF; + } + + la = *pos; + lac = ac; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN) { + if (lac != ARGS_PHRASE || + ! (mdoc->flags & MDOC_PHRASEQF)) + break; + + /* + * This line ends in a tab; start the next + * column now, with a leading blank. + */ + + if (body != NULL) + rew_last(mdoc, body); + body = roff_body_alloc(mdoc, line, ppos, tok); + roff_word_alloc(mdoc, line, ppos, "\\&"); + break; + } + + if (tok == MDOC_Bd || tok == MDOC_Bk) { + mandoc_msg(MANDOCERR_ARG_EXCESS, line, la, + "%s ... %s", roff_name[tok], buf + la); + if (ac == ARGS_ALLOC) + free(p); + break; + } + if (tok == MDOC_Rs) { + mandoc_msg(MANDOCERR_ARG_SKIP, + line, la, "Rs %s", buf + la); + if (ac == ARGS_ALLOC) + free(p); + break; + } + if (ac == ARGS_PUNCT) + break; + + /* + * Emit leading punctuation (i.e., punctuation before + * the ROFFT_HEAD) for non-phrase types. + */ + + if (head == NULL && + ac != ARGS_PHRASE && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); + continue; + } + + /* Open a head if one hasn't been opened. */ + + if (head == NULL) + head = roff_head_alloc(mdoc, line, ppos, tok); + + if (ac == ARGS_PHRASE) { + + /* + * If we haven't opened a body yet, rewind the + * head; if we have, rewind that instead. + */ + + rew_last(mdoc, body == NULL ? head : body); + body = roff_body_alloc(mdoc, line, ppos, tok); + + /* Process to the tab or to the end of the line. */ + + mdoc->flags |= MDOC_PHRASE; + parse_rest(mdoc, TOKEN_NONE, line, &la, buf); + mdoc->flags &= ~MDOC_PHRASE; + + /* There may have been `Ta' macros. 
*/ + + while (body->next != NULL) + body = body->next; + continue; + } + + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, parsed); + if (ac == ARGS_ALLOC) + free(p); + if (done) + break; + } + + if (blk->flags & NODE_VALID) + return; + if (head == NULL) + head = roff_head_alloc(mdoc, line, ppos, tok); + if (nl && tok != MDOC_Bd && tok != MDOC_Bl && tok != MDOC_Rs) + append_delims(mdoc, line, pos, buf); + if (body != NULL) + goto out; + if (find_pending(mdoc, tok, line, ppos, head)) + return; + + /* Close out scopes to remain in a consistent state. */ + + rew_last(mdoc, head); + body = roff_body_alloc(mdoc, line, ppos, tok); + if (tok == MDOC_Ss) + mdoc->flags &= ~ROFF_NONOFILL; + + /* + * Set up fill mode for display blocks. + * This needs to be done here up front rather than during + * validation such that child nodes get the right flags. + */ + + if (tok == MDOC_Bd && arg != NULL) { + for (iarg = 0; iarg < arg->argc; iarg++) { + switch (arg->argv[iarg].arg) { + case MDOC_Unfilled: + case MDOC_Literal: + mdoc->flags |= ROFF_NOFILL; + break; + case MDOC_Filled: + case MDOC_Ragged: + case MDOC_Centred: + mdoc->flags &= ~ROFF_NOFILL; + break; + default: + continue; + } + break; + } + } +out: + if (mdoc->flags & MDOC_FREECOL) { + rew_last(mdoc, body); + rew_last(mdoc, blk); + mdoc->flags &= ~MDOC_FREECOL; + } +} +/* + * Parse a partial implicit block macro: allocate the block and an + * empty head, hand leading opening delimiters to dword(), open the + * body on demand, and rewind once the arguments are used up. + */ +static void +blk_part_imp(MACRO_PROT_ARGS) +{ + int done, la, nl; + enum margserr ac; + char *p; + struct roff_node *blk; /* saved block context */ + struct roff_node *body; /* saved body context */ + struct roff_node *n; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * A macro that spans to the end of the line. This is generally + * (but not necessarily) called as the first macro. The block + * has a head as the immediate child, which is always empty, + * followed by zero or more opening punctuation nodes, then the + * body (which may be empty, depending on the macro), then zero + * or more closing punctuation nodes. 
+ */ + + blk = mdoc_block_alloc(mdoc, line, ppos, tok, NULL); + rew_last(mdoc, roff_head_alloc(mdoc, line, ppos, tok)); + + /* + * Open the body scope "on-demand", that is, after we've + * processed all the leading delimiters (open parenthesis, + * etc.). + */ + + for (body = NULL; ; ) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN || ac == ARGS_PUNCT) + break; + + if (body == NULL && mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); + continue; + } + + if (body == NULL) + body = roff_body_alloc(mdoc, line, ppos, tok); + + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) + break; + } + /* No argument opened the body: allocate an empty one. */ if (body == NULL) + body = roff_body_alloc(mdoc, line, ppos, tok); + + if (find_pending(mdoc, tok, line, ppos, body)) + return; + + rew_last(mdoc, body); + if (nl) + append_delims(mdoc, line, pos, buf); + rew_pending(mdoc, blk); + + /* Move trailing .Ns out of scope. */ + + for (n = body->child; n && n->next; n = n->next) + /* Do nothing; the loop only walks to the last child. */ ; + if (n && n->tok == MDOC_Ns) + roff_node_relink(mdoc, n); +} +/* + * Parse the opening macro of a partial explicit block: leading + * opening delimiters go to dword(), and the first other argument + * opens the head (rewound at once) and then the body. + */ +static void +blk_part_exp(MACRO_PROT_ARGS) +{ + int done, la, nl; + enum margserr ac; + struct roff_node *head; /* keep track of head */ + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * The opening of an explicit macro having zero or more leading + * punctuation nodes; a head with optional single element (the + * case of `Eo'); and a body that may be empty. + */ + + roff_block_alloc(mdoc, line, ppos, tok); + head = NULL; + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) + break; + + /* Flush out leading punctuation. 
+ */ + + if (head == NULL && mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); + continue; + } + + if (head == NULL) { + head = roff_head_alloc(mdoc, line, ppos, tok); + if (tok == MDOC_Eo) /* Not parsed: the argument goes into the head as a word. */ + dword(mdoc, line, la, p, DELIM_MAX, 0); + rew_last(mdoc, head); + roff_body_alloc(mdoc, line, ppos, tok); + if (tok == MDOC_Eo) { + if (ac == ARGS_ALLOC) + free(p); + continue; + } + } + + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) + break; + } + + /* No head was opened: add an empty head and a body to stay consistent. */ + + if (head == NULL) { + rew_last(mdoc, roff_head_alloc(mdoc, line, ppos, tok)); + roff_body_alloc(mdoc, line, ppos, tok); + } + if (nl) + append_delims(mdoc, line, pos, buf); +} +/* + * Parse an in-line macro that takes at most a fixed number of + * arguments; see the comment in the body for the scope rules. + */ +static void +in_line_argn(MACRO_PROT_ARGS) +{ + struct mdoc_arg *arg; + char *p; + enum margserr ac; + enum roff_tok ntok; + int state; /* arg#; -1: not yet open; -2: closed */ + int la, maxargs, nl; + + nl = mdoc->flags & MDOC_NEWLINE; + + /* + * A line macro that has a fixed number of arguments (maxargs). + * Only open the scope once the first non-leading-punctuation is + * found (unless MDOC_IGNDELIM is noted, like in `Pf'), then + * keep it open until the maximum number of arguments are + * exhausted. 
+ */ + + switch (tok) { + case MDOC_Ap: + case MDOC_Ns: + case MDOC_Ux: + maxargs = 0; + break; + case MDOC_Bx: + case MDOC_Es: + case MDOC_Xr: + maxargs = 2; + break; + default: + maxargs = 1; + break; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + + state = -1; + p = NULL; + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + + if ((ac == ARGS_WORD || ac == ARGS_ALLOC) && state == -1 && + (mdoc_macro(tok)->flags & MDOC_IGNDELIM) == 0 && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + if (ac == ARGS_ALLOC) + free(p); + continue; + } + + if (state == -1 && tok != MDOC_In && + tok != MDOC_St && tok != MDOC_Xr) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + state = 0; + } + + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) { + if (abs(state) < 2 && tok == MDOC_Pf) + mandoc_msg(MANDOCERR_PF_SKIP, + line, ppos, "Pf %s", + p == NULL ? "at eol" : p); + break; + } + /* The quota of arguments is used up: close the element. */ + if (state == maxargs) { + rew_elem(mdoc, tok); + state = -2; + } + + ntok = (tok == MDOC_Pf && state == 0) ? 
+ TOKEN_NONE : lookup(mdoc, tok, line, la, p); + + if (ntok != TOKEN_NONE) { + if (state >= 0) { + rew_elem(mdoc, tok); + state = -2; + } + (*mdoc_macro(ntok)->fp)(mdoc, ntok, + line, la, pos, buf); + if (ac == ARGS_ALLOC) + free(p); + break; + } + + if (mdoc_macro(tok)->flags & MDOC_IGNDELIM || + mdoc_isdelim(p) == DELIM_NONE) { + if (state == -1) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + state = 1; + } else if (state >= 0) + state++; + } else if (state >= 0) { + rew_elem(mdoc, tok); + state = -2; + } + + dword(mdoc, line, la, p, DELIM_MAX, + mdoc_macro(tok)->flags & MDOC_JOIN); + if (ac == ARGS_ALLOC) + free(p); + /* p may have been freed just above; repoint it at mdoc->last->string. */ p = mdoc->last->string; + } + + if (state == -1) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, + line, ppos, "%s", roff_name[tok]); + return; + } + + if (state == 0 && tok == MDOC_Pf) + append_delims(mdoc, line, pos, buf); + if (state >= 0) + rew_elem(mdoc, tok); + if (nl) + append_delims(mdoc, line, pos, buf); +} +/* + * Parse an in-line macro whose scope extends to the end of the + * input line: open the element, let parse_rest() consume the + * remaining arguments, and close the element unless parse_rest() + * returned non-zero. + */ +static void +in_line_eoln(MACRO_PROT_ARGS) +{ + struct roff_node *n; + struct mdoc_arg *arg; + /* Outside SYNOPSIS, `Pp' or `Lp' arriving inside an Nm node first calls rew_last() on Nm's parent. */ + if ((tok == MDOC_Pp || tok == MDOC_Lp) && + ! (mdoc->flags & MDOC_SYNOPSIS)) { + n = mdoc->last; + if (mdoc->next == ROFF_NEXT_SIBLING) + n = n->parent; + if (n->tok == MDOC_Nm) + rew_last(mdoc, n->parent); + } + + if (buf[*pos] == '\0' && + (tok == MDOC_Fd || *roff_name[tok] == '%')) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, + line, ppos, "%s", roff_name[tok]); + return; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + if (parse_rest(mdoc, tok, line, pos, buf)) + return; + rew_elem(mdoc, tok); +} + +/* + * The simplest argument parser available: Parse the remaining + * words until the end of the phrase or line and return 0 + * or until the next macro, call that macro, and return 1. 
+ * Each argument is passed to macro_or_word(); a string that + * mdoc_args() reports as ARGS_ALLOC is freed here afterwards. + */ +static int +parse_rest(struct roff_man *mdoc, enum roff_tok tok, + int line, int *pos, char *buf) +{ + char *p; + int done, la; + enum margserr ac; + + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN) + return 0; + done = macro_or_word(mdoc, tok, line, la, pos, buf, p, 1); + if (ac == ARGS_ALLOC) + free(p); + if (done) + return 1; + } +} +/* + * Dispatch `Nm' and `Vt': unless both MDOC_SYNOPSIS and MDOC_NEWLINE + * are set in mdoc->flags, they are handled as in-line macros; + * otherwise `Nm' is parsed by blk_full() and `Vt' by blk_part_imp(). + */ +static void +ctx_synopsis(MACRO_PROT_ARGS) +{ + + if (~mdoc->flags & (MDOC_SYNOPSIS | MDOC_NEWLINE)) + in_line(mdoc, tok, line, ppos, pos, buf); + else if (tok == MDOC_Nm) + blk_full(mdoc, tok, line, ppos, pos, buf); + else { + assert(tok == MDOC_Vt); + blk_part_imp(mdoc, tok, line, ppos, pos, buf); + } +} + +/* + * Phrases occur within `Bl -column' entries, separated by `Ta' or tabs. + * They're unusual because they're basically free-form text until a + * macro is encountered. + */ +static void +phrase_ta(MACRO_PROT_ARGS) +{ + struct roff_node *body, *n; + + /* + * Make sure we are in a column list or ignore this macro. + * Walk up from the last node, skipping ended nodes, and + * remember the `It' body seen on the way to the open `Bl'. + */ + + body = NULL; + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->flags & NODE_ENDED) + continue; + if (n->tok == MDOC_It && n->type == ROFFT_BODY) + body = n; + if (n->tok == MDOC_Bl && n->end == ENDBODY_NOT) + break; + } + + if (n == NULL || n->norm->Bl.type != LIST_column) { + mandoc_msg(MANDOCERR_TA_STRAY, line, ppos, "Ta"); + return; + } + + /* Advance to the next column. 
+ */ + + rew_last(mdoc, body); + roff_body_alloc(mdoc, line, ppos, MDOC_It); + parse_rest(mdoc, TOKEN_NONE, line, pos, buf); +} diff --git a/usr.bin/mandoc/mdoc_man.c b/usr.bin/mandoc/mdoc_man.c new file mode 100644 index 0000000..25d59a5 --- /dev/null +++ b/usr.bin/mandoc/mdoc_man.c @@ -0,0 +1,1836 @@ +/* $OpenBSD: mdoc_man.c,v 1.134 2020/02/27 01:25:57 schwarze Exp $ */ +/* + * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "out.h" +#include "main.h" + +#define DECL_ARGS const struct roff_meta *meta, struct roff_node *n + +typedef int (*int_fp)(DECL_ARGS); +typedef void (*void_fp)(DECL_ARGS); + +struct mdoc_man_act { + int_fp cond; /* if set, pre and post run only when this returns non-zero */ + int_fp pre; /* pre-node action; returns whether to print the children */ + void_fp post; /* post-node action */ + const char *prefix; /* pre-node string constant */ + const char *suffix; /* post-node string constant */ +}; + +static int cond_body(DECL_ARGS); +static int cond_head(DECL_ARGS); +static void font_push(char); +static void font_pop(void); +static int man_strlen(const char *); +static void mid_it(void); +static void post__t(DECL_ARGS); +static void post_aq(DECL_ARGS); +static void post_bd(DECL_ARGS); +static void post_bf(DECL_ARGS); +static void post_bk(DECL_ARGS); +static void post_bl(DECL_ARGS); +static void post_dl(DECL_ARGS); +static void post_en(DECL_ARGS); +static void post_enc(DECL_ARGS); +static void post_eo(DECL_ARGS); +static void post_fa(DECL_ARGS); +static void post_fd(DECL_ARGS); +static void post_fl(DECL_ARGS); +static void post_fn(DECL_ARGS); +static void post_fo(DECL_ARGS); +static void post_font(DECL_ARGS); +static void post_in(DECL_ARGS); +static void post_it(DECL_ARGS); +static void post_lb(DECL_ARGS); +static void post_nm(DECL_ARGS); +static void post_percent(DECL_ARGS); +static void post_pf(DECL_ARGS); +static void post_sect(DECL_ARGS); +static void post_vt(DECL_ARGS); +static int pre__t(DECL_ARGS); +static int pre_abort(DECL_ARGS); +static int pre_an(DECL_ARGS); +static int pre_ap(DECL_ARGS); +static int pre_aq(DECL_ARGS); +static int pre_bd(DECL_ARGS); +static int pre_bf(DECL_ARGS); +static int pre_bk(DECL_ARGS); +static int pre_bl(DECL_ARGS); +static void pre_br(DECL_ARGS); +static int pre_dl(DECL_ARGS); +static 
int pre_en(DECL_ARGS); +static int pre_enc(DECL_ARGS); +static int pre_em(DECL_ARGS); +static int pre_skip(DECL_ARGS); +static int pre_eo(DECL_ARGS); +static int pre_ex(DECL_ARGS); +static int pre_fa(DECL_ARGS); +static int pre_fd(DECL_ARGS); +static int pre_fl(DECL_ARGS); +static int pre_fn(DECL_ARGS); +static int pre_fo(DECL_ARGS); +static void pre_ft(DECL_ARGS); +static int pre_Ft(DECL_ARGS); +static int pre_in(DECL_ARGS); +static int pre_it(DECL_ARGS); +static int pre_lk(DECL_ARGS); +static int pre_li(DECL_ARGS); +static int pre_nm(DECL_ARGS); +static int pre_no(DECL_ARGS); +static void pre_noarg(DECL_ARGS); +static int pre_ns(DECL_ARGS); +static void pre_onearg(DECL_ARGS); +static int pre_pp(DECL_ARGS); +static int pre_rs(DECL_ARGS); +static int pre_sm(DECL_ARGS); +static void pre_sp(DECL_ARGS); +static int pre_sect(DECL_ARGS); +static int pre_sy(DECL_ARGS); +static void pre_syn(struct roff_node *); +static void pre_ta(DECL_ARGS); +static int pre_vt(DECL_ARGS); +static int pre_xr(DECL_ARGS); +static void print_word(const char *); +static void print_line(const char *, int); +static void print_block(const char *, int); +static void print_offs(const char *, int); +static void print_width(const struct mdoc_bl *, + const struct roff_node *); +static void print_count(int *); +static void print_node(DECL_ARGS); +/* + * Handlers for roff requests, indexed by token; print_node() calls + * the entry for any non-text node whose token is below ROFF_MAX. + */ +static const void_fp roff_man_acts[ROFF_MAX] = { + pre_br, /* br */ + pre_onearg, /* ce */ + pre_noarg, /* fi */ + pre_ft, /* ft */ + pre_onearg, /* ll */ + pre_onearg, /* mc */ + pre_noarg, /* nf */ + pre_onearg, /* po */ + pre_onearg, /* rj */ + pre_sp, /* sp */ + pre_ta, /* ta */ + pre_onearg, /* ti */ +}; +/* + * Per-macro actions; the fields of each entry are, in order, + * cond, pre, post, prefix, suffix. Entries are indexed by + * (tok - MDOC_Dd); mdoc_man_act() performs the lookup. + */ +static const struct mdoc_man_act mdoc_man_acts[MDOC_MAX - MDOC_Dd] = { + { NULL, NULL, NULL, NULL, NULL }, /* Dd */ + { NULL, NULL, NULL, NULL, NULL }, /* Dt */ + { NULL, NULL, NULL, NULL, NULL }, /* Os */ + { NULL, pre_sect, post_sect, ".SH", NULL }, /* Sh */ + { NULL, pre_sect, post_sect, ".SS", NULL }, /* Ss */ + { NULL, pre_pp, NULL, NULL, NULL 
}, /* Pp */ + { cond_body, pre_dl, post_dl, NULL, NULL }, /* D1 */ + { cond_body, pre_dl, post_dl, NULL, NULL }, /* Dl */ + { cond_body, pre_bd, post_bd, NULL, NULL }, /* Bd */ + { NULL, NULL, NULL, NULL, NULL }, /* Ed */ + { cond_body, pre_bl, post_bl, NULL, NULL }, /* Bl */ + { NULL, NULL, NULL, NULL, NULL }, /* El */ + { NULL, pre_it, post_it, NULL, NULL }, /* It */ + { NULL, pre_em, post_font, NULL, NULL }, /* Ad */ + { NULL, pre_an, NULL, NULL, NULL }, /* An */ + { NULL, pre_ap, NULL, NULL, NULL }, /* Ap */ + { NULL, pre_em, post_font, NULL, NULL }, /* Ar */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Cd */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Cm */ + { NULL, pre_li, post_font, NULL, NULL }, /* Dv */ + { NULL, pre_li, post_font, NULL, NULL }, /* Er */ + { NULL, pre_li, post_font, NULL, NULL }, /* Ev */ + { NULL, pre_ex, NULL, NULL, NULL }, /* Ex */ + { NULL, pre_fa, post_fa, NULL, NULL }, /* Fa */ + { NULL, pre_fd, post_fd, NULL, NULL }, /* Fd */ + { NULL, pre_fl, post_fl, NULL, NULL }, /* Fl */ + { NULL, pre_fn, post_fn, NULL, NULL }, /* Fn */ + { NULL, pre_Ft, post_font, NULL, NULL }, /* Ft */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Ic */ + { NULL, pre_in, post_in, NULL, NULL }, /* In */ + { NULL, pre_li, post_font, NULL, NULL }, /* Li */ + { cond_head, pre_enc, NULL, "\\- ", NULL }, /* Nd */ + { NULL, pre_nm, post_nm, NULL, NULL }, /* Nm */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Op */ + { NULL, pre_abort, NULL, NULL, NULL }, /* Ot */ + { NULL, pre_em, post_font, NULL, NULL }, /* Pa */ + { NULL, pre_ex, NULL, NULL, NULL }, /* Rv */ + { NULL, NULL, NULL, NULL, NULL }, /* St */ + { NULL, pre_em, post_font, NULL, NULL }, /* Va */ + { NULL, pre_vt, post_vt, NULL, NULL }, /* Vt */ + { NULL, pre_xr, NULL, NULL, NULL }, /* Xr */ + { NULL, NULL, post_percent, NULL, NULL }, /* %A */ + { NULL, pre_em, post_percent, NULL, NULL }, /* %B */ + { NULL, NULL, post_percent, NULL, NULL }, /* %D */ + { NULL, pre_em, post_percent, NULL, NULL }, /* %I 
*/ + { NULL, pre_em, post_percent, NULL, NULL }, /* %J */ + { NULL, NULL, post_percent, NULL, NULL }, /* %N */ + { NULL, NULL, post_percent, NULL, NULL }, /* %O */ + { NULL, NULL, post_percent, NULL, NULL }, /* %P */ + { NULL, NULL, post_percent, NULL, NULL }, /* %R */ + { NULL, pre__t, post__t, NULL, NULL }, /* %T */ + { NULL, NULL, post_percent, NULL, NULL }, /* %V */ + { NULL, NULL, NULL, NULL, NULL }, /* Ac */ + { cond_body, pre_aq, post_aq, NULL, NULL }, /* Ao */ + { cond_body, pre_aq, post_aq, NULL, NULL }, /* Aq */ + { NULL, NULL, NULL, NULL, NULL }, /* At */ + { NULL, NULL, NULL, NULL, NULL }, /* Bc */ + { NULL, pre_bf, post_bf, NULL, NULL }, /* Bf */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bo */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bq */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bsx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bx */ + { NULL, pre_skip, NULL, NULL, NULL }, /* Db */ + { NULL, NULL, NULL, NULL, NULL }, /* Dc */ + { cond_body, pre_enc, post_enc, "\\(lq", "\\(rq" }, /* Do */ + { cond_body, pre_enc, post_enc, "\\(lq", "\\(rq" }, /* Dq */ + { NULL, NULL, NULL, NULL, NULL }, /* Ec */ + { NULL, NULL, NULL, NULL, NULL }, /* Ef */ + { NULL, pre_em, post_font, NULL, NULL }, /* Em */ + { cond_body, pre_eo, post_eo, NULL, NULL }, /* Eo */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Fx */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Ms */ + { NULL, pre_no, NULL, NULL, NULL }, /* No */ + { NULL, pre_ns, NULL, NULL, NULL }, /* Ns */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Nx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Ox */ + { NULL, NULL, NULL, NULL, NULL }, /* Pc */ + { NULL, NULL, post_pf, NULL, NULL }, /* Pf */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Po */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Pq */ + { NULL, NULL, NULL, NULL, NULL }, /* Qc */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* Ql */ + { cond_body, pre_enc, post_enc, "\"", "\"" }, /* Qo */ + { cond_body, pre_enc, post_enc, "\"", 
"\"" }, /* Qq */ + { NULL, NULL, NULL, NULL, NULL }, /* Re */ + { cond_body, pre_rs, NULL, NULL, NULL }, /* Rs */ + { NULL, NULL, NULL, NULL, NULL }, /* Sc */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* So */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* Sq */ + { NULL, pre_sm, NULL, NULL, NULL }, /* Sm */ + { NULL, pre_em, post_font, NULL, NULL }, /* Sx */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Sy */ + { NULL, pre_li, post_font, NULL, NULL }, /* Tn */ + { NULL, NULL, NULL, NULL, NULL }, /* Ux */ + { NULL, NULL, NULL, NULL, NULL }, /* Xc */ + { NULL, NULL, NULL, NULL, NULL }, /* Xo */ + { NULL, pre_fo, post_fo, NULL, NULL }, /* Fo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fc */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Oo */ + { NULL, NULL, NULL, NULL, NULL }, /* Oc */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bk */ + { NULL, NULL, NULL, NULL, NULL }, /* Ek */ + { NULL, NULL, NULL, NULL, NULL }, /* Bt */ + { NULL, NULL, NULL, NULL, NULL }, /* Hf */ + { NULL, pre_em, post_font, NULL, NULL }, /* Fr */ + { NULL, NULL, NULL, NULL, NULL }, /* Ud */ + { NULL, NULL, post_lb, NULL, NULL }, /* Lb */ + { NULL, pre_abort, NULL, NULL, NULL }, /* Lp */ + { NULL, pre_lk, NULL, NULL, NULL }, /* Lk */ + { NULL, pre_em, post_font, NULL, NULL }, /* Mt */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Brq */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Bro */ + { NULL, NULL, NULL, NULL, NULL }, /* Brc */ + { NULL, NULL, post_percent, NULL, NULL }, /* %C */ + { NULL, pre_skip, NULL, NULL, NULL }, /* Es */ + { cond_body, pre_en, post_en, NULL, NULL }, /* En */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Dx */ + { NULL, NULL, post_percent, NULL, NULL }, /* %Q */ + { NULL, NULL, post_percent, NULL, NULL }, /* %U */ + { NULL, NULL, NULL, NULL, NULL }, /* Ta */ + { NULL, pre_skip, NULL, NULL, NULL }, /* Tg */ +}; +static const struct mdoc_man_act *mdoc_man_act(enum roff_tok); +/* Pending output state: a bitwise OR of the MMAN_* flags below. */ +static int outflags; +#define MMAN_spc (1 << 0) /* blank character 
before next word */ +#define MMAN_spc_force (1 << 1) /* even before trailing punctuation */ +#define MMAN_nl (1 << 2) /* break man(7) code line */ +#define MMAN_br (1 << 3) /* break output line */ +#define MMAN_sp (1 << 4) /* insert a blank output line */ +#define MMAN_PP (1 << 5) /* reset indentation etc. */ +#define MMAN_Sm (1 << 6) /* horizontal spacing mode */ +#define MMAN_Bk (1 << 7) /* word keep mode */ +#define MMAN_Bk_susp (1 << 8) /* suspend this (after a macro) */ +#define MMAN_An_split (1 << 9) /* author mode is "split" */ +#define MMAN_An_nosplit (1 << 10) /* author mode is "nosplit" */ +#define MMAN_PD (1 << 11) /* inter-paragraph spacing disabled */ +#define MMAN_nbrword (1 << 12) /* do not break the next word */ + +#define BL_STACK_MAX 32 + +static int Bl_stack[BL_STACK_MAX]; /* offsets [chars] */ +static int Bl_stack_post[BL_STACK_MAX]; /* add final .RE */ +static int Bl_stack_len; /* number of nested Bl blocks */ +static int TPremain; /* characters before tag is full */ + +static struct { + char *head; /* base of the allocated font stack */ + char *tail; /* entry of the current font */ + size_t size; /* number of bytes allocated at head */ +} fontqueue; + +/* + * Return the action table entry for an mdoc macro token. + * The table has MDOC_MAX - MDOC_Dd entries, so tok must satisfy + * MDOC_Dd <= tok < MDOC_MAX; MDOC_MAX itself would be one past the end. + */ +static const struct mdoc_man_act * +mdoc_man_act(enum roff_tok tok) +{ + assert(tok >= MDOC_Dd && tok < MDOC_MAX); + return mdoc_man_acts + (tok - MDOC_Dd); +} +/* + * Count the characters of cp: bytes outside of escape sequences + * count one each, escapes of the classes listed in the switch + * count as one character, and all other escapes count as zero. + * After ESCAPE_SKIPCHAR, the next counted character is left out. + * Counting stops early at an escape reported as ESCAPE_ERROR. + */ +static int +man_strlen(const char *cp) +{ + size_t rsz; + int skip, sz; + + sz = 0; + skip = 0; + for (;;) { + rsz = strcspn(cp, "\\"); + if (rsz) { + cp += rsz; + if (skip) { + skip = 0; + rsz--; + } + sz += rsz; + } + if ('\0' == *cp) + break; + cp++; + switch (mandoc_escape(&cp, NULL, NULL)) { + case ESCAPE_ERROR: + return sz; + case ESCAPE_UNICODE: + case ESCAPE_NUMBERED: + case ESCAPE_SPECIAL: + case ESCAPE_UNDEF: + case ESCAPE_OVERSTRIKE: + if (skip) + skip = 0; + else + sz++; + break; + case ESCAPE_SKIPCHAR: + skip = 1; + break; + default: + break; + } + } + return sz; +} +/* + * Push newfont onto the font stack, growing the stack by 8 bytes + * when it is full, and print the \f escape selecting that font. + */ +static void +font_push(char newfont) +{ + size_t top; /* index of the new top entry */ + + top = (size_t)(fontqueue.tail - fontqueue.head) + 1; + if (top >= fontqueue.size) { + fontqueue.size += 8; + fontqueue.head = 
mandoc_realloc(fontqueue.head, + fontqueue.size); + } + /* + * mandoc_realloc() is assumed to behave like realloc(3) and may + * move the buffer, so tail is recomputed from the new head + * instead of being left pointing into the old allocation. + */ + fontqueue.tail = fontqueue.head + top; + *fontqueue.tail = newfont; + print_word(""); + printf("\\f"); + putchar(newfont); + outflags &= ~MMAN_spc; +} +/* + * Pop the font stack, but never below its first entry, and print + * the \f escape selecting the font that is now on top. + */ +static void +font_pop(void) +{ + + if (fontqueue.tail > fontqueue.head) + fontqueue.tail--; + outflags &= ~MMAN_spc; + print_word(""); + printf("\\f"); + putchar(*fontqueue.tail); +} +/* + * Print one word to standard output. First emit whatever the + * pending outflags ask for (paragraph macro, blank line, line + * break, newline or a single blank), then the word itself with + * special characters translated, and update outflags and TPremain. + */ +static void +print_word(const char *s) +{ + + if ((MMAN_PP | MMAN_sp | MMAN_br | MMAN_nl) & outflags) { + /* + * If we need a newline, print it now and start afresh. + */ + if (MMAN_PP & outflags) { + if (MMAN_sp & outflags) { + if (MMAN_PD & outflags) { + printf("\n.PD"); + outflags &= ~MMAN_PD; + } + } else if ( ! (MMAN_PD & outflags)) { + printf("\n.PD 0"); + outflags |= MMAN_PD; + } + printf("\n.PP\n"); + } else if (MMAN_sp & outflags) + printf("\n.sp\n"); + else if (MMAN_br & outflags) + printf("\n.br\n"); + else if (MMAN_nl & outflags) + putchar('\n'); + outflags &= ~(MMAN_PP|MMAN_sp|MMAN_br|MMAN_nl|MMAN_spc); + if (1 == TPremain) + printf(".br\n"); + TPremain = 0; + } else if (MMAN_spc & outflags) { + /* + * If we need a space, only print it if + * (1) it is forced by `No' or + * (2) what follows is not terminating punctuation or + * (3) what follows is longer than one character. 
+ * In keep mode that is not suspended, the blank is + * escaped with a backslash. + */ + if (MMAN_spc_force & outflags || '\0' == s[0] || + NULL == strchr(".,:;)]?!", s[0]) || '\0' != s[1]) { + if (MMAN_Bk & outflags && + ! (MMAN_Bk_susp & outflags)) + putchar('\\'); + putchar(' '); + if (TPremain) + TPremain--; + } + } + + /* + * Request a blank before the next word if spacing mode is + * on and the word printed now is not a lone `(' or `['. + */ + if (MMAN_Sm & outflags && ('\0' == s[0] || + (('(' != s[0] && '[' != s[0]) || '\0' != s[1]))) + outflags |= MMAN_spc; + else + outflags &= ~MMAN_spc; + outflags &= ~(MMAN_spc_force | MMAN_Bk_susp); + + for ( ; *s; s++) { + switch (*s) { + case ASCII_NBRSP: + printf("\\ "); + break; + case ASCII_HYPH: + putchar('-'); + break; + case ASCII_BREAK: + printf("\\:"); + break; + case ' ': + if (MMAN_nbrword & outflags) { + printf("\\ "); + break; + } + /* FALLTHROUGH */ + default: + putchar((unsigned char)*s); + break; + } + if (TPremain) + TPremain--; + } + outflags &= ~MMAN_nbrword; +} +/* + * Print s at the start of a new man(7) code line, + * then add newflags to outflags. + */ +static void +print_line(const char *s, int newflags) +{ + + outflags |= MMAN_nl; + print_word(s); + outflags |= newflags; +} +/* + * Print the macro s on a line of its own in place of a pending + * .PP, switching inter-paragraph spacing with .PD or .PD 0 as the + * pending flags require, then add MMAN_Bk_susp and newflags. + */ +static void +print_block(const char *s, int newflags) +{ + + outflags &= ~MMAN_PP; + if (MMAN_sp & outflags) { + outflags &= ~(MMAN_sp | MMAN_br); + if (MMAN_PD & outflags) { + print_line(".PD", 0); + outflags &= ~MMAN_PD; + } + } else if (! (MMAN_PD & outflags)) + print_line(".PD 0", MMAN_PD); + outflags |= MMAN_nl; + print_word(s); + outflags |= MMAN_Bk_susp | newflags; +} +/* + * Print an .RS request for the offset v. If keywords is non-zero, + * the words "left", "indent" and "indent-two" are understood; + * other values are converted to a number of characters if possible. + */ +static void +print_offs(const char *v, int keywords) +{ + char buf[24]; + struct roffsu su; + const char *end; + int sz; + + print_line(".RS", MMAN_Bk_susp); + + /* Convert v into a number (of characters). */ + if (NULL == v || '\0' == *v || (keywords && !strcmp(v, "left"))) + sz = 0; + else if (keywords && !strcmp(v, "indent")) + sz = 6; + else if (keywords && !strcmp(v, "indent-two")) + sz = 12; + else { + end = a2roffsu(v, &su, SCALE_EN); + if (end == NULL || *end != '\0') + sz = man_strlen(v); + else if (SCALE_EN == su.unit) + sz = su.scale; + else { + /* + * XXX + * If we are inside an enclosing list, + * there is no easy way to add the two + * indentations because they are provided + * in terms of different units. 
+ * The offset is therefore printed as given. + */ + print_word(v); + outflags |= MMAN_nl; + return; + } + } + + /* + * We are inside an enclosing list. + * Add the two indentations. + */ + if (Bl_stack_len) + sz += Bl_stack[Bl_stack_len - 1]; + + (void)snprintf(buf, sizeof(buf), "%dn", sz); + print_word(buf); + outflags |= MMAN_nl; +} + +/* + * Set up the indentation for a list item; used from pre_it(). + * Prints .HP or .TP with the width and pushes the item's own + * indentation onto Bl_stack. + */ +static void +print_width(const struct mdoc_bl *bl, const struct roff_node *child) +{ + char buf[24]; + struct roffsu su; + const char *end; + int numeric, remain, sz, chsz; + + numeric = 1; + remain = 0; + + /* Convert the width into a number (of characters). */ + if (bl->width == NULL) + sz = (bl->type == LIST_hang) ? 6 : 0; + else { + end = a2roffsu(bl->width, &su, SCALE_MAX); + if (end == NULL || *end != '\0') + sz = man_strlen(bl->width); + else if (SCALE_EN == su.unit) + sz = su.scale; + else { + sz = 0; + numeric = 0; + } + } + + /* XXX Rough estimation, might have multiple parts. */ + if (bl->type == LIST_enum) + chsz = (bl->count > 8) + 1; + else if (child != NULL && child->type == ROFFT_TEXT) + chsz = man_strlen(child->string); + else + chsz = 0; + + /* Maybe we are inside an enclosing list? */ + mid_it(); + + /* + * Save our own indentation, + * such that child lists can use it. + * NOTE(review): this push is not checked against BL_STACK_MAX + * here; confirm that the nesting depth is bounded elsewhere. + */ + Bl_stack[Bl_stack_len++] = sz + 2; + + /* Set up the current list. */ + if (chsz > sz && bl->type != LIST_tag) + print_block(".HP", MMAN_spc); + else { + print_block(".TP", MMAN_spc); + remain = sz + 2; + } + if (numeric) { + (void)snprintf(buf, sizeof(buf), "%dn", sz + 2); + print_word(buf); + } else + print_word(bl->width); + TPremain = remain; +} +/* + * Increment *count and print it as the number of an enumerated item. + */ +static void +print_count(int *count) +{ + char buf[24]; + + (void)snprintf(buf, sizeof(buf), "%d.\\&", ++*count); + print_word(buf); +} +/* + * Write the man(7) translation of an mdoc tree to standard output: + * leading comment nodes, the .TH line, and then every remaining + * top-level node via print_node(). The arg parameter is not used. + */ +void +man_mdoc(void *arg, const struct roff_meta *mdoc) +{ + struct roff_node *n; + + printf(".\\\" Automatically generated from an mdoc input file." 
+ " Do not edit.\n"); + for (n = mdoc->first->child; n != NULL; n = n->next) { + if (n->type != ROFFT_COMMENT) + break; + printf(".\\\"%s\n", n->string); + } + + printf(".TH \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", + mdoc->title, (mdoc->msec == NULL ? "" : mdoc->msec), + mdoc->date, mdoc->os, mdoc->vol); + + /* Disable hyphenation and if nroff, disable justification. */ + printf(".nh\n.if n .ad l"); + + outflags = MMAN_nl | MMAN_Sm; + /* The font stack is allocated once and starts with the roman font. */ + if (0 == fontqueue.size) { + fontqueue.size = 8; + fontqueue.head = fontqueue.tail = mandoc_malloc(8); + *fontqueue.tail = 'R'; + } + for (; n != NULL; n = n->next) + print_node(mdoc, n); + putchar('\n'); +} +/* + * Print one node and its children. Text nodes are printed as + * words, roff requests are passed to their roff_man_acts handler, + * and mdoc macros run cond, pre, the children and post from their + * mdoc_man_act() entry. + */ +static void +print_node(DECL_ARGS) +{ + const struct mdoc_man_act *act; + struct roff_node *sub; + int cond, do_sub; + + if (n->flags & NODE_NOPRT) + return; + + /* + * Request a break of the man(7) code line before a + * non-transparent node flagged NODE_LINE when a blank would + * otherwise be printed before the next word. + * This makes the page structure more consistent. + */ + if (outflags & MMAN_spc && + n->flags & NODE_LINE && + !roff_node_transparent(n)) + outflags |= MMAN_nl; + + act = NULL; + cond = 0; + do_sub = 1; + n->flags &= ~NODE_ENDED; + + if (n->type == ROFFT_TEXT) { + /* + * Make sure that we don't happen to start with a + * control character at the start of a line. + */ + if (MMAN_nl & outflags && + ('.' == *n->string || '\'' == *n->string)) { + print_word(""); + printf("\\&"); + outflags &= ~MMAN_spc; + } + if (n->flags & NODE_DELIMC) + outflags &= ~(MMAN_spc | MMAN_spc_force); + else if (outflags & MMAN_Sm) + outflags |= MMAN_spc_force; + print_word(n->string); + if (n->flags & NODE_DELIMO) + outflags &= ~(MMAN_spc | MMAN_spc_force); + else if (outflags & MMAN_Sm) + outflags |= MMAN_spc; + } else if (n->tok < ROFF_MAX) { + (*roff_man_acts[n->tok])(meta, n); + return; + } else { + /* + * Conditionally run the pre-node action handler for a + * node. 
+ * A pre handler returning zero suppresses the children. + */ + act = mdoc_man_act(n->tok); + cond = act->cond == NULL || (*act->cond)(meta, n); + if (cond && act->pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + do_sub = (*act->pre)(meta, n); + } + + /* + * Conditionally run all child nodes. + * Note that the children of one node are visited in a loop + * over the sibling list; recursion is only used to descend + * one level. This prevents unnecessary depth in the stack. + */ + if (do_sub) + for (sub = n->child; sub; sub = sub->next) + print_node(meta, sub); + + /* + * Lastly, conditionally run the post-node handler. + */ + if (NODE_ENDED & n->flags) + return; + + if (cond && act->post) + (*act->post)(meta, n); + + if (ENDBODY_NOT != n->end) + n->body->flags |= NODE_ENDED; +} +/* Condition: true for head nodes only. */ +static int +cond_head(DECL_ARGS) +{ + + return n->type == ROFFT_HEAD; +} +/* Condition: true for body nodes only. */ +static int +cond_body(DECL_ARGS) +{ + + return n->type == ROFFT_BODY; +} +/* Pre handler that aborts; the action table uses it for Ot and Lp. */ +static int +pre_abort(DECL_ARGS) +{ + abort(); +} +/* + * Print the prefix string from the action table, if there is one, + * with no blank after it. + */ +static int +pre_enc(DECL_ARGS) +{ + const char *prefix; + + prefix = mdoc_man_act(n->tok)->prefix; + if (NULL == prefix) + return 1; + print_word(prefix); + outflags &= ~MMAN_spc; + return 1; +} +/* + * Print the suffix string from the action table, if there is one, + * with neither a blank nor a line break before it. + */ +static void +post_enc(DECL_ARGS) +{ + const char *suffix; + + suffix = mdoc_man_act(n->tok)->suffix; + if (NULL == suffix) + return; + outflags &= ~(MMAN_spc | MMAN_nl); + print_word(suffix); +} +/* Request an output line break; the action table uses this for Ex and Rv. */ +static int +pre_ex(DECL_ARGS) +{ + outflags |= MMAN_br | MMAN_nl; + return 1; +} +/* Pop the font that the matching pre handler pushed. */ +static void +post_font(DECL_ARGS) +{ + + font_pop(); +} +/* + * Finish one bibliographic %-field: pop the font if the field's + * pre handler is pre_em, then print "," and/or "and" before a + * following field, or "." and a line break after the last one. + */ +static void +post_percent(DECL_ARGS) +{ + struct roff_node *np, *nn, *nnn; + + if (mdoc_man_act(n->tok)->pre == pre_em) + font_pop(); + + if ((nn = roff_node_next(n)) != NULL) { + np = roff_node_prev(n); + nnn = nn == NULL ? 
NULL : roff_node_next(nn); + if (nn->tok != n->tok || + (np != NULL && np->tok == n->tok) || + (nnn != NULL && nnn->tok == n->tok)) + print_word(","); + if (nn->tok == n->tok && + (nnn == NULL || nnn->tok != n->tok)) + print_word("and"); + } else { + print_word("."); + outflags |= MMAN_nl; + } +} +/* + * Open a %T field: an opening quote if the parent Rs has quote_T + * set, the italic font otherwise. + */ +static int +pre__t(DECL_ARGS) +{ + + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) { + print_word("\\(lq"); + outflags &= ~MMAN_spc; + } else + font_push('I'); + return 1; +} +/* + * Close a %T field with the counterpart of what pre__t() printed, + * then finish it like any other %-field. + */ +static void +post__t(DECL_ARGS) +{ + + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) { + outflags &= ~MMAN_spc; + print_word("\\(rq"); + } else + font_pop(); + post_percent(meta, n); +} + +/* + * Print the opening of a section header: for the head node, the + * macro from the action table (.SH or .SS) and an opening quote. + */ +static int +pre_sect(DECL_ARGS) +{ + + if (n->type == ROFFT_HEAD) { + outflags |= MMAN_sp; + print_block(mdoc_man_act(n->tok)->prefix, 0); + print_word(""); + putchar('\"'); + outflags &= ~MMAN_spc; + } + return 1; +} + +/* + * Print the end of a section header: for the head node, the closing + * quote and a line break. An Sh for the AUTHORS section also + * resets the An split mode. + */ +static void +post_sect(DECL_ARGS) +{ + + if (n->type != ROFFT_HEAD) + return; + outflags &= ~MMAN_spc; + print_word(""); + putchar('\"'); + outflags |= MMAN_nl; + if (MDOC_Sh == n->tok && SEC_AUTHORS == n->sec) + outflags &= ~(MMAN_An_split | MMAN_An_nosplit); +} + +/* See mdoc_term.c, synopsis_pre() for comments. 
+ */ +static void +pre_syn(struct roff_node *n) +{ + struct roff_node *np; + + if ((n->flags & NODE_SYNPRETTY) == 0 || + (np = roff_node_prev(n)) == NULL) + return; + + if (np->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + outflags |= MMAN_br; + return; + } + + switch (np->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + outflags |= MMAN_sp; + break; + case MDOC_Ft: + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + outflags |= MMAN_sp; + break; + } + /* FALLTHROUGH */ + default: + outflags |= MMAN_br; + break; + } +} +/* + * Handle `An'. AUTH_split and AUTH_nosplit only switch the author + * mode and return 0, so the children are not printed. Otherwise, + * break the line in split mode, or enter split mode in the AUTHORS + * section unless nosplit mode was selected. + */ +static int +pre_an(DECL_ARGS) +{ + + switch (n->norm->An.auth) { + case AUTH_split: + outflags &= ~MMAN_An_nosplit; + outflags |= MMAN_An_split; + return 0; + case AUTH_nosplit: + outflags &= ~MMAN_An_split; + outflags |= MMAN_An_nosplit; + return 0; + default: + if (MMAN_An_split & outflags) + outflags |= MMAN_br; + else if (SEC_AUTHORS == n->sec && + ! (MMAN_An_nosplit & outflags)) + outflags |= MMAN_An_split; + return 1; + } +} +/* Print an apostrophe with no blank before or after it. */ +static int +pre_ap(DECL_ARGS) +{ + + outflags &= ~MMAN_spc; + print_word("'"); + outflags &= ~MMAN_spc; + return 0; +} +/* + * Print the opening angle bracket: a plain "<" if the only child + * is an `Mt' node, the \(la escape otherwise. + */ +static int +pre_aq(DECL_ARGS) +{ + + print_word(n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? "<" : "\\(la"); + outflags &= ~MMAN_spc; + return 1; +} +/* Print the closing angle bracket matching the one from pre_aq(). */ +static void +post_aq(DECL_ARGS) +{ + + outflags &= ~(MMAN_spc | MMAN_nl); + print_word(n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? ">" : "\\(ra"); +} +/* + * Open a display: print .nf for unfilled and literal displays, + * request a blank line unless the display is compact or the block + * has no previous node, and print the offset with print_offs(). + */ +static int +pre_bd(DECL_ARGS) +{ + outflags &= ~(MMAN_PP | MMAN_sp | MMAN_br); + if (n->norm->Bd.type == DISP_unfilled || + n->norm->Bd.type == DISP_literal) + print_line(".nf", 0); + if (n->norm->Bd.comp == 0 && roff_node_prev(n->parent) != NULL) + outflags |= MMAN_sp; + print_offs(n->norm->Bd.offs, 1); + return 1; +} + +static void +post_bd(DECL_ARGS) +{ + enum roff_tok bef, now; + + /* Close out this display. 
+ * Then compare the fill mode given by the node's own + * NODE_NOFILL flag (bef) with the mode in effect at the + * end of the display (now), and print the request for + * bef if the two differ. + */ + print_line(".RE", MMAN_nl); + bef = n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi; + if (n->last == NULL) + now = n->norm->Bd.type == DISP_unfilled || + n->norm->Bd.type == DISP_literal ? ROFF_nf : ROFF_fi; + else if (n->last->tok == ROFF_nf) + now = ROFF_nf; + else if (n->last->tok == ROFF_fi) + now = ROFF_fi; + else + now = n->last->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi; + if (bef != now) { + outflags |= MMAN_nl; + print_word("."); + outflags &= ~MMAN_spc; + print_word(roff_name[bef]); + outflags |= MMAN_nl; + } + + /* Maybe we are inside an enclosing list? */ + if (roff_node_next(n->parent) != NULL) + mid_it(); +} +/* + * Enter a `Bf' block: nothing is printed for the block node itself, + * the body pushes the font selected by n->norm->Bf.font, and other + * node types are skipped together with their children. + */ +static int +pre_bf(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + break; + default: + return 0; + } + switch (n->norm->Bf.font) { + case FONT_Em: + font_push('I'); + break; + case FONT_Sy: + font_push('B'); + break; + default: + font_push('R'); + break; + } + return 1; +} +/* Pop the font pushed by pre_bf() at the end of the body. */ +static void +post_bf(DECL_ARGS) +{ + + if (n->type == ROFFT_BODY) + font_pop(); +} +/* Turn on word keep mode for bodies and for elements using this handler. */ +static int +pre_bk(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + case ROFFT_ELEM: + outflags |= MMAN_Bk; + return 1; + default: + return 0; + } +} +/* + * Turn off word keep mode at the end of a body, or at the end of + * an element that has no `Bk' node among its ancestors. + */ +static void +post_bk(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_ELEM: + while ((n = n->parent) != NULL) + if (n->tok == MDOC_Bk) + return; + /* FALLTHROUGH */ + case ROFFT_BODY: + outflags &= ~MMAN_Bk; + break; + default: + break; + } +} + +static int +pre_bl(DECL_ARGS) +{ + size_t icol; + + /* + * print_offs() will increase the -offset to account for + * a possible enclosing .It, but any enclosed .It blocks + * just nest and do not add up their indentation. 
+ * NOTE(review): the push onto Bl_stack below is not checked + * against BL_STACK_MAX here; confirm the depth is bounded. + */ + if (n->norm->Bl.offs) { + print_offs(n->norm->Bl.offs, 0); + Bl_stack[Bl_stack_len++] = 0; + } + + switch (n->norm->Bl.type) { + case LIST_enum: + n->norm->Bl.count = 0; + return 1; + case LIST_column: + break; + default: + return 1; + } + /* Column lists with children start a table with one "l" column each. */ + if (n->child != NULL) { + print_line(".TS", MMAN_nl); + for (icol = 0; icol < n->norm->Bl.ncols; icol++) + print_word("l"); + print_word("."); + } + outflags |= MMAN_nl; + return 1; +} +/* + * Close a list: end the table of a column list, reset the counter + * of an enumerated list, and either close the .RS block opened for + * the offset and pop Bl_stack, or request a new paragraph. + */ +static void +post_bl(DECL_ARGS) +{ + + switch (n->norm->Bl.type) { + case LIST_column: + if (n->child != NULL) + print_line(".TE", 0); + break; + case LIST_enum: + n->norm->Bl.count = 0; + break; + default: + break; + } + + if (n->norm->Bl.offs) { + print_line(".RE", MMAN_nl); + assert(Bl_stack_len); + Bl_stack_len--; + assert(Bl_stack[Bl_stack_len] == 0); + } else { + outflags |= MMAN_PP | MMAN_nl; + outflags &= ~(MMAN_sp | MMAN_br); + } + + /* Maybe we are inside an enclosing list? */ + if (roff_node_next(n->parent) != NULL) + mid_it(); +} +/* Handler for the roff br request: ask for an output line break. */ +static void +pre_br(DECL_ARGS) +{ + outflags |= MMAN_br; +} +/* Open a one-line display with a fixed offset of "6n". */ +static int +pre_dl(DECL_ARGS) +{ + print_offs("6n", 0); + return 1; +} +/* Close the .RS block opened by pre_dl(). */ +static void +post_dl(DECL_ARGS) +{ + print_line(".RE", MMAN_nl); + + /* Maybe we are inside an enclosing list? 
+ */ + if (roff_node_next(n->parent) != NULL) + mid_it(); +} +/* Switch to the italic font; the matching post handler pops it. */ +static int +pre_em(DECL_ARGS) +{ + + font_push('I'); + return 1; +} +/* + * Print the string of the first child of n->norm->Es, if there is + * one, with no blank after it. + */ +static int +pre_en(DECL_ARGS) +{ + + if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + + print_word(n->norm->Es->child->string); + outflags &= ~MMAN_spc; + return 1; +} +/* + * Print the string of the second child of n->norm->Es, if there is + * one, with no blank before it. + */ +static void +post_en(DECL_ARGS) +{ + + if (NULL == n->norm->Es || + NULL == n->norm->Es->child || + NULL == n->norm->Es->child->next) + return; + + outflags &= ~MMAN_spc; + print_word(n->norm->Es->child->next->string); + return; +} + +static int +pre_eo(DECL_ARGS) +{ + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + print_word("\\&"); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + outflags &= ~(MMAN_spc | MMAN_nl); + return 1; +} + +static void +post_eo(DECL_ARGS) +{ + int body, tail; + + if (n->end != ENDBODY_NOT) { + outflags |= MMAN_spc; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + outflags &= ~MMAN_spc; + else if ( ! (body || tail)) + print_word("\\&"); + else if ( ! 
tail) + outflags |= MMAN_spc; +} + +static int +pre_fa(DECL_ARGS) +{ + int am_Fa; + + am_Fa = MDOC_Fa == n->tok; + + if (am_Fa) + n = n->child; + + while (NULL != n) { + font_push('I'); + if (am_Fa || NODE_SYNPRETTY & n->flags) + outflags |= MMAN_nbrword; + print_node(meta, n); + font_pop(); + if (NULL != (n = n->next)) + print_word(","); + } + return 0; +} + +static void +post_fa(DECL_ARGS) +{ + struct roff_node *nn; + + if ((nn = roff_node_next(n)) != NULL && nn->tok == MDOC_Fa) + print_word(","); +} + +static int +pre_fd(DECL_ARGS) +{ + pre_syn(n); + font_push('B'); + return 1; +} + +static void +post_fd(DECL_ARGS) +{ + font_pop(); + outflags |= MMAN_br; +} + +static int +pre_fl(DECL_ARGS) +{ + font_push('B'); + print_word("\\-"); + if (n->child != NULL) + outflags &= ~MMAN_spc; + return 1; +} + +static void +post_fl(DECL_ARGS) +{ + struct roff_node *nn; + + font_pop(); + if (n->child == NULL && + ((nn = roff_node_next(n)) != NULL && + nn->type != ROFFT_TEXT && + (nn->flags & NODE_LINE) == 0)) + outflags &= ~MMAN_spc; +} + +static int +pre_fn(DECL_ARGS) +{ + + pre_syn(n); + + n = n->child; + if (NULL == n) + return 0; + + if (NODE_SYNPRETTY & n->flags) + print_block(".HP 4n", MMAN_nl); + + font_push('B'); + print_node(meta, n); + font_pop(); + outflags &= ~MMAN_spc; + print_word("("); + outflags &= ~MMAN_spc; + + n = n->next; + if (NULL != n) + pre_fa(meta, n); + return 0; +} + +static void +post_fn(DECL_ARGS) +{ + + print_word(")"); + if (NODE_SYNPRETTY & n->flags) { + print_word(";"); + outflags |= MMAN_PP; + } +} + +static int +pre_fo(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + pre_syn(n); + break; + case ROFFT_HEAD: + if (n->child == NULL) + return 0; + if (NODE_SYNPRETTY & n->flags) + print_block(".HP 4n", MMAN_nl); + font_push('B'); + break; + case ROFFT_BODY: + outflags &= ~(MMAN_spc | MMAN_nl); + print_word("("); + outflags &= ~MMAN_spc; + break; + default: + break; + } + return 1; +} + +static void +post_fo(DECL_ARGS) +{ + + switch 
(n->type) { + case ROFFT_HEAD: + if (n->child != NULL) + font_pop(); + break; + case ROFFT_BODY: + post_fn(meta, n); + break; + default: + break; + } +} + +static int +pre_Ft(DECL_ARGS) +{ + + pre_syn(n); + font_push('I'); + return 1; +} + +static void +pre_ft(DECL_ARGS) +{ + print_line(".ft", 0); + print_word(n->child->string); + outflags |= MMAN_nl; +} + +static int +pre_in(DECL_ARGS) +{ + + if (NODE_SYNPRETTY & n->flags) { + pre_syn(n); + font_push('B'); + print_word("#include <"); + outflags &= ~MMAN_spc; + } else { + print_word("<"); + outflags &= ~MMAN_spc; + font_push('I'); + } + return 1; +} + +static void +post_in(DECL_ARGS) +{ + + if (NODE_SYNPRETTY & n->flags) { + outflags &= ~MMAN_spc; + print_word(">"); + font_pop(); + outflags |= MMAN_br; + } else { + font_pop(); + outflags &= ~MMAN_spc; + print_word(">"); + } +} + +static int +pre_it(DECL_ARGS) +{ + const struct roff_node *bln; + + switch (n->type) { + case ROFFT_HEAD: + outflags |= MMAN_PP | MMAN_nl; + bln = n->parent->parent; + if (bln->norm->Bl.comp == 0 || + (n->parent->prev == NULL && + roff_node_prev(bln->parent) == NULL)) + outflags |= MMAN_sp; + outflags &= ~MMAN_br; + switch (bln->norm->Bl.type) { + case LIST_item: + return 0; + case LIST_inset: + case LIST_diag: + case LIST_ohang: + if (bln->norm->Bl.type == LIST_diag) + print_line(".B \"", 0); + else + print_line(".BR \\& \"", 0); + outflags &= ~MMAN_spc; + return 1; + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + print_width(&bln->norm->Bl, NULL); + TPremain = 0; + outflags |= MMAN_nl; + font_push('B'); + if (LIST_bullet == bln->norm->Bl.type) + print_word("\\(bu"); + else + print_word("-"); + font_pop(); + outflags |= MMAN_nl; + return 0; + case LIST_enum: + print_width(&bln->norm->Bl, NULL); + TPremain = 0; + outflags |= MMAN_nl; + print_count(&bln->norm->Bl.count); + outflags |= MMAN_nl; + return 0; + case LIST_hang: + print_width(&bln->norm->Bl, n->child); + TPremain = 0; + outflags |= MMAN_nl; + return 1; + case LIST_tag: 
+ print_width(&bln->norm->Bl, n->child); + putchar('\n'); + outflags &= ~MMAN_spc; + return 1; + default: + return 1; + } + default: + break; + } + return 1; +} + +/* + * This function is called after closing out an indented block. + * If we are inside an enclosing list, restore its indentation. + */ +static void +mid_it(void) +{ + char buf[24]; + + /* Nothing to do outside a list. */ + if (0 == Bl_stack_len || 0 == Bl_stack[Bl_stack_len - 1]) + return; + + /* The indentation has already been set up. */ + if (Bl_stack_post[Bl_stack_len - 1]) + return; + + /* Restore the indentation of the enclosing list. */ + print_line(".RS", MMAN_Bk_susp); + (void)snprintf(buf, sizeof(buf), "%dn", + Bl_stack[Bl_stack_len - 1]); + print_word(buf); + + /* Remember to close out this .RS block later. */ + Bl_stack_post[Bl_stack_len - 1] = 1; +} + +static void +post_it(DECL_ARGS) +{ + const struct roff_node *bln; + + bln = n->parent->parent; + + switch (n->type) { + case ROFFT_HEAD: + switch (bln->norm->Bl.type) { + case LIST_diag: + outflags &= ~MMAN_spc; + print_word("\\ "); + break; + case LIST_ohang: + outflags |= MMAN_br; + break; + default: + break; + } + break; + case ROFFT_BODY: + switch (bln->norm->Bl.type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + case LIST_hang: + case LIST_tag: + assert(Bl_stack_len); + Bl_stack[--Bl_stack_len] = 0; + + /* + * Our indentation had to be restored + * after a child display or child list. + * Close out that indentation block now. 
+ */ + if (Bl_stack_post[Bl_stack_len]) { + print_line(".RE", MMAN_nl); + Bl_stack_post[Bl_stack_len] = 0; + } + break; + case LIST_column: + if (NULL != n->next) { + putchar('\t'); + outflags &= ~MMAN_spc; + } + break; + default: + break; + } + break; + default: + break; + } +} + +static void +post_lb(DECL_ARGS) +{ + + if (SEC_LIBRARY == n->sec) + outflags |= MMAN_br; +} + +static int +pre_lk(DECL_ARGS) +{ + const struct roff_node *link, *descr, *punct; + + if ((link = n->child) == NULL) + return 0; + + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. */ + if ((descr = link->next) != NULL && descr != punct) { + font_push('I'); + while (descr != punct) { + print_word(descr->string); + descr = descr->next; + } + font_pop(); + print_word(":"); + } + + /* Link target. */ + font_push('B'); + print_word(link->string); + font_pop(); + + /* Trailing punctuation. */ + while (punct != NULL) { + print_word(punct->string); + punct = punct->next; + } + return 0; +} + +static void +pre_onearg(DECL_ARGS) +{ + outflags |= MMAN_nl; + print_word("."); + outflags &= ~MMAN_spc; + print_word(roff_name[n->tok]); + if (n->child != NULL) + print_word(n->child->string); + outflags |= MMAN_nl; + if (n->tok == ROFF_ce) + for (n = n->child->next; n != NULL; n = n->next) + print_node(meta, n); +} + +static int +pre_li(DECL_ARGS) +{ + font_push('R'); + return 1; +} + +static int +pre_nm(DECL_ARGS) +{ + char *name; + + switch (n->type) { + case ROFFT_BLOCK: + outflags |= MMAN_Bk; + pre_syn(n); + return 1; + case ROFFT_HEAD: + case ROFFT_ELEM: + break; + default: + return 1; + } + name = n->child == NULL ? 
NULL : n->child->string; + if (name == NULL) + return 0; + if (n->type == ROFFT_HEAD) { + if (roff_node_prev(n->parent) == NULL) + outflags |= MMAN_sp; + print_block(".HP", 0); + printf(" %dn", man_strlen(name) + 1); + outflags |= MMAN_nl; + } + font_push('B'); + return 1; +} + +static void +post_nm(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + outflags &= ~MMAN_Bk; + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + if (n->child != NULL && n->child->string != NULL) + font_pop(); + break; + default: + break; + } +} + +static int +pre_no(DECL_ARGS) +{ + outflags |= MMAN_spc_force; + return 1; +} + +static void +pre_noarg(DECL_ARGS) +{ + outflags |= MMAN_nl; + print_word("."); + outflags &= ~MMAN_spc; + print_word(roff_name[n->tok]); + outflags |= MMAN_nl; +} + +static int +pre_ns(DECL_ARGS) +{ + outflags &= ~MMAN_spc; + return 0; +} + +static void +post_pf(DECL_ARGS) +{ + + if ( ! (n->next == NULL || n->next->flags & NODE_LINE)) + outflags &= ~MMAN_spc; +} + +static int +pre_pp(DECL_ARGS) +{ + + if (MDOC_It != n->parent->tok) + outflags |= MMAN_PP; + outflags |= MMAN_sp | MMAN_nl; + outflags &= ~MMAN_br; + return 0; +} + +static int +pre_rs(DECL_ARGS) +{ + + if (SEC_SEE_ALSO == n->sec) { + outflags |= MMAN_PP | MMAN_sp | MMAN_nl; + outflags &= ~MMAN_br; + } + return 1; +} + +static int +pre_skip(DECL_ARGS) +{ + + return 0; +} + +static int +pre_sm(DECL_ARGS) +{ + + if (NULL == n->child) + outflags ^= MMAN_Sm; + else if (0 == strcmp("on", n->child->string)) + outflags |= MMAN_Sm; + else + outflags &= ~MMAN_Sm; + + if (MMAN_Sm & outflags) + outflags |= MMAN_spc; + + return 0; +} + +static void +pre_sp(DECL_ARGS) +{ + if (outflags & MMAN_PP) { + outflags &= ~MMAN_PP; + print_line(".PP", 0); + } else { + print_line(".sp", 0); + if (n->child != NULL) + print_word(n->child->string); + } + outflags |= MMAN_nl; +} + +static int +pre_sy(DECL_ARGS) +{ + + font_push('B'); + return 1; +} + +static void +pre_ta(DECL_ARGS) +{ + print_line(".ta", 0); + for (n = n->child; n 
!= NULL; n = n->next) + print_word(n->string); + outflags |= MMAN_nl; +} + +static int +pre_vt(DECL_ARGS) +{ + + if (NODE_SYNPRETTY & n->flags) { + switch (n->type) { + case ROFFT_BLOCK: + pre_syn(n); + return 1; + case ROFFT_BODY: + break; + default: + return 0; + } + } + font_push('I'); + return 1; +} + +static void +post_vt(DECL_ARGS) +{ + + if (n->flags & NODE_SYNPRETTY && n->type != ROFFT_BODY) + return; + font_pop(); +} + +static int +pre_xr(DECL_ARGS) +{ + + n = n->child; + if (NULL == n) + return 0; + print_node(meta, n); + n = n->next; + if (NULL == n) + return 0; + outflags &= ~MMAN_spc; + print_word("("); + print_node(meta, n); + print_word(")"); + return 0; +} diff --git a/usr.bin/mandoc/mdoc_markdown.c b/usr.bin/mandoc/mdoc_markdown.c new file mode 100644 index 0000000..e0572cb --- /dev/null +++ b/usr.bin/mandoc/mdoc_markdown.c @@ -0,0 +1,1606 @@ +/* $OpenBSD: mdoc_markdown.c,v 1.35 2020/04/03 11:34:19 schwarze Exp $ */ +/* + * Copyright (c) 2017, 2018, 2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Markdown formatter for mdoc(7) used by mandoc(1). 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "main.h" + +struct md_act { + int (*cond)(struct roff_node *); + int (*pre)(struct roff_node *); + void (*post)(struct roff_node *); + const char *prefix; /* pre-node string constant */ + const char *suffix; /* post-node string constant */ +}; + +static void md_nodelist(struct roff_node *); +static void md_node(struct roff_node *); +static const char *md_stack(char); +static void md_preword(void); +static void md_rawword(const char *); +static void md_word(const char *); +static void md_named(const char *); +static void md_char(unsigned char); +static void md_uri(const char *); + +static int md_cond_head(struct roff_node *); +static int md_cond_body(struct roff_node *); + +static int md_pre_abort(struct roff_node *); +static int md_pre_raw(struct roff_node *); +static int md_pre_word(struct roff_node *); +static int md_pre_skip(struct roff_node *); +static void md_pre_syn(struct roff_node *); +static int md_pre_An(struct roff_node *); +static int md_pre_Ap(struct roff_node *); +static int md_pre_Bd(struct roff_node *); +static int md_pre_Bk(struct roff_node *); +static int md_pre_Bl(struct roff_node *); +static int md_pre_D1(struct roff_node *); +static int md_pre_Dl(struct roff_node *); +static int md_pre_En(struct roff_node *); +static int md_pre_Eo(struct roff_node *); +static int md_pre_Fa(struct roff_node *); +static int md_pre_Fd(struct roff_node *); +static int md_pre_Fn(struct roff_node *); +static int md_pre_Fo(struct roff_node *); +static int md_pre_In(struct roff_node *); +static int md_pre_It(struct roff_node *); +static int md_pre_Lk(struct roff_node *); +static int md_pre_Mt(struct roff_node *); +static int md_pre_Nd(struct roff_node *); +static int md_pre_Nm(struct roff_node *); +static int md_pre_No(struct roff_node *); 
+static int md_pre_Ns(struct roff_node *); +static int md_pre_Pp(struct roff_node *); +static int md_pre_Rs(struct roff_node *); +static int md_pre_Sh(struct roff_node *); +static int md_pre_Sm(struct roff_node *); +static int md_pre_Vt(struct roff_node *); +static int md_pre_Xr(struct roff_node *); +static int md_pre__T(struct roff_node *); +static int md_pre_br(struct roff_node *); + +static void md_post_raw(struct roff_node *); +static void md_post_word(struct roff_node *); +static void md_post_pc(struct roff_node *); +static void md_post_Bk(struct roff_node *); +static void md_post_Bl(struct roff_node *); +static void md_post_D1(struct roff_node *); +static void md_post_En(struct roff_node *); +static void md_post_Eo(struct roff_node *); +static void md_post_Fa(struct roff_node *); +static void md_post_Fd(struct roff_node *); +static void md_post_Fl(struct roff_node *); +static void md_post_Fn(struct roff_node *); +static void md_post_Fo(struct roff_node *); +static void md_post_In(struct roff_node *); +static void md_post_It(struct roff_node *); +static void md_post_Lb(struct roff_node *); +static void md_post_Nm(struct roff_node *); +static void md_post_Pf(struct roff_node *); +static void md_post_Vt(struct roff_node *); +static void md_post__T(struct roff_node *); + +static const struct md_act md_acts[MDOC_MAX - MDOC_Dd] = { + { NULL, NULL, NULL, NULL, NULL }, /* Dd */ + { NULL, NULL, NULL, NULL, NULL }, /* Dt */ + { NULL, NULL, NULL, NULL, NULL }, /* Os */ + { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Sh */ + { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Ss */ + { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Pp */ + { md_cond_body, md_pre_D1, md_post_D1, NULL, NULL }, /* D1 */ + { md_cond_body, md_pre_Dl, md_post_D1, NULL, NULL }, /* Dl */ + { md_cond_body, md_pre_Bd, md_post_D1, NULL, NULL }, /* Bd */ + { NULL, NULL, NULL, NULL, NULL }, /* Ed */ + { md_cond_body, md_pre_Bl, md_post_Bl, NULL, NULL }, /* Bl */ + { NULL, NULL, NULL, NULL, NULL }, /* El */ + { NULL, 
md_pre_It, md_post_It, NULL, NULL }, /* It */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */ + { NULL, md_pre_An, NULL, NULL, NULL }, /* An */ + { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Dv */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Er */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Ev */ + { NULL, NULL, NULL, NULL, NULL }, /* Ex */ + { NULL, md_pre_Fa, md_post_Fa, NULL, NULL }, /* Fa */ + { NULL, md_pre_Fd, md_post_Fd, "**", "**" }, /* Fd */ + { NULL, md_pre_raw, md_post_Fl, "**-", "**" }, /* Fl */ + { NULL, md_pre_Fn, md_post_Fn, NULL, NULL }, /* Fn */ + { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ft */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ic */ + { NULL, md_pre_In, md_post_In, NULL, NULL }, /* In */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Li */ + { md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */ + { NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */ + { NULL, md_pre_abort, NULL, NULL, NULL }, /* Ot */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */ + { NULL, NULL, NULL, NULL, NULL }, /* Rv */ + { NULL, NULL, NULL, NULL, NULL }, /* St */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Va */ + { NULL, md_pre_Vt, md_post_Vt, "*", "*" }, /* Vt */ + { NULL, md_pre_Xr, NULL, NULL, NULL }, /* Xr */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %A */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %B */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %D */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %I */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %J */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %N */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %O */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %P */ + 
{ NULL, NULL, md_post_pc, NULL, NULL }, /* %R */ + { NULL, md_pre__T, md_post__T, NULL, NULL }, /* %T */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %V */ + { NULL, NULL, NULL, NULL, NULL }, /* Ac */ + { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Ao */ + { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Aq */ + { NULL, NULL, NULL, NULL, NULL }, /* At */ + { NULL, NULL, NULL, NULL, NULL }, /* Bc */ + { NULL, NULL, NULL, NULL, NULL }, /* Bf XXX not implemented */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bo */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bq */ + { NULL, NULL, NULL, NULL, NULL }, /* Bsx */ + { NULL, NULL, NULL, NULL, NULL }, /* Bx */ + { NULL, NULL, NULL, NULL, NULL }, /* Db */ + { NULL, NULL, NULL, NULL, NULL }, /* Dc */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Do */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Dq */ + { NULL, NULL, NULL, NULL, NULL }, /* Ec */ + { NULL, NULL, NULL, NULL, NULL }, /* Ef */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Em */ + { md_cond_body, md_pre_Eo, md_post_Eo, NULL, NULL }, /* Eo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fx */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ms */ + { NULL, md_pre_No, NULL, NULL, NULL }, /* No */ + { NULL, md_pre_Ns, NULL, NULL, NULL }, /* Ns */ + { NULL, NULL, NULL, NULL, NULL }, /* Nx */ + { NULL, NULL, NULL, NULL, NULL }, /* Ox */ + { NULL, NULL, NULL, NULL, NULL }, /* Pc */ + { NULL, NULL, md_post_Pf, NULL, NULL }, /* Pf */ + { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Po */ + { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Pq */ + { NULL, NULL, NULL, NULL, NULL }, /* Qc */ + { md_cond_body, md_pre_raw, md_post_raw, "'`", "`'" }, /* Ql */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qo */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qq */ + { NULL, NULL, NULL, NULL, NULL }, /* Re */ + { md_cond_body, md_pre_Rs, NULL, 
NULL, NULL }, /* Rs */ + { NULL, NULL, NULL, NULL, NULL }, /* Sc */ + { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* So */ + { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* Sq */ + { NULL, md_pre_Sm, NULL, NULL, NULL }, /* Sm */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Sx */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Sy */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Tn */ + { NULL, NULL, NULL, NULL, NULL }, /* Ux */ + { NULL, NULL, NULL, NULL, NULL }, /* Xc */ + { NULL, NULL, NULL, NULL, NULL }, /* Xo */ + { NULL, md_pre_Fo, md_post_Fo, "**", "**" }, /* Fo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fc */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Oo */ + { NULL, NULL, NULL, NULL, NULL }, /* Oc */ + { NULL, md_pre_Bk, md_post_Bk, NULL, NULL }, /* Bk */ + { NULL, NULL, NULL, NULL, NULL }, /* Ek */ + { NULL, NULL, NULL, NULL, NULL }, /* Bt */ + { NULL, NULL, NULL, NULL, NULL }, /* Hf */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */ + { NULL, NULL, NULL, NULL, NULL }, /* Ud */ + { NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */ + { NULL, md_pre_abort, NULL, NULL, NULL }, /* Lp */ + { NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */ + { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */ + { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */ + { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */ + { NULL, NULL, NULL, NULL, NULL }, /* Brc */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %C */ + { NULL, md_pre_skip, NULL, NULL, NULL }, /* Es */ + { md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */ + { NULL, NULL, NULL, NULL, NULL }, /* Dx */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */ + { NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */ + { NULL, NULL, NULL, NULL, NULL }, /* Ta */ + { NULL, md_pre_skip, NULL, NULL, NULL }, /* Tg */ +}; +static const struct md_act *md_act(enum roff_tok); + +static int outflags; +#define MD_spc (1 << 0) /* Blank character before next 
word. */ +#define MD_spc_force (1 << 1) /* Even before trailing punctuation. */ +#define MD_nonl (1 << 2) /* Prevent linebreak in markdown code. */ +#define MD_nl (1 << 3) /* Break markdown code line. */ +#define MD_br (1 << 4) /* Insert an output line break. */ +#define MD_sp (1 << 5) /* Insert a paragraph break. */ +#define MD_Sm (1 << 6) /* Horizontal spacing mode. */ +#define MD_Bk (1 << 7) /* Word keep mode. */ +#define MD_An_split (1 << 8) /* Author mode is "split". */ +#define MD_An_nosplit (1 << 9) /* Author mode is "nosplit". */ + +static int escflags; /* Escape in generated markdown code: */ +#define ESC_BOL (1 << 0) /* "#*+-" near the beginning of a line. */ +#define ESC_NUM (1 << 1) /* "." after a leading number. */ +#define ESC_HYP (1 << 2) /* "(" immediately after "]". */ +#define ESC_SQU (1 << 4) /* "]" when "[" is open. */ +#define ESC_FON (1 << 5) /* "*" immediately after unrelated "*". */ +#define ESC_EOL (1 << 6) /* " " at the end of a line. */ + +static int code_blocks, quote_blocks, list_blocks; +static int outcount; + + +static const struct md_act * +md_act(enum roff_tok tok) +{ + assert(tok >= MDOC_Dd && tok <= MDOC_MAX); /* NOTE(review): md_acts is declared with MDOC_MAX - MDOC_Dd entries, so tok == MDOC_MAX passes this assert but yields a pointer one past the last entry; the upper bound probably should be "<" -- confirm. */ + return md_acts + (tok - MDOC_Dd); +} + +void +markdown_mdoc(void *arg, const struct roff_meta *mdoc) +{ + outflags = MD_Sm; + md_word(mdoc->title); + if (mdoc->msec != NULL) { + outflags &= ~MD_spc; + md_word("("); + md_word(mdoc->msec); + md_word(")"); + } + md_word("-"); + md_word(mdoc->vol); + if (mdoc->arch != NULL) { + md_word("("); + md_word(mdoc->arch); + md_word(")"); + } + outflags |= MD_sp; + + md_nodelist(mdoc->first->child); + + outflags |= MD_sp; + md_word(mdoc->os); + md_word("-"); + md_word(mdoc->date); + putchar('\n'); +} + +static void +md_nodelist(struct roff_node *n) +{ + while (n != NULL) { + md_node(n); + n = n->next; + } +} + +static void +md_node(struct roff_node *n) +{ + const struct md_act *act; + int cond, process_children; + + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) + return; + + 
if (outflags & MD_nonl) + outflags &= ~(MD_nl | MD_sp); + else if (outflags & MD_spc && + n->flags & NODE_LINE && + !roff_node_transparent(n)) + outflags |= MD_nl; + + act = NULL; + cond = 0; + process_children = 1; + n->flags &= ~NODE_ENDED; + + if (n->type == ROFFT_TEXT) { + if (n->flags & NODE_DELIMC) + outflags &= ~(MD_spc | MD_spc_force); + else if (outflags & MD_Sm) + outflags |= MD_spc_force; + md_word(n->string); + if (n->flags & NODE_DELIMO) + outflags &= ~(MD_spc | MD_spc_force); + else if (outflags & MD_Sm) + outflags |= MD_spc; + } else if (n->tok < ROFF_MAX) { + switch (n->tok) { + case ROFF_br: + process_children = md_pre_br(n); + break; + case ROFF_sp: + process_children = md_pre_Pp(n); + break; + default: + process_children = 0; + break; + } + } else { + act = md_act(n->tok); + cond = act->cond == NULL || (*act->cond)(n); + if (cond && act->pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + process_children = (*act->pre)(n); + } + + if (process_children && n->child != NULL) + md_nodelist(n->child); + + if (n->flags & NODE_ENDED) + return; + + if (cond && act->post != NULL) + (*act->post)(n); + + if (n->end != ENDBODY_NOT) + n->body->flags |= NODE_ENDED; +} + +static const char * +md_stack(char c) +{ + static char *stack; + static size_t sz; + static size_t cur; + + switch (c) { + case '\0': + break; + case (char)-1: + assert(cur); + stack[--cur] = '\0'; + break; + default: + if (cur + 1 >= sz) { + sz += 8; + stack = mandoc_realloc(stack, sz); + } + stack[cur] = c; + stack[++cur] = '\0'; + break; + } + return stack == NULL ? "" : stack; +} + +/* + * Handle vertical and horizontal spacing. + */ +static void +md_preword(void) +{ + const char *cp; + + /* + * If a list block is nested inside a code block or a blockquote, + * blank lines for paragraph breaks no longer work; instead, + * they terminate the list. Work around this markdown issue + * by using mere line breaks instead. 
+ */ + + if (list_blocks && outflags & MD_sp) { + outflags &= ~MD_sp; + outflags |= MD_br; + } + + /* + * End the old line if requested. + * Escape whitespace at the end of the markdown line + * such that it won't look like an output line break. + */ + + if (outflags & MD_sp) + putchar('\n'); + else if (outflags & MD_br) { + putchar(' '); + putchar(' '); + } else if (outflags & MD_nl && escflags & ESC_EOL) + md_named("zwnj"); + + /* Start a new line if necessary. */ + + if (outflags & (MD_nl | MD_br | MD_sp)) { + putchar('\n'); + for (cp = md_stack('\0'); *cp != '\0'; cp++) { + putchar(*cp); + if (*cp == '>') + putchar(' '); + } + outflags &= ~(MD_nl | MD_br | MD_sp); + escflags = ESC_BOL; + outcount = 0; + + /* Handle horizontal spacing. */ + + } else if (outflags & MD_spc) { + if (outflags & MD_Bk) + fputs(" ", stdout); + else + putchar(' '); + escflags &= ~ESC_FON; + outcount++; + } + + outflags &= ~(MD_spc_force | MD_nonl); + if (outflags & MD_Sm) + outflags |= MD_spc; + else + outflags &= ~MD_spc; +} + +/* + * Print markdown syntax elements. + * Can also be used for constant strings when neither escaping + * nor delimiter handling is required. + */ +static void +md_rawword(const char *s) +{ + md_preword(); + + if (*s == '\0') + return; + + if (escflags & ESC_FON) { + escflags &= ~ESC_FON; + if (*s == '*' && !code_blocks) + fputs("‌", stdout); + } + + while (*s != '\0') { + switch(*s) { + case '*': + if (s[1] == '\0') + escflags |= ESC_FON; + break; + case '[': + escflags |= ESC_SQU; + break; + case ']': + escflags |= ESC_HYP; + escflags &= ~ESC_SQU; + break; + default: + break; + } + md_char(*s++); + } + if (s[-1] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; +} + +/* + * Print text and mdoc(7) syntax elements. + */ +static void +md_word(const char *s) +{ + const char *seq, *prevfont, *currfont, *nextfont; + char c; + int bs, sz, uc, breakline; + + /* No spacing before closing delimiters. 
*/ + if (s[0] != '\0' && s[1] == '\0' && + strchr("!),.:;?]", s[0]) != NULL && + (outflags & MD_spc_force) == 0) + outflags &= ~MD_spc; + + md_preword(); + + if (*s == '\0') + return; + + /* No spacing after opening delimiters. */ + if ((s[0] == '(' || s[0] == '[') && s[1] == '\0') + outflags &= ~MD_spc; + + breakline = 0; + prevfont = currfont = ""; + while ((c = *s++) != '\0') { + bs = 0; + switch(c) { + case ASCII_NBRSP: + if (code_blocks) + c = ' '; + else { + md_named("nbsp"); + c = '\0'; + } + break; + case ASCII_HYPH: + bs = escflags & ESC_BOL && !code_blocks; + c = '-'; + break; + case ASCII_BREAK: + continue; + case '#': + case '+': + case '-': + bs = escflags & ESC_BOL && !code_blocks; + break; + case '(': + bs = escflags & ESC_HYP && !code_blocks; + break; + case ')': + bs = escflags & ESC_NUM && !code_blocks; + break; + case '*': + case '[': + case '_': + case '`': + bs = !code_blocks; + break; + case '.': + bs = escflags & ESC_NUM && !code_blocks; + break; + case '<': + if (code_blocks == 0) { + md_named("lt"); + c = '\0'; + } + break; + case '=': + if (escflags & ESC_BOL && !code_blocks) { + md_named("equals"); + c = '\0'; + } + break; + case '>': + if (code_blocks == 0) { + md_named("gt"); + c = '\0'; + } + break; + case '\\': + uc = 0; + nextfont = NULL; + switch (mandoc_escape(&s, &seq, &sz)) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, sz); + break; + case ESCAPE_SPECIAL: + uc = mchars_spec2cp(seq, sz); + break; + case ESCAPE_UNDEF: + uc = *seq; + break; + case ESCAPE_DEVICE: + md_rawword("markdown"); + continue; + case ESCAPE_FONTBOLD: + nextfont = "**"; + break; + case ESCAPE_FONTITALIC: + nextfont = "*"; + break; + case ESCAPE_FONTBI: + nextfont = "***"; + break; + case ESCAPE_FONT: + case ESCAPE_FONTCW: + case ESCAPE_FONTROMAN: + nextfont = ""; + break; + case ESCAPE_FONTPREV: + nextfont = prevfont; + break; + case ESCAPE_BREAK: + breakline = 1; + break; + case 
ESCAPE_NOSPACE: + case ESCAPE_SKIPCHAR: + case ESCAPE_OVERSTRIKE: + /* XXX not implemented */ + /* FALLTHROUGH */ + case ESCAPE_ERROR: + default: + break; + } + if (nextfont != NULL && !code_blocks) { + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } + prevfont = currfont; + currfont = nextfont; + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } + } + if (uc) { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + if (code_blocks) { + seq = mchars_uc2str(uc); + fputs(seq, stdout); + outcount += strlen(seq); + } else { + printf("&#%d;", uc); + outcount++; + } + escflags &= ~ESC_FON; + } + c = '\0'; + break; + case ']': + bs = escflags & ESC_SQU && !code_blocks; + escflags |= ESC_HYP; + break; + default: + break; + } + if (bs) + putchar('\\'); + md_char(c); + if (breakline && + (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) { + printf(" \n"); + breakline = 0; + while (*s == ' ' || *s == ASCII_NBRSP) + s++; + } + } + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } else if (s[-2] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; +} + +/* + * Print a single HTML named character reference. + */ +static void +md_named(const char *s) +{ + printf("&%s;", s); + escflags &= ~(ESC_FON | ESC_EOL); + outcount++; +} + +/* + * Print a single raw character and maintain certain escape flags. 
+ */ +static void +md_char(unsigned char c) +{ + if (c != '\0') { + putchar(c); + if (c == '*') + escflags |= ESC_FON; + else + escflags &= ~ESC_FON; + outcount++; + } + if (c != ']') + escflags &= ~ESC_HYP; + if (c == ' ' || c == '\t' || c == '>') + return; + if (isdigit(c) == 0) + escflags &= ~ESC_NUM; + else if (escflags & ESC_BOL) + escflags |= ESC_NUM; + escflags &= ~ESC_BOL; +} + +static int +md_cond_head(struct roff_node *n) +{ + return n->type == ROFFT_HEAD; +} + +static int +md_cond_body(struct roff_node *n) +{ + return n->type == ROFFT_BODY; +} + +static int +md_pre_abort(struct roff_node *n) +{ + abort(); +} + +static int +md_pre_raw(struct roff_node *n) +{ + const char *prefix; + + if ((prefix = md_act(n->tok)->prefix) != NULL) { + md_rawword(prefix); + outflags &= ~MD_spc; + if (*prefix == '`') + code_blocks++; + } + return 1; +} + +static void +md_post_raw(struct roff_node *n) +{ + const char *suffix; + + if ((suffix = md_act(n->tok)->suffix) != NULL) { + outflags &= ~(MD_spc | MD_nl); + md_rawword(suffix); + if (*suffix == '`') + code_blocks--; + } +} + +static int +md_pre_word(struct roff_node *n) +{ + const char *prefix; + + if ((prefix = md_act(n->tok)->prefix) != NULL) { + md_word(prefix); + outflags &= ~MD_spc; + } + return 1; +} + +static void +md_post_word(struct roff_node *n) +{ + const char *suffix; + + if ((suffix = md_act(n->tok)->suffix) != NULL) { + outflags &= ~(MD_spc | MD_nl); + md_word(suffix); + } +} + +static void +md_post_pc(struct roff_node *n) +{ + struct roff_node *nn; + + md_post_raw(n); + if (n->parent->tok != MDOC_Rs) + return; + + if ((nn = roff_node_next(n)) != NULL) { + md_word(","); + if (nn->tok == n->tok && + (nn = roff_node_prev(n)) != NULL && + nn->tok == n->tok) + md_word("and"); + } else { + md_word("."); + outflags |= MD_nl; + } +} + +static int +md_pre_skip(struct roff_node *n) +{ + return 0; +} + +static void +md_pre_syn(struct roff_node *n) +{ + struct roff_node *np; + + if ((n->flags & NODE_SYNPRETTY) == 0 || 
+ (np = roff_node_prev(n)) == NULL) + return; + + if (np->tok == n->tok && + n->tok != MDOC_Ft && + n->tok != MDOC_Fo && + n->tok != MDOC_Fn) { + outflags |= MD_br; + return; + } + + switch (np->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + outflags |= MD_sp; + break; + case MDOC_Ft: + if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) { + outflags |= MD_sp; + break; + } + /* FALLTHROUGH */ + default: + outflags |= MD_br; + break; + } +} + +static int +md_pre_An(struct roff_node *n) +{ + switch (n->norm->An.auth) { + case AUTH_split: + outflags &= ~MD_An_nosplit; + outflags |= MD_An_split; + return 0; + case AUTH_nosplit: + outflags &= ~MD_An_split; + outflags |= MD_An_nosplit; + return 0; + default: + if (outflags & MD_An_split) + outflags |= MD_br; + else if (n->sec == SEC_AUTHORS && + ! (outflags & MD_An_nosplit)) + outflags |= MD_An_split; + return 1; + } +} + +static int +md_pre_Ap(struct roff_node *n) +{ + outflags &= ~MD_spc; + md_word("'"); + outflags &= ~MD_spc; + return 0; +} + +static int +md_pre_Bd(struct roff_node *n) +{ + switch (n->norm->Bd.type) { + case DISP_unfilled: + case DISP_literal: + return md_pre_Dl(n); + default: + return md_pre_D1(n); + } +} + +static int +md_pre_Bk(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + outflags |= MD_Bk; + return 1; + default: + return 0; + } +} + +static void +md_post_Bk(struct roff_node *n) +{ + if (n->type == ROFFT_BODY) + outflags &= ~MD_Bk; +} + +static int +md_pre_Bl(struct roff_node *n) +{ + n->norm->Bl.count = 0; + if (n->norm->Bl.type == LIST_column) + md_pre_Dl(n); + outflags |= MD_sp; + return 1; +} + +static void +md_post_Bl(struct roff_node *n) +{ + n->norm->Bl.count = 0; + if (n->norm->Bl.type == LIST_column) + md_post_D1(n); + outflags |= MD_sp; +} + +static int +md_pre_D1(struct roff_node *n) +{ + /* + * Markdown blockquote syntax does not work inside code blocks. 
+ * The best we can do is fall back to another nested code block. + */ + if (code_blocks) { + md_stack('\t'); + code_blocks++; + } else { + md_stack('>'); + quote_blocks++; + } + outflags |= MD_sp; + return 1; +} + +static void +md_post_D1(struct roff_node *n) +{ + md_stack((char)-1); + if (code_blocks) + code_blocks--; + else + quote_blocks--; + outflags |= MD_sp; +} + +static int +md_pre_Dl(struct roff_node *n) +{ + /* + * Markdown code block syntax does not work inside blockquotes. + * The best we can do is fall back to another nested blockquote. + */ + if (quote_blocks) { + md_stack('>'); + quote_blocks++; + } else { + md_stack('\t'); + code_blocks++; + } + outflags |= MD_sp; + return 1; +} + +static int +md_pre_En(struct roff_node *n) +{ + if (n->norm->Es == NULL || + n->norm->Es->child == NULL) + return 1; + + md_word(n->norm->Es->child->string); + outflags &= ~MD_spc; + return 1; +} + +static void +md_post_En(struct roff_node *n) +{ + if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + return; + + outflags &= ~MD_spc; + md_word(n->norm->Es->child->next->string); +} + +static int +md_pre_Eo(struct roff_node *n) +{ + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + md_preword(); + else if (n->end != ENDBODY_NOT ? 
n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + outflags &= ~(MD_spc | MD_nl); + return 1; +} + +static void +md_post_Eo(struct roff_node *n) +{ + if (n->end != ENDBODY_NOT) { + outflags |= MD_spc; + return; + } + + if (n->child == NULL && n->parent->head->child == NULL) + return; + + if (n->parent->tail != NULL && n->parent->tail->child != NULL) + outflags &= ~MD_spc; + else + outflags |= MD_spc; +} + +static int +md_pre_Fa(struct roff_node *n) +{ + int am_Fa; + + am_Fa = n->tok == MDOC_Fa; + + if (am_Fa) + n = n->child; + + while (n != NULL) { + md_rawword("*"); + outflags &= ~MD_spc; + md_node(n); + outflags &= ~MD_spc; + md_rawword("*"); + if ((n = n->next) != NULL) + md_word(","); + } + return 0; +} + +static void +md_post_Fa(struct roff_node *n) +{ + struct roff_node *nn; + + if ((nn = roff_node_next(n)) != NULL && nn->tok == MDOC_Fa) + md_word(","); +} + +static int +md_pre_Fd(struct roff_node *n) +{ + md_pre_syn(n); + md_pre_raw(n); + return 1; +} + +static void +md_post_Fd(struct roff_node *n) +{ + md_post_raw(n); + outflags |= MD_br; +} + +static void +md_post_Fl(struct roff_node *n) +{ + struct roff_node *nn; + + md_post_raw(n); + if (n->child == NULL && (nn = roff_node_next(n)) != NULL && + nn->type != ROFFT_TEXT && (nn->flags & NODE_LINE) == 0) + outflags &= ~MD_spc; +} + +static int +md_pre_Fn(struct roff_node *n) +{ + md_pre_syn(n); + + if ((n = n->child) == NULL) + return 0; + + md_rawword("**"); + outflags &= ~MD_spc; + md_node(n); + outflags &= ~MD_spc; + md_rawword("**"); + outflags &= ~MD_spc; + md_word("("); + + if ((n = n->next) != NULL) + md_pre_Fa(n); + return 0; +} + +static void +md_post_Fn(struct roff_node *n) +{ + md_word(")"); + if (n->flags & NODE_SYNPRETTY) { + md_word(";"); + outflags |= MD_sp; + } +} + +static int +md_pre_Fo(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + md_pre_syn(n); + break; + case ROFFT_HEAD: + if 
(n->child == NULL) + return 0; + md_pre_raw(n); + break; + case ROFFT_BODY: + outflags &= ~(MD_spc | MD_nl); + md_word("("); + break; + default: + break; + } + return 1; +} + +static void +md_post_Fo(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_HEAD: + if (n->child != NULL) + md_post_raw(n); + break; + case ROFFT_BODY: + md_post_Fn(n); + break; + default: + break; + } +} + +static int +md_pre_In(struct roff_node *n) +{ + if (n->flags & NODE_SYNPRETTY) { + md_pre_syn(n); + md_rawword("**"); + outflags &= ~MD_spc; + md_word("#include <"); + } else { + md_word("<"); + outflags &= ~MD_spc; + md_rawword("*"); + } + outflags &= ~MD_spc; + return 1; +} + +static void +md_post_In(struct roff_node *n) +{ + if (n->flags & NODE_SYNPRETTY) { + outflags &= ~MD_spc; + md_rawword(">**"); + outflags |= MD_nl; + } else { + outflags &= ~MD_spc; + md_rawword("*>"); + } +} + +static int +md_pre_It(struct roff_node *n) +{ + struct roff_node *bln; + + switch (n->type) { + case ROFFT_BLOCK: + return 1; + + case ROFFT_HEAD: + bln = n->parent->parent; + if (bln->norm->Bl.comp == 0 && + bln->norm->Bl.type != LIST_column) + outflags |= MD_sp; + outflags |= MD_nl; + + switch (bln->norm->Bl.type) { + case LIST_item: + outflags |= MD_br; + return 0; + case LIST_inset: + case LIST_diag: + case LIST_ohang: + outflags |= MD_br; + return 1; + case LIST_tag: + case LIST_hang: + outflags |= MD_sp; + return 1; + case LIST_bullet: + md_rawword("*\t"); + break; + case LIST_dash: + case LIST_hyphen: + md_rawword("-\t"); + break; + case LIST_enum: + md_preword(); + if (bln->norm->Bl.count < 99) + bln->norm->Bl.count++; + printf("%d.\t", bln->norm->Bl.count); + escflags &= ~ESC_FON; + break; + case LIST_column: + outflags |= MD_br; + return 0; + default: + return 0; + } + outflags &= ~MD_spc; + outflags |= MD_nonl; + outcount = 0; + md_stack('\t'); + if (code_blocks || quote_blocks) + list_blocks++; + return 0; + + case ROFFT_BODY: + bln = n->parent->parent; + switch (bln->norm->Bl.type) { + 
case LIST_ohang: + outflags |= MD_br; + break; + case LIST_tag: + case LIST_hang: + md_pre_D1(n); + break; + default: + break; + } + return 1; + + default: + return 0; + } +} + +static void +md_post_It(struct roff_node *n) +{ + struct roff_node *bln; + int i, nc; + + if (n->type != ROFFT_BODY) + return; + + bln = n->parent->parent; + switch (bln->norm->Bl.type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + md_stack((char)-1); + if (code_blocks || quote_blocks) + list_blocks--; + break; + case LIST_tag: + case LIST_hang: + md_post_D1(n); + break; + + case LIST_column: + if (n->next == NULL) + break; + + /* Calculate the array index of the current column. */ + + i = 0; + while ((n = n->prev) != NULL && n->type != ROFFT_HEAD) + i++; + + /* + * If a width was specified for this column, + * subtract what printed, and + * add the same spacing as in mdoc_term.c. + */ + + nc = bln->norm->Bl.ncols; + i = i < nc ? strlen(bln->norm->Bl.cols[i]) - outcount + + (nc < 5 ? 4 : nc == 5 ? 3 : 1) : 1; + if (i < 1) + i = 1; + while (i-- > 0) + putchar(' '); + + outflags &= ~MD_spc; + escflags &= ~ESC_FON; + outcount = 0; + break; + + default: + break; + } +} + +static void +md_post_Lb(struct roff_node *n) +{ + if (n->sec == SEC_LIBRARY) + outflags |= MD_br; +} + +static void +md_uri(const char *s) +{ + while (*s != '\0') { + if (strchr("%()<>", *s) != NULL) { + printf("%%%2.2hhX", *s); + outcount += 3; + } else { + putchar(*s); + outcount++; + } + s++; + } +} + +static int +md_pre_Lk(struct roff_node *n) +{ + const struct roff_node *link, *descr, *punct; + + if ((link = n->child) == NULL) + return 0; + + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. 
*/ + descr = link->next; + if (descr == punct) + descr = link; /* no text */ + md_rawword("["); + outflags &= ~MD_spc; + do { + md_word(descr->string); + descr = descr->next; + } while (descr != punct); + outflags &= ~MD_spc; + + /* Link target. */ + md_rawword("]("); + md_uri(link->string); + outflags &= ~MD_spc; + md_rawword(")"); + + /* Trailing punctuation. */ + while (punct != NULL) { + md_word(punct->string); + punct = punct->next; + } + return 0; +} + +static int +md_pre_Mt(struct roff_node *n) +{ + const struct roff_node *nch; + + md_rawword("["); + outflags &= ~MD_spc; + for (nch = n->child; nch != NULL; nch = nch->next) + md_word(nch->string); + outflags &= ~MD_spc; + md_rawword("](mailto:"); + for (nch = n->child; nch != NULL; nch = nch->next) { + md_uri(nch->string); + if (nch->next != NULL) { + putchar(' '); + outcount++; + } + } + outflags &= ~MD_spc; + md_rawword(")"); + return 0; +} + +static int +md_pre_Nd(struct roff_node *n) +{ + outflags &= ~MD_nl; + outflags |= MD_spc; + md_word("-"); + return 1; +} + +static int +md_pre_Nm(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + outflags |= MD_Bk; + md_pre_syn(n); + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + md_pre_raw(n); + break; + default: + break; + } + return 1; +} + +static void +md_post_Nm(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + outflags &= ~MD_Bk; + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + md_post_raw(n); + break; + default: + break; + } +} + +static int +md_pre_No(struct roff_node *n) +{ + outflags |= MD_spc_force; + return 1; +} + +static int +md_pre_Ns(struct roff_node *n) +{ + outflags &= ~MD_spc; + return 0; +} + +static void +md_post_Pf(struct roff_node *n) +{ + if (n->next != NULL && (n->next->flags & NODE_LINE) == 0) + outflags &= ~MD_spc; +} + +static int +md_pre_Pp(struct roff_node *n) +{ + outflags |= MD_sp; + return 0; +} + +static int +md_pre_Rs(struct roff_node *n) +{ + if (n->sec == SEC_SEE_ALSO) + outflags |= MD_sp; + 
return 1; +} + +static int +md_pre_Sh(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + if (n->sec == SEC_AUTHORS) + outflags &= ~(MD_An_split | MD_An_nosplit); + break; + case ROFFT_HEAD: + outflags |= MD_sp; + md_rawword(n->tok == MDOC_Sh ? "#" : "##"); + break; + case ROFFT_BODY: + outflags |= MD_sp; + break; + default: + break; + } + return 1; +} + +static int +md_pre_Sm(struct roff_node *n) +{ + if (n->child == NULL) + outflags ^= MD_Sm; + else if (strcmp("on", n->child->string) == 0) + outflags |= MD_Sm; + else + outflags &= ~MD_Sm; + + if (outflags & MD_Sm) + outflags |= MD_spc; + + return 0; +} + +static int +md_pre_Vt(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + md_pre_syn(n); + return 1; + case ROFFT_BODY: + case ROFFT_ELEM: + md_pre_raw(n); + return 1; + default: + return 0; + } +} + +static void +md_post_Vt(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BODY: + case ROFFT_ELEM: + md_post_raw(n); + break; + default: + break; + } +} + +static int +md_pre_Xr(struct roff_node *n) +{ + n = n->child; + if (n == NULL) + return 0; + md_node(n); + n = n->next; + if (n == NULL) + return 0; + outflags &= ~MD_spc; + md_word("("); + md_node(n); + md_word(")"); + return 0; +} + +static int +md_pre__T(struct roff_node *n) +{ + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) + md_word("\""); + else + md_rawword("*"); + outflags &= ~MD_spc; + return 1; +} + +static void +md_post__T(struct roff_node *n) +{ + outflags &= ~MD_spc; + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) + md_word("\""); + else + md_rawword("*"); + md_post_pc(n); +} + +static int +md_pre_br(struct roff_node *n) +{ + outflags |= MD_br; + return 0; +} diff --git a/usr.bin/mandoc/mdoc_state.c b/usr.bin/mandoc/mdoc_state.c new file mode 100644 index 0000000..954f709 --- /dev/null +++ b/usr.bin/mandoc/mdoc_state.c @@ -0,0 +1,254 @@ +/* $OpenBSD: mdoc_state.c,v 1.16 2020/01/19 17:59:01 schwarze Exp $ */ +/* + * Copyright (c) 
2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +#define STATE_ARGS struct roff_man *mdoc, struct roff_node *n + +typedef void (*state_handler)(STATE_ARGS); + +static void state_bl(STATE_ARGS); +static void state_sh(STATE_ARGS); +static void state_sm(STATE_ARGS); + +static const state_handler state_handlers[MDOC_MAX - MDOC_Dd] = { + NULL, /* Dd */ + NULL, /* Dt */ + NULL, /* Os */ + state_sh, /* Sh */ + NULL, /* Ss */ + NULL, /* Pp */ + NULL, /* D1 */ + NULL, /* Dl */ + NULL, /* Bd */ + NULL, /* Ed */ + state_bl, /* Bl */ + NULL, /* El */ + NULL, /* It */ + NULL, /* Ad */ + NULL, /* An */ + NULL, /* Ap */ + NULL, /* Ar */ + NULL, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + NULL, /* Er */ + NULL, /* Ev */ + NULL, /* Ex */ + NULL, /* Fa */ + NULL, /* Fd */ + NULL, /* Fl */ + NULL, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + NULL, /* In */ + NULL, /* Li */ + NULL, /* Nd */ + NULL, /* Nm */ + NULL, /* Op */ + NULL, /* Ot */ + NULL, /* Pa */ + NULL, /* Rv */ + NULL, /* St */ + NULL, /* Va 
*/ + NULL, /* Vt */ + NULL, /* Xr */ + NULL, /* %A */ + NULL, /* %B */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + NULL, /* %N */ + NULL, /* %O */ + NULL, /* %P */ + NULL, /* %R */ + NULL, /* %T */ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + NULL, /* At */ + NULL, /* Bc */ + NULL, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + NULL, /* Bx */ + NULL, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + NULL, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + NULL, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + state_sm, /* Sm */ + NULL, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + NULL, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + NULL, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + NULL, /* Bk */ + NULL, /* Ek */ + NULL, /* Bt */ + NULL, /* Hf */ + NULL, /* Fr */ + NULL, /* Ud */ + NULL, /* Lb */ + NULL, /* Lp */ + NULL, /* Lk */ + NULL, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + NULL, /* Es */ + NULL, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + NULL, /* %U */ + NULL, /* Ta */ + NULL, /* Tg */ +}; + + +void +mdoc_state(struct roff_man *mdoc, struct roff_node *n) +{ + state_handler handler; + + if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) + return; + + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + if ((mdoc_macro(n->tok)->flags & MDOC_PROLOGUE) == 0) + mdoc->flags |= MDOC_PBODY; + + handler = state_handlers[n->tok - MDOC_Dd]; + if (*handler) + (*handler)(mdoc, n); +} + +static void +state_bl(STATE_ARGS) +{ + struct mdoc_arg *args; + size_t i; + + if (n->type != ROFFT_HEAD || n->parent->args == NULL) + return; + + args = n->parent->args; + for (i = 0; i < args->argc; i++) 
{ + switch(args->argv[i].arg) { + case MDOC_Diag: + n->norm->Bl.type = LIST_diag; + return; + case MDOC_Column: + n->norm->Bl.type = LIST_column; + return; + default: + break; + } + } +} + +static void +state_sh(STATE_ARGS) +{ + struct roff_node *nch; + char *secname; + + if (n->type != ROFFT_HEAD) + return; + + if ( ! (n->flags & NODE_VALID)) { + secname = NULL; + deroff(&secname, n); + + /* + * Set the section attribute for the BLOCK, HEAD, + * and HEAD children; the latter can only be TEXT + * nodes, so no recursion is needed. For other + * nodes, including the .Sh BODY, this is done + * when allocating the node data structures, but + * for .Sh BLOCK and HEAD, the section is still + * unknown at that time. + */ + + n->sec = n->parent->sec = secname == NULL ? + SEC_CUSTOM : mdoc_a2sec(secname); + for (nch = n->child; nch != NULL; nch = nch->next) + nch->sec = n->sec; + free(secname); + } + + if ((mdoc->lastsec = n->sec) == SEC_SYNOPSIS) { + roff_setreg(mdoc->roff, "nS", 1, '='); + mdoc->flags |= MDOC_SYNOPSIS; + } else { + roff_setreg(mdoc->roff, "nS", 0, '='); + mdoc->flags &= ~MDOC_SYNOPSIS; + } +} + +static void +state_sm(STATE_ARGS) +{ + + if (n->child == NULL) + mdoc->flags ^= MDOC_SMOFF; + else if ( ! strcmp(n->child->string, "on")) + mdoc->flags &= ~MDOC_SMOFF; + else if ( ! 
strcmp(n->child->string, "off")) + mdoc->flags |= MDOC_SMOFF; +} diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c new file mode 100644 index 0000000..6362a21 --- /dev/null +++ b/usr.bin/mandoc/mdoc_term.c @@ -0,0 +1,1962 @@ +/* $OpenBSD: mdoc_term.c,v 1.279 2020/04/06 09:55:49 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Plain text formatter for mdoc(7), used by mandoc(1) + * for ASCII, UTF-8, PostScript, and PDF output. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "roff.h" +#include "mdoc.h" +#include "out.h" +#include "term.h" +#include "term_tag.h" +#include "main.h" + +struct termpair { + struct termpair *ppair; + int count; +}; + +#define DECL_ARGS struct termp *p, \ + struct termpair *pair, \ + const struct roff_meta *meta, \ + struct roff_node *n + +struct mdoc_term_act { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); +}; + +static int a2width(const struct termp *, const char *); + +static void print_bvspace(struct termp *, + struct roff_node *, struct roff_node *); +static void print_mdoc_node(DECL_ARGS); +static void print_mdoc_nodelist(DECL_ARGS); +static void print_mdoc_head(struct termp *, const struct roff_meta *); +static void print_mdoc_foot(struct termp *, const struct roff_meta *); +static void synopsis_pre(struct termp *, struct roff_node *); + +static void termp____post(DECL_ARGS); +static void termp__t_post(DECL_ARGS); +static void termp_bd_post(DECL_ARGS); +static void termp_bk_post(DECL_ARGS); +static void termp_bl_post(DECL_ARGS); +static void termp_eo_post(DECL_ARGS); +static void termp_fd_post(DECL_ARGS); +static void termp_fo_post(DECL_ARGS); +static void termp_in_post(DECL_ARGS); +static void termp_it_post(DECL_ARGS); +static void termp_lb_post(DECL_ARGS); +static void termp_nm_post(DECL_ARGS); +static void termp_pf_post(DECL_ARGS); +static void termp_quote_post(DECL_ARGS); +static void termp_sh_post(DECL_ARGS); +static void termp_ss_post(DECL_ARGS); +static void termp_xx_post(DECL_ARGS); + +static int termp__a_pre(DECL_ARGS); +static int termp__t_pre(DECL_ARGS); +static int termp_abort_pre(DECL_ARGS); +static int termp_an_pre(DECL_ARGS); +static int termp_ap_pre(DECL_ARGS); +static int termp_bd_pre(DECL_ARGS); +static int termp_bf_pre(DECL_ARGS); +static int termp_bk_pre(DECL_ARGS); 
+static int termp_bl_pre(DECL_ARGS); +static int termp_bold_pre(DECL_ARGS); +static int termp_d1_pre(DECL_ARGS); +static int termp_eo_pre(DECL_ARGS); +static int termp_ex_pre(DECL_ARGS); +static int termp_fa_pre(DECL_ARGS); +static int termp_fd_pre(DECL_ARGS); +static int termp_fl_pre(DECL_ARGS); +static int termp_fn_pre(DECL_ARGS); +static int termp_fo_pre(DECL_ARGS); +static int termp_ft_pre(DECL_ARGS); +static int termp_in_pre(DECL_ARGS); +static int termp_it_pre(DECL_ARGS); +static int termp_li_pre(DECL_ARGS); +static int termp_lk_pre(DECL_ARGS); +static int termp_nd_pre(DECL_ARGS); +static int termp_nm_pre(DECL_ARGS); +static int termp_ns_pre(DECL_ARGS); +static int termp_quote_pre(DECL_ARGS); +static int termp_rs_pre(DECL_ARGS); +static int termp_sh_pre(DECL_ARGS); +static int termp_skip_pre(DECL_ARGS); +static int termp_sm_pre(DECL_ARGS); +static int termp_pp_pre(DECL_ARGS); +static int termp_ss_pre(DECL_ARGS); +static int termp_under_pre(DECL_ARGS); +static int termp_vt_pre(DECL_ARGS); +static int termp_xr_pre(DECL_ARGS); +static int termp_xx_pre(DECL_ARGS); + +static const struct mdoc_term_act mdoc_term_acts[MDOC_MAX - MDOC_Dd] = { + { NULL, NULL }, /* Dd */ + { NULL, NULL }, /* Dt */ + { NULL, NULL }, /* Os */ + { termp_sh_pre, termp_sh_post }, /* Sh */ + { termp_ss_pre, termp_ss_post }, /* Ss */ + { termp_pp_pre, NULL }, /* Pp */ + { termp_d1_pre, termp_bl_post }, /* D1 */ + { termp_d1_pre, termp_bl_post }, /* Dl */ + { termp_bd_pre, termp_bd_post }, /* Bd */ + { NULL, NULL }, /* Ed */ + { termp_bl_pre, termp_bl_post }, /* Bl */ + { NULL, NULL }, /* El */ + { termp_it_pre, termp_it_post }, /* It */ + { termp_under_pre, NULL }, /* Ad */ + { termp_an_pre, NULL }, /* An */ + { termp_ap_pre, NULL }, /* Ap */ + { termp_under_pre, NULL }, /* Ar */ + { termp_fd_pre, NULL }, /* Cd */ + { termp_bold_pre, NULL }, /* Cm */ + { termp_li_pre, NULL }, /* Dv */ + { NULL, NULL }, /* Er */ + { NULL, NULL }, /* Ev */ + { termp_ex_pre, NULL }, /* Ex */ + { termp_fa_pre, 
NULL }, /* Fa */ + { termp_fd_pre, termp_fd_post }, /* Fd */ + { termp_fl_pre, NULL }, /* Fl */ + { termp_fn_pre, NULL }, /* Fn */ + { termp_ft_pre, NULL }, /* Ft */ + { termp_bold_pre, NULL }, /* Ic */ + { termp_in_pre, termp_in_post }, /* In */ + { termp_li_pre, NULL }, /* Li */ + { termp_nd_pre, NULL }, /* Nd */ + { termp_nm_pre, termp_nm_post }, /* Nm */ + { termp_quote_pre, termp_quote_post }, /* Op */ + { termp_abort_pre, NULL }, /* Ot */ + { termp_under_pre, NULL }, /* Pa */ + { termp_ex_pre, NULL }, /* Rv */ + { NULL, NULL }, /* St */ + { termp_under_pre, NULL }, /* Va */ + { termp_vt_pre, NULL }, /* Vt */ + { termp_xr_pre, NULL }, /* Xr */ + { termp__a_pre, termp____post }, /* %A */ + { termp_under_pre, termp____post }, /* %B */ + { NULL, termp____post }, /* %D */ + { termp_under_pre, termp____post }, /* %I */ + { termp_under_pre, termp____post }, /* %J */ + { NULL, termp____post }, /* %N */ + { NULL, termp____post }, /* %O */ + { NULL, termp____post }, /* %P */ + { NULL, termp____post }, /* %R */ + { termp__t_pre, termp__t_post }, /* %T */ + { NULL, termp____post }, /* %V */ + { NULL, NULL }, /* Ac */ + { termp_quote_pre, termp_quote_post }, /* Ao */ + { termp_quote_pre, termp_quote_post }, /* Aq */ + { NULL, NULL }, /* At */ + { NULL, NULL }, /* Bc */ + { termp_bf_pre, NULL }, /* Bf */ + { termp_quote_pre, termp_quote_post }, /* Bo */ + { termp_quote_pre, termp_quote_post }, /* Bq */ + { termp_xx_pre, termp_xx_post }, /* Bsx */ + { NULL, NULL }, /* Bx */ + { termp_skip_pre, NULL }, /* Db */ + { NULL, NULL }, /* Dc */ + { termp_quote_pre, termp_quote_post }, /* Do */ + { termp_quote_pre, termp_quote_post }, /* Dq */ + { NULL, NULL }, /* Ec */ /* FIXME: no space */ + { NULL, NULL }, /* Ef */ + { termp_under_pre, NULL }, /* Em */ + { termp_eo_pre, termp_eo_post }, /* Eo */ + { termp_xx_pre, termp_xx_post }, /* Fx */ + { termp_bold_pre, NULL }, /* Ms */ + { termp_li_pre, NULL }, /* No */ + { termp_ns_pre, NULL }, /* Ns */ + { termp_xx_pre, termp_xx_post }, 
/* Nx */ + { termp_xx_pre, termp_xx_post }, /* Ox */ + { NULL, NULL }, /* Pc */ + { NULL, termp_pf_post }, /* Pf */ + { termp_quote_pre, termp_quote_post }, /* Po */ + { termp_quote_pre, termp_quote_post }, /* Pq */ + { NULL, NULL }, /* Qc */ + { termp_quote_pre, termp_quote_post }, /* Ql */ + { termp_quote_pre, termp_quote_post }, /* Qo */ + { termp_quote_pre, termp_quote_post }, /* Qq */ + { NULL, NULL }, /* Re */ + { termp_rs_pre, NULL }, /* Rs */ + { NULL, NULL }, /* Sc */ + { termp_quote_pre, termp_quote_post }, /* So */ + { termp_quote_pre, termp_quote_post }, /* Sq */ + { termp_sm_pre, NULL }, /* Sm */ + { termp_under_pre, NULL }, /* Sx */ + { termp_bold_pre, NULL }, /* Sy */ + { NULL, NULL }, /* Tn */ + { termp_xx_pre, termp_xx_post }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { termp_fo_pre, termp_fo_post }, /* Fo */ + { NULL, NULL }, /* Fc */ + { termp_quote_pre, termp_quote_post }, /* Oo */ + { NULL, NULL }, /* Oc */ + { termp_bk_pre, termp_bk_post }, /* Bk */ + { NULL, NULL }, /* Ek */ + { NULL, NULL }, /* Bt */ + { NULL, NULL }, /* Hf */ + { termp_under_pre, NULL }, /* Fr */ + { NULL, NULL }, /* Ud */ + { NULL, termp_lb_post }, /* Lb */ + { termp_abort_pre, NULL }, /* Lp */ + { termp_lk_pre, NULL }, /* Lk */ + { termp_under_pre, NULL }, /* Mt */ + { termp_quote_pre, termp_quote_post }, /* Brq */ + { termp_quote_pre, termp_quote_post }, /* Bro */ + { NULL, NULL }, /* Brc */ + { NULL, termp____post }, /* %C */ + { termp_skip_pre, NULL }, /* Es */ + { termp_quote_pre, termp_quote_post }, /* En */ + { termp_xx_pre, termp_xx_post }, /* Dx */ + { NULL, termp____post }, /* %Q */ + { NULL, termp____post }, /* %U */ + { NULL, NULL }, /* Ta */ + { termp_skip_pre, NULL }, /* Tg */ +}; + + +void +terminal_mdoc(void *arg, const struct roff_meta *mdoc) +{ + struct roff_node *n, *nn; + struct termp *p; + size_t save_defindent; + + p = (struct termp *)arg; + p->tcol->rmargin = p->maxrmargin = p->defrmargin; + term_tab_set(p, NULL); + 
term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + + n = mdoc->first->child; + if (p->synopsisonly) { + for (nn = NULL; n != NULL; n = n->next) { + if (n->tok != MDOC_Sh) + continue; + if (n->sec == SEC_SYNOPSIS) + break; + if (nn == NULL && n->sec == SEC_NAME) + nn = n; + } + if (n == NULL) + n = nn; + p->flags |= TERMP_NOSPACE; + if (n != NULL && (n = n->child->next->child) != NULL) + print_mdoc_nodelist(p, NULL, mdoc, n); + term_newln(p); + } else { + save_defindent = p->defindent; + if (p->defindent == 0) + p->defindent = 5; + term_begin(p, print_mdoc_head, print_mdoc_foot, mdoc); + while (n != NULL && + (n->type == ROFFT_COMMENT || + n->flags & NODE_NOPRT)) + n = n->next; + if (n != NULL) { + if (n->tok != MDOC_Sh) + term_vspace(p); + print_mdoc_nodelist(p, NULL, mdoc, n); + } + term_end(p); + p->defindent = save_defindent; + } +} + +static void +print_mdoc_nodelist(DECL_ARGS) +{ + while (n != NULL) { + print_mdoc_node(p, pair, meta, n); + n = n->next; + } +} + +static void +print_mdoc_node(DECL_ARGS) +{ + const struct mdoc_term_act *act; + struct termpair npair; + size_t offset, rmargin; + int chld; + + /* + * In no-fill mode, break the output line at the beginning + * of new input lines except after \c, and nowhere else. + */ + + if (n->flags & NODE_NOFILL) { + if (n->flags & NODE_LINE && + (p->flags & TERMP_NONEWLINE) == 0) + term_newln(p); + p->flags |= TERMP_BRNEVER; + } else + p->flags &= ~TERMP_BRNEVER; + + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) + return; + + chld = 1; + offset = p->tcol->offset; + rmargin = p->tcol->rmargin; + n->flags &= ~NODE_ENDED; + n->prev_font = p->fonti; + + memset(&npair, 0, sizeof(struct termpair)); + npair.ppair = pair; + + if (n->flags & NODE_ID && n->tok != MDOC_Pp && + (n->tok != MDOC_It || n->type != ROFFT_BLOCK)) + term_tag_write(n, p->line); + + /* + * Keeps only work until the end of a line. If a keep was + * invoked in a prior line, revert it to PREKEEP. 
+ */ + + if (p->flags & TERMP_KEEP && n->flags & NODE_LINE) { + p->flags &= ~TERMP_KEEP; + p->flags |= TERMP_PREKEEP; + } + + /* + * After the keep flags have been set up, we may now + * produce output. Note that some pre-handlers do so. + */ + + act = NULL; + switch (n->type) { + case ROFFT_TEXT: + if (n->flags & NODE_LINE) { + switch (*n->string) { + case '\0': + if (p->flags & TERMP_NONEWLINE) + term_newln(p); + else + term_vspace(p); + return; + case ' ': + if ((p->flags & TERMP_NONEWLINE) == 0) + term_newln(p); + break; + default: + break; + } + } + if (NODE_DELIMC & n->flags) + p->flags |= TERMP_NOSPACE; + term_word(p, n->string); + if (NODE_DELIMO & n->flags) + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_EQN: + if ( ! (n->flags & NODE_LINE)) + p->flags |= TERMP_NOSPACE; + term_eqn(p, n->eqn); + if (n->next != NULL && ! (n->next->flags & NODE_LINE)) + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_TBL: + if (p->tbl.cols == NULL) + term_newln(p); + term_tbl(p, n->span); + break; + default: + if (n->tok < ROFF_MAX) { + roff_term_pre(p, n); + return; + } + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + act = mdoc_term_acts + (n->tok - MDOC_Dd); + if (act->pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + chld = (*act->pre)(p, &npair, meta, n); + break; + } + + if (chld && n->child) + print_mdoc_nodelist(p, &npair, meta, n->child); + + term_fontpopq(p, + (ENDBODY_NOT == n->end ? n : n->body)->prev_font); + + switch (n->type) { + case ROFFT_TEXT: + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + break; + default: + if (act->post == NULL || n->flags & NODE_ENDED) + break; + (void)(*act->post)(p, &npair, meta, n); + + /* + * Explicit end tokens not only call the post + * handler, but also tell the respective block + * that it must not call the post handler again. 
+ */ + if (ENDBODY_NOT != n->end) + n->body->flags |= NODE_ENDED; + break; + } + + if (NODE_EOS & n->flags) + p->flags |= TERMP_SENTENCE; + + if (n->type != ROFFT_TEXT) + p->tcol->offset = offset; + p->tcol->rmargin = rmargin; +} + +static void +print_mdoc_foot(struct termp *p, const struct roff_meta *meta) +{ + size_t sz; + + term_fontrepl(p, TERMFONT_NONE); + + /* + * Output the footer in new-groff style, that is, three columns + * with the middle being the manual date and flanking columns + * being the operating system: + * + * SYSTEM DATE SYSTEM + */ + + term_vspace(p); + + p->tcol->offset = 0; + sz = term_strlen(p, meta->date); + p->tcol->rmargin = p->maxrmargin > sz ? + (p->maxrmargin + term_len(p, 1) - sz) / 2 : 0; + p->trailspace = 1; + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + + term_word(p, meta->os); + term_flushln(p); + + p->tcol->offset = p->tcol->rmargin; + sz = term_strlen(p, meta->os); + p->tcol->rmargin = p->maxrmargin > sz ? p->maxrmargin - sz : 0; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->date); + term_flushln(p); + + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->os); + term_flushln(p); + + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; + p->flags = 0; +} + +static void +print_mdoc_head(struct termp *p, const struct roff_meta *meta) +{ + char *volume, *title; + size_t vollen, titlen; + + /* + * The header is strange. It has three components, which are + * really two with the first duplicated. It goes like this: + * + * IDENTIFIER TITLE IDENTIFIER + * + * The IDENTIFIER is NAME(SECTION), which is the command-name + * (if given, or "unknown" if not) followed by the manual page + * section. These are given in `Dt'. The TITLE is a free-form + * string depending on the manual volume. If not specified, it + * switches on the manual section. 
+ */ + + assert(meta->vol); + if (NULL == meta->arch) + volume = mandoc_strdup(meta->vol); + else + mandoc_asprintf(&volume, "%s (%s)", + meta->vol, meta->arch); + vollen = term_strlen(p, volume); + + if (NULL == meta->msec) + title = mandoc_strdup(meta->title); + else + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + titlen = term_strlen(p, title); + + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->trailspace = 1; + p->tcol->offset = 0; + p->tcol->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + (p->maxrmargin - vollen + term_len(p, 1)) / 2 : + vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; + + term_word(p, title); + term_flushln(p); + + p->flags |= TERMP_NOSPACE; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + vollen + titlen < + p->maxrmargin ? p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, volume); + term_flushln(p); + + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + if (p->tcol->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; + free(title); + free(volume); +} + +static int +a2width(const struct termp *p, const char *v) +{ + struct roffsu su; + const char *end; + + end = a2roffsu(v, &su, SCALE_MAX); + if (end == NULL || *end != '\0') { + SCALE_HS_INIT(&su, term_strlen(p, v)); + su.scale /= term_strlen(p, "0"); + } + return term_hen(p, &su); +} + +/* + * Determine how much space to print out before block elements of `It' + * (and thus `Bl') and `Bd'. And then go ahead and print that space, + * too. 
+ */ +static void +print_bvspace(struct termp *p, struct roff_node *bl, struct roff_node *n) +{ + struct roff_node *nn; + + term_newln(p); + + if ((bl->tok == MDOC_Bd && bl->norm->Bd.comp) || + (bl->tok == MDOC_Bl && bl->norm->Bl.comp)) + return; + + /* Do not vspace directly after Ss/Sh. */ + + nn = n; + while (roff_node_prev(nn) == NULL) { + do { + nn = nn->parent; + if (nn->type == ROFFT_ROOT) + return; + } while (nn->type != ROFFT_BLOCK); + if (nn->tok == MDOC_Sh || nn->tok == MDOC_Ss) + return; + if (nn->tok == MDOC_It && + nn->parent->parent->norm->Bl.type != LIST_item) + break; + } + + /* + * No vertical space after: + * items in .Bl -column + * items without a body in .Bl -diag + */ + + if (bl->tok != MDOC_Bl || + n->prev == NULL || n->prev->tok != MDOC_It || + (bl->norm->Bl.type != LIST_column && + (bl->norm->Bl.type != LIST_diag || + n->prev->body->child != NULL))) + term_vspace(p); +} + + +static int +termp_it_pre(DECL_ARGS) +{ + struct roffsu su; + char buf[24]; + const struct roff_node *bl, *nn; + size_t ncols, dcol; + int i, offset, width; + enum mdoc_list type; + + if (n->type == ROFFT_BLOCK) { + print_bvspace(p, n->parent->parent, n); + if (n->flags & NODE_ID) + term_tag_write(n, p->line); + return 1; + } + + bl = n->parent->parent->parent; + type = bl->norm->Bl.type; + + /* + * Defaults for specific list types. + */ + + switch (type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + width = term_len(p, 2); + break; + case LIST_hang: + case LIST_tag: + width = term_len(p, 8); + break; + case LIST_column: + width = term_len(p, 10); + break; + default: + width = 0; + break; + } + offset = 0; + + /* + * First calculate width and offset. This is pretty easy unless + * we're a -column list, in which case all prior columns must + * be accounted for. 
+ */ + + if (bl->norm->Bl.offs != NULL) { + offset = a2width(p, bl->norm->Bl.offs); + if (offset < 0 && (size_t)(-offset) > p->tcol->offset) + offset = -p->tcol->offset; + else if (offset > SHRT_MAX) + offset = 0; + } + + switch (type) { + case LIST_column: + if (n->type == ROFFT_HEAD) + break; + + /* + * Imitate groff's column handling: + * - For each earlier column, add its width. + * - For less than 5 columns, add four more blanks per + * column. + * - For exactly 5 columns, add three more blank per + * column. + * - For more than 5 columns, add only one column. + */ + ncols = bl->norm->Bl.ncols; + dcol = ncols < 5 ? term_len(p, 4) : + ncols == 5 ? term_len(p, 3) : term_len(p, 1); + + /* + * Calculate the offset by applying all prior ROFFT_BODY, + * so we stop at the ROFFT_HEAD (nn->prev == NULL). + */ + + for (i = 0, nn = n->prev; + nn->prev && i < (int)ncols; + nn = nn->prev, i++) { + SCALE_HS_INIT(&su, + term_strlen(p, bl->norm->Bl.cols[i])); + su.scale /= term_strlen(p, "0"); + offset += term_hen(p, &su) + dcol; + } + + /* + * When exceeding the declared number of columns, leave + * the remaining widths at 0. This will later be + * adjusted to the default width of 10, or, for the last + * column, stretched to the right margin. + */ + if (i >= (int)ncols) + break; + + /* + * Use the declared column widths, extended as explained + * in the preceding paragraph. + */ + SCALE_HS_INIT(&su, term_strlen(p, bl->norm->Bl.cols[i])); + su.scale /= term_strlen(p, "0"); + width = term_hen(p, &su) + dcol; + break; + default: + if (NULL == bl->norm->Bl.width) + break; + + /* + * Note: buffer the width by 2, which is groff's magic + * number for buffering single arguments. See the above + * handling for column for how this changes. + */ + width = a2width(p, bl->norm->Bl.width) + term_len(p, 2); + if (width < 0 && (size_t)(-width) > p->tcol->offset) + width = -p->tcol->offset; + else if (width > SHRT_MAX) + width = 0; + break; + } + + /* + * Whitespace control. 
Inset bodies need an initial space, + * while diagonal bodies need two. + */ + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case LIST_diag: + if (n->type == ROFFT_BODY) + term_word(p, "\\ \\ "); + break; + case LIST_inset: + if (n->type == ROFFT_BODY && n->parent->head->child != NULL) + term_word(p, "\\ "); + break; + default: + break; + } + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case LIST_diag: + if (n->type == ROFFT_HEAD) + term_fontpush(p, TERMFONT_BOLD); + break; + default: + break; + } + + /* + * Pad and break control. This is the tricky part. These flags + * are documented in term_flushln() in term.c. Note that we're + * going to unset all of these flags in termp_it_post() when we + * exit. + */ + + switch (type) { + case LIST_enum: + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + if (n->type == ROFFT_HEAD) { + p->flags |= TERMP_NOBREAK | TERMP_HANG; + p->trailspace = 1; + } else if (width <= (int)term_len(p, 2)) + p->flags |= TERMP_NOPAD; + break; + case LIST_hang: + if (n->type != ROFFT_HEAD) + break; + p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; + p->trailspace = 1; + break; + case LIST_tag: + if (n->type != ROFFT_HEAD) + break; + + p->flags |= TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND; + p->trailspace = 2; + + if (NULL == n->next || NULL == n->next->child) + p->flags |= TERMP_HANG; + break; + case LIST_column: + if (n->type == ROFFT_HEAD) + break; + + if (NULL == n->next) { + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + } else { + p->flags |= TERMP_NOBREAK; + p->trailspace = 1; + } + + break; + case LIST_diag: + if (n->type != ROFFT_HEAD) + break; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 1; + break; + default: + break; + } + + /* + * Margin control. Set-head-width lists have their right + * margins shortened. The body for these lists has the offset + * necessarily lengthened. Everybody gets the offset. 
+ */ + + p->tcol->offset += offset; + + switch (type) { + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + case LIST_hang: + case LIST_tag: + if (n->type == ROFFT_HEAD) + p->tcol->rmargin = p->tcol->offset + width; + else + p->tcol->offset += width; + break; + case LIST_column: + assert(width); + p->tcol->rmargin = p->tcol->offset + width; + /* + * XXX - this behaviour is not documented: the + * right-most column is filled to the right margin. + */ + if (n->type == ROFFT_HEAD) + break; + if (n->next == NULL && p->tcol->rmargin < p->maxrmargin) + p->tcol->rmargin = p->maxrmargin; + break; + default: + break; + } + + /* + * The dash, hyphen, bullet and enum lists all have a special + * HEAD character (temporarily bold, in some cases). + */ + + if (n->type == ROFFT_HEAD) + switch (type) { + case LIST_bullet: + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\[bu]"); + term_fontpop(p); + break; + case LIST_dash: + case LIST_hyphen: + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "-"); + term_fontpop(p); + break; + case LIST_enum: + (pair->ppair->ppair->count)++; + (void)snprintf(buf, sizeof(buf), "%d.", + pair->ppair->ppair->count); + term_word(p, buf); + break; + default: + break; + } + + /* + * If we're not going to process our children, indicate so here. 
+ */ + + switch (type) { + case LIST_bullet: + case LIST_item: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + if (n->type == ROFFT_HEAD) + return 0; + break; + case LIST_column: + if (n->type == ROFFT_HEAD) + return 0; + p->minbl = 0; + break; + default: + break; + } + + return 1; +} + +static void +termp_it_post(DECL_ARGS) +{ + enum mdoc_list type; + + if (n->type == ROFFT_BLOCK) + return; + + type = n->parent->parent->parent->norm->Bl.type; + + switch (type) { + case LIST_item: + case LIST_diag: + case LIST_inset: + if (n->type == ROFFT_BODY) + term_newln(p); + break; + case LIST_column: + if (n->type == ROFFT_BODY) + term_flushln(p); + break; + default: + term_newln(p); + break; + } + + /* + * Now that our output is flushed, we can reset our tags. Since + * only `It' sets these flags, we're free to assume that nobody + * has munged them in the meanwhile. + */ + + p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND | TERMP_HANG); + p->trailspace = 0; +} + +static int +termp_nm_pre(DECL_ARGS) +{ + const char *cp; + + if (n->type == ROFFT_BLOCK) { + p->flags |= TERMP_PREKEEP; + return 1; + } + + if (n->type == ROFFT_BODY) { + if (n->child == NULL) + return 0; + p->flags |= TERMP_NOSPACE; + cp = NULL; + if (n->prev->child != NULL) + cp = n->prev->child->string; + if (cp == NULL) + cp = meta->name; + if (cp == NULL) + p->tcol->offset += term_len(p, 6); + else + p->tcol->offset += term_len(p, 1) + + term_strlen(p, cp); + return 1; + } + + if (n->child == NULL) + return 0; + + if (n->type == ROFFT_HEAD) + synopsis_pre(p, n->parent); + + if (n->type == ROFFT_HEAD && + n->next != NULL && n->next->child != NULL) { + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 1; + p->tcol->rmargin = p->tcol->offset + term_len(p, 1); + if (n->child == NULL) + p->tcol->rmargin += term_strlen(p, meta->name); + else if (n->child->type == ROFFT_TEXT) { + p->tcol->rmargin += term_strlen(p, n->child->string); + if (n->child->next != NULL) + p->flags 
|= TERMP_HANG; + } else { + p->tcol->rmargin += term_len(p, 5); + p->flags |= TERMP_HANG; + } + } + return termp_bold_pre(p, pair, meta, n); +} + +static void +termp_nm_post(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); + break; + case ROFFT_HEAD: + if (n->next == NULL || n->next->child == NULL) + break; + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); + p->trailspace = 0; + break; + case ROFFT_BODY: + if (n->child != NULL) + term_flushln(p); + break; + default: + break; + } +} + +static int +termp_fl_pre(DECL_ARGS) +{ + struct roff_node *nn; + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\-"); + + if (n->child != NULL || + ((nn = roff_node_next(n)) != NULL && + nn->type != ROFFT_TEXT && + (nn->flags & NODE_LINE) == 0)) + p->flags |= TERMP_NOSPACE; + + return 1; +} + +static int +termp__a_pre(DECL_ARGS) +{ + struct roff_node *nn; + + if ((nn = roff_node_prev(n)) != NULL && nn->tok == MDOC__A && + ((nn = roff_node_next(n)) == NULL || nn->tok != MDOC__A)) + term_word(p, "and"); + + return 1; +} + +static int +termp_an_pre(DECL_ARGS) +{ + + if (n->norm->An.auth == AUTH_split) { + p->flags &= ~TERMP_NOSPLIT; + p->flags |= TERMP_SPLIT; + return 0; + } + if (n->norm->An.auth == AUTH_nosplit) { + p->flags &= ~TERMP_SPLIT; + p->flags |= TERMP_NOSPLIT; + return 0; + } + + if (p->flags & TERMP_SPLIT) + term_newln(p); + + if (n->sec == SEC_AUTHORS && ! (p->flags & TERMP_NOSPLIT)) + p->flags |= TERMP_SPLIT; + + return 1; +} + +static int +termp_ns_pre(DECL_ARGS) +{ + + if ( ! 
(NODE_LINE & n->flags)) + p->flags |= TERMP_NOSPACE; + return 1; +} + +static int +termp_rs_pre(DECL_ARGS) +{ + if (SEC_SEE_ALSO != n->sec) + return 1; + if (n->type == ROFFT_BLOCK && roff_node_prev(n) != NULL) + term_vspace(p); + return 1; +} + +static int +termp_ex_pre(DECL_ARGS) +{ + term_newln(p); + return 1; +} + +static int +termp_nd_pre(DECL_ARGS) +{ + if (n->type == ROFFT_BODY) + term_word(p, "\\(en"); + return 1; +} + +static int +termp_bl_pre(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + term_newln(p); + return 1; + case ROFFT_HEAD: + return 0; + default: + return 1; + } +} + +static void +termp_bl_post(DECL_ARGS) +{ + if (n->type != ROFFT_BLOCK) + return; + term_newln(p); + if (n->tok != MDOC_Bl || n->norm->Bl.type != LIST_column) + return; + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); +} + +static int +termp_xr_pre(DECL_ARGS) +{ + if (NULL == (n = n->child)) + return 0; + + assert(n->type == ROFFT_TEXT); + term_word(p, n->string); + + if (NULL == (n = n->next)) + return 0; + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + assert(n->type == ROFFT_TEXT); + term_word(p, n->string); + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + return 0; +} + +/* + * This decides how to assert whitespace before any of the SYNOPSIS set + * of macros (which, as in the case of Ft/Fo and Ft/Fn, may contain + * macro combos). + */ +static void +synopsis_pre(struct termp *p, struct roff_node *n) +{ + struct roff_node *np; + + if ((n->flags & NODE_SYNPRETTY) == 0 || + (np = roff_node_prev(n)) == NULL) + return; + + /* + * If we're the second in a pair of like elements, emit our + * newline and return. UNLESS we're `Fo', `Fn', `Fn', in which + * case we soldier on. 
+ */ + if (np->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + term_newln(p); + return; + } + + /* + * If we're one of the SYNOPSIS set and non-like pair-wise after + * another (or Fn/Fo, which we've let slip through) then assert + * vertical space, else only newline and move on. + */ + switch (np->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + term_vspace(p); + break; + case MDOC_Ft: + if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) { + term_vspace(p); + break; + } + /* FALLTHROUGH */ + default: + term_newln(p); + break; + } +} + +static int +termp_vt_pre(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_ELEM: + return termp_ft_pre(p, pair, meta, n); + case ROFFT_BLOCK: + synopsis_pre(p, n); + return 1; + case ROFFT_HEAD: + return 0; + default: + return termp_under_pre(p, pair, meta, n); + } +} + +static int +termp_bold_pre(DECL_ARGS) +{ + term_fontpush(p, TERMFONT_BOLD); + return 1; +} + +static int +termp_fd_pre(DECL_ARGS) +{ + synopsis_pre(p, n); + return termp_bold_pre(p, pair, meta, n); +} + +static void +termp_fd_post(DECL_ARGS) +{ + term_newln(p); +} + +static int +termp_sh_pre(DECL_ARGS) +{ + struct roff_node *np; + + switch (n->type) { + case ROFFT_BLOCK: + /* + * Vertical space before sections, except + * when the previous section was empty. 
+ */ + if ((np = roff_node_prev(n)) == NULL || + np->tok != MDOC_Sh || + (np->body != NULL && np->body->child != NULL)) + term_vspace(p); + break; + case ROFFT_HEAD: + return termp_bold_pre(p, pair, meta, n); + case ROFFT_BODY: + p->tcol->offset = term_len(p, p->defindent); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + if (n->sec == SEC_AUTHORS) + p->flags &= ~(TERMP_SPLIT|TERMP_NOSPLIT); + break; + default: + break; + } + return 1; +} + +static void +termp_sh_post(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_HEAD: + term_newln(p); + break; + case ROFFT_BODY: + term_newln(p); + p->tcol->offset = 0; + break; + default: + break; + } +} + +static void +termp_lb_post(DECL_ARGS) +{ + if (n->sec == SEC_LIBRARY && n->flags & NODE_LINE) + term_newln(p); +} + +static int +termp_d1_pre(DECL_ARGS) +{ + if (n->type != ROFFT_BLOCK) + return 1; + term_newln(p); + p->tcol->offset += term_len(p, p->defindent + 1); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + return 1; +} + +static int +termp_ft_pre(DECL_ARGS) +{ + synopsis_pre(p, n); + return termp_under_pre(p, pair, meta, n); +} + +static int +termp_fn_pre(DECL_ARGS) +{ + size_t rmargin = 0; + int pretty; + + synopsis_pre(p, n); + pretty = n->flags & NODE_SYNPRETTY; + if ((n = n->child) == NULL) + return 0; + + if (pretty) { + rmargin = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + term_len(p, 4); + p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; + } + + assert(n->type == ROFFT_TEXT); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->string); + term_fontpop(p); + + if (pretty) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); + p->flags |= TERMP_NOPAD; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = rmargin; + } + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + for (n = n->next; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + term_fontpush(p, TERMFONT_UNDER); + if 
(pretty) + p->flags |= TERMP_NBRWORD; + term_word(p, n->string); + term_fontpop(p); + + if (n->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (pretty) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + term_flushln(p); + } + return 0; +} + +static int +termp_fa_pre(DECL_ARGS) +{ + const struct roff_node *nn; + + if (n->parent->tok != MDOC_Fo) + return termp_under_pre(p, pair, meta, n); + + for (nn = n->child; nn != NULL; nn = nn->next) { + term_fontpush(p, TERMFONT_UNDER); + p->flags |= TERMP_NBRWORD; + term_word(p, nn->string); + term_fontpop(p); + if (nn->next != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + if (n->child != NULL && + (nn = roff_node_next(n)) != NULL && + nn->tok == MDOC_Fa) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + return 0; +} + +static int +termp_bd_pre(DECL_ARGS) +{ + int offset; + + if (n->type == ROFFT_BLOCK) { + print_bvspace(p, n, n); + return 1; + } else if (n->type == ROFFT_HEAD) + return 0; + + /* Handle the -offset argument. */ + + if (n->norm->Bd.offs == NULL || + ! strcmp(n->norm->Bd.offs, "left")) + /* nothing */; + else if ( ! strcmp(n->norm->Bd.offs, "indent")) + p->tcol->offset += term_len(p, p->defindent + 1); + else if ( ! 
strcmp(n->norm->Bd.offs, "indent-two")) + p->tcol->offset += term_len(p, (p->defindent + 1) * 2); + else { + offset = a2width(p, n->norm->Bd.offs); + if (offset < 0 && (size_t)(-offset) > p->tcol->offset) + p->tcol->offset = 0; + else if (offset < SHRT_MAX) + p->tcol->offset += offset; + } + + switch (n->norm->Bd.type) { + case DISP_literal: + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, "8n"); + break; + case DISP_centered: + p->flags |= TERMP_CENTER; + break; + default: + break; + } + return 1; +} + +static void +termp_bd_post(DECL_ARGS) +{ + if (n->type != ROFFT_BODY) + return; + if (n->norm->Bd.type == DISP_unfilled || + n->norm->Bd.type == DISP_literal) + p->flags |= TERMP_BRNEVER; + p->flags |= TERMP_NOSPACE; + term_newln(p); + p->flags &= ~TERMP_BRNEVER; + if (n->norm->Bd.type == DISP_centered) + p->flags &= ~TERMP_CENTER; +} + +static int +termp_xx_pre(DECL_ARGS) +{ + if ((n->aux = p->flags & TERMP_PREKEEP) == 0) + p->flags |= TERMP_PREKEEP; + return 1; +} + +static void +termp_xx_post(DECL_ARGS) +{ + if (n->aux == 0) + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); +} + +static void +termp_pf_post(DECL_ARGS) +{ + if (n->next != NULL && (n->next->flags & NODE_LINE) == 0) + p->flags |= TERMP_NOSPACE; +} + +static int +termp_ss_pre(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + if (roff_node_prev(n) == NULL) + term_newln(p); + else + term_vspace(p); + break; + case ROFFT_HEAD: + p->tcol->offset = term_len(p, (p->defindent+1)/2); + return termp_bold_pre(p, pair, meta, n); + case ROFFT_BODY: + p->tcol->offset = term_len(p, p->defindent); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); + break; + default: + break; + } + return 1; +} + +static void +termp_ss_post(DECL_ARGS) +{ + if (n->type == ROFFT_HEAD || n->type == ROFFT_BODY) + term_newln(p); +} + +static int +termp_in_pre(DECL_ARGS) +{ + synopsis_pre(p, n); + if (n->flags & NODE_SYNPRETTY && n->flags & NODE_LINE) { + term_fontpush(p, TERMFONT_BOLD); + 
term_word(p, "#include"); + term_word(p, "<"); + } else { + term_word(p, "<"); + term_fontpush(p, TERMFONT_UNDER); + } + p->flags |= TERMP_NOSPACE; + return 1; +} + +static void +termp_in_post(DECL_ARGS) +{ + if (n->flags & NODE_SYNPRETTY) + term_fontpush(p, TERMFONT_BOLD); + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); + if (n->flags & NODE_SYNPRETTY) + term_fontpop(p); +} + +static int +termp_pp_pre(DECL_ARGS) +{ + term_vspace(p); + if (n->flags & NODE_ID) + term_tag_write(n, p->line); + return 0; +} + +static int +termp_skip_pre(DECL_ARGS) +{ + return 0; +} + +static int +termp_quote_pre(DECL_ARGS) +{ + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return 1; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + term_word(p, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? "<" : "\\(la"); + break; + case MDOC_Bro: + case MDOC_Brq: + term_word(p, "{"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + term_word(p, "["); + break; + case MDOC__T: + /* FALLTHROUGH */ + case MDOC_Do: + case MDOC_Dq: + term_word(p, "\\(lq"); + break; + case MDOC_En: + if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + term_word(p, n->norm->Es->child->string); + break; + case MDOC_Po: + case MDOC_Pq: + term_word(p, "("); + break; + case MDOC_Qo: + case MDOC_Qq: + term_word(p, "\""); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + term_word(p, "\\(oq"); + break; + default: + abort(); + } + + p->flags |= TERMP_NOSPACE; + return 1; +} + +static void +termp_quote_post(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return; + + p->flags |= TERMP_NOSPACE; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + term_word(p, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? 
">" : "\\(ra"); + break; + case MDOC_Bro: + case MDOC_Brq: + term_word(p, "}"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + term_word(p, "]"); + break; + case MDOC__T: + /* FALLTHROUGH */ + case MDOC_Do: + case MDOC_Dq: + term_word(p, "\\(rq"); + break; + case MDOC_En: + if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + p->flags &= ~TERMP_NOSPACE; + else + term_word(p, n->norm->Es->child->next->string); + break; + case MDOC_Po: + case MDOC_Pq: + term_word(p, ")"); + break; + case MDOC_Qo: + case MDOC_Qq: + term_word(p, "\""); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + term_word(p, "\\(cq"); + break; + default: + abort(); + } +} + +static int +termp_eo_pre(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY) + return 1; + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + term_word(p, "\\&"); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + p->flags |= TERMP_NOSPACE; + + return 1; +} + +static void +termp_eo_post(DECL_ARGS) +{ + int body, tail; + + if (n->type != ROFFT_BODY) + return; + + if (n->end != ENDBODY_NOT) { + p->flags &= ~TERMP_NOSPACE; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + p->flags |= TERMP_NOSPACE; + else if ( ! (body || tail)) + term_word(p, "\\&"); + else if ( ! 
tail) + p->flags &= ~TERMP_NOSPACE; +} + +static int +termp_fo_pre(DECL_ARGS) +{ + size_t rmargin; + + switch (n->type) { + case ROFFT_BLOCK: + synopsis_pre(p, n); + return 1; + case ROFFT_BODY: + rmargin = p->tcol->rmargin; + if (n->flags & NODE_SYNPRETTY) { + p->tcol->rmargin = p->tcol->offset + term_len(p, 4); + p->flags |= TERMP_NOBREAK | TERMP_BRIND | + TERMP_HANG; + } + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + if (n->flags & NODE_SYNPRETTY) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | + TERMP_HANG); + p->flags |= TERMP_NOPAD; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = rmargin; + } + return 1; + default: + return termp_bold_pre(p, pair, meta, n); + } +} + +static void +termp_fo_post(DECL_ARGS) +{ + if (n->type != ROFFT_BODY) + return; + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (n->flags & NODE_SYNPRETTY) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + term_flushln(p); + } +} + +static int +termp_bf_pre(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + break; + default: + return 1; + } + switch (n->norm->Bf.font) { + case FONT_Em: + return termp_under_pre(p, pair, meta, n); + case FONT_Sy: + return termp_bold_pre(p, pair, meta, n); + default: + return termp_li_pre(p, pair, meta, n); + } +} + +static int +termp_sm_pre(DECL_ARGS) +{ + if (n->child == NULL) + p->flags ^= TERMP_NONOSPACE; + else if (strcmp(n->child->string, "on") == 0) + p->flags &= ~TERMP_NONOSPACE; + else + p->flags |= TERMP_NONOSPACE; + + if (p->col && ! (TERMP_NONOSPACE & p->flags)) + p->flags &= ~TERMP_NOSPACE; + + return 0; +} + +static int +termp_ap_pre(DECL_ARGS) +{ + p->flags |= TERMP_NOSPACE; + term_word(p, "'"); + p->flags |= TERMP_NOSPACE; + return 1; +} + +static void +termp____post(DECL_ARGS) +{ + struct roff_node *nn; + + /* + * Handle lists of authors. In general, print each followed by + * a comma. 
Don't print the comma if there are only two + * authors. + */ + if (n->tok == MDOC__A && + (nn = roff_node_next(n)) != NULL && nn->tok == MDOC__A && + ((nn = roff_node_next(nn)) == NULL || nn->tok != MDOC__A) && + ((nn = roff_node_prev(n)) == NULL || nn->tok != MDOC__A)) + return; + + /* TODO: %U. */ + + if (n->parent == NULL || n->parent->tok != MDOC_Rs) + return; + + p->flags |= TERMP_NOSPACE; + if (roff_node_next(n) == NULL) { + term_word(p, "."); + p->flags |= TERMP_SENTENCE; + } else + term_word(p, ","); +} + +static int +termp_li_pre(DECL_ARGS) +{ + term_fontpush(p, TERMFONT_NONE); + return 1; +} + +static int +termp_lk_pre(DECL_ARGS) +{ + const struct roff_node *link, *descr, *punct; + + if ((link = n->child) == NULL) + return 0; + + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. */ + if ((descr = link->next) != NULL && descr != punct) { + term_fontpush(p, TERMFONT_UNDER); + while (descr != punct) { + if (descr->flags & (NODE_DELIMC | NODE_DELIMO)) + p->flags |= TERMP_NOSPACE; + term_word(p, descr->string); + descr = descr->next; + } + term_fontpop(p); + p->flags |= TERMP_NOSPACE; + term_word(p, ":"); + } + + /* Link target. */ + term_fontpush(p, TERMFONT_BOLD); + term_word(p, link->string); + term_fontpop(p); + + /* Trailing punctuation. 
*/ + while (punct != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, punct->string); + punct = punct->next; + } + return 0; +} + +static int +termp_bk_pre(DECL_ARGS) +{ + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->parent->args != NULL || n->prev->child == NULL) + p->flags |= TERMP_PREKEEP; + break; + default: + abort(); + } + return 1; +} + +static void +termp_bk_post(DECL_ARGS) +{ + if (n->type == ROFFT_BODY) + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); +} + +/* + * If we are in an `Rs' and there is a journal present, + * then quote us instead of underlining us (for disambiguation). + */ +static void +termp__t_post(DECL_ARGS) +{ + if (n->parent != NULL && n->parent->tok == MDOC_Rs && + n->parent->norm->Rs.quote_T) + termp_quote_post(p, pair, meta, n); + termp____post(p, pair, meta, n); +} + +static int +termp__t_pre(DECL_ARGS) +{ + if (n->parent != NULL && n->parent->tok == MDOC_Rs && + n->parent->norm->Rs.quote_T) + return termp_quote_pre(p, pair, meta, n); + else + return termp_under_pre(p, pair, meta, n); +} + +static int +termp_under_pre(DECL_ARGS) +{ + term_fontpush(p, TERMFONT_UNDER); + return 1; +} + +static int +termp_abort_pre(DECL_ARGS) +{ + abort(); +} diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c new file mode 100644 index 0000000..34668bf --- /dev/null +++ b/usr.bin/mandoc/mdoc_validate.c @@ -0,0 +1,3047 @@ +/* $OpenBSD: mdoc_validate.c,v 1.302 2020/04/26 21:29:45 schwarze Exp $ */ +/* + * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Validation module for mdoc(7) syntax trees used by mandoc(1). + */ +#include <sys/types.h> +#ifndef OSNAME +#include <sys/utsname.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "mandoc_xr.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" +#include "tag.h" + +/* FIXME: .Bl -diag can't have non-text children in HEAD. 
*/ + +#define POST_ARGS struct roff_man *mdoc + +enum check_ineq { + CHECK_LT, + CHECK_GT, + CHECK_EQ +}; + +typedef void (*v_post)(POST_ARGS); + +static int build_list(struct roff_man *, int); +static void check_argv(struct roff_man *, + struct roff_node *, struct mdoc_argv *); +static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); +static void check_toptext(struct roff_man *, int, int, const char *); +static int child_an(const struct roff_node *); +static size_t macro2len(enum roff_tok); +static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); + +static void post_abort(POST_ARGS) __attribute__((__noreturn__)); +static void post_an(POST_ARGS); +static void post_an_norm(POST_ARGS); +static void post_at(POST_ARGS); +static void post_bd(POST_ARGS); +static void post_bf(POST_ARGS); +static void post_bk(POST_ARGS); +static void post_bl(POST_ARGS); +static void post_bl_block(POST_ARGS); +static void post_bl_head(POST_ARGS); +static void post_bl_norm(POST_ARGS); +static void post_bx(POST_ARGS); +static void post_defaults(POST_ARGS); +static void post_display(POST_ARGS); +static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); +static void post_delim_nb(POST_ARGS); +static void post_dt(POST_ARGS); +static void post_em(POST_ARGS); +static void post_en(POST_ARGS); +static void post_er(POST_ARGS); +static void post_es(POST_ARGS); +static void post_eoln(POST_ARGS); +static void post_ex(POST_ARGS); +static void post_fa(POST_ARGS); +static void post_fl(POST_ARGS); +static void post_fn(POST_ARGS); +static void post_fname(POST_ARGS); +static void post_fo(POST_ARGS); +static void post_hyph(POST_ARGS); +static void post_it(POST_ARGS); +static void post_lb(POST_ARGS); +static void post_nd(POST_ARGS); +static void post_nm(POST_ARGS); +static void post_ns(POST_ARGS); +static void 
post_obsolete(POST_ARGS); +static void post_os(POST_ARGS); +static void post_par(POST_ARGS); +static void post_prevpar(POST_ARGS); +static void post_root(POST_ARGS); +static void post_rs(POST_ARGS); +static void post_rv(POST_ARGS); +static void post_section(POST_ARGS); +static void post_sh(POST_ARGS); +static void post_sh_head(POST_ARGS); +static void post_sh_name(POST_ARGS); +static void post_sh_see_also(POST_ARGS); +static void post_sh_authors(POST_ARGS); +static void post_sm(POST_ARGS); +static void post_st(POST_ARGS); +static void post_std(POST_ARGS); +static void post_sx(POST_ARGS); +static void post_tag(POST_ARGS); +static void post_tg(POST_ARGS); +static void post_useless(POST_ARGS); +static void post_xr(POST_ARGS); +static void post_xx(POST_ARGS); + +static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = { + post_dd, /* Dd */ + post_dt, /* Dt */ + post_os, /* Os */ + post_sh, /* Sh */ + post_section, /* Ss */ + post_par, /* Pp */ + post_display, /* D1 */ + post_display, /* Dl */ + post_display, /* Bd */ + NULL, /* Ed */ + post_bl, /* Bl */ + NULL, /* El */ + post_it, /* It */ + post_delim_nb, /* Ad */ + post_an, /* An */ + NULL, /* Ap */ + post_defaults, /* Ar */ + NULL, /* Cd */ + post_tag, /* Cm */ + post_tag, /* Dv */ + post_er, /* Er */ + post_tag, /* Ev */ + post_ex, /* Ex */ + post_fa, /* Fa */ + NULL, /* Fd */ + post_fl, /* Fl */ + post_fn, /* Fn */ + post_delim_nb, /* Ft */ + post_tag, /* Ic */ + post_delim_nb, /* In */ + post_tag, /* Li */ + post_nd, /* Nd */ + post_nm, /* Nm */ + post_delim_nb, /* Op */ + post_abort, /* Ot */ + post_defaults, /* Pa */ + post_rv, /* Rv */ + post_st, /* St */ + post_delim_nb, /* Va */ + post_delim_nb, /* Vt */ + post_xr, /* Xr */ + NULL, /* %A */ + post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + post_hyph, /* %N */ + post_hyph, /* %O */ + NULL, /* %P */ + post_hyph, /* %R */ + post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. 
*/ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + post_delim_nb, /* Aq */ + post_at, /* At */ + NULL, /* Bc */ + post_bf, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + post_xx, /* Bsx */ + post_bx, /* Bx */ + post_obsolete, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + post_em, /* Em */ + NULL, /* Eo */ + post_xx, /* Fx */ + post_tag, /* Ms */ + post_tag, /* No */ + post_ns, /* Ns */ + post_xx, /* Nx */ + post_xx, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + post_delim_nb, /* Pq */ + NULL, /* Qc */ + post_delim_nb, /* Ql */ + NULL, /* Qo */ + post_delim_nb, /* Qq */ + NULL, /* Re */ + post_rs, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + post_delim_nb, /* Sq */ + post_sm, /* Sm */ + post_sx, /* Sx */ + post_em, /* Sy */ + post_useless, /* Tn */ + post_xx, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + post_fo, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + post_bk, /* Bk */ + NULL, /* Ek */ + post_eoln, /* Bt */ + post_obsolete, /* Hf */ + post_obsolete, /* Fr */ + post_eoln, /* Ud */ + post_lb, /* Lb */ + post_abort, /* Lp */ + post_delim_nb, /* Lk */ + post_defaults, /* Mt */ + post_delim_nb, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + post_es, /* Es */ + post_en, /* En */ + post_xx, /* Dx */ + NULL, /* %Q */ + NULL, /* %U */ + NULL, /* Ta */ + post_tg, /* Tg */ +}; + +#define RSORD_MAX 14 /* Number of `Rs' blocks. 
*/ + +static const enum roff_tok rsord[RSORD_MAX] = { + MDOC__A, + MDOC__T, + MDOC__B, + MDOC__I, + MDOC__J, + MDOC__R, + MDOC__N, + MDOC__V, + MDOC__U, + MDOC__P, + MDOC__Q, + MDOC__C, + MDOC__D, + MDOC__O +}; + +static const char * const secnames[SEC__MAX] = { + NULL, + "NAME", + "LIBRARY", + "SYNOPSIS", + "DESCRIPTION", + "CONTEXT", + "IMPLEMENTATION NOTES", + "RETURN VALUES", + "ENVIRONMENT", + "FILES", + "EXIT STATUS", + "EXAMPLES", + "DIAGNOSTICS", + "COMPATIBILITY", + "ERRORS", + "SEE ALSO", + "STANDARDS", + "HISTORY", + "AUTHORS", + "CAVEATS", + "BUGS", + "SECURITY CONSIDERATIONS", + NULL +}; + +static int fn_prio = TAG_STRONG; + + +/* Validate the subtree rooted at mdoc->last. */ +void +mdoc_validate(struct roff_man *mdoc) +{ + struct roff_node *n, *np; + const v_post *p; + + /* + * Translate obsolete macros to modern macros first + * such that later code does not need to look + * for the obsolete versions. + */ + + n = mdoc->last; + switch (n->tok) { + case MDOC_Lp: + n->tok = MDOC_Pp; + break; + case MDOC_Ot: + post_obsolete(mdoc); + n->tok = MDOC_Ft; + break; + default: + break; + } + + /* + * Iterate over all children, recursing into each one + * in turn, depth-first. + */ + + mdoc->last = mdoc->last->child; + while (mdoc->last != NULL) { + mdoc_validate(mdoc); + if (mdoc->last == n) + mdoc->last = mdoc->last->child; + else + mdoc->last = mdoc->last->next; + } + + /* Finally validate the macro itself. 
*/ + + mdoc->last = n; + mdoc->next = ROFF_NEXT_SIBLING; + switch (n->type) { + case ROFFT_TEXT: + np = n->parent; + if (n->sec != SEC_SYNOPSIS || + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) + check_text(mdoc, n->line, n->pos, n->string); + if ((n->flags & NODE_NOFILL) == 0 && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); + break; + case ROFFT_COMMENT: + case ROFFT_EQN: + case ROFFT_TBL: + break; + case ROFFT_ROOT: + post_root(mdoc); + break; + default: + check_args(mdoc, mdoc->last); + + /* + * Closing delimiters are not special at the + * beginning of a block, opening delimiters + * are not special at the end. + */ + + if (n->child != NULL) + n->child->flags &= ~NODE_DELIMC; + if (n->last != NULL) + n->last->flags &= ~NODE_DELIMO; + + /* Call the macro's postprocessor. 
*/ + + if (n->tok < ROFF_MAX) { + roff_validate(mdoc); + break; + } + + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + p = mdoc_valids + (n->tok - MDOC_Dd); + if (*p) + (*p)(mdoc); + if (mdoc->last == n) + mdoc_state(mdoc, n); + break; + } +} + +static void +check_args(struct roff_man *mdoc, struct roff_node *n) +{ + int i; + + if (NULL == n->args) + return; + + assert(n->args->argc); + for (i = 0; i < (int)n->args->argc; i++) + check_argv(mdoc, n, &n->args->argv[i]); +} + +static void +check_argv(struct roff_man *mdoc, struct roff_node *n, struct mdoc_argv *v) +{ + int i; + + for (i = 0; i < (int)v->sz; i++) + check_text(mdoc, v->line, v->pos, v->value[i]); +} + +static void +check_text(struct roff_man *mdoc, int ln, int pos, char *p) +{ + char *cp; + + if (mdoc->last->flags & NODE_NOFILL) + return; + + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mandoc_msg(MANDOCERR_FI_TAB, ln, pos + (int)(p - cp), NULL); +} + +static void +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (cp[0] != '-' || cp[1] != '-') + continue; + cp++; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + *np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[1] != '\0' && cp[2] != '\0' ? 
+ isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } +} + +static void +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) +{ + const char *cp, *cpr; + + if (*p == '\0') + return; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, ln, pos + (int)(cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_msg(MANDOCERR_FUNC, ln, pos + (int)(cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } +} + +static void +post_abort(POST_ARGS) +{ + abort(); +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc; + enum mdelim delim; + enum roff_tok tok; + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc < nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + if (*lc == ')' && (tok == MDOC_Nd || tok == MDOC_Sh || + tok == MDOC_Ss || tok == MDOC_Fo)) + return; + + mandoc_msg(MANDOCERR_DELIM, nch->line, + nch->pos + (int)(lc - nch->string), "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? 
"" : " ...", nch->string); +} + +/* + * Report MANDOCERR_DELIM_NB if the last text child of the current + * macro ends with a closing or middle delimiter, unless one of the + * patterns allowed below applies. + */ +static void +post_delim_nb(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + /* FALLTHROUGH */ + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) { + nw = 0; + /* + * Convert each byte via unsigned char before isalpha(3): + * a negative char value is outside the domain of the + * ctype functions. + */ + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned char)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_msg(MANDOCERR_DELIM_NB, nch->line, + nch->pos + (int)(lc - nch->string), "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? 
"" : " ...", nch->string); +} + +static void +post_bl_norm(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv, *wa; + int i; + enum mdocargt mdoclt; + enum mdoc_list lt; + + n = mdoc->last->parent; + n->norm->Bl.type = LIST__NONE; + + /* + * First figure out which kind of list to use: bind ourselves to + * the first mentioned list type and warn about any remaining + * ones. If we find no list type, we default to LIST_item. + */ + + wa = (n->args == NULL) ? NULL : n->args->argv; + mdoclt = MDOC_ARG_MAX; + for (i = 0; n->args && i < (int)n->args->argc; i++) { + argv = n->args->argv + i; + lt = LIST__NONE; + switch (argv->arg) { + /* Set list types. */ + case MDOC_Bullet: + lt = LIST_bullet; + break; + case MDOC_Dash: + lt = LIST_dash; + break; + case MDOC_Enum: + lt = LIST_enum; + break; + case MDOC_Hyphen: + lt = LIST_hyphen; + break; + case MDOC_Item: + lt = LIST_item; + break; + case MDOC_Tag: + lt = LIST_tag; + break; + case MDOC_Diag: + lt = LIST_diag; + break; + case MDOC_Hang: + lt = LIST_hang; + break; + case MDOC_Ohang: + lt = LIST_ohang; + break; + case MDOC_Inset: + lt = LIST_inset; + break; + case MDOC_Column: + lt = LIST_column; + break; + /* Set list arguments. 
*/ + case MDOC_Compact: + if (n->norm->Bl.comp) + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, "Bl -compact"); + n->norm->Bl.comp = 1; + break; + case MDOC_Width: + wa = argv; + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + argv->line, argv->pos, "Bl -width"); + n->norm->Bl.width = "0n"; + break; + } + if (NULL != n->norm->Bl.width) + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bl -width %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); + n->norm->Bl.width = argv->value[0]; + break; + case MDOC_Offset: + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + argv->line, argv->pos, "Bl -offset"); + break; + } + if (NULL != n->norm->Bl.offs) + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bl -offset %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); + n->norm->Bl.offs = argv->value[0]; + break; + default: + continue; + } + if (LIST__NONE == lt) + continue; + mdoclt = argv->arg; + + /* Check: multiple list types. */ + + if (LIST__NONE != n->norm->Bl.type) { + mandoc_msg(MANDOCERR_BL_REP, n->line, n->pos, + "Bl -%s", mdoc_argnames[argv->arg]); + continue; + } + + /* The list type should come first. */ + + if (n->norm->Bl.width || + n->norm->Bl.offs || + n->norm->Bl.comp) + mandoc_msg(MANDOCERR_BL_LATETYPE, + n->line, n->pos, "Bl -%s", + mdoc_argnames[n->args->argv[0].arg]); + + n->norm->Bl.type = lt; + if (LIST_column == lt) { + n->norm->Bl.ncols = argv->sz; + n->norm->Bl.cols = (void *)argv->value; + } + } + + /* Allow lists to default to LIST_item. */ + + if (LIST__NONE == n->norm->Bl.type) { + mandoc_msg(MANDOCERR_BL_NOTYPE, n->line, n->pos, "Bl"); + n->norm->Bl.type = LIST_item; + mdoclt = MDOC_Item; + } + + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. Yet others have a default and need + * no warning. 
+ */ + + switch (n->norm->Bl.type) { + case LIST_tag: + if (n->norm->Bl.width == NULL) + mandoc_msg(MANDOCERR_BL_NOWIDTH, + n->line, n->pos, "Bl -tag"); + break; + case LIST_column: + case LIST_diag: + case LIST_ohang: + case LIST_inset: + case LIST_item: + if (n->norm->Bl.width != NULL) + mandoc_msg(MANDOCERR_BL_SKIPW, wa->line, wa->pos, + "Bl -%s", mdoc_argnames[mdoclt]); + n->norm->Bl.width = NULL; + break; + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + if (n->norm->Bl.width == NULL) + n->norm->Bl.width = "2n"; + break; + case LIST_enum: + if (n->norm->Bl.width == NULL) + n->norm->Bl.width = "3n"; + break; + default: + break; + } +} + +static void +post_bd(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv; + int i; + enum mdoc_disp dt; + + n = mdoc->last; + for (i = 0; n->args && i < (int)n->args->argc; i++) { + argv = n->args->argv + i; + dt = DISP__NONE; + + switch (argv->arg) { + case MDOC_Centred: + dt = DISP_centered; + break; + case MDOC_Ragged: + dt = DISP_ragged; + break; + case MDOC_Unfilled: + dt = DISP_unfilled; + break; + case MDOC_Filled: + dt = DISP_filled; + break; + case MDOC_Literal: + dt = DISP_literal; + break; + case MDOC_File: + mandoc_msg(MANDOCERR_BD_FILE, n->line, n->pos, NULL); + break; + case MDOC_Offset: + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + argv->line, argv->pos, "Bd -offset"); + break; + } + if (NULL != n->norm->Bd.offs) + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, + "Bd -offset %s", argv->value[0]); + rewrite_macro2len(mdoc, argv->value); + n->norm->Bd.offs = argv->value[0]; + break; + case MDOC_Compact: + if (n->norm->Bd.comp) + mandoc_msg(MANDOCERR_ARG_REP, + argv->line, argv->pos, "Bd -compact"); + n->norm->Bd.comp = 1; + break; + default: + abort(); + } + if (DISP__NONE == dt) + continue; + + if (DISP__NONE == n->norm->Bd.type) + n->norm->Bd.type = dt; + else + mandoc_msg(MANDOCERR_BD_REP, n->line, n->pos, + "Bd -%s", mdoc_argnames[argv->arg]); + } + + if (DISP__NONE == 
n->norm->Bd.type) { + mandoc_msg(MANDOCERR_BD_NOTYPE, n->line, n->pos, "Bd"); + n->norm->Bd.type = DISP_ragged; + } +} + +/* + * Stand-alone line macros. + */ + +static void +post_an_norm(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv; + size_t i; + + n = mdoc->last; + if (n->args == NULL) + return; + + for (i = 1; i < n->args->argc; i++) { + argv = n->args->argv + i; + mandoc_msg(MANDOCERR_AN_REP, argv->line, argv->pos, + "An -%s", mdoc_argnames[argv->arg]); + } + + argv = n->args->argv; + if (argv->arg == MDOC_Split) + n->norm->An.auth = AUTH_split; + else if (argv->arg == MDOC_Nosplit) + n->norm->An.auth = AUTH_nosplit; + else + abort(); +} + +static void +post_eoln(POST_ARGS) +{ + struct roff_node *n; + + post_useless(mdoc); + n = mdoc->last; + if (n->child != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, n->line, + n->pos, "%s %s", roff_name[n->tok], n->child->string); + + while (n->child != NULL) + roff_node_delete(mdoc, n->child); + + roff_word_alloc(mdoc, n->line, n->pos, n->tok == MDOC_Bt ? + "is currently in beta test." 
: "currently under development."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static int +build_list(struct roff_man *mdoc, int tok) +{ + struct roff_node *n; + int ic; + + n = mdoc->last->next; + for (ic = 1;; ic++) { + roff_elem_alloc(mdoc, n->line, n->pos, tok); + mdoc->last->flags |= NODE_NOSRC; + roff_node_relink(mdoc, n); + n = mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + if (n->next == NULL) + return ic; + if (ic > 1 || n->next->next != NULL) { + roff_word_alloc(mdoc, n->line, n->pos, ","); + mdoc->last->flags |= NODE_DELIMC | NODE_NOSRC; + } + n = mdoc->last->next; + if (n->next == NULL) { + roff_word_alloc(mdoc, n->line, n->pos, "and"); + mdoc->last->flags |= NODE_NOSRC; + } + } +} + +static void +post_ex(POST_ARGS) +{ + struct roff_node *n; + int ic; + + post_std(mdoc); + + n = mdoc->last; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "The"); + mdoc->last->flags |= NODE_NOSRC; + + if (mdoc->last->next != NULL) + ic = build_list(mdoc, MDOC_Nm); + else if (mdoc->meta.name != NULL) { + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Nm); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + ic = 1; + } else { + mandoc_msg(MANDOCERR_EX_NONAME, n->line, n->pos, "Ex"); + ic = 0; + } + + roff_word_alloc(mdoc, n->line, n->pos, + ic > 1 ? 
"utilities exit\\~0" : "utility exits\\~0"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, + "on success, and\\~>0 if an error occurs."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static void +post_lb(POST_ARGS) +{ + struct roff_node *n; + + post_delim_nb(mdoc); + + n = mdoc->last; + assert(n->child->type == ROFFT_TEXT); + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "library"); + mdoc->last->flags = NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "\\(lq"); + mdoc->last->flags = NODE_DELIMO | NODE_NOSRC; + mdoc->last = mdoc->last->next; + roff_word_alloc(mdoc, n->line, n->pos, "\\(rq"); + mdoc->last->flags = NODE_DELIMC | NODE_NOSRC; + mdoc->last = n; +} + +static void +post_rv(POST_ARGS) +{ + struct roff_node *n; + int ic; + + post_std(mdoc); + + n = mdoc->last; + mdoc->next = ROFF_NEXT_CHILD; + if (n->child != NULL) { + roff_word_alloc(mdoc, n->line, n->pos, "The"); + mdoc->last->flags |= NODE_NOSRC; + ic = build_list(mdoc, MDOC_Fn); + roff_word_alloc(mdoc, n->line, n->pos, + ic > 1 ? 
"functions return" : "function returns"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, + "the value\\~0 if successful;"); + } else + roff_word_alloc(mdoc, n->line, n->pos, "Upon successful " + "completion, the value\\~0 is returned;"); + mdoc->last->flags |= NODE_NOSRC; + + roff_word_alloc(mdoc, n->line, n->pos, "otherwise " + "the value\\~\\-1 is returned and the global variable"); + mdoc->last->flags |= NODE_NOSRC; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Va); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "errno"); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = mdoc->last->parent; + mdoc->next = ROFF_NEXT_SIBLING; + roff_word_alloc(mdoc, n->line, n->pos, + "is set to indicate the error."); + mdoc->last->flags |= NODE_EOS | NODE_NOSRC; + mdoc->last = n; +} + +static void +post_std(POST_ARGS) +{ + struct roff_node *n; + + post_delim(mdoc); + + n = mdoc->last; + if (n->args && n->args->argc == 1) + if (n->args->argv[0].arg == MDOC_Std) + return; + + mandoc_msg(MANDOCERR_ARG_STD, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +static void +post_st(POST_ARGS) +{ + struct roff_node *n, *nch; + const char *p; + + n = mdoc->last; + nch = n->child; + assert(nch->type == ROFFT_TEXT); + + if ((p = mdoc_a2st(nch->string)) == NULL) { + mandoc_msg(MANDOCERR_ST_BAD, + nch->line, nch->pos, "St %s", nch->string); + roff_node_delete(mdoc, n); + return; + } + + nch->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, nch->line, nch->pos, p); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last= n; +} + +static void +post_tg(POST_ARGS) +{ + struct roff_node *n; /* The .Tg node. */ + struct roff_node *nch; /* The first child of the .Tg node. */ + struct roff_node *nn; /* The next node after the .Tg node. */ + struct roff_node *np; /* The parent of the next node. */ + struct roff_node *nt; /* The TEXT node containing the tag. */ + size_t len; /* The number of bytes in the tag. 
*/ + + /* Find the next node. */ + n = mdoc->last; + for (nn = n; nn != NULL; nn = nn->parent) { + if (nn->next != NULL) { + nn = nn->next; + break; + } + } + + /* Find the tag. */ + nt = nch = n->child; + if (nch == NULL && nn != NULL && nn->child != NULL && + nn->child->type == ROFFT_TEXT) + nt = nn->child; + + /* Validate the tag. */ + if (nt == NULL || *nt->string == '\0') + mandoc_msg(MANDOCERR_MACRO_EMPTY, n->line, n->pos, "Tg"); + if (nt == NULL) { + roff_node_delete(mdoc, n); + return; + } + len = strcspn(nt->string, " \t\\"); + if (nt->string[len] != '\0') + mandoc_msg(MANDOCERR_TG_SPC, nt->line, + nt->pos + len, "Tg %s", nt->string); + + /* Keep only the first argument. */ + if (nch != NULL && nch->next != NULL) { + mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line, + nch->next->pos, "Tg ... %s", nch->next->string); + while (nch->next != NULL) + roff_node_delete(mdoc, nch->next); + } + + /* Drop the macro if the first argument is invalid. */ + if (len == 0 || nt->string[len] != '\0') { + roff_node_delete(mdoc, n); + return; + } + + /* By default, tag the .Tg node itself. */ + if (nn == NULL || nn->flags & NODE_ID) + nn = n; + + /* Explicit tagging of specific macros. */ + switch (nn->tok) { + case MDOC_Sh: + case MDOC_Ss: + case MDOC_Fo: + nn = nn->head->child == NULL ? n : nn->head; + break; + case MDOC_It: + np = nn->parent; + while (np->tok != MDOC_Bl) + np = np->parent; + switch (np->norm->Bl.type) { + case LIST_column: + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + nn = nn->head; + break; + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + case LIST_item: + nn = nn->body->child == NULL ? n : nn->body; + break; + default: + abort(); + } + break; + case MDOC_Bd: + case MDOC_Bl: + case MDOC_D1: + case MDOC_Dl: + nn = nn->body->child == NULL ? 
n : nn->body; + break; + case MDOC_Pp: + break; + case MDOC_Cm: + case MDOC_Dv: + case MDOC_Em: + case MDOC_Er: + case MDOC_Ev: + case MDOC_Fl: + case MDOC_Fn: + case MDOC_Ic: + case MDOC_Li: + case MDOC_Ms: + case MDOC_No: + case MDOC_Sy: + if (nn->child == NULL) + nn = n; + break; + default: + nn = n; + break; + } + tag_put(nt->string, TAG_MANUAL, nn); + if (nn != n) + n->flags |= NODE_NOPRT; +} + +static void +post_obsolete(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->type == ROFFT_ELEM || n->type == ROFFT_BLOCK) + mandoc_msg(MANDOCERR_MACRO_OBS, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +/* + * Block macros. + */ + +static void +post_bf(POST_ARGS) +{ + struct roff_node *np, *nch; + + /* + * Unlike other data pointers, these are "housed" by the HEAD + * element, which contains the goods. + */ + + np = mdoc->last; + if (np->type != ROFFT_HEAD) + return; + + assert(np->parent->type == ROFFT_BLOCK); + assert(np->parent->tok == MDOC_Bf); + + /* Check the number of arguments. */ + + nch = np->child; + if (np->parent->args == NULL) { + if (nch == NULL) { + mandoc_msg(MANDOCERR_BF_NOFONT, + np->line, np->pos, "Bf"); + return; + } + nch = nch->next; + } + if (nch != NULL) + mandoc_msg(MANDOCERR_ARG_EXCESS, + nch->line, nch->pos, "Bf ... %s", nch->string); + + /* Extract argument into data. */ + + if (np->parent->args != NULL) { + switch (np->parent->args->argv[0].arg) { + case MDOC_Emphasis: + np->norm->Bf.font = FONT_Em; + break; + case MDOC_Literal: + np->norm->Bf.font = FONT_Li; + break; + case MDOC_Symbolic: + np->norm->Bf.font = FONT_Sy; + break; + default: + abort(); + } + return; + } + + /* Extract parameter into data. */ + + if ( ! strcmp(np->child->string, "Em")) + np->norm->Bf.font = FONT_Em; + else if ( ! 
strcmp(np->child->string, "Li")) + np->norm->Bf.font = FONT_Li; + else if ( ! strcmp(np->child->string, "Sy")) + np->norm->Bf.font = FONT_Sy; + else + mandoc_msg(MANDOCERR_BF_BADFONT, np->child->line, + np->child->pos, "Bf %s", np->child->string); +} + +static void +post_fname(POST_ARGS) +{ + struct roff_node *n, *nch; + const char *cp; + size_t pos; + + n = mdoc->last; + nch = n->child; + cp = nch->string; + if (*cp == '(') { + if (cp[strlen(cp + 1)] == ')') + return; + pos = 0; + } else { + pos = strcspn(cp, "()"); + if (cp[pos] == '\0') { + if (n->sec == SEC_DESCRIPTION || + n->sec == SEC_CUSTOM) + tag_put(NULL, fn_prio++, n); + return; + } + } + mandoc_msg(MANDOCERR_FN_PAREN, nch->line, nch->pos + pos, "%s", cp); +} + +static void +post_fn(POST_ARGS) +{ + post_fname(mdoc); + post_fa(mdoc); +} + +static void +post_fo(POST_ARGS) +{ + const struct roff_node *n; + + n = mdoc->last; + + if (n->type != ROFFT_HEAD) + return; + + if (n->child == NULL) { + mandoc_msg(MANDOCERR_FO_NOHEAD, n->line, n->pos, "Fo"); + return; + } + if (n->child != n->last) { + mandoc_msg(MANDOCERR_ARG_EXCESS, + n->child->next->line, n->child->next->pos, + "Fo ... %s", n->child->next->string); + while (n->child != n->last) + roff_node_delete(mdoc, n->last); + } else + post_delim(mdoc); + + post_fname(mdoc); +} + +static void +post_fa(POST_ARGS) +{ + const struct roff_node *n; + const char *cp; + + for (n = mdoc->last->child; n != NULL; n = n->next) { + for (cp = n->string; *cp != '\0'; cp++) { + /* Ignore callbacks and alterations. 
*/ + if (*cp == '(' || *cp == '{') + break; + if (*cp != ',') + continue; + mandoc_msg(MANDOCERR_FA_COMMA, n->line, + n->pos + (int)(cp - n->string), "%s", n->string); + break; + } + } + post_delim_nb(mdoc); +} + +static void +post_nm(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + + if (n->sec == SEC_NAME && n->child != NULL && + n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL) + mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1); + + if (n->last != NULL && n->last->tok == MDOC_Pp) + roff_node_relink(mdoc, n->last); + + if (mdoc->meta.name == NULL) + deroff(&mdoc->meta.name, n); + + if (mdoc->meta.name == NULL || + (mdoc->lastsec == SEC_NAME && n->child == NULL)) + mandoc_msg(MANDOCERR_NM_NONAME, n->line, n->pos, "Nm"); + + switch (n->type) { + case ROFFT_ELEM: + post_delim_nb(mdoc); + break; + case ROFFT_HEAD: + post_delim(mdoc); + break; + default: + return; + } + + if ((n->child != NULL && n->child->type == ROFFT_TEXT) || + mdoc->meta.name == NULL) + return; + + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; +} + +static void +post_nd(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + + if (n->type != ROFFT_BODY) + return; + + if (n->sec != SEC_NAME) + mandoc_msg(MANDOCERR_ND_LATE, n->line, n->pos, "Nd"); + + if (n->child == NULL) + mandoc_msg(MANDOCERR_ND_EMPTY, n->line, n->pos, "Nd"); + else + post_delim(mdoc); + + post_hyph(mdoc); +} + +static void +post_display(POST_ARGS) +{ + struct roff_node *n, *np; + + n = mdoc->last; + switch (n->type) { + case ROFFT_BODY: + if (n->end != ENDBODY_NOT) { + if (n->tok == MDOC_Bd && + n->body->parent->args == NULL) + roff_node_delete(mdoc, n); + } else if (n->child == NULL) + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, + "%s", roff_name[n->tok]); + else if (n->tok == MDOC_D1) + post_hyph(mdoc); + break; + case ROFFT_BLOCK: + if (n->tok == MDOC_Bd) { + if (n->args == NULL) { + 
mandoc_msg(MANDOCERR_BD_NOARG, + n->line, n->pos, "Bd"); + mdoc->next = ROFF_NEXT_SIBLING; + while (n->body->child != NULL) + roff_node_relink(mdoc, + n->body->child); + roff_node_delete(mdoc, n); + break; + } + post_bd(mdoc); + post_prevpar(mdoc); + } + for (np = n->parent; np != NULL; np = np->parent) { + if (np->type == ROFFT_BLOCK && np->tok == MDOC_Bd) { + mandoc_msg(MANDOCERR_BD_NEST, n->line, + n->pos, "%s in Bd", roff_name[n->tok]); + break; + } + } + break; + default: + break; + } +} + +static void +post_defaults(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->child != NULL) { + post_delim_nb(mdoc); + return; + } + mdoc->next = ROFF_NEXT_CHILD; + switch (n->tok) { + case MDOC_Ar: + roff_word_alloc(mdoc, n->line, n->pos, "file"); + mdoc->last->flags |= NODE_NOSRC; + roff_word_alloc(mdoc, n->line, n->pos, "..."); + break; + case MDOC_Pa: + case MDOC_Mt: + roff_word_alloc(mdoc, n->line, n->pos, "~"); + break; + default: + abort(); + } + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; +} + +static void +post_at(POST_ARGS) +{ + struct roff_node *n, *nch; + const char *att; + + n = mdoc->last; + nch = n->child; + + /* + * If we have a child, look it up in the standard keys. If a + * key exist, use that instead of the child; if it doesn't, + * prefix "AT&T UNIX " to the existing data. 
+ */ + + att = NULL; + if (nch != NULL && ((att = mdoc_a2att(nch->string)) == NULL)) + mandoc_msg(MANDOCERR_AT_BAD, + nch->line, nch->pos, "At %s", nch->string); + + mdoc->next = ROFF_NEXT_CHILD; + if (att != NULL) { + roff_word_alloc(mdoc, nch->line, nch->pos, att); + nch->flags |= NODE_NOPRT; + } else + roff_word_alloc(mdoc, n->line, n->pos, "AT&T UNIX"); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; +} + +static void +post_an(POST_ARGS) +{ + struct roff_node *np, *nch; + + post_an_norm(mdoc); + + np = mdoc->last; + nch = np->child; + if (np->norm->An.auth == AUTH__NONE) { + if (nch == NULL) + mandoc_msg(MANDOCERR_MACRO_EMPTY, + np->line, np->pos, "An"); + else + post_delim_nb(mdoc); + } else if (nch != NULL) + mandoc_msg(MANDOCERR_ARG_EXCESS, + nch->line, nch->pos, "An ... %s", nch->string); +} + +static void +post_em(POST_ARGS) +{ + post_tag(mdoc); + tag_put(NULL, TAG_FALLBACK, mdoc->last); +} + +static void +post_en(POST_ARGS) +{ + post_obsolete(mdoc); + if (mdoc->last->type == ROFFT_BLOCK) + mdoc->last->norm->Es = mdoc->last_es; +} + +static void +post_er(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->sec == SEC_ERRORS && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Bq && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(NULL, TAG_STRONG, n); + post_delim_nb(mdoc); +} + +static void +post_tag(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if ((n->prev == NULL || + (n->prev->type == ROFFT_TEXT && + strcmp(n->prev->string, "|") == 0)) && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Xo && + n->parent->parent->prev == NULL && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(NULL, TAG_STRONG, n); + post_delim_nb(mdoc); +} + +static void +post_es(POST_ARGS) +{ + post_obsolete(mdoc); + mdoc->last_es = mdoc->last; +} + +static void +post_fl(POST_ARGS) +{ + struct roff_node *n; + char *cp; + + /* + * Transform ".Fl Fl long" to ".Fl \-long", + * resulting for example in better HTML output. 
+ */ + + n = mdoc->last; + if (n->prev != NULL && n->prev->tok == MDOC_Fl && + n->prev->child == NULL && n->child != NULL && + (n->flags & NODE_LINE) == 0) { + mandoc_asprintf(&cp, "\\-%s", n->child->string); + free(n->child->string); + n->child->string = cp; + roff_node_delete(mdoc, n->prev); + } + post_tag(mdoc); +} + +static void +post_xx(POST_ARGS) +{ + struct roff_node *n; + const char *os; + char *v; + + post_delim_nb(mdoc); + + n = mdoc->last; + switch (n->tok) { + case MDOC_Bsx: + os = "BSD/OS"; + break; + case MDOC_Dx: + os = "DragonFly"; + break; + case MDOC_Fx: + os = "FreeBSD"; + break; + case MDOC_Nx: + os = "NetBSD"; + if (n->child == NULL) + break; + v = n->child->string; + if ((v[0] != '0' && v[0] != '1') || v[1] != '.' || + v[2] < '0' || v[2] > '9' || + v[3] < 'a' || v[3] > 'z' || v[4] != '\0') + break; + n->child->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->child->line, n->child->pos, v); + v = mdoc->last->string; + v[3] = toupper((unsigned char)v[3]); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; + break; + case MDOC_Ox: + os = "OpenBSD"; + break; + case MDOC_Ux: + os = "UNIX"; + break; + default: + abort(); + } + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, os); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; +} + +static void +post_it(POST_ARGS) +{ + struct roff_node *nbl, *nit, *nch; + int i, cols; + enum mdoc_list lt; + + post_prevpar(mdoc); + + nit = mdoc->last; + if (nit->type != ROFFT_BLOCK) + return; + + nbl = nit->parent->parent; + lt = nbl->norm->Bl.type; + + switch (lt) { + case LIST_tag: + case LIST_hang: + case LIST_ohang: + case LIST_inset: + case LIST_diag: + if (nit->head->child == NULL) + mandoc_msg(MANDOCERR_IT_NOHEAD, + nit->line, nit->pos, "Bl -%s It", + mdoc_argnames[nbl->args->argv[0].arg]); + break; + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + if (nit->body == NULL || nit->body->child == NULL) + 
mandoc_msg(MANDOCERR_IT_NOBODY, + nit->line, nit->pos, "Bl -%s It", + mdoc_argnames[nbl->args->argv[0].arg]); + /* FALLTHROUGH */ + case LIST_item: + if ((nch = nit->head->child) != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, + nit->line, nit->pos, "It %s", + nch->type == ROFFT_TEXT ? nch->string : + roff_name[nch->tok]); + break; + case LIST_column: + cols = (int)nbl->norm->Bl.ncols; + + assert(nit->head->child == NULL); + + if (nit->head->next->child == NULL && + nit->head->next->next == NULL) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, + nit->line, nit->pos, "It"); + roff_node_delete(mdoc, nit); + break; + } + + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_BODY) + continue; + if (i++ && nch->flags & NODE_LINE) + mandoc_msg(MANDOCERR_TA_LINE, + nch->line, nch->pos, "Ta"); + } + if (i < cols || i > cols + 1) + mandoc_msg(MANDOCERR_BL_COL, nit->line, nit->pos, + "%d columns, %d cells", cols, i); + else if (nit->head->next->child != NULL && + nit->head->next->child->flags & NODE_LINE) + mandoc_msg(MANDOCERR_IT_NOARG, + nit->line, nit->pos, "Bl -column It"); + break; + default: + abort(); + } +} + +static void +post_bl_block(POST_ARGS) +{ + struct roff_node *n, *ni, *nc; + + post_prevpar(mdoc); + + n = mdoc->last; + for (ni = n->body->child; ni != NULL; ni = ni->next) { + if (ni->body == NULL) + continue; + nc = ni->body->last; + while (nc != NULL) { + switch (nc->tok) { + case MDOC_Pp: + case ROFF_br: + break; + default: + nc = NULL; + continue; + } + if (ni->next == NULL) { + mandoc_msg(MANDOCERR_PAR_MOVE, nc->line, + nc->pos, "%s", roff_name[nc->tok]); + roff_node_relink(mdoc, nc); + } else if (n->norm->Bl.comp == 0 && + n->norm->Bl.type != LIST_column) { + mandoc_msg(MANDOCERR_PAR_SKIP, + nc->line, nc->pos, + "%s before It", roff_name[nc->tok]); + roff_node_delete(mdoc, nc); + } else + break; + nc = ni->body->last; + } + } +} + +/* + * If the argument of -offset or -width is a macro, + * replace it with the associated default 
width. + */ +static void +rewrite_macro2len(struct roff_man *mdoc, char **arg) +{ + size_t width; + enum roff_tok tok; + + if (*arg == NULL) + return; + else if ( ! strcmp(*arg, "Ds")) + width = 6; + else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) == TOKEN_NONE) + return; + else + width = macro2len(tok); + + free(*arg); + mandoc_asprintf(arg, "%zun", width); +} + +static void +post_bl_head(POST_ARGS) +{ + struct roff_node *nbl, *nh, *nch, *nnext; + struct mdoc_argv *argv; + int i, j; + + post_bl_norm(mdoc); + + nh = mdoc->last; + if (nh->norm->Bl.type != LIST_column) { + if ((nch = nh->child) == NULL) + return; + mandoc_msg(MANDOCERR_ARG_EXCESS, + nch->line, nch->pos, "Bl ... %s", nch->string); + while (nch != NULL) { + roff_node_delete(mdoc, nch); + nch = nh->child; + } + return; + } + + /* + * Append old-style lists, where the column width specifiers + * trail as macro parameters, to the new-style ("normal-form") + * lists where they're argument values following -column. + */ + + if (nh->child == NULL) + return; + + nbl = nh->parent; + for (j = 0; j < (int)nbl->args->argc; j++) + if (nbl->args->argv[j].arg == MDOC_Column) + break; + + assert(j < (int)nbl->args->argc); + + /* + * Accommodate for new-style groff column syntax. Shuffle the + * child nodes, all of which must be TEXT, as arguments for the + * column field. Then, delete the head children. 
+ */ + + argv = nbl->args->argv + j; + i = argv->sz; + for (nch = nh->child; nch != NULL; nch = nch->next) + argv->sz++; + argv->value = mandoc_reallocarray(argv->value, + argv->sz, sizeof(char *)); + + nh->norm->Bl.ncols = argv->sz; + nh->norm->Bl.cols = (void *)argv->value; + + for (nch = nh->child; nch != NULL; nch = nnext) { + argv->value[i++] = nch->string; + nch->string = NULL; + nnext = nch->next; + roff_node_delete(NULL, nch); + } + nh->child = NULL; +} + +static void +post_bl(POST_ARGS) +{ + struct roff_node *nbody; /* of the Bl */ + struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; + + nbody = mdoc->last; + switch (nbody->type) { + case ROFFT_BLOCK: + post_bl_block(mdoc); + return; + case ROFFT_HEAD: + post_bl_head(mdoc); + return; + case ROFFT_BODY: + break; + default: + return; + } + if (nbody->end != ENDBODY_NOT) + return; + + /* + * Up to the first item, move nodes before the list, + * but leave transparent nodes where they are + * if they precede an item. + * The next non-transparent node is kept in nchild. + * It only needs to be updated after a non-transparent + * node was moved out, and at the very beginning + * when no node at all was moved yet. + */ + + nchild = mdoc->last; + for (;;) { + if (nchild == mdoc->last) + nchild = roff_node_child(nbody); + if (nchild == NULL) { + mdoc->last = nbody; + mandoc_msg(MANDOCERR_BLK_EMPTY, + nbody->line, nbody->pos, "Bl"); + return; + } + if (nchild->tok == MDOC_It) { + mdoc->last = nbody; + break; + } + mandoc_msg(MANDOCERR_BL_MOVE, nbody->child->line, + nbody->child->pos, "%s", roff_name[nbody->child->tok]); + if (nbody->parent->prev == NULL) { + mdoc->last = nbody->parent->parent; + mdoc->next = ROFF_NEXT_CHILD; + } else { + mdoc->last = nbody->parent->prev; + mdoc->next = ROFF_NEXT_SIBLING; + } + roff_node_relink(mdoc, nbody->child); + } + + /* + * We have reached the first item, + * so moving nodes out is no longer possible. 
+ * But in .Bl -column, the first rows may be implicit, + * that is, they may not start with .It macros. + * Such rows may be followed by nodes generated on the + * roff level, for example .TS. + * Wrap such roff nodes into an implicit row. + */ + + while (nchild != NULL) { + if (nchild->tok == MDOC_It) { + nchild = roff_node_next(nchild); + continue; + } + nnext = nchild->next; + mdoc->last = nchild->prev; + mdoc->next = ROFF_NEXT_SIBLING; + roff_block_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + roff_head_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + mdoc->next = ROFF_NEXT_SIBLING; + roff_body_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); + while (nchild->tok != MDOC_It) { + roff_node_relink(mdoc, nchild); + if (nnext == NULL) + break; + nchild = nnext; + nnext = nchild->next; + mdoc->next = ROFF_NEXT_SIBLING; + } + mdoc->last = nbody; + } + + if (mdoc->meta.os_e != MANDOC_OS_NETBSD) + return; + + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_msg(MANDOCERR_ER_ORDER, + nnext->line, nnext->pos, + "Er %s %s (NetBSD)", + prev_Er, nnext->string); + else if (order == 0) + mandoc_msg(MANDOCERR_ER_REP, + nnext->line, nnext->pos, + "Er %s (NetBSD)", prev_Er); + } + prev_Er = nnext->string; + } +} + +static void +post_bk(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + + if (n->type == ROFFT_BLOCK && n->body->child == NULL) { + mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, "Bk"); + roff_node_delete(mdoc, n); + } +} + +static void +post_sm(POST_ARGS) +{ + struct roff_node *nch; + + nch = mdoc->last->child; + + if (nch == NULL) { + mdoc->flags ^= MDOC_SMOFF; + return; + } + + 
assert(nch->type == ROFFT_TEXT); + + if ( ! strcmp(nch->string, "on")) { + mdoc->flags &= ~MDOC_SMOFF; + return; + } + if ( ! strcmp(nch->string, "off")) { + mdoc->flags |= MDOC_SMOFF; + return; + } + + mandoc_msg(MANDOCERR_SM_BAD, nch->line, nch->pos, + "%s %s", roff_name[mdoc->last->tok], nch->string); + roff_node_relink(mdoc, nch); + return; +} + +static void +post_root(POST_ARGS) +{ + struct roff_node *n; + + /* Add missing prologue data. */ + + if (mdoc->meta.date == NULL) + mdoc->meta.date = mandoc_normdate(NULL, NULL); + + if (mdoc->meta.title == NULL) { + mandoc_msg(MANDOCERR_DT_NOTITLE, 0, 0, "EOF"); + mdoc->meta.title = mandoc_strdup("UNTITLED"); + } + + if (mdoc->meta.vol == NULL) + mdoc->meta.vol = mandoc_strdup("LOCAL"); + + if (mdoc->meta.os == NULL) { + mandoc_msg(MANDOCERR_OS_MISSING, 0, 0, NULL); + mdoc->meta.os = mandoc_strdup(""); + } else if (mdoc->meta.os_e && + (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + + if (mdoc->meta.arch != NULL && + arch_valid(mdoc->meta.arch, mdoc->meta.os_e) == 0) { + n = mdoc->meta.first->child; + while (n->tok != MDOC_Dt || + n->child == NULL || + n->child->next == NULL || + n->child->next->next == NULL) + n = n->next; + n = n->child->next->next; + mandoc_msg(MANDOCERR_ARCH_BAD, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + } + + /* Check that we begin with a proper `Sh'. 
*/ + + n = mdoc->meta.first->child; + while (n != NULL && + (n->type == ROFFT_COMMENT || + (n->tok >= MDOC_Dd && + mdoc_macro(n->tok)->flags & MDOC_PROLOGUE))) + n = n->next; + + if (n == NULL) + mandoc_msg(MANDOCERR_DOC_EMPTY, 0, 0, NULL); + else if (n->tok != MDOC_Sh) + mandoc_msg(MANDOCERR_SEC_BEFORE, n->line, n->pos, + "%s", roff_name[n->tok]); +} + +static void +post_rs(POST_ARGS) +{ + struct roff_node *np, *nch, *next, *prev; + int i, j; + + np = mdoc->last; + + if (np->type != ROFFT_BODY) + return; + + if (np->child == NULL) { + mandoc_msg(MANDOCERR_RS_EMPTY, np->line, np->pos, "Rs"); + return; + } + + /* + * The full `Rs' block needs special handling to order the + * sub-elements according to `rsord'. Pick through each element + * and correctly order it. This is an insertion sort. + */ + + next = NULL; + for (nch = np->child->next; nch != NULL; nch = next) { + /* Determine order number of this child. */ + for (i = 0; i < RSORD_MAX; i++) + if (rsord[i] == nch->tok) + break; + + if (i == RSORD_MAX) { + mandoc_msg(MANDOCERR_RS_BAD, nch->line, nch->pos, + "%s", roff_name[nch->tok]); + i = -1; + } else if (nch->tok == MDOC__J || nch->tok == MDOC__B) + np->norm->Rs.quote_T++; + + /* + * Remove this child from the chain. This somewhat + * repeats roff_node_unlink(), but since we're + * just re-ordering, there's no need for the + * full unlink process. + */ + + if ((next = nch->next) != NULL) + next->prev = nch->prev; + + if ((prev = nch->prev) != NULL) + prev->next = nch->next; + + nch->prev = nch->next = NULL; + + /* + * Scan back until we reach a node that's + * to be ordered before this child. + */ + + for ( ; prev ; prev = prev->prev) { + /* Determine order of `prev'. */ + for (j = 0; j < RSORD_MAX; j++) + if (rsord[j] == prev->tok) + break; + if (j == RSORD_MAX) + j = -1; + + if (j <= i) + break; + } + + /* + * Set this child back into its correct place + * in front of the `prev' node. 
+ */ + + nch->prev = prev; + + if (prev == NULL) { + np->child->prev = nch; + nch->next = np->child; + np->child = nch; + } else { + if (prev->next) + prev->next->prev = nch; + nch->next = prev->next; + prev->next = nch; + } + } +} + +/* + * For some arguments of some macros, + * convert all breakable hyphens into ASCII_HYPH. + */ +static void +post_hyph(POST_ARGS) +{ + struct roff_node *n, *nch; + char *cp; + + n = mdoc->last; + for (nch = n->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_TEXT) + continue; + cp = nch->string; + if (*cp == '\0') + continue; + while (*(++cp) != '\0') + if (*cp == '-' && + isalpha((unsigned char)cp[-1]) && + isalpha((unsigned char)cp[1])) { + if (n->tag == NULL && n->flags & NODE_ID) + n->tag = mandoc_strdup(nch->string); + *cp = ASCII_HYPH; + } + } +} + +static void +post_ns(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->flags & NODE_LINE || + (n->next != NULL && n->next->flags & NODE_DELIMC)) + mandoc_msg(MANDOCERR_NS_SKIP, n->line, n->pos, NULL); +} + +static void +post_sx(POST_ARGS) +{ + post_delim(mdoc); + post_hyph(mdoc); +} + +static void +post_sh(POST_ARGS) +{ + post_section(mdoc); + + switch (mdoc->last->type) { + case ROFFT_HEAD: + post_sh_head(mdoc); + break; + case ROFFT_BODY: + switch (mdoc->lastsec) { + case SEC_NAME: + post_sh_name(mdoc); + break; + case SEC_SEE_ALSO: + post_sh_see_also(mdoc); + break; + case SEC_AUTHORS: + post_sh_authors(mdoc); + break; + default: + break; + } + break; + default: + break; + } +} + +static void +post_sh_name(POST_ARGS) +{ + struct roff_node *n; + int hasnm, hasnd; + + hasnm = hasnd = 0; + + for (n = mdoc->last->child; n != NULL; n = n->next) { + switch (n->tok) { + case MDOC_Nm: + if (hasnm && n->child != NULL) + mandoc_msg(MANDOCERR_NAMESEC_PUNCT, + n->line, n->pos, + "Nm %s", n->child->string); + hasnm = 1; + continue; + case MDOC_Nd: + hasnd = 1; + if (n->next != NULL) + mandoc_msg(MANDOCERR_NAMESEC_ND, + n->line, n->pos, NULL); + break; + case 
TOKEN_NONE: + if (n->type == ROFFT_TEXT && + n->string[0] == ',' && n->string[1] == '\0' && + n->next != NULL && n->next->tok == MDOC_Nm) { + n = n->next; + continue; + } + /* FALLTHROUGH */ + default: + mandoc_msg(MANDOCERR_NAMESEC_BAD, + n->line, n->pos, "%s", roff_name[n->tok]); + continue; + } + break; + } + + if ( ! hasnm) + mandoc_msg(MANDOCERR_NAMESEC_NONM, + mdoc->last->line, mdoc->last->pos, NULL); + if ( ! hasnd) + mandoc_msg(MANDOCERR_NAMESEC_NOND, + mdoc->last->line, mdoc->last->pos, NULL); +} + +static void +post_sh_see_also(POST_ARGS) +{ + const struct roff_node *n; + const char *name, *sec; + const char *lastname, *lastsec, *lastpunct; + int cmp; + + n = mdoc->last->child; + lastname = lastsec = lastpunct = NULL; + while (n != NULL) { + if (n->tok != MDOC_Xr || + n->child == NULL || + n->child->next == NULL) + break; + + /* Process one .Xr node. */ + + name = n->child->string; + sec = n->child->next->string; + if (lastsec != NULL) { + if (lastpunct[0] != ',' || lastpunct[1] != '\0') + mandoc_msg(MANDOCERR_XR_PUNCT, n->line, + n->pos, "%s before %s(%s)", + lastpunct, name, sec); + cmp = strcmp(lastsec, sec); + if (cmp > 0) + mandoc_msg(MANDOCERR_XR_ORDER, n->line, + n->pos, "%s(%s) after %s(%s)", + name, sec, lastname, lastsec); + else if (cmp == 0 && + strcasecmp(lastname, name) > 0) + mandoc_msg(MANDOCERR_XR_ORDER, n->line, + n->pos, "%s after %s", name, lastname); + } + lastname = name; + lastsec = sec; + + /* Process the following node. 
*/ + + n = n->next; + if (n == NULL) + break; + if (n->tok == MDOC_Xr) { + lastpunct = "none"; + continue; + } + if (n->type != ROFFT_TEXT) + break; + for (name = n->string; *name != '\0'; name++) + if (isalpha((const unsigned char)*name)) + return; + lastpunct = n->string; + if (n->next == NULL || n->next->tok == MDOC_Rs) + mandoc_msg(MANDOCERR_XR_PUNCT, n->line, + n->pos, "%s after %s(%s)", + lastpunct, lastname, lastsec); + n = n->next; + } +} + +static int +child_an(const struct roff_node *n) +{ + + for (n = n->child; n != NULL; n = n->next) + if ((n->tok == MDOC_An && n->child != NULL) || child_an(n)) + return 1; + return 0; +} + +static void +post_sh_authors(POST_ARGS) +{ + + if ( ! child_an(mdoc->last)) + mandoc_msg(MANDOCERR_AN_MISSING, + mdoc->last->line, mdoc->last->pos, NULL); +} + +/* + * Return an upper bound for the string distance (allowing + * transpositions). Not a full Levenshtein implementation + * because Levenshtein is quadratic in the string length + * and this function is called for every standard name, + * so the check for each custom name would be cubic. + * The following crude heuristics is linear, resulting + * in quadratic behaviour for checking one custom name, + * which does not cause measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion */ + s2++; + else if (s1[1] == s2[0]) /* deletion */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? 
INT_MAX : dist; +} + +static void +post_sh_head(POST_ARGS) +{ + struct roff_node *nch; + const char *goodsec; + const char *const *testsec; + int dist, mindist; + enum roff_sec sec; + + /* + * Process a new section. Sections are either "named" or + * "custom". Custom sections are user-defined, while named ones + * follow a conventional order and may only appear in certain + * manual sections. + */ + + sec = mdoc->last->sec; + + /* The NAME should be first. */ + + if (sec != SEC_NAME && mdoc->lastnamed == SEC_NONE) + mandoc_msg(MANDOCERR_NAMESEC_FIRST, + mdoc->last->line, mdoc->last->pos, "Sh %s", + sec != SEC_CUSTOM ? secnames[sec] : + (nch = mdoc->last->child) == NULL ? "" : + nch->type == ROFFT_TEXT ? nch->string : + roff_name[nch->tok]); + + /* The SYNOPSIS gets special attention in other areas. */ + + if (sec == SEC_SYNOPSIS) { + roff_setreg(mdoc->roff, "nS", 1, '='); + mdoc->flags |= MDOC_SYNOPSIS; + } else { + roff_setreg(mdoc->roff, "nS", 0, '='); + mdoc->flags &= ~MDOC_SYNOPSIS; + } + if (sec == SEC_DESCRIPTION) + fn_prio = TAG_STRONG; + + /* Mark our last section. */ + + mdoc->lastsec = sec; + + /* We don't care about custom sections after this. */ + + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_msg(MANDOCERR_SEC_TYPO, nch->line, nch->pos, + "Sh %s instead of %s", nch->string, goodsec); + return; + } + + /* + * Check whether our non-custom section is being repeated or is + * out of order. 
+ */ + + if (sec == mdoc->lastnamed) + mandoc_msg(MANDOCERR_SEC_REP, mdoc->last->line, + mdoc->last->pos, "Sh %s", secnames[sec]); + + if (sec < mdoc->lastnamed) + mandoc_msg(MANDOCERR_SEC_ORDER, mdoc->last->line, + mdoc->last->pos, "Sh %s", secnames[sec]); + + /* Mark the last named section. */ + + mdoc->lastnamed = sec; + + /* Check particular section/manual conventions. */ + + if (mdoc->meta.msec == NULL) + return; + + goodsec = NULL; + switch (sec) { + case SEC_ERRORS: + if (*mdoc->meta.msec == '4') + break; + goodsec = "2, 3, 4, 9"; + /* FALLTHROUGH */ + case SEC_RETURN_VALUES: + case SEC_LIBRARY: + if (*mdoc->meta.msec == '2') + break; + if (*mdoc->meta.msec == '3') + break; + if (NULL == goodsec) + goodsec = "2, 3, 9"; + /* FALLTHROUGH */ + case SEC_CONTEXT: + if (*mdoc->meta.msec == '9') + break; + if (NULL == goodsec) + goodsec = "9"; + mandoc_msg(MANDOCERR_SEC_MSEC, + mdoc->last->line, mdoc->last->pos, + "Sh %s for %s only", secnames[sec], goodsec); + break; + default: + break; + } +} + +static void +post_xr(POST_ARGS) +{ + struct roff_node *n, *nch; + + n = mdoc->last; + nch = n->child; + if (nch->next == NULL) { + mandoc_msg(MANDOCERR_XR_NOSEC, + n->line, n->pos, "Xr %s", nch->string); + } else { + assert(nch->next == n->last); + if(mandoc_xr_add(nch->next->string, nch->string, + nch->line, nch->pos)) + mandoc_msg(MANDOCERR_XR_SELF, + nch->line, nch->pos, "Xr %s %s", + nch->string, nch->next->string); + } + post_delim_nb(mdoc); +} + +static void +post_section(POST_ARGS) +{ + struct roff_node *n, *nch; + char *cp, *tag; + + n = mdoc->last; + switch (n->type) { + case ROFFT_BLOCK: + post_prevpar(mdoc); + return; + case ROFFT_HEAD: + tag = NULL; + deroff(&tag, n); + if (tag != NULL) { + for (cp = tag; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + if ((nch = n->child) != NULL && + nch->type == ROFFT_TEXT && + strcmp(nch->string, tag) == 0) + tag_put(NULL, TAG_WEAK, n); + else + tag_put(tag, TAG_FALLBACK, n); + free(tag); + } + post_delim(mdoc); + 
post_hyph(mdoc); + return; + case ROFFT_BODY: + break; + default: + return; + } + if ((nch = n->child) != NULL && + (nch->tok == MDOC_Pp || nch->tok == ROFF_br || + nch->tok == ROFF_sp)) { + mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos, + "%s after %s", roff_name[nch->tok], + roff_name[n->tok]); + roff_node_delete(mdoc, nch); + } + if ((nch = n->last) != NULL && + (nch->tok == MDOC_Pp || nch->tok == ROFF_br)) { + mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos, + "%s at the end of %s", roff_name[nch->tok], + roff_name[n->tok]); + roff_node_delete(mdoc, nch); + } +} + +static void +post_prevpar(POST_ARGS) +{ + struct roff_node *n, *np; + + n = mdoc->last; + if (n->type != ROFFT_ELEM && n->type != ROFFT_BLOCK) + return; + if ((np = roff_node_prev(n)) == NULL) + return; + + /* + * Don't allow `Pp' prior to a paragraph-type + * block: `Pp' or non-compact `Bd' or `Bl'. + */ + + if (np->tok != MDOC_Pp && np->tok != ROFF_br) + return; + if (n->tok == MDOC_Bl && n->norm->Bl.comp) + return; + if (n->tok == MDOC_Bd && n->norm->Bd.comp) + return; + if (n->tok == MDOC_It && n->parent->norm->Bl.comp) + return; + + mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos, + "%s before %s", roff_name[np->tok], roff_name[n->tok]); + roff_node_delete(mdoc, np); +} + +static void +post_par(POST_ARGS) +{ + struct roff_node *np; + + fn_prio = TAG_STRONG; + post_prevpar(mdoc); + + np = mdoc->last; + if (np->child != NULL) + mandoc_msg(MANDOCERR_ARG_SKIP, np->line, np->pos, + "%s %s", roff_name[np->tok], np->child->string); +} + +static void +post_dd(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + n->flags |= NODE_NOPRT; + + if (mdoc->meta.date != NULL) { + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Dd"); + free(mdoc->meta.date); + } else if (mdoc->flags & MDOC_PBODY) + mandoc_msg(MANDOCERR_PROLOG_LATE, n->line, n->pos, "Dd"); + else if (mdoc->meta.title != NULL) + mandoc_msg(MANDOCERR_PROLOG_ORDER, + n->line, n->pos, "Dd after Dt"); + else if (mdoc->meta.os != NULL) 
+ mandoc_msg(MANDOCERR_PROLOG_ORDER, + n->line, n->pos, "Dd after Os"); + + if (mdoc->quick && n != NULL) + mdoc->meta.date = mandoc_strdup(""); + else + mdoc->meta.date = mandoc_normdate(n->child, n); +} + +static void +post_dt(POST_ARGS) +{ + struct roff_node *nn, *n; + const char *cp; + char *p; + + n = mdoc->last; + n->flags |= NODE_NOPRT; + + if (mdoc->flags & MDOC_PBODY) { + mandoc_msg(MANDOCERR_DT_LATE, n->line, n->pos, "Dt"); + return; + } + + if (mdoc->meta.title != NULL) + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Dt"); + else if (mdoc->meta.os != NULL) + mandoc_msg(MANDOCERR_PROLOG_ORDER, + n->line, n->pos, "Dt after Os"); + + free(mdoc->meta.title); + free(mdoc->meta.msec); + free(mdoc->meta.vol); + free(mdoc->meta.arch); + + mdoc->meta.title = NULL; + mdoc->meta.msec = NULL; + mdoc->meta.vol = NULL; + mdoc->meta.arch = NULL; + + /* Mandatory first argument: title. */ + + nn = n->child; + if (nn == NULL || *nn->string == '\0') { + mandoc_msg(MANDOCERR_DT_NOTITLE, n->line, n->pos, "Dt"); + mdoc->meta.title = mandoc_strdup("UNTITLED"); + } else { + mdoc->meta.title = mandoc_strdup(nn->string); + + /* Check that all characters are uppercase. */ + + for (p = nn->string; *p != '\0'; p++) + if (islower((unsigned char)*p)) { + mandoc_msg(MANDOCERR_TITLE_CASE, nn->line, + nn->pos + (int)(p - nn->string), + "Dt %s", nn->string); + break; + } + } + + /* Mandatory second argument: section. */ + + if (nn != NULL) + nn = nn->next; + + if (nn == NULL) { + mandoc_msg(MANDOCERR_MSEC_MISSING, n->line, n->pos, + "Dt %s", mdoc->meta.title); + mdoc->meta.vol = mandoc_strdup("LOCAL"); + return; /* msec and arch remain NULL. */ + } + + mdoc->meta.msec = mandoc_strdup(nn->string); + + /* Infer volume title from section number. */ + + cp = mandoc_a2msec(nn->string); + if (cp == NULL) { + mandoc_msg(MANDOCERR_MSEC_BAD, + nn->line, nn->pos, "Dt ... 
%s", nn->string); + mdoc->meta.vol = mandoc_strdup(nn->string); + } else { + mdoc->meta.vol = mandoc_strdup(cp); + if (mdoc->filesec != '\0' && + mdoc->filesec != *nn->string && + *nn->string >= '1' && *nn->string <= '9') + mandoc_msg(MANDOCERR_MSEC_FILE, nn->line, nn->pos, + "*.%c vs Dt ... %c", mdoc->filesec, *nn->string); + } + + /* Optional third argument: architecture. */ + + if ((nn = nn->next) == NULL) + return; + + for (p = nn->string; *p != '\0'; p++) + *p = tolower((unsigned char)*p); + mdoc->meta.arch = mandoc_strdup(nn->string); + + /* Ignore fourth and later arguments. */ + + if ((nn = nn->next) != NULL) + mandoc_msg(MANDOCERR_ARG_EXCESS, + nn->line, nn->pos, "Dt ... %s", nn->string); +} + +static void +post_bx(POST_ARGS) +{ + struct roff_node *n, *nch; + const char *macro; + + post_delim_nb(mdoc); + + n = mdoc->last; + nch = n->child; + + if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, + n->line, n->pos, "%s", macro); + mdoc->last = nch; + nch = nch->next; + mdoc->next = ROFF_NEXT_SIBLING; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->next = ROFF_NEXT_SIBLING; + } else + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "BSD"); + mdoc->last->flags |= NODE_NOSRC; + + if (nch == NULL) { + mdoc->last = n; + return; + } + + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->next = ROFF_NEXT_SIBLING; + roff_word_alloc(mdoc, n->line, n->pos, "-"); + mdoc->last->flags |= NODE_NOSRC; + roff_elem_alloc(mdoc, n->line, n->pos, MDOC_Ns); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; + + /* + * Make `Bx's second argument always start with an uppercase + * letter. Groff checks if it's an "accepted" term, but we just + * uppercase blindly. 
+ */ + + *nch->string = (char)toupper((unsigned char)*nch->string); +} + +static void +post_os(POST_ARGS) +{ +#ifndef OSNAME + struct utsname utsname; + static char *defbuf; +#endif + struct roff_node *n; + + n = mdoc->last; + n->flags |= NODE_NOPRT; + + if (mdoc->meta.os != NULL) + mandoc_msg(MANDOCERR_PROLOG_REP, n->line, n->pos, "Os"); + else if (mdoc->flags & MDOC_PBODY) + mandoc_msg(MANDOCERR_PROLOG_LATE, n->line, n->pos, "Os"); + + post_delim(mdoc); + + /* + * Set the operating system by way of the `Os' macro. + * The order of precedence is: + * 1. the argument of the `Os' macro, unless empty + * 2. the -Ios=foo command line argument, if provided + * 3. -DOSNAME="\"foo\"", if provided during compilation + * 4. "sysname release" from uname(3) + */ + + free(mdoc->meta.os); + mdoc->meta.os = NULL; + deroff(&mdoc->meta.os, n); + if (mdoc->meta.os) + goto out; + + if (mdoc->os_s != NULL) { + mdoc->meta.os = mandoc_strdup(mdoc->os_s); + goto out; + } + +#ifdef OSNAME + mdoc->meta.os = mandoc_strdup(OSNAME); +#else /*!OSNAME */ + if (defbuf == NULL) { + if (uname(&utsname) == -1) { + mandoc_msg(MANDOCERR_OS_UNAME, n->line, n->pos, "Os"); + defbuf = mandoc_strdup("UNKNOWN"); + } else + mandoc_asprintf(&defbuf, "%s %s", + utsname.sysname, utsname.release); + } + mdoc->meta.os = mandoc_strdup(defbuf); +#endif /*!OSNAME*/ + +out: + if (mdoc->meta.os_e == MANDOC_OS_OTHER) { + if (strstr(mdoc->meta.os, "OpenBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(mdoc->meta.os, "NetBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_NETBSD; + } + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + if (n->child != NULL) + mandoc_msg(MANDOCERR_OS_ARG, n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? 
+ "OpenBSD" : "NetBSD"); + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MANDOC_OS_OPENBSD) + mandoc_msg(MANDOCERR_MDOCDATE_MISSING, n->line, + n->pos, "Dd %s (OpenBSD)", n->string); + } else { + if (mdoc->meta.os_e == MANDOC_OS_NETBSD) + mandoc_msg(MANDOCERR_MDOCDATE, n->line, + n->pos, "Dd %s (NetBSD)", n->string); + } +} + +enum roff_sec +mdoc_a2sec(const char *p) +{ + int i; + + for (i = 0; i < (int)SEC__MAX; i++) + if (secnames[i] && 0 == strcmp(p, secnames[i])) + return (enum roff_sec)i; + + return SEC_CUSTOM; +} + +static size_t +macro2len(enum roff_tok macro) +{ + + switch (macro) { + case MDOC_Ad: + return 12; + case MDOC_Ao: + return 12; + case MDOC_An: + return 12; + case MDOC_Aq: + return 12; + case MDOC_Ar: + return 12; + case MDOC_Bo: + return 12; + case MDOC_Bq: + return 12; + case MDOC_Cd: + return 12; + case MDOC_Cm: + return 10; + case MDOC_Do: + return 10; + case MDOC_Dq: + return 12; + case MDOC_Dv: + return 12; + case MDOC_Eo: + return 12; + case MDOC_Em: + return 10; + case MDOC_Er: + return 17; + case MDOC_Ev: + return 15; + case MDOC_Fa: + return 12; + case MDOC_Fl: + return 10; + case MDOC_Fo: + return 16; + case MDOC_Fn: + return 16; + case MDOC_Ic: + return 10; + case MDOC_Li: + return 16; + case MDOC_Ms: + return 6; + case MDOC_Nm: + return 10; + case MDOC_No: + return 12; + case MDOC_Oo: + return 10; + case MDOC_Op: + return 14; + case MDOC_Pa: + return 32; + case MDOC_Pf: + return 12; + case MDOC_Po: + return 12; + case MDOC_Pq: + return 12; + case MDOC_Ql: + return 16; + case MDOC_Qo: + return 12; + case MDOC_So: + return 12; + case MDOC_Sq: + return 12; + case MDOC_Sy: + return 6; + case MDOC_Sx: + return 16; + case MDOC_Tn: + return 10; + case MDOC_Va: + return 12; + case MDOC_Vt: + return 12; + case MDOC_Xr: + return 10; + default: + break; + }; + return 0; +} diff --git a/usr.bin/mandoc/msec.c 
b/usr.bin/mandoc/msec.c new file mode 100644 index 0000000..813a140 --- /dev/null +++ b/usr.bin/mandoc/msec.c @@ -0,0 +1,35 @@ +/* $OpenBSD: msec.c,v 1.13 2018/12/14 01:17:46 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "libmandoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mandoc_a2msec(const char *p) +{ + +#include "msec.in" + + return NULL; +} diff --git a/usr.bin/mandoc/msec.in b/usr.bin/mandoc/msec.in new file mode 100644 index 0000000..f8d786e --- /dev/null +++ b/usr.bin/mandoc/msec.in @@ -0,0 +1,34 @@ +/* $OpenBSD: msec.in,v 1.6 2017/06/24 17:36:50 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * These are all possible manual-section macros and what they correspond + * to when rendered as the volume title. + * + * Be sure to escape strings. + */ + +LINE("1", "General Commands Manual") +LINE("2", "System Calls Manual") +LINE("3", "Library Functions Manual") +LINE("3p", "Perl Library Manual") +LINE("4", "Device Drivers Manual") +LINE("5", "File Formats Manual") +LINE("6", "Games Manual") +LINE("7", "Miscellaneous Information Manual") +LINE("8", "System Manager\'s Manual") +LINE("9", "Kernel Developer\'s Manual") diff --git a/usr.bin/mandoc/out.c b/usr.bin/mandoc/out.c new file mode 100644 index 0000000..7cc5702 --- /dev/null +++ b/usr.bin/mandoc/out.c @@ -0,0 +1,563 @@ +/* $OpenBSD: out.c,v 1.51 2019/12/31 22:49:17 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011,2014,2015,2017,2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "tbl.h" +#include "out.h" + +struct tbl_colgroup { + struct tbl_colgroup *next; + size_t wanted; + int startcol; + int endcol; +}; + +static size_t tblcalc_data(struct rofftbl *, struct roffcol *, + const struct tbl_opts *, const struct tbl_dat *, + size_t); +static size_t tblcalc_literal(struct rofftbl *, struct roffcol *, + const struct tbl_dat *, size_t); +static size_t tblcalc_number(struct rofftbl *, struct roffcol *, + const struct tbl_opts *, const struct tbl_dat *); + + +/* + * Parse the *src string and store a scaling unit into *dst. + * If the string doesn't specify the unit, use the default. + * If no default is specified, fail. + * Return a pointer to the byte after the last byte used, + * or NULL on total failure. + */ +const char * +a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) +{ + char *endptr; + + dst->unit = def == SCALE_MAX ? SCALE_BU : def; + dst->scale = strtod(src, &endptr); + if (endptr == src) + return NULL; + + switch (*endptr++) { + case 'c': + dst->unit = SCALE_CM; + break; + case 'i': + dst->unit = SCALE_IN; + break; + case 'f': + dst->unit = SCALE_FS; + break; + case 'M': + dst->unit = SCALE_MM; + break; + case 'm': + dst->unit = SCALE_EM; + break; + case 'n': + dst->unit = SCALE_EN; + break; + case 'P': + dst->unit = SCALE_PC; + break; + case 'p': + dst->unit = SCALE_PT; + break; + case 'u': + dst->unit = SCALE_BU; + break; + case 'v': + dst->unit = SCALE_VS; + break; + default: + endptr--; + if (SCALE_MAX == def) + return NULL; + dst->unit = def; + break; + } + return endptr; +} + +/* + * Calculate the abstract widths and decimal positions of columns in a + * table. This routine allocates the columns structures then runs over + * all rows and cells in the table. 
The function pointers in "tbl" are + * used for the actual width calculations. + */ +void +tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first, + size_t offset, size_t rmargin) +{ + struct roffsu su; + const struct tbl_opts *opts; + const struct tbl_span *sp; + const struct tbl_dat *dp; + struct roffcol *col; + struct tbl_colgroup *first_group, **gp, *g; + size_t *colwidth; + size_t ewidth, min1, min2, wanted, width, xwidth; + int done, icol, maxcol, necol, nxcol, quirkcol; + + /* + * Allocate the master column specifiers. These will hold the + * widths and decimal positions for all cells in the column. It + * must be freed and nullified by the caller. + */ + + assert(tbl->cols == NULL); + tbl->cols = mandoc_calloc((size_t)sp_first->opts->cols, + sizeof(struct roffcol)); + opts = sp_first->opts; + + maxcol = -1; + first_group = NULL; + for (sp = sp_first; sp != NULL; sp = sp->next) { + if (sp->pos != TBL_SPAN_DATA) + continue; + + /* + * Account for the data cells in the layout, matching it + * to data cells in the data section. + */ + + gp = &first_group; + for (dp = sp->first; dp != NULL; dp = dp->next) { + icol = dp->layout->col; + while (maxcol < icol + dp->hspans) + tbl->cols[++maxcol].spacing = SIZE_MAX; + col = tbl->cols + icol; + col->flags |= dp->layout->flags; + if (dp->layout->flags & TBL_CELL_WIGN) + continue; + + /* Handle explicit width specifications. */ + + if (dp->layout->wstr != NULL && + dp->layout->width == 0 && + a2roffsu(dp->layout->wstr, &su, SCALE_EN) + != NULL) + dp->layout->width = + (*tbl->sulen)(&su, tbl->arg); + if (col->width < dp->layout->width) + col->width = dp->layout->width; + if (dp->layout->spacing != SIZE_MAX && + (col->spacing == SIZE_MAX || + col->spacing < dp->layout->spacing)) + col->spacing = dp->layout->spacing; + + /* + * Calculate an automatic width. + * Except for spanning cells, apply it. + */ + + width = tblcalc_data(tbl, + dp->hspans == 0 ? col : NULL, + opts, dp, + dp->block == 0 ? 
0 : + dp->layout->width ? dp->layout->width : + rmargin ? (rmargin + sp->opts->cols / 2) + / (sp->opts->cols + 1) : 0); + if (dp->hspans == 0) + continue; + + /* + * Build an ordered, singly linked list + * of all groups of columns joined by spans, + * recording the minimum width for each group. + */ + + while (*gp != NULL && ((*gp)->startcol < icol || + (*gp)->endcol < icol + dp->hspans)) + gp = &(*gp)->next; + if (*gp == NULL || (*gp)->startcol > icol || + (*gp)->endcol > icol + dp->hspans) { + g = mandoc_malloc(sizeof(*g)); + g->next = *gp; + g->wanted = width; + g->startcol = icol; + g->endcol = icol + dp->hspans; + *gp = g; + } else if ((*gp)->wanted < width) + (*gp)->wanted = width; + } + } + + /* + * The minimum width of columns explicitly specified + * in the layout is 1n. + */ + + if (maxcol < sp_first->opts->cols - 1) + maxcol = sp_first->opts->cols - 1; + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if (col->width < 1) + col->width = 1; + + /* + * Column spacings are needed for span width + * calculations, so set the default values now. + */ + + if (col->spacing == SIZE_MAX || icol == maxcol) + col->spacing = 3; + } + + /* + * Replace the minimum widths with the missing widths, + * and dismiss groups that are already wide enough. + */ + + gp = &first_group; + while ((g = *gp) != NULL) { + done = 0; + for (icol = g->startcol; icol <= g->endcol; icol++) { + width = tbl->cols[icol].width; + if (icol < g->endcol) + width += tbl->cols[icol].spacing; + if (g->wanted <= width) { + done = 1; + break; + } else + (*gp)->wanted -= width; + } + if (done) { + *gp = g->next; + free(g); + } else + gp = &(*gp)->next; + } + + colwidth = mandoc_reallocarray(NULL, maxcol + 1, sizeof(*colwidth)); + while (first_group != NULL) { + + /* + * Rebuild the array of the widths of all columns + * participating in spans that require expansion. 
+ */ + + for (icol = 0; icol <= maxcol; icol++) + colwidth[icol] = SIZE_MAX; + for (g = first_group; g != NULL; g = g->next) + for (icol = g->startcol; icol <= g->endcol; icol++) + colwidth[icol] = tbl->cols[icol].width; + + /* + * Find the smallest and second smallest column width + * among the columns which may need expansion. + */ + + min1 = min2 = SIZE_MAX; + for (icol = 0; icol <= maxcol; icol++) { + if (min1 > colwidth[icol]) { + min2 = min1; + min1 = colwidth[icol]; + } else if (min1 < colwidth[icol] && + min2 > colwidth[icol]) + min2 = colwidth[icol]; + } + + /* + * Find the minimum wanted width + * for any one of the narrowest columns, + * and mark the columns wanting that width. + */ + + wanted = min2; + for (g = first_group; g != NULL; g = g->next) { + necol = 0; + for (icol = g->startcol; icol <= g->endcol; icol++) + if (tbl->cols[icol].width == min1) + necol++; + if (necol == 0) + continue; + width = min1 + (g->wanted - 1) / necol + 1; + if (width > min2) + width = min2; + if (wanted > width) + wanted = width; + for (icol = g->startcol; icol <= g->endcol; icol++) + if (colwidth[icol] == min1 || + (colwidth[icol] < min2 && + colwidth[icol] > width)) + colwidth[icol] = width; + } + + /* Record the effect of the widening on the group list. */ + + gp = &first_group; + while ((g = *gp) != NULL) { + done = 0; + for (icol = g->startcol; icol <= g->endcol; icol++) { + if (colwidth[icol] != wanted || + tbl->cols[icol].width == wanted) + continue; + if (g->wanted <= wanted - min1) { + done = 1; + break; + } + g->wanted -= wanted - min1; + } + if (done) { + *gp = g->next; + free(g); + } else + gp = &(*gp)->next; + } + + /* Record the effect of the widening on the columns. */ + + for (icol = 0; icol <= maxcol; icol++) + if (colwidth[icol] == wanted) + tbl->cols[icol].width = wanted; + } + free(colwidth); + + /* + * Align numbers with text. + * Count columns to equalize and columns to maximize. + * Find maximum width of the columns to equalize. 
+ * Find total width of the columns *not* to maximize. + */ + + necol = nxcol = 0; + ewidth = xwidth = 0; + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if (col->width > col->nwidth) + col->decimal += (col->width - col->nwidth) / 2; + else + col->width = col->nwidth; + if (col->flags & TBL_CELL_EQUAL) { + necol++; + if (ewidth < col->width) + ewidth = col->width; + } + if (col->flags & TBL_CELL_WMAX) + nxcol++; + else + xwidth += col->width; + } + + /* + * Equalize columns, if requested for any of them. + * Update total width of the columns not to maximize. + */ + + if (necol) { + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if ( ! (col->flags & TBL_CELL_EQUAL)) + continue; + if (col->width == ewidth) + continue; + if (nxcol && rmargin) + xwidth += ewidth - col->width; + col->width = ewidth; + } + } + + /* + * If there are any columns to maximize, find the total + * available width, deducting 3n margins between columns. + * Distribute the available width evenly. + */ + + if (nxcol && rmargin) { + xwidth += 3*maxcol + + (opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) ? + 2 : !!opts->lvert + !!opts->rvert); + if (rmargin <= offset + xwidth) + return; + xwidth = rmargin - offset - xwidth; + + /* + * Emulate a bug in GNU tbl width calculation that + * manifests itself for large numbers of x-columns. + * Emulating it for 5 x-columns gives identical + * behaviour for up to 6 x-columns. + */ + + if (nxcol == 5) { + quirkcol = xwidth % nxcol + 2; + if (quirkcol != 3 && quirkcol != 4) + quirkcol = -1; + } else + quirkcol = -1; + + necol = 0; + ewidth = 0; + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if ( ! 
(col->flags & TBL_CELL_WMAX)) + continue; + col->width = (double)xwidth * ++necol / nxcol + - ewidth + 0.4995; + if (necol == quirkcol) + col->width--; + ewidth += col->width; + } + } +} + +static size_t +tblcalc_data(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_opts *opts, const struct tbl_dat *dp, size_t mw) +{ + size_t sz; + + /* Branch down into data sub-types. */ + + switch (dp->layout->pos) { + case TBL_CELL_HORIZ: + case TBL_CELL_DHORIZ: + sz = (*tbl->len)(1, tbl->arg); + if (col != NULL && col->width < sz) + col->width = sz; + return sz; + case TBL_CELL_LONG: + case TBL_CELL_CENTRE: + case TBL_CELL_LEFT: + case TBL_CELL_RIGHT: + return tblcalc_literal(tbl, col, dp, mw); + case TBL_CELL_NUMBER: + return tblcalc_number(tbl, col, opts, dp); + case TBL_CELL_DOWN: + return 0; + default: + abort(); + } +} + +static size_t +tblcalc_literal(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_dat *dp, size_t mw) +{ + const char *str; /* Beginning of the first line. */ + const char *beg; /* Beginning of the current line. */ + char *end; /* End of the current line. */ + size_t lsz; /* Length of the current line. */ + size_t wsz; /* Length of the current word. */ + size_t msz; /* Length of the longest line. */ + + if (dp->string == NULL || *dp->string == '\0') + return 0; + str = mw ? mandoc_strdup(dp->string) : dp->string; + msz = lsz = 0; + for (beg = str; beg != NULL && *beg != '\0'; beg = end) { + end = mw ? 
strchr(beg, ' ') : NULL; + if (end != NULL) { + *end++ = '\0'; + while (*end == ' ') + end++; + } + wsz = (*tbl->slen)(beg, tbl->arg); + if (mw && lsz && lsz + 1 + wsz <= mw) + lsz += 1 + wsz; + else + lsz = wsz; + if (msz < lsz) + msz = lsz; + } + if (mw) + free((void *)str); + if (col != NULL && col->width < msz) + col->width = msz; + return msz; +} + +static size_t +tblcalc_number(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_opts *opts, const struct tbl_dat *dp) +{ + const char *cp, *lastdigit, *lastpoint; + size_t intsz, totsz; + char buf[2]; + + if (dp->string == NULL || *dp->string == '\0') + return 0; + + totsz = (*tbl->slen)(dp->string, tbl->arg); + if (col == NULL) + return totsz; + + /* + * Find the last digit and + * the last decimal point that is adjacent to a digit. + * The alignment indicator "\&" overrides everything. + */ + + lastdigit = lastpoint = NULL; + for (cp = dp->string; cp[0] != '\0'; cp++) { + if (cp[0] == '\\' && cp[1] == '&') { + lastdigit = lastpoint = cp; + break; + } else if (cp[0] == opts->decimal && + (isdigit((unsigned char)cp[1]) || + (cp > dp->string && isdigit((unsigned char)cp[-1])))) + lastpoint = cp; + else if (isdigit((unsigned char)cp[0])) + lastdigit = cp; + } + + /* Not a number, treat as a literal string. */ + + if (lastdigit == NULL) { + if (col != NULL && col->width < totsz) + col->width = totsz; + return totsz; + } + + /* Measure the width of the integer part. */ + + if (lastpoint == NULL) + lastpoint = lastdigit + 1; + intsz = 0; + buf[1] = '\0'; + for (cp = dp->string; cp < lastpoint; cp++) { + buf[0] = cp[0]; + intsz += (*tbl->slen)(buf, tbl->arg); + } + + /* + * If this number has more integer digits than all numbers + * seen on earlier lines, shift them all to the right. + * If it has fewer, shift this number to the right. 
+ */ + + if (intsz > col->decimal) { + col->nwidth += intsz - col->decimal; + col->decimal = intsz; + } else + totsz += col->decimal - intsz; + + /* Update the maximum total width seen so far. */ + + if (totsz > col->nwidth) + col->nwidth = totsz; + return totsz; +} diff --git a/usr.bin/mandoc/out.h b/usr.bin/mandoc/out.h new file mode 100644 index 0000000..fcd691c --- /dev/null +++ b/usr.bin/mandoc/out.h @@ -0,0 +1,70 @@ +/* $OpenBSD: out.h,v 1.25 2020/04/03 11:34:19 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Utilities for use by multiple mandoc(1) formatters. + */ + +enum roffscale { + SCALE_CM, /* centimeters (c) */ + SCALE_IN, /* inches (i) */ + SCALE_PC, /* pica (P) */ + SCALE_PT, /* points (p) */ + SCALE_EM, /* ems (m) */ + SCALE_MM, /* mini-ems (M) */ + SCALE_EN, /* ens (n) */ + SCALE_BU, /* default horizontal (u) */ + SCALE_VS, /* default vertical (v) */ + SCALE_FS, /* syn. for u (f) */ + SCALE_MAX +}; + +struct roffcol { + size_t width; /* width of cell */ + size_t nwidth; /* max. 
width of number in cell */ + size_t decimal; /* decimal position in cell */ + size_t spacing; /* spacing after the column */ + int flags; /* layout flags, see tbl_cell */ +}; + +struct roffsu { + enum roffscale unit; + double scale; +}; + +typedef size_t (*tbl_sulen)(const struct roffsu *, void *); +typedef size_t (*tbl_strlen)(const char *, void *); +typedef size_t (*tbl_len)(size_t, void *); + +struct rofftbl { + tbl_sulen sulen; /* calculate scaling unit length */ + tbl_strlen slen; /* calculate string length */ + tbl_len len; /* produce width of empty space */ + struct roffcol *cols; /* master column specifiers */ + void *arg; /* passed to sulen, slen, and len */ +}; + +#define SCALE_HS_INIT(p, v) \ + do { (p)->unit = SCALE_EN; \ + (p)->scale = (v); } \ + while (/* CONSTCOND */ 0) + + +struct tbl_span; + +const char *a2roffsu(const char *, struct roffsu *, enum roffscale); +void tblcalc(struct rofftbl *, + const struct tbl_span *, size_t, size_t); diff --git a/usr.bin/mandoc/preconv.c b/usr.bin/mandoc/preconv.c new file mode 100644 index 0000000..2cbdcda --- /dev/null +++ b/usr.bin/mandoc/preconv.c @@ -0,0 +1,177 @@ +/* $OpenBSD: preconv.c,v 1.9 2018/12/13 11:55:14 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "mandoc_parse.h" +#include "libmandoc.h" + +int +preconv_encode(const struct buf *ib, size_t *ii, struct buf *ob, size_t *oi, + int *filenc) +{ + const unsigned char *cu; + int nby; + unsigned int accum; + + cu = (const unsigned char *)ib->buf + *ii; + assert(*cu & 0x80); + + if ( ! (*filenc & MPARSE_UTF8)) + goto latin; + + nby = 1; + while (nby < 5 && *cu & (1 << (7 - nby))) + nby++; + + switch (nby) { + case 2: + accum = *cu & 0x1f; + if (accum < 0x02) /* Obfuscated ASCII. */ + goto latin; + break; + case 3: + accum = *cu & 0x0f; + break; + case 4: + accum = *cu & 0x07; + if (accum > 0x04) /* Beyond Unicode. */ + goto latin; + break; + default: /* Bad sequence header. */ + goto latin; + } + + cu++; + switch (nby) { + case 3: + if ((accum == 0x00 && ! (*cu & 0x20)) || /* Use 2-byte. */ + (accum == 0x0d && *cu & 0x20)) /* Surrogates. */ + goto latin; + break; + case 4: + if ((accum == 0x00 && ! (*cu & 0x30)) || /* Use 3-byte. */ + (accum == 0x04 && *cu & 0x30)) /* Beyond Unicode. */ + goto latin; + break; + default: + break; + } + + while (--nby) { + if ((*cu & 0xc0) != 0x80) /* Invalid continuation. */ + goto latin; + accum <<= 6; + accum += *cu & 0x3f; + cu++; + } + + assert(accum > 0x7f); + assert(accum < 0x110000); + assert(accum < 0xd800 || accum > 0xdfff); + + *oi += snprintf(ob->buf + *oi, 11, "\\[u%.4X]", accum); + *ii = (const char *)cu - ib->buf; + *filenc &= ~MPARSE_LATIN1; + return 1; + +latin: + if ( ! 
(*filenc & MPARSE_LATIN1)) + return 0; + + *oi += snprintf(ob->buf + *oi, 11, + "\\[u%.4X]", (unsigned char)ib->buf[(*ii)++]); + + *filenc &= ~MPARSE_UTF8; + return 1; +} + +int +preconv_cue(const struct buf *b, size_t offset) +{ + const char *ln, *eoln, *eoph; + size_t sz, phsz; + + ln = b->buf + offset; + sz = b->sz - offset; + + /* Look for the end-of-line. */ + + if (NULL == (eoln = memchr(ln, '\n', sz))) + eoln = ln + sz; + + /* Check if we have the correct header/trailer. */ + + if ((sz = (size_t)(eoln - ln)) < 10 || + memcmp(ln, ".\\\" -*-", 7) || memcmp(eoln - 3, "-*-", 3)) + return MPARSE_UTF8 | MPARSE_LATIN1; + + /* Move after the header and adjust for the trailer. */ + + ln += 7; + sz -= 10; + + while (sz > 0) { + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Find the end-of-phrase marker (or eoln). */ + + if (NULL == (eoph = memchr(ln, ';', sz))) + eoph = eoln - 3; + else + eoph++; + + /* Only account for the "coding" phrase. */ + + if ((phsz = eoph - ln) < 7 || + strncasecmp(ln, "coding:", 7)) { + sz -= phsz; + ln += phsz; + continue; + } + + sz -= 7; + ln += 7; + + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + return 0; + + /* Check us against known encodings. */ + + if (phsz > 4 && !strncasecmp(ln, "utf-8", 5)) + return MPARSE_UTF8; + if (phsz > 10 && !strncasecmp(ln, "iso-latin-1", 11)) + return MPARSE_LATIN1; + return 0; + } + return MPARSE_UTF8 | MPARSE_LATIN1; +} diff --git a/usr.bin/mandoc/predefs.in b/usr.bin/mandoc/predefs.in new file mode 100644 index 0000000..f022028 --- /dev/null +++ b/usr.bin/mandoc/predefs.in @@ -0,0 +1,65 @@ +/* $OpenBSD: predefs.in,v 1.4 2014/11/28 19:25:03 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The predefined-string translation tables. Each corresponds to a + * predefined string from (e.g.) tmac/mdoc/doc-nroff. The left-hand + * side corresponds to the input sequence (\*x, \*(xx and so on). The + * right-hand side is what's produced by libroff. + * + * XXX - C-escape strings! + * XXX - update PREDEF_MAX in roff.c if adding more! + */ + +PREDEF("Am", "&") +PREDEF("Ba", "\\fR|\\fP") +PREDEF("Ge", "\\(>=") +PREDEF("Gt", ">") +PREDEF("If", "infinity") +PREDEF("Le", "\\(<=") +PREDEF("Lq", "\\(lq") +PREDEF("Lt", "<") +PREDEF("Na", "NaN") +PREDEF("Ne", "\\(!=") +PREDEF("Pi", "pi") +PREDEF("Pm", "\\(+-") +PREDEF("Rq", "\\(rq") +PREDEF("left-bracket", "[") +PREDEF("left-parenthesis", "(") +PREDEF("lp", "(") +PREDEF("left-singlequote", "\\(oq") +PREDEF("q", "\\(dq") +PREDEF("quote-left", "\\(oq") +PREDEF("quote-right", "\\(cq") +PREDEF("R", "\\(rg") +PREDEF("right-bracket", "]") +PREDEF("right-parenthesis", ")") +PREDEF("rp", ")") +PREDEF("right-singlequote", "\\(cq") +PREDEF("Tm", "(Tm)") +PREDEF("Px", "POSIX") +PREDEF("Ai", "ANSI") +PREDEF("\'", "\\\'") +PREDEF("aa", "\\(aa") +PREDEF("ga", "\\(ga") +PREDEF("`", "\\`") +PREDEF("lq", "\\(lq") +PREDEF("rq", "\\(rq") +PREDEF("ua", "\\(ua") +PREDEF("va", "\\(va") +PREDEF("<=", "\\(<=") +PREDEF(">=", "\\(>=") diff --git a/usr.bin/mandoc/read.c b/usr.bin/mandoc/read.c new file mode 100644 index 0000000..a6a25de --- /dev/null +++ b/usr.bin/mandoc/read.c @@ -0,0 +1,727 @@ +/* $OpenBSD: read.c,v 
1.190 2020/04/24 11:58:02 schwarze Exp $ */ +/* + * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Top-level functions of the mandoc(3) parser: + * Parser and input encoding selection, decompression, + * handling of input bytes, characters, lines, and files, + * handling of roff(7) loops and file inclusion, + * and steering of the various parsers. 
+ */ +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zlib.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "mandoc_parse.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "tag.h" + +#define REPARSE_LIMIT 1000 + +struct mparse { + struct roff *roff; /* roff parser (!NULL) */ + struct roff_man *man; /* man parser */ + struct buf *primary; /* buffer currently being parsed */ + struct buf *secondary; /* copy of top level input */ + struct buf *loop; /* open .while request line */ + const char *os_s; /* default operating system */ + int options; /* parser options */ + int gzip; /* current input file is gzipped */ + int filenc; /* encoding of the current file */ + int reparse_count; /* finite interp. stack */ + int line; /* line number in the file */ +}; + +static void choose_parser(struct mparse *); +static void free_buf_list(struct buf *); +static void resize_buf(struct buf *, size_t); +static int mparse_buf_r(struct mparse *, struct buf, size_t, int); +static int read_whole_file(struct mparse *, int, struct buf *, int *); +static void mparse_end(struct mparse *); + + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; + buf->buf = mandoc_realloc(buf->buf, buf->sz); +} + +static void +free_buf_list(struct buf *buf) +{ + struct buf *tmp; + + while (buf != NULL) { + tmp = buf; + buf = tmp->next; + free(tmp->buf); + free(tmp); + } +} + +static void +choose_parser(struct mparse *curp) +{ + char *cp, *ep; + int format; + + /* + * If neither command line arguments -mdoc or -man select + * a parser nor the roff parser found a .Dd or .TH macro + * yet, look ahead in the main input buffer. 
+ */ + + if ((format = roff_getformat(curp->roff)) == 0) { + cp = curp->primary->buf; + ep = cp + curp->primary->sz; + while (cp < ep) { + if (*cp == '.' || *cp == '\'') { + cp++; + if (cp[0] == 'D' && cp[1] == 'd') { + format = MPARSE_MDOC; + break; + } + if (cp[0] == 'T' && cp[1] == 'H') { + format = MPARSE_MAN; + break; + } + } + cp = memchr(cp, '\n', ep - cp); + if (cp == NULL) + break; + cp++; + } + } + + if (format == MPARSE_MDOC) { + curp->man->meta.macroset = MACROSET_MDOC; + if (curp->man->mdocmac == NULL) + curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); + } else { + curp->man->meta.macroset = MACROSET_MAN; + if (curp->man->manmac == NULL) + curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); + } + curp->man->meta.first->tok = TOKEN_NONE; +} + +/* + * Main parse routine for a buffer. + * It assumes encoding and line numbering are already set up. + * It can recurse directly (for invocations of user-defined + * macros, inline equations, and input line traps) + * and indirectly (for .so file inclusion). + */ +static int +mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) +{ + struct buf ln; + struct buf *firstln, *lastln, *thisln, *loop; + char *cp; + size_t pos; /* byte number in the ln buffer */ + int line_result, result; + int of; + int lnn; /* line number in the real file */ + int fd; + int inloop; /* Saw .while on this level. */ + unsigned char c; + + ln.sz = 256; + ln.buf = mandoc_malloc(ln.sz); + ln.next = NULL; + firstln = lastln = loop = NULL; + lnn = curp->line; + pos = 0; + inloop = 0; + result = ROFF_CONT; + + while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) { + if (start) { + curp->line = lnn; + curp->reparse_count = 0; + + if (lnn < 3 && + curp->filenc & MPARSE_UTF8 && + curp->filenc & MPARSE_LATIN1) + curp->filenc = preconv_cue(&blk, i); + } + + while (i < blk.sz && (start || blk.buf[i] != '\0')) { + + /* + * When finding an unescaped newline character, + * leave the character loop to process the line. 
+ * Skip a preceding carriage return, if any. + */ + + if ('\r' == blk.buf[i] && i + 1 < blk.sz && + '\n' == blk.buf[i + 1]) + ++i; + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + + /* + * Make sure we have space for the worst + * case of 12 bytes: "\\[u10ffff]\n\0" + */ + + if (pos + 12 > ln.sz) + resize_buf(&ln, 256); + + /* + * Encode 8-bit input. + */ + + c = blk.buf[i]; + if (c & 0x80) { + if ( ! (curp->filenc && preconv_encode( + &blk, &i, &ln, &pos, &curp->filenc))) { + mandoc_msg(MANDOCERR_CHAR_BAD, + curp->line, pos, "0x%x", c); + ln.buf[pos++] = '?'; + i++; + } + continue; + } + + /* + * Exclude control characters. + */ + + if (c == 0x7f || (c < 0x20 && c != 0x09)) { + mandoc_msg(c == 0x00 || c == 0x04 || + c > 0x0a ? MANDOCERR_CHAR_BAD : + MANDOCERR_CHAR_UNSUPP, + curp->line, pos, "0x%x", c); + i++; + if (c != '\r') + ln.buf[pos++] = '?'; + continue; + } + + ln.buf[pos++] = blk.buf[i++]; + } + ln.buf[pos] = '\0'; + + /* + * Maintain a lookaside buffer of all lines + * parsed from this input source. + */ + + thisln = mandoc_malloc(sizeof(*thisln)); + thisln->buf = mandoc_strdup(ln.buf); + thisln->sz = strlen(ln.buf) + 1; + thisln->next = NULL; + if (firstln == NULL) { + firstln = lastln = thisln; + if (curp->secondary == NULL) + curp->secondary = firstln; + } else { + lastln->next = thisln; + lastln = thisln; + } + + /* XXX Ugly hack to mark the end of the input. */ + + if (i == blk.sz || blk.buf[i] == '\0') { + if (pos + 2 > ln.sz) + resize_buf(&ln, 256); + ln.buf[pos++] = '\n'; + ln.buf[pos] = '\0'; + } + + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; +rerun: + line_result = roff_parseln(curp->roff, curp->line, &ln, &of); + + /* Process options. 
*/ + + if (line_result & ROFF_APPEND) + assert(line_result == (ROFF_IGN | ROFF_APPEND)); + + if (line_result & ROFF_USERCALL) + assert((line_result & ROFF_MASK) == ROFF_REPARSE); + + if (line_result & ROFF_USERRET) { + assert(line_result == (ROFF_IGN | ROFF_USERRET)); + if (start == 0) { + /* Return from the current macro. */ + result = ROFF_USERRET; + goto out; + } + } + + switch (line_result & ROFF_LOOPMASK) { + case ROFF_IGN: + break; + case ROFF_WHILE: + if (curp->loop != NULL) { + if (loop == curp->loop) + break; + mandoc_msg(MANDOCERR_WHILE_NEST, + curp->line, pos, NULL); + } + curp->loop = thisln; + loop = NULL; + inloop = 1; + break; + case ROFF_LOOPCONT: + case ROFF_LOOPEXIT: + if (curp->loop == NULL) { + mandoc_msg(MANDOCERR_WHILE_FAIL, + curp->line, pos, NULL); + break; + } + if (inloop == 0) { + mandoc_msg(MANDOCERR_WHILE_INTO, + curp->line, pos, NULL); + curp->loop = loop = NULL; + break; + } + if (line_result & ROFF_LOOPCONT) + loop = curp->loop; + else { + curp->loop = loop = NULL; + inloop = 0; + } + break; + default: + abort(); + } + + /* Process the main instruction from the roff parser. */ + + switch (line_result & ROFF_MASK) { + case ROFF_IGN: + break; + case ROFF_CONT: + if (curp->man->meta.macroset == MACROSET_NONE) + choose_parser(curp); + if ((curp->man->meta.macroset == MACROSET_MDOC ? + mdoc_parseln(curp->man, curp->line, ln.buf, of) : + man_parseln(curp->man, curp->line, ln.buf, of) + ) == 2) + goto out; + break; + case ROFF_RERUN: + goto rerun; + case ROFF_REPARSE: + if (++curp->reparse_count > REPARSE_LIMIT) { + /* Abort and return to the top level. */ + result = ROFF_IGN; + mandoc_msg(MANDOCERR_ROFFLOOP, + curp->line, pos, NULL); + goto out; + } + result = mparse_buf_r(curp, ln, of, 0); + if (line_result & ROFF_USERCALL) { + roff_userret(curp->roff); + /* Continue normally. */ + if (result & ROFF_USERRET) + result = ROFF_CONT; + } + if (start == 0 && result != ROFF_CONT) + goto out; + break; + case ROFF_SO: + if ( ! 
(curp->options & MPARSE_SO) && + (i >= blk.sz || blk.buf[i] == '\0')) { + curp->man->meta.sodest = + mandoc_strdup(ln.buf + of); + goto out; + } + if ((fd = mparse_open(curp, ln.buf + of)) != -1) { + mparse_readfd(curp, fd, ln.buf + of); + close(fd); + } else { + mandoc_msg(MANDOCERR_SO_FAIL, + curp->line, of, ".so %s: %s", + ln.buf + of, strerror(errno)); + ln.sz = mandoc_asprintf(&cp, + ".sp\nSee the file %s.\n.sp", + ln.buf + of); + free(ln.buf); + ln.buf = cp; + of = 0; + mparse_buf_r(curp, ln, of, 0); + } + break; + default: + abort(); + } + + /* Start the next input line. */ + + if (loop != NULL && + (line_result & ROFF_LOOPMASK) == ROFF_IGN) + loop = loop->next; + + if (loop != NULL) { + if ((line_result & ROFF_APPEND) == 0) + *ln.buf = '\0'; + if (ln.sz < loop->sz) + resize_buf(&ln, loop->sz); + (void)strlcat(ln.buf, loop->buf, ln.sz); + of = 0; + goto rerun; + } + + pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0; + } +out: + if (inloop) { + if (result != ROFF_USERRET) + mandoc_msg(MANDOCERR_WHILE_OUTOF, + curp->line, pos, NULL); + curp->loop = NULL; + } + free(ln.buf); + if (firstln != curp->secondary) + free_buf_list(firstln); + return result; +} + +static int +read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap) +{ + struct stat st; + gzFile gz; + size_t off; + ssize_t ssz; + int gzerrnum, retval; + + if (fstat(fd, &st) == -1) { + mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno)); + return -1; + } + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. 
+ */ + + if (curp->gzip == 0 && S_ISREG(st.st_mode)) { + if (st.st_size > 0x7fffffff) { + mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); + return -1; + } + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return 0; + } + + if (curp->gzip) { + /* + * Duplicating the file descriptor is required + * because we will have to call gzclose(3) + * to free memory used internally by zlib, + * but that will also close the file descriptor, + * which this function must not do. + */ + if ((fd = dup(fd)) == -1) { + mandoc_msg(MANDOCERR_DUP, 0, 0, + "%s", strerror(errno)); + return -1; + } + if ((gz = gzdopen(fd, "rb")) == NULL) { + mandoc_msg(MANDOCERR_GZDOPEN, 0, 0, + "%s", strerror(errno)); + close(fd); + return -1; + } + } else + gz = NULL; + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. + */ + + *with_mmap = 0; + off = 0; + retval = -1; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz) { + if (fb->sz == (1U << 31)) { + mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); + break; + } + resize_buf(fb, 65536); + } + ssz = curp->gzip ? + gzread(gz, fb->buf + (int)off, fb->sz - off) : + read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + retval = 0; + break; + } + if (ssz == -1) { + if (curp->gzip) + (void)gzerror(gz, &gzerrnum); + mandoc_msg(MANDOCERR_READ, 0, 0, "%s", + curp->gzip && gzerrnum != Z_ERRNO ? + zError(gzerrnum) : strerror(errno)); + break; + } + off += (size_t)ssz; + } + + if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) + mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s", + gzerrnum == Z_ERRNO ? 
strerror(errno) : + zError(gzerrnum)); + if (retval == -1) { + free(fb->buf); + fb->buf = NULL; + } + return retval; +} + +static void +mparse_end(struct mparse *curp) +{ + if (curp->man->meta.macroset == MACROSET_NONE) + curp->man->meta.macroset = MACROSET_MAN; + if (curp->man->meta.macroset == MACROSET_MDOC) + mdoc_endparse(curp->man); + else + man_endparse(curp->man); + roff_endparse(curp->roff); +} + +/* + * Read the whole file into memory and call the parsers. + * Called recursively when an .so request is encountered. + */ +void +mparse_readfd(struct mparse *curp, int fd, const char *filename) +{ + static int recursion_depth; + + struct buf blk; + struct buf *save_primary; + const char *save_filename, *cp; + size_t offset; + int save_filenc, save_lineno; + int with_mmap; + + if (recursion_depth > 64) { + mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL); + return; + } else if (recursion_depth == 0 && + (cp = strrchr(filename, '.')) != NULL && + cp[1] >= '1' && cp[1] <= '9') + curp->man->filesec = cp[1]; + else + curp->man->filesec = '\0'; + + if (read_whole_file(curp, fd, &blk, &with_mmap) == -1) + return; + + /* + * Save some properties of the parent file. + */ + + save_primary = curp->primary; + save_filenc = curp->filenc; + save_lineno = curp->line; + save_filename = mandoc_msg_getinfilename(); + + curp->primary = &blk; + curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1); + curp->line = 1; + mandoc_msg_setinfilename(filename); + + /* Skip an UTF-8 byte order mark. */ + if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && + (unsigned char)blk.buf[0] == 0xef && + (unsigned char)blk.buf[1] == 0xbb && + (unsigned char)blk.buf[2] == 0xbf) { + offset = 3; + curp->filenc &= ~MPARSE_LATIN1; + } else + offset = 0; + + recursion_depth++; + mparse_buf_r(curp, blk, offset, 1); + if (--recursion_depth == 0) + mparse_end(curp); + + /* + * Clean up and restore saved parent properties. 
+ */ + + if (with_mmap) + munmap(blk.buf, blk.sz); + else + free(blk.buf); + + curp->primary = save_primary; + curp->filenc = save_filenc; + curp->line = save_lineno; + if (save_filename != NULL) + mandoc_msg_setinfilename(save_filename); +} + +int +mparse_open(struct mparse *curp, const char *file) +{ + char *cp; + int fd, save_errno; + + cp = strrchr(file, '.'); + curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); + + /* First try to use the filename as it is. */ + + if ((fd = open(file, O_RDONLY)) != -1) + return fd; + + /* + * If that doesn't work and the filename doesn't + * already end in .gz, try appending .gz. + */ + + if ( ! curp->gzip) { + save_errno = errno; + mandoc_asprintf(&cp, "%s.gz", file); + fd = open(cp, O_RDONLY); + free(cp); + errno = save_errno; + if (fd != -1) { + curp->gzip = 1; + return fd; + } + } + + /* Neither worked, give up. */ + + return -1; +} + +struct mparse * +mparse_alloc(int options, enum mandoc_os os_e, const char *os_s) +{ + struct mparse *curp; + + curp = mandoc_calloc(1, sizeof(struct mparse)); + + curp->options = options; + curp->os_s = os_s; + + curp->roff = roff_alloc(options); + curp->man = roff_man_alloc(curp->roff, curp->os_s, + curp->options & MPARSE_QUICK ? 
1 : 0); + if (curp->options & MPARSE_MDOC) { + curp->man->meta.macroset = MACROSET_MDOC; + if (curp->man->mdocmac == NULL) + curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); + } else if (curp->options & MPARSE_MAN) { + curp->man->meta.macroset = MACROSET_MAN; + if (curp->man->manmac == NULL) + curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); + } + curp->man->meta.first->tok = TOKEN_NONE; + curp->man->meta.os_e = os_e; + tag_alloc(); + return curp; +} + +void +mparse_reset(struct mparse *curp) +{ + tag_free(); + roff_reset(curp->roff); + roff_man_reset(curp->man); + free_buf_list(curp->secondary); + curp->secondary = NULL; + curp->gzip = 0; + tag_alloc(); +} + +void +mparse_free(struct mparse *curp) +{ + tag_free(); + roffhash_free(curp->man->mdocmac); + roffhash_free(curp->man->manmac); + roff_man_free(curp->man); + roff_free(curp->roff); + free_buf_list(curp->secondary); + free(curp); +} + +struct roff_meta * +mparse_result(struct mparse *curp) +{ + roff_state_reset(curp->man); + if (curp->options & MPARSE_VALIDATE) { + if (curp->man->meta.macroset == MACROSET_MDOC) + mdoc_validate(curp->man); + else + man_validate(curp->man); + tag_postprocess(curp->man, curp->man->meta.first); + } + return &curp->man->meta; +} + +void +mparse_copy(const struct mparse *p) +{ + struct buf *buf; + + for (buf = p->secondary; buf != NULL; buf = buf->next) + puts(buf->buf); +} diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c new file mode 100644 index 0000000..870305e --- /dev/null +++ b/usr.bin/mandoc/roff.c @@ -0,0 +1,4372 @@ +/* $OpenBSD: roff.c,v 1.246 2020/04/08 11:54:14 schwarze Exp $ */ +/* + * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Implementation of the roff(7) parser for mandoc(1). + */ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc.h" +#include "roff.h" +#include "mandoc_parse.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "tbl_parse.h" +#include "eqn_parse.h" + +/* + * ASCII_ESC is used to signal from roff_getarg() to roff_expand() + * that an escape sequence resulted from copy-in processing and + * needs to be checked or interpolated. As it is used nowhere + * else, it is defined here rather than in a header file. + */ +#define ASCII_ESC 27 + +/* Maximum number of string expansions per line, to break infinite loops. */ +#define EXPAND_LIMIT 1000 + +/* Types of definitions of macros and strings. */ +#define ROFFDEF_USER (1 << 1) /* User-defined. */ +#define ROFFDEF_PRE (1 << 2) /* Predefined. */ +#define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ +#define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ +#define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ + ROFFDEF_REN | ROFFDEF_STD) +#define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ + +/* --- data types --------------------------------------------------------- */ + +/* + * An incredibly-simple string buffer. 
+ */ +struct roffstr { + char *p; /* nil-terminated buffer */ + size_t sz; /* saved strlen(p) */ +}; + +/* + * A key-value roffstr pair as part of a singly-linked list. + */ +struct roffkv { + struct roffstr key; + struct roffstr val; + struct roffkv *next; /* next in list */ +}; + +/* + * A single number register as part of a singly-linked list. + */ +struct roffreg { + struct roffstr key; + int val; + int step; + struct roffreg *next; +}; + +/* + * Association of request and macro names with token IDs. + */ +struct roffreq { + enum roff_tok tok; + char name[]; +}; + +/* + * A macro processing context. + * More than one is needed when macro calls are nested. + */ +struct mctx { + char **argv; + int argc; + int argsz; +}; + +struct roff { + struct roff_man *man; /* mdoc or man parser */ + struct roffnode *last; /* leaf of stack */ + struct mctx *mstack; /* stack of macro contexts */ + int *rstack; /* stack of inverted `ie' values */ + struct ohash *reqtab; /* request lookup table */ + struct roffreg *regtab; /* number registers */ + struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *rentab; /* renamed strings & macros */ + struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ + struct roffstr *xtab; /* single-byte trans table (`tr') */ + const char *current_string; /* value of last called user macro */ + struct tbl_node *first_tbl; /* first table parsed */ + struct tbl_node *last_tbl; /* last table parsed */ + struct tbl_node *tbl; /* current table being parsed */ + struct eqn_node *last_eqn; /* equation parser */ + struct eqn_node *eqn; /* active equation parser */ + int eqn_inline; /* current equation is inline */ + int options; /* parse options */ + int mstacksz; /* current size of mstack */ + int mstackpos; /* position in mstack */ + int rstacksz; /* current size limit of rstack */ + int rstackpos; /* position in rstack */ + int format; /* current file in mdoc or man format */ + char control; /* control character */ + char escape; 
/* escape character */ +}; + +/* + * A macro definition, condition, or ignored block. + */ +struct roffnode { + enum roff_tok tok; /* type of node */ + struct roffnode *parent; /* up one in stack */ + int line; /* parse line */ + int col; /* parse col */ + char *name; /* node name, e.g. macro name */ + char *end; /* custom end macro of the block */ + int endspan; /* scope to: 1=eol 2=next line -1=\} */ + int rule; /* content is: 1=evaluated 0=skipped */ +}; + +#define ROFF_ARGS struct roff *r, /* parse ctx */ \ + enum roff_tok tok, /* tok of macro */ \ + struct buf *buf, /* input buffer */ \ + int ln, /* parse line */ \ + int ppos, /* original pos in buffer */ \ + int pos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ + +typedef int (*roffproc)(ROFF_ARGS); + +struct roffmac { + roffproc proc; /* process new macro */ + roffproc text; /* process as child text of macro */ + roffproc sub; /* process as child of macro */ + int flags; +#define ROFFMAC_STRUCT (1 << 0) /* always interpret */ +}; + +struct predef { + const char *name; /* predefined input name */ + const char *str; /* replacement symbol */ +}; + +#define PREDEF(__name, __str) \ + { (__name), (__str) }, + +/* --- function prototypes ------------------------------------------------ */ + +static int roffnode_cleanscope(struct roff *); +static int roffnode_pop(struct roff *); +static void roffnode_push(struct roff *, enum roff_tok, + const char *, int, int); +static void roff_addtbl(struct roff_man *, int, struct tbl_node *); +static int roff_als(ROFF_ARGS); +static int roff_block(ROFF_ARGS); +static int roff_block_text(ROFF_ARGS); +static int roff_block_sub(ROFF_ARGS); +static int roff_break(ROFF_ARGS); +static int roff_cblock(ROFF_ARGS); +static int roff_cc(ROFF_ARGS); +static int roff_ccond(struct roff *, int, int); +static int roff_char(ROFF_ARGS); +static int roff_cond(ROFF_ARGS); +static int roff_cond_text(ROFF_ARGS); +static int roff_cond_sub(ROFF_ARGS); +static int 
roff_ds(ROFF_ARGS); +static int roff_ec(ROFF_ARGS); +static int roff_eo(ROFF_ARGS); +static int roff_eqndelim(struct roff *, struct buf *, int); +static int roff_evalcond(struct roff *, int, char *, int *); +static int roff_evalnum(struct roff *, int, + const char *, int *, int *, int); +static int roff_evalpar(struct roff *, int, + const char *, int *, int *, int); +static int roff_evalstrcond(const char *, int *); +static int roff_expand(struct roff *, struct buf *, + int, int, char); +static void roff_free1(struct roff *); +static void roff_freereg(struct roffreg *); +static void roff_freestr(struct roffkv *); +static size_t roff_getname(struct roff *, char **, int, int); +static int roff_getnum(const char *, int *, int *, int); +static int roff_getop(const char *, int *, char *); +static int roff_getregn(struct roff *, + const char *, size_t, char); +static int roff_getregro(const struct roff *, + const char *name); +static const char *roff_getstrn(struct roff *, + const char *, size_t, int *); +static int roff_hasregn(const struct roff *, + const char *, size_t); +static int roff_insec(ROFF_ARGS); +static int roff_it(ROFF_ARGS); +static int roff_line_ignore(ROFF_ARGS); +static void roff_man_alloc1(struct roff_man *); +static void roff_man_free1(struct roff_man *); +static int roff_manyarg(ROFF_ARGS); +static int roff_noarg(ROFF_ARGS); +static int roff_nop(ROFF_ARGS); +static int roff_nr(ROFF_ARGS); +static int roff_onearg(ROFF_ARGS); +static enum roff_tok roff_parse(struct roff *, char *, int *, + int, int); +static int roff_parsetext(struct roff *, struct buf *, + int, int *); +static int roff_renamed(ROFF_ARGS); +static int roff_return(ROFF_ARGS); +static int roff_rm(ROFF_ARGS); +static int roff_rn(ROFF_ARGS); +static int roff_rr(ROFF_ARGS); +static void roff_setregn(struct roff *, const char *, + size_t, int, char, int); +static void roff_setstr(struct roff *, + const char *, const char *, int); +static void roff_setstrn(struct roffkv **, const char *, + 
size_t, const char *, size_t, int); +static int roff_shift(ROFF_ARGS); +static int roff_so(ROFF_ARGS); +static int roff_tr(ROFF_ARGS); +static int roff_Dd(ROFF_ARGS); +static int roff_TE(ROFF_ARGS); +static int roff_TS(ROFF_ARGS); +static int roff_EQ(ROFF_ARGS); +static int roff_EN(ROFF_ARGS); +static int roff_T_(ROFF_ARGS); +static int roff_unsupp(ROFF_ARGS); +static int roff_userdef(ROFF_ARGS); + +/* --- constant data ------------------------------------------------------ */ + +#define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ +#define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ + +const char *__roff_name[MAN_MAX + 1] = { + "br", "ce", "fi", "ft", + "ll", "mc", "nf", + "po", "rj", "sp", + "ta", "ti", NULL, + "ab", "ad", "af", "aln", + "als", "am", "am1", "ami", + "ami1", "as", "as1", "asciify", + "backtrace", "bd", "bleedat", "blm", + "box", "boxa", "bp", "BP", + "break", "breakchar", "brnl", "brp", + "brpnl", "c2", "cc", + "cf", "cflags", "ch", "char", + "chop", "class", "close", "CL", + "color", "composite", "continue", "cp", + "cropat", "cs", "cu", "da", + "dch", "Dd", "de", "de1", + "defcolor", "dei", "dei1", "device", + "devicem", "di", "do", "ds", + "ds1", "dwh", "dt", "ec", + "ecr", "ecs", "el", "em", + "EN", "eo", "EP", "EQ", + "errprint", "ev", "evc", "ex", + "fallback", "fam", "fc", "fchar", + "fcolor", "fdeferlig", "feature", "fkern", + "fl", "flig", "fp", "fps", + "fschar", "fspacewidth", "fspecial", "ftr", + "fzoom", "gcolor", "hc", "hcode", + "hidechar", "hla", "hlm", "hpf", + "hpfa", "hpfcode", "hw", "hy", + "hylang", "hylen", "hym", "hypp", + "hys", "ie", "if", "ig", + "index", "it", "itc", "IX", + "kern", "kernafter", "kernbefore", "kernpair", + "lc", "lc_ctype", "lds", "length", + "letadj", "lf", "lg", "lhang", + "linetabs", "lnr", "lnrf", "lpfx", + "ls", "lsm", "lt", + "mediasize", "minss", "mk", "mso", + "na", "ne", "nh", "nhychar", + "nm", "nn", "nop", "nr", + "nrf", "nroff", "ns", "nx", + "open", 
"opena", "os", "output", + "padj", "papersize", "pc", "pev", + "pi", "PI", "pl", "pm", + "pn", "pnr", "ps", + "psbb", "pshape", "pso", "ptr", + "pvs", "rchar", "rd", "recursionlimit", + "return", "rfschar", "rhang", + "rm", "rn", "rnn", "rr", + "rs", "rt", "schar", "sentchar", + "shc", "shift", "sizes", "so", + "spacewidth", "special", "spreadwarn", "ss", + "sty", "substring", "sv", "sy", + "T&", "tc", "TE", + "TH", "tkf", "tl", + "tm", "tm1", "tmc", "tr", + "track", "transchar", "trf", "trimat", + "trin", "trnt", "troff", "TS", + "uf", "ul", "unformat", "unwatch", + "unwatchn", "vpt", "vs", "warn", + "warnscale", "watch", "watchlength", "watchn", + "wh", "while", "write", "writec", + "writem", "xflag", ".", NULL, + NULL, "text", + "Dd", "Dt", "Os", "Sh", + "Ss", "Pp", "D1", "Dl", + "Bd", "Ed", "Bl", "El", + "It", "Ad", "An", "Ap", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + "Xr", "%A", "%B", "%D", + "%I", "%J", "%N", "%O", + "%P", "%R", "%T", "%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", "Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", "Lb", "Lp", + "Lk", "Mt", "Brq", "Bro", + "Brc", "%C", "Es", "En", + "Dx", "%Q", "%U", "Ta", + "Tg", NULL, + "TH", "SH", "SS", "TP", + "TQ", + "LP", "PP", "P", "IP", + "HP", "SM", "SB", "BI", + "IB", "BR", "RB", "R", + "B", "I", "IR", "RI", + "RE", "RS", "DT", "UC", + "PD", "AT", "in", + "SY", "YS", "OP", + "EX", "EE", "UR", + "UE", "MT", "ME", NULL +}; +const char *const *roff_name = __roff_name; + +static struct roffmac roffs[TOKEN_NONE] = { + { roff_noarg, NULL, NULL, 0 }, /* br */ + { roff_onearg, NULL, NULL, 0 }, /* ce */ + { roff_noarg, NULL, NULL, 0 }, /* 
fi */ + { roff_onearg, NULL, NULL, 0 }, /* ft */ + { roff_onearg, NULL, NULL, 0 }, /* ll */ + { roff_onearg, NULL, NULL, 0 }, /* mc */ + { roff_noarg, NULL, NULL, 0 }, /* nf */ + { roff_onearg, NULL, NULL, 0 }, /* po */ + { roff_onearg, NULL, NULL, 0 }, /* rj */ + { roff_onearg, NULL, NULL, 0 }, /* sp */ + { roff_manyarg, NULL, NULL, 0 }, /* ta */ + { roff_onearg, NULL, NULL, 0 }, /* ti */ + { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ + { roff_unsupp, NULL, NULL, 0 }, /* ab */ + { roff_line_ignore, NULL, NULL, 0 }, /* ad */ + { roff_line_ignore, NULL, NULL, 0 }, /* af */ + { roff_unsupp, NULL, NULL, 0 }, /* aln */ + { roff_als, NULL, NULL, 0 }, /* als */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ + { roff_ds, NULL, NULL, 0 }, /* as */ + { roff_ds, NULL, NULL, 0 }, /* as1 */ + { roff_unsupp, NULL, NULL, 0 }, /* asciify */ + { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ + { roff_line_ignore, NULL, NULL, 0 }, /* bd */ + { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ + { roff_unsupp, NULL, NULL, 0 }, /* blm */ + { roff_unsupp, NULL, NULL, 0 }, /* box */ + { roff_unsupp, NULL, NULL, 0 }, /* boxa */ + { roff_line_ignore, NULL, NULL, 0 }, /* bp */ + { roff_unsupp, NULL, NULL, 0 }, /* BP */ + { roff_break, NULL, NULL, 0 }, /* break */ + { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ + { roff_noarg, NULL, NULL, 0 }, /* brp */ + { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ + { roff_unsupp, NULL, NULL, 0 }, /* c2 */ + { roff_cc, NULL, NULL, 0 }, /* cc */ + { roff_insec, NULL, NULL, 0 }, /* cf */ + { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ + { roff_line_ignore, NULL, NULL, 0 }, /* ch */ + { roff_char, NULL, NULL, 0 }, /* char */ + { roff_unsupp, NULL, NULL, 0 }, /* chop */ + { roff_line_ignore, 
NULL, NULL, 0 }, /* class */ + { roff_insec, NULL, NULL, 0 }, /* close */ + { roff_unsupp, NULL, NULL, 0 }, /* CL */ + { roff_line_ignore, NULL, NULL, 0 }, /* color */ + { roff_unsupp, NULL, NULL, 0 }, /* composite */ + { roff_unsupp, NULL, NULL, 0 }, /* continue */ + { roff_line_ignore, NULL, NULL, 0 }, /* cp */ + { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ + { roff_line_ignore, NULL, NULL, 0 }, /* cs */ + { roff_line_ignore, NULL, NULL, 0 }, /* cu */ + { roff_unsupp, NULL, NULL, 0 }, /* da */ + { roff_unsupp, NULL, NULL, 0 }, /* dch */ + { roff_Dd, NULL, NULL, 0 }, /* Dd */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ + { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ + { roff_unsupp, NULL, NULL, 0 }, /* device */ + { roff_unsupp, NULL, NULL, 0 }, /* devicem */ + { roff_unsupp, NULL, NULL, 0 }, /* di */ + { roff_unsupp, NULL, NULL, 0 }, /* do */ + { roff_ds, NULL, NULL, 0 }, /* ds */ + { roff_ds, NULL, NULL, 0 }, /* ds1 */ + { roff_unsupp, NULL, NULL, 0 }, /* dwh */ + { roff_unsupp, NULL, NULL, 0 }, /* dt */ + { roff_ec, NULL, NULL, 0 }, /* ec */ + { roff_unsupp, NULL, NULL, 0 }, /* ecr */ + { roff_unsupp, NULL, NULL, 0 }, /* ecs */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ + { roff_unsupp, NULL, NULL, 0 }, /* em */ + { roff_EN, NULL, NULL, 0 }, /* EN */ + { roff_eo, NULL, NULL, 0 }, /* eo */ + { roff_unsupp, NULL, NULL, 0 }, /* EP */ + { roff_EQ, NULL, NULL, 0 }, /* EQ */ + { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ + { roff_unsupp, NULL, NULL, 0 }, /* ev */ + { roff_unsupp, NULL, NULL, 0 }, /* evc */ + { roff_unsupp, NULL, NULL, 0 }, /* ex */ + { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ + { roff_line_ignore, NULL, NULL, 0 }, /* fam */ + { roff_unsupp, NULL, NULL, 0 }, /* fc */ + { roff_unsupp, 
NULL, NULL, 0 }, /* fchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ + { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ + { roff_line_ignore, NULL, NULL, 0 }, /* feature */ + { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ + { roff_line_ignore, NULL, NULL, 0 }, /* fl */ + { roff_line_ignore, NULL, NULL, 0 }, /* flig */ + { roff_line_ignore, NULL, NULL, 0 }, /* fp */ + { roff_line_ignore, NULL, NULL, 0 }, /* fps */ + { roff_unsupp, NULL, NULL, 0 }, /* fschar */ + { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ + { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ + { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ + { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ + { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ + { roff_line_ignore, NULL, NULL, 0 }, /* hc */ + { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ + { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ + { roff_line_ignore, NULL, NULL, 0 }, /* hla */ + { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ + { roff_line_ignore, NULL, NULL, 0 }, /* hw */ + { roff_line_ignore, NULL, NULL, 0 }, /* hy */ + { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ + { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ + { roff_line_ignore, NULL, NULL, 0 }, /* hym */ + { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ + { roff_line_ignore, NULL, NULL, 0 }, /* hys */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ + { roff_unsupp, NULL, NULL, 0 }, /* index */ + { roff_it, NULL, NULL, 0 }, /* it */ + { roff_unsupp, NULL, NULL, 0 }, /* itc */ + { roff_line_ignore, NULL, NULL, 0 }, /* IX */ + { roff_line_ignore, NULL, NULL, 0 }, /* kern */ + { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ + { 
roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ + { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ + { roff_unsupp, NULL, NULL, 0 }, /* lc */ + { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ + { roff_unsupp, NULL, NULL, 0 }, /* lds */ + { roff_unsupp, NULL, NULL, 0 }, /* length */ + { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ + { roff_insec, NULL, NULL, 0 }, /* lf */ + { roff_line_ignore, NULL, NULL, 0 }, /* lg */ + { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ + { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ + { roff_unsupp, NULL, NULL, 0 }, /* lnr */ + { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ + { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ + { roff_line_ignore, NULL, NULL, 0 }, /* ls */ + { roff_unsupp, NULL, NULL, 0 }, /* lsm */ + { roff_line_ignore, NULL, NULL, 0 }, /* lt */ + { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ + { roff_line_ignore, NULL, NULL, 0 }, /* minss */ + { roff_line_ignore, NULL, NULL, 0 }, /* mk */ + { roff_insec, NULL, NULL, 0 }, /* mso */ + { roff_line_ignore, NULL, NULL, 0 }, /* na */ + { roff_line_ignore, NULL, NULL, 0 }, /* ne */ + { roff_line_ignore, NULL, NULL, 0 }, /* nh */ + { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ + { roff_unsupp, NULL, NULL, 0 }, /* nm */ + { roff_unsupp, NULL, NULL, 0 }, /* nn */ + { roff_nop, NULL, NULL, 0 }, /* nop */ + { roff_nr, NULL, NULL, 0 }, /* nr */ + { roff_unsupp, NULL, NULL, 0 }, /* nrf */ + { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ + { roff_line_ignore, NULL, NULL, 0 }, /* ns */ + { roff_insec, NULL, NULL, 0 }, /* nx */ + { roff_insec, NULL, NULL, 0 }, /* open */ + { roff_insec, NULL, NULL, 0 }, /* opena */ + { roff_line_ignore, NULL, NULL, 0 }, /* os */ + { roff_unsupp, NULL, NULL, 0 }, /* output */ + { roff_line_ignore, NULL, NULL, 0 }, /* padj */ + { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ + { roff_line_ignore, NULL, NULL, 0 }, /* pc */ + { roff_line_ignore, NULL, NULL, 0 }, /* pev */ + { roff_insec, NULL, NULL, 0 }, /* pi */ + { roff_unsupp, 
NULL, NULL, 0 }, /* PI */ + { roff_line_ignore, NULL, NULL, 0 }, /* pl */ + { roff_line_ignore, NULL, NULL, 0 }, /* pm */ + { roff_line_ignore, NULL, NULL, 0 }, /* pn */ + { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ + { roff_line_ignore, NULL, NULL, 0 }, /* ps */ + { roff_unsupp, NULL, NULL, 0 }, /* psbb */ + { roff_unsupp, NULL, NULL, 0 }, /* pshape */ + { roff_insec, NULL, NULL, 0 }, /* pso */ + { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ + { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ + { roff_unsupp, NULL, NULL, 0 }, /* rchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* rd */ + { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ + { roff_return, NULL, NULL, 0 }, /* return */ + { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ + { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ + { roff_rm, NULL, NULL, 0 }, /* rm */ + { roff_rn, NULL, NULL, 0 }, /* rn */ + { roff_unsupp, NULL, NULL, 0 }, /* rnn */ + { roff_rr, NULL, NULL, 0 }, /* rr */ + { roff_line_ignore, NULL, NULL, 0 }, /* rs */ + { roff_line_ignore, NULL, NULL, 0 }, /* rt */ + { roff_unsupp, NULL, NULL, 0 }, /* schar */ + { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* shc */ + { roff_shift, NULL, NULL, 0 }, /* shift */ + { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ + { roff_so, NULL, NULL, 0 }, /* so */ + { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ + { roff_line_ignore, NULL, NULL, 0 }, /* special */ + { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ + { roff_line_ignore, NULL, NULL, 0 }, /* ss */ + { roff_line_ignore, NULL, NULL, 0 }, /* sty */ + { roff_unsupp, NULL, NULL, 0 }, /* substring */ + { roff_line_ignore, NULL, NULL, 0 }, /* sv */ + { roff_insec, NULL, NULL, 0 }, /* sy */ + { roff_T_, NULL, NULL, 0 }, /* T& */ + { roff_unsupp, NULL, NULL, 0 }, /* tc */ + { roff_TE, NULL, NULL, 0 }, /* TE */ + { roff_Dd, NULL, NULL, 0 }, /* TH */ + { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ + { roff_unsupp, NULL, NULL, 0 }, /* tl */ + { 
roff_line_ignore, NULL, NULL, 0 }, /* tm */ + { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ + { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ + { roff_tr, NULL, NULL, 0 }, /* tr */ + { roff_line_ignore, NULL, NULL, 0 }, /* track */ + { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ + { roff_insec, NULL, NULL, 0 }, /* trf */ + { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ + { roff_unsupp, NULL, NULL, 0 }, /* trin */ + { roff_unsupp, NULL, NULL, 0 }, /* trnt */ + { roff_line_ignore, NULL, NULL, 0 }, /* troff */ + { roff_TS, NULL, NULL, 0 }, /* TS */ + { roff_line_ignore, NULL, NULL, 0 }, /* uf */ + { roff_line_ignore, NULL, NULL, 0 }, /* ul */ + { roff_unsupp, NULL, NULL, 0 }, /* unformat */ + { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ + { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ + { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ + { roff_line_ignore, NULL, NULL, 0 }, /* vs */ + { roff_line_ignore, NULL, NULL, 0 }, /* warn */ + { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ + { roff_line_ignore, NULL, NULL, 0 }, /* watch */ + { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ + { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ + { roff_unsupp, NULL, NULL, 0 }, /* wh */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ + { roff_insec, NULL, NULL, 0 }, /* write */ + { roff_insec, NULL, NULL, 0 }, /* writec */ + { roff_insec, NULL, NULL, 0 }, /* writem */ + { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ + { roff_cblock, NULL, NULL, 0 }, /* . */ + { roff_renamed, NULL, NULL, 0 }, + { roff_userdef, NULL, NULL, 0 } +}; + +/* Array of injected predefined strings. 
*/ +#define PREDEFS_MAX 38 +static const struct predef predefs[PREDEFS_MAX] = { +#include "predefs.in" +}; + +static int roffce_lines; /* number of input lines to center */ +static struct roff_node *roffce_node; /* active request */ +static int roffit_lines; /* number of lines to delay */ +static char *roffit_macro; /* nil-terminated macro line */ + + +/* --- request table ------------------------------------------------------ */ + +struct ohash * +roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) +{ + struct ohash *htab; + struct roffreq *req; + enum roff_tok tok; + size_t sz; + unsigned int slot; + + htab = mandoc_malloc(sizeof(*htab)); + mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); + + for (tok = mintok; tok < maxtok; tok++) { + if (roff_name[tok] == NULL) + continue; + sz = strlen(roff_name[tok]); + req = mandoc_malloc(sizeof(*req) + sz + 1); + req->tok = tok; + memcpy(req->name, roff_name[tok], sz + 1); + slot = ohash_qlookup(htab, req->name); + ohash_insert(htab, slot, req); + } + return htab; +} + +void +roffhash_free(struct ohash *htab) +{ + struct roffreq *req; + unsigned int slot; + + if (htab == NULL) + return; + for (req = ohash_first(htab, &slot); req != NULL; + req = ohash_next(htab, &slot)) + free(req); + ohash_delete(htab); + free(htab); +} + +enum roff_tok +roffhash_find(struct ohash *htab, const char *name, size_t sz) +{ + struct roffreq *req; + const char *end; + + if (sz) { + end = name + sz; + req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); + } else + req = ohash_find(htab, ohash_qlookup(htab, name)); + return req == NULL ? TOKEN_NONE : req->tok; +} + +/* --- stack of request blocks -------------------------------------------- */ + +/* + * Pop the current node off of the stack of roff instructions currently + * pending. Return 1 if it is a loop or 0 otherwise. 
+ */ +static int +roffnode_pop(struct roff *r) +{ + struct roffnode *p; + int inloop; + + p = r->last; + inloop = p->tok == ROFF_while; + r->last = p->parent; + free(p->name); + free(p->end); + free(p); + return inloop; +} + +/* + * Push a roff node onto the instruction stack. This must later be + * removed with roffnode_pop(). + */ +static void +roffnode_push(struct roff *r, enum roff_tok tok, const char *name, + int line, int col) +{ + struct roffnode *p; + + p = mandoc_calloc(1, sizeof(struct roffnode)); + p->tok = tok; + if (name) + p->name = mandoc_strdup(name); + p->parent = r->last; + p->line = line; + p->col = col; + p->rule = p->parent ? p->parent->rule : 0; + + r->last = p; +} + +/* --- roff parser state data management ---------------------------------- */ + +static void +roff_free1(struct roff *r) +{ + int i; + + tbl_free(r->first_tbl); + r->first_tbl = r->last_tbl = r->tbl = NULL; + + eqn_free(r->last_eqn); + r->last_eqn = r->eqn = NULL; + + while (r->mstackpos >= 0) + roff_userret(r); + + while (r->last) + roffnode_pop(r); + + free (r->rstack); + r->rstack = NULL; + r->rstacksz = 0; + r->rstackpos = -1; + + roff_freereg(r->regtab); + r->regtab = NULL; + + roff_freestr(r->strtab); + roff_freestr(r->rentab); + roff_freestr(r->xmbtab); + r->strtab = r->rentab = r->xmbtab = NULL; + + if (r->xtab) + for (i = 0; i < 128; i++) + free(r->xtab[i].p); + free(r->xtab); + r->xtab = NULL; +} + +void +roff_reset(struct roff *r) +{ + roff_free1(r); + r->options |= MPARSE_COMMENT; + r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); + r->control = '\0'; + r->escape = '\\'; + roffce_lines = 0; + roffce_node = NULL; + roffit_lines = 0; + roffit_macro = NULL; +} + +void +roff_free(struct roff *r) +{ + int i; + + roff_free1(r); + for (i = 0; i < r->mstacksz; i++) + free(r->mstack[i].argv); + free(r->mstack); + roffhash_free(r->reqtab); + free(r); +} + +struct roff * +roff_alloc(int options) +{ + struct roff *r; + + r = mandoc_calloc(1, sizeof(struct roff)); + r->reqtab 
= roffhash_alloc(0, ROFF_RENAMED); + r->options = options | MPARSE_COMMENT; + r->format = options & (MPARSE_MDOC | MPARSE_MAN); + r->mstackpos = -1; + r->rstackpos = -1; + r->escape = '\\'; + return r; +} + +/* --- syntax tree state data management ---------------------------------- */ + +static void +roff_man_free1(struct roff_man *man) +{ + if (man->meta.first != NULL) + roff_node_delete(man, man->meta.first); + free(man->meta.msec); + free(man->meta.vol); + free(man->meta.os); + free(man->meta.arch); + free(man->meta.title); + free(man->meta.name); + free(man->meta.date); + free(man->meta.sodest); +} + +void +roff_state_reset(struct roff_man *man) +{ + man->last = man->meta.first; + man->last_es = NULL; + man->flags = 0; + man->lastsec = man->lastnamed = SEC_NONE; + man->next = ROFF_NEXT_CHILD; + roff_setreg(man->roff, "nS", 0, '='); +} + +static void +roff_man_alloc1(struct roff_man *man) +{ + memset(&man->meta, 0, sizeof(man->meta)); + man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); + man->meta.first->type = ROFFT_ROOT; + man->meta.macroset = MACROSET_NONE; + roff_state_reset(man); +} + +void +roff_man_reset(struct roff_man *man) +{ + roff_man_free1(man); + roff_man_alloc1(man); +} + +void +roff_man_free(struct roff_man *man) +{ + roff_man_free1(man); + free(man); +} + +struct roff_man * +roff_man_alloc(struct roff *roff, const char *os_s, int quick) +{ + struct roff_man *man; + + man = mandoc_calloc(1, sizeof(*man)); + man->roff = roff; + man->os_s = os_s; + man->quick = quick; + roff_man_alloc1(man); + roff->man = man; + return man; +} + +/* --- syntax tree handling ----------------------------------------------- */ + +struct roff_node * +roff_node_alloc(struct roff_man *man, int line, int pos, + enum roff_type type, int tok) +{ + struct roff_node *n; + + n = mandoc_calloc(1, sizeof(*n)); + n->line = line; + n->pos = pos; + n->tok = tok; + n->type = type; + n->sec = man->lastsec; + + if (man->flags & MDOC_SYNOPSIS) + n->flags |= NODE_SYNPRETTY; 
+ else + n->flags &= ~NODE_SYNPRETTY; + if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) + n->flags |= NODE_NOFILL; + else + n->flags &= ~NODE_NOFILL; + if (man->flags & MDOC_NEWLINE) + n->flags |= NODE_LINE; + man->flags &= ~MDOC_NEWLINE; + + return n; +} + +void +roff_node_append(struct roff_man *man, struct roff_node *n) +{ + + switch (man->next) { + case ROFF_NEXT_SIBLING: + if (man->last->next != NULL) { + n->next = man->last->next; + man->last->next->prev = n; + } else + man->last->parent->last = n; + man->last->next = n; + n->prev = man->last; + n->parent = man->last->parent; + break; + case ROFF_NEXT_CHILD: + if (man->last->child != NULL) { + n->next = man->last->child; + man->last->child->prev = n; + } else + man->last->last = n; + man->last->child = n; + n->parent = man->last; + break; + default: + abort(); + } + man->last = n; + + switch (n->type) { + case ROFFT_HEAD: + n->parent->head = n; + break; + case ROFFT_BODY: + if (n->end != ENDBODY_NOT) + return; + n->parent->body = n; + break; + case ROFFT_TAIL: + n->parent->tail = n; + break; + default: + return; + } + + /* + * Copy over the normalised-data pointer of our parent. Not + * everybody has one, but copying a null pointer is fine. 
+ */ + + n->norm = n->parent->norm; + assert(n->parent->type == ROFFT_BLOCK); +} + +void +roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); + n->string = roff_strdup(man->roff, word); + roff_node_append(man, n); + n->flags |= NODE_VALID | NODE_ENDED; + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_word_append(struct roff_man *man, const char *word) +{ + struct roff_node *n; + char *addstr, *newstr; + + n = man->last; + addstr = roff_strdup(man->roff, word); + mandoc_asprintf(&newstr, "%s %s", n->string, addstr); + free(addstr); + free(n->string); + n->string = newstr; + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; +} + +struct roff_node * +roff_block_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +struct roff_node * +roff_head_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +struct roff_node * +roff_body_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +static void +roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) +{ + struct roff_node *n; + struct tbl_span *span; + + if (man->meta.macroset == MACROSET_MAN) + man_breakscope(man, ROFF_TS); + while ((span = tbl_span(tbl)) != NULL) { + n = roff_node_alloc(man, line, 0, ROFFT_TBL, 
TOKEN_NONE); + n->span = span; + roff_node_append(man, n); + n->flags |= NODE_VALID | NODE_ENDED; + man->next = ROFF_NEXT_SIBLING; + } +} + +void +roff_node_unlink(struct roff_man *man, struct roff_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent != NULL) { + if (n->parent->child == n) + n->parent->child = n->next; + if (n->parent->last == n) + n->parent->last = n->prev; + } + + /* Adjust parse point. */ + + if (man == NULL) + return; + if (man->last == n) { + if (n->prev == NULL) { + man->last = n->parent; + man->next = ROFF_NEXT_CHILD; + } else { + man->last = n->prev; + man->next = ROFF_NEXT_SIBLING; + } + } + if (man->meta.first == n) + man->meta.first = NULL; +} + +void +roff_node_relink(struct roff_man *man, struct roff_node *n) +{ + roff_node_unlink(man, n); + n->prev = n->next = NULL; + roff_node_append(man, n); +} + +void +roff_node_free(struct roff_node *n) +{ + + if (n->args != NULL) + mdoc_argv_free(n->args); + if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) + free(n->norm); + eqn_box_free(n->eqn); + free(n->string); + free(n->tag); + free(n); +} + +void +roff_node_delete(struct roff_man *man, struct roff_node *n) +{ + + while (n->child != NULL) + roff_node_delete(man, n->child); + roff_node_unlink(man, n); + roff_node_free(n); +} + +int +roff_node_transparent(struct roff_node *n) +{ + if (n == NULL) + return 0; + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) + return 1; + return roff_tok_transparent(n->tok); +} + +int +roff_tok_transparent(enum roff_tok tok) +{ + switch (tok) { + case ROFF_ft: + case ROFF_ll: + case ROFF_mc: + case ROFF_po: + case ROFF_ta: + case MDOC_Db: + case MDOC_Es: + case MDOC_Sm: + case MDOC_Tg: + case MAN_DT: + case MAN_UC: + case MAN_PD: + case MAN_AT: + return 1; + default: + return 0; + } +} + +struct roff_node * +roff_node_child(struct roff_node *n) +{ + for (n = n->child; 
roff_node_transparent(n); n = n->next) + continue; + return n; +} + +struct roff_node * +roff_node_prev(struct roff_node *n) +{ + do { + n = n->prev; + } while (roff_node_transparent(n)); + return n; +} + +struct roff_node * +roff_node_next(struct roff_node *n) +{ + do { + n = n->next; + } while (roff_node_transparent(n)); + return n; +} + +void +deroff(char **dest, const struct roff_node *n) +{ + char *cp; + size_t sz; + + if (n->string == NULL) { + for (n = n->child; n != NULL; n = n->next) + deroff(dest, n); + return; + } + + /* Skip leading whitespace. */ + + for (cp = n->string; *cp != '\0'; cp++) { + if (cp[0] == '\\' && cp[1] != '\0' && + strchr(" %&0^|~", cp[1]) != NULL) + cp++; + else if ( ! isspace((unsigned char)*cp)) + break; + } + + /* Skip trailing backslash. */ + + sz = strlen(cp); + if (sz > 0 && cp[sz - 1] == '\\') + sz--; + + /* Skip trailing whitespace. */ + + for (; sz; sz--) + if ( ! isspace((unsigned char)cp[sz-1])) + break; + + /* Skip empty strings. */ + + if (sz == 0) + return; + + if (*dest == NULL) { + *dest = mandoc_strndup(cp, sz); + return; + } + + mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); + free(*dest); + *dest = cp; +} + +/* --- main functions of the roff parser ---------------------------------- */ + +/* + * In the current line, expand escape sequences that produce parsable + * input text. Also check the syntax of the remaining escape sequences, + * which typically produce output glyphs or change formatter state. 
+ */ +static int +roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) +{ + struct mctx *ctx; /* current macro call context */ + char ubuf[24]; /* buffer to print the number */ + struct roff_node *n; /* used for header comments */ + const char *start; /* start of the string to process */ + char *stesc; /* start of an escape sequence ('\\') */ + const char *esct; /* type of esccape sequence */ + char *ep; /* end of comment string */ + const char *stnam; /* start of the name, after "[(*" */ + const char *cp; /* end of the name, e.g. before ']' */ + const char *res; /* the string to be substituted */ + char *nbuf; /* new buffer to copy buf->buf to */ + size_t maxl; /* expected length of the escape name */ + size_t naml; /* actual length of the escape name */ + size_t asz; /* length of the replacement */ + size_t rsz; /* length of the rest of the string */ + int inaml; /* length returned from mandoc_escape() */ + int expand_count; /* to avoid infinite loops */ + int npos; /* position in numeric expression */ + int arg_complete; /* argument not interrupted by eol */ + int quote_args; /* true for \\$@, false for \\$* */ + int done; /* no more input available */ + int deftype; /* type of definition to paste */ + int rcsid; /* kind of RCS id seen */ + enum mandocerr err; /* for escape sequence problems */ + char sign; /* increment number register */ + char term; /* character terminating the escape */ + + /* Search forward for comments. */ + + done = 0; + start = buf->buf + pos; + for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { + if (stesc[0] != newesc || stesc[1] == '\0') + continue; + stesc++; + if (*stesc != '"' && *stesc != '#') + continue; + + /* Comment found, look for RCS id. 
*/ + + rcsid = 0; + if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { + rcsid = 1 << MANDOC_OS_OPENBSD; + cp += 8; + } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { + rcsid = 1 << MANDOC_OS_NETBSD; + cp += 7; + } + if (cp != NULL && + isalnum((unsigned char)*cp) == 0 && + strchr(cp, '$') != NULL) { + if (r->man->meta.rcsids & rcsid) + mandoc_msg(MANDOCERR_RCS_REP, ln, + (int)(stesc - buf->buf) + 1, + "%s", stesc + 1); + r->man->meta.rcsids |= rcsid; + } + + /* Handle trailing whitespace. */ + + ep = strchr(stesc--, '\0') - 1; + if (*ep == '\n') { + done = 1; + ep--; + } + if (*ep == ' ' || *ep == '\t') + mandoc_msg(MANDOCERR_SPACE_EOL, + ln, (int)(ep - buf->buf), NULL); + + /* + * Save comments preceding the title macro + * in the syntax tree. + */ + + if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) { + while (*ep == ' ' || *ep == '\t') + ep--; + ep[1] = '\0'; + n = roff_node_alloc(r->man, + ln, stesc + 1 - buf->buf, + ROFFT_COMMENT, TOKEN_NONE); + n->string = mandoc_strdup(stesc + 2); + roff_node_append(r->man, n); + n->flags |= NODE_VALID | NODE_ENDED; + r->man->next = ROFF_NEXT_SIBLING; + } + + /* Line continuation with comment. */ + + if (stesc[1] == '#') { + *stesc = '\0'; + return ROFF_IGN | ROFF_APPEND; + } + + /* Discard normal comments. */ + + while (stesc > start && stesc[-1] == ' ' && + (stesc == start + 1 || stesc[-2] != '\\')) + stesc--; + *stesc = '\0'; + break; + } + if (stesc == start) + return ROFF_CONT; + stesc--; + + /* Notice the end of the input. */ + + if (*stesc == '\n') { + *stesc-- = '\0'; + done = 1; + } + + expand_count = 0; + while (stesc >= start) { + if (*stesc != newesc) { + + /* + * If we have a non-standard escape character, + * escape literal backslashes because all + * processing in subsequent functions uses + * the standard escaping rules. 
+ */ + + if (newesc != ASCII_ESC && *stesc == '\\') { + *stesc = '\0'; + buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", + buf->buf, stesc + 1) + 1; + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + free(buf->buf); + buf->buf = nbuf; + } + + /* Search backwards for the next escape. */ + + stesc--; + continue; + } + + /* If it is escaped, skip it. */ + + for (cp = stesc - 1; cp >= start; cp--) + if (*cp != r->escape) + break; + + if ((stesc - cp) % 2 == 0) { + while (stesc > cp) + *stesc-- = '\\'; + continue; + } else if (stesc[1] != '\0') { + *stesc = '\\'; + } else { + *stesc-- = '\0'; + if (done) + continue; + else + return ROFF_IGN | ROFF_APPEND; + } + + /* Decide whether to expand or to check only. */ + + term = '\0'; + cp = stesc + 1; + if (*cp == 'E') + cp++; + esct = cp; + switch (*esct) { + case '*': + case '$': + res = NULL; + break; + case 'B': + case 'w': + term = cp[1]; + /* FALLTHROUGH */ + case 'n': + sign = cp[1]; + if (sign == '+' || sign == '-') + cp++; + res = ubuf; + break; + default: + err = MANDOCERR_OK; + switch(mandoc_escape(&cp, &stnam, &inaml)) { + case ESCAPE_SPECIAL: + if (mchars_spec2cp(stnam, inaml) >= 0) + break; + /* FALLTHROUGH */ + case ESCAPE_ERROR: + err = MANDOCERR_ESC_BAD; + break; + case ESCAPE_UNDEF: + err = MANDOCERR_ESC_UNDEF; + break; + case ESCAPE_UNSUPP: + err = MANDOCERR_ESC_UNSUPP; + break; + default: + break; + } + if (err != MANDOCERR_OK) + mandoc_msg(err, ln, (int)(stesc - buf->buf), + "%.*s", (int)(cp - stesc), stesc); + stesc--; + continue; + } + + if (EXPAND_LIMIT < ++expand_count) { + mandoc_msg(MANDOCERR_ROFFLOOP, + ln, (int)(stesc - buf->buf), NULL); + return ROFF_IGN; + } + + /* + * The third character decides the length + * of the name of the string or register. + * Save a pointer to the name. 
+ */ + + if (term == '\0') { + switch (*++cp) { + case '\0': + maxl = 0; + break; + case '(': + cp++; + maxl = 2; + break; + case '[': + cp++; + term = ']'; + maxl = 0; + break; + default: + maxl = 1; + break; + } + } else { + cp += 2; + maxl = 0; + } + stnam = cp; + + /* Advance to the end of the name. */ + + naml = 0; + arg_complete = 1; + while (maxl == 0 || naml < maxl) { + if (*cp == '\0') { + mandoc_msg(MANDOCERR_ESC_BAD, ln, + (int)(stesc - buf->buf), "%s", stesc); + arg_complete = 0; + break; + } + if (maxl == 0 && *cp == term) { + cp++; + break; + } + if (*cp++ != '\\' || *esct != 'w') { + naml++; + continue; + } + switch (mandoc_escape(&cp, NULL, NULL)) { + case ESCAPE_SPECIAL: + case ESCAPE_UNICODE: + case ESCAPE_NUMBERED: + case ESCAPE_UNDEF: + case ESCAPE_OVERSTRIKE: + naml++; + break; + default: + break; + } + } + + /* + * Retrieve the replacement string; if it is + * undefined, resume searching for escapes. + */ + + switch (*esct) { + case '*': + if (arg_complete) { + deftype = ROFFDEF_USER | ROFFDEF_PRE; + res = roff_getstrn(r, stnam, naml, &deftype); + + /* + * If not overriden, let \*(.T + * through to the formatters. + */ + + if (res == NULL && naml == 2 && + stnam[0] == '.' && stnam[1] == 'T') { + roff_setstrn(&r->strtab, + ".T", 2, NULL, 0, 0); + stesc--; + continue; + } + } + break; + case '$': + if (r->mstackpos < 0) { + mandoc_msg(MANDOCERR_ARG_UNDEF, ln, + (int)(stesc - buf->buf), "%.3s", stesc); + break; + } + ctx = r->mstack + r->mstackpos; + npos = esct[1] - '1'; + if (npos >= 0 && npos <= 8) { + res = npos < ctx->argc ? 
+ ctx->argv[npos] : ""; + break; + } + if (esct[1] == '*') + quote_args = 0; + else if (esct[1] == '@') + quote_args = 1; + else { + mandoc_msg(MANDOCERR_ARG_NONUM, ln, + (int)(stesc - buf->buf), "%.3s", stesc); + break; + } + asz = 0; + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + asz++; /* blank */ + if (quote_args) + asz += 2; /* quotes */ + asz += strlen(ctx->argv[npos]); + } + if (asz != 3) { + rsz = buf->sz - (stesc - buf->buf) - 3; + if (asz < 3) + memmove(stesc + asz, stesc + 3, rsz); + buf->sz += asz - 3; + nbuf = mandoc_realloc(buf->buf, buf->sz); + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + buf->buf = nbuf; + if (asz > 3) + memmove(stesc + asz, stesc + 3, rsz); + } + for (npos = 0; npos < ctx->argc; npos++) { + if (npos) + *stesc++ = ' '; + if (quote_args) + *stesc++ = '"'; + cp = ctx->argv[npos]; + while (*cp != '\0') + *stesc++ = *cp++; + if (quote_args) + *stesc++ = '"'; + } + continue; + case 'B': + npos = 0; + ubuf[0] = arg_complete && + roff_evalnum(r, ln, stnam, &npos, + NULL, ROFFNUM_SCALE) && + stnam + npos + 1 == cp ? '1' : '0'; + ubuf[1] = '\0'; + break; + case 'n': + if (arg_complete) + (void)snprintf(ubuf, sizeof(ubuf), "%d", + roff_getregn(r, stnam, naml, sign)); + else + ubuf[0] = '\0'; + break; + case 'w': + /* use even incomplete args */ + (void)snprintf(ubuf, sizeof(ubuf), "%d", + 24 * (int)naml); + break; + } + + if (res == NULL) { + if (*esct == '*') + mandoc_msg(MANDOCERR_STR_UNDEF, + ln, (int)(stesc - buf->buf), + "%.*s", (int)naml, stnam); + res = ""; + } else if (buf->sz + strlen(res) > SHRT_MAX) { + mandoc_msg(MANDOCERR_ROFFLOOP, + ln, (int)(stesc - buf->buf), NULL); + return ROFF_IGN; + } + + /* Replace the escape sequence by the string. */ + + *stesc = '\0'; + buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", + buf->buf, res, cp) + 1; + + /* Prepare for the next replacement. 
*/ + + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf) + strlen(res); + free(buf->buf); + buf->buf = nbuf; + } + return ROFF_CONT; +} + +/* + * Parse a quoted or unquoted roff-style request or macro argument. + * Return a pointer to the parsed argument, which is either the original + * pointer or advanced by one byte in case the argument is quoted. + * NUL-terminate the argument in place. + * Collapse pairs of quotes inside quoted arguments. + * Advance the argument pointer to the next argument, + * or to the NUL byte terminating the argument line. + */ +char * +roff_getarg(struct roff *r, char **cpp, int ln, int *pos) +{ + struct buf buf; + char *cp, *start; + int newesc, pairs, quoted, white; + + /* Quoting can only start with a new word. */ + start = *cpp; + quoted = 0; + if ('"' == *start) { + quoted = 1; + start++; + } + + newesc = pairs = white = 0; + for (cp = start; '\0' != *cp; cp++) { + + /* + * Move the following text left + * after quoted quotes and after "\\" and "\t". + */ + if (pairs) + cp[-pairs] = cp[0]; + + if ('\\' == cp[0]) { + /* + * In copy mode, translate double to single + * backslashes and backslash-t to literal tabs. + */ + switch (cp[1]) { + case 'a': + case 't': + cp[-pairs] = '\t'; + pairs++; + cp++; + break; + case '\\': + newesc = 1; + cp[-pairs] = ASCII_ESC; + pairs++; + cp++; + break; + case ' ': + /* Skip escaped blanks. */ + if (0 == quoted) + cp++; + break; + default: + break; + } + } else if (0 == quoted) { + if (' ' == cp[0]) { + /* Unescaped blanks end unquoted args. */ + white = 1; + break; + } + } else if ('"' == cp[0]) { + if ('"' == cp[1]) { + /* Quoted quotes collapse. */ + pairs++; + cp++; + } else { + /* Unquoted quotes end quoted args. */ + quoted = 2; + break; + } + } + } + + /* Quoted argument without a closing quote. */ + if (1 == quoted) + mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); + + /* NUL-terminate this argument and move to the next one. 
*/ + if (pairs) + cp[-pairs] = '\0'; + if ('\0' != *cp) { + *cp++ = '\0'; + while (' ' == *cp) + cp++; + } + *pos += (int)(cp - start) + (quoted ? 1 : 0); + *cpp = cp; + + if ('\0' == *cp && (white || ' ' == cp[-1])) + mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); + + start = mandoc_strdup(start); + if (newesc == 0) + return start; + + buf.buf = start; + buf.sz = strlen(start) + 1; + buf.next = NULL; + if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { + free(buf.buf); + buf.buf = mandoc_strdup(""); + } + return buf.buf; +} + + +/* + * Process text streams. + */ +static int +roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) +{ + size_t sz; + const char *start; + char *p; + int isz; + enum mandoc_esc esc; + + /* Spring the input line trap. */ + + if (roffit_lines == 1) { + isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); + free(buf->buf); + buf->buf = p; + buf->sz = isz + 1; + *offs = 0; + free(roffit_macro); + roffit_lines = 0; + return ROFF_REPARSE; + } else if (roffit_lines > 1) + --roffit_lines; + + if (roffce_node != NULL && buf->buf[pos] != '\0') { + if (roffce_lines < 1) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } else + roffce_lines--; + } + + /* Convert all breakable hyphens into ASCII_HYPH. */ + + start = p = buf->buf + pos; + + while (*p != '\0') { + sz = strcspn(p, "-\\"); + p += sz; + + if (*p == '\0') + break; + + if (*p == '\\') { + /* Skip over escapes. 
*/ + p++; + esc = mandoc_escape((const char **)&p, NULL, NULL); + if (esc == ESCAPE_ERROR) + break; + while (*p == '-') + p++; + continue; + } else if (p == start) { + p++; + continue; + } + + if (isalpha((unsigned char)p[-1]) && + isalpha((unsigned char)p[1])) + *p = ASCII_HYPH; + p++; + } + return ROFF_CONT; +} + +int +roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) +{ + enum roff_tok t; + int e; + int pos; /* parse point */ + int spos; /* saved parse point for messages */ + int ppos; /* original offset in buf->buf */ + int ctl; /* macro line (boolean) */ + + ppos = pos = *offs; + + /* Handle in-line equation delimiters. */ + + if (r->tbl == NULL && + r->last_eqn != NULL && r->last_eqn->delim && + (r->eqn == NULL || r->eqn_inline)) { + e = roff_eqndelim(r, buf, pos); + if (e == ROFF_REPARSE) + return e; + assert(e == ROFF_CONT); + } + + /* Expand some escape sequences. */ + + e = roff_expand(r, buf, ln, pos, r->escape); + if ((e & ROFF_MASK) == ROFF_IGN) + return e; + assert(e == ROFF_CONT); + + ctl = roff_getcontrol(r, buf->buf, &pos); + + /* + * First, if a scope is open and we're not a macro, pass the + * text through the macro's filter. + * Equations process all content themselves. + * Tables process almost all content themselves, but we want + * to warn about macros before passing it there. + */ + + if (r->last != NULL && ! ctl) { + t = r->last->tok; + e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); + if ((e & ROFF_MASK) == ROFF_IGN) + return e; + e &= ~ROFF_MASK; + } else + e = ROFF_IGN; + if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { + eqn_read(r->eqn, buf->buf + ppos); + return e; + } + if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { + tbl_read(r->tbl, ln, buf->buf, ppos); + roff_addtbl(r->man, ln, r->tbl); + return e; + } + if ( ! ctl) { + r->options &= ~MPARSE_COMMENT; + return roff_parsetext(r, buf, pos, offs) | e; + } + + /* Skip empty request lines. 
*/ + + if (buf->buf[pos] == '"') { + mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); + return ROFF_IGN; + } else if (buf->buf[pos] == '\0') + return ROFF_IGN; + + /* + * If a scope is open, go to the child handler for that macro, + * as it may want to preprocess before doing anything with it. + * Don't do so if an equation is open. + */ + + if (r->last) { + t = r->last->tok; + return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); + } + + /* No scope is open. This is a new request or macro. */ + + r->options &= ~MPARSE_COMMENT; + spos = pos; + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + /* Tables ignore most macros. */ + + if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || + t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { + mandoc_msg(MANDOCERR_TBLMACRO, + ln, pos, "%s", buf->buf + spos); + if (t != TOKEN_NONE) + return ROFF_IGN; + while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') + pos++; + while (buf->buf[pos] == ' ') + pos++; + tbl_read(r->tbl, ln, buf->buf, pos); + roff_addtbl(r->man, ln, r->tbl); + return ROFF_IGN; + } + + /* For now, let high level macros abort .ce mode. */ + + if (ctl && roffce_node != NULL && + (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || + t == ROFF_TH || t == ROFF_TS)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } + + /* + * This is neither a roff request nor a user-defined macro. + * Let the standard macro set parsers handle it. + */ + + if (t == TOKEN_NONE) + return ROFF_CONT; + + /* Execute a roff request or a user defined macro. */ + + return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); +} + +/* + * Internal interface function to tell the roff parser that execution + * of the current macro ended. This is required because macro + * definitions usually do not end with a .return request. 
+ */ +void +roff_userret(struct roff *r) +{ + struct mctx *ctx; + int i; + + assert(r->mstackpos >= 0); + ctx = r->mstack + r->mstackpos; + for (i = 0; i < ctx->argc; i++) + free(ctx->argv[i]); + ctx->argc = 0; + r->mstackpos--; +} + +void +roff_endparse(struct roff *r) +{ + if (r->last != NULL) + mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, + r->last->col, "%s", roff_name[r->last->tok]); + + if (r->eqn != NULL) { + mandoc_msg(MANDOCERR_BLK_NOEND, + r->eqn->node->line, r->eqn->node->pos, "EQ"); + eqn_parse(r->eqn); + r->eqn = NULL; + } + + if (r->tbl != NULL) { + tbl_end(r->tbl, 1); + r->tbl = NULL; + } +} + +/* + * Parse a roff node's type from the input buffer. This must be in the + * form of ".foo xxx" in the usual way. + */ +static enum roff_tok +roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) +{ + char *cp; + const char *mac; + size_t maclen; + int deftype; + enum roff_tok t; + + cp = buf + *pos; + + if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) + return TOKEN_NONE; + + mac = cp; + maclen = roff_getname(r, &cp, ln, ppos); + + deftype = ROFFDEF_USER | ROFFDEF_REN; + r->current_string = roff_getstrn(r, mac, maclen, &deftype); + switch (deftype) { + case ROFFDEF_USER: + t = ROFF_USERDEF; + break; + case ROFFDEF_REN: + t = ROFF_RENAMED; + break; + default: + t = roffhash_find(r->reqtab, mac, maclen); + break; + } + if (t != TOKEN_NONE) + *pos = cp - buf; + else if (deftype == ROFFDEF_UNDEF) { + /* Using an undefined macro defines it to be empty. */ + roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); + roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); + } + return t; +} + +/* --- handling of request blocks ----------------------------------------- */ + +static int +roff_cblock(ROFF_ARGS) +{ + + /* + * A block-close `..' should only be invoked as a child of an + * ignore macro, otherwise raise a warning and just ignore it. 
+ */ + + if (r->last == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); + return ROFF_IGN; + } + + switch (r->last->tok) { + case ROFF_am: + /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ + case ROFF_ami: + case ROFF_de: + /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ + case ROFF_dei: + case ROFF_ig: + break; + default: + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); + return ROFF_IGN; + } + + if (buf->buf[pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, + ".. %s", buf->buf + pos); + + roffnode_pop(r); + roffnode_cleanscope(r); + return ROFF_IGN; + +} + +/* + * Pop all nodes ending at the end of the current input line. + * Return the number of loops ended. + */ +static int +roffnode_cleanscope(struct roff *r) +{ + int inloop; + + inloop = 0; + while (r->last != NULL) { + if (--r->last->endspan != 0) + break; + inloop += roffnode_pop(r); + } + return inloop; +} + +/* + * Handle the closing \} of a conditional block. + * Apart from generating warnings, this only pops nodes. + * Return the number of loops ended. + */ +static int +roff_ccond(struct roff *r, int ln, int ppos) +{ + if (NULL == r->last) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); + return 0; + } + + switch (r->last->tok) { + case ROFF_el: + case ROFF_ie: + case ROFF_if: + case ROFF_while: + break; + default: + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); + return 0; + } + + if (r->last->endspan > -1) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); + return 0; + } + + return roffnode_pop(r) + roffnode_cleanscope(r); +} + +static int +roff_block(ROFF_ARGS) +{ + const char *name, *value; + char *call, *cp, *iname, *rname; + size_t csz, namesz, rsz; + int deftype; + + /* Ignore groff compatibility mode for now. */ + + if (tok == ROFF_de1) + tok = ROFF_de; + else if (tok == ROFF_dei1) + tok = ROFF_dei; + else if (tok == ROFF_am1) + tok = ROFF_am; + else if (tok == ROFF_ami1) + tok = ROFF_ami; + + /* Parse the macro name argument. 
*/ + + cp = buf->buf + pos; + if (tok == ROFF_ig) { + iname = NULL; + namesz = 0; + } else { + iname = cp; + namesz = roff_getname(r, &cp, ln, ppos); + iname[namesz] = '\0'; + } + + /* Resolve the macro name argument if it is indirect. */ + + if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { + deftype = ROFFDEF_USER; + name = roff_getstrn(r, iname, namesz, &deftype); + if (name == NULL) { + mandoc_msg(MANDOCERR_STR_UNDEF, + ln, (int)(iname - buf->buf), + "%.*s", (int)namesz, iname); + namesz = 0; + } else + namesz = strlen(name); + } else + name = iname; + + if (namesz == 0 && tok != ROFF_ig) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ln, ppos, "%s", roff_name[tok]); + return ROFF_IGN; + } + + roffnode_push(r, tok, name, ln, ppos); + + /* + * At the beginning of a `de' macro, clear the existing string + * with the same name, if there is one. New content will be + * appended from roff_block_text() in multiline mode. + */ + + if (tok == ROFF_de || tok == ROFF_dei) { + roff_setstrn(&r->strtab, name, namesz, "", 0, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + } else if (tok == ROFF_am || tok == ROFF_ami) { + deftype = ROFFDEF_ANY; + value = roff_getstrn(r, iname, namesz, &deftype); + switch (deftype) { /* Before appending, ... */ + case ROFFDEF_PRE: /* copy predefined to user-defined. */ + roff_setstrn(&r->strtab, name, namesz, + value, strlen(value), 0); + break; + case ROFFDEF_REN: /* call original standard macro. */ + csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", + (int)strlen(value), value); + roff_setstrn(&r->strtab, name, namesz, call, csz, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + free(call); + break; + case ROFFDEF_STD: /* rename and call standard macro. 
*/ + rsz = mandoc_asprintf(&rname, "__%s_renamed", name); + roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); + csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", + (int)rsz, rname); + roff_setstrn(&r->strtab, name, namesz, call, csz, 0); + free(call); + free(rname); + break; + default: + break; + } + } + + if (*cp == '\0') + return ROFF_IGN; + + /* Get the custom end marker. */ + + iname = cp; + namesz = roff_getname(r, &cp, ln, ppos); + + /* Resolve the end marker if it is indirect. */ + + if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { + deftype = ROFFDEF_USER; + name = roff_getstrn(r, iname, namesz, &deftype); + if (name == NULL) { + mandoc_msg(MANDOCERR_STR_UNDEF, + ln, (int)(iname - buf->buf), + "%.*s", (int)namesz, iname); + namesz = 0; + } else + namesz = strlen(name); + } else + name = iname; + + if (namesz) + r->last->end = mandoc_strndup(name, namesz); + + if (*cp != '\0') + mandoc_msg(MANDOCERR_ARG_EXCESS, + ln, pos, ".%s ... %s", roff_name[tok], cp); + + return ROFF_IGN; +} + +static int +roff_block_sub(ROFF_ARGS) +{ + enum roff_tok t; + int i, j; + + /* + * First check whether a custom macro exists at this level. If + * it does, then check against it. This is some of groff's + * stranger behaviours. If we encountered a custom end-scope + * tag and that tag also happens to be a "real" macro, then we + * need to try interpreting it again as a real macro. If it's + * not, then return ignore. Else continue. 
+ */ + + if (r->last->end) { + for (i = pos, j = 0; r->last->end[j]; j++, i++) + if (buf->buf[i] != r->last->end[j]) + break; + + if (r->last->end[j] == '\0' && + (buf->buf[i] == '\0' || + buf->buf[i] == ' ' || + buf->buf[i] == '\t')) { + roffnode_pop(r); + roffnode_cleanscope(r); + + while (buf->buf[i] == ' ' || buf->buf[i] == '\t') + i++; + + pos = i; + if (roff_parse(r, buf->buf, &pos, ln, ppos) != + TOKEN_NONE) + return ROFF_RERUN; + return ROFF_IGN; + } + } + + /* + * If we have no custom end-query or lookup failed, then try + * pulling it out of the hashtable. + */ + + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + if (t != ROFF_cblock) { + if (tok != ROFF_ig) + roff_setstr(r, r->last->name, buf->buf + ppos, 2); + return ROFF_IGN; + } + + return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); +} + +static int +roff_block_text(ROFF_ARGS) +{ + + if (tok != ROFF_ig) + roff_setstr(r, r->last->name, buf->buf + pos, 2); + + return ROFF_IGN; +} + +static int +roff_cond_sub(ROFF_ARGS) +{ + struct roffnode *bl; + char *ep; + int endloop, irc, rr; + enum roff_tok t; + + irc = ROFF_IGN; + rr = r->last->rule; + endloop = tok != ROFF_while ? ROFF_IGN : + rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; + if (roffnode_cleanscope(r)) + irc |= endloop; + + /* + * If `\}' occurs on a macro line without a preceding macro, + * drop the line completely. + */ + + ep = buf->buf + pos; + if (ep[0] == '\\' && ep[1] == '}') + rr = 0; + + /* + * The closing delimiter `\}' rewinds the conditional scope + * but is otherwise ignored when interpreting the line. + */ + + while ((ep = strchr(ep, '\\')) != NULL) { + switch (ep[1]) { + case '}': + memmove(ep, ep + 2, strlen(ep + 2) + 1); + if (roff_ccond(r, ln, ep - buf->buf)) + irc |= endloop; + break; + case '\0': + ++ep; + break; + default: + ep += 2; + break; + } + } + + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + /* For now, let high level macros abort .ce mode. 
*/ + + if (roffce_node != NULL && + (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || + t == ROFF_TH || t == ROFF_TS)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } + + /* + * Fully handle known macros when they are structurally + * required or when the conditional evaluated to true. + */ + + if (t == ROFF_break) { + if (irc & ROFF_LOOPMASK) + irc = ROFF_IGN | ROFF_LOOPEXIT; + else if (rr) { + for (bl = r->last; bl != NULL; bl = bl->parent) { + bl->rule = 0; + if (bl->tok == ROFF_while) + break; + } + } + } else if (t != TOKEN_NONE && + (rr || roffs[t].flags & ROFFMAC_STRUCT)) + irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); + else + irc |= rr ? ROFF_CONT : ROFF_IGN; + return irc; +} + +static int +roff_cond_text(ROFF_ARGS) +{ + char *ep; + int endloop, irc, rr; + + irc = ROFF_IGN; + rr = r->last->rule; + endloop = tok != ROFF_while ? ROFF_IGN : + rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; + if (roffnode_cleanscope(r)) + irc |= endloop; + + /* + * If `\}' occurs on a text line with neither preceding + * nor following characters, drop the line completely. + */ + + ep = buf->buf + pos; + if (strcmp(ep, "\\}") == 0) + rr = 0; + + /* + * The closing delimiter `\}' rewinds the conditional scope + * but is otherwise ignored when interpreting the line. + */ + + while ((ep = strchr(ep, '\\')) != NULL) { + switch (ep[1]) { + case '}': + memmove(ep, ep + 2, strlen(ep + 2) + 1); + if (roff_ccond(r, ln, ep - buf->buf)) + irc |= endloop; + break; + case '\0': + ++ep; + break; + default: + ep += 2; + break; + } + } + if (rr) + irc |= ROFF_CONT; + return irc; +} + +/* --- handling of numeric and conditional expressions -------------------- */ + +/* + * Parse a single signed integer number. Stop at the first non-digit. + * If there is at least one digit, return success and advance the + * parse point, else return failure and let the parse point unchanged. 
+ * Ignore overflows, treat them just like the C language. + */ +static int +roff_getnum(const char *v, int *pos, int *res, int flags) +{ + int myres, scaled, n, p; + + if (NULL == res) + res = &myres; + + p = *pos; + n = v[p] == '-'; + if (n || v[p] == '+') + p++; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[p])) + p++; + + for (*res = 0; isdigit((unsigned char)v[p]); p++) + *res = 10 * *res + v[p] - '0'; + if (p == *pos + n) + return 0; + + if (n) + *res = -*res; + + /* Each number may be followed by one optional scaling unit. */ + + switch (v[p]) { + case 'f': + scaled = *res * 65536; + break; + case 'i': + scaled = *res * 240; + break; + case 'c': + scaled = *res * 240 / 2.54; + break; + case 'v': + case 'P': + scaled = *res * 40; + break; + case 'm': + case 'n': + scaled = *res * 24; + break; + case 'p': + scaled = *res * 10 / 3; + break; + case 'u': + scaled = *res; + break; + case 'M': + scaled = *res * 6 / 25; + break; + default: + scaled = *res; + p--; + break; + } + if (flags & ROFFNUM_SCALE) + *res = scaled; + + *pos = p + 1; + return 1; +} + +/* + * Evaluate a string comparison condition. + * The first character is the delimiter. + * Succeed if the string up to its second occurrence + * matches the string up to its third occurence. + * Advance the cursor after the third occurrence + * or lacking that, to the end of the line. 
+ */ +static int +roff_evalstrcond(const char *v, int *pos) +{ + const char *s1, *s2, *s3; + int match; + + match = 0; + s1 = v + *pos; /* initial delimiter */ + s2 = s1 + 1; /* for scanning the first string */ + s3 = strchr(s2, *s1); /* for scanning the second string */ + + if (NULL == s3) /* found no middle delimiter */ + goto out; + + while ('\0' != *++s3) { + if (*s2 != *s3) { /* mismatch */ + s3 = strchr(s3, *s1); + break; + } + if (*s3 == *s1) { /* found the final delimiter */ + match = 1; + break; + } + s2++; + } + +out: + if (NULL == s3) + s3 = strchr(s2, '\0'); + else if (*s3 != '\0') + s3++; + *pos = s3 - v; + return match; +} + +/* + * Evaluate an optionally negated single character, numerical, + * or string condition. + */ +static int +roff_evalcond(struct roff *r, int ln, char *v, int *pos) +{ + const char *start, *end; + char *cp, *name; + size_t sz; + int deftype, len, number, savepos, istrue, wanttrue; + + if ('!' == v[*pos]) { + wanttrue = 0; + (*pos)++; + } else + wanttrue = 1; + + switch (v[*pos]) { + case '\0': + return 0; + case 'n': + case 'o': + (*pos)++; + return wanttrue; + case 'e': + case 't': + case 'v': + (*pos)++; + return !wanttrue; + case 'c': + do { + (*pos)++; + } while (v[*pos] == ' '); + + /* + * Quirk for groff compatibility: + * The horizontal tab is neither available nor unavailable. + */ + + if (v[*pos] == '\t') { + (*pos)++; + return 0; + } + + /* Printable ASCII characters are available. 
*/ + + if (v[*pos] != '\\') { + (*pos)++; + return wanttrue; + } + + end = v + ++*pos; + switch (mandoc_escape(&end, &start, &len)) { + case ESCAPE_SPECIAL: + istrue = mchars_spec2cp(start, len) != -1; + break; + case ESCAPE_UNICODE: + istrue = 1; + break; + case ESCAPE_NUMBERED: + istrue = mchars_num2char(start, len) != -1; + break; + default: + istrue = !wanttrue; + break; + } + *pos = end - v; + return istrue == wanttrue; + case 'd': + case 'r': + cp = v + *pos + 1; + while (*cp == ' ') + cp++; + name = cp; + sz = roff_getname(r, &cp, ln, cp - v); + if (sz == 0) + istrue = 0; + else if (v[*pos] == 'r') + istrue = roff_hasregn(r, name, sz); + else { + deftype = ROFFDEF_ANY; + roff_getstrn(r, name, sz, &deftype); + istrue = !!deftype; + } + *pos = (name + sz) - v; + return istrue == wanttrue; + default: + break; + } + + savepos = *pos; + if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) + return (number > 0) == wanttrue; + else if (*pos == savepos) + return roff_evalstrcond(v, pos) == wanttrue; + else + return 0; +} + +static int +roff_line_ignore(ROFF_ARGS) +{ + + return ROFF_IGN; +} + +static int +roff_insec(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); + return ROFF_IGN; +} + +static int +roff_unsupp(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); + return ROFF_IGN; +} + +static int +roff_cond(ROFF_ARGS) +{ + int irc; + + roffnode_push(r, tok, NULL, ln, ppos); + + /* + * An `.el' has no conditional body: it will consume the value + * of the current rstack entry set in prior `ie' calls or + * defaults to DENY. + * + * If we're not an `el', however, then evaluate the conditional. + */ + + r->last->rule = tok == ROFF_el ? + (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : + roff_evalcond(r, ln, buf->buf, &pos); + + /* + * An if-else will put the NEGATION of the current evaluated + * conditional into the stack of rules. 
+ */ + + if (tok == ROFF_ie) { + if (r->rstackpos + 1 == r->rstacksz) { + r->rstacksz += 16; + r->rstack = mandoc_reallocarray(r->rstack, + r->rstacksz, sizeof(int)); + } + r->rstack[++r->rstackpos] = !r->last->rule; + } + + /* If the parent has false as its rule, then so do we. */ + + if (r->last->parent && !r->last->parent->rule) + r->last->rule = 0; + + /* + * Determine scope. + * If there is nothing on the line after the conditional, + * not even whitespace, use next-line scope. + * Except that .while does not support next-line scope. + */ + + if (buf->buf[pos] == '\0' && tok != ROFF_while) { + r->last->endspan = 2; + goto out; + } + + while (buf->buf[pos] == ' ') + pos++; + + /* An opening brace requests multiline scope. */ + + if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { + r->last->endspan = -1; + pos += 2; + while (buf->buf[pos] == ' ') + pos++; + goto out; + } + + /* + * Anything else following the conditional causes + * single-line scope. Warn if the scope contains + * nothing but trailing whitespace. + */ + + if (buf->buf[pos] == '\0') + mandoc_msg(MANDOCERR_COND_EMPTY, + ln, ppos, "%s", roff_name[tok]); + + r->last->endspan = 1; + +out: + *offs = pos; + irc = ROFF_RERUN; + if (tok == ROFF_while) + irc |= ROFF_WHILE; + return irc; +} + +static int +roff_ds(ROFF_ARGS) +{ + char *string; + const char *name; + size_t namesz; + + /* Ignore groff compatibility mode for now. */ + + if (tok == ROFF_ds1) + tok = ROFF_ds; + else if (tok == ROFF_as1) + tok = ROFF_as; + + /* + * The first word is the name of the string. + * If it is empty or terminated by an escape sequence, + * abort the `ds' request without defining anything. + */ + + name = string = buf->buf + pos; + if (*name == '\0') + return ROFF_IGN; + + namesz = roff_getname(r, &string, ln, pos); + switch (name[namesz]) { + case '\\': + return ROFF_IGN; + case '\t': + string = buf->buf + pos + namesz; + break; + default: + break; + } + + /* Read past the initial double-quote, if any. 
*/ + if (*string == '"') + string++; + + /* The rest is the value. */ + roff_setstrn(&r->strtab, name, namesz, string, strlen(string), + ROFF_as == tok); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + return ROFF_IGN; +} + +/* + * Parse a single operator, one or two characters long. + * If the operator is recognized, return success and advance the + * parse point, else return failure and let the parse point unchanged. + */ +static int +roff_getop(const char *v, int *pos, char *res) +{ + + *res = v[*pos]; + + switch (*res) { + case '+': + case '-': + case '*': + case '/': + case '%': + case '&': + case ':': + break; + case '<': + switch (v[*pos + 1]) { + case '=': + *res = 'l'; + (*pos)++; + break; + case '>': + *res = '!'; + (*pos)++; + break; + case '?': + *res = 'i'; + (*pos)++; + break; + default: + break; + } + break; + case '>': + switch (v[*pos + 1]) { + case '=': + *res = 'g'; + (*pos)++; + break; + case '?': + *res = 'a'; + (*pos)++; + break; + default: + break; + } + break; + case '=': + if ('=' == v[*pos + 1]) + (*pos)++; + break; + default: + return 0; + } + (*pos)++; + + return *res; +} + +/* + * Evaluate either a parenthesized numeric expression + * or a single signed integer number. + */ +static int +roff_evalpar(struct roff *r, int ln, + const char *v, int *pos, int *res, int flags) +{ + + if ('(' != v[*pos]) + return roff_getnum(v, pos, res, flags); + + (*pos)++; + if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) + return 0; + + /* + * Omission of the closing parenthesis + * is an error in validation mode, + * but ignored in evaluation mode. + */ + + if (')' == v[*pos]) + (*pos)++; + else if (NULL == res) + return 0; + + return 1; +} + +/* + * Evaluate a complete numeric expression. + * Proceed left to right, there is no concept of precedence. 
+ */ +static int +roff_evalnum(struct roff *r, int ln, const char *v, + int *pos, int *res, int flags) +{ + int mypos, operand2; + char operator; + + if (NULL == pos) { + mypos = 0; + pos = &mypos; + } + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_evalpar(r, ln, v, pos, res, flags)) + return 0; + + while (1) { + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_getop(v, pos, &operator)) + break; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) + return 0; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if (NULL == res) + continue; + + switch (operator) { + case '+': + *res += operand2; + break; + case '-': + *res -= operand2; + break; + case '*': + *res *= operand2; + break; + case '/': + if (operand2 == 0) { + mandoc_msg(MANDOCERR_DIVZERO, + ln, *pos, "%s", v); + *res = 0; + break; + } + *res /= operand2; + break; + case '%': + if (operand2 == 0) { + mandoc_msg(MANDOCERR_DIVZERO, + ln, *pos, "%s", v); + *res = 0; + break; + } + *res %= operand2; + break; + case '<': + *res = *res < operand2; + break; + case '>': + *res = *res > operand2; + break; + case 'l': + *res = *res <= operand2; + break; + case 'g': + *res = *res >= operand2; + break; + case '=': + *res = *res == operand2; + break; + case '!': + *res = *res != operand2; + break; + case '&': + *res = *res && operand2; + break; + case ':': + *res = *res || operand2; + break; + case 'i': + if (operand2 < *res) + *res = operand2; + break; + case 'a': + if (operand2 > *res) + *res = operand2; + break; + default: + abort(); + } + } + return 1; +} + +/* --- register management ------------------------------------------------ */ + +void +roff_setreg(struct roff *r, const char *name, int val, char sign) +{ + roff_setregn(r, name, strlen(name), val, sign, INT_MIN); +} + +static void 
+roff_setregn(struct roff *r, const char *name, size_t len, + int val, char sign, int step) +{ + struct roffreg *reg; + + /* Search for an existing register with the same name. */ + reg = r->regtab; + + while (reg != NULL && (reg->key.sz != len || + strncmp(reg->key.p, name, len) != 0)) + reg = reg->next; + + if (NULL == reg) { + /* Create a new register. */ + reg = mandoc_malloc(sizeof(struct roffreg)); + reg->key.p = mandoc_strndup(name, len); + reg->key.sz = len; + reg->val = 0; + reg->step = 0; + reg->next = r->regtab; + r->regtab = reg; + } + + if ('+' == sign) + reg->val += val; + else if ('-' == sign) + reg->val -= val; + else + reg->val = val; + if (step != INT_MIN) + reg->step = step; +} + +/* + * Handle some predefined read-only number registers. + * For now, return -1 if the requested register is not predefined; + * in case a predefined read-only register having the value -1 + * were to turn up, another special value would have to be chosen. + */ +static int +roff_getregro(const struct roff *r, const char *name) +{ + + switch (*name) { + case '$': /* Number of arguments of the last macro evaluated. */ + return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; + case 'A': /* ASCII approximation mode is always off. */ + return 0; + case 'g': /* Groff compatibility mode is always on. */ + return 1; + case 'H': /* Fixed horizontal resolution. */ + return 24; + case 'j': /* Always adjust left margin only. */ + return 0; + case 'T': /* Some output device is always defined. */ + return 1; + case 'V': /* Fixed vertical resolution. */ + return 40; + default: + return -1; + } +} + +int +roff_getreg(struct roff *r, const char *name) +{ + return roff_getregn(r, name, strlen(name), '\0'); +} + +static int +roff_getregn(struct roff *r, const char *name, size_t len, char sign) +{ + struct roffreg *reg; + int val; + + if ('.' 
== name[0] && 2 == len) { + val = roff_getregro(r, name + 1); + if (-1 != val) + return val; + } + + for (reg = r->regtab; reg; reg = reg->next) { + if (len == reg->key.sz && + 0 == strncmp(name, reg->key.p, len)) { + switch (sign) { + case '+': + reg->val += reg->step; + break; + case '-': + reg->val -= reg->step; + break; + default: + break; + } + return reg->val; + } + } + + roff_setregn(r, name, len, 0, '\0', INT_MIN); + return 0; +} + +static int +roff_hasregn(const struct roff *r, const char *name, size_t len) +{ + struct roffreg *reg; + int val; + + if ('.' == name[0] && 2 == len) { + val = roff_getregro(r, name + 1); + if (-1 != val) + return 1; + } + + for (reg = r->regtab; reg; reg = reg->next) + if (len == reg->key.sz && + 0 == strncmp(name, reg->key.p, len)) + return 1; + + return 0; +} + +static void +roff_freereg(struct roffreg *reg) +{ + struct roffreg *old_reg; + + while (NULL != reg) { + free(reg->key.p); + old_reg = reg; + reg = reg->next; + free(old_reg); + } +} + +static int +roff_nr(ROFF_ARGS) +{ + char *key, *val, *step; + size_t keysz; + int iv, is, len; + char sign; + + key = val = buf->buf + pos; + if (*key == '\0') + return ROFF_IGN; + + keysz = roff_getname(r, &val, ln, pos); + if (key[keysz] == '\\' || key[keysz] == '\t') + return ROFF_IGN; + + sign = *val; + if (sign == '+' || sign == '-') + val++; + + len = 0; + if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) + return ROFF_IGN; + + step = val + len; + while (isspace((unsigned char)*step)) + step++; + if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) + is = INT_MIN; + + roff_setregn(r, key, keysz, iv, sign, is); + return ROFF_IGN; +} + +static int +roff_rr(ROFF_ARGS) +{ + struct roffreg *reg, **prev; + char *name, *cp; + size_t namesz; + + name = cp = buf->buf + pos; + if (*name == '\0') + return ROFF_IGN; + namesz = roff_getname(r, &cp, ln, pos); + name[namesz] = '\0'; + + prev = &r->regtab; + while (1) { + reg = *prev; + if (reg == NULL || !strcmp(name, reg->key.p)) + 
break; + prev = &reg->next; + } + if (reg != NULL) { + *prev = reg->next; + free(reg->key.p); + free(reg); + } + return ROFF_IGN; +} + +/* --- handler functions for roff requests -------------------------------- */ + +static int +roff_rm(ROFF_ARGS) +{ + const char *name; + char *cp; + size_t namesz; + + cp = buf->buf + pos; + while (*cp != '\0') { + name = cp; + namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); + roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + if (name[namesz] == '\\' || name[namesz] == '\t') + break; + } + return ROFF_IGN; +} + +static int +roff_it(ROFF_ARGS) +{ + int iv; + + /* Parse the number of lines. */ + + if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { + mandoc_msg(MANDOCERR_IT_NONUM, + ln, ppos, "%s", buf->buf + 1); + return ROFF_IGN; + } + + while (isspace((unsigned char)buf->buf[pos])) + pos++; + + /* + * Arm the input line trap. + * Special-casing "an-trap" is an ugly workaround to cope + * with DocBook stupidly fiddling with man(7) internals. + */ + + roffit_lines = iv; + roffit_macro = mandoc_strdup(iv != 1 || + strcmp(buf->buf + pos, "an-trap") ? 
+ buf->buf + pos : "br"); + return ROFF_IGN; +} + +static int +roff_Dd(ROFF_ARGS) +{ + int mask; + enum roff_tok t, te; + + switch (tok) { + case ROFF_Dd: + tok = MDOC_Dd; + te = MDOC_MAX; + if (r->format == 0) + r->format = MPARSE_MDOC; + mask = MPARSE_MDOC | MPARSE_QUICK; + break; + case ROFF_TH: + tok = MAN_TH; + te = MAN_MAX; + if (r->format == 0) + r->format = MPARSE_MAN; + mask = MPARSE_QUICK; + break; + default: + abort(); + } + if ((r->options & mask) == 0) + for (t = tok; t < te; t++) + roff_setstr(r, roff_name[t], NULL, 0); + return ROFF_CONT; +} + +static int +roff_TE(ROFF_ARGS) +{ + r->man->flags &= ~ROFF_NONOFILL; + if (r->tbl == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); + return ROFF_IGN; + } + if (tbl_end(r->tbl, 0) == 0) { + r->tbl = NULL; + free(buf->buf); + buf->buf = mandoc_strdup(".sp"); + buf->sz = 4; + *offs = 0; + return ROFF_REPARSE; + } + r->tbl = NULL; + return ROFF_IGN; +} + +static int +roff_T_(ROFF_ARGS) +{ + + if (NULL == r->tbl) + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); + else + tbl_restart(ln, ppos, r->tbl); + + return ROFF_IGN; +} + +/* + * Handle in-line equation delimiters. + */ +static int +roff_eqndelim(struct roff *r, struct buf *buf, int pos) +{ + char *cp1, *cp2; + const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; + + /* + * Outside equations, look for an opening delimiter. + * If we are inside an equation, we already know it is + * in-line, or this function wouldn't have been called; + * so look for a closing delimiter. + */ + + cp1 = buf->buf + pos; + cp2 = strchr(cp1, r->eqn == NULL ? + r->last_eqn->odelim : r->last_eqn->cdelim); + if (cp2 == NULL) + return ROFF_CONT; + + *cp2++ = '\0'; + bef_pr = bef_nl = aft_nl = aft_pr = ""; + + /* Handle preceding text, protecting whitespace. */ + + if (*buf->buf != '\0') { + if (r->eqn == NULL) + bef_pr = "\\&"; + bef_nl = "\n"; + } + + /* + * Prepare replacing the delimiter with an equation macro + * and drop leading white space from the equation. 
+ */ + + if (r->eqn == NULL) { + while (*cp2 == ' ') + cp2++; + mac = ".EQ"; + } else + mac = ".EN"; + + /* Handle following text, protecting whitespace. */ + + if (*cp2 != '\0') { + aft_nl = "\n"; + if (r->eqn != NULL) + aft_pr = "\\&"; + } + + /* Do the actual replacement. */ + + buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, + bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; + free(buf->buf); + buf->buf = cp1; + + /* Toggle the in-line state of the eqn subsystem. */ + + r->eqn_inline = r->eqn == NULL; + return ROFF_REPARSE; +} + +static int +roff_EQ(ROFF_ARGS) +{ + struct roff_node *n; + + if (r->man->meta.macroset == MACROSET_MAN) + man_breakscope(r->man, ROFF_EQ); + n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); + if (ln > r->man->last->line) + n->flags |= NODE_LINE; + n->eqn = eqn_box_new(); + roff_node_append(r->man, n); + r->man->next = ROFF_NEXT_SIBLING; + + assert(r->eqn == NULL); + if (r->last_eqn == NULL) + r->last_eqn = eqn_alloc(); + else + eqn_reset(r->last_eqn); + r->eqn = r->last_eqn; + r->eqn->node = n; + + if (buf->buf[pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, + ".EQ %s", buf->buf + pos); + + return ROFF_IGN; +} + +static int +roff_EN(ROFF_ARGS) +{ + if (r->eqn != NULL) { + eqn_parse(r->eqn); + r->eqn = NULL; + } else + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); + if (buf->buf[pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, + "EN %s", buf->buf + pos); + return ROFF_IGN; +} + +static int +roff_TS(ROFF_ARGS) +{ + if (r->tbl != NULL) { + mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); + tbl_end(r->tbl, 0); + } + r->man->flags |= ROFF_NONOFILL; + r->tbl = tbl_alloc(ppos, ln, r->last_tbl); + if (r->last_tbl == NULL) + r->first_tbl = r->tbl; + r->last_tbl = r->tbl; + return ROFF_IGN; +} + +static int +roff_noarg(ROFF_ARGS) +{ + if (r->man->flags & (MAN_BLINE | MAN_ELINE)) + man_breakscope(r->man, tok); + if (tok == ROFF_brp) + tok = ROFF_br; + roff_elem_alloc(r->man, ln, ppos, tok); + 
if (buf->buf[pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, + "%s %s", roff_name[tok], buf->buf + pos); + if (tok == ROFF_nf) + r->man->flags |= ROFF_NOFILL; + else if (tok == ROFF_fi) + r->man->flags &= ~ROFF_NOFILL; + r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; + r->man->next = ROFF_NEXT_SIBLING; + return ROFF_IGN; +} + +static int +roff_onearg(ROFF_ARGS) +{ + struct roff_node *n; + char *cp; + int npos; + + if (r->man->flags & (MAN_BLINE | MAN_ELINE) && + (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || + tok == ROFF_ti)) + man_breakscope(r->man, tok); + + if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + } + + roff_elem_alloc(r->man, ln, ppos, tok); + n = r->man->last; + + cp = buf->buf + pos; + if (*cp != '\0') { + while (*cp != '\0' && *cp != ' ') + cp++; + while (*cp == ' ') + *cp++ = '\0'; + if (*cp != '\0') + mandoc_msg(MANDOCERR_ARG_EXCESS, + ln, (int)(cp - buf->buf), + "%s ... 
%s", roff_name[tok], cp); + roff_word_alloc(r->man, ln, pos, buf->buf + pos); + } + + if (tok == ROFF_ce || tok == ROFF_rj) { + if (r->man->last->type == ROFFT_ELEM) { + roff_word_alloc(r->man, ln, pos, "1"); + r->man->last->flags |= NODE_NOSRC; + } + npos = 0; + if (roff_evalnum(r, ln, r->man->last->string, &npos, + &roffce_lines, 0) == 0) { + mandoc_msg(MANDOCERR_CE_NONUM, + ln, pos, "ce %s", buf->buf + pos); + roffce_lines = 1; + } + if (roffce_lines < 1) { + r->man->last = r->man->last->parent; + roffce_node = NULL; + roffce_lines = 0; + } else + roffce_node = r->man->last->parent; + } else { + n->flags |= NODE_VALID | NODE_ENDED; + r->man->last = n; + } + n->flags |= NODE_LINE; + r->man->next = ROFF_NEXT_SIBLING; + return ROFF_IGN; +} + +static int +roff_manyarg(ROFF_ARGS) +{ + struct roff_node *n; + char *sp, *ep; + + roff_elem_alloc(r->man, ln, ppos, tok); + n = r->man->last; + + for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { + while (*ep != '\0' && *ep != ' ') + ep++; + while (*ep == ' ') + *ep++ = '\0'; + roff_word_alloc(r->man, ln, sp - buf->buf, sp); + } + + n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; + r->man->last = n; + r->man->next = ROFF_NEXT_SIBLING; + return ROFF_IGN; +} + +static int +roff_als(ROFF_ARGS) +{ + char *oldn, *newn, *end, *value; + size_t oldsz, newsz, valsz; + + newn = oldn = buf->buf + pos; + if (*newn == '\0') + return ROFF_IGN; + + newsz = roff_getname(r, &oldn, ln, pos); + if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') + return ROFF_IGN; + + end = oldn; + oldsz = roff_getname(r, &end, ln, oldn - buf->buf); + if (oldsz == 0) + return ROFF_IGN; + + valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", + (int)oldsz, oldn); + roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + free(value); + return ROFF_IGN; +} + +/* + * The .break request only makes sense inside conditionals, + * and that case is already handled in roff_cond_sub(). 
+ */ +static int +roff_break(ROFF_ARGS) +{ + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); + return ROFF_IGN; +} + +static int +roff_cc(ROFF_ARGS) +{ + const char *p; + + p = buf->buf + pos; + + if (*p == '\0' || (r->control = *p++) == '.') + r->control = '\0'; + + if (*p != '\0') + mandoc_msg(MANDOCERR_ARG_EXCESS, + ln, p - buf->buf, "cc ... %s", p); + + return ROFF_IGN; +} + +static int +roff_char(ROFF_ARGS) +{ + const char *p, *kp, *vp; + size_t ksz, vsz; + int font; + + /* Parse the character to be replaced. */ + + kp = buf->buf + pos; + p = kp + 1; + if (*kp == '\0' || (*kp == '\\' && + mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || + (*p != ' ' && *p != '\0')) { + mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); + return ROFF_IGN; + } + ksz = p - kp; + while (*p == ' ') + p++; + + /* + * If the replacement string contains a font escape sequence, + * we have to restore the font at the end. + */ + + vp = p; + vsz = strlen(p); + font = 0; + while (*p != '\0') { + if (*p++ != '\\') + continue; + switch (mandoc_escape(&p, NULL, NULL)) { + case ESCAPE_FONT: + case ESCAPE_FONTROMAN: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTBI: + case ESCAPE_FONTCW: + case ESCAPE_FONTPREV: + font++; + break; + default: + break; + } + } + if (font > 1) + mandoc_msg(MANDOCERR_CHAR_FONT, + ln, (int)(vp - buf->buf), "%s", vp); + + /* + * Approximate the effect of .char using the .tr tables. + * XXX In groff, .char and .tr interact differently. + */ + + if (ksz == 1) { + if (r->xtab == NULL) + r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); + assert((unsigned int)*kp < 128); + free(r->xtab[(int)*kp].p); + r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, + "%s%s", vp, font ? 
"\fP" : ""); + } else { + roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); + if (font) + roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); + } + return ROFF_IGN; +} + +static int +roff_ec(ROFF_ARGS) +{ + const char *p; + + p = buf->buf + pos; + if (*p == '\0') + r->escape = '\\'; + else { + r->escape = *p; + if (*++p != '\0') + mandoc_msg(MANDOCERR_ARG_EXCESS, ln, + (int)(p - buf->buf), "ec ... %s", p); + } + return ROFF_IGN; +} + +static int +roff_eo(ROFF_ARGS) +{ + r->escape = '\0'; + if (buf->buf[pos] != '\0') + mandoc_msg(MANDOCERR_ARG_SKIP, + ln, pos, "eo %s", buf->buf + pos); + return ROFF_IGN; +} + +static int +roff_nop(ROFF_ARGS) +{ + while (buf->buf[pos] == ' ') + pos++; + *offs = pos; + return ROFF_RERUN; +} + +static int +roff_tr(ROFF_ARGS) +{ + const char *p, *first, *second; + size_t fsz, ssz; + enum mandoc_esc esc; + + p = buf->buf + pos; + + if (*p == '\0') { + mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); + return ROFF_IGN; + } + + while (*p != '\0') { + fsz = ssz = 1; + + first = p++; + if (*first == '\\') { + esc = mandoc_escape(&p, NULL, NULL); + if (esc == ESCAPE_ERROR) { + mandoc_msg(MANDOCERR_ESC_BAD, ln, + (int)(p - buf->buf), "%s", first); + return ROFF_IGN; + } + fsz = (size_t)(p - first); + } + + second = p++; + if (*second == '\\') { + esc = mandoc_escape(&p, NULL, NULL); + if (esc == ESCAPE_ERROR) { + mandoc_msg(MANDOCERR_ESC_BAD, ln, + (int)(p - buf->buf), "%s", second); + return ROFF_IGN; + } + ssz = (size_t)(p - second); + } else if (*second == '\0') { + mandoc_msg(MANDOCERR_TR_ODD, ln, + (int)(first - buf->buf), "tr %s", first); + second = " "; + p--; + } + + if (fsz > 1) { + roff_setstrn(&r->xmbtab, first, fsz, + second, ssz, 0); + continue; + } + + if (r->xtab == NULL) + r->xtab = mandoc_calloc(128, + sizeof(struct roffstr)); + + free(r->xtab[(int)*first].p); + r->xtab[(int)*first].p = mandoc_strndup(second, ssz); + r->xtab[(int)*first].sz = ssz; + } + + return ROFF_IGN; +} + +/* + * Implementation of the .return request. 
+ * There is no need to call roff_userret() from here. + * The read module will call that after rewinding the reader stack + * to the place from where the current macro was called. + */ +static int +roff_return(ROFF_ARGS) +{ + if (r->mstackpos >= 0) + return ROFF_IGN | ROFF_USERRET; + + mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); + return ROFF_IGN; +} + +static int +roff_rn(ROFF_ARGS) +{ + const char *value; + char *oldn, *newn, *end; + size_t oldsz, newsz; + int deftype; + + oldn = newn = buf->buf + pos; + if (*oldn == '\0') + return ROFF_IGN; + + oldsz = roff_getname(r, &newn, ln, pos); + if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') + return ROFF_IGN; + + end = newn; + newsz = roff_getname(r, &end, ln, newn - buf->buf); + if (newsz == 0) + return ROFF_IGN; + + deftype = ROFFDEF_ANY; + value = roff_getstrn(r, oldn, oldsz, &deftype); + switch (deftype) { + case ROFFDEF_USER: + roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_PRE: + roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_REN: + roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_STD: + roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + break; + default: + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + } + return ROFF_IGN; +} + +static int +roff_shift(ROFF_ARGS) +{ + struct mctx *ctx; + int levels, i; + + levels = 1; + if (buf->buf[pos] != '\0' && + roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { + mandoc_msg(MANDOCERR_CE_NONUM, + ln, pos, "shift %s", buf->buf + pos); + 
levels = 1; + } + if (r->mstackpos < 0) { + mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); + return ROFF_IGN; + } + ctx = r->mstack + r->mstackpos; + if (levels > ctx->argc) { + mandoc_msg(MANDOCERR_SHIFT, + ln, pos, "%d, but max is %d", levels, ctx->argc); + levels = ctx->argc; + } + if (levels == 0) + return ROFF_IGN; + for (i = 0; i < levels; i++) + free(ctx->argv[i]); + ctx->argc -= levels; + for (i = 0; i < ctx->argc; i++) + ctx->argv[i] = ctx->argv[i + levels]; + return ROFF_IGN; +} + +static int +roff_so(ROFF_ARGS) +{ + char *name, *cp; + + name = buf->buf + pos; + mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); + + /* + * Handle `so'. Be EXTREMELY careful, as we shouldn't be + * opening anything that's not in our cwd or anything beneath + * it. Thus, explicitly disallow traversing up the file-system + * or using absolute paths. + */ + + if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { + mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); + buf->sz = mandoc_asprintf(&cp, + ".sp\nSee the file %s.\n.sp", name) + 1; + free(buf->buf); + buf->buf = cp; + *offs = 0; + return ROFF_REPARSE; + } + + *offs = pos; + return ROFF_SO; +} + +/* --- user defined strings and macros ------------------------------------ */ + +static int +roff_userdef(ROFF_ARGS) +{ + struct mctx *ctx; + char *arg, *ap, *dst, *src; + size_t sz; + + /* If the macro is empty, ignore it altogether. */ + + if (*r->current_string == '\0') + return ROFF_IGN; + + /* Initialize a new macro stack context. */ + + if (++r->mstackpos == r->mstacksz) { + r->mstack = mandoc_recallocarray(r->mstack, + r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); + r->mstacksz += 8; + } + ctx = r->mstack + r->mstackpos; + ctx->argsz = 0; + ctx->argc = 0; + ctx->argv = NULL; + + /* + * Collect pointers to macro argument strings, + * NUL-terminating them and escaping quotes. 
+ */ + + src = buf->buf + pos; + while (*src != '\0') { + if (ctx->argc == ctx->argsz) { + ctx->argsz += 8; + ctx->argv = mandoc_reallocarray(ctx->argv, + ctx->argsz, sizeof(*ctx->argv)); + } + arg = roff_getarg(r, &src, ln, &pos); + sz = 1; /* For the terminating NUL. */ + for (ap = arg; *ap != '\0'; ap++) + sz += *ap == '"' ? 4 : 1; + ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); + for (ap = arg; *ap != '\0'; ap++) { + if (*ap == '"') { + memcpy(dst, "\\(dq", 4); + dst += 4; + } else + *dst++ = *ap; + } + *dst = '\0'; + free(arg); + } + + /* Replace the macro invocation by the macro definition. */ + + free(buf->buf); + buf->buf = mandoc_strdup(r->current_string); + buf->sz = strlen(buf->buf) + 1; + *offs = 0; + + return buf->buf[buf->sz - 2] == '\n' ? + ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; +} + +/* + * Calling a high-level macro that was renamed with .rn. + * r->current_string has already been set up by roff_parse(). + */ +static int +roff_renamed(ROFF_ARGS) +{ + char *nbuf; + + buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, + buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; + free(buf->buf); + buf->buf = nbuf; + *offs = 0; + return ROFF_CONT; +} + +/* + * Measure the length in bytes of the roff identifier at *cpp + * and advance the pointer to the next word. + */ +static size_t +roff_getname(struct roff *r, char **cpp, int ln, int pos) +{ + char *name, *cp; + size_t namesz; + + name = *cpp; + if (*name == '\0') + return 0; + + /* Advance cp to the byte after the end of the name. */ + + for (cp = name; 1; cp++) { + namesz = cp - name; + if (*cp == '\0') + break; + if (*cp == ' ' || *cp == '\t') { + cp++; + break; + } + if (*cp != '\\') + continue; + if (cp[1] == '{' || cp[1] == '}') + break; + if (*++cp == '\\') + continue; + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s", (int)(cp - name + 1), name); + mandoc_escape((const char **)&cp, NULL, NULL); + break; + } + + /* Read past spaces. 
*/ + + while (*cp == ' ') + cp++; + + *cpp = cp; + return namesz; +} + +/* + * Store *string into the user-defined string called *name. + * To clear an existing entry, call with (*r, *name, NULL, 0). + * append == 0: replace mode + * append == 1: single-line append mode + * append == 2: multiline append mode, append '\n' after each call + */ +static void +roff_setstr(struct roff *r, const char *name, const char *string, + int append) +{ + size_t namesz; + + namesz = strlen(name); + roff_setstrn(&r->strtab, name, namesz, string, + string ? strlen(string) : 0, append); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); +} + +static void +roff_setstrn(struct roffkv **r, const char *name, size_t namesz, + const char *string, size_t stringsz, int append) +{ + struct roffkv *n; + char *c; + int i; + size_t oldch, newch; + + /* Search for an existing string with the same name. */ + n = *r; + + while (n && (namesz != n->key.sz || + strncmp(n->key.p, name, namesz))) + n = n->next; + + if (NULL == n) { + /* Create a new string table entry. */ + n = mandoc_malloc(sizeof(struct roffkv)); + n->key.p = mandoc_strndup(name, namesz); + n->key.sz = namesz; + n->val.p = NULL; + n->val.sz = 0; + n->next = *r; + *r = n; + } else if (0 == append) { + free(n->val.p); + n->val.p = NULL; + n->val.sz = 0; + } + + if (NULL == string) + return; + + /* + * One additional byte for the '\n' in multiline mode, + * and one for the terminating '\0'. + */ + newch = stringsz + (1 < append ? 2u : 1u); + + if (NULL == n->val.p) { + n->val.p = mandoc_malloc(newch); + *n->val.p = '\0'; + oldch = 0; + } else { + oldch = n->val.sz; + n->val.p = mandoc_realloc(n->val.p, oldch + newch); + } + + /* Skip existing content in the destination buffer. */ + c = n->val.p + (int)oldch; + + /* Append new content to the destination buffer. */ + i = 0; + while (i < (int)stringsz) { + /* + * Rudimentary roff copy mode: + * Handle escaped backslashes. 
+ */ + if ('\\' == string[i] && '\\' == string[i + 1]) + i++; + *c++ = string[i++]; + } + + /* Append terminating bytes. */ + if (1 < append) + *c++ = '\n'; + + *c = '\0'; + n->val.sz = (int)(c - n->val.p); +} + +static const char * +roff_getstrn(struct roff *r, const char *name, size_t len, + int *deftype) +{ + const struct roffkv *n; + int found, i; + enum roff_tok tok; + + found = 0; + for (n = r->strtab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) != 0 || + n->key.p[len] != '\0' || n->val.p == NULL) + continue; + if (*deftype & ROFFDEF_USER) { + *deftype = ROFFDEF_USER; + return n->val.p; + } else { + found = 1; + break; + } + } + for (n = r->rentab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) != 0 || + n->key.p[len] != '\0' || n->val.p == NULL) + continue; + if (*deftype & ROFFDEF_REN) { + *deftype = ROFFDEF_REN; + return n->val.p; + } else { + found = 1; + break; + } + } + for (i = 0; i < PREDEFS_MAX; i++) { + if (strncmp(name, predefs[i].name, len) != 0 || + predefs[i].name[len] != '\0') + continue; + if (*deftype & ROFFDEF_PRE) { + *deftype = ROFFDEF_PRE; + return predefs[i].str; + } else { + found = 1; + break; + } + } + if (r->man->meta.macroset != MACROSET_MAN) { + for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) != 0 || + roff_name[tok][len] != '\0') + continue; + if (*deftype & ROFFDEF_STD) { + *deftype = ROFFDEF_STD; + return NULL; + } else { + found = 1; + break; + } + } + } + if (r->man->meta.macroset != MACROSET_MDOC) { + for (tok = MAN_TH; tok < MAN_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) != 0 || + roff_name[tok][len] != '\0') + continue; + if (*deftype & ROFFDEF_STD) { + *deftype = ROFFDEF_STD; + return NULL; + } else { + found = 1; + break; + } + } + } + + if (found == 0 && *deftype != ROFFDEF_ANY) { + if (*deftype & ROFFDEF_REN) { + /* + * This might still be a request, + * so do not treat it as undefined yet. 
+ */ + *deftype = ROFFDEF_UNDEF; + return NULL; + } + + /* Using an undefined string defines it to be empty. */ + + roff_setstrn(&r->strtab, name, len, "", 0, 0); + roff_setstrn(&r->rentab, name, len, NULL, 0, 0); + } + + *deftype = 0; + return NULL; +} + +static void +roff_freestr(struct roffkv *r) +{ + struct roffkv *n, *nn; + + for (n = r; n; n = nn) { + free(n->key.p); + free(n->val.p); + nn = n->next; + free(n); + } +} + +/* --- accessors and utility functions ------------------------------------ */ + +/* + * Duplicate an input string, making the appropriate character + * conversions (as stipulated by `tr') along the way. + * Returns a heap-allocated string with all the replacements made. + */ +char * +roff_strdup(const struct roff *r, const char *p) +{ + const struct roffkv *cp; + char *res; + const char *pp; + size_t ssz, sz; + enum mandoc_esc esc; + + if (NULL == r->xmbtab && NULL == r->xtab) + return mandoc_strdup(p); + else if ('\0' == *p) + return mandoc_strdup(""); + + /* + * Step through each character looking for term matches + * (remember that a `tr' can be invoked with an escape, which is + * a glyph but the escape is multi-character). + * We only do this if the character hash has been initialised + * and the string is >0 length. + */ + + res = NULL; + ssz = 0; + + while ('\0' != *p) { + assert((unsigned int)*p < 128); + if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { + sz = r->xtab[(int)*p].sz; + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, r->xtab[(int)*p].p, sz); + ssz += sz; + p++; + continue; + } else if ('\\' != *p) { + res = mandoc_realloc(res, ssz + 2); + res[ssz++] = *p++; + continue; + } + + /* Search for term matches. */ + for (cp = r->xmbtab; cp; cp = cp->next) + if (0 == strncmp(p, cp->key.p, cp->key.sz)) + break; + + if (NULL != cp) { + /* + * A match has been found. + * Append the match to the array and move + * forward by its keysize. 
+ */ + res = mandoc_realloc(res, + ssz + cp->val.sz + 1); + memcpy(res + ssz, cp->val.p, cp->val.sz); + ssz += cp->val.sz; + p += (int)cp->key.sz; + continue; + } + + /* + * Handle escapes carefully: we need to copy + * over just the escape itself, or else we might + * do replacements within the escape itself. + * Make sure to pass along the bogus string. + */ + pp = p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + sz = strlen(pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + break; + } + /* + * We bail out on bad escapes. + * No need to warn: we already did so when + * roff_expand() was called. + */ + sz = (int)(p - pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + ssz += sz; + } + + res[(int)ssz] = '\0'; + return res; +} + +int +roff_getformat(const struct roff *r) +{ + + return r->format; +} + +/* + * Find out whether a line is a macro line or not. + * If it is, adjust the current position and return one; if it isn't, + * return zero and don't change the current position. + * If the control character has been set with `.cc', then let that take + * precedence. + * This is slightly contrary to groff, where using the non-breaking + * control character when `cc' has been invoked will cause the + * non-breaking macro contents to be printed verbatim. + */ +int +roff_getcontrol(const struct roff *r, const char *cp, int *ppos) +{ + int pos; + + pos = *ppos; + + if (r->control != '\0' && cp[pos] == r->control) + pos++; + else if (r->control != '\0') + return 0; + else if ('\\' == cp[pos] && '.' == cp[pos + 1]) + pos += 2; + else if ('.' 
== cp[pos] || '\'' == cp[pos]) + pos++; + else + return 0; + + while (' ' == cp[pos] || '\t' == cp[pos]) + pos++; + + *ppos = pos; + return 1; +} diff --git a/usr.bin/mandoc/roff.h b/usr.bin/mandoc/roff.h new file mode 100644 index 0000000..aefeafc --- /dev/null +++ b/usr.bin/mandoc/roff.h @@ -0,0 +1,561 @@ +/* $OpenBSD: roff.h,v 1.56 2020/04/08 11:54:14 schwarze Exp $ */ +/* + * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Common data types for all syntax trees and related functions. 
+ */ + +struct ohash; +struct mdoc_arg; +union mdoc_data; +struct tbl_span; +struct eqn_box; + +enum roff_macroset { + MACROSET_NONE = 0, + MACROSET_MDOC, + MACROSET_MAN +}; + +enum roff_sec { + SEC_NONE = 0, + SEC_NAME, + SEC_LIBRARY, + SEC_SYNOPSIS, + SEC_DESCRIPTION, + SEC_CONTEXT, + SEC_IMPLEMENTATION, /* IMPLEMENTATION NOTES */ + SEC_RETURN_VALUES, + SEC_ENVIRONMENT, + SEC_FILES, + SEC_EXIT_STATUS, + SEC_EXAMPLES, + SEC_DIAGNOSTICS, + SEC_COMPATIBILITY, + SEC_ERRORS, + SEC_SEE_ALSO, + SEC_STANDARDS, + SEC_HISTORY, + SEC_AUTHORS, + SEC_CAVEATS, + SEC_BUGS, + SEC_SECURITY, + SEC_CUSTOM, + SEC__MAX +}; + +enum roff_type { + ROFFT_ROOT, + ROFFT_BLOCK, + ROFFT_HEAD, + ROFFT_BODY, + ROFFT_TAIL, + ROFFT_ELEM, + ROFFT_TEXT, + ROFFT_COMMENT, + ROFFT_TBL, + ROFFT_EQN +}; + +enum roff_tok { + ROFF_br = 0, + ROFF_ce, + ROFF_fi, + ROFF_ft, + ROFF_ll, + ROFF_mc, + ROFF_nf, + ROFF_po, + ROFF_rj, + ROFF_sp, + ROFF_ta, + ROFF_ti, + ROFF_MAX, + ROFF_ab, + ROFF_ad, + ROFF_af, + ROFF_aln, + ROFF_als, + ROFF_am, + ROFF_am1, + ROFF_ami, + ROFF_ami1, + ROFF_as, + ROFF_as1, + ROFF_asciify, + ROFF_backtrace, + ROFF_bd, + ROFF_bleedat, + ROFF_blm, + ROFF_box, + ROFF_boxa, + ROFF_bp, + ROFF_BP, + ROFF_break, + ROFF_breakchar, + ROFF_brnl, + ROFF_brp, + ROFF_brpnl, + ROFF_c2, + ROFF_cc, + ROFF_cf, + ROFF_cflags, + ROFF_ch, + ROFF_char, + ROFF_chop, + ROFF_class, + ROFF_close, + ROFF_CL, + ROFF_color, + ROFF_composite, + ROFF_continue, + ROFF_cp, + ROFF_cropat, + ROFF_cs, + ROFF_cu, + ROFF_da, + ROFF_dch, + ROFF_Dd, + ROFF_de, + ROFF_de1, + ROFF_defcolor, + ROFF_dei, + ROFF_dei1, + ROFF_device, + ROFF_devicem, + ROFF_di, + ROFF_do, + ROFF_ds, + ROFF_ds1, + ROFF_dwh, + ROFF_dt, + ROFF_ec, + ROFF_ecr, + ROFF_ecs, + ROFF_el, + ROFF_em, + ROFF_EN, + ROFF_eo, + ROFF_EP, + ROFF_EQ, + ROFF_errprint, + ROFF_ev, + ROFF_evc, + ROFF_ex, + ROFF_fallback, + ROFF_fam, + ROFF_fc, + ROFF_fchar, + ROFF_fcolor, + ROFF_fdeferlig, + ROFF_feature, + ROFF_fkern, + ROFF_fl, + ROFF_flig, + ROFF_fp, + ROFF_fps, + 
ROFF_fschar, + ROFF_fspacewidth, + ROFF_fspecial, + ROFF_ftr, + ROFF_fzoom, + ROFF_gcolor, + ROFF_hc, + ROFF_hcode, + ROFF_hidechar, + ROFF_hla, + ROFF_hlm, + ROFF_hpf, + ROFF_hpfa, + ROFF_hpfcode, + ROFF_hw, + ROFF_hy, + ROFF_hylang, + ROFF_hylen, + ROFF_hym, + ROFF_hypp, + ROFF_hys, + ROFF_ie, + ROFF_if, + ROFF_ig, + /* MAN_in; ignored in mdoc(7) */ + ROFF_index, + ROFF_it, + ROFF_itc, + ROFF_IX, + ROFF_kern, + ROFF_kernafter, + ROFF_kernbefore, + ROFF_kernpair, + ROFF_lc, + ROFF_lc_ctype, + ROFF_lds, + ROFF_length, + ROFF_letadj, + ROFF_lf, + ROFF_lg, + ROFF_lhang, + ROFF_linetabs, + ROFF_lnr, + ROFF_lnrf, + ROFF_lpfx, + ROFF_ls, + ROFF_lsm, + ROFF_lt, + ROFF_mediasize, + ROFF_minss, + ROFF_mk, + ROFF_mso, + ROFF_na, + ROFF_ne, + ROFF_nh, + ROFF_nhychar, + ROFF_nm, + ROFF_nn, + ROFF_nop, + ROFF_nr, + ROFF_nrf, + ROFF_nroff, + ROFF_ns, + ROFF_nx, + ROFF_open, + ROFF_opena, + ROFF_os, + ROFF_output, + ROFF_padj, + ROFF_papersize, + ROFF_pc, + ROFF_pev, + ROFF_pi, + ROFF_PI, + ROFF_pl, + ROFF_pm, + ROFF_pn, + ROFF_pnr, + ROFF_ps, + ROFF_psbb, + ROFF_pshape, + ROFF_pso, + ROFF_ptr, + ROFF_pvs, + ROFF_rchar, + ROFF_rd, + ROFF_recursionlimit, + ROFF_return, + ROFF_rfschar, + ROFF_rhang, + ROFF_rm, + ROFF_rn, + ROFF_rnn, + ROFF_rr, + ROFF_rs, + ROFF_rt, + ROFF_schar, + ROFF_sentchar, + ROFF_shc, + ROFF_shift, + ROFF_sizes, + ROFF_so, + ROFF_spacewidth, + ROFF_special, + ROFF_spreadwarn, + ROFF_ss, + ROFF_sty, + ROFF_substring, + ROFF_sv, + ROFF_sy, + ROFF_T_, + ROFF_tc, + ROFF_TE, + ROFF_TH, + ROFF_tkf, + ROFF_tl, + ROFF_tm, + ROFF_tm1, + ROFF_tmc, + ROFF_tr, + ROFF_track, + ROFF_transchar, + ROFF_trf, + ROFF_trimat, + ROFF_trin, + ROFF_trnt, + ROFF_troff, + ROFF_TS, + ROFF_uf, + ROFF_ul, + ROFF_unformat, + ROFF_unwatch, + ROFF_unwatchn, + ROFF_vpt, + ROFF_vs, + ROFF_warn, + ROFF_warnscale, + ROFF_watch, + ROFF_watchlength, + ROFF_watchn, + ROFF_wh, + ROFF_while, + ROFF_write, + ROFF_writec, + ROFF_writem, + ROFF_xflag, + ROFF_cblock, + ROFF_RENAMED, + ROFF_USERDEF, + 
TOKEN_NONE, + MDOC_Dd, + MDOC_Dt, + MDOC_Os, + MDOC_Sh, + MDOC_Ss, + MDOC_Pp, + MDOC_D1, + MDOC_Dl, + MDOC_Bd, + MDOC_Ed, + MDOC_Bl, + MDOC_El, + MDOC_It, + MDOC_Ad, + MDOC_An, + MDOC_Ap, + MDOC_Ar, + MDOC_Cd, + MDOC_Cm, + MDOC_Dv, + MDOC_Er, + MDOC_Ev, + MDOC_Ex, + MDOC_Fa, + MDOC_Fd, + MDOC_Fl, + MDOC_Fn, + MDOC_Ft, + MDOC_Ic, + MDOC_In, + MDOC_Li, + MDOC_Nd, + MDOC_Nm, + MDOC_Op, + MDOC_Ot, + MDOC_Pa, + MDOC_Rv, + MDOC_St, + MDOC_Va, + MDOC_Vt, + MDOC_Xr, + MDOC__A, + MDOC__B, + MDOC__D, + MDOC__I, + MDOC__J, + MDOC__N, + MDOC__O, + MDOC__P, + MDOC__R, + MDOC__T, + MDOC__V, + MDOC_Ac, + MDOC_Ao, + MDOC_Aq, + MDOC_At, + MDOC_Bc, + MDOC_Bf, + MDOC_Bo, + MDOC_Bq, + MDOC_Bsx, + MDOC_Bx, + MDOC_Db, + MDOC_Dc, + MDOC_Do, + MDOC_Dq, + MDOC_Ec, + MDOC_Ef, + MDOC_Em, + MDOC_Eo, + MDOC_Fx, + MDOC_Ms, + MDOC_No, + MDOC_Ns, + MDOC_Nx, + MDOC_Ox, + MDOC_Pc, + MDOC_Pf, + MDOC_Po, + MDOC_Pq, + MDOC_Qc, + MDOC_Ql, + MDOC_Qo, + MDOC_Qq, + MDOC_Re, + MDOC_Rs, + MDOC_Sc, + MDOC_So, + MDOC_Sq, + MDOC_Sm, + MDOC_Sx, + MDOC_Sy, + MDOC_Tn, + MDOC_Ux, + MDOC_Xc, + MDOC_Xo, + MDOC_Fo, + MDOC_Fc, + MDOC_Oo, + MDOC_Oc, + MDOC_Bk, + MDOC_Ek, + MDOC_Bt, + MDOC_Hf, + MDOC_Fr, + MDOC_Ud, + MDOC_Lb, + MDOC_Lp, + MDOC_Lk, + MDOC_Mt, + MDOC_Brq, + MDOC_Bro, + MDOC_Brc, + MDOC__C, + MDOC_Es, + MDOC_En, + MDOC_Dx, + MDOC__Q, + MDOC__U, + MDOC_Ta, + MDOC_Tg, + MDOC_MAX, + MAN_TH, + MAN_SH, + MAN_SS, + MAN_TP, + MAN_TQ, + MAN_LP, + MAN_PP, + MAN_P, + MAN_IP, + MAN_HP, + MAN_SM, + MAN_SB, + MAN_BI, + MAN_IB, + MAN_BR, + MAN_RB, + MAN_R, + MAN_B, + MAN_I, + MAN_IR, + MAN_RI, + MAN_RE, + MAN_RS, + MAN_DT, + MAN_UC, + MAN_PD, + MAN_AT, + MAN_in, + MAN_SY, + MAN_YS, + MAN_OP, + MAN_EX, + MAN_EE, + MAN_UR, + MAN_UE, + MAN_MT, + MAN_ME, + MAN_MAX +}; + +/* + * Indicates that a BODY's formatting has ended, but + * the scope is still open. Used for badly nested blocks. + */ +enum mdoc_endbody { + ENDBODY_NOT = 0, + ENDBODY_SPACE /* Is broken: append a space. 
*/ +}; + +enum mandoc_os { + MANDOC_OS_OTHER = 0, + MANDOC_OS_NETBSD, + MANDOC_OS_OPENBSD +}; + +struct roff_node { + struct roff_node *parent; /* Parent AST node. */ + struct roff_node *child; /* First child AST node. */ + struct roff_node *last; /* Last child AST node. */ + struct roff_node *next; /* Sibling AST node. */ + struct roff_node *prev; /* Prior sibling AST node. */ + struct roff_node *head; /* BLOCK */ + struct roff_node *body; /* BLOCK/ENDBODY */ + struct roff_node *tail; /* BLOCK */ + struct mdoc_arg *args; /* BLOCK/ELEM */ + union mdoc_data *norm; /* Normalized arguments. */ + char *string; /* TEXT */ + char *tag; /* For less(1) :t and HTML id=. */ + struct tbl_span *span; /* TBL */ + struct eqn_box *eqn; /* EQN */ + int line; /* Input file line number. */ + int pos; /* Input file column number. */ + int flags; +#define NODE_VALID (1 << 0) /* Has been validated. */ +#define NODE_ENDED (1 << 1) /* Gone past body end mark. */ +#define NODE_BROKEN (1 << 2) /* Must validate parent when ending. */ +#define NODE_LINE (1 << 3) /* First macro/text on line. */ +#define NODE_DELIMO (1 << 4) +#define NODE_DELIMC (1 << 5) +#define NODE_EOS (1 << 6) /* At sentence boundary. */ +#define NODE_SYNPRETTY (1 << 7) /* SYNOPSIS-style formatting. */ +#define NODE_NOFILL (1 << 8) /* Fill mode switched off. */ +#define NODE_NOSRC (1 << 9) /* Generated node, not in input file. */ +#define NODE_NOPRT (1 << 10) /* Shall not print anything. */ +#define NODE_ID (1 << 11) /* Target for deep linking. */ +#define NODE_HREF (1 << 12) /* Link to another place in this page. */ + int prev_font; /* Before entering this node. */ + int aux; /* Decoded node data, type-dependent. */ + enum roff_tok tok; /* Request or macro ID. */ + enum roff_type type; /* AST node type. */ + enum roff_sec sec; /* Current named section. */ + enum mdoc_endbody end; /* BODY */ +}; + +struct roff_meta { + struct roff_node *first; /* The first node parsed. */ + char *msec; /* Manual section, usually a digit. 
*/ + char *vol; /* Manual volume title. */ + char *os; /* Operating system. */ + char *arch; /* Machine architecture. */ + char *title; /* Manual title, usually CAPS. */ + char *name; /* Leading manual name. */ + char *date; /* Normalized date. */ + char *sodest; /* .so target file name or NULL. */ + int hasbody; /* Document is not empty. */ + int rcsids; /* Bits indexed by enum mandoc_os. */ + enum mandoc_os os_e; /* Operating system. */ + enum roff_macroset macroset; /* Kind of high-level macros used. */ +}; + +extern const char *const *roff_name; + + +int arch_valid(const char *, enum mandoc_os); +void deroff(char **, const struct roff_node *); +struct roff_node *roff_node_child(struct roff_node *); +struct roff_node *roff_node_next(struct roff_node *); +struct roff_node *roff_node_prev(struct roff_node *); +int roff_node_transparent(struct roff_node *); +int roff_tok_transparent(enum roff_tok); diff --git a/usr.bin/mandoc/roff_html.c b/usr.bin/mandoc/roff_html.c new file mode 100644 index 0000000..ed9639a --- /dev/null +++ b/usr.bin/mandoc/roff_html.c @@ -0,0 +1,117 @@ +/* $OpenBSD: roff_html.c,v 1.20 2019/04/30 15:52:42 schwarze Exp $ */ +/* + * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017, 2018, 2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "out.h" +#include "html.h" + +#define ROFF_HTML_ARGS struct html *h, const struct roff_node *n + +typedef void (*roff_html_pre_fp)(ROFF_HTML_ARGS); + +static void roff_html_pre_br(ROFF_HTML_ARGS); +static void roff_html_pre_ce(ROFF_HTML_ARGS); +static void roff_html_pre_fi(ROFF_HTML_ARGS); +static void roff_html_pre_ft(ROFF_HTML_ARGS); +static void roff_html_pre_nf(ROFF_HTML_ARGS); +static void roff_html_pre_sp(ROFF_HTML_ARGS); + +static const roff_html_pre_fp roff_html_pre_acts[ROFF_MAX] = { + roff_html_pre_br, /* br */ + roff_html_pre_ce, /* ce */ + roff_html_pre_fi, /* fi */ + roff_html_pre_ft, /* ft */ + NULL, /* ll */ + NULL, /* mc */ + roff_html_pre_nf, /* nf */ + NULL, /* po */ + roff_html_pre_ce, /* rj */ + roff_html_pre_sp, /* sp */ + NULL, /* ta */ + NULL, /* ti */ +}; + + +void +roff_html_pre(struct html *h, const struct roff_node *n) +{ + assert(n->tok < ROFF_MAX); + if (roff_html_pre_acts[n->tok] != NULL) + (*roff_html_pre_acts[n->tok])(h, n); +} + +static void +roff_html_pre_br(ROFF_HTML_ARGS) +{ + print_otag(h, TAG_BR, ""); +} + +static void +roff_html_pre_ce(ROFF_HTML_ARGS) +{ + for (n = n->child->next; n != NULL; n = n->next) { + if (n->type == ROFFT_TEXT) { + if (n->flags & NODE_LINE) + roff_html_pre_br(h, n); + print_text(h, n->string); + } else + roff_html_pre(h, n); + } + roff_html_pre_br(h, n); +} + +static void +roff_html_pre_fi(ROFF_HTML_ARGS) +{ + if (html_fillmode(h, TOKEN_NONE) == ROFF_fi) + print_otag(h, TAG_BR, ""); +} + +static void 
+roff_html_pre_ft(ROFF_HTML_ARGS) +{ + const char *cp; + + cp = n->child->string; + html_setfont(h, mandoc_font(cp, (int)strlen(cp))); +} + +static void +roff_html_pre_nf(ROFF_HTML_ARGS) +{ + if (html_fillmode(h, TOKEN_NONE) == ROFF_nf) + print_otag(h, TAG_BR, ""); +} + +static void +roff_html_pre_sp(ROFF_HTML_ARGS) +{ + if (html_fillmode(h, TOKEN_NONE) == ROFF_nf) { + h->col++; + print_endline(h); + } else { + html_close_paragraph(h); + print_otag(h, TAG_P, "c", "Pp"); + } +} diff --git a/usr.bin/mandoc/roff_int.h b/usr.bin/mandoc/roff_int.h new file mode 100644 index 0000000..779f50f --- /dev/null +++ b/usr.bin/mandoc/roff_int.h @@ -0,0 +1,94 @@ +/* $OpenBSD: roff_int.h,v 1.17 2020/04/24 11:58:02 schwarze Exp $ */ +/* + * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Parser internals shared by multiple parsers. + */ + +struct ohash; +struct roff_node; +struct roff_meta; +struct roff; +struct mdoc_arg; + +enum roff_next { + ROFF_NEXT_SIBLING = 0, + ROFF_NEXT_CHILD +}; + +struct roff_man { + struct roff_meta meta; /* Public parse results. */ + struct roff *roff; /* Roff parser state data. 
*/ + struct ohash *mdocmac; /* Mdoc macro lookup table. */ + struct ohash *manmac; /* Man macro lookup table. */ + const char *os_s; /* Default operating system. */ + struct roff_node *last; /* The last node parsed. */ + struct roff_node *last_es; /* The most recent Es node. */ + int quick; /* Abort parse early. */ + int flags; /* Parse flags. */ +#define ROFF_NOFILL (1 << 1) /* Fill mode switched off. */ +#define MDOC_PBODY (1 << 2) /* In the document body. */ +#define MDOC_NEWLINE (1 << 3) /* First macro/text in a line. */ +#define MDOC_PHRASE (1 << 4) /* In a Bl -column phrase. */ +#define MDOC_PHRASELIT (1 << 5) /* Literal within a phrase. */ +#define MDOC_FREECOL (1 << 6) /* `It' invocation should close. */ +#define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting. */ +#define MDOC_KEEP (1 << 8) /* In a word keep. */ +#define MDOC_SMOFF (1 << 9) /* Spacing is off. */ +#define MDOC_NODELIMC (1 << 10) /* Disable closing delimiter handling. */ +#define MAN_ELINE (1 << 11) /* Next-line element scope. */ +#define MAN_BLINE (1 << 12) /* Next-line block scope. */ +#define MDOC_PHRASEQF (1 << 13) /* Quote first word encountered. */ +#define MDOC_PHRASEQL (1 << 14) /* Quote last word of this phrase. */ +#define MDOC_PHRASEQN (1 << 15) /* Quote first word of the next phrase. */ +#define ROFF_NONOFILL (1 << 16) /* Temporarily suspend no-fill mode. */ +#define MAN_NEWLINE MDOC_NEWLINE + enum roff_sec lastsec; /* Last section seen. */ + enum roff_sec lastnamed; /* Last standard section seen. */ + enum roff_next next; /* Where to put the next node. */ + char filesec; /* Section digit in the file name. 
*/ +}; + + +struct roff_node *roff_node_alloc(struct roff_man *, int, int, + enum roff_type, int); +void roff_node_append(struct roff_man *, struct roff_node *); +void roff_word_alloc(struct roff_man *, int, int, const char *); +void roff_word_append(struct roff_man *, const char *); +void roff_elem_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_block_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_head_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_body_alloc(struct roff_man *, int, int, int); +void roff_node_unlink(struct roff_man *, struct roff_node *); +void roff_node_relink(struct roff_man *, struct roff_node *); +void roff_node_free(struct roff_node *); +void roff_node_delete(struct roff_man *, struct roff_node *); + +struct ohash *roffhash_alloc(enum roff_tok, enum roff_tok); +enum roff_tok roffhash_find(struct ohash *, const char *, size_t); +void roffhash_free(struct ohash *); + +void roff_state_reset(struct roff_man *); +void roff_validate(struct roff_man *); + +/* + * Functions called from roff.c need to be declared here, + * not in libmdoc.h or libman.h, even if they are specific + * to either the mdoc(7) or the man(7) parser. + */ + +void man_breakscope(struct roff_man *, int); +void mdoc_argv_free(struct mdoc_arg *); diff --git a/usr.bin/mandoc/roff_term.c b/usr.bin/mandoc/roff_term.c new file mode 100644 index 0000000..ef90623 --- /dev/null +++ b/usr.bin/mandoc/roff_term.c @@ -0,0 +1,244 @@ +/* $OpenBSD: roff_term.c,v 1.19 2019/01/04 03:24:30 schwarze Exp $ */ +/* + * Copyright (c) 2010,2014,2015,2017-2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "out.h" +#include "term.h" + +#define ROFF_TERM_ARGS struct termp *p, const struct roff_node *n + +typedef void (*roff_term_pre_fp)(ROFF_TERM_ARGS); + +static void roff_term_pre_br(ROFF_TERM_ARGS); +static void roff_term_pre_ce(ROFF_TERM_ARGS); +static void roff_term_pre_ft(ROFF_TERM_ARGS); +static void roff_term_pre_ll(ROFF_TERM_ARGS); +static void roff_term_pre_mc(ROFF_TERM_ARGS); +static void roff_term_pre_po(ROFF_TERM_ARGS); +static void roff_term_pre_sp(ROFF_TERM_ARGS); +static void roff_term_pre_ta(ROFF_TERM_ARGS); +static void roff_term_pre_ti(ROFF_TERM_ARGS); + +static const roff_term_pre_fp roff_term_pre_acts[ROFF_MAX] = { + roff_term_pre_br, /* br */ + roff_term_pre_ce, /* ce */ + roff_term_pre_br, /* fi */ + roff_term_pre_ft, /* ft */ + roff_term_pre_ll, /* ll */ + roff_term_pre_mc, /* mc */ + roff_term_pre_br, /* nf */ + roff_term_pre_po, /* po */ + roff_term_pre_ce, /* rj */ + roff_term_pre_sp, /* sp */ + roff_term_pre_ta, /* ta */ + roff_term_pre_ti, /* ti */ +}; + + +void +roff_term_pre(struct termp *p, const struct roff_node *n) +{ + assert(n->tok < ROFF_MAX); + (*roff_term_pre_acts[n->tok])(p, n); +} + +static void +roff_term_pre_br(ROFF_TERM_ARGS) +{ + term_newln(p); + if (p->flags & TERMP_BRIND) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + p->trailspace = 0; + 
p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + p->flags |= TERMP_NOSPACE; + } +} + +static void +roff_term_pre_ce(ROFF_TERM_ARGS) +{ + const struct roff_node *nc1, *nc2; + + roff_term_pre_br(p, n); + p->flags |= n->tok == ROFF_ce ? TERMP_CENTER : TERMP_RIGHT; + nc1 = n->child->next; + while (nc1 != NULL) { + nc2 = nc1; + do { + nc2 = nc2->next; + } while (nc2 != NULL && (nc2->type != ROFFT_TEXT || + (nc2->flags & NODE_LINE) == 0)); + while (nc1 != nc2) { + if (nc1->type == ROFFT_TEXT) + term_word(p, nc1->string); + else + roff_term_pre(p, nc1); + nc1 = nc1->next; + } + p->flags |= TERMP_NOSPACE; + term_flushln(p); + } + p->flags &= ~(TERMP_CENTER | TERMP_RIGHT); +} + +static void +roff_term_pre_ft(ROFF_TERM_ARGS) +{ + const char *cp; + + cp = n->child->string; + switch (mandoc_font(cp, (int)strlen(cp))) { + case ESCAPE_FONTBOLD: + term_fontrepl(p, TERMFONT_BOLD); + break; + case ESCAPE_FONTITALIC: + term_fontrepl(p, TERMFONT_UNDER); + break; + case ESCAPE_FONTBI: + term_fontrepl(p, TERMFONT_BI); + break; + case ESCAPE_FONTPREV: + term_fontlast(p); + break; + case ESCAPE_FONTROMAN: + case ESCAPE_FONTCW: + term_fontrepl(p, TERMFONT_NONE); + break; + default: + break; + } +} + +static void +roff_term_pre_ll(ROFF_TERM_ARGS) +{ + term_setwidth(p, n->child != NULL ? 
n->child->string : NULL); +} + +static void +roff_term_pre_mc(ROFF_TERM_ARGS) +{ + if (p->col) { + p->flags |= TERMP_NOBREAK; + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_NOSPACE); + } + if (n->child != NULL) { + p->mc = n->child->string; + p->flags |= TERMP_NEWMC; + } else + p->flags |= TERMP_ENDMC; +} + +static void +roff_term_pre_po(ROFF_TERM_ARGS) +{ + struct roffsu su; + static int po, polast; + int ponew; + + if (n->child != NULL && + a2roffsu(n->child->string, &su, SCALE_EM) != NULL) { + ponew = term_hen(p, &su); + if (*n->child->string == '+' || + *n->child->string == '-') + ponew += po; + } else + ponew = polast; + polast = po; + po = ponew; + + ponew = po - polast + (int)p->tcol->offset; + p->tcol->offset = ponew > 0 ? ponew : 0; +} + +static void +roff_term_pre_sp(ROFF_TERM_ARGS) +{ + struct roffsu su; + int len; + + if (n->child != NULL) { + if (a2roffsu(n->child->string, &su, SCALE_VS) == NULL) + su.scale = 1.0; + len = term_vspan(p, &su); + } else + len = 1; + + if (len < 0) + p->skipvsp -= len; + else + while (len--) + term_vspace(p); + + roff_term_pre_br(p, n); +} + +static void +roff_term_pre_ta(ROFF_TERM_ARGS) +{ + term_tab_set(p, NULL); + for (n = n->child; n != NULL; n = n->next) + term_tab_set(p, n->string); +} + +static void +roff_term_pre_ti(ROFF_TERM_ARGS) +{ + struct roffsu su; + const char *cp; + int len, sign; + + roff_term_pre_br(p, n); + + if (n->child == NULL) + return; + cp = n->child->string; + if (*cp == '+') { + sign = 1; + cp++; + } else if (*cp == '-') { + sign = -1; + cp++; + } else + sign = 0; + + if (a2roffsu(cp, &su, SCALE_EM) == NULL) + return; + len = term_hen(p, &su); + + if (sign == 0) { + p->ti = len - p->tcol->offset; + p->tcol->offset = len; + } else if (sign == 1) { + p->ti = len; + p->tcol->offset += len; + } else if ((size_t)len < p->tcol->offset) { + p->ti = -len; + p->tcol->offset -= len; + } else { + p->ti = -p->tcol->offset; + p->tcol->offset = 0; + } +} diff --git a/usr.bin/mandoc/roff_validate.c 
b/usr.bin/mandoc/roff_validate.c new file mode 100644 index 0000000..6fa4f33 --- /dev/null +++ b/usr.bin/mandoc/roff_validate.c @@ -0,0 +1,149 @@ +/* $OpenBSD: roff_validate.c,v 1.19 2020/02/27 01:25:58 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2017, 2018, 2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "libmandoc.h" +#include "roff_int.h" + +#define ROFF_VALID_ARGS struct roff_man *man, struct roff_node *n + +typedef void (*roff_valid_fp)(ROFF_VALID_ARGS); + +static void roff_valid_br(ROFF_VALID_ARGS); +static void roff_valid_fi(ROFF_VALID_ARGS); +static void roff_valid_ft(ROFF_VALID_ARGS); +static void roff_valid_nf(ROFF_VALID_ARGS); +static void roff_valid_sp(ROFF_VALID_ARGS); + +static const roff_valid_fp roff_valids[ROFF_MAX] = { + roff_valid_br, /* br */ + NULL, /* ce */ + roff_valid_fi, /* fi */ + roff_valid_ft, /* ft */ + NULL, /* ll */ + NULL, /* mc */ + roff_valid_nf, /* nf */ + NULL, /* po */ + NULL, /* rj */ + roff_valid_sp, /* sp */ + NULL, /* ta */ + NULL, /* ti */ +}; + + +void +roff_validate(struct roff_man *man) +{ + struct roff_node *n; + + n = man->last; + assert(n->tok < ROFF_MAX); + if (roff_valids[n->tok] != NULL) + (*roff_valids[n->tok])(man, n); +} + +static void +roff_valid_br(ROFF_VALID_ARGS) +{ + struct roff_node *np; + + if (n->next != NULL && n->next->type == ROFFT_TEXT && + *n->next->string == ' ') { + mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, + "br before text line with leading blank"); + roff_node_delete(man, n); + return; + } + + if ((np = roff_node_prev(n)) == NULL) + return; + + switch (np->tok) { + case ROFF_br: + case ROFF_sp: + case MDOC_Pp: + mandoc_msg(MANDOCERR_PAR_SKIP, + n->line, n->pos, "br after %s", roff_name[np->tok]); + roff_node_delete(man, n); + break; + default: + break; + } +} + +static void +roff_valid_fi(ROFF_VALID_ARGS) +{ + if ((n->flags & NODE_NOFILL) == 0) + mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "fi"); +} + +static void +roff_valid_ft(ROFF_VALID_ARGS) +{ + const char *cp; + + if (n->child == NULL) { + man->next = ROFF_NEXT_CHILD; + roff_word_alloc(man, n->line, n->pos, "P"); + man->last = n; + return; + } + + cp = n->child->string; + if 
(mandoc_font(cp, (int)strlen(cp)) != ESCAPE_ERROR) + return; + mandoc_msg(MANDOCERR_FT_BAD, n->line, n->pos, "ft %s", cp); + roff_node_delete(man, n); +} + +static void +roff_valid_nf(ROFF_VALID_ARGS) +{ + if (n->flags & NODE_NOFILL) + mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "nf"); +} + +static void +roff_valid_sp(ROFF_VALID_ARGS) +{ + struct roff_node *np; + + if ((np = roff_node_prev(n)) == NULL) + return; + + switch (np->tok) { + case ROFF_br: + mandoc_msg(MANDOCERR_PAR_SKIP, + np->line, np->pos, "br before sp"); + roff_node_delete(man, np); + break; + case MDOC_Pp: + mandoc_msg(MANDOCERR_PAR_SKIP, + n->line, n->pos, "sp after Pp"); + roff_node_delete(man, n); + break; + default: + break; + } +} diff --git a/usr.bin/mandoc/st.c b/usr.bin/mandoc/st.c new file mode 100644 index 0000000..27039fe --- /dev/null +++ b/usr.bin/mandoc/st.c @@ -0,0 +1,80 @@ +/* $OpenBSD: st.c,v 1.13 2018/12/14 01:17:46 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2st(const char *p) +{ +LINE("-p1003.1-88", "IEEE Std 1003.1-1988 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-90", "IEEE Std 1003.1-1990 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1", "IEEE Std 1003.1 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1b", "IEEE Std 1003.1b (\\(lqPOSIX.1b\\(rq)") +LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(lqPOSIX.1b\\(rq)") +LINE("-p1003.1c-95", "IEEE Std 1003.1c-1995 (\\(lqPOSIX.1c\\(rq)") +LINE("-p1003.1g-2000", "IEEE Std 1003.1g-2000 (\\(lqPOSIX.1g\\(rq)") +LINE("-p1003.1i-95", "IEEE Std 1003.1i-1995 (\\(lqPOSIX.1i\\(rq)") +LINE("-p1003.2", "IEEE Std 1003.2 (\\(lqPOSIX.2\\(rq)") +LINE("-p1003.2-92", "IEEE Std 1003.2-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-p1003.2a-92", "IEEE Std 1003.2a-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-isoC", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(lqISO\\~C90, Amendment 1\\(rq)") +LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(lqISO\\~C90, Technical Corrigendum 1\\(rq)") +LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(lqISO\\~C90, Technical Corrigendum 2\\(rq)") +LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(lqISO\\~C99\\(rq)") +LINE("-isoC-2011", "ISO/IEC 9899:2011 (\\(lqISO\\~C11\\(rq)") +LINE("-iso9945-1-90", "ISO/IEC 9945-1:1990 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-2-93", "ISO/IEC 9945-2:1993 (\\(lqPOSIX.2\\(rq)") +LINE("-ansiC", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") 
+LINE("-ansiC-89", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") +LINE("-ieee754", "IEEE Std 754-1985") +LINE("-iso8802-3", "ISO 8802-3: 1989") +LINE("-iso8601", "ISO 8601") +LINE("-ieee1275-94", "IEEE Std 1275-1994 (\\(lqOpen Firmware\\(rq)") +LINE("-xpg3", "X/Open Portability Guide Issue\\~3 (\\(lqXPG3\\(rq)") +LINE("-xpg4", "X/Open Portability Guide Issue\\~4 (\\(lqXPG4\\(rq)") +LINE("-xpg4.2", "X/Open Portability Guide Issue\\~4, Version\\~2 (\\(lqXPG4.2\\(rq)") +LINE("-xbd5", "X/Open Base Definitions Issue\\~5 (\\(lqXBD5\\(rq)") +LINE("-xcu5", "X/Open Commands and Utilities Issue\\~5 (\\(lqXCU5\\(rq)") +LINE("-xsh4.2", "X/Open System Interfaces and Headers Issue\\~4, Version\\~2 (\\(lqXSH4.2\\(rq)") +LINE("-xsh5", "X/Open System Interfaces and Headers Issue\\~5 (\\(lqXSH5\\(rq)") +LINE("-xns5", "X/Open Networking Services Issue\\~5 (\\(lqXNS5\\(rq)") +LINE("-xns5.2", "X/Open Networking Services Issue\\~5.2 (\\(lqXNS5.2\\(rq)") +LINE("-xcurses4.2", "X/Open Curses Issue\\~4, Version\\~2 (\\(lqXCURSES4.2\\(rq)") +LINE("-susv1", "Version\\~1 of the Single UNIX Specification (\\(lqSUSv1\\(rq)") +LINE("-susv2", "Version\\~2 of the Single UNIX Specification (\\(lqSUSv2\\(rq)") +LINE("-susv3", "Version\\~3 of the Single UNIX Specification (\\(lqSUSv3\\(rq)") +LINE("-susv4", "Version\\~4 of the Single UNIX Specification (\\(lqSUSv4\\(rq)") +LINE("-svid4", "System\\~V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)") + + return NULL; +} diff --git a/usr.bin/mandoc/tag.c b/usr.bin/mandoc/tag.c new file mode 100644 index 0000000..c580d3b --- /dev/null +++ b/usr.bin/mandoc/tag.c @@ -0,0 +1,326 @@ +/* $OpenBSD: tag.c,v 1.36 2020/04/19 16:26:11 schwarze Exp $ */ +/* + * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Functions to tag syntax tree nodes. + * For internal use by mandoc(1) validation modules only. + */ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <assert.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "roff.h" +#include "mdoc.h" +#include "roff_int.h" +#include "tag.h" + +struct tag_entry { + struct roff_node **nodes; + size_t maxnodes; + size_t nnodes; + int prio; + char s[]; +}; + +static void tag_move_href(struct roff_man *, + struct roff_node *, const char *); +static void tag_move_id(struct roff_node *); + +static struct ohash tag_data; + + +/* + * Set up the ohash table to collect nodes + * where various marked-up terms are documented. + */ +void +tag_alloc(void) +{ + mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); +} + +void +tag_free(void) +{ + struct tag_entry *entry; + unsigned int slot; + + if (tag_data.info.free == NULL) + return; + entry = ohash_first(&tag_data, &slot); + while (entry != NULL) { + free(entry->nodes); + free(entry); + entry = ohash_next(&tag_data, &slot); + } + ohash_delete(&tag_data); + tag_data.info.free = NULL; +} + +/* + * Set a node where a term is defined, + * unless it is already defined at a lower priority. 
+ */ +void +tag_put(const char *s, int prio, struct roff_node *n) +{ + struct tag_entry *entry; + struct roff_node *nold; + const char *se; + size_t len; + unsigned int slot; + + assert(prio <= TAG_FALLBACK); + + if (s == NULL) { + if (n->child == NULL || n->child->type != ROFFT_TEXT) + return; + s = n->child->string; + switch (s[0]) { + case '-': + s++; + break; + case '\\': + switch (s[1]) { + case '&': + case '-': + case 'e': + s += 2; + break; + default: + break; + } + break; + default: + break; + } + } + + /* + * Skip whitespace and escapes and whatever follows, + * and if there is any, downgrade the priority. + */ + + len = strcspn(s, " \t\\"); + if (len == 0) + return; + + se = s + len; + if (*se != '\0' && prio < TAG_WEAK) + prio = TAG_WEAK; + + slot = ohash_qlookupi(&tag_data, s, &se); + entry = ohash_find(&tag_data, slot); + + /* Build a new entry. */ + + if (entry == NULL) { + entry = mandoc_malloc(sizeof(*entry) + len + 1); + memcpy(entry->s, s, len); + entry->s[len] = '\0'; + entry->nodes = NULL; + entry->maxnodes = entry->nnodes = 0; + ohash_insert(&tag_data, slot, entry); + } + + /* + * Lower priority numbers take precedence. + * If a better entry is already present, ignore the new one. + */ + + else if (entry->prio < prio) + return; + + /* + * If the existing entry is worse, clear it. + * In addition, a tag with priority TAG_FALLBACK + * is only used if the tag occurs exactly once. + */ + + else if (entry->prio > prio || prio == TAG_FALLBACK) { + while (entry->nnodes > 0) { + nold = entry->nodes[--entry->nnodes]; + nold->flags &= ~NODE_ID; + free(nold->tag); + nold->tag = NULL; + } + if (prio == TAG_FALLBACK) { + entry->prio = TAG_DELETE; + return; + } + } + + /* Remember the new node. 
*/ + + if (entry->maxnodes == entry->nnodes) { + entry->maxnodes += 4; + entry->nodes = mandoc_reallocarray(entry->nodes, + entry->maxnodes, sizeof(*entry->nodes)); + } + entry->nodes[entry->nnodes++] = n; + entry->prio = prio; + n->flags |= NODE_ID; + if (n->child == NULL || n->child->string != s || *se != '\0') { + assert(n->tag == NULL); + n->tag = mandoc_strndup(s, len); + } +} + +int +tag_exists(const char *tag) +{ + return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; +} + +/* + * For in-line elements, move the link target + * to the enclosing paragraph when appropriate. + */ +static void +tag_move_id(struct roff_node *n) +{ + struct roff_node *np; + + np = n; + for (;;) { + if (np->prev != NULL) + np = np->prev; + else if ((np = np->parent) == NULL) + return; + switch (np->tok) { + case MDOC_It: + switch (np->parent->parent->norm->Bl.type) { + case LIST_column: + /* Target the ROFFT_BLOCK = <tr>. */ + np = np->parent; + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + /* Target the ROFFT_HEAD = <dt>. */ + np = np->parent->head; + break; + default: + /* Target the ROFF_BODY = <li>. */ + break; + } + /* FALLTHROUGH */ + case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ + if (np->tag == NULL) { + np->tag = mandoc_strdup(n->tag == NULL ? + n->child->string : n->tag); + np->flags |= NODE_ID; + n->flags &= ~NODE_ID; + } + return; + case MDOC_Sh: + case MDOC_Ss: + case MDOC_Bd: + case MDOC_Bl: + case MDOC_D1: + case MDOC_Dl: + case MDOC_Rs: + /* Do not move past major blocks. */ + return; + default: + /* + * Move past in-line content and partial + * blocks, for example .It Xo or .It Bq Er. + */ + break; + } + } +} + +/* + * When a paragraph is tagged and starts with text, + * move the permalink to the first few words. 
+ */ +static void +tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) +{ + char *cp; + + if (n == NULL || n->type != ROFFT_TEXT || + *n->string == '\0' || *n->string == ' ') + return; + + cp = n->string; + while (cp != NULL && cp - n->string < 5) + cp = strchr(cp + 1, ' '); + + /* If the first text node is longer, split it. */ + + if (cp != NULL && cp[1] != '\0') { + man->last = n; + man->next = ROFF_NEXT_SIBLING; + roff_word_alloc(man, n->line, + n->pos + (cp - n->string), cp + 1); + man->last->flags = n->flags & ~NODE_LINE; + *cp = '\0'; + } + + assert(n->tag == NULL); + n->tag = mandoc_strdup(tag); + n->flags |= NODE_HREF; +} + +/* + * When all tags have been set, decide where to put + * the associated permalinks, and maybe move some tags + * to the beginning of the respective paragraphs. + */ +void +tag_postprocess(struct roff_man *man, struct roff_node *n) +{ + if (n->flags & NODE_ID) { + switch (n->tok) { + case MDOC_Pp: + tag_move_href(man, n->next, n->tag); + break; + case MDOC_Bd: + case MDOC_D1: + case MDOC_Dl: + tag_move_href(man, n->child, n->tag); + break; + case MDOC_Bl: + /* XXX No permalink for now. 
*/ + break; + default: + if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) + tag_move_id(n); + if (n->tok != MDOC_Tg) + n->flags |= NODE_HREF; + else if ((n->flags & NODE_ID) == 0) { + n->flags |= NODE_NOPRT; + free(n->tag); + n->tag = NULL; + } + break; + } + } + for (n = n->child; n != NULL; n = n->next) + tag_postprocess(man, n); +} diff --git a/usr.bin/mandoc/tag.h b/usr.bin/mandoc/tag.h new file mode 100644 index 0000000..7fa7504 --- /dev/null +++ b/usr.bin/mandoc/tag.h @@ -0,0 +1,35 @@ +/* $OpenBSD: tag.h,v 1.14 2020/04/18 20:28:46 schwarze Exp $ */ +/* + * Copyright (c) 2015, 2018, 2019, 2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interfaces to tag syntax tree nodes. + * For use by mandoc(1) validation modules only. + */ + +/* + * Tagging priorities. + * Lower numbers indicate higher importance. + */ +#define TAG_MANUAL 1 /* Set with a .Tg macro. */ +#define TAG_STRONG 2 /* Good automatic tagging. */ +#define TAG_WEAK (INT_MAX - 2) /* Dubious automatic tagging. */ +#define TAG_FALLBACK (INT_MAX - 1) /* Tag only used if unique. */ +#define TAG_DELETE (INT_MAX) /* Tag not used at all. 
*/ + +void tag_alloc(void); +int tag_exists(const char *); +void tag_put(const char *, int, struct roff_node *); +void tag_postprocess(struct roff_man *, struct roff_node *); +void tag_free(void); diff --git a/usr.bin/mandoc/tbl.c b/usr.bin/mandoc/tbl.c new file mode 100644 index 0000000..a3da958 --- /dev/null +++ b/usr.bin/mandoc/tbl.c @@ -0,0 +1,181 @@ +/* $OpenBSD: tbl.c,v 1.27 2018/12/14 06:33:03 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "tbl.h" +#include "libmandoc.h" +#include "tbl_parse.h" +#include "tbl_int.h" + + +void +tbl_read(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + const char *cp; + int active; + + /* + * In the options section, proceed to the layout section + * after a semicolon, or right away if there is no semicolon. + * Ignore semicolons in arguments. 
+ */ + + if (tbl->part == TBL_PART_OPTS) { + tbl->part = TBL_PART_LAYOUT; + active = 1; + for (cp = p + pos; *cp != '\0'; cp++) { + switch (*cp) { + case '(': + active = 0; + continue; + case ')': + active = 1; + continue; + case ';': + if (active) + break; + continue; + default: + continue; + } + break; + } + if (*cp == ';') { + tbl_option(tbl, ln, p, &pos); + if (p[pos] == '\0') + return; + } + } + + /* Process the other section types. */ + + switch (tbl->part) { + case TBL_PART_LAYOUT: + tbl_layout(tbl, ln, p, pos); + break; + case TBL_PART_CDATA: + tbl_cdata(tbl, ln, p, pos); + break; + default: + tbl_data(tbl, ln, p, pos); + break; + } +} + +struct tbl_node * +tbl_alloc(int pos, int line, struct tbl_node *last_tbl) +{ + struct tbl_node *tbl; + + tbl = mandoc_calloc(1, sizeof(*tbl)); + if (last_tbl != NULL) + last_tbl->next = tbl; + tbl->line = line; + tbl->pos = pos; + tbl->part = TBL_PART_OPTS; + tbl->opts.tab = '\t'; + tbl->opts.decimal = '.'; + return tbl; +} + +void +tbl_free(struct tbl_node *tbl) +{ + struct tbl_node *old_tbl; + struct tbl_row *rp; + struct tbl_cell *cp; + struct tbl_span *sp; + struct tbl_dat *dp; + + while (tbl != NULL) { + while ((rp = tbl->first_row) != NULL) { + tbl->first_row = rp->next; + while (rp->first != NULL) { + cp = rp->first; + rp->first = cp->next; + free(cp->wstr); + free(cp); + } + free(rp); + } + while ((sp = tbl->first_span) != NULL) { + tbl->first_span = sp->next; + while (sp->first != NULL) { + dp = sp->first; + sp->first = dp->next; + free(dp->string); + free(dp); + } + free(sp); + } + old_tbl = tbl; + tbl = tbl->next; + free(old_tbl); + } +} + +void +tbl_restart(int line, int pos, struct tbl_node *tbl) +{ + if (tbl->part == TBL_PART_CDATA) + mandoc_msg(MANDOCERR_TBLDATA_BLK, line, pos, "T&"); + + tbl->part = TBL_PART_LAYOUT; + tbl->line = line; + tbl->pos = pos; +} + +struct tbl_span * +tbl_span(struct tbl_node *tbl) +{ + struct tbl_span *span; + + span = tbl->current_span ? 
tbl->current_span->next + : tbl->first_span; + if (span != NULL) + tbl->current_span = span; + return span; +} + +int +tbl_end(struct tbl_node *tbl, int still_open) +{ + struct tbl_span *sp; + + if (still_open) + mandoc_msg(MANDOCERR_BLK_NOEND, tbl->line, tbl->pos, "TS"); + else if (tbl->part == TBL_PART_CDATA) + mandoc_msg(MANDOCERR_TBLDATA_BLK, tbl->line, tbl->pos, "TE"); + + sp = tbl->first_span; + while (sp != NULL && sp->first == NULL) + sp = sp->next; + if (sp == NULL) { + mandoc_msg(MANDOCERR_TBLDATA_NONE, tbl->line, tbl->pos, NULL); + return 0; + } + return 1; +} diff --git a/usr.bin/mandoc/tbl.h b/usr.bin/mandoc/tbl.h new file mode 100644 index 0000000..2e77ac1 --- /dev/null +++ b/usr.bin/mandoc/tbl.h @@ -0,0 +1,122 @@ +/* $OpenBSD: tbl.h,v 1.5 2018/12/12 21:54:30 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct tbl_opts { + int opts; +#define TBL_OPT_ALLBOX (1 << 0) /* Option "allbox". */ +#define TBL_OPT_BOX (1 << 1) /* Option "box". */ +#define TBL_OPT_CENTRE (1 << 2) /* Option "center". */ +#define TBL_OPT_DBOX (1 << 3) /* Option "doublebox". 
*/ +#define TBL_OPT_EXPAND (1 << 4) /* Option "expand". */ +#define TBL_OPT_NOKEEP (1 << 5) /* Option "nokeep". */ +#define TBL_OPT_NOSPACE (1 << 6) /* Option "nospaces". */ +#define TBL_OPT_NOWARN (1 << 7) /* Option "nowarn". */ + int cols; /* Number of columns. */ + int lvert; /* Width of left vertical line. */ + int rvert; /* Width of right vertical line. */ + char tab; /* Option "tab": cell separator. */ + char decimal; /* Option "decimalpoint". */ +}; + +enum tbl_cellt { + TBL_CELL_CENTRE, /* c, C */ + TBL_CELL_RIGHT, /* r, R */ + TBL_CELL_LEFT, /* l, L */ + TBL_CELL_NUMBER, /* n, N */ + TBL_CELL_SPAN, /* s, S */ + TBL_CELL_LONG, /* a, A */ + TBL_CELL_DOWN, /* ^ */ + TBL_CELL_HORIZ, /* _, - */ + TBL_CELL_DHORIZ, /* = */ + TBL_CELL_MAX +}; + +/* + * A cell in a layout row. + */ +struct tbl_cell { + struct tbl_cell *next; /* Layout cell to the right. */ + char *wstr; /* Min width represented as a string. */ + size_t width; /* Minimum column width. */ + size_t spacing; /* To the right of the column. */ + int vert; /* Width of subsequent vertical line. */ + int col; /* Column number, starting from 0. */ + int flags; +#define TBL_CELL_BOLD (1 << 0) /* b, B, fB */ +#define TBL_CELL_ITALIC (1 << 1) /* i, I, fI */ +#define TBL_CELL_TALIGN (1 << 2) /* t, T */ +#define TBL_CELL_UP (1 << 3) /* u, U */ +#define TBL_CELL_BALIGN (1 << 4) /* d, D */ +#define TBL_CELL_WIGN (1 << 5) /* z, Z */ +#define TBL_CELL_EQUAL (1 << 6) /* e, E */ +#define TBL_CELL_WMAX (1 << 7) /* x, X */ + enum tbl_cellt pos; +}; + +/* + * A layout row. + */ +struct tbl_row { + struct tbl_row *next; /* Layout row below. */ + struct tbl_cell *first; /* Leftmost layout cell. */ + struct tbl_cell *last; /* Rightmost layout cell. */ + int vert; /* Width of left vertical line. */ +}; + +enum tbl_datt { + TBL_DATA_NONE, /* Uninitialized row. */ + TBL_DATA_DATA, /* Contains data rather than a line. */ + TBL_DATA_HORIZ, /* _: connecting horizontal line. 
*/ + TBL_DATA_DHORIZ, /* =: connecting double horizontal line. */ + TBL_DATA_NHORIZ, /* \_: isolated horizontal line. */ + TBL_DATA_NDHORIZ /* \=: isolated double horizontal line. */ +}; + +/* + * A cell within a row of data. The "string" field contains the + * actual string value that's in the cell. The rest is layout. + */ +struct tbl_dat { + struct tbl_dat *next; /* Data cell to the right. */ + struct tbl_cell *layout; /* Associated layout cell. */ + char *string; /* Data, or NULL if not TBL_DATA_DATA. */ + int hspans; /* How many horizontal spans follow. */ + int vspans; /* How many vertical spans follow. */ + int block; /* T{ text block T} */ + enum tbl_datt pos; +}; + +enum tbl_spant { + TBL_SPAN_DATA, /* Contains data rather than a line. */ + TBL_SPAN_HORIZ, /* _: horizontal line. */ + TBL_SPAN_DHORIZ /* =: double horizontal line. */ +}; + +/* + * A row of data in a table. + */ +struct tbl_span { + struct tbl_opts *opts; /* Options for the table as a whole. */ + struct tbl_span *prev; /* Data row above. */ + struct tbl_span *next; /* Data row below. */ + struct tbl_row *layout; /* Associated layout row. */ + struct tbl_dat *first; /* Leftmost data cell. */ + struct tbl_dat *last; /* Rightmost data cell. */ + int line; /* Input file line number. */ + enum tbl_spant pos; +}; diff --git a/usr.bin/mandoc/tbl_data.c b/usr.bin/mandoc/tbl_data.c new file mode 100644 index 0000000..b2556d9 --- /dev/null +++ b/usr.bin/mandoc/tbl_data.c @@ -0,0 +1,300 @@ +/* $OpenBSD: tbl_data.c,v 1.40 2020/01/11 20:48:13 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "tbl.h" +#include "libmandoc.h" +#include "tbl_int.h" + +static void getdata(struct tbl_node *, struct tbl_span *, + int, const char *, int *); +static struct tbl_span *newspan(struct tbl_node *, int, + struct tbl_row *); + + +static void +getdata(struct tbl_node *tbl, struct tbl_span *dp, + int ln, const char *p, int *pos) +{ + struct tbl_dat *dat, *pdat; + struct tbl_cell *cp; + struct tbl_span *pdp; + int sv; + + /* + * Determine the length of the string in the cell + * and advance the parse point to the end of the cell. + */ + + sv = *pos; + while (p[*pos] != '\0' && p[*pos] != tbl->opts.tab) + (*pos)++; + + /* Advance to the next layout cell, skipping spanners. */ + + cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next; + while (cp != NULL && cp->pos == TBL_CELL_SPAN) + cp = cp->next; + + /* + * If the current layout row is out of cells, allocate + * a new cell if another row of the table has at least + * this number of columns, or discard the input if we + * are beyond the last column of the table as a whole. 
+ */ + + if (cp == NULL) { + if (dp->layout->last->col + 1 < dp->opts->cols) { + cp = mandoc_calloc(1, sizeof(*cp)); + cp->pos = TBL_CELL_LEFT; + cp->spacing = SIZE_MAX; + dp->layout->last->next = cp; + cp->col = dp->layout->last->col + 1; + dp->layout->last = cp; + } else { + mandoc_msg(MANDOCERR_TBLDATA_EXTRA, + ln, sv, "%s", p + sv); + while (p[*pos] != '\0') + (*pos)++; + return; + } + } + + dat = mandoc_malloc(sizeof(*dat)); + dat->layout = cp; + dat->next = NULL; + dat->string = NULL; + dat->hspans = 0; + dat->vspans = 0; + dat->block = 0; + dat->pos = TBL_DATA_NONE; + + /* + * Increment the number of vertical spans in a data cell above, + * if this cell vertically extends one or more cells above. + * The iteration must be done over data rows, + * not over layout rows, because one layout row + * can be reused for more than one data row. + */ + + if (cp->pos == TBL_CELL_DOWN || + (*pos - sv == 2 && p[sv] == '\\' && p[sv + 1] == '^')) { + pdp = dp; + while ((pdp = pdp->prev) != NULL) { + pdat = pdp->first; + while (pdat != NULL && + pdat->layout->col < dat->layout->col) + pdat = pdat->next; + if (pdat == NULL) + break; + if (pdat->layout->pos != TBL_CELL_DOWN && + strcmp(pdat->string, "\\^") != 0) { + pdat->vspans++; + break; + } + } + } + + /* + * Count the number of horizontal spans to the right of this cell. + * This is purely a matter of the layout, independent of the data. + */ + + for (cp = cp->next; cp != NULL; cp = cp->next) + if (cp->pos == TBL_CELL_SPAN) + dat->hspans++; + else + break; + + /* Append the new data cell to the data row. */ + + if (dp->last == NULL) + dp->first = dat; + else + dp->last->next = dat; + dp->last = dat; + + /* + * Check for a continued-data scope opening. This consists of a + * trailing `T{' at the end of the line. Subsequent lines, + * until a standalone `T}', are included in our cell. 
+ */ + + if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') { + tbl->part = TBL_PART_CDATA; + return; + } + + dat->string = mandoc_strndup(p + sv, *pos - sv); + + if (p[*pos] != '\0') + (*pos)++; + + if ( ! strcmp(dat->string, "_")) + dat->pos = TBL_DATA_HORIZ; + else if ( ! strcmp(dat->string, "=")) + dat->pos = TBL_DATA_DHORIZ; + else if ( ! strcmp(dat->string, "\\_")) + dat->pos = TBL_DATA_NHORIZ; + else if ( ! strcmp(dat->string, "\\=")) + dat->pos = TBL_DATA_NDHORIZ; + else + dat->pos = TBL_DATA_DATA; + + if ((dat->layout->pos == TBL_CELL_HORIZ || + dat->layout->pos == TBL_CELL_DHORIZ || + dat->layout->pos == TBL_CELL_DOWN) && + dat->pos == TBL_DATA_DATA && *dat->string != '\0') + mandoc_msg(MANDOCERR_TBLDATA_SPAN, + ln, sv, "%s", dat->string); +} + +void +tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_dat *dat; + size_t sz; + + dat = tbl->last_span->last; + + if (p[pos] == 'T' && p[pos + 1] == '}') { + pos += 2; + if (p[pos] == tbl->opts.tab) { + tbl->part = TBL_PART_DATA; + pos++; + while (p[pos] != '\0') + getdata(tbl, tbl->last_span, ln, p, &pos); + return; + } else if (p[pos] == '\0') { + tbl->part = TBL_PART_DATA; + return; + } + + /* Fallthrough: T} is part of a word. 
*/ + } + + dat->pos = TBL_DATA_DATA; + dat->block = 1; + + if (dat->string != NULL) { + sz = strlen(p + pos) + strlen(dat->string) + 2; + dat->string = mandoc_realloc(dat->string, sz); + (void)strlcat(dat->string, " ", sz); + (void)strlcat(dat->string, p + pos, sz); + } else + dat->string = mandoc_strdup(p + pos); + + if (dat->layout->pos == TBL_CELL_DOWN) + mandoc_msg(MANDOCERR_TBLDATA_SPAN, + ln, pos, "%s", dat->string); +} + +static struct tbl_span * +newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) +{ + struct tbl_span *dp; + + dp = mandoc_calloc(1, sizeof(*dp)); + dp->line = line; + dp->opts = &tbl->opts; + dp->layout = rp; + dp->prev = tbl->last_span; + + if (dp->prev == NULL) { + tbl->first_span = dp; + tbl->current_span = NULL; + } else + dp->prev->next = dp; + tbl->last_span = dp; + + return dp; +} + +void +tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_row *rp; + struct tbl_cell *cp; + struct tbl_span *sp; + + rp = (sp = tbl->last_span) == NULL ? tbl->first_row : + sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ? + sp->layout->next : sp->layout; + + assert(rp != NULL); + + if (p[1] == '\0') { + switch (p[0]) { + case '.': + /* + * Empty request lines must be handled here + * and cannot be discarded in roff_parseln() + * because in the layout section, they + * are significant and end the layout. + */ + return; + case '_': + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_HORIZ; + return; + case '=': + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DHORIZ; + return; + default: + break; + } + } + + /* + * If the layout row contains nothing but horizontal lines, + * allocate an empty span for it and assign the current span + * to the next layout row accepting data. 
+ */ + + while (rp->next != NULL) { + if (rp->last->col + 1 < tbl->opts.cols) + break; + for (cp = rp->first; cp != NULL; cp = cp->next) + if (cp->pos != TBL_CELL_HORIZ && + cp->pos != TBL_CELL_DHORIZ) + break; + if (cp != NULL) + break; + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DATA; + rp = rp->next; + } + + /* Process a real data row. */ + + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DATA; + while (p[pos] != '\0') + getdata(tbl, sp, ln, p, &pos); +} diff --git a/usr.bin/mandoc/tbl_html.c b/usr.bin/mandoc/tbl_html.c new file mode 100644 index 0000000..cbc4bfa --- /dev/null +++ b/usr.bin/mandoc/tbl_html.c @@ -0,0 +1,255 @@ +/* $OpenBSD: tbl_html.c,v 1.28 2019/03/17 18:20:07 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "tbl.h" +#include "out.h" +#include "html.h" + +static void html_tblopen(struct html *, const struct tbl_span *); +static size_t html_tbl_len(size_t, void *); +static size_t html_tbl_strlen(const char *, void *); +static size_t html_tbl_sulen(const struct roffsu *, void *); + + +static size_t +html_tbl_len(size_t sz, void *arg) +{ + return sz; +} + +static size_t +html_tbl_strlen(const char *p, void *arg) +{ + return strlen(p); +} + +static size_t +html_tbl_sulen(const struct roffsu *su, void *arg) +{ + if (su->scale < 0.0) + return 0; + + switch (su->unit) { + case SCALE_FS: /* 2^16 basic units */ + return su->scale * 65536.0 / 24.0; + case SCALE_IN: /* 10 characters per inch */ + return su->scale * 10.0; + case SCALE_CM: /* 2.54 cm per inch */ + return su->scale * 10.0 / 2.54; + case SCALE_PC: /* 6 pica per inch */ + case SCALE_VS: + return su->scale * 10.0 / 6.0; + case SCALE_EN: + case SCALE_EM: + return su->scale; + case SCALE_PT: /* 12 points per pica */ + return su->scale * 10.0 / 6.0 / 12.0; + case SCALE_BU: /* 24 basic units per character */ + return su->scale / 24.0; + case SCALE_MM: /* 1/1000 inch */ + return su->scale / 100.0; + default: + abort(); + } +} + +static void +html_tblopen(struct html *h, const struct tbl_span *sp) +{ + html_close_paragraph(h); + if (h->tbl.cols == NULL) { + h->tbl.len = html_tbl_len; + h->tbl.slen = html_tbl_strlen; + h->tbl.sulen = html_tbl_sulen; + tblcalc(&h->tbl, sp, 0, 0); + } + assert(NULL == h->tblt); + h->tblt = print_otag(h, TAG_TABLE, "c?ss", "tbl", + "border", + sp->opts->opts & TBL_OPT_ALLBOX ? "1" : NULL, + "border-style", + sp->opts->opts & TBL_OPT_DBOX ? "double" : + sp->opts->opts & TBL_OPT_BOX ? "solid" : NULL, + "border-top-style", + sp->pos == TBL_SPAN_DHORIZ ? "double" : + sp->pos == TBL_SPAN_HORIZ ? 
"solid" : NULL); +} + +void +print_tblclose(struct html *h) +{ + + assert(h->tblt); + print_tagq(h, h->tblt); + h->tblt = NULL; +} + +void +print_tbl(struct html *h, const struct tbl_span *sp) +{ + const struct tbl_dat *dp; + const struct tbl_cell *cp; + const struct tbl_span *psp; + struct tag *tt; + const char *hspans, *vspans, *halign, *valign; + const char *bborder, *lborder, *rborder; + char hbuf[4], vbuf[4]; + int i; + + if (h->tblt == NULL) + html_tblopen(h, sp); + + /* + * Horizontal lines spanning the whole table + * are handled by previous or following table rows. + */ + + if (sp->pos != TBL_SPAN_DATA) + return; + + /* Inhibit printing of spaces: we do padding ourselves. */ + + h->flags |= HTML_NONOSPACE; + h->flags |= HTML_NOSPACE; + + /* Draw a vertical line left of this row? */ + + switch (sp->layout->vert) { + case 2: + lborder = "double"; + break; + case 1: + lborder = "solid"; + break; + default: + lborder = NULL; + break; + } + + /* Draw a horizontal line below this row? */ + + bborder = NULL; + if ((psp = sp->next) != NULL) { + switch (psp->pos) { + case TBL_SPAN_DHORIZ: + bborder = "double"; + break; + case TBL_SPAN_HORIZ: + bborder = "solid"; + break; + default: + break; + } + } + + tt = print_otag(h, TAG_TR, "ss", + "border-left-style", lborder, + "border-bottom-style", bborder); + + for (dp = sp->first; dp != NULL; dp = dp->next) { + print_stagq(h, tt); + + /* + * Do not generate <td> elements for continuations + * of spanned cells. Larger <td> elements covering + * this space were already generated earlier. + */ + + cp = dp->layout; + if (cp->pos == TBL_CELL_SPAN || cp->pos == TBL_CELL_DOWN || + (dp->string != NULL && strcmp(dp->string, "\\^") == 0)) + continue; + + /* Determine the attribute values. 
*/ + + if (dp->hspans > 0) { + (void)snprintf(hbuf, sizeof(hbuf), + "%d", dp->hspans + 1); + hspans = hbuf; + } else + hspans = NULL; + if (dp->vspans > 0) { + (void)snprintf(vbuf, sizeof(vbuf), + "%d", dp->vspans + 1); + vspans = vbuf; + } else + vspans = NULL; + + switch (cp->pos) { + case TBL_CELL_CENTRE: + halign = "center"; + break; + case TBL_CELL_RIGHT: + case TBL_CELL_NUMBER: + halign = "right"; + break; + default: + halign = NULL; + break; + } + if (cp->flags & TBL_CELL_TALIGN) + valign = "top"; + else if (cp->flags & TBL_CELL_BALIGN) + valign = "bottom"; + else + valign = NULL; + + for (i = dp->hspans; i > 0; i--) + cp = cp->next; + switch (cp->vert) { + case 2: + rborder = "double"; + break; + case 1: + rborder = "solid"; + break; + default: + rborder = NULL; + break; + } + + /* Print the element and the attributes. */ + + print_otag(h, TAG_TD, "??sss", + "colspan", hspans, "rowspan", vspans, + "vertical-align", valign, + "text-align", halign, + "border-right-style", rborder); + if (dp->string != NULL) + print_text(h, dp->string); + } + + print_tagq(h, tt); + + h->flags &= ~HTML_NONOSPACE; + + if (sp->next == NULL) { + assert(h->tbl.cols); + free(h->tbl.cols); + h->tbl.cols = NULL; + print_tblclose(h); + } +} diff --git a/usr.bin/mandoc/tbl_int.h b/usr.bin/mandoc/tbl_int.h new file mode 100644 index 0000000..299ceaa --- /dev/null +++ b/usr.bin/mandoc/tbl_int.h @@ -0,0 +1,47 @@ +/* $OpenBSD: tbl_int.h,v 1.2 2018/12/14 06:33:03 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011,2013,2015,2017,2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interfaces of the tbl(7) parser. + * For use inside the tbl(7) parser only. + */ + +enum tbl_part { + TBL_PART_OPTS, /* In the first line, ends with semicolon. */ + TBL_PART_LAYOUT, /* In the layout section, ends with full stop. */ + TBL_PART_DATA, /* In the data section, ends with TE. */ + TBL_PART_CDATA /* In a T{ block, ends with T} */ +}; + +struct tbl_node { + struct tbl_opts opts; /* Options for the whole table. */ + struct tbl_node *next; /* Next table. */ + struct tbl_row *first_row; /* First layout row. */ + struct tbl_row *last_row; /* Last layout row. */ + struct tbl_span *first_span; /* First data row. */ + struct tbl_span *current_span; /* Data row being parsed. */ + struct tbl_span *last_span; /* Last data row. */ + int line; /* Line number in input file. */ + int pos; /* Column number in input file. */ + enum tbl_part part; /* Table section being parsed. 
*/ +}; + + +void tbl_option(struct tbl_node *, int, const char *, int *); +void tbl_layout(struct tbl_node *, int, const char *, int); +void tbl_data(struct tbl_node *, int, const char *, int); +void tbl_cdata(struct tbl_node *, int, const char *, int); +void tbl_reset(struct tbl_node *); diff --git a/usr.bin/mandoc/tbl_layout.c b/usr.bin/mandoc/tbl_layout.c new file mode 100644 index 0000000..aae36d9 --- /dev/null +++ b/usr.bin/mandoc/tbl_layout.c @@ -0,0 +1,371 @@ +/* $OpenBSD: tbl_layout.c,v 1.35 2018/12/14 05:17:45 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "tbl.h" +#include "libmandoc.h" +#include "tbl_int.h" + +struct tbl_phrase { + char name; + enum tbl_cellt key; +}; + +static const struct tbl_phrase keys[] = { + { 'c', TBL_CELL_CENTRE }, + { 'r', TBL_CELL_RIGHT }, + { 'l', TBL_CELL_LEFT }, + { 'n', TBL_CELL_NUMBER }, + { 's', TBL_CELL_SPAN }, + { 'a', TBL_CELL_LONG }, + { '^', TBL_CELL_DOWN }, + { '-', TBL_CELL_HORIZ }, + { '_', TBL_CELL_HORIZ }, + { '=', TBL_CELL_DHORIZ } +}; + +#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void mods(struct tbl_node *, struct tbl_cell *, + int, const char *, int *); +static void cell(struct tbl_node *, struct tbl_row *, + int, const char *, int *); +static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *, + enum tbl_cellt); + + +static void +mods(struct tbl_node *tbl, struct tbl_cell *cp, + int ln, const char *p, int *pos) +{ + char *endptr; + size_t sz; + +mod: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + /* Row delimiters and cell specifiers end modifier lists. */ + + if (strchr(".,-=^_ACLNRSaclnrs", p[*pos]) != NULL) + return; + + /* Throw away parenthesised expression. */ + + if ('(' == p[*pos]) { + (*pos)++; + while (p[*pos] && ')' != p[*pos]) + (*pos)++; + if (')' == p[*pos]) { + (*pos)++; + goto mod; + } + mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, ln, *pos, NULL); + return; + } + + /* Parse numerical spacing from modifier string. 
*/ + + if (isdigit((unsigned char)p[*pos])) { + cp->spacing = strtoull(p + *pos, &endptr, 10); + *pos = endptr - p; + goto mod; + } + + switch (tolower((unsigned char)p[(*pos)++])) { + case 'b': + cp->flags |= TBL_CELL_BOLD; + goto mod; + case 'd': + cp->flags |= TBL_CELL_BALIGN; + goto mod; + case 'e': + cp->flags |= TBL_CELL_EQUAL; + goto mod; + case 'f': + break; + case 'i': + cp->flags |= TBL_CELL_ITALIC; + goto mod; + case 'm': + mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, ln, *pos, "m"); + goto mod; + case 'p': + case 'v': + if (p[*pos] == '-' || p[*pos] == '+') + (*pos)++; + while (isdigit((unsigned char)p[*pos])) + (*pos)++; + goto mod; + case 't': + cp->flags |= TBL_CELL_TALIGN; + goto mod; + case 'u': + cp->flags |= TBL_CELL_UP; + goto mod; + case 'w': + sz = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + sz] != '\0' && p[*pos + sz] != ')') + sz++; + } else + while (isdigit((unsigned char)p[*pos + sz])) + sz++; + if (sz) { + free(cp->wstr); + cp->wstr = mandoc_strndup(p + *pos, sz); + *pos += sz; + if (p[*pos] == ')') + (*pos)++; + } + goto mod; + case 'x': + cp->flags |= TBL_CELL_WMAX; + goto mod; + case 'z': + cp->flags |= TBL_CELL_WIGN; + goto mod; + case '|': + if (cp->vert < 2) + cp->vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + ln, *pos - 1, NULL); + goto mod; + default: + mandoc_msg(MANDOCERR_TBLLAYOUT_CHAR, + ln, *pos - 1, "%c", p[*pos - 1]); + goto mod; + } + + /* Ignore parenthised font names for now. */ + + if (p[*pos] == '(') + goto mod; + + /* Support only one-character font-names for now. 
*/ + + if (p[*pos] == '\0' || (p[*pos + 1] != ' ' && p[*pos + 1] != '.')) { + mandoc_msg(MANDOCERR_FT_BAD, + ln, *pos, "TS %s", p + *pos - 1); + if (p[*pos] != '\0') + (*pos)++; + if (p[*pos] != '\0') + (*pos)++; + goto mod; + } + + switch (p[(*pos)++]) { + case '3': + case 'B': + cp->flags |= TBL_CELL_BOLD; + goto mod; + case '2': + case 'I': + cp->flags |= TBL_CELL_ITALIC; + goto mod; + case '1': + case 'R': + goto mod; + default: + mandoc_msg(MANDOCERR_FT_BAD, + ln, *pos - 1, "TS f%c", p[*pos - 1]); + goto mod; + } +} + +static void +cell(struct tbl_node *tbl, struct tbl_row *rp, + int ln, const char *p, int *pos) +{ + int i; + enum tbl_cellt c; + + /* Handle leading vertical lines */ + + while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') { + if (p[*pos] == '|') { + if (rp->vert < 2) + rp->vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + ln, *pos, NULL); + } + (*pos)++; + } + +again: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + if (p[*pos] == '.' || p[*pos] == '\0') + return; + + /* Parse the column position (`c', `l', `r', ...). */ + + for (i = 0; i < KEYS_MAX; i++) + if (tolower((unsigned char)p[*pos]) == keys[i].name) + break; + + if (i == KEYS_MAX) { + mandoc_msg(MANDOCERR_TBLLAYOUT_CHAR, + ln, *pos, "%c", p[*pos]); + (*pos)++; + goto again; + } + c = keys[i].key; + + /* Special cases of spanners. */ + + if (c == TBL_CELL_SPAN) { + if (rp->last == NULL) + mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN, ln, *pos, NULL); + else if (rp->last->pos == TBL_CELL_HORIZ || + rp->last->pos == TBL_CELL_DHORIZ) + c = rp->last->pos; + } else if (c == TBL_CELL_DOWN && rp == tbl->first_row) + mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN, ln, *pos, NULL); + + (*pos)++; + + /* Allocate cell then parse its modifiers. */ + + mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos); +} + +void +tbl_layout(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_row *rp; + + rp = NULL; + for (;;) { + /* Skip whitespace before and after each cell. 
*/ + + while (p[pos] == ' ' || p[pos] == '\t') + pos++; + + switch (p[pos]) { + case ',': /* Next row on this input line. */ + pos++; + rp = NULL; + continue; + case '\0': /* Next row on next input line. */ + return; + case '.': /* End of layout. */ + pos++; + tbl->part = TBL_PART_DATA; + + /* + * When the layout is completely empty, + * default to one left-justified column. + */ + + if (tbl->first_row == NULL) { + tbl->first_row = tbl->last_row = + mandoc_calloc(1, sizeof(*rp)); + } + if (tbl->first_row->first == NULL) { + mandoc_msg(MANDOCERR_TBLLAYOUT_NONE, + ln, pos, NULL); + cell_alloc(tbl, tbl->first_row, + TBL_CELL_LEFT); + if (tbl->opts.lvert < tbl->first_row->vert) + tbl->opts.lvert = tbl->first_row->vert; + return; + } + + /* + * Search for the widest line + * along the left and right margins. + */ + + for (rp = tbl->first_row; rp; rp = rp->next) { + if (tbl->opts.lvert < rp->vert) + tbl->opts.lvert = rp->vert; + if (rp->last != NULL && + rp->last->col + 1 == tbl->opts.cols && + tbl->opts.rvert < rp->last->vert) + tbl->opts.rvert = rp->last->vert; + + /* If the last line is empty, drop it. */ + + if (rp->next != NULL && + rp->next->first == NULL) { + free(rp->next); + rp->next = NULL; + tbl->last_row = rp; + } + } + return; + default: /* Cell. */ + break; + } + + /* + * If the last line had at least one cell, + * start a new one; otherwise, continue it. 
+ */ + + if (rp == NULL) { + if (tbl->last_row == NULL || + tbl->last_row->first != NULL) { + rp = mandoc_calloc(1, sizeof(*rp)); + if (tbl->last_row) + tbl->last_row->next = rp; + else + tbl->first_row = rp; + tbl->last_row = rp; + } else + rp = tbl->last_row; + } + cell(tbl, rp, ln, p, &pos); + } +} + +static struct tbl_cell * +cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) +{ + struct tbl_cell *p, *pp; + + p = mandoc_calloc(1, sizeof(*p)); + p->spacing = SIZE_MAX; + p->pos = pos; + + if ((pp = rp->last) != NULL) { + pp->next = p; + p->col = pp->col + 1; + } else + rp->first = p; + rp->last = p; + + if (tbl->opts.cols <= p->col) + tbl->opts.cols = p->col + 1; + + return p; +} diff --git a/usr.bin/mandoc/tbl_opts.c b/usr.bin/mandoc/tbl_opts.c new file mode 100644 index 0000000..8f1e77c --- /dev/null +++ b/usr.bin/mandoc/tbl_opts.c @@ -0,0 +1,171 @@ +/* $OpenBSD: tbl_opts.c,v 1.16 2018/12/14 05:17:45 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "tbl.h" +#include "libmandoc.h" +#include "tbl_int.h" + +#define KEY_DPOINT 0 +#define KEY_DELIM 1 +#define KEY_LINESIZE 2 +#define KEY_TAB 3 + +struct tbl_phrase { + const char *name; + int key; +}; + +static const struct tbl_phrase keys[] = { + {"decimalpoint", 0}, + {"delim", 0}, + {"linesize", 0}, + {"tab", 0}, + {"allbox", TBL_OPT_ALLBOX | TBL_OPT_BOX}, + {"box", TBL_OPT_BOX}, + {"frame", TBL_OPT_BOX}, + {"center", TBL_OPT_CENTRE}, + {"centre", TBL_OPT_CENTRE}, + {"doublebox", TBL_OPT_DBOX}, + {"doubleframe", TBL_OPT_DBOX}, + {"expand", TBL_OPT_EXPAND}, + {"nokeep", TBL_OPT_NOKEEP}, + {"nospaces", TBL_OPT_NOSPACE}, + {"nowarn", TBL_OPT_NOWARN}, +}; + +#define KEY_MAXKEYS ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void arg(struct tbl_node *, int, const char *, int *, int); + + +static void +arg(struct tbl_node *tbl, int ln, const char *p, int *pos, int key) +{ + int len, want; + + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + /* Arguments are enclosed in parentheses. */ + + len = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + len] != ')') + len++; + } + + switch (key) { + case KEY_DELIM: + mandoc_msg(MANDOCERR_TBLOPT_EQN, + ln, *pos, "%.*s", len, p + *pos); + want = 2; + break; + case KEY_TAB: + want = 1; + if (len == want) + tbl->opts.tab = p[*pos]; + break; + case KEY_LINESIZE: + want = 0; + break; + case KEY_DPOINT: + want = 1; + if (len == want) + tbl->opts.decimal = p[*pos]; + break; + default: + abort(); + } + + if (len == 0) + mandoc_msg(MANDOCERR_TBLOPT_NOARG, ln, *pos, + "%s", keys[key].name); + else if (want && len != want) + mandoc_msg(MANDOCERR_TBLOPT_ARGSZ, ln, *pos, + "%s want %d have %d", keys[key].name, want, len); + + *pos += len; + if (p[*pos] == ')') + (*pos)++; +} + +/* + * Parse one line of options up to the semicolon. 
+ * Each option can be preceded by blanks and/or commas, + * and some options are followed by arguments. + */ +void +tbl_option(struct tbl_node *tbl, int ln, const char *p, int *offs) +{ + int i, pos, len; + + pos = *offs; + for (;;) { + while (p[pos] == ' ' || p[pos] == '\t' || p[pos] == ',') + pos++; + + if (p[pos] == ';') { + *offs = pos + 1; + return; + } + + /* Parse one option name. */ + + len = 0; + while (isalpha((unsigned char)p[pos + len])) + len++; + + if (len == 0) { + mandoc_msg(MANDOCERR_TBLOPT_ALPHA, + ln, pos, "%c", p[pos]); + pos++; + continue; + } + + /* Look up the option name. */ + + i = 0; + while (i < KEY_MAXKEYS && + (strncasecmp(p + pos, keys[i].name, len) || + keys[i].name[len] != '\0')) + i++; + + if (i == KEY_MAXKEYS) { + mandoc_msg(MANDOCERR_TBLOPT_BAD, + ln, pos, "%.*s", len, p + pos); + pos += len; + continue; + } + + /* Handle the option. */ + + pos += len; + if (keys[i].key) + tbl->opts.opts |= keys[i].key; + else + arg(tbl, ln, p, &pos, i); + } +} diff --git a/usr.bin/mandoc/tbl_parse.h b/usr.bin/mandoc/tbl_parse.h new file mode 100644 index 0000000..b564490 --- /dev/null +++ b/usr.bin/mandoc/tbl_parse.h @@ -0,0 +1,30 @@ +/* $OpenBSD: tbl_parse.h,v 1.2 2018/12/14 06:33:03 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * External interface of the tbl(7) parser. + * For use in the roff(7) and tbl(7) parsers only. + */ + +struct tbl_node; +struct tbl_span; + +struct tbl_node *tbl_alloc(int, int, struct tbl_node *); +int tbl_end(struct tbl_node *, int); +void tbl_free(struct tbl_node *); +void tbl_read(struct tbl_node *, int, const char *, int); +void tbl_restart(int, int, struct tbl_node *); +struct tbl_span *tbl_span(struct tbl_node *); diff --git a/usr.bin/mandoc/tbl_term.c b/usr.bin/mandoc/tbl_term.c new file mode 100644 index 0000000..238bf7a --- /dev/null +++ b/usr.bin/mandoc/tbl_term.c @@ -0,0 +1,943 @@ +/* $OpenBSD: tbl_term.c,v 1.61 2020/01/11 16:24:33 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "tbl.h" +#include "out.h" +#include "term.h" + +#define IS_HORIZ(cp) ((cp)->pos == TBL_CELL_HORIZ || \ + (cp)->pos == TBL_CELL_DHORIZ) + + +static size_t term_tbl_len(size_t, void *); +static size_t term_tbl_strlen(const char *, void *); +static size_t term_tbl_sulen(const struct roffsu *, void *); +static void tbl_data(struct termp *, const struct tbl_opts *, + const struct tbl_cell *, + const struct tbl_dat *, + const struct roffcol *); +static void tbl_direct_border(struct termp *, int, size_t); +static void tbl_fill_border(struct termp *, int, size_t); +static void tbl_fill_char(struct termp *, char, size_t); +static void tbl_fill_string(struct termp *, const char *, size_t); +static void tbl_hrule(struct termp *, const struct tbl_span *, + const struct tbl_span *, const struct tbl_span *, + int); +static void tbl_literal(struct termp *, const struct tbl_dat *, + const struct roffcol *); +static void tbl_number(struct termp *, const struct tbl_opts *, + const struct tbl_dat *, + const struct roffcol *); +static void tbl_word(struct termp *, const struct tbl_dat *); + + +/* + * The following border-character tables are indexed + * by ternary (3-based) numbers, as opposed to binary or decimal. + * Each ternary digit describes the line width in one direction: + * 0 means no line, 1 single or light line, 2 double or heavy line. + */ + +/* Positional values of the four directions. */ +#define BRIGHT 1 +#define BDOWN 3 +#define BLEFT (3 * 3) +#define BUP (3 * 3 * 3) +#define BHORIZ (BLEFT + BRIGHT) + +/* Code points to use for each combination of widths. 
*/ +static const int borders_utf8[81] = { + 0x0020, 0x2576, 0x257a, /* 000 right */ + 0x2577, 0x250c, 0x250d, /* 001 down */ + 0x257b, 0x250e, 0x250f, /* 002 */ + 0x2574, 0x2500, 0x257c, /* 010 left */ + 0x2510, 0x252c, 0x252e, /* 011 left down */ + 0x2512, 0x2530, 0x2532, /* 012 */ + 0x2578, 0x257e, 0x2501, /* 020 left */ + 0x2511, 0x252d, 0x252f, /* 021 left down */ + 0x2513, 0x2531, 0x2533, /* 022 */ + 0x2575, 0x2514, 0x2515, /* 100 up */ + 0x2502, 0x251c, 0x251d, /* 101 up down */ + 0x257d, 0x251f, 0x2522, /* 102 */ + 0x2518, 0x2534, 0x2536, /* 110 up left */ + 0x2524, 0x253c, 0x253e, /* 111 all */ + 0x2527, 0x2541, 0x2546, /* 112 */ + 0x2519, 0x2535, 0x2537, /* 120 up left */ + 0x2525, 0x253d, 0x253f, /* 121 all */ + 0x252a, 0x2545, 0x2548, /* 122 */ + 0x2579, 0x2516, 0x2517, /* 200 up */ + 0x257f, 0x251e, 0x2521, /* 201 up down */ + 0x2503, 0x2520, 0x2523, /* 202 */ + 0x251a, 0x2538, 0x253a, /* 210 up left */ + 0x2526, 0x2540, 0x2544, /* 211 all */ + 0x2528, 0x2542, 0x254a, /* 212 */ + 0x251b, 0x2539, 0x253b, /* 220 up left */ + 0x2529, 0x2543, 0x2547, /* 221 all */ + 0x252b, 0x2549, 0x254b, /* 222 */ +}; + +/* ASCII approximations for these code points, compatible with groff. 
*/ +static const int borders_ascii[81] = { + ' ', '-', '=', /* 000 right */ + '|', '+', '+', /* 001 down */ + '|', '+', '+', /* 002 */ + '-', '-', '=', /* 010 left */ + '+', '+', '+', /* 011 left down */ + '+', '+', '+', /* 012 */ + '=', '=', '=', /* 020 left */ + '+', '+', '+', /* 021 left down */ + '+', '+', '+', /* 022 */ + '|', '+', '+', /* 100 up */ + '|', '+', '+', /* 101 up down */ + '|', '+', '+', /* 102 */ + '+', '+', '+', /* 110 up left */ + '+', '+', '+', /* 111 all */ + '+', '+', '+', /* 112 */ + '+', '+', '+', /* 120 up left */ + '+', '+', '+', /* 121 all */ + '+', '+', '+', /* 122 */ + '|', '+', '+', /* 200 up */ + '|', '+', '+', /* 201 up down */ + '|', '+', '+', /* 202 */ + '+', '+', '+', /* 210 up left */ + '+', '+', '+', /* 211 all */ + '+', '+', '+', /* 212 */ + '+', '+', '+', /* 220 up left */ + '+', '+', '+', /* 221 all */ + '+', '+', '+', /* 222 */ +}; + +/* Either of the above according to the selected output encoding. */ +static const int *borders_locale; + + +static size_t +term_tbl_sulen(const struct roffsu *su, void *arg) +{ + int i; + + i = term_hen((const struct termp *)arg, su); + return i > 0 ? i : 0; +} + +static size_t +term_tbl_strlen(const char *p, void *arg) +{ + return term_strlen((const struct termp *)arg, p); +} + +static size_t +term_tbl_len(size_t sz, void *arg) +{ + return term_len((const struct termp *)arg, sz); +} + + +void +term_tbl(struct termp *tp, const struct tbl_span *sp) +{ + const struct tbl_cell *cp, *cpn, *cpp, *cps; + const struct tbl_dat *dp; + static size_t offset; + size_t save_offset; + size_t coloff, tsz; + int hspans, ic, more; + int dvert, fc, horiz, lhori, rhori, uvert; + + /* Inhibit printing of spaces: we do padding ourselves. */ + + tp->flags |= TERMP_NOSPACE | TERMP_NONOSPACE; + save_offset = tp->tcol->offset; + + /* + * The first time we're invoked for a given table block, + * calculate the table widths and decimal positions. 
+ */ + + if (tp->tbl.cols == NULL) { + borders_locale = tp->enc == TERMENC_UTF8 ? + borders_utf8 : borders_ascii; + + tp->tbl.len = term_tbl_len; + tp->tbl.slen = term_tbl_strlen; + tp->tbl.sulen = term_tbl_sulen; + tp->tbl.arg = tp; + + tblcalc(&tp->tbl, sp, tp->tcol->offset, tp->tcol->rmargin); + + /* Tables leak .ta settings to subsequent text. */ + + term_tab_set(tp, NULL); + coloff = sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) || + sp->opts->lvert; + for (ic = 0; ic < sp->opts->cols; ic++) { + coloff += tp->tbl.cols[ic].width; + term_tab_iset(coloff); + coloff += tp->tbl.cols[ic].spacing; + } + + /* Center the table as a whole. */ + + offset = tp->tcol->offset; + if (sp->opts->opts & TBL_OPT_CENTRE) { + tsz = sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) + ? 2 : !!sp->opts->lvert + !!sp->opts->rvert; + for (ic = 0; ic + 1 < sp->opts->cols; ic++) + tsz += tp->tbl.cols[ic].width + + tp->tbl.cols[ic].spacing; + if (sp->opts->cols) + tsz += tp->tbl.cols[sp->opts->cols - 1].width; + if (offset + tsz > tp->tcol->rmargin) + tsz -= 1; + offset = offset + tp->tcol->rmargin > tsz ? + (offset + tp->tcol->rmargin - tsz) / 2 : 0; + tp->tcol->offset = offset; + } + + /* Horizontal frame at the start of boxed tables. */ + + if (tp->enc == TERMENC_ASCII && + sp->opts->opts & TBL_OPT_DBOX) + tbl_hrule(tp, NULL, sp, sp, TBL_OPT_DBOX); + if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) + tbl_hrule(tp, NULL, sp, sp, TBL_OPT_BOX); + } + + /* Set up the columns. */ + + tp->flags |= TERMP_MULTICOL; + tp->tcol->offset = offset; + horiz = 0; + switch (sp->pos) { + case TBL_SPAN_HORIZ: + case TBL_SPAN_DHORIZ: + horiz = 1; + term_setcol(tp, 1); + break; + case TBL_SPAN_DATA: + term_setcol(tp, sp->opts->cols + 2); + coloff = tp->tcol->offset; + + /* Set up a column for a left vertical frame. */ + + if (sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) || + sp->opts->lvert) + coloff++; + tp->tcol->rmargin = coloff; + + /* Set up the data columns. 
*/ + + dp = sp->first; + hspans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + if (hspans == 0) { + tp->tcol++; + tp->tcol->offset = coloff; + } + coloff += tp->tbl.cols[ic].width; + tp->tcol->rmargin = coloff; + if (ic + 1 < sp->opts->cols) + coloff += tp->tbl.cols[ic].spacing; + if (hspans) { + hspans--; + continue; + } + if (dp != NULL && + (ic || sp->layout->first->pos != TBL_CELL_SPAN)) { + hspans = dp->hspans; + dp = dp->next; + } + } + + /* Set up a column for a right vertical frame. */ + + tp->tcol++; + tp->tcol->offset = coloff + 1; + tp->tcol->rmargin = tp->maxrmargin; + + /* Spans may have reduced the number of columns. */ + + tp->lasttcol = tp->tcol - tp->tcols; + + /* Fill the buffers for all data columns. */ + + tp->tcol = tp->tcols; + cp = cpn = sp->layout->first; + dp = sp->first; + hspans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + if (cpn != NULL) { + cp = cpn; + cpn = cpn->next; + } + if (hspans) { + hspans--; + continue; + } + tp->tcol++; + tp->col = 0; + tbl_data(tp, sp->opts, cp, dp, tp->tbl.cols + ic); + if (dp != NULL && + (ic || sp->layout->first->pos != TBL_CELL_SPAN)) { + hspans = dp->hspans; + dp = dp->next; + } + } + break; + } + + do { + /* Print the vertical frame at the start of each row. */ + + tp->tcol = tp->tcols; + uvert = dvert = sp->opts->opts & TBL_OPT_DBOX ? 2 : + sp->opts->opts & TBL_OPT_BOX ? 1 : 0; + if (sp->pos == TBL_SPAN_DATA && uvert < sp->layout->vert) + uvert = dvert = sp->layout->vert; + if (sp->next != NULL && sp->next->pos == TBL_SPAN_DATA && + dvert < sp->next->layout->vert) + dvert = sp->next->layout->vert; + if (sp->prev != NULL && uvert < sp->prev->layout->vert && + (horiz || (IS_HORIZ(sp->layout->first) && + !IS_HORIZ(sp->prev->layout->first)))) + uvert = sp->prev->layout->vert; + rhori = sp->pos == TBL_SPAN_DHORIZ || + (sp->first != NULL && sp->first->pos == TBL_DATA_DHORIZ) || + sp->layout->first->pos == TBL_CELL_DHORIZ ? 
2 : + sp->pos == TBL_SPAN_HORIZ || + (sp->first != NULL && sp->first->pos == TBL_DATA_HORIZ) || + sp->layout->first->pos == TBL_CELL_HORIZ ? 1 : 0; + fc = BUP * uvert + BDOWN * dvert + BRIGHT * rhori; + if (uvert > 0 || dvert > 0 || (horiz && sp->opts->lvert)) { + (*tp->advance)(tp, tp->tcols->offset); + tp->viscol = tp->tcol->offset; + tbl_direct_border(tp, fc, 1); + } + + /* Print the data cells. */ + + more = 0; + if (horiz) + tbl_hrule(tp, sp->prev, sp, sp->next, 0); + else { + cp = sp->layout->first; + cpn = sp->next == NULL ? NULL : + sp->next->layout->first; + cpp = sp->prev == NULL ? NULL : + sp->prev->layout->first; + dp = sp->first; + hspans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + + /* + * Figure out whether to print a + * vertical line after this cell + * and advance to next layout cell. + */ + + uvert = dvert = fc = 0; + if (cp != NULL) { + cps = cp; + while (cps->next != NULL && + cps->next->pos == TBL_CELL_SPAN) + cps = cps->next; + if (sp->pos == TBL_SPAN_DATA) + uvert = dvert = cps->vert; + switch (cp->pos) { + case TBL_CELL_HORIZ: + fc = BHORIZ; + break; + case TBL_CELL_DHORIZ: + fc = BHORIZ * 2; + break; + default: + break; + } + } + if (cpp != NULL) { + if (uvert < cpp->vert && + cp != NULL && + ((IS_HORIZ(cp) && + !IS_HORIZ(cpp)) || + (cp->next != NULL && + cpp->next != NULL && + IS_HORIZ(cp->next) && + !IS_HORIZ(cpp->next)))) + uvert = cpp->vert; + cpp = cpp->next; + } + if (sp->opts->opts & TBL_OPT_ALLBOX) { + if (uvert == 0) + uvert = 1; + if (dvert == 0) + dvert = 1; + } + if (cpn != NULL) { + if (dvert == 0 || + (dvert < cpn->vert && + tp->enc == TERMENC_UTF8)) + dvert = cpn->vert; + cpn = cpn->next; + } + + lhori = (cp != NULL && + cp->pos == TBL_CELL_DHORIZ) || + (dp != NULL && + dp->pos == TBL_DATA_DHORIZ) ? 2 : + (cp != NULL && + cp->pos == TBL_CELL_HORIZ) || + (dp != NULL && + dp->pos == TBL_DATA_HORIZ) ? 1 : 0; + + /* + * Skip later cells in a span, + * figure out whether to start a span, + * and advance to next data cell. 
+ */ + + if (hspans) { + hspans--; + cp = cp->next; + continue; + } + if (dp != NULL && (ic || + sp->layout->first->pos != TBL_CELL_SPAN)) { + hspans = dp->hspans; + dp = dp->next; + } + + /* + * Print one line of text in the cell + * and remember whether there is more. + */ + + tp->tcol++; + if (tp->tcol->col < tp->tcol->lastcol) + term_flushln(tp); + if (tp->tcol->col < tp->tcol->lastcol) + more = 1; + + /* + * Vertical frames between data cells, + * but not after the last column. + */ + + if (fc == 0 && + ((uvert == 0 && dvert == 0 && + cp != NULL && (cp->next == NULL || + !IS_HORIZ(cp->next))) || + tp->tcol + 1 == + tp->tcols + tp->lasttcol)) { + if (cp != NULL) + cp = cp->next; + continue; + } + + if (tp->viscol < tp->tcol->rmargin) { + (*tp->advance)(tp, tp->tcol->rmargin + - tp->viscol); + tp->viscol = tp->tcol->rmargin; + } + while (tp->viscol < tp->tcol->rmargin + + tp->tbl.cols[ic].spacing / 2) + tbl_direct_border(tp, + BHORIZ * lhori, 1); + + if (tp->tcol + 1 == tp->tcols + tp->lasttcol) + continue; + + if (cp != NULL) + cp = cp->next; + + rhori = (cp != NULL && + cp->pos == TBL_CELL_DHORIZ) || + (dp != NULL && + dp->pos == TBL_DATA_DHORIZ) ? 2 : + (cp != NULL && + cp->pos == TBL_CELL_HORIZ) || + (dp != NULL && + dp->pos == TBL_DATA_HORIZ) ? 1 : 0; + + if (tp->tbl.cols[ic].spacing) + tbl_direct_border(tp, + BLEFT * lhori + BRIGHT * rhori + + BUP * uvert + BDOWN * dvert, 1); + + if (tp->enc == TERMENC_UTF8) + uvert = dvert = 0; + + if (tp->tbl.cols[ic].spacing > 2 && + (uvert > 1 || dvert > 1 || rhori)) + tbl_direct_border(tp, + BHORIZ * rhori + + BUP * (uvert > 1) + + BDOWN * (dvert > 1), 1); + } + } + + /* Print the vertical frame at the end of each row. */ + + uvert = dvert = sp->opts->opts & TBL_OPT_DBOX ? 2 : + sp->opts->opts & TBL_OPT_BOX ? 
1 : 0; + if (sp->pos == TBL_SPAN_DATA && + uvert < sp->layout->last->vert && + sp->layout->last->col + 1 == sp->opts->cols) + uvert = dvert = sp->layout->last->vert; + if (sp->next != NULL && + dvert < sp->next->layout->last->vert && + sp->next->layout->last->col + 1 == sp->opts->cols) + dvert = sp->next->layout->last->vert; + if (sp->prev != NULL && + uvert < sp->prev->layout->last->vert && + sp->prev->layout->last->col + 1 == sp->opts->cols && + (horiz || (IS_HORIZ(sp->layout->last) && + !IS_HORIZ(sp->prev->layout->last)))) + uvert = sp->prev->layout->last->vert; + lhori = sp->pos == TBL_SPAN_DHORIZ || + (sp->last != NULL && + sp->last->pos == TBL_DATA_DHORIZ && + sp->last->layout->col + 1 == sp->opts->cols) || + (sp->layout->last->pos == TBL_CELL_DHORIZ && + sp->layout->last->col + 1 == sp->opts->cols) ? 2 : + sp->pos == TBL_SPAN_HORIZ || + (sp->last != NULL && + sp->last->pos == TBL_DATA_HORIZ && + sp->last->layout->col + 1 == sp->opts->cols) || + (sp->layout->last->pos == TBL_CELL_HORIZ && + sp->layout->last->col + 1 == sp->opts->cols) ? 1 : 0; + fc = BUP * uvert + BDOWN * dvert + BLEFT * lhori; + if (uvert > 0 || dvert > 0 || (horiz && sp->opts->rvert)) { + if (horiz == 0 && (IS_HORIZ(sp->layout->last) == 0 || + sp->layout->last->col + 1 < sp->opts->cols)) { + tp->tcol++; + do { + tbl_direct_border(tp, + BHORIZ * lhori, 1); + } while (tp->viscol < tp->tcol->offset); + } + tbl_direct_border(tp, fc, 1); + } + (*tp->endline)(tp); + tp->viscol = 0; + } while (more); + + /* + * Clean up after this row. If it is the last line + * of the table, print the box line and clean up + * column data; otherwise, print the allbox line. 
+ */ + + term_setcol(tp, 1); + tp->flags &= ~TERMP_MULTICOL; + tp->tcol->rmargin = tp->maxrmargin; + if (sp->next == NULL) { + if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) { + tbl_hrule(tp, sp, sp, NULL, TBL_OPT_BOX); + tp->skipvsp = 1; + } + if (tp->enc == TERMENC_ASCII && + sp->opts->opts & TBL_OPT_DBOX) { + tbl_hrule(tp, sp, sp, NULL, TBL_OPT_DBOX); + tp->skipvsp = 2; + } + assert(tp->tbl.cols); + free(tp->tbl.cols); + tp->tbl.cols = NULL; + } else if (horiz == 0 && sp->opts->opts & TBL_OPT_ALLBOX && + (sp->next == NULL || sp->next->pos == TBL_SPAN_DATA || + sp->next->next != NULL)) + tbl_hrule(tp, sp, sp, sp->next, TBL_OPT_ALLBOX); + + tp->tcol->offset = save_offset; + tp->flags &= ~TERMP_NONOSPACE; +} + +static void +tbl_hrule(struct termp *tp, const struct tbl_span *spp, + const struct tbl_span *sp, const struct tbl_span *spn, int flags) +{ + const struct tbl_cell *cpp; /* Layout cell above this line. */ + const struct tbl_cell *cp; /* Layout cell in this line. */ + const struct tbl_cell *cpn; /* Layout cell below this line. */ + const struct tbl_dat *dpn; /* Data cell below this line. */ + const struct roffcol *col; /* Contains width and spacing. */ + int opts; /* For the table as a whole. */ + int bw; /* Box line width. */ + int hw; /* Horizontal line width. */ + int lw, rw; /* Left and right line widths. */ + int uw, dw; /* Vertical line widths. */ + + cpp = spp == NULL ? NULL : spp->layout->first; + cp = sp == NULL ? NULL : sp->layout->first; + cpn = spn == NULL ? NULL : spn->layout->first; + dpn = NULL; + if (spn != NULL) { + if (spn->pos == TBL_SPAN_DATA) + dpn = spn->first; + else if (spn->next != NULL) + dpn = spn->next->first; + } + opts = sp->opts->opts; + bw = opts & TBL_OPT_DBOX ? (tp->enc == TERMENC_UTF8 ? 2 : 1) : + opts & (TBL_OPT_BOX | TBL_OPT_ALLBOX) ? 1 : 0; + hw = flags == TBL_OPT_DBOX || flags == TBL_OPT_BOX ? bw : + sp->pos == TBL_SPAN_DHORIZ ? 2 : 1; + + /* Print the left end of the line. 
*/ + + if (tp->viscol == 0) { + (*tp->advance)(tp, tp->tcols->offset); + tp->viscol = tp->tcols->offset; + } + if (flags != 0) + tbl_direct_border(tp, + (spp == NULL ? 0 : BUP * bw) + + (spn == NULL ? 0 : BDOWN * bw) + + (spp == NULL || cpn == NULL || + cpn->pos != TBL_CELL_DOWN ? BRIGHT * hw : 0), 1); + + col = tp->tbl.cols; + for (;;) { + if (cp == NULL) + col++; + else + col = tp->tbl.cols + cp->col; + + /* Print the horizontal line inside this column. */ + + lw = cpp == NULL || cpn == NULL || + (cpn->pos != TBL_CELL_DOWN && + (dpn == NULL || dpn->string == NULL || + strcmp(dpn->string, "\\^") != 0)) + ? hw : 0; + tbl_direct_border(tp, BHORIZ * lw, + col->width + col->spacing / 2); + + /* + * Figure out whether a vertical line is crossing + * at the end of this column, + * and advance to the next column. + */ + + uw = dw = 0; + if (cpp != NULL) { + if (flags != TBL_OPT_DBOX) { + uw = cpp->vert; + if (uw == 0 && opts & TBL_OPT_ALLBOX) + uw = 1; + } + cpp = cpp->next; + } else if (spp != NULL && opts & TBL_OPT_ALLBOX) + uw = 1; + if (cp != NULL) + cp = cp->next; + if (cpn != NULL) { + if (flags != TBL_OPT_DBOX) { + dw = cpn->vert; + if (dw == 0 && opts & TBL_OPT_ALLBOX) + dw = 1; + } + cpn = cpn->next; + while (dpn != NULL && dpn->layout != cpn) + dpn = dpn->next; + } else if (spn != NULL && opts & TBL_OPT_ALLBOX) + dw = 1; + if (col + 1 == tp->tbl.cols + sp->opts->cols) + break; + + /* Vertical lines do not cross spanned cells. */ + + if (cpp != NULL && cpp->pos == TBL_CELL_SPAN) + uw = 0; + if (cpn != NULL && cpn->pos == TBL_CELL_SPAN) + dw = 0; + + /* The horizontal line inside the next column. */ + + rw = cpp == NULL || cpn == NULL || + (cpn->pos != TBL_CELL_DOWN && + (dpn == NULL || dpn->string == NULL || + strcmp(dpn->string, "\\^") != 0)) + ? hw : 0; + + /* The line crossing at the end of this column. 
*/ + + if (col->spacing) + tbl_direct_border(tp, BLEFT * lw + + BRIGHT * rw + BUP * uw + BDOWN * dw, 1); + + /* + * In ASCII output, a crossing may print two characters. + */ + + if (tp->enc != TERMENC_ASCII || (uw < 2 && dw < 2)) + uw = dw = 0; + if (col->spacing > 2) + tbl_direct_border(tp, + BHORIZ * rw + BUP * uw + BDOWN * dw, 1); + + /* Padding before the start of the next column. */ + + if (col->spacing > 4) + tbl_direct_border(tp, + BHORIZ * rw, (col->spacing - 3) / 2); + } + + /* Print the right end of the line. */ + + if (flags != 0) { + tbl_direct_border(tp, + (spp == NULL ? 0 : BUP * bw) + + (spn == NULL ? 0 : BDOWN * bw) + + (spp == NULL || spn == NULL || + spn->layout->last->pos != TBL_CELL_DOWN ? + BLEFT * hw : 0), 1); + (*tp->endline)(tp); + tp->viscol = 0; + } +} + +static void +tbl_data(struct termp *tp, const struct tbl_opts *opts, + const struct tbl_cell *cp, const struct tbl_dat *dp, + const struct roffcol *col) +{ + switch (cp->pos) { + case TBL_CELL_HORIZ: + tbl_fill_border(tp, BHORIZ, col->width); + return; + case TBL_CELL_DHORIZ: + tbl_fill_border(tp, BHORIZ * 2, col->width); + return; + default: + break; + } + + if (dp == NULL) + return; + + switch (dp->pos) { + case TBL_DATA_NONE: + return; + case TBL_DATA_HORIZ: + case TBL_DATA_NHORIZ: + tbl_fill_border(tp, BHORIZ, col->width); + return; + case TBL_DATA_NDHORIZ: + case TBL_DATA_DHORIZ: + tbl_fill_border(tp, BHORIZ * 2, col->width); + return; + default: + break; + } + + switch (cp->pos) { + case TBL_CELL_LONG: + case TBL_CELL_CENTRE: + case TBL_CELL_LEFT: + case TBL_CELL_RIGHT: + tbl_literal(tp, dp, col); + break; + case TBL_CELL_NUMBER: + tbl_number(tp, opts, dp, col); + break; + case TBL_CELL_DOWN: + case TBL_CELL_SPAN: + break; + default: + abort(); + } +} + +static void +tbl_fill_string(struct termp *tp, const char *cp, size_t len) +{ + size_t i, sz; + + sz = term_strlen(tp, cp); + for (i = 0; i < len; i += sz) + term_word(tp, cp); +} + +static void +tbl_fill_char(struct termp *tp, 
char c, size_t len) +{ + char cp[2]; + + cp[0] = c; + cp[1] = '\0'; + tbl_fill_string(tp, cp, len); +} +/* Fill with the border character selected by the ternary code c: code points above 127 are passed on as a \[uXXXX] escape string, anything else as a plain character. */ +static void +tbl_fill_border(struct termp *tp, int c, size_t len) +{ + char buf[12]; + + if ((c = borders_locale[c]) > 127) { + (void)snprintf(buf, sizeof(buf), "\\[u%04x]", c); + tbl_fill_string(tp, buf, len); + } else + tbl_fill_char(tp, c, len); +} +/* Emit the border character selected by the ternary code c directly through the letter callback, advancing viscol by its width each time, until len is covered. */ +static void +tbl_direct_border(struct termp *tp, int c, size_t len) +{ + size_t i, sz; + + c = borders_locale[c]; + sz = (*tp->width)(tp, c); + for (i = 0; i < len; i += sz) { /* NOTE(review): steps by sz, so this would not terminate if the width callback returned 0 -- presumably it never does for border characters; confirm */ + (*tp->letter)(tp, c); + tp->viscol += sz; + } +} +/* Print the text of one data cell, padded on the left and/or right with non-breaking spaces according to the layout position (long, centre, right, otherwise left); the available width includes the columns covered by horizontal spans. */ +static void +tbl_literal(struct termp *tp, const struct tbl_dat *dp, + const struct roffcol *col) +{ + size_t len, padl, padr, width; + int ic, hspans; + + assert(dp->string); + len = term_strlen(tp, dp->string); + width = col->width; + ic = dp->layout->col; + hspans = dp->hspans; + while (hspans--) + width += tp->tbl.cols[++ic].width + 3; /* NOTE(review): adds a fixed 3 per spanned column, whereas term_tbl() uses cols[ic].spacing -- presumably the default column separation; confirm behaviour for non-default spacing */ + + padr = width > len ? width - len : 0; + padl = 0; + + switch (dp->layout->pos) { + case TBL_CELL_LONG: + padl = term_len(tp, 1); + padr = padr > padl ? padr - padl : 0; + break; + case TBL_CELL_CENTRE: + if (2 > padr) + break; + padl = padr / 2; + padr -= padl; + break; + case TBL_CELL_RIGHT: + padl = padr; + padr = 0; + break; + default: + break; + } + + tbl_fill_char(tp, ASCII_NBRSP, padl); + tbl_word(tp, dp); + tbl_fill_char(tp, ASCII_NBRSP, padr); +} + +static void +tbl_number(struct termp *tp, const struct tbl_opts *opts, + const struct tbl_dat *dp, + const struct roffcol *col) +{ + const char *cp, *lastdigit, *lastpoint; + size_t intsz, padl, totsz; + char buf[2]; + + /* + * Almost the same code as in tblcalc_number(): + * First find the position of the decimal point. 
+ */ + + assert(dp->string); + lastdigit = lastpoint = NULL; + for (cp = dp->string; cp[0] != '\0'; cp++) { + if (cp[0] == '\\' && cp[1] == '&') { + lastdigit = lastpoint = cp; + break; + } else if (cp[0] == opts->decimal && + (isdigit((unsigned char)cp[1]) || + (cp > dp->string && isdigit((unsigned char)cp[-1])))) + lastpoint = cp; + else if (isdigit((unsigned char)cp[0])) + lastdigit = cp; + } + + /* Then measure both widths. */ + + padl = 0; + totsz = term_strlen(tp, dp->string); + if (lastdigit != NULL) { + if (lastpoint == NULL) + lastpoint = lastdigit + 1; + intsz = 0; + buf[1] = '\0'; + for (cp = dp->string; cp < lastpoint; cp++) { + buf[0] = cp[0]; + intsz += term_strlen(tp, buf); + } + + /* + * Pad left to match the decimal position, + * but avoid exceeding the total column width. + */ + + if (col->decimal > intsz && col->width > totsz) { + padl = col->decimal - intsz; + if (padl + totsz > col->width) + padl = col->width - totsz; + } + + /* If it is not a number, simply center the string. */ + + } else if (col->width > totsz) + padl = (col->width - totsz) / 2; + + tbl_fill_char(tp, ASCII_NBRSP, padl); + tbl_word(tp, dp); + + /* Pad right to fill the column. 
*/ + + if (col->width > padl + totsz) + tbl_fill_char(tp, ASCII_NBRSP, col->width - padl - totsz); +} + +static void +tbl_word(struct termp *tp, const struct tbl_dat *dp) +{ + int prev_font; + + prev_font = tp->fonti; + if (dp->layout->flags & TBL_CELL_BOLD) + term_fontpush(tp, TERMFONT_BOLD); + else if (dp->layout->flags & TBL_CELL_ITALIC) + term_fontpush(tp, TERMFONT_UNDER); + + term_word(tp, dp->string); + + term_fontpopq(tp, prev_font); +} diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c new file mode 100644 index 0000000..68f78d7 --- /dev/null +++ b/usr.bin/mandoc/term.c @@ -0,0 +1,1112 @@ +/* $OpenBSD: term.c,v 1.141 2019/06/03 20:23:39 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "main.h" + +static size_t cond_width(const struct termp *, int, int *); +static void adjbuf(struct termp_col *, size_t); +static void bufferc(struct termp *, char); +static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); +static void endline(struct termp *); +static void term_field(struct termp *, size_t, size_t, + size_t, size_t); +static void term_fill(struct termp *, size_t *, size_t *, + size_t); + + +void +term_setcol(struct termp *p, size_t maxtcol) +{ + if (maxtcol > p->maxtcol) { + p->tcols = mandoc_recallocarray(p->tcols, + p->maxtcol, maxtcol, sizeof(*p->tcols)); + p->maxtcol = maxtcol; + } + p->lasttcol = maxtcol - 1; + p->tcol = p->tcols; +} + +void +term_free(struct termp *p) +{ + for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) + free(p->tcol->buf); + free(p->tcols); + free(p->fontq); + free(p); +} + +void +term_begin(struct termp *p, term_margin head, + term_margin foot, const struct roff_meta *arg) +{ + + p->headf = head; + p->footf = foot; + p->argf = arg; + (*p->begin)(p); +} + +void +term_end(struct termp *p) +{ + + (*p->end)(p); +} + +/* + * Flush a chunk of text. By default, break the output line each time + * the right margin is reached, and continue output on the next line + * at the same offset as the chunk itself. By default, also break the + * output line at the end of the chunk. There are many flags modifying + * this behaviour, see the comments in the body of the function. + */ +void +term_flushln(struct termp *p) +{ + size_t vbl; /* Number of blanks to prepend to the output. */ + size_t vbr; /* Actual visual position of the end of field. */ + size_t vfield; /* Desired visual field width. 
*/ + size_t vtarget; /* Desired visual position of the right margin. */ + size_t ic; /* Character position in the input buffer. */ + size_t nbr; /* Number of characters to print in this field. */ + + /* + * Normally, start writing at the left margin, but with the + * NOPAD flag, start writing at the current position instead. + */ + + vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ? + 0 : p->tcol->offset - p->viscol; + if (p->minbl && vbl < p->minbl) + vbl = p->minbl; + + if ((p->flags & TERMP_MULTICOL) == 0) + p->tcol->col = 0; + + /* Loop over output lines. */ + + for (;;) { + vfield = p->tcol->rmargin > p->viscol + vbl ? + p->tcol->rmargin - p->viscol - vbl : 0; + + /* + * Normally, break the line at the right margin + * of the field, but with the NOBREAK flag, only + * break it at the max right margin of the screen, + * and with the BRNEVER flag, never break it at all. + */ + + vtarget = p->flags & TERMP_BRNEVER ? SIZE_MAX : + (p->flags & TERMP_NOBREAK) == 0 ? vfield : + p->maxrmargin > p->viscol + vbl ? + p->maxrmargin - p->viscol - vbl : 0; + + /* + * Figure out how much text will fit in the field. + * If there is whitespace only, print nothing. + */ + + term_fill(p, &nbr, &vbr, vtarget); + if (nbr == 0) + break; + + /* + * With the CENTER or RIGHT flag, increase the indentation + * to center the text between the left and right margins + * or to adjust it to the right margin, respectively. + */ + + if (vbr < vtarget) { + if (p->flags & TERMP_CENTER) + vbl += (vtarget - vbr) / 2; + else if (p->flags & TERMP_RIGHT) + vbl += vtarget - vbr; + } + + /* Finally, print the field content. */ + + term_field(p, vbl, nbr, vbr, vtarget); + + /* + * If there is no text left in the field, exit the loop. + * If the BRTRSP flag is set, consider trailing + * whitespace significant when deciding whether + * the field fits or not. 
+ */ + + for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { + switch (p->tcol->buf[ic]) { + case '\t': + if (p->flags & TERMP_BRTRSP) + vbr = term_tab_next(vbr); + continue; + case ' ': + if (p->flags & TERMP_BRTRSP) + vbr += (*p->width)(p, ' '); + continue; + case '\n': + case ASCII_BREAK: + continue; + default: + break; + } + break; + } + if (ic == p->tcol->lastcol) + break; + + /* + * At the location of an automatic line break, input + * space characters are consumed by the line break. + */ + + while (p->tcol->col < p->tcol->lastcol && + p->tcol->buf[p->tcol->col] == ' ') + p->tcol->col++; + + /* + * In multi-column mode, leave the rest of the text + * in the buffer to be handled by a subsequent + * invocation, such that the other columns of the + * table can be handled first. + * In single-column mode, simply break the line. + */ + + if (p->flags & TERMP_MULTICOL) + return; + + endline(p); + p->viscol = 0; + + /* + * Normally, start the next line at the same indentation + * as this one, but with the BRIND flag, start it at the + * right margin instead. This is used together with + * NOBREAK for the tags in various kinds of tagged lists. + */ + + vbl = p->flags & TERMP_BRIND ? + p->tcol->rmargin : p->tcol->offset; + } + + /* Reset output state in preparation for the next field. */ + + p->col = p->tcol->col = p->tcol->lastcol = 0; + p->minbl = p->trailspace; + p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD); + + if (p->flags & TERMP_MULTICOL) + return; + + /* + * The HANG flag means that the next field + * always follows on the same line. + * The NOBREAK flag means that the next field + * follows on the same line unless the field was overrun. + * Normally, break the line at the end of each field. 
+ */ + + if ((p->flags & TERMP_HANG) == 0 && + ((p->flags & TERMP_NOBREAK) == 0 || + vbr + term_len(p, p->trailspace) > vfield)) + endline(p); +} + +/* + * Store the number of input characters to print in this field in *nbr + * and their total visual width to print in *vbr. + * If there is only whitespace in the field, both remain zero. + * The desired visual width of the field is provided by vtarget. + * If the first word is longer, the field will be overrun. + */ +static void +term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget) +{ + size_t ic; /* Character position in the input buffer. */ + size_t vis; /* Visual position of the current character. */ + size_t vn; /* Visual position of the next character. */ + int breakline; /* Break at the end of this word. */ + int graph; /* Last character was non-blank. */ + + *nbr = *vbr = vis = 0; + breakline = graph = 0; + for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { + switch (p->tcol->buf[ic]) { + case '\b': /* Escape \o (overstrike) or backspace markup. */ + assert(ic > 0); + vis -= (*p->width)(p, p->tcol->buf[ic - 1]); + continue; + + case '\t': /* Normal ASCII whitespace. */ + case ' ': + case ASCII_BREAK: /* Escape \: (breakpoint). */ + switch (p->tcol->buf[ic]) { + case '\t': + vn = term_tab_next(vis); + break; + case ' ': + vn = vis + (*p->width)(p, ' '); + break; + case ASCII_BREAK: + vn = vis; + break; + default: + abort(); + } + /* Can break at the end of a word. */ + if (breakline || vn > vtarget) + break; + if (graph) { + *nbr = ic; + *vbr = vis; + graph = 0; + } + vis = vn; + continue; + + case '\n': /* Escape \p (break at the end of the word). */ + breakline = 1; + continue; + + case ASCII_HYPH: /* Breakable hyphen. */ + graph = 1; + /* + * We are about to decide whether to break the + * line or not, so we no longer need this hyphen + * to be marked as breakable. Put back a real + * hyphen such that we get the correct width. 
+ */ + p->tcol->buf[ic] = '-'; + vis += (*p->width)(p, '-'); + if (vis > vtarget) { + ic++; + break; + } + *nbr = ic + 1; + *vbr = vis; + continue; + + case ASCII_NBRSP: /* Non-breakable space. */ + p->tcol->buf[ic] = ' '; + /* FALLTHROUGH */ + default: /* Printable character. */ + graph = 1; + vis += (*p->width)(p, p->tcol->buf[ic]); + if (vis > vtarget && *nbr > 0) + return; + continue; + } + break; + } + + /* + * If the last word extends to the end of the field without any + * trailing whitespace, the loop could not check yet whether it + * can remain on this line. So do the check now. + */ + + if (graph && (vis <= vtarget || *nbr == 0)) { + *nbr = ic; + *vbr = vis; + } +} + +/* + * Print the contents of one field + * with an indentation of vbl visual columns, + * an input string length of nbr characters, + * an output width of vbr visual columns, + * and a desired field width of vtarget visual columns. + */ +static void +term_field(struct termp *p, size_t vbl, size_t nbr, size_t vbr, size_t vtarget) +{ + size_t ic; /* Character position in the input buffer. */ + size_t vis; /* Visual position of the current character. */ + size_t dv; /* Visual width of the current character. */ + size_t vn; /* Visual position of the next character. */ + + vis = 0; + for (ic = p->tcol->col; ic < nbr; ic++) { + + /* + * To avoid the printing of trailing whitespace, + * do not print whitespace right away, only count it. + */ + + switch (p->tcol->buf[ic]) { + case '\n': + case ASCII_BREAK: + continue; + case '\t': + vn = term_tab_next(vis); + vbl += vn - vis; + vis = vn; + continue; + case ' ': + case ASCII_NBRSP: + dv = (*p->width)(p, ' '); + vbl += dv; + vis += dv; + continue; + default: + break; + } + + /* + * We found a non-blank character to print, + * so write preceding white space now. + */ + + if (vbl > 0) { + (*p->advance)(p, vbl); + p->viscol += vbl; + vbl = 0; + } + + /* Print the character and adjust the visual position. 
*/ + + (*p->letter)(p, p->tcol->buf[ic]); + if (p->tcol->buf[ic] == '\b') { + dv = (*p->width)(p, p->tcol->buf[ic - 1]); + p->viscol -= dv; + vis -= dv; + } else { + dv = (*p->width)(p, p->tcol->buf[ic]); + p->viscol += dv; + vis += dv; + } + } + p->tcol->col = nbr; +} + +static void +endline(struct termp *p) +{ + if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) { + p->mc = NULL; + p->flags &= ~TERMP_ENDMC; + } + if (p->mc != NULL) { + if (p->viscol && p->maxrmargin >= p->viscol) + (*p->advance)(p, p->maxrmargin - p->viscol + 1); + p->flags |= TERMP_NOBUF | TERMP_NOSPACE; + term_word(p, p->mc); + p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC); + } + p->viscol = 0; + p->minbl = 0; + (*p->endline)(p); +} + +/* + * A newline only breaks an existing line; it won't assert vertical + * space. All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_newln(struct termp *p) +{ + + p->flags |= TERMP_NOSPACE; + if (p->tcol->lastcol || p->viscol) + term_flushln(p); +} + +/* + * Asserts a vertical space (a full, empty line-break between lines). + * Note that if used twice, this will cause two blank spaces and so on. + * All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_vspace(struct termp *p) +{ + + term_newln(p); + p->viscol = 0; + p->minbl = 0; + if (0 < p->skipvsp) + p->skipvsp--; + else + (*p->endline)(p); +} + +/* Swap current and previous font; for \fP and .ft P */ +void +term_fontlast(struct termp *p) +{ + enum termfont f; + + f = p->fontl; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Set font, save current, discard previous; for \f, .ft, .B etc. */ +void +term_fontrepl(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Set font, save previous. 
*/ +void +term_fontpush(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + if (++p->fonti == p->fontsz) { + p->fontsz += 8; + p->fontq = mandoc_reallocarray(p->fontq, + p->fontsz, sizeof(*p->fontq)); + } + p->fontq[p->fonti] = f; +} + +/* Flush to make the saved pointer current again. */ +void +term_fontpopq(struct termp *p, int i) +{ + + assert(i >= 0); + if (p->fonti > i) + p->fonti = i; +} + +/* Pop one font off the stack. */ +void +term_fontpop(struct termp *p) +{ + + assert(p->fonti); + p->fonti--; +} + +/* + * Handle pwords, partial words, which may be either a single word or a + * phrase that cannot be broken down (such as a literal string). This + * handles word styling. + */ +void +term_word(struct termp *p, const char *word) +{ + struct roffsu su; + const char nbrsp[2] = { ASCII_NBRSP, 0 }; + const char *seq, *cp; + int sz, uc; + size_t csz, lsz, ssz; + enum mandoc_esc esc; + + if ((p->flags & TERMP_NOBUF) == 0) { + if ((p->flags & TERMP_NOSPACE) == 0) { + if ((p->flags & TERMP_KEEP) == 0) { + bufferc(p, ' '); + if (p->flags & TERMP_SENTENCE) + bufferc(p, ' '); + } else + bufferc(p, ASCII_NBRSP); + } + if (p->flags & TERMP_PREKEEP) + p->flags |= TERMP_KEEP; + if (p->flags & TERMP_NONOSPACE) + p->flags |= TERMP_NOSPACE; + else + p->flags &= ~TERMP_NOSPACE; + p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); + p->skipvsp = 0; + } + + while ('\0' != *word) { + if ('\\' != *word) { + if (TERMP_NBRWORD & p->flags) { + if (' ' == *word) { + encode(p, nbrsp, 1); + word++; + continue; + } + ssz = strcspn(word, "\\ "); + } else + ssz = strcspn(word, "\\"); + encode(p, word, ssz); + word += (int)ssz; + continue; + } + + word++; + esc = mandoc_escape(&word, &seq, &sz); + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, sz); + if (uc < 0) + continue; + break; + case ESCAPE_SPECIAL: + if (p->enc == TERMENC_ASCII) { + cp = mchars_spec2str(seq, sz, &ssz); + if 
(cp != NULL) + encode(p, cp, ssz); + } else { + uc = mchars_spec2cp(seq, sz); + if (uc > 0) + encode1(p, uc); + } + continue; + case ESCAPE_UNDEF: + uc = *seq; + break; + case ESCAPE_FONTBOLD: + term_fontrepl(p, TERMFONT_BOLD); + continue; + case ESCAPE_FONTITALIC: + term_fontrepl(p, TERMFONT_UNDER); + continue; + case ESCAPE_FONTBI: + term_fontrepl(p, TERMFONT_BI); + continue; + case ESCAPE_FONT: + case ESCAPE_FONTCW: + case ESCAPE_FONTROMAN: + term_fontrepl(p, TERMFONT_NONE); + continue; + case ESCAPE_FONTPREV: + term_fontlast(p); + continue; + case ESCAPE_BREAK: + bufferc(p, '\n'); + continue; + case ESCAPE_NOSPACE: + if (p->flags & TERMP_BACKAFTER) + p->flags &= ~TERMP_BACKAFTER; + else if (*word == '\0') + p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); + continue; + case ESCAPE_DEVICE: + if (p->type == TERMTYPE_PDF) + encode(p, "pdf", 3); + else if (p->type == TERMTYPE_PS) + encode(p, "ps", 2); + else if (p->enc == TERMENC_ASCII) + encode(p, "ascii", 5); + else + encode(p, "utf8", 4); + continue; + case ESCAPE_HORIZ: + if (*seq == '|') { + seq++; + uc = -p->col; + } else + uc = 0; + if (a2roffsu(seq, &su, SCALE_EM) == NULL) + continue; + uc += term_hen(p, &su); + if (uc > 0) + while (uc-- > 0) + bufferc(p, ASCII_NBRSP); + else if (p->col > (size_t)(-uc)) + p->col += uc; + else { + uc += p->col; + p->col = 0; + if (p->tcol->offset > (size_t)(-uc)) { + p->ti += uc; + p->tcol->offset += uc; + } else { + p->ti -= p->tcol->offset; + p->tcol->offset = 0; + } + } + continue; + case ESCAPE_HLINE: + if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL) + continue; + uc = term_hen(p, &su); + if (uc <= 0) { + if (p->tcol->rmargin <= p->tcol->offset) + continue; + lsz = p->tcol->rmargin - p->tcol->offset; + } else + lsz = uc; + if (*cp == seq[-1]) + uc = -1; + else if (*cp == '\\') { + seq = cp + 1; + esc = mandoc_escape(&seq, &cp, &sz); + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(cp + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(cp, 
sz); + break; + case ESCAPE_SPECIAL: + uc = mchars_spec2cp(cp, sz); + break; + case ESCAPE_UNDEF: + uc = *seq; + break; + default: + uc = -1; + break; + } + } else + uc = *cp; + if (uc < 0x20 || (uc > 0x7E && uc < 0xA0)) + uc = '_'; + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + csz = term_strlen(p, cp); + ssz = strlen(cp); + } else + csz = (*p->width)(p, uc); + while (lsz >= csz) { + if (p->enc == TERMENC_ASCII) + encode(p, cp, ssz); + else + encode1(p, uc); + lsz -= csz; + } + continue; + case ESCAPE_SKIPCHAR: + p->flags |= TERMP_BACKAFTER; + continue; + case ESCAPE_OVERSTRIKE: + cp = seq + sz; + while (seq < cp) { + if (*seq == '\\') { + mandoc_escape(&seq, NULL, NULL); + continue; + } + encode1(p, *seq++); + if (seq < cp) { + if (p->flags & TERMP_BACKBEFORE) + p->flags |= TERMP_BACKAFTER; + else + p->flags |= TERMP_BACKBEFORE; + } + } + /* Trim trailing backspace/blank pair. */ + if (p->tcol->lastcol > 2 && + (p->tcol->buf[p->tcol->lastcol - 1] == ' ' || + p->tcol->buf[p->tcol->lastcol - 1] == '\t')) + p->tcol->lastcol -= 2; + if (p->col > p->tcol->lastcol) + p->col = p->tcol->lastcol; + continue; + default: + continue; + } + + /* + * Common handling for Unicode and numbered + * character escape sequences. 
+ */ + + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + encode(p, cp, strlen(cp)); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + encode1(p, uc); + } + } + p->flags &= ~TERMP_NBRWORD; +} + +static void +adjbuf(struct termp_col *c, size_t sz) +{ + if (c->maxcols == 0) + c->maxcols = 1024; + while (c->maxcols <= sz) + c->maxcols <<= 2; + c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf)); +} + +static void +bufferc(struct termp *p, char c) +{ + if (p->flags & TERMP_NOBUF) { + (*p->letter)(p, c); + return; + } + if (p->col + 1 >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 1); + if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) + p->tcol->buf[p->col] = c; + if (p->tcol->lastcol < ++p->col) + p->tcol->lastcol = p->col; +} + +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->flags & TERMP_NOBUF) { + (*p->letter)(p, c); + return; + } + + if (p->col + 7 >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 7); + + f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? 
+ p->fontq[p->fonti] : TERMFONT_NONE; + + if (p->flags & TERMP_BACKBEFORE) { + if (p->tcol->buf[p->col - 1] == ' ' || + p->tcol->buf[p->col - 1] == '\t') + p->col--; + else + p->tcol->buf[p->col++] = '\b'; + p->flags &= ~TERMP_BACKBEFORE; + } + if (f == TERMFONT_UNDER || f == TERMFONT_BI) { + p->tcol->buf[p->col++] = '_'; + p->tcol->buf[p->col++] = '\b'; + } + if (f == TERMFONT_BOLD || f == TERMFONT_BI) { + if (c == ASCII_HYPH) + p->tcol->buf[p->col++] = '-'; + else + p->tcol->buf[p->col++] = c; + p->tcol->buf[p->col++] = '\b'; + } + if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) + p->tcol->buf[p->col] = c; + if (p->tcol->lastcol < ++p->col) + p->tcol->lastcol = p->col; + if (p->flags & TERMP_BACKAFTER) { + p->flags |= TERMP_BACKBEFORE; + p->flags &= ~TERMP_BACKAFTER; + } +} + +static void +encode(struct termp *p, const char *word, size_t sz) +{ + size_t i; + + if (p->flags & TERMP_NOBUF) { + for (i = 0; i < sz; i++) + (*p->letter)(p, word[i]); + return; + } + + if (p->col + 2 + (sz * 5) >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 2 + (sz * 5)); + + for (i = 0; i < sz; i++) { + if (ASCII_HYPH == word[i] || + isgraph((unsigned char)word[i])) + encode1(p, word[i]); + else { + if (p->tcol->lastcol <= p->col || + (word[i] != ' ' && word[i] != ASCII_NBRSP)) + p->tcol->buf[p->col] = word[i]; + p->col++; + + /* + * Postpone the effect of \z while handling + * an overstrike sequence from ascii_uc2str(). 
+ */ + + if (word[i] == '\b' && + (p->flags & TERMP_BACKBEFORE)) { + p->flags &= ~TERMP_BACKBEFORE; + p->flags |= TERMP_BACKAFTER; + } + } + } + if (p->tcol->lastcol < p->col) + p->tcol->lastcol = p->col; +} + +void +term_setwidth(struct termp *p, const char *wstr) +{ + struct roffsu su; + int iop, width; + + iop = 0; + width = 0; + if (NULL != wstr) { + switch (*wstr) { + case '+': + iop = 1; + wstr++; + break; + case '-': + iop = -1; + wstr++; + break; + default: + break; + } + if (a2roffsu(wstr, &su, SCALE_MAX) != NULL) + width = term_hspan(p, &su); + else + iop = 0; + } + (*p->setwidth)(p, iop, width); +} + +size_t +term_len(const struct termp *p, size_t sz) +{ + + return (*p->width)(p, ' ') * sz; +} + +static size_t +cond_width(const struct termp *p, int c, int *skip) +{ + + if (*skip) { + (*skip) = 0; + return 0; + } else + return (*p->width)(p, c); +} + +size_t +term_strlen(const struct termp *p, const char *cp) +{ + size_t sz, rsz, i; + int ssz, skip, uc; + const char *seq, *rhs; + enum mandoc_esc esc; + static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, + ASCII_BREAK, '\0' }; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. 
+ */ + + sz = 0; + skip = 0; + while ('\0' != *cp) { + rsz = strcspn(cp, rej); + for (i = 0; i < rsz; i++) + sz += cond_width(p, *cp++, &skip); + + switch (*cp) { + case '\\': + cp++; + rhs = NULL; + esc = mandoc_escape(&cp, &seq, &ssz); + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, ssz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, ssz); + if (uc < 0) + continue; + break; + case ESCAPE_SPECIAL: + if (p->enc == TERMENC_ASCII) { + rhs = mchars_spec2str(seq, ssz, &rsz); + if (rhs != NULL) + break; + } else { + uc = mchars_spec2cp(seq, ssz); + if (uc > 0) + sz += cond_width(p, uc, &skip); + } + continue; + case ESCAPE_UNDEF: + uc = *seq; + break; + case ESCAPE_DEVICE: + if (p->type == TERMTYPE_PDF) { + rhs = "pdf"; + rsz = 3; + } else if (p->type == TERMTYPE_PS) { + rhs = "ps"; + rsz = 2; + } else if (p->enc == TERMENC_ASCII) { + rhs = "ascii"; + rsz = 5; + } else { + rhs = "utf8"; + rsz = 4; + } + break; + case ESCAPE_SKIPCHAR: + skip = 1; + continue; + case ESCAPE_OVERSTRIKE: + rsz = 0; + rhs = seq + ssz; + while (seq < rhs) { + if (*seq == '\\') { + mandoc_escape(&seq, NULL, NULL); + continue; + } + i = (*p->width)(p, *seq++); + if (rsz < i) + rsz = i; + } + sz += rsz; + continue; + default: + continue; + } + + /* + * Common handling for Unicode and numbered + * character escape sequences. + */ + + if (rhs == NULL) { + if (p->enc == TERMENC_ASCII) { + rhs = ascii_uc2str(uc); + rsz = strlen(rhs); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + sz += cond_width(p, uc, &skip); + continue; + } + } + + if (skip) { + skip = 0; + break; + } + + /* + * Common handling for all escape sequences + * printing more than one character. 
+ */ + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case ASCII_NBRSP: + sz += cond_width(p, ' ', &skip); + cp++; + break; + case ASCII_HYPH: + sz += cond_width(p, '-', &skip); + cp++; + break; + default: + break; + } + } + + return sz; +} + +int +term_vspan(const struct termp *p, const struct roffsu *su) +{ + double r; + int ri; + + switch (su->unit) { + case SCALE_BU: + r = su->scale / 40.0; + break; + case SCALE_CM: + r = su->scale * 6.0 / 2.54; + break; + case SCALE_FS: + r = su->scale * 65536.0 / 40.0; + break; + case SCALE_IN: + r = su->scale * 6.0; + break; + case SCALE_MM: + r = su->scale * 0.006; + break; + case SCALE_PC: + r = su->scale; + break; + case SCALE_PT: + r = su->scale / 12.0; + break; + case SCALE_EN: + case SCALE_EM: + r = su->scale * 0.6; + break; + case SCALE_VS: + r = su->scale; + break; + default: + abort(); + } + ri = r > 0.0 ? r + 0.4995 : r - 0.4995; + return ri < 66 ? ri : 1; +} + +/* + * Convert a scaling width to basic units, rounding towards 0. + */ +int +term_hspan(const struct termp *p, const struct roffsu *su) +{ + + return (*p->hspan)(p, su); +} + +/* + * Convert a scaling width to basic units, rounding to closest. + */ +int +term_hen(const struct termp *p, const struct roffsu *su) +{ + int bu; + + if ((bu = (*p->hspan)(p, su)) >= 0) + return (bu + 11) / 24; + else + return -((-bu + 11) / 24); +} diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h new file mode 100644 index 0000000..525aa3f --- /dev/null +++ b/usr.bin/mandoc/term.h @@ -0,0 +1,158 @@ +/* $OpenBSD: term.h,v 1.75 2019/01/04 03:20:44 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2015, 2017, 2019 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +enum termenc { + TERMENC_ASCII, + TERMENC_LOCALE, + TERMENC_UTF8 +}; + +enum termtype { + TERMTYPE_CHAR, + TERMTYPE_PS, + TERMTYPE_PDF +}; + +enum termfont { + TERMFONT_NONE = 0, + TERMFONT_BOLD, + TERMFONT_UNDER, + TERMFONT_BI, + TERMFONT__MAX +}; + +struct eqn_box; +struct roff_meta; +struct roff_node; +struct tbl_span; +struct termp; + +typedef void (*term_margin)(struct termp *, const struct roff_meta *); + +struct termp_tbl { + int width; /* width in fixed chars */ + int decimal; /* decimal point position */ +}; + +struct termp_col { + int *buf; /* Output buffer. */ + size_t maxcols; /* Allocated bytes in buf. */ + size_t lastcol; /* Last byte in buf. */ + size_t col; /* Byte in buf to be written. */ + size_t rmargin; /* Current right margin. */ + size_t offset; /* Current left margin. */ +}; + +struct termp { + struct rofftbl tbl; /* Table configuration. */ + struct termp_col *tcols; /* Array of table columns. */ + struct termp_col *tcol; /* Current table column. */ + size_t maxtcol; /* Allocated table columns. */ + size_t lasttcol; /* Last column currently used. */ + size_t line; /* Current output line number. */ + size_t defindent; /* Default indent for text. */ + size_t defrmargin; /* Right margin of the device. */ + size_t lastrmargin; /* Right margin before the last ll. */ + size_t maxrmargin; /* Max right margin. */ + size_t col; /* Byte position in buf. */ + size_t viscol; /* Chars on current line. 
*/ + size_t trailspace; /* See term_flushln(). */ + size_t minbl; /* Minimum blanks before next field. */ + int synopsisonly; /* Print the synopsis only. */ + int mdocstyle; /* Imitate mdoc(7) output. */ + int ti; /* Temporary indent for one line. */ + int skipvsp; /* Vertical space to skip. */ + int flags; +#define TERMP_SENTENCE (1 << 0) /* Space before a sentence. */ +#define TERMP_NOSPACE (1 << 1) /* No space before words. */ +#define TERMP_NONOSPACE (1 << 2) /* No space (no autounset). */ +#define TERMP_NBRWORD (1 << 3) /* Make next word nonbreaking. */ +#define TERMP_KEEP (1 << 4) /* Keep words together. */ +#define TERMP_PREKEEP (1 << 5) /* ...starting with the next one. */ +#define TERMP_BACKAFTER (1 << 6) /* Back up after next character. */ +#define TERMP_BACKBEFORE (1 << 7) /* Back up before next character. */ +#define TERMP_NOBREAK (1 << 8) /* See term_flushln(). */ +#define TERMP_BRTRSP (1 << 9) /* See term_flushln(). */ +#define TERMP_BRIND (1 << 10) /* See term_flushln(). */ +#define TERMP_HANG (1 << 11) /* See term_flushln(). */ +#define TERMP_NOPAD (1 << 12) /* See term_flushln(). */ +#define TERMP_NOSPLIT (1 << 13) /* Do not break line before .An. */ +#define TERMP_SPLIT (1 << 14) /* Break line before .An. */ +#define TERMP_NONEWLINE (1 << 15) /* No line break in nofill mode. */ +#define TERMP_BRNEVER (1 << 16) /* Don't even break at maxrmargin. */ +#define TERMP_NOBUF (1 << 17) /* Bypass output buffer. */ +#define TERMP_NEWMC (1 << 18) /* No .mc printed yet. */ +#define TERMP_ENDMC (1 << 19) /* Next break ends .mc mode. */ +#define TERMP_MULTICOL (1 << 20) /* Multiple column mode. */ +#define TERMP_CENTER (1 << 21) /* Center output lines. */ +#define TERMP_RIGHT (1 << 22) /* Adjust to the right margin. */ + enum termtype type; /* Terminal, PS, or PDF. */ + enum termenc enc; /* Type of encoding. */ + enum termfont fontl; /* Last font set. */ + enum termfont *fontq; /* Symmetric fonts. 
*/ + int fontsz; /* Allocated size of font stack */ + int fonti; /* Index of font stack. */ + term_margin headf; /* invoked to print head */ + term_margin footf; /* invoked to print foot */ + void (*letter)(struct termp *, int); + void (*begin)(struct termp *); + void (*end)(struct termp *); + void (*endline)(struct termp *); + void (*advance)(struct termp *, size_t); + void (*setwidth)(struct termp *, int, int); + size_t (*width)(const struct termp *, int); + int (*hspan)(const struct termp *, + const struct roffsu *); + const void *argf; /* arg for headf/footf */ + const char *mc; /* Margin character. */ + struct termp_ps *ps; +}; + + +const char *ascii_uc2str(int); + +void roff_term_pre(struct termp *, const struct roff_node *); + +void term_eqn(struct termp *, const struct eqn_box *); +void term_tbl(struct termp *, const struct tbl_span *); +void term_free(struct termp *); +void term_setcol(struct termp *, size_t); +void term_newln(struct termp *); +void term_vspace(struct termp *); +void term_word(struct termp *, const char *); +void term_flushln(struct termp *); +void term_begin(struct termp *, term_margin, + term_margin, const struct roff_meta *); +void term_end(struct termp *); + +void term_setwidth(struct termp *, const char *); +int term_hspan(const struct termp *, const struct roffsu *); +int term_hen(const struct termp *, const struct roffsu *); +int term_vspan(const struct termp *, const struct roffsu *); +size_t term_strlen(const struct termp *, const char *); +size_t term_len(const struct termp *, size_t); + +void term_tab_set(const struct termp *, const char *); +void term_tab_iset(size_t); +size_t term_tab_next(size_t); + +void term_fontpush(struct termp *, enum termfont); +void term_fontpop(struct termp *); +void term_fontpopq(struct termp *, int); +void term_fontrepl(struct termp *, enum termfont); +void term_fontlast(struct termp *); diff --git a/usr.bin/mandoc/term_ascii.c b/usr.bin/mandoc/term_ascii.c new file mode 100644 index 
0000000..9b28060 --- /dev/null +++ b/usr.bin/mandoc/term_ascii.c @@ -0,0 +1,392 @@ +/* $OpenBSD: term_ascii.c,v 1.50 2019/07/19 21:45:37 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <assert.h> +#include <langinfo.h> +#include <locale.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "manconf.h" +#include "main.h" + +#define UTF8_LOCALE "en_US.UTF-8" + +static struct termp *ascii_init(enum termenc, const struct manoutput *); +static int ascii_hspan(const struct termp *, + const struct roffsu *); +static size_t ascii_width(const struct termp *, int); +static void ascii_advance(struct termp *, size_t); +static void ascii_begin(struct termp *); +static void ascii_end(struct termp *); +static void ascii_endline(struct termp *); +static void ascii_letter(struct termp *, int); +static void ascii_setwidth(struct termp *, int, int); + +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); + + +static struct termp * +ascii_init(enum termenc enc, const struct manoutput *outopts) +{ + char *v; + struct termp *p; + + p = mandoc_calloc(1, sizeof(*p)); + p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol)); + p->maxtcol = 1; + + p->line = 1; + p->defrmargin = p->lastrmargin = 78; + p->fontq = mandoc_reallocarray(NULL, + (p->fontsz = 8), sizeof(*p->fontq)); + p->fontq[0] = p->fontl = TERMFONT_NONE; + + p->begin = ascii_begin; + p->end = ascii_end; + p->hspan = ascii_hspan; + p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; + p->setwidth = ascii_setwidth; + p->width = ascii_width; + + if (enc != TERMENC_ASCII) { + + /* + * Do not change any of this to LC_ALL. It might break + * the formatting by subtly changing the behaviour of + * various functions, for example strftime(3). 
As a + * worst case, it might even cause buffer overflows. + */ + + v = enc == TERMENC_LOCALE ? + setlocale(LC_CTYPE, "") : + setlocale(LC_CTYPE, UTF8_LOCALE); + + /* + * We only support UTF-8, + * so revert to ASCII for anything else. + */ + + if (v != NULL && + strcmp(nl_langinfo(CODESET), "UTF-8") != 0) + v = setlocale(LC_CTYPE, "C"); + + if (v != NULL && MB_CUR_MAX > 1) { + p->enc = TERMENC_UTF8; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } + } + + if (outopts->mdoc) { + p->mdocstyle = 1; + p->defindent = 5; + } + if (outopts->indent) + p->defindent = outopts->indent; + if (outopts->width) + p->defrmargin = outopts->width; + if (outopts->synopsisonly) + p->synopsisonly = 1; + + assert(p->defindent < UINT16_MAX); + assert(p->defrmargin < UINT16_MAX); + return p; +} + +void * +ascii_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_ASCII, outopts); +} + +void * +utf8_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_UTF8, outopts); +} + +void * +locale_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_LOCALE, outopts); +} + +static void +ascii_setwidth(struct termp *p, int iop, int width) +{ + + width /= 24; + p->tcol->rmargin = p->defrmargin; + if (iop > 0) + p->defrmargin += width; + else if (iop == 0) + p->defrmargin = width ? 
(size_t)width : p->lastrmargin; + else if (p->defrmargin > (size_t)width) + p->defrmargin -= width; + else + p->defrmargin = 0; + if (p->defrmargin > 1000) + p->defrmargin = 1000; + p->lastrmargin = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin = p->defrmargin; +} + +void +terminal_sepline(void *arg) +{ + struct termp *p; + size_t i; + + p = (struct termp *)arg; + (*p->endline)(p); + for (i = 0; i < p->defrmargin; i++) + (*p->letter)(p, '-'); + (*p->endline)(p); + (*p->endline)(p); +} + +static size_t +ascii_width(const struct termp *p, int c) +{ + return c != ASCII_BREAK; +} + +void +ascii_free(void *arg) +{ + + term_free((struct termp *)arg); +} + +static void +ascii_letter(struct termp *p, int c) +{ + + putchar(c); +} + +static void +ascii_begin(struct termp *p) +{ + + (*p->headf)(p, p->argf); +} + +static void +ascii_end(struct termp *p) +{ + + (*p->footf)(p, p->argf); +} + +static void +ascii_endline(struct termp *p) +{ + + p->line++; + p->tcol->offset -= p->ti; + p->ti = 0; + putchar('\n'); +} + +static void +ascii_advance(struct termp *p, size_t len) +{ + size_t i; + + assert(len < UINT16_MAX); + for (i = 0; i < len; i++) + putchar(' '); +} + +static int +ascii_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + switch (su->unit) { + case SCALE_BU: + r = su->scale; + break; + case SCALE_CM: + r = su->scale * 240.0 / 2.54; + break; + case SCALE_FS: + r = su->scale * 65536.0; + break; + case SCALE_IN: + r = su->scale * 240.0; + break; + case SCALE_MM: + r = su->scale * 0.24; + break; + case SCALE_VS: + case SCALE_PC: + r = su->scale * 40.0; + break; + case SCALE_PT: + r = su->scale * 10.0 / 3.0; + break; + case SCALE_EN: + case SCALE_EM: + r = su->scale * 24.0; + break; + default: + abort(); + } + return r > 0.0 ? 
r + 0.01 : r - 0.01; +} + +const char * +ascii_uc2str(int uc) +{ + static const char nbrsp[2] = { ASCII_NBRSP, '\0' }; + static const char *tab[] = { + "<NUL>","<SOH>","<STX>","<ETX>","<EOT>","<ENQ>","<ACK>","<BEL>", + "<BS>", "\t", "<LF>", "<VT>", "<FF>", "<CR>", "<SO>", "<SI>", + "<DLE>","<DC1>","<DC2>","<DC3>","<DC4>","<NAK>","<SYN>","<ETB>", + "<CAN>","<EM>", "<SUB>","<ESC>","<FS>", "<GS>", "<RS>", "<US>", + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "<DEL>", + "<80>", "<81>", "<82>", "<83>", "<84>", "<85>", "<86>", "<87>", + "<88>", "<89>", "<8A>", "<8B>", "<8C>", "<8D>", "<8E>", "<8F>", + "<90>", "<91>", "<92>", "<93>", "<94>", "<95>", "<96>", "<97>", + "<98>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", + nbrsp, "!", "/\bc", "-\bL", "o\bx", "=\bY", "|", "<section>", + "\"", "(C)", "_\ba", "<<", "~", "", "(R)", "-", + "<degree>","+-","^2", "^3", "'","<micro>","<paragraph>",".", + ",", "^1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", + "`\bA", "'\bA", "^\bA", "~\bA", "\"\bA","o\bA", "AE", ",\bC", + "`\bE", "'\bE", "^\bE", "\"\bE","`\bI", "'\bI", "^\bI", "\"\bI", + "Dh", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", + "/\bO", "`\bU", "'\bU", "^\bU", "\"\bU","'\bY", "Th", "ss", + "`\ba", "'\ba", "^\ba", "~\ba", "\"\ba","o\ba", "ae", ",\bc", + "`\be", "'\be", "^\be", "\"\be","`\bi", "'\bi", "^\bi", "\"\bi", + "dh", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","/", + "/\bo", "`\bu", "'\bu", "^\bu", "\"\bu","'\by", "th", "\"\by", + "A", "a", "A", "a", "A", "a", "'\bC", "'\bc", + "^\bC", "^\bc", "C", 
"c", "C", "c", "D", "d", + "/\bD", "/\bd", "E", "e", "E", "e", "E", "e", + "E", "e", "E", "e", "^\bG", "^\bg", "G", "g", + "G", "g", ",\bG", ",\bg", "^\bH", "^\bh", "/\bH", "/\bh", + "~\bI", "~\bi", "I", "i", "I", "i", "I", "i", + "I", "i", "IJ", "ij", "^\bJ", "^\bj", ",\bK", ",\bk", + "q", "'\bL", "'\bl", ",\bL", ",\bl", "L", "l", "L", + "l", "/\bL", "/\bl", "'\bN", "'\bn", ",\bN", ",\bn", "N", + "n", "'n", "Ng", "ng", "O", "o", "O", "o", + "O", "o", "OE", "oe", "'\bR", "'\br", ",\bR", ",\br", + "R", "r", "'\bS", "'\bs", "^\bS", "^\bs", ",\bS", ",\bs", + "S", "s", ",\bT", ",\bt", "T", "t", "/\bT", "/\bt", + "~\bU", "~\bu", "U", "u", "U", "u", "U", "u", + "U", "u", "U", "u", "^\bW", "^\bw", "^\bY", "^\by", + "\"\bY","'\bZ", "'\bz", "Z", "z", "Z", "z", "s", + "b", "B", "B", "b", "6", "6", "O", "C", + "c", "D", "D", "D", "d", "d", "3", "@", + "E", "F", ",\bf", "G", "G", "hv", "I", "/\bI", + "K", "k", "/\bl", "l", "W", "N", "n", "~\bO", + "O", "o", "OI", "oi", "P", "p", "YR", "2", + "2", "SH", "sh", "t", "T", "t", "T", "U", + "u", "Y", "V", "Y", "y", "/\bZ", "/\bz", "ZH", + "ZH", "zh", "zh", "/\b2", "5", "5", "ts", "w", + "|", "||", "|=", "!", "DZ", "Dz", "dz", "LJ", + "Lj", "lj", "NJ", "Nj", "nj", "A", "a", "I", + "i", "O", "o", "U", "u", "U", "u", "U", + "u", "U", "u", "U", "u", "@", "A", "a", + "A", "a", "AE", "ae", "/\bG", "/\bg", "G", "g", + "K", "k", "O", "o", "O", "o", "ZH", "zh", + "j", "DZ", "Dz", "dz", "'\bG", "'\bg", "HV", "W", + "`\bN", "`\bn", "A", "a", "'\bAE","'\bae","O", "o"}; + + assert(uc >= 0); + if ((size_t)uc < sizeof(tab)/sizeof(tab[0])) + return tab[uc]; + return mchars_uc2str(uc); +} + +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + if (c == ASCII_NBRSP) + c = ' '; + rc = wcwidth(c); + if (rc < 0) + rc = 0; + return rc; +} + +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + assert(len < UINT16_MAX); + for (i = 0; i < len; i++) + putwchar(L' '); +} + +static void +locale_endline(struct termp 
*p) +{ + + p->line++; + p->tcol->offset -= p->ti; + p->ti = 0; + putwchar(L'\n'); +} + +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} diff --git a/usr.bin/mandoc/term_ps.c b/usr.bin/mandoc/term_ps.c new file mode 100644 index 0000000..9460c88 --- /dev/null +++ b/usr.bin/mandoc/term_ps.c @@ -0,0 +1,1355 @@ +/* $OpenBSD: term_ps.c,v 1.55 2017/11/10 14:16:28 espie Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2017 Marc Espie <espie@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <err.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "manconf.h" +#include "main.h" + +/* These work the buffer used by the header and footer. */ +#define PS_BUFSLOP 128 + +/* Convert PostScript point "x" to an AFM unit. 
*/
/*
 * Note that the result is cast to size_t, so "x" has to be
 * non-negative for the conversion to be meaningful.
 */
#define	PNT2AFM(p, x) \
	(size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale))

/* Convert an AFM unit "x" to PostScript points (result is a double). */
#define	AFM2PNT(p, x) \
	((double)(x) / (1000.0 / (double)(p)->ps->scale))

/* Metrics of a single glyph. */
struct	glyph {
	unsigned short	  wx; /* WX in AFM */
};

/* One output font: its name and the widths of the glyphs we support. */
struct	font {
	const char	 *name; /* FontName in AFM */
#define	MAXCHAR		  95 /* total characters we can handle */
	struct glyph	  gly[MAXCHAR]; /* glyph metrics */
};

/* Private state of the PostScript and PDF output devices. */
struct	termp_ps {
	int		  flags;
#define	PS_INLINE	 (1 << 0)	/* we're in a word */
#define	PS_MARGINS	 (1 << 1)	/* we're in the margins */
#define	PS_NEWPAGE	 (1 << 2)	/* new page, no words yet */
#define	PS_BACKSP	 (1 << 3)	/* last character was backspace */
	size_t		  pscol;	/* visible column (AFM units) */
	size_t		  pscolnext;	/* used for overstrike */
	size_t		  psrow;	/* visible row (AFM units) */
	size_t		  lastrow;	/* psrow of the previous word */
	char		 *psmarg;	/* margin buf */
	size_t		  psmargsz;	/* margin buf size */
	size_t		  psmargcur;	/* cur index in margin buf */
	char		  last;		/* last non-backspace seen */
	enum termfont	  lastf;	/* last set font */
	enum termfont	  nextf;	/* building next font here */
	size_t		  scale;	/* font scaling factor */
	size_t		  pages;	/* number of pages shown */
	size_t		  lineheight;	/* line height (AFM units) */
	size_t		  top;		/* body top (AFM units) */
	size_t		  bottom;	/* body bottom (AFM units) */
	const char	 *medianame;	/* for DocumentMedia and PageSize */
	size_t		  height;	/* page height (AFM units) */
	size_t		  width;	/* page width (AFM units) */
	size_t		  lastwidth;	/* page width before last ll */
	size_t		  left;		/* body left (AFM units) */
	size_t		  header;	/* header pos (AFM units) */
	size_t		  footer;	/* footer pos (AFM units) */
	size_t		  pdfbytes;	/* current output byte */
	size_t		  pdflastpg;	/* byte of last page mark */
	size_t		  pdfbody;	/* start of body object */
	size_t		 *pdfobjs;	/* table of object offsets */
	size_t		  pdfobjsz;	/* size of pdfobjs */
};

static	int		  ps_hspan(const struct termp *,
				const struct roffsu *);
static	size_t		  ps_width(const struct termp *, int);
static	void		  ps_advance(struct termp *, size_t);
static	void		  ps_begin(struct termp *);
static	void		  ps_closepage(struct termp *);
static	void		  ps_end(struct termp *);
static	void		  ps_endline(struct termp *);
static	void		  ps_growbuf(struct termp *, size_t);
static	void		  ps_letter(struct termp *, int);
static	void		  ps_pclose(struct termp *);
static	void		  ps_plast(struct termp *);
static	void		  ps_pletter(struct termp *, int);
static	void		  ps_printf(struct termp *, const char *, ...)
				__attribute__((__format__ (__printf__, 2, 3)));
static	void		  ps_putchar(struct termp *, char);
static	void		  ps_setfont(struct termp *, enum termfont);
static	void		  ps_setwidth(struct termp *, int, int);
static	struct termp	 *pspdf_alloc(const struct manoutput *, enum termtype);
static	void		  pdf_obj(struct termp *, size_t);

/*
 * We define, for the time being, four fonts: normal (roman), bold,
 * oblique/italic, and bold-italic.  The following table hard-codes
 * the font metrics for the MAXCHAR printable characters of
 * ASCII, i.e., 32--126.
+ */ + +static const struct font fonts[TERMFONT__MAX] = { + { "Times-Roman", { + { 250 }, + { 333 }, + { 408 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 564 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 278 }, + { 278 }, + { 564 }, + { 564 }, + { 564 }, + { 444 }, + { 921 }, + { 722 }, + { 667 }, + { 667 }, + { 722 }, + { 611 }, + { 556 }, + { 722 }, + { 722 }, + { 333 }, + { 389 }, + { 722 }, + { 611 }, + { 889 }, + { 722 }, + { 722 }, + { 556 }, + { 722 }, + { 667 }, + { 556 }, + { 611 }, + { 722 }, + { 722 }, + { 944 }, + { 722 }, + { 722 }, + { 611 }, + { 333 }, + { 278 }, + { 333 }, + { 469 }, + { 500 }, + { 333 }, + { 444 }, + { 500 }, + { 444 }, + { 500}, + { 444}, + { 333}, + { 500}, + { 500}, + { 278}, + { 278}, + { 500}, + { 278}, + { 778}, + { 500}, + { 500}, + { 500}, + { 500}, + { 333}, + { 389}, + { 278}, + { 500}, + { 500}, + { 722}, + { 500}, + { 500}, + { 444}, + { 480}, + { 200}, + { 480}, + { 541}, + } }, + { "Times-Bold", { + { 250 }, + { 333 }, + { 555 }, + { 500 }, + { 500 }, + { 1000 }, + { 833 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 570 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 570 }, + { 570 }, + { 570 }, + { 500 }, + { 930 }, + { 722 }, + { 667 }, + { 722 }, + { 722 }, + { 667 }, + { 611 }, + { 778 }, + { 778 }, + { 389 }, + { 500 }, + { 778 }, + { 667 }, + { 944 }, + { 722 }, + { 778 }, + { 611 }, + { 778 }, + { 722 }, + { 556 }, + { 667 }, + { 722 }, + { 722 }, + { 1000 }, + { 722 }, + { 722 }, + { 667 }, + { 333 }, + { 278 }, + { 333 }, + { 581 }, + { 500 }, + { 333 }, + { 500 }, + { 556 }, + { 444 }, + { 556 }, + { 444 }, + { 333 }, + { 500 }, + { 556 }, + { 278 }, + { 333 }, + { 556 }, + { 278 }, + { 833 }, + { 556 }, + { 
500 }, + { 556 }, + { 556 }, + { 444 }, + { 389 }, + { 333 }, + { 556 }, + { 500 }, + { 722 }, + { 500 }, + { 500 }, + { 444 }, + { 394 }, + { 220 }, + { 394 }, + { 520 }, + } }, + { "Times-Italic", { + { 250 }, + { 333 }, + { 420 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 675 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 675 }, + { 675 }, + { 675 }, + { 500 }, + { 920 }, + { 611 }, + { 611 }, + { 667 }, + { 722 }, + { 611 }, + { 611 }, + { 722 }, + { 722 }, + { 333 }, + { 444 }, + { 667 }, + { 556 }, + { 833 }, + { 667 }, + { 722 }, + { 611 }, + { 722 }, + { 611 }, + { 500 }, + { 556 }, + { 722 }, + { 611 }, + { 833 }, + { 611 }, + { 556 }, + { 556 }, + { 389 }, + { 278 }, + { 389 }, + { 422 }, + { 500 }, + { 333 }, + { 500 }, + { 500 }, + { 444 }, + { 500 }, + { 444 }, + { 278 }, + { 500 }, + { 500 }, + { 278 }, + { 278 }, + { 444 }, + { 278 }, + { 722 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 389 }, + { 389 }, + { 278 }, + { 500 }, + { 444 }, + { 667 }, + { 444 }, + { 444 }, + { 389 }, + { 400 }, + { 275 }, + { 400 }, + { 541 }, + } }, + { "Times-BoldItalic", { + { 250 }, + { 389 }, + { 555 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 570 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 570 }, + { 570 }, + { 570 }, + { 500 }, + { 832 }, + { 667 }, + { 667 }, + { 667 }, + { 722 }, + { 667 }, + { 667 }, + { 722 }, + { 778 }, + { 389 }, + { 500 }, + { 667 }, + { 611 }, + { 889 }, + { 722 }, + { 722 }, + { 611 }, + { 722 }, + { 667 }, + { 556 }, + { 611 }, + { 722 }, + { 667 }, + { 889 }, + { 667 }, + { 611 }, + { 611 }, + { 333 }, + { 278 }, + { 333 }, + { 570 }, + { 500 }, + { 333 }, + { 
500 }, + { 500 }, + { 444 }, + { 500 }, + { 444 }, + { 333 }, + { 500 }, + { 556 }, + { 278 }, + { 278 }, + { 500 }, + { 278 }, + { 778 }, + { 556 }, + { 500 }, + { 500 }, + { 500 }, + { 389 }, + { 389 }, + { 278 }, + { 556 }, + { 444 }, + { 667 }, + { 500 }, + { 444 }, + { 389 }, + { 348 }, + { 220 }, + { 348 }, + { 570 }, + } }, +}; + +void * +pdf_alloc(const struct manoutput *outopts) +{ + return pspdf_alloc(outopts, TERMTYPE_PDF); +} + +void * +ps_alloc(const struct manoutput *outopts) +{ + return pspdf_alloc(outopts, TERMTYPE_PS); +} + +static struct termp * +pspdf_alloc(const struct manoutput *outopts, enum termtype type) +{ + struct termp *p; + unsigned int pagex, pagey; + size_t marginx, marginy, lineheight; + const char *pp; + + p = mandoc_calloc(1, sizeof(*p)); + p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol)); + p->maxtcol = 1; + p->type = type; + + p->enc = TERMENC_ASCII; + p->fontq = mandoc_reallocarray(NULL, + (p->fontsz = 8), sizeof(*p->fontq)); + p->fontq[0] = p->fontl = TERMFONT_NONE; + p->ps = mandoc_calloc(1, sizeof(*p->ps)); + + p->advance = ps_advance; + p->begin = ps_begin; + p->end = ps_end; + p->endline = ps_endline; + p->hspan = ps_hspan; + p->letter = ps_letter; + p->setwidth = ps_setwidth; + p->width = ps_width; + + /* Default to US letter (millimetres). */ + + p->ps->medianame = "Letter"; + pagex = 216; + pagey = 279; + + /* + * The ISO-269 paper sizes can be calculated automatically, but + * it would require bringing in -lm for pow() and I'd rather not + * do that. So just do it the easy way for now. Since this + * only happens once, I'm not terribly concerned. 
+ */ + + pp = outopts->paper; + if (pp != NULL && strcasecmp(pp, "letter") != 0) { + if (strcasecmp(pp, "a3") == 0) { + p->ps->medianame = "A3"; + pagex = 297; + pagey = 420; + } else if (strcasecmp(pp, "a4") == 0) { + p->ps->medianame = "A4"; + pagex = 210; + pagey = 297; + } else if (strcasecmp(pp, "a5") == 0) { + p->ps->medianame = "A5"; + pagex = 148; + pagey = 210; + } else if (strcasecmp(pp, "legal") == 0) { + p->ps->medianame = "Legal"; + pagex = 216; + pagey = 356; + } else if (sscanf(pp, "%ux%u", &pagex, &pagey) == 2) + p->ps->medianame = "CustomSize"; + else + warnx("%s: Unknown paper", pp); + } + + /* + * This MUST be defined before any PNT2AFM or AFM2PNT + * calculations occur. + */ + + p->ps->scale = 11; + + /* Remember millimetres -> AFM units. */ + + pagex = PNT2AFM(p, ((double)pagex * 72.0 / 25.4)); + pagey = PNT2AFM(p, ((double)pagey * 72.0 / 25.4)); + + /* Margins are 1/9 the page x and y. */ + + marginx = (size_t)((double)pagex / 9.0); + marginy = (size_t)((double)pagey / 9.0); + + /* Line-height is 1.4em. */ + + lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4)); + + p->ps->width = p->ps->lastwidth = (size_t)pagex; + p->ps->height = (size_t)pagey; + p->ps->header = pagey - (marginy / 2) - (lineheight / 2); + p->ps->top = pagey - marginy; + p->ps->footer = (marginy / 2) - (lineheight / 2); + p->ps->bottom = marginy; + p->ps->left = marginx; + p->ps->lineheight = lineheight; + + p->defrmargin = pagex - (marginx * 2); + return p; +} + +static void +ps_setwidth(struct termp *p, int iop, int width) +{ + size_t lastwidth; + + lastwidth = p->ps->width; + if (iop > 0) + p->ps->width += width; + else if (iop == 0) + p->ps->width = width ? 
(size_t)width : p->ps->lastwidth; + else if (p->ps->width > (size_t)width) + p->ps->width -= width; + else + p->ps->width = 0; + p->ps->lastwidth = lastwidth; +} + +void +pspdf_free(void *arg) +{ + struct termp *p; + + p = (struct termp *)arg; + + free(p->ps->psmarg); + free(p->ps->pdfobjs); + + free(p->ps); + term_free(p); +} + +static void +ps_printf(struct termp *p, const char *fmt, ...) +{ + va_list ap; + int pos, len; + + va_start(ap, fmt); + + /* + * If we're running in regular mode, then pipe directly into + * vprintf(). If we're processing margins, then push the data + * into our growable margin buffer. + */ + + if ( ! (PS_MARGINS & p->ps->flags)) { + len = vprintf(fmt, ap); + va_end(ap); + p->ps->pdfbytes += len < 0 ? 0 : (size_t)len; + return; + } + + /* + * XXX: I assume that the in-margin print won't exceed + * PS_BUFSLOP (128 bytes), which is reasonable but still an + * assumption that will cause pukeage if it's not the case. + */ + + ps_growbuf(p, PS_BUFSLOP); + + pos = (int)p->ps->psmargcur; + vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap); + + va_end(ap); + + p->ps->psmargcur = strlen(p->ps->psmarg); +} + +static void +ps_putchar(struct termp *p, char c) +{ + int pos; + + /* See ps_printf(). */ + + if ( ! (PS_MARGINS & p->ps->flags)) { + putchar(c); + p->ps->pdfbytes++; + return; + } + + ps_growbuf(p, 2); + + pos = (int)p->ps->psmargcur++; + p->ps->psmarg[pos++] = c; + p->ps->psmarg[pos] = '\0'; +} + +static void +pdf_obj(struct termp *p, size_t obj) +{ + + assert(obj > 0); + + if ((obj - 1) >= p->ps->pdfobjsz) { + p->ps->pdfobjsz = obj + 128; + p->ps->pdfobjs = mandoc_reallocarray(p->ps->pdfobjs, + p->ps->pdfobjsz, sizeof(size_t)); + } + + p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes; + ps_printf(p, "%zu 0 obj\n", obj); +} + +static void +ps_closepage(struct termp *p) +{ + int i; + size_t len, base; + + /* + * Close out a page that we've already flushed to output. In + * PostScript, we simply note that the page must be shown. 
In + * PDF, we must now create the Length, Resource, and Page node + * for the page contents. + */ + + assert(p->ps->psmarg && p->ps->psmarg[0]); + ps_printf(p, "%s", p->ps->psmarg); + + if (TERMTYPE_PS != p->type) { + len = p->ps->pdfbytes - p->ps->pdflastpg; + base = p->ps->pages * 4 + p->ps->pdfbody; + + ps_printf(p, "endstream\nendobj\n"); + + /* Length of content. */ + pdf_obj(p, base + 1); + ps_printf(p, "%zu\nendobj\n", len); + + /* Resource for content. */ + pdf_obj(p, base + 2); + ps_printf(p, "<<\n/ProcSet [/PDF /Text]\n"); + ps_printf(p, "/Font <<\n"); + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, "/F%d %d 0 R\n", i, 3 + i); + ps_printf(p, ">>\n>>\nendobj\n"); + + /* Page node. */ + pdf_obj(p, base + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Page\n"); + ps_printf(p, "/Parent 2 0 R\n"); + ps_printf(p, "/Resources %zu 0 R\n", base + 2); + ps_printf(p, "/Contents %zu 0 R\n", base); + ps_printf(p, ">>\nendobj\n"); + } else + ps_printf(p, "showpage\n"); + + p->ps->pages++; + p->ps->psrow = p->ps->top; + assert( ! (PS_NEWPAGE & p->ps->flags)); + p->ps->flags |= PS_NEWPAGE; +} + +static void +ps_end(struct termp *p) +{ + size_t i, xref, base; + + ps_plast(p); + ps_pclose(p); + + /* + * At the end of the file, do one last showpage. This is the + * same behaviour as groff(1) and works for multiple pages as + * well as just one. + */ + + if ( ! 
(PS_NEWPAGE & p->ps->flags)) { + assert(0 == p->ps->flags); + assert('\0' == p->ps->last); + ps_closepage(p); + } + + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Trailer\n"); + ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages); + ps_printf(p, "%%%%EOF\n"); + return; + } + + pdf_obj(p, 2); + ps_printf(p, "<<\n/Type /Pages\n"); + ps_printf(p, "/MediaBox [0 0 %zu %zu]\n", + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); + + ps_printf(p, "/Count %zu\n", p->ps->pages); + ps_printf(p, "/Kids ["); + + for (i = 0; i < p->ps->pages; i++) + ps_printf(p, " %zu 0 R", i * 4 + p->ps->pdfbody + 3); + + base = (p->ps->pages - 1) * 4 + p->ps->pdfbody + 4; + + ps_printf(p, "]\n>>\nendobj\n"); + pdf_obj(p, base); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Catalog\n"); + ps_printf(p, "/Pages 2 0 R\n"); + ps_printf(p, ">>\nendobj\n"); + xref = p->ps->pdfbytes; + ps_printf(p, "xref\n"); + ps_printf(p, "0 %zu\n", base + 1); + ps_printf(p, "0000000000 65535 f \n"); + + for (i = 0; i < base; i++) + ps_printf(p, "%.10zu 00000 n \n", + p->ps->pdfobjs[(int)i]); + + ps_printf(p, "trailer\n"); + ps_printf(p, "<<\n"); + ps_printf(p, "/Size %zu\n", base + 1); + ps_printf(p, "/Root %zu 0 R\n", base); + ps_printf(p, "/Info 1 0 R\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "startxref\n"); + ps_printf(p, "%zu\n", xref); + ps_printf(p, "%%%%EOF\n"); +} + +static void +ps_begin(struct termp *p) +{ + size_t width, height; + int i; + + /* + * Print margins into margin buffer. Nothing gets output to the + * screen yet, so we don't need to initialise the primary state. 
+ */ + + if (p->ps->psmarg) { + assert(p->ps->psmargsz); + p->ps->psmarg[0] = '\0'; + } + + /*p->ps->pdfbytes = 0;*/ + p->ps->psmargcur = 0; + p->ps->flags = PS_MARGINS; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->header; + p->ps->lastrow = 0; /* impossible row */ + + ps_setfont(p, TERMFONT_NONE); + + (*p->headf)(p, p->argf); + (*p->endline)(p); + + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->footer; + + (*p->footf)(p, p->argf); + (*p->endline)(p); + + p->ps->flags &= ~PS_MARGINS; + + assert(0 == p->ps->flags); + assert(p->ps->psmarg); + assert('\0' != p->ps->psmarg[0]); + + /* + * Print header and initialise page state. Following this, + * stuff gets printed to the screen, so make sure we're sane. + */ + + if (TERMTYPE_PS == p->type) { + width = AFM2PNT(p, p->ps->width); + height = AFM2PNT(p, p->ps->height); + + ps_printf(p, "%%!PS-Adobe-3.0\n"); + ps_printf(p, "%%%%DocumentData: Clean7Bit\n"); + ps_printf(p, "%%%%Orientation: Portrait\n"); + ps_printf(p, "%%%%Pages: (atend)\n"); + ps_printf(p, "%%%%PageOrder: Ascend\n"); + ps_printf(p, "%%%%DocumentMedia: man-%s %zu %zu 0 () ()\n", + p->ps->medianame, width, height); + ps_printf(p, "%%%%DocumentNeededResources: font"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, " %s", fonts[i].name); + + ps_printf(p, "\n%%%%DocumentSuppliedResources: " + "procset MandocProcs 1.0 0\n"); + ps_printf(p, "%%%%EndComments\n"); + ps_printf(p, "%%%%BeginProlog\n"); + ps_printf(p, "%%%%BeginResource: procset MandocProcs " + "10170 10170\n"); + /* The font size is effectively hard-coded for now. 
*/ + ps_printf(p, "/fs %zu def\n", p->ps->scale); + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, "/f%d { /%s fs selectfont } def\n", + i, fonts[i].name); + ps_printf(p, "/s { 3 1 roll moveto show } bind def\n"); + ps_printf(p, "/c { exch currentpoint exch pop " + "moveto show } bind def\n"); + ps_printf(p, "%%%%EndResource\n"); + ps_printf(p, "%%%%EndProlog\n"); + ps_printf(p, "%%%%BeginSetup\n"); + ps_printf(p, "%%%%BeginFeature: *PageSize %s\n", + p->ps->medianame); + ps_printf(p, "<</PageSize [%zu %zu]>>setpagedevice\n", + width, height); + ps_printf(p, "%%%%EndFeature\n"); + ps_printf(p, "%%%%EndSetup\n"); + } else { + ps_printf(p, "%%PDF-1.1\n"); + pdf_obj(p, 1); + ps_printf(p, "<<\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "endobj\n"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) { + pdf_obj(p, (size_t)i + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Font\n"); + ps_printf(p, "/Subtype /Type1\n"); + ps_printf(p, "/Name /F%d\n", i); + ps_printf(p, "/BaseFont /%s\n", fonts[i].name); + ps_printf(p, ">>\nendobj\n"); + } + } + + p->ps->pdfbody = (size_t)TERMFONT__MAX + 3; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->top; + p->ps->flags |= PS_NEWPAGE; + ps_setfont(p, TERMFONT_NONE); +} + +static void +ps_pletter(struct termp *p, int c) +{ + int f; + + /* + * If we haven't opened a page context, then output that we're + * in a new page and make sure the font is correctly set. + */ + + if (PS_NEWPAGE & p->ps->flags) { + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Page: %zu %zu\n", + p->ps->pages + 1, p->ps->pages + 1); + ps_printf(p, "f%d\n", (int)p->ps->lastf); + } else { + pdf_obj(p, p->ps->pdfbody + + p->ps->pages * 4); + ps_printf(p, "<<\n"); + ps_printf(p, "/Length %zu 0 R\n", + p->ps->pdfbody + 1 + p->ps->pages * 4); + ps_printf(p, ">>\nstream\n"); + } + p->ps->pdflastpg = p->ps->pdfbytes; + p->ps->flags &= ~PS_NEWPAGE; + } + + /* + * If we're not in a PostScript "word" context, then open one + * now at the current cursor. 
+ */ + + if ( ! (PS_INLINE & p->ps->flags)) { + if (TERMTYPE_PS != p->type) { + ps_printf(p, "BT\n/F%d %zu Tf\n", + (int)p->ps->lastf, p->ps->scale); + ps_printf(p, "%.3f %.3f Td\n(", + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + } else { + ps_printf(p, "%.3f", AFM2PNT(p, p->ps->pscol)); + if (p->ps->psrow != p->ps->lastrow) + ps_printf(p, " %.3f", + AFM2PNT(p, p->ps->psrow)); + ps_printf(p, "("); + } + p->ps->flags |= PS_INLINE; + } + + assert( ! (PS_NEWPAGE & p->ps->flags)); + + /* + * We need to escape these characters as per the PostScript + * specification. We would also escape non-graphable characters + * (like tabs), but none of them would get to this point and + * it's superfluous to abort() on them. + */ + + switch (c) { + case '(': + case ')': + case '\\': + ps_putchar(p, '\\'); + break; + default: + break; + } + + /* Write the character and adjust where we are on the page. */ + + f = (int)p->ps->lastf; + + if (c <= 32 || c - 32 >= MAXCHAR) + c = 32; + + ps_putchar(p, (char)c); + c -= 32; + p->ps->pscol += (size_t)fonts[f].gly[c].wx; +} + +static void +ps_pclose(struct termp *p) +{ + + /* + * Spit out that we're exiting a word context (this is a + * "partial close" because we don't check the last-char buffer + * or anything). + */ + + if ( ! (PS_INLINE & p->ps->flags)) + return; + + if (TERMTYPE_PS != p->type) + ps_printf(p, ") Tj\nET\n"); + else if (p->ps->psrow == p->ps->lastrow) + ps_printf(p, ")c\n"); + else { + ps_printf(p, ")s\n"); + p->ps->lastrow = p->ps->psrow; + } + + p->ps->flags &= ~PS_INLINE; +} + +/* If we have a `last' char that wasn't printed yet, print it now. */ +static void +ps_plast(struct termp *p) +{ + size_t wx; + + if (p->ps->last == '\0') + return; + + /* Check the font mode; open a new scope if it doesn't match. 
*/ + + if (p->ps->nextf != p->ps->lastf) { + ps_pclose(p); + ps_setfont(p, p->ps->nextf); + } + p->ps->nextf = TERMFONT_NONE; + + /* + * For an overstrike, if a previous character + * was wider, advance to center the new one. + */ + + if (p->ps->pscolnext) { + wx = fonts[p->ps->lastf].gly[(int)p->ps->last-32].wx; + if (p->ps->pscol + wx < p->ps->pscolnext) + p->ps->pscol = (p->ps->pscol + + p->ps->pscolnext - wx) / 2; + } + + ps_pletter(p, p->ps->last); + p->ps->last = '\0'; + + /* + * For an overstrike, if a previous character + * was wider, advance to the end of the old one. + */ + + if (p->ps->pscol < p->ps->pscolnext) { + ps_pclose(p); + p->ps->pscol = p->ps->pscolnext; + } +} + +static void +ps_letter(struct termp *p, int arg) +{ + size_t savecol; + char c; + + c = arg >= 128 || arg <= 0 ? '?' : arg; + + /* + * When receiving a backspace, merely flag it. + * We don't know yet whether it is + * a font instruction or an overstrike. + */ + + if (c == '\b') { + assert(p->ps->last != '\0'); + assert( ! (p->ps->flags & PS_BACKSP)); + p->ps->flags |= PS_BACKSP; + return; + } + + /* + * Decode font instructions. + */ + + if (p->ps->flags & PS_BACKSP) { + if (p->ps->last == '_') { + switch (p->ps->nextf) { + case TERMFONT_BI: + break; + case TERMFONT_BOLD: + p->ps->nextf = TERMFONT_BI; + break; + default: + p->ps->nextf = TERMFONT_UNDER; + } + p->ps->last = c; + p->ps->flags &= ~PS_BACKSP; + return; + } + if (p->ps->last == c) { + switch (p->ps->nextf) { + case TERMFONT_BI: + break; + case TERMFONT_UNDER: + p->ps->nextf = TERMFONT_BI; + break; + default: + p->ps->nextf = TERMFONT_BOLD; + } + p->ps->flags &= ~PS_BACKSP; + return; + } + + /* + * This is not a font instruction, but rather + * the next character. Prepare for overstrike. + */ + + savecol = p->ps->pscol; + } else + savecol = SIZE_MAX; + + /* + * We found the next character, so the font instructions + * for the previous one are complete. + * Use them and print it. 
+ */ + + ps_plast(p); + + /* + * Do not print the current character yet because font + * instructions might follow; only remember the character. + * It will get printed later from ps_plast(). + */ + + p->ps->last = c; + + /* + * For an overstrike, back up to the previous position. + * If the previous character is wider than any it overstrikes, + * remember the current position, because it might also be + * wider than all that will overstrike it. + */ + + if (savecol != SIZE_MAX) { + if (p->ps->pscolnext < p->ps->pscol) + p->ps->pscolnext = p->ps->pscol; + ps_pclose(p); + p->ps->pscol = savecol; + p->ps->flags &= ~PS_BACKSP; + } else + p->ps->pscolnext = 0; +} + +static void +ps_advance(struct termp *p, size_t len) +{ + + /* + * Advance some spaces. This can probably be made smarter, + * i.e., to have multiple space-separated words in the same + * scope, but this is easier: just close out the current scope + * and readjust our column settings. + */ + + ps_plast(p); + ps_pclose(p); + p->ps->pscol += len; +} + +static void +ps_endline(struct termp *p) +{ + + /* Close out any scopes we have open: we're at eoln. */ + + ps_plast(p); + ps_pclose(p); + + /* + * If we're in the margin, don't try to recalculate our current + * row. XXX: if the column tries to be fancy with multiple + * lines, we'll do nasty stuff. + */ + + if (PS_MARGINS & p->ps->flags) + return; + + /* Left-justify. */ + + p->ps->pscol = p->ps->left; + + /* If we haven't printed anything, return. */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + /* + * Put us down a line. If we're at the page bottom, spit out a + * showpage and restart our row. 
+ */ + + if (p->ps->psrow >= p->ps->lineheight + p->ps->bottom) { + p->ps->psrow -= p->ps->lineheight; + return; + } + + ps_closepage(p); + + p->tcol->offset -= p->ti; + p->ti = 0; +} + +static void +ps_setfont(struct termp *p, enum termfont f) +{ + + assert(f < TERMFONT__MAX); + p->ps->lastf = f; + + /* + * If we're still at the top of the page, let the font-setting + * be delayed until we actually have stuff to print. + */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + if (TERMTYPE_PS == p->type) + ps_printf(p, "f%d\n", (int)f); + else + ps_printf(p, "/F%d %zu Tf\n", + (int)f, p->ps->scale); +} + +static size_t +ps_width(const struct termp *p, int c) +{ + + if (c <= 32 || c - 32 >= MAXCHAR) + c = 0; + else + c -= 32; + + return (size_t)fonts[(int)TERMFONT_NONE].gly[c].wx; +} + +static int +ps_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + /* + * All of these measurements are derived by converting from the + * native measurement to AFM units. + */ + switch (su->unit) { + case SCALE_BU: + /* + * Traditionally, the default unit is fixed to the + * output media. So this would refer to the point. In + * mandoc(1), however, we stick to the default terminal + * scaling unit so that output is the same regardless + * the media. 
+ */ + r = PNT2AFM(p, su->scale * 72.0 / 240.0); + break; + case SCALE_CM: + r = PNT2AFM(p, su->scale * 72.0 / 2.54); + break; + case SCALE_EM: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[109 - 32].wx; + break; + case SCALE_EN: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[110 - 32].wx; + break; + case SCALE_IN: + r = PNT2AFM(p, su->scale * 72.0); + break; + case SCALE_MM: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[109 - 32].wx / 100.0; + break; + case SCALE_PC: + r = PNT2AFM(p, su->scale * 12.0); + break; + case SCALE_PT: + r = PNT2AFM(p, su->scale * 1.0); + break; + case SCALE_VS: + r = su->scale * p->ps->lineheight; + break; + default: + r = su->scale; + break; + } + + return r * 24.0; +} + +static void +ps_growbuf(struct termp *p, size_t sz) +{ + if (p->ps->psmargcur + sz <= p->ps->psmargsz) + return; + + if (sz < PS_BUFSLOP) + sz = PS_BUFSLOP; + + p->ps->psmargsz += sz; + p->ps->psmarg = mandoc_realloc(p->ps->psmarg, p->ps->psmargsz); +} diff --git a/usr.bin/mandoc/term_tab.c b/usr.bin/mandoc/term_tab.c new file mode 100644 index 0000000..0c80c72 --- /dev/null +++ b/usr.bin/mandoc/term_tab.c @@ -0,0 +1,128 @@ +/* $OpenBSD: term_tab.c,v 1.4 2017/06/17 14:55:02 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <stddef.h> + +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" + +struct tablist { + size_t *t; /* Allocated array of tab positions. */ + size_t s; /* Allocated number of positions. */ + size_t n; /* Currently used number of positions. */ +}; + +static struct { + struct tablist a; /* All tab positions for lookup. */ + struct tablist p; /* Periodic tab positions to add. */ + size_t d; /* Default tab width in units of n. */ +} tabs; + + +void +term_tab_set(const struct termp *p, const char *arg) +{ + static int recording_period; + + struct roffsu su; + struct tablist *tl; + size_t pos; + int add; + + /* Special arguments: clear all tabs or switch lists. */ + + if (arg == NULL) { + tabs.a.n = tabs.p.n = 0; + recording_period = 0; + if (tabs.d == 0) { + a2roffsu(".8i", &su, SCALE_IN); + tabs.d = term_hen(p, &su); + } + return; + } + if (arg[0] == 'T' && arg[1] == '\0') { + recording_period = 1; + return; + } + + /* Parse the sign, the number, and the unit. */ + + if (*arg == '+') { + add = 1; + arg++; + } else + add = 0; + if (a2roffsu(arg, &su, SCALE_EM) == NULL) + return; + + /* Select the list, and extend it if it is full. */ + + tl = recording_period ? &tabs.p : &tabs.a; + if (tl->n >= tl->s) { + tl->s += 8; + tl->t = mandoc_reallocarray(tl->t, tl->s, sizeof(*tl->t)); + } + + /* Append the new position. */ + + pos = term_hen(p, &su); + tl->t[tl->n] = pos; + if (add && tl->n) + tl->t[tl->n] += tl->t[tl->n - 1]; + tl->n++; +} + +/* + * Simplified version without a parser, + * never incremental, never periodic, for use by tbl(7). 
+ */ +void +term_tab_iset(size_t inc) +{ + if (tabs.a.n >= tabs.a.s) { + tabs.a.s += 8; + tabs.a.t = mandoc_reallocarray(tabs.a.t, tabs.a.s, + sizeof(*tabs.a.t)); + } + tabs.a.t[tabs.a.n++] = inc; +} + +size_t +term_tab_next(size_t prev) +{ + size_t i, j; + + for (i = 0;; i++) { + if (i == tabs.a.n) { + if (tabs.p.n == 0) + return prev; + tabs.a.n += tabs.p.n; + if (tabs.a.s < tabs.a.n) { + tabs.a.s = tabs.a.n; + tabs.a.t = mandoc_reallocarray(tabs.a.t, + tabs.a.s, sizeof(*tabs.a.t)); + } + for (j = 0; j < tabs.p.n; j++) + tabs.a.t[i + j] = tabs.p.t[j] + + (i ? tabs.a.t[i - 1] : 0); + } + if (prev < tabs.a.t[i]) + return tabs.a.t[i]; + } +} diff --git a/usr.bin/mandoc/term_tag.c b/usr.bin/mandoc/term_tag.c new file mode 100644 index 0000000..1c67dcc --- /dev/null +++ b/usr.bin/mandoc/term_tag.c @@ -0,0 +1,199 @@ +/* $OpenBSD: term_tag.c,v 1.4 2020/04/18 20:28:46 schwarze Exp $ */ +/* + * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Functions to write a ctags(1) file. + * For use by the mandoc(1) ASCII and UTF-8 formatters only. 
+ */ +#include <sys/types.h> + +#include <errno.h> +#include <signal.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "roff.h" +#include "roff_int.h" +#include "tag.h" +#include "term_tag.h" + +static void tag_signal(int) __attribute__((__noreturn__)); + +static struct tag_files tag_files; + + +/* + * Prepare for using a pager. + * Not all pagers are capable of using a tag file, + * but for simplicity, create it anyway. + */ +struct tag_files * +term_tag_init(void) +{ + struct sigaction sa; + int ofd; /* In /tmp/, dup(2)ed to stdout. */ + int tfd; + + ofd = tfd = -1; + tag_files.tfs = NULL; + tag_files.tcpgid = -1; + + /* Clean up when dying from a signal. */ + + memset(&sa, 0, sizeof(sa)); + sigfillset(&sa.sa_mask); + sa.sa_handler = tag_signal; + sigaction(SIGHUP, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + sigaction(SIGTERM, &sa, NULL); + + /* + * POSIX requires that a process calling tcsetpgrp(3) + * from the background gets a SIGTTOU signal. + * In that case, do not stop. + */ + + sa.sa_handler = SIG_IGN; + sigaction(SIGTTOU, &sa, NULL); + + /* Save the original standard output for use by the pager. */ + + if ((tag_files.ofd = dup(STDOUT_FILENO)) == -1) { + mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); + goto fail; + } + + /* Create both temporary output files. 
*/ + + (void)strlcpy(tag_files.ofn, "/tmp/man.XXXXXXXXXX", + sizeof(tag_files.ofn)); + (void)strlcpy(tag_files.tfn, "/tmp/man.XXXXXXXXXX", + sizeof(tag_files.tfn)); + if ((ofd = mkstemp(tag_files.ofn)) == -1) { + mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, + "%s: %s", tag_files.ofn, strerror(errno)); + goto fail; + } + if ((tfd = mkstemp(tag_files.tfn)) == -1) { + mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, + "%s: %s", tag_files.tfn, strerror(errno)); + goto fail; + } + if ((tag_files.tfs = fdopen(tfd, "w")) == NULL) { + mandoc_msg(MANDOCERR_FDOPEN, 0, 0, "%s", strerror(errno)); + goto fail; + } + tfd = -1; + if (dup2(ofd, STDOUT_FILENO) == -1) { + mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); + goto fail; + } + close(ofd); + return &tag_files; + +fail: + term_tag_unlink(); + if (ofd != -1) + close(ofd); + if (tfd != -1) + close(tfd); + if (tag_files.ofd != -1) { + close(tag_files.ofd); + tag_files.ofd = -1; + } + return NULL; +} + +void +term_tag_write(struct roff_node *n, size_t line) +{ + const char *cp; + int len; + + if (tag_files.tfs == NULL) + return; + cp = n->tag == NULL ? n->child->string : n->tag; + if (cp[0] == '\\' && (cp[1] == '&' || cp[1] == 'e')) + cp += 2; + len = strcspn(cp, " \t\\"); + fprintf(tag_files.tfs, "%.*s %s %zu\n", + len, cp, tag_files.ofn, line); +} + +/* + * Close both output files and restore the original standard output + * to the terminal. In the unlikely case that the latter fails, + * trying to start a pager would be useless, so report the failure + * to the main program. 
+ */ +int +term_tag_close(void) +{ + int irc = 0; + + if (tag_files.tfs != NULL) { + fclose(tag_files.tfs); + tag_files.tfs = NULL; + } + if (tag_files.ofd != -1) { + fflush(stdout); + if ((irc = dup2(tag_files.ofd, STDOUT_FILENO)) == -1) + mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); + close(tag_files.ofd); + tag_files.ofd = -1; + } + return irc; +} + +void +term_tag_unlink(void) +{ + pid_t tc_pgid; + + if (tag_files.tcpgid != -1) { + tc_pgid = tcgetpgrp(STDOUT_FILENO); + if (tc_pgid == tag_files.pager_pid || + tc_pgid == getpgid(0) || + getpgid(tc_pgid) == -1) + (void)tcsetpgrp(STDOUT_FILENO, tag_files.tcpgid); + } + if (*tag_files.ofn != '\0') { + unlink(tag_files.ofn); + *tag_files.ofn = '\0'; + } + if (*tag_files.tfn != '\0') { + unlink(tag_files.tfn); + *tag_files.tfn = '\0'; + } +} + +static void +tag_signal(int signum) +{ + struct sigaction sa; + + term_tag_unlink(); + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + sigaction(signum, &sa, NULL); + kill(getpid(), signum); + /* NOTREACHED */ + _exit(1); +} diff --git a/usr.bin/mandoc/term_tag.h b/usr.bin/mandoc/term_tag.h new file mode 100644 index 0000000..62be2d3 --- /dev/null +++ b/usr.bin/mandoc/term_tag.h @@ -0,0 +1,34 @@ +/* $OpenBSD: term_tag.h,v 1.2 2020/04/02 22:10:27 schwarze Exp $ */ +/* + * Copyright (c) 2015, 2018, 2019, 2020 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Internal interfaces to write a ctags(1) file. + * For use by the mandoc(1) ASCII and UTF-8 formatters only. + */ + +struct tag_files { + char ofn[20]; /* Output file name. */ + char tfn[20]; /* Tag file name. */ + FILE *tfs; /* Tag file object. */ + int ofd; /* Original output file descriptor. */ + pid_t tcpgid; /* Process group controlling the terminal. */ + pid_t pager_pid; /* Process ID of the pager. */ +}; + + +struct tag_files *term_tag_init(void); +void term_tag_write(struct roff_node *, size_t); +int term_tag_close(void); +void term_tag_unlink(void); diff --git a/usr.bin/mandoc/tree.c b/usr.bin/mandoc/tree.c new file mode 100644 index 0000000..0125ebd --- /dev/null +++ b/usr.bin/mandoc/tree.c @@ -0,0 +1,514 @@ +/* $OpenBSD: tree.c,v 1.56 2020/04/08 11:54:14 schwarze Exp $ */ +/* + * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2008, 2009, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Formatting module to let mandoc(1) show + * a human readable representation of the syntax tree. + */ +#include <sys/types.h> + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "tbl.h" +#include "eqn.h" +#include "main.h" + +static void print_attr(const struct roff_node *); +static void print_box(const struct eqn_box *, int); +static void print_cellt(enum tbl_cellt); +static void print_man(const struct roff_node *, int); +static void print_meta(const struct roff_meta *); +static void print_mdoc(const struct roff_node *, int); +static void print_span(const struct tbl_span *, int); + + +void +tree_mdoc(void *arg, const struct roff_meta *mdoc) +{ + print_meta(mdoc); + putchar('\n'); + print_mdoc(mdoc->first->child, 0); +} + +void +tree_man(void *arg, const struct roff_meta *man) +{ + print_meta(man); + if (man->hasbody == 0) + puts("body = empty"); + putchar('\n'); + print_man(man->first->child, 0); +} + +static void +print_meta(const struct roff_meta *meta) +{ + if (meta->title != NULL) + printf("title = \"%s\"\n", meta->title); + if (meta->name != NULL) + printf("name = \"%s\"\n", meta->name); + if (meta->msec != NULL) + printf("sec = \"%s\"\n", meta->msec); + if (meta->vol != NULL) + printf("vol = \"%s\"\n", meta->vol); + if (meta->arch != NULL) + printf("arch = \"%s\"\n", meta->arch); + if (meta->os != NULL) + printf("os = \"%s\"\n", meta->os); + if (meta->date != NULL) + printf("date = \"%s\"\n", meta->date); +} + +static void +print_mdoc(const struct roff_node *n, int indent) +{ + const 
char *p, *t; + int i, j; + size_t argc; + struct mdoc_argv *argv; + + if (n == NULL) + return; + + argv = NULL; + argc = 0; + t = p = NULL; + + switch (n->type) { + case ROFFT_ROOT: + t = "root"; + break; + case ROFFT_BLOCK: + t = "block"; + break; + case ROFFT_HEAD: + t = "head"; + break; + case ROFFT_BODY: + if (n->end) + t = "body-end"; + else + t = "body"; + break; + case ROFFT_TAIL: + t = "tail"; + break; + case ROFFT_ELEM: + t = "elem"; + break; + case ROFFT_TEXT: + t = "text"; + break; + case ROFFT_COMMENT: + t = "comment"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + t = "eqn"; + break; + default: + abort(); + } + + switch (n->type) { + case ROFFT_TEXT: + case ROFFT_COMMENT: + p = n->string; + break; + case ROFFT_BODY: + p = roff_name[n->tok]; + break; + case ROFFT_HEAD: + p = roff_name[n->tok]; + break; + case ROFFT_TAIL: + p = roff_name[n->tok]; + break; + case ROFFT_ELEM: + p = roff_name[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case ROFFT_BLOCK: + p = roff_name[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + p = "EQ"; + break; + case ROFFT_ROOT: + p = "root"; + break; + default: + abort(); + } + + if (n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else { + for (i = 0; i < indent; i++) + putchar(' '); + + printf("%s (%s)", p, t); + + for (i = 0; i < (int)argc; i++) { + printf(" -%s", mdoc_argnames[argv[i].arg]); + if (argv[i].sz > 0) + printf(" ["); + for (j = 0; j < (int)argv[i].sz; j++) + printf(" [%s]", argv[i].value[j]); + if (argv[i].sz > 0) + printf(" ]"); + } + print_attr(n); + } + if (n->eqn) + print_box(n->eqn->first, indent + 4); + if (n->child) + print_mdoc(n->child, indent + + (n->type == ROFFT_BLOCK ? 
2 : 4)); + if (n->next) + print_mdoc(n->next, indent); +} + +static void +print_man(const struct roff_node *n, int indent) +{ + const char *p, *t; + int i; + + if (n == NULL) + return; + + t = p = NULL; + + switch (n->type) { + case ROFFT_ROOT: + t = "root"; + break; + case ROFFT_ELEM: + t = "elem"; + break; + case ROFFT_TEXT: + t = "text"; + break; + case ROFFT_COMMENT: + t = "comment"; + break; + case ROFFT_BLOCK: + t = "block"; + break; + case ROFFT_HEAD: + t = "head"; + break; + case ROFFT_BODY: + t = "body"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + t = "eqn"; + break; + default: + abort(); + } + + switch (n->type) { + case ROFFT_TEXT: + case ROFFT_COMMENT: + p = n->string; + break; + case ROFFT_ELEM: + case ROFFT_BLOCK: + case ROFFT_HEAD: + case ROFFT_BODY: + p = roff_name[n->tok]; + break; + case ROFFT_ROOT: + p = "root"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + p = "EQ"; + break; + default: + abort(); + } + + if (n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else { + for (i = 0; i < indent; i++) + putchar(' '); + printf("%s (%s)", p, t); + print_attr(n); + } + if (n->eqn) + print_box(n->eqn->first, indent + 4); + if (n->child) + print_man(n->child, indent + + (n->type == ROFFT_BLOCK ? 
2 : 4)); + if (n->next) + print_man(n->next, indent); +} + +static void +print_attr(const struct roff_node *n) +{ + putchar(' '); + if (n->flags & NODE_DELIMO) + putchar('('); + if (n->flags & NODE_LINE) + putchar('*'); + printf("%d:%d", n->line, n->pos + 1); + if (n->flags & NODE_DELIMC) + putchar(')'); + if (n->flags & NODE_EOS) + putchar('.'); + if (n->flags & NODE_ID) { + printf(" ID"); + if (n->flags & NODE_HREF) + printf("=HREF"); + } else if (n->flags & NODE_HREF) + printf(" HREF"); + else if (n->tag != NULL) + printf(" STRAYTAG"); + if (n->tag != NULL) + printf("=%s", n->tag); + if (n->flags & NODE_BROKEN) + printf(" BROKEN"); + if (n->flags & NODE_NOFILL) + printf(" NOFILL"); + if (n->flags & NODE_NOSRC) + printf(" NOSRC"); + if (n->flags & NODE_NOPRT) + printf(" NOPRT"); + putchar('\n'); +} + +static void +print_box(const struct eqn_box *ep, int indent) +{ + int i; + const char *t; + + static const char *posnames[] = { + NULL, "sup", "subsup", "sub", + "to", "from", "fromto", + "over", "sqrt", NULL }; + + if (NULL == ep) + return; + for (i = 0; i < indent; i++) + putchar(' '); + + t = NULL; + switch (ep->type) { + case EQN_LIST: + t = "eqn-list"; + break; + case EQN_SUBEXPR: + t = "eqn-expr"; + break; + case EQN_TEXT: + t = "eqn-text"; + break; + case EQN_PILE: + t = "eqn-pile"; + break; + case EQN_MATRIX: + t = "eqn-matrix"; + break; + } + + fputs(t, stdout); + if (ep->pos) + printf(" pos=%s", posnames[ep->pos]); + if (ep->left) + printf(" left=\"%s\"", ep->left); + if (ep->right) + printf(" right=\"%s\"", ep->right); + if (ep->top) + printf(" top=\"%s\"", ep->top); + if (ep->bottom) + printf(" bottom=\"%s\"", ep->bottom); + if (ep->text) + printf(" text=\"%s\"", ep->text); + if (ep->font) + printf(" font=%d", ep->font); + if (ep->size != EQN_DEFSIZE) + printf(" size=%d", ep->size); + if (ep->expectargs != UINT_MAX && ep->expectargs != ep->args) + printf(" badargs=%zu(%zu)", ep->args, ep->expectargs); + else if (ep->args) + printf(" args=%zu", ep->args); 
+ putchar('\n'); + + print_box(ep->first, indent + 4); + print_box(ep->next, indent); +} + +static void +print_cellt(enum tbl_cellt pos) +{ + switch(pos) { + case TBL_CELL_LEFT: + putchar('L'); + break; + case TBL_CELL_LONG: + putchar('a'); + break; + case TBL_CELL_CENTRE: + putchar('c'); + break; + case TBL_CELL_RIGHT: + putchar('r'); + break; + case TBL_CELL_NUMBER: + putchar('n'); + break; + case TBL_CELL_SPAN: + putchar('s'); + break; + case TBL_CELL_DOWN: + putchar('^'); + break; + case TBL_CELL_HORIZ: + putchar('-'); + break; + case TBL_CELL_DHORIZ: + putchar('='); + break; + case TBL_CELL_MAX: + putchar('#'); + break; + } +} + +static void +print_span(const struct tbl_span *sp, int indent) +{ + const struct tbl_dat *dp; + const struct tbl_cell *cp; + int i; + + if (sp->prev == NULL) { + for (i = 0; i < indent; i++) + putchar(' '); + printf("%d", sp->opts->cols); + if (sp->opts->opts & TBL_OPT_CENTRE) + fputs(" center", stdout); + if (sp->opts->opts & TBL_OPT_EXPAND) + fputs(" expand", stdout); + if (sp->opts->opts & TBL_OPT_ALLBOX) + fputs(" allbox", stdout); + if (sp->opts->opts & TBL_OPT_BOX) + fputs(" box", stdout); + if (sp->opts->opts & TBL_OPT_DBOX) + fputs(" doublebox", stdout); + if (sp->opts->opts & TBL_OPT_NOKEEP) + fputs(" nokeep", stdout); + if (sp->opts->opts & TBL_OPT_NOSPACE) + fputs(" nospaces", stdout); + if (sp->opts->opts & TBL_OPT_NOWARN) + fputs(" nowarn", stdout); + printf(" (tbl options) %d:1\n", sp->line); + } + + for (i = 0; i < indent; i++) + putchar(' '); + + switch (sp->pos) { + case TBL_SPAN_HORIZ: + putchar('-'); + putchar(' '); + break; + case TBL_SPAN_DHORIZ: + putchar('='); + putchar(' '); + break; + default: + for (cp = sp->layout->first; cp != NULL; cp = cp->next) + print_cellt(cp->pos); + putchar(' '); + for (dp = sp->first; dp; dp = dp->next) { + if ((cp = dp->layout) == NULL) + putchar('*'); + else { + printf("%d", cp->col); + print_cellt(dp->layout->pos); + if (cp->flags & TBL_CELL_BOLD) + putchar('b'); + if (cp->flags 
& TBL_CELL_ITALIC) + putchar('i'); + if (cp->flags & TBL_CELL_TALIGN) + putchar('t'); + if (cp->flags & TBL_CELL_UP) + putchar('u'); + if (cp->flags & TBL_CELL_BALIGN) + putchar('d'); + if (cp->flags & TBL_CELL_WIGN) + putchar('z'); + if (cp->flags & TBL_CELL_EQUAL) + putchar('e'); + if (cp->flags & TBL_CELL_WMAX) + putchar('x'); + } + switch (dp->pos) { + case TBL_DATA_HORIZ: + case TBL_DATA_NHORIZ: + putchar('-'); + break; + case TBL_DATA_DHORIZ: + case TBL_DATA_NDHORIZ: + putchar('='); + break; + default: + putchar(dp->block ? '{' : '['); + if (dp->string != NULL) + fputs(dp->string, stdout); + putchar(dp->block ? '}' : ']'); + break; + } + if (dp->hspans) + printf(">%d", dp->hspans); + if (dp->vspans) + printf("v%d", dp->vspans); + putchar(' '); + } + break; + } + printf("(tbl) %d:1\n", sp->line); +} diff --git a/usr.bin/nc/CVS/Entries b/usr.bin/nc/CVS/Entries new file mode 100644 index 0000000..86fb43f --- /dev/null +++ b/usr.bin/nc/CVS/Entries @@ -0,0 +1,7 @@ +/Makefile/1.7/Fri Sep 11 21:07:01 2015// +/atomicio.c/1.11/Tue Dec 4 02:24:47 2012// +/atomicio.h/1.2/Fri Sep 7 14:50:44 2007// +/nc.1/1.95/Wed Feb 12 14:46:36 2020// +/netcat.c/1.217/Wed Feb 12 14:46:36 2020// +/socks.c/1.30/Mon Nov 4 17:33:28 2019// +D diff --git a/usr.bin/nc/CVS/Repository b/usr.bin/nc/CVS/Repository new file mode 100644 index 0000000..5a220f1 --- /dev/null +++ b/usr.bin/nc/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/nc diff --git a/usr.bin/nc/CVS/Root b/usr.bin/nc/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/nc/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/nc/Makefile b/usr.bin/nc/Makefile new file mode 100644 index 0000000..5f20c40 --- /dev/null +++ b/usr.bin/nc/Makefile @@ -0,0 +1,8 @@ +# $OpenBSD: Makefile,v 1.7 2015/09/11 21:07:01 beck Exp $ + +PROG= nc +SRCS= netcat.c atomicio.c socks.c +LDADD+= -ltls -lssl -lcrypto +DPADD+= ${LIBTLS} ${LIBSSL} ${LIBCRYPTO} + +.include <bsd.prog.mk> diff --git a/usr.bin/nc/atomicio.c b/usr.bin/nc/atomicio.c 
new file mode 100644 index 0000000..344ac63 --- /dev/null +++ b/usr.bin/nc/atomicio.c @@ -0,0 +1,67 @@ +/* $OpenBSD: atomicio.c,v 1.11 2012/12/04 02:24:47 deraadt Exp $ */ +/* + * Copyright (c) 2006 Damien Miller. All rights reserved. + * Copyright (c) 2005 Anil Madhavapeddy. All rights reserved. + * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <errno.h> +#include <poll.h> +#include <unistd.h> + +#include "atomicio.h" + +/* + * ensure all of data on socket comes through. 
f==read || f==vwrite + */ +size_t +atomicio(ssize_t (*f) (int, void *, size_t), int fd, void *_s, size_t n) +{ + char *s = _s; + size_t pos = 0; + ssize_t res; + struct pollfd pfd; + + pfd.fd = fd; + pfd.events = f == read ? POLLIN : POLLOUT; + while (n > pos) { + res = (f) (fd, s + pos, n - pos); + switch (res) { + case -1: + if (errno == EINTR) + continue; + if ((errno == EAGAIN) || (errno == ENOBUFS)) { + (void)poll(&pfd, 1, -1); + continue; + } + return 0; + case 0: + errno = EPIPE; + return pos; + default: + pos += (size_t)res; + } + } + return (pos); +} diff --git a/usr.bin/nc/atomicio.h b/usr.bin/nc/atomicio.h new file mode 100644 index 0000000..7bf5b25 --- /dev/null +++ b/usr.bin/nc/atomicio.h @@ -0,0 +1,39 @@ +/* $OpenBSD: atomicio.h,v 1.2 2007/09/07 14:50:44 tobias Exp $ */ + +/* + * Copyright (c) 2006 Damien Miller. All rights reserved. + * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ATOMICIO_H +#define _ATOMICIO_H + +/* + * Ensure all of data on socket comes through. f==read || f==vwrite + */ +size_t atomicio(ssize_t (*)(int, void *, size_t), int, void *, size_t); + +#define vwrite (ssize_t (*)(int, void *, size_t))write + +#endif /* _ATOMICIO_H */ diff --git a/usr.bin/nc/nc.1 b/usr.bin/nc/nc.1 new file mode 100644 index 0000000..fff5857 --- /dev/null +++ b/usr.bin/nc/nc.1 @@ -0,0 +1,585 @@ +.\" $OpenBSD: nc.1,v 1.95 2020/02/12 14:46:36 schwarze Exp $ +.\" +.\" Copyright (c) 1996 David Sacerdote +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. The name of the author may not be used to endorse or promote products +.\" derived from this software without specific prior written permission +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: February 12 2020 $ +.Dt NC 1 +.Os +.Sh NAME +.Nm nc +.Nd arbitrary TCP and UDP connections and listens +.Sh SYNOPSIS +.Nm nc +.Op Fl 46cDdFhklNnrStUuvz +.Op Fl C Ar certfile +.Op Fl e Ar name +.Op Fl H Ar hash +.Op Fl I Ar length +.Op Fl i Ar interval +.Op Fl K Ar keyfile +.Op Fl M Ar ttl +.Op Fl m Ar minttl +.Op Fl O Ar length +.Op Fl o Ar staplefile +.Op Fl P Ar proxy_username +.Op Fl p Ar source_port +.Op Fl R Ar CAfile +.Op Fl s Ar sourceaddr +.Op Fl T Ar keyword +.Op Fl V Ar rtable +.Op Fl W Ar recvlimit +.Op Fl w Ar timeout +.Op Fl X Ar proxy_protocol +.Op Fl x Ar proxy_address Ns Op : Ns Ar port +.Op Fl Z Ar peercertfile +.Op Ar destination +.Op Ar port +.Sh DESCRIPTION +The +.Nm +(or +.Nm netcat ) +utility is used for just about anything under the sun involving TCP, +UDP, or +.Ux Ns -domain +sockets. +It can open TCP connections, send UDP packets, listen on arbitrary +TCP and UDP ports, do port scanning, and deal with both IPv4 and +IPv6. +Unlike +.Xr telnet 1 , +.Nm +scripts nicely, and separates error messages onto standard error instead +of sending them to standard output, as +.Xr telnet 1 +does with some. 
+.Pp +Common uses include: +.Pp +.Bl -bullet -offset indent -compact +.It +simple TCP proxies +.It +shell-script based HTTP clients and servers +.It +network daemon testing +.It +a SOCKS or HTTP ProxyCommand for +.Xr ssh 1 +.It +and much, much more +.El +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl 4 +Use IPv4 addresses only. +.It Fl 6 +Use IPv6 addresses only. +.It Fl C Ar certfile +Load the public key part of the TLS peer certificate from +.Ar certfile , +in PEM format. +Requires +.Fl c . +.It Fl c +Use TLS to connect or listen. +Cannot be used together with any of the options +.Fl FuU . +.It Fl D +Enable debugging on the socket. +.It Fl d +Do not attempt to read from stdin. +.It Fl e Ar name +Only accept the TLS peer certificate if it contains the +.Ar name . +Requires +.Fl c . +If not specified, +.Ar destination +is used. +.It Fl F +Pass the first connected socket using +.Xr sendmsg 2 +to stdout and exit. +This is useful in conjunction with +.Fl X +to have +.Nm +perform connection setup with a proxy but then leave the rest of the +connection to another program (e.g.\& +.Xr ssh 1 +using the +.Xr ssh_config 5 +.Cm ProxyUseFdpass +option). +Cannot be used with +.Fl c +or +.Fl U . +.It Fl H Ar hash +Only accept the TLS peer certificate if its hash returned from +.Xr tls_peer_cert_hash 3 +matches +.Ar hash . +Requires +.Fl c +and cannot be used with +.Fl T Cm noverify . +.It Fl h +Print out the +.Nm +help text and exit. +.It Fl I Ar length +Specify the size of the TCP receive buffer. +.It Fl i Ar interval +Sleep for +.Ar interval +seconds between lines of text sent and received. +Also causes a delay time between connections to multiple ports. +.It Fl K Ar keyfile +Load the TLS private key from +.Ar keyfile , +in PEM format. +Requires +.Fl c . +.It Fl k +When a connection is completed, listen for another one. +Requires +.Fl l . 
+When used together with the +.Fl u +option, the server socket is not connected and it can receive UDP datagrams from +multiple hosts. +.It Fl l +Listen for an incoming connection rather than initiating a +connection to a remote host. +Cannot be used together with any of the options +.Fl psxz . +Additionally, any timeouts specified with the +.Fl w +option are ignored. +.It Fl M Ar ttl +Set the TTL / hop limit of outgoing packets. +.It Fl m Ar minttl +Ask the kernel to drop incoming packets whose TTL / hop limit is under +.Ar minttl . +.It Fl N +.Xr shutdown 2 +the network socket after EOF on the input. +Some servers require this to finish their work. +.It Fl n +Do not perform domain name resolution. +If a name cannot be resolved without DNS, an error will be reported. +.It Fl O Ar length +Specify the size of the TCP send buffer. +.It Fl o Ar staplefile +During the TLS handshake, load data to be stapled from +.Ar staplefile , +which is expected to contain an OCSP response from an OCSP server in +DER format. +Requires +.Fl c +and +.Fl C . +.It Fl P Ar proxy_username +Specifies a username to present to a proxy server that requires authentication. +If no username is specified then authentication will not be attempted. +Proxy authentication is only supported for HTTP CONNECT proxies at present. +.It Fl p Ar source_port +Specify the source port +.Nm +should use, subject to privilege restrictions and availability. +Cannot be used together with +.Fl l . +.It Fl R Ar CAfile +Load the root CA bundle for TLS certificate verification from +.Ar CAfile , +in PEM format, instead of +.Pa /etc/ssl/cert.pem . +Requires +.Fl c . +.It Fl r +Choose source and/or destination ports randomly +instead of sequentially within a range or in the order that the system +assigns them. +.It Fl S +Enable the RFC 2385 TCP MD5 signature option. +.It Fl s Ar sourceaddr +Set the source address to send packets from, +which is useful on machines with multiple interfaces. 
+For +.Ux Ns -domain +datagram sockets, specifies the local temporary socket file +to create and use so that datagrams can be received. +Cannot be used together with +.Fl l +or +.Fl x . +.It Fl T Ar keyword +Change the IPv4 TOS/IPv6 traffic class value or the TLS options. +.Pp +For TLS options, +.Ar keyword +may be one of: +.Cm noverify , +which disables certificate verification; +.Cm noname , +which disables certificate name checking; +.Cm clientcert , +which requires a client certificate on incoming connections; or +.Cm muststaple , +which requires the peer to provide a valid stapled OCSP response +with the handshake. +The following TLS options specify a value in the form of a +.Ar key Ns = Ns Ar value +pair: +.Cm ciphers , +which allows the supported TLS ciphers to be specified (see +.Xr tls_config_set_ciphers 3 +for further details); +.Cm protocols , +which allows the supported TLS protocols to be specified (see +.Xr tls_config_parse_protocols 3 +for further details). +Specifying TLS options requires +.Fl c . +.Pp +For the IPv4 TOS/IPv6 traffic class value, +.Ar keyword +may be one of +.Cm critical , +.Cm inetcontrol , +.Cm lowdelay , +.Cm netcontrol , +.Cm throughput , +.Cm reliability , +or one of the DiffServ Code Points: +.Cm ef , +.Cm af11 No ... Cm af43 , +.Cm cs0 No ... Cm cs7 ; +or a number in either hex or decimal. +.It Fl t +Send RFC 854 DON'T and WON'T responses to RFC 854 DO and WILL requests. +This makes it possible to use +.Nm +to script telnet sessions. +.It Fl U +Use +.Ux Ns -domain +sockets. +Cannot be used together with any of the options +.Fl cFx . +.It Fl u +Use UDP instead of TCP. +Cannot be used together with +.Fl c +or +.Fl x . +For +.Ux Ns -domain +sockets, use a datagram socket instead of a stream socket. +If a +.Ux Ns -domain +socket is used, a temporary receiving socket is created in +.Pa /tmp +unless the +.Fl s +flag is given. +.It Fl V Ar rtable +Set the routing table to be used. +.It Fl v +Produce more verbose output. 
+.It Fl W Ar recvlimit +Terminate after receiving +.Ar recvlimit +packets from the network. +.It Fl w Ar timeout +Connections which cannot be established or are idle timeout after +.Ar timeout +seconds. +The +.Fl w +flag has no effect on the +.Fl l +option, i.e.\& +.Nm +will listen forever for a connection, with or without the +.Fl w +flag. +The default is no timeout. +.It Fl X Ar proxy_protocol +Use +.Ar proxy_protocol +when talking to the proxy server. +Supported protocols are +.Cm 4 +(SOCKS v.4), +.Cm 5 +(SOCKS v.5) +and +.Cm connect +(HTTPS proxy). +If the protocol is not specified, SOCKS version 5 is used. +.It Fl x Ar proxy_address Ns Op : Ns Ar port +Connect to +.Ar destination +using a proxy at +.Ar proxy_address +and +.Ar port . +If +.Ar port +is not specified, the well-known port for the proxy protocol is used (1080 +for SOCKS, 3128 for HTTPS). +An IPv6 address can be specified unambiguously by enclosing +.Ar proxy_address +in square brackets. +A proxy cannot be used with any of the options +.Fl lsuU . +.It Fl Z Ar peercertfile +Save the peer certificates to +.Ar peercertfile , +in PEM format. +Requires +.Fl c . +.It Fl z +Only scan for listening daemons, without sending any data to them. +Cannot be used together with +.Fl l . +.El +.Pp +.Ar destination +can be a numerical IP address or a symbolic hostname +(unless the +.Fl n +option is given). +In general, a destination must be specified, +unless the +.Fl l +option is given +(in which case the local host is used). +For +.Ux Ns -domain +sockets, a destination is required and is the socket path to connect to +(or listen on if the +.Fl l +option is given). +.Pp +.Ar port +can be specified as a numeric port number or as a service name. +Port ranges may be specified as numeric port numbers of the form +.Ar nn Ns - Ns Ar mm . +In general, +a destination port must be specified, +unless the +.Fl U +option is given. 
+.Sh CLIENT/SERVER MODEL +It is quite simple to build a very basic client/server model using +.Nm . +On one console, start +.Nm +listening on a specific port for a connection. +For example: +.Pp +.Dl $ nc -l 1234 +.Pp +.Nm +is now listening on port 1234 for a connection. +On a second console +.Pq or a second machine , +connect to the machine and port being listened on: +.Pp +.Dl $ nc 127.0.0.1 1234 +.Pp +There should now be a connection between the ports. +Anything typed at the second console will be concatenated to the first, +and vice-versa. +After the connection has been set up, +.Nm +does not really care which side is being used as a +.Sq server +and which side is being used as a +.Sq client . +The connection may be terminated using an +.Dv EOF +.Pq Sq ^D . +.Sh DATA TRANSFER +The example in the previous section can be expanded to build a +basic data transfer model. +Any information input into one end of the connection will be output +to the other end, and input and output can be easily captured in order to +emulate file transfer. +.Pp +Start by using +.Nm +to listen on a specific port, with output captured into a file: +.Pp +.Dl $ nc -l 1234 \*(Gt filename.out +.Pp +Using a second machine, connect to the listening +.Nm +process, feeding it the file which is to be transferred: +.Pp +.Dl $ nc -N host.example.com 1234 \*(Lt filename.in +.Pp +After the file has been transferred, the connection will close automatically. +.Sh TALKING TO SERVERS +It is sometimes useful to talk to servers +.Dq by hand +rather than through a user interface. +It can aid in troubleshooting, +when it might be necessary to verify what data a server is sending +in response to commands issued by the client. +For example, to retrieve the home page of a web site: +.Bd -literal -offset indent +$ printf "GET / HTTP/1.0\er\en\er\en" | nc host.example.com 80 +.Ed +.Pp +Note that this also displays the headers sent by the web server. 
+They can be filtered, using a tool such as +.Xr sed 1 , +if necessary. +.Pp +More complicated examples can be built up when the user knows the format +of requests required by the server. +As another example, an email may be submitted to an SMTP server using: +.Bd -literal -offset indent +$ nc localhost 25 \*(Lt\*(Lt EOF +HELO host.example.com +MAIL FROM:\*(Ltuser@host.example.com\*(Gt +RCPT TO:\*(Ltuser2@host.example.com\*(Gt +DATA +Body of email. +\&. +QUIT +EOF +.Ed +.Sh PORT SCANNING +It may be useful to know which ports are open and running services on +a target machine. +The +.Fl z +flag can be used to tell +.Nm +to report open ports, +rather than initiate a connection. +For example: +.Bd -literal -offset indent +$ nc -z host.example.com 20-30 +Connection to host.example.com 22 port [tcp/ssh] succeeded! +Connection to host.example.com 25 port [tcp/smtp] succeeded! +.Ed +.Pp +The port range was specified to limit the search to ports 20 \- 30. +.Pp +Alternatively, it might be useful to know which server software +is running, and which versions. +This information is often contained within the greeting banners. +In order to retrieve these, it is necessary to first make a connection, +and then break the connection when the banner has been retrieved. +This can be accomplished by specifying a small timeout with the +.Fl w +flag, or perhaps by issuing a +.Qq Dv QUIT +command to the server: +.Bd -literal -offset indent +$ echo "QUIT" | nc host.example.com 20-30 +SSH-1.99-OpenSSH_3.6.1p2 +Protocol mismatch. 
+220 host.example.com IMS SMTP Receiver Version 0.84 Ready +.Ed +.Sh EXAMPLES +Open a TCP connection to port 42 of host.example.com, using port 31337 as +the source port, with a timeout of 5 seconds: +.Pp +.Dl $ nc -p 31337 -w 5 host.example.com 42 +.Pp +Open a TCP connection to port 443 of www.example.com, and negotiate TLS with +any supported TLS protocol version and "compat" ciphers: +.Pp +.Dl $ nc -cv -T protocols=all -T ciphers=compat www.example.com 443 +.Pp +Open a TCP connection to port 443 of www.google.ca, and negotiate TLS. +Check for a different name in the certificate for validation: +.Pp +.Dl $ nc -cv -e adsf.au.doubleclick.net www.google.ca 443 +.Pp +Open a UDP connection to port 53 of host.example.com: +.Pp +.Dl $ nc -u host.example.com 53 +.Pp +Open a TCP connection to port 42 of host.example.com using 10.1.2.3 as the +IP for the local end of the connection: +.Pp +.Dl $ nc -s 10.1.2.3 host.example.com 42 +.Pp +Create and listen on a +.Ux Ns -domain +stream socket: +.Pp +.Dl $ nc -lU /var/tmp/dsocket +.Pp +Connect to port 42 of host.example.com via an HTTP proxy at 10.2.3.4, +port 8080. +This example could also be used by +.Xr ssh 1 ; +see the +.Cm ProxyCommand +directive in +.Xr ssh_config 5 +for more information. +.Pp +.Dl $ nc -x10.2.3.4:8080 -Xconnect host.example.com 42 +.Pp +The same example again, this time enabling proxy authentication with username +.Dq ruser +if the proxy requires it: +.Pp +.Dl $ nc -x10.2.3.4:8080 -Xconnect -Pruser host.example.com 42 +.Sh SEE ALSO +.Xr cat 1 , +.Xr ssh 1 +.Sh AUTHORS +Original implementation by +.An *Hobbit* Aq Mt hobbit@avian.org . +.br +Rewritten with IPv6 support by +.An Eric Jackson Aq Mt ericj@monkey.org . +.Sh CAVEATS +UDP port scans using the +.Fl uz +combination of flags will always report success irrespective of +the target machine's state. 
+However, +in conjunction with a traffic sniffer either on the target machine +or an intermediary device, +the +.Fl uz +combination could be useful for communications diagnostics. +Note that the amount of UDP traffic generated may be limited either +due to hardware resources and/or configuration settings. diff --git a/usr.bin/nc/netcat.c b/usr.bin/nc/netcat.c new file mode 100644 index 0000000..f052766 --- /dev/null +++ b/usr.bin/nc/netcat.c @@ -0,0 +1,1899 @@ +/* $OpenBSD: netcat.c,v 1.217 2020/02/12 14:46:36 schwarze Exp $ */ +/* + * Copyright (c) 2001 Eric Jackson <ericj@monkey.org> + * Copyright (c) 2015 Bob Beck. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Re-written nc(1) for OpenBSD. Original implementation by + * *Hobbit* <hobbit@avian.org>. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <sys/un.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/ip.h> +#include <arpa/telnet.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <netdb.h> +#include <poll.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <tls.h> +#include <unistd.h> + +#include "atomicio.h" + +#define PORT_MAX 65535 +#define UNIX_DG_TMP_SOCKET_SIZE 19 + +#define POLL_STDIN 0 +#define POLL_NETOUT 1 +#define POLL_NETIN 2 +#define POLL_STDOUT 3 +#define BUFSIZE 16384 + +#define TLS_NOVERIFY (1 << 1) +#define TLS_NONAME (1 << 2) +#define TLS_CCERT (1 << 3) +#define TLS_MUSTSTAPLE (1 << 4) + +/* Command Line Options */ +int dflag; /* detached, no stdin */ +int Fflag; /* fdpass sock to stdout */ +unsigned int iflag; /* Interval Flag */ +int kflag; /* More than one connect */ +int lflag; /* Bind to local port */ +int Nflag; /* shutdown() network socket */ +int nflag; /* Don't do name look up */ +char *Pflag; /* Proxy username */ +char *pflag; /* Localport flag */ +int rflag; /* Random ports flag */ +char *sflag; /* Source Address */ +int tflag; /* Telnet Emulation */ +int uflag; /* UDP - Default to TCP */ +int vflag; /* Verbosity */ +int xflag; /* 
Socks proxy */ +int zflag; /* Port Scan Flag */ +int Dflag; /* sodebug */ +int Iflag; /* TCP receive buffer size */ +int Oflag; /* TCP send buffer size */ +#ifdef TCP_MD5SIG +int Sflag; /* TCP MD5 signature option */ +#endif +int Tflag = -1; /* IP Type of Service */ +#ifdef SO_RTABLE +int rtableid = -1; +#endif + +int usetls; /* use TLS */ +const char *Cflag; /* Public cert file */ +const char *Kflag; /* Private key file */ +const char *oflag; /* OCSP stapling file */ +const char *Rflag; /* Root CA file */ +int tls_cachanged; /* Using non-default CA file */ +int TLSopt; /* TLS options */ +char *tls_expectname; /* required name in peer cert */ +char *tls_expecthash; /* required hash of peer cert */ +char *tls_ciphers; /* TLS ciphers */ +char *tls_protocols; /* TLS protocols */ +FILE *Zflag; /* file to save peer cert */ + +int recvcount, recvlimit; +int timeout = -1; +int family = AF_UNSPEC; +char *portlist[PORT_MAX+1]; +char *unix_dg_tmp_socket; +int ttl = -1; +int minttl = -1; + +void atelnet(int, unsigned char *, unsigned int); +int strtoport(char *portstr, int udp); +void build_ports(char *); +void help(void) __attribute__((noreturn)); +int local_listen(const char *, const char *, struct addrinfo); +void readwrite(int, struct tls *); +void fdpass(int nfd) __attribute__((noreturn)); +int remote_connect(const char *, const char *, struct addrinfo, char *); +int timeout_tls(int, struct tls *, int (*)(struct tls *)); +int timeout_connect(int, const struct sockaddr *, socklen_t); +int socks_connect(const char *, const char *, struct addrinfo, + const char *, const char *, struct addrinfo, int, const char *); +int udptest(int); +int unix_bind(char *, int); +int unix_connect(char *); +int unix_listen(char *); +void set_common_sockopts(int, int); +int process_tos_opt(char *, int *); +int process_tls_opt(char *, int *); +void save_peer_cert(struct tls *_tls_ctx, FILE *_fp); +void report_sock(const char *, const struct sockaddr *, socklen_t, char *); +void 
report_tls(struct tls *tls_ctx, char * host); +void usage(int); +ssize_t drainbuf(int, unsigned char *, size_t *, struct tls *); +ssize_t fillbuf(int, unsigned char *, size_t *, struct tls *); +void tls_setup_client(struct tls *, int, char *); +struct tls *tls_setup_server(struct tls *, int, char *); + +int +main(int argc, char *argv[]) +{ + int ch, s = -1, ret, socksv; + char *host, *uport; + char ipaddr[NI_MAXHOST]; + struct addrinfo hints; + struct servent *sv; + socklen_t len; + struct sockaddr_storage cliaddr; + char *proxy = NULL, *proxyport = NULL; + const char *errstr; + struct addrinfo proxyhints; + char unix_dg_tmp_socket_buf[UNIX_DG_TMP_SOCKET_SIZE]; + struct tls_config *tls_cfg = NULL; + struct tls *tls_ctx = NULL; + uint32_t protocols; + + ret = 1; + socksv = 5; + host = NULL; + uport = NULL; + sv = NULL; + Rflag = tls_default_ca_cert_file(); + + signal(SIGPIPE, SIG_IGN); + + while ((ch = getopt(argc, argv, + "46C:cDde:FH:hI:i:K:klM:m:NnO:o:P:p:R:rSs:T:tUuV:vW:w:X:x:Z:z")) + != -1) { + switch (ch) { + case '4': + family = AF_INET; + break; + case '6': + family = AF_INET6; + break; + case 'U': + family = AF_UNIX; + break; + case 'X': + if (strcasecmp(optarg, "connect") == 0) + socksv = -1; /* HTTP proxy CONNECT */ + else if (strcmp(optarg, "4") == 0) + socksv = 4; /* SOCKS v.4 */ + else if (strcmp(optarg, "5") == 0) + socksv = 5; /* SOCKS v.5 */ + else + errx(1, "unsupported proxy protocol"); + break; + case 'C': + Cflag = optarg; + break; + case 'c': + usetls = 1; + break; + case 'd': + dflag = 1; + break; + case 'e': + tls_expectname = optarg; + break; + case 'F': + Fflag = 1; + break; + case 'H': + tls_expecthash = optarg; + break; + case 'h': + help(); + break; + case 'i': + iflag = strtonum(optarg, 0, UINT_MAX, &errstr); + if (errstr) + errx(1, "interval %s: %s", errstr, optarg); + break; + case 'K': + Kflag = optarg; + break; + case 'k': + kflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'M': + ttl = strtonum(optarg, 0, 255, &errstr); 
+ if (errstr) + errx(1, "ttl is %s", errstr); + break; + case 'm': + minttl = strtonum(optarg, 0, 255, &errstr); + if (errstr) + errx(1, "minttl is %s", errstr); + break; + case 'N': + Nflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'P': + Pflag = optarg; + break; + case 'p': + pflag = optarg; + break; + case 'R': + tls_cachanged = 1; + Rflag = optarg; + break; + case 'r': + rflag = 1; + break; + case 's': + sflag = optarg; + break; + case 't': + tflag = 1; + break; + case 'u': + uflag = 1; + break; +#ifdef SO_RTABLE + case 'V': + rtableid = (int)strtonum(optarg, 0, + RT_TABLEID_MAX, &errstr); + if (errstr) + errx(1, "rtable %s: %s", errstr, optarg); + break; +#endif + case 'v': + vflag = 1; + break; + case 'W': + recvlimit = strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr) + errx(1, "receive limit %s: %s", errstr, optarg); + break; + case 'w': + timeout = strtonum(optarg, 0, INT_MAX / 1000, &errstr); + if (errstr) + errx(1, "timeout %s: %s", errstr, optarg); + timeout *= 1000; + break; + case 'x': + xflag = 1; + if ((proxy = strdup(optarg)) == NULL) + err(1, NULL); + break; + case 'Z': + if (strcmp(optarg, "-") == 0) + Zflag = stderr; + else if ((Zflag = fopen(optarg, "w")) == NULL) + err(1, "can't open %s", optarg); + break; + case 'z': + zflag = 1; + break; + case 'D': + Dflag = 1; + break; + case 'I': + Iflag = strtonum(optarg, 1, 65536 << 14, &errstr); + if (errstr != NULL) + errx(1, "TCP receive window %s: %s", + errstr, optarg); + break; + case 'O': + Oflag = strtonum(optarg, 1, 65536 << 14, &errstr); + if (errstr != NULL) + errx(1, "TCP send window %s: %s", + errstr, optarg); + break; + case 'o': + oflag = optarg; + break; +#ifdef TCP_MD5SIG + case 'S': + Sflag = 1; + break; +#endif + case 'T': + errstr = NULL; + errno = 0; + if (process_tls_opt(optarg, &TLSopt)) + break; + if (process_tos_opt(optarg, &Tflag)) + break; + if (strlen(optarg) > 1 && optarg[0] == '0' && + optarg[1] == 'x') + Tflag = (int)strtol(optarg, NULL, 16); + else + Tflag 
= (int)strtonum(optarg, 0, 255, + &errstr); + if (Tflag < 0 || Tflag > 255 || errstr || errno) + errx(1, "illegal tos/tls value %s", optarg); + break; + default: + usage(1); + } + } + argc -= optind; + argv += optind; + +#ifdef SO_RTABLE + if (rtableid >= 0) + if (setrtable(rtableid) == -1) + err(1, "setrtable"); +#endif + + /* Cruft to make sure options are clean, and used properly. */ + if (argc == 1 && family == AF_UNIX) { + host = argv[0]; + } else if (argc == 1 && lflag) { + uport = argv[0]; + } else if (argc == 2) { + host = argv[0]; + uport = argv[1]; + } else + usage(1); + + if (usetls) { + if (Cflag && unveil(Cflag, "r") == -1) + err(1, "unveil"); + if (unveil(Rflag, "r") == -1) + err(1, "unveil"); + if (Kflag && unveil(Kflag, "r") == -1) + err(1, "unveil"); + if (oflag && unveil(oflag, "r") == -1) + err(1, "unveil"); + } else if (family == AF_UNIX && uflag && lflag && !kflag) { + /* + * After recvfrom(2) from client, the server connects + * to the client socket. As the client path is determined + * during runtime, we cannot unveil(2). 
+ */ + } else { + if (family == AF_UNIX) { + if (unveil(host, "rwc") == -1) + err(1, "unveil"); + if (uflag && !kflag) { + if (sflag) { + if (unveil(sflag, "rwc") == -1) + err(1, "unveil"); + } else { + if (unveil("/tmp", "rwc") == -1) + err(1, "unveil"); + } + } + } else { + /* no filesystem visibility */ + if (unveil("/", "") == -1) + err(1, "unveil"); + } + } + + if (family == AF_UNIX) { + if (pledge("stdio rpath wpath cpath tmppath unix", NULL) == -1) + err(1, "pledge"); + } else if (Fflag && Pflag) { + if (pledge("stdio inet dns sendfd tty", NULL) == -1) + err(1, "pledge"); + } else if (Fflag) { + if (pledge("stdio inet dns sendfd", NULL) == -1) + err(1, "pledge"); + } else if (Pflag && usetls) { + if (pledge("stdio rpath inet dns tty", NULL) == -1) + err(1, "pledge"); + } else if (Pflag) { + if (pledge("stdio inet dns tty", NULL) == -1) + err(1, "pledge"); + } else if (usetls) { + if (pledge("stdio rpath inet dns", NULL) == -1) + err(1, "pledge"); + } else if (pledge("stdio inet dns", NULL) == -1) + err(1, "pledge"); + + if (lflag && sflag) + errx(1, "cannot use -s and -l"); + if (lflag && pflag) + errx(1, "cannot use -p and -l"); + if (lflag && zflag) + errx(1, "cannot use -z and -l"); + if (!lflag && kflag) + errx(1, "must use -l with -k"); + if (uflag && usetls) + errx(1, "cannot use -c and -u"); + if ((family == AF_UNIX) && usetls) + errx(1, "cannot use -c and -U"); + if ((family == AF_UNIX) && Fflag) + errx(1, "cannot use -F and -U"); + if (Fflag && usetls) + errx(1, "cannot use -c and -F"); + if (TLSopt && !usetls) + errx(1, "you must specify -c to use TLS options"); + if (Cflag && !usetls) + errx(1, "you must specify -c to use -C"); + if (Kflag && !usetls) + errx(1, "you must specify -c to use -K"); + if (Zflag && !usetls) + errx(1, "you must specify -c to use -Z"); + if (oflag && !Cflag) + errx(1, "you must specify -C to use -o"); + if (tls_cachanged && !usetls) + errx(1, "you must specify -c to use -R"); + if (tls_expecthash && !usetls) + errx(1, 
"you must specify -c to use -H"); + if (tls_expectname && !usetls) + errx(1, "you must specify -c to use -e"); + + /* Get name of temporary socket for unix datagram client */ + if ((family == AF_UNIX) && uflag && !lflag) { + if (sflag) { + unix_dg_tmp_socket = sflag; + } else { + strlcpy(unix_dg_tmp_socket_buf, "/tmp/nc.XXXXXXXXXX", + UNIX_DG_TMP_SOCKET_SIZE); + if (mktemp(unix_dg_tmp_socket_buf) == NULL) + err(1, "mktemp"); + unix_dg_tmp_socket = unix_dg_tmp_socket_buf; + } + } + + /* Initialize addrinfo structure. */ + if (family != AF_UNIX) { + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = family; + hints.ai_socktype = uflag ? SOCK_DGRAM : SOCK_STREAM; + hints.ai_protocol = uflag ? IPPROTO_UDP : IPPROTO_TCP; + if (nflag) + hints.ai_flags |= AI_NUMERICHOST; + } + + if (xflag) { + if (uflag) + errx(1, "no proxy support for UDP mode"); + + if (lflag) + errx(1, "no proxy support for listen"); + + if (family == AF_UNIX) + errx(1, "no proxy support for unix sockets"); + + if (sflag) + errx(1, "no proxy support for local source address"); + + if (*proxy == '[') { + ++proxy; + proxyport = strchr(proxy, ']'); + if (proxyport == NULL) + errx(1, "missing closing bracket in proxy"); + *proxyport++ = '\0'; + if (*proxyport == '\0') + /* Use default proxy port. 
*/ + proxyport = NULL; + else { + if (*proxyport == ':') + ++proxyport; + else + errx(1, "garbage proxy port delimiter"); + } + } else { + proxyport = strrchr(proxy, ':'); + if (proxyport != NULL) + *proxyport++ = '\0'; + } + + memset(&proxyhints, 0, sizeof(struct addrinfo)); + proxyhints.ai_family = family; + proxyhints.ai_socktype = SOCK_STREAM; + proxyhints.ai_protocol = IPPROTO_TCP; + if (nflag) + proxyhints.ai_flags |= AI_NUMERICHOST; + } + + if (usetls) { + if ((tls_cfg = tls_config_new()) == NULL) + errx(1, "unable to allocate TLS config"); + if (Rflag && tls_config_set_ca_file(tls_cfg, Rflag) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (Cflag && tls_config_set_cert_file(tls_cfg, Cflag) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (Kflag && tls_config_set_key_file(tls_cfg, Kflag) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (oflag && tls_config_set_ocsp_staple_file(tls_cfg, oflag) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (tls_config_parse_protocols(&protocols, tls_protocols) == -1) + errx(1, "invalid TLS protocols `%s'", tls_protocols); + if (tls_config_set_protocols(tls_cfg, protocols) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (tls_config_set_ciphers(tls_cfg, tls_ciphers) == -1) + errx(1, "%s", tls_config_error(tls_cfg)); + if (!lflag && (TLSopt & TLS_CCERT)) + errx(1, "clientcert is only valid with -l"); + if (TLSopt & TLS_NONAME) + tls_config_insecure_noverifyname(tls_cfg); + if (TLSopt & TLS_NOVERIFY) { + if (tls_expecthash != NULL) + errx(1, "-H and -T noverify may not be used " + "together"); + tls_config_insecure_noverifycert(tls_cfg); + } + if (TLSopt & TLS_MUSTSTAPLE) + tls_config_ocsp_require_stapling(tls_cfg); + + if (Pflag) { + if (pledge("stdio inet dns tty", NULL) == -1) + err(1, "pledge"); + } else if (pledge("stdio inet dns", NULL) == -1) + err(1, "pledge"); + } + if (lflag) { + ret = 0; + + if (family == AF_UNIX) { + if (uflag) + s = unix_bind(host, 0); + else + s = 
unix_listen(host); + } + + if (usetls) { + tls_config_verify_client_optional(tls_cfg); + if ((tls_ctx = tls_server()) == NULL) + errx(1, "tls server creation failed"); + if (tls_configure(tls_ctx, tls_cfg) == -1) + errx(1, "tls configuration failed (%s)", + tls_error(tls_ctx)); + } + /* Allow only one connection at a time, but stay alive. */ + for (;;) { + if (family != AF_UNIX) { + if (s != -1) + close(s); + s = local_listen(host, uport, hints); + } + if (s == -1) + err(1, NULL); + if (uflag && kflag) { + if (family == AF_UNIX) { + if (pledge("stdio unix", NULL) == -1) + err(1, "pledge"); + } + /* + * For UDP and -k, don't connect the socket, + * let it receive datagrams from multiple + * socket pairs. + */ + readwrite(s, NULL); + } else if (uflag && !kflag) { + /* + * For UDP and not -k, we will use recvfrom() + * initially to wait for a caller, then use + * the regular functions to talk to the caller. + */ + int rv; + char buf[2048]; + struct sockaddr_storage z; + + len = sizeof(z); + rv = recvfrom(s, buf, sizeof(buf), MSG_PEEK, + (struct sockaddr *)&z, &len); + if (rv == -1) + err(1, "recvfrom"); + + rv = connect(s, (struct sockaddr *)&z, len); + if (rv == -1) + err(1, "connect"); + + if (family == AF_UNIX) { + if (pledge("stdio unix", NULL) == -1) + err(1, "pledge"); + } + if (vflag) + report_sock("Connection received", + (struct sockaddr *)&z, len, + family == AF_UNIX ? host : NULL); + + readwrite(s, NULL); + } else { + struct tls *tls_cctx = NULL; + int connfd; + + len = sizeof(cliaddr); + connfd = accept4(s, (struct sockaddr *)&cliaddr, + &len, SOCK_NONBLOCK); + if (connfd == -1) { + /* For now, all errnos are fatal */ + err(1, "accept"); + } + if (vflag) + report_sock("Connection received", + (struct sockaddr *)&cliaddr, len, + family == AF_UNIX ? 
host : NULL); + if ((usetls) && + (tls_cctx = tls_setup_server(tls_ctx, connfd, host))) + readwrite(connfd, tls_cctx); + if (!usetls) + readwrite(connfd, NULL); + if (tls_cctx) + timeout_tls(s, tls_cctx, tls_close); + close(connfd); + tls_free(tls_cctx); + } + if (family == AF_UNIX && uflag) { + if (connect(s, NULL, 0) == -1) + err(1, "connect"); + } + + if (!kflag) + break; + } + } else if (family == AF_UNIX) { + ret = 0; + + if ((s = unix_connect(host)) > 0) { + if (!zflag) + readwrite(s, NULL); + close(s); + } else { + warn("%s", host); + ret = 1; + } + + if (uflag) + unlink(unix_dg_tmp_socket); + return ret; + + } else { + int i = 0; + + /* Construct the portlist[] array. */ + build_ports(uport); + + /* Cycle through portlist, connecting to each port. */ + for (s = -1, i = 0; portlist[i] != NULL; i++) { + if (s != -1) + close(s); + tls_free(tls_ctx); + tls_ctx = NULL; + + if (usetls) { + if ((tls_ctx = tls_client()) == NULL) + errx(1, "tls client creation failed"); + if (tls_configure(tls_ctx, tls_cfg) == -1) + errx(1, "tls configuration failed (%s)", + tls_error(tls_ctx)); + } + if (xflag) + s = socks_connect(host, portlist[i], hints, + proxy, proxyport, proxyhints, socksv, + Pflag); + else + s = remote_connect(host, portlist[i], hints, + ipaddr); + + if (s == -1) + continue; + + ret = 0; + if (vflag || zflag) { + /* For UDP, make sure we are connected. */ + if (uflag) { + if (udptest(s) == -1) { + ret = 1; + continue; + } + } + + /* Don't look up port if -n. */ + if (nflag) + sv = NULL; + else { + sv = getservbyport( + ntohs(atoi(portlist[i])), + uflag ? "udp" : "tcp"); + } + + fprintf(stderr, "Connection to %s", host); + + /* + * if we aren't connecting thru a proxy and + * there is something to report, print IP + */ + if (!nflag && !xflag + && (strcmp(host, ipaddr) != 0)) + fprintf(stderr, " (%s)", ipaddr); + + fprintf(stderr, " %s port [%s/%s] succeeded!\n", + portlist[i], uflag ? "udp" : "tcp", + sv ? 
sv->s_name : "*"); + } + if (Fflag) + fdpass(s); + else { + if (usetls) + tls_setup_client(tls_ctx, s, host); + if (!zflag) + readwrite(s, tls_ctx); + if (tls_ctx) + timeout_tls(s, tls_ctx, tls_close); + } + } + } + + if (s != -1) + close(s); + tls_free(tls_ctx); + tls_config_free(tls_cfg); + + return ret; +} + +/* + * unix_bind() + * Returns a unix socket bound to the given path + */ +int +unix_bind(char *path, int flags) +{ + struct sockaddr_un s_un; + int s, save_errno; + + /* Create unix domain socket. */ + if ((s = socket(AF_UNIX, flags | (uflag ? SOCK_DGRAM : SOCK_STREAM), + 0)) == -1) + return -1; + + memset(&s_un, 0, sizeof(struct sockaddr_un)); + s_un.sun_family = AF_UNIX; + + if (strlcpy(s_un.sun_path, path, sizeof(s_un.sun_path)) >= + sizeof(s_un.sun_path)) { + close(s); + errno = ENAMETOOLONG; + return -1; + } + + if (bind(s, (struct sockaddr *)&s_un, sizeof(s_un)) == -1) { + save_errno = errno; + close(s); + errno = save_errno; + return -1; + } + if (vflag) + report_sock("Bound", NULL, 0, path); + + return s; +} + +int +timeout_tls(int s, struct tls *tls_ctx, int (*func)(struct tls *)) +{ + struct pollfd pfd; + int ret; + + while ((ret = (*func)(tls_ctx)) != 0) { + if (ret == TLS_WANT_POLLIN) + pfd.events = POLLIN; + else if (ret == TLS_WANT_POLLOUT) + pfd.events = POLLOUT; + else + break; + pfd.fd = s; + if ((ret = poll(&pfd, 1, timeout)) == 1) + continue; + else if (ret == 0) { + errno = ETIMEDOUT; + ret = -1; + break; + } else + err(1, "poll failed"); + } + + return ret; +} + +void +tls_setup_client(struct tls *tls_ctx, int s, char *host) +{ + const char *errstr; + + if (tls_connect_socket(tls_ctx, s, + tls_expectname ? 
tls_expectname : host) == -1) { + errx(1, "tls connection failed (%s)", + tls_error(tls_ctx)); + } + if (timeout_tls(s, tls_ctx, tls_handshake) == -1) { + if ((errstr = tls_error(tls_ctx)) == NULL) + errstr = strerror(errno); + errx(1, "tls handshake failed (%s)", errstr); + } + if (vflag) + report_tls(tls_ctx, host); + if (tls_expecthash && (tls_peer_cert_hash(tls_ctx) == NULL || + strcmp(tls_expecthash, tls_peer_cert_hash(tls_ctx)) != 0)) + errx(1, "peer certificate is not %s", tls_expecthash); + if (Zflag) { + save_peer_cert(tls_ctx, Zflag); + if (Zflag != stderr && (fclose(Zflag) != 0)) + err(1, "fclose failed saving peer cert"); + } +} + +/* + * tls_setup_server() + * Runs the server side of the TLS handshake on the accepted socket connfd + * and checks the client certificate against the clientcert (TLS_CCERT), + * -H (tls_expecthash) and -e (tls_expectname) requirements. + * Returns the per-connection context on success. On any failure a warning + * is printed, the per-connection context is freed and NULL is returned; + * connfd itself is left open for the caller to close. + */ +struct tls * +tls_setup_server(struct tls *tls_ctx, int connfd, char *host) +{ + struct tls *tls_cctx = NULL; + const char *errstr; + + if (tls_accept_socket(tls_ctx, &tls_cctx, connfd) == -1) { + warnx("tls accept failed (%s)", tls_error(tls_ctx)); + /* nothing usable was handed back; never free a stale pointer */ + tls_cctx = NULL; + } else if (timeout_tls(connfd, tls_cctx, tls_handshake) == -1) { + if ((errstr = tls_error(tls_cctx)) == NULL) + errstr = strerror(errno); + warnx("tls handshake failed (%s)", errstr); + } else { + int gotcert = tls_peer_cert_provided(tls_cctx); + + if (vflag && gotcert) + report_tls(tls_cctx, host); + if ((TLSopt & TLS_CCERT) && !gotcert) + warnx("No client certificate provided"); + else if (gotcert && tls_expecthash && + (tls_peer_cert_hash(tls_cctx) == NULL || + strcmp(tls_expecthash, tls_peer_cert_hash(tls_cctx)) != 0)) + warnx("peer certificate is not %s", tls_expecthash); + else if (gotcert && tls_expectname && + (!tls_peer_cert_contains_name(tls_cctx, tls_expectname))) + warnx("name (%s) not found in client cert", + tls_expectname); + else { + return tls_cctx; + } + } + /* + * Every rejection ends up here. The caller only frees the pointer + * we return, so release the connection context now; otherwise each + * rejected client leaks one context while looping under -k. + */ + tls_free(tls_cctx); + return NULL; +} + +/* + * unix_connect() + * Returns a socket connected to a local unix socket. Returns -1 on failure.
+ */ +int +unix_connect(char *path) +{ + struct sockaddr_un s_un; + int s, save_errno; + + if (uflag) { + if ((s = unix_bind(unix_dg_tmp_socket, SOCK_CLOEXEC)) == -1) + return -1; + } else { + if ((s = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)) == -1) + return -1; + } + + memset(&s_un, 0, sizeof(struct sockaddr_un)); + s_un.sun_family = AF_UNIX; + + if (strlcpy(s_un.sun_path, path, sizeof(s_un.sun_path)) >= + sizeof(s_un.sun_path)) { + close(s); + errno = ENAMETOOLONG; + return -1; + } + if (connect(s, (struct sockaddr *)&s_un, sizeof(s_un)) == -1) { + save_errno = errno; + close(s); + errno = save_errno; + return -1; + } + return s; + +} + +/* + * unix_listen() + * Create a unix domain socket, and listen on it. + * Returns the listening socket, or -1 with errno set on failure. + */ +int +unix_listen(char *path) +{ + int s, save_errno; + + if ((s = unix_bind(path, 0)) == -1) + return -1; + if (listen(s, 5) == -1) { + /* + * Keep listen()'s errno across close(), as unix_bind() and + * unix_connect() do, so the caller reports the real failure. + */ + save_errno = errno; + close(s); + errno = save_errno; + return -1; + } + if (vflag) + report_sock("Listening", NULL, 0, path); + + return s; +} + +/* + * remote_connect() + * Returns a socket connected to a remote host. Properly binds to a local + * port or source address if needed. Returns -1 on failure. + */ +int +remote_connect(const char *host, const char *port, struct addrinfo hints, + char *ipaddr) +{ + struct addrinfo *res, *res0; + int s = -1, error, herr, save_errno; +#ifdef SO_BINDANY + int on = 1; +#endif + + if ((error = getaddrinfo(host, port, &hints, &res0))) + errx(1, "getaddrinfo for host \"%s\" port %s: %s", host, + port, gai_strerror(error)); + + for (res = res0; res; res = res->ai_next) { + if ((s = socket(res->ai_family, res->ai_socktype | + SOCK_NONBLOCK, res->ai_protocol)) == -1) + continue; + + /* Bind to a local port or source address if specified.
*/ + if (sflag || pflag) { + struct addrinfo ahints, *ares; + +#ifdef SO_BINDANY + /* try SO_BINDANY, but don't insist */ + setsockopt(s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on)); +#endif + memset(&ahints, 0, sizeof(struct addrinfo)); + ahints.ai_family = res->ai_family; + ahints.ai_socktype = uflag ? SOCK_DGRAM : SOCK_STREAM; + ahints.ai_protocol = uflag ? IPPROTO_UDP : IPPROTO_TCP; + ahints.ai_flags = AI_PASSIVE; + if ((error = getaddrinfo(sflag, pflag, &ahints, &ares))) + errx(1, "getaddrinfo: %s", gai_strerror(error)); + + if (bind(s, (struct sockaddr *)ares->ai_addr, + ares->ai_addrlen) == -1) + err(1, "bind failed"); + freeaddrinfo(ares); + } + + set_common_sockopts(s, res->ai_family); + + if (ipaddr != NULL) { + herr = getnameinfo(res->ai_addr, res->ai_addrlen, + ipaddr, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + switch (herr) { + case 0: + break; + case EAI_SYSTEM: + err(1, "getnameinfo"); + default: + errx(1, "getnameinfo: %s", gai_strerror(herr)); + } + } + + if (timeout_connect(s, res->ai_addr, res->ai_addrlen) == 0) + break; + + if (vflag) { + /* only print IP if there is something to report */ + if (nflag || ipaddr == NULL || + (strncmp(host, ipaddr, NI_MAXHOST) == 0)) + warn("connect to %s port %s (%s) failed", host, + port, uflag ? "udp" : "tcp"); + else + warn("connect to %s (%s) port %s (%s) failed", + host, ipaddr, port, uflag ? "udp" : "tcp"); + } + + save_errno = errno; + close(s); + errno = save_errno; + s = -1; + } + + freeaddrinfo(res0); + + return s; +} + +int +timeout_connect(int s, const struct sockaddr *name, socklen_t namelen) +{ + struct pollfd pfd; + socklen_t optlen; + int optval; + int ret; + + if ((ret = connect(s, name, namelen)) != 0 && errno == EINPROGRESS) { + pfd.fd = s; + pfd.events = POLLOUT; + if ((ret = poll(&pfd, 1, timeout)) == 1) { + optlen = sizeof(optval); + if ((ret = getsockopt(s, SOL_SOCKET, SO_ERROR, + &optval, &optlen)) == 0) { + errno = optval; + ret = optval == 0 ? 
0 : -1; + } + } else if (ret == 0) { + errno = ETIMEDOUT; + ret = -1; + } else + err(1, "poll failed"); + } + + return ret; +} + +/* + * local_listen() + * Returns a socket listening on a local port, binds to specified source + * address. Returns -1 on failure. + */ +int +local_listen(const char *host, const char *port, struct addrinfo hints) +{ + struct addrinfo *res, *res0; + int s = -1, save_errno; + int error; +#ifdef SO_REUSEPORT + int ret, x = 1; +#endif + + /* Allow nodename to be null. */ + hints.ai_flags |= AI_PASSIVE; + + /* + * In the case of binding to a wildcard address + * default to binding to an ipv4 address. + */ + if (host == NULL && hints.ai_family == AF_UNSPEC) + hints.ai_family = AF_INET; + + if ((error = getaddrinfo(host, port, &hints, &res0))) + errx(1, "getaddrinfo: %s", gai_strerror(error)); + + for (res = res0; res; res = res->ai_next) { + if ((s = socket(res->ai_family, res->ai_socktype, + res->ai_protocol)) == -1) + continue; + +#ifdef SO_REUSEPORT + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &x, sizeof(x)); + if (ret == -1) + err(1, NULL); +#endif + + set_common_sockopts(s, res->ai_family); + + if (bind(s, (struct sockaddr *)res->ai_addr, + res->ai_addrlen) == 0) + break; + + save_errno = errno; + close(s); + errno = save_errno; + s = -1; + } + + if (!uflag && s != -1) { + if (listen(s, 1) == -1) + err(1, "listen"); + } + if (vflag && s != -1) { + struct sockaddr_storage ss; + socklen_t len; + + len = sizeof(ss); + if (getsockname(s, (struct sockaddr *)&ss, &len) == -1) + err(1, "getsockname"); + report_sock(uflag ? "Bound" : "Listening", + (struct sockaddr *)&ss, len, NULL); + } + + freeaddrinfo(res0); + + return s; +} + +/* + * readwrite() + * Loop that polls on the network file descriptor and stdin. 
+ */ +void +readwrite(int net_fd, struct tls *tls_ctx) +{ + struct pollfd pfd[4]; + int stdin_fd = STDIN_FILENO; + int stdout_fd = STDOUT_FILENO; + unsigned char netinbuf[BUFSIZE]; + size_t netinbufpos = 0; + unsigned char stdinbuf[BUFSIZE]; + size_t stdinbufpos = 0; + int n, num_fds; + ssize_t ret; + + /* don't read from stdin if requested */ + if (dflag) + stdin_fd = -1; + + /* stdin */ + pfd[POLL_STDIN].fd = stdin_fd; + pfd[POLL_STDIN].events = POLLIN; + + /* network out */ + pfd[POLL_NETOUT].fd = net_fd; + pfd[POLL_NETOUT].events = 0; + + /* network in */ + pfd[POLL_NETIN].fd = net_fd; + pfd[POLL_NETIN].events = POLLIN; + + /* stdout */ + pfd[POLL_STDOUT].fd = stdout_fd; + pfd[POLL_STDOUT].events = 0; + + while (1) { + /* both inputs are gone, buffers are empty, we are done */ + if (pfd[POLL_STDIN].fd == -1 && pfd[POLL_NETIN].fd == -1 && + stdinbufpos == 0 && netinbufpos == 0) + return; + /* both outputs are gone, we can't continue */ + if (pfd[POLL_NETOUT].fd == -1 && pfd[POLL_STDOUT].fd == -1) + return; + /* listen and net in gone, queues empty, done */ + if (lflag && pfd[POLL_NETIN].fd == -1 && + stdinbufpos == 0 && netinbufpos == 0) + return; + + /* help says -i is for "wait between lines sent". 
We read and + * write arbitrary amounts of data, and we don't want to start + * scanning for newlines, so this is as good as it gets */ + if (iflag) + sleep(iflag); + + /* poll */ + num_fds = poll(pfd, 4, timeout); + + /* treat poll errors */ + if (num_fds == -1) + err(1, "polling error"); + + /* timeout happened */ + if (num_fds == 0) + return; + + /* treat socket error conditions */ + for (n = 0; n < 4; n++) { + if (pfd[n].revents & (POLLERR|POLLNVAL)) { + pfd[n].fd = -1; + } + } + /* reading is possible after HUP */ + if (pfd[POLL_STDIN].events & POLLIN && + pfd[POLL_STDIN].revents & POLLHUP && + !(pfd[POLL_STDIN].revents & POLLIN)) + pfd[POLL_STDIN].fd = -1; + + if (pfd[POLL_NETIN].events & POLLIN && + pfd[POLL_NETIN].revents & POLLHUP && + !(pfd[POLL_NETIN].revents & POLLIN)) + pfd[POLL_NETIN].fd = -1; + + if (pfd[POLL_NETOUT].revents & POLLHUP) { + if (Nflag) + shutdown(pfd[POLL_NETOUT].fd, SHUT_WR); + pfd[POLL_NETOUT].fd = -1; + } + /* if HUP, stop watching stdout */ + if (pfd[POLL_STDOUT].revents & POLLHUP) + pfd[POLL_STDOUT].fd = -1; + /* if no net out, stop watching stdin */ + if (pfd[POLL_NETOUT].fd == -1) + pfd[POLL_STDIN].fd = -1; + /* if no stdout, stop watching net in */ + if (pfd[POLL_STDOUT].fd == -1) { + if (pfd[POLL_NETIN].fd != -1) + shutdown(pfd[POLL_NETIN].fd, SHUT_RD); + pfd[POLL_NETIN].fd = -1; + } + + /* try to read from stdin */ + if (pfd[POLL_STDIN].revents & POLLIN && stdinbufpos < BUFSIZE) { + ret = fillbuf(pfd[POLL_STDIN].fd, stdinbuf, + &stdinbufpos, NULL); + if (ret == TLS_WANT_POLLIN) + pfd[POLL_STDIN].events = POLLIN; + else if (ret == TLS_WANT_POLLOUT) + pfd[POLL_STDIN].events = POLLOUT; + else if (ret == 0 || ret == -1) + pfd[POLL_STDIN].fd = -1; + /* read something - poll net out */ + if (stdinbufpos > 0) + pfd[POLL_NETOUT].events = POLLOUT; + /* filled buffer - remove self from polling */ + if (stdinbufpos == BUFSIZE) + pfd[POLL_STDIN].events = 0; + } + /* try to write to network */ + if (pfd[POLL_NETOUT].revents & POLLOUT && 
stdinbufpos > 0) { + ret = drainbuf(pfd[POLL_NETOUT].fd, stdinbuf, + &stdinbufpos, tls_ctx); + if (ret == TLS_WANT_POLLIN) + pfd[POLL_NETOUT].events = POLLIN; + else if (ret == TLS_WANT_POLLOUT) + pfd[POLL_NETOUT].events = POLLOUT; + else if (ret == -1) + pfd[POLL_NETOUT].fd = -1; + /* buffer empty - remove self from polling */ + if (stdinbufpos == 0) + pfd[POLL_NETOUT].events = 0; + /* buffer no longer full - poll stdin again */ + if (stdinbufpos < BUFSIZE) + pfd[POLL_STDIN].events = POLLIN; + } + /* try to read from network */ + if (pfd[POLL_NETIN].revents & POLLIN && netinbufpos < BUFSIZE) { + ret = fillbuf(pfd[POLL_NETIN].fd, netinbuf, + &netinbufpos, tls_ctx); + if (ret == TLS_WANT_POLLIN) + pfd[POLL_NETIN].events = POLLIN; + else if (ret == TLS_WANT_POLLOUT) + pfd[POLL_NETIN].events = POLLOUT; + else if (ret == -1) + pfd[POLL_NETIN].fd = -1; + /* eof on net in - remove from pfd */ + if (ret == 0) { + shutdown(pfd[POLL_NETIN].fd, SHUT_RD); + pfd[POLL_NETIN].fd = -1; + } + if (recvlimit > 0 && ++recvcount >= recvlimit) { + if (pfd[POLL_NETIN].fd != -1) + shutdown(pfd[POLL_NETIN].fd, SHUT_RD); + pfd[POLL_NETIN].fd = -1; + pfd[POLL_STDIN].fd = -1; + } + /* read something - poll stdout */ + if (netinbufpos > 0) + pfd[POLL_STDOUT].events = POLLOUT; + /* filled buffer - remove self from polling */ + if (netinbufpos == BUFSIZE) + pfd[POLL_NETIN].events = 0; + /* handle telnet */ + if (tflag) + atelnet(pfd[POLL_NETIN].fd, netinbuf, + netinbufpos); + } + /* try to write to stdout */ + if (pfd[POLL_STDOUT].revents & POLLOUT && netinbufpos > 0) { + ret = drainbuf(pfd[POLL_STDOUT].fd, netinbuf, + &netinbufpos, NULL); + if (ret == TLS_WANT_POLLIN) + pfd[POLL_STDOUT].events = POLLIN; + else if (ret == TLS_WANT_POLLOUT) + pfd[POLL_STDOUT].events = POLLOUT; + else if (ret == -1) + pfd[POLL_STDOUT].fd = -1; + /* buffer empty - remove self from polling */ + if (netinbufpos == 0) + pfd[POLL_STDOUT].events = 0; + /* buffer no longer full - poll net in again */ + if (netinbufpos < 
BUFSIZE) + pfd[POLL_NETIN].events = POLLIN; + } + + /* stdin gone and queue empty? */ + if (pfd[POLL_STDIN].fd == -1 && stdinbufpos == 0) { + if (pfd[POLL_NETOUT].fd != -1 && Nflag) + shutdown(pfd[POLL_NETOUT].fd, SHUT_WR); + pfd[POLL_NETOUT].fd = -1; + } + /* net in gone and queue empty? */ + if (pfd[POLL_NETIN].fd == -1 && netinbufpos == 0) { + pfd[POLL_STDOUT].fd = -1; + } + } +} + +ssize_t +drainbuf(int fd, unsigned char *buf, size_t *bufpos, struct tls *tls) +{ + ssize_t n; + ssize_t adjust; + + if (tls) { + n = tls_write(tls, buf, *bufpos); + if (n == -1) + errx(1, "tls write failed (%s)", tls_error(tls)); + } else { + n = write(fd, buf, *bufpos); + /* don't treat EAGAIN, EINTR as error */ + if (n == -1 && (errno == EAGAIN || errno == EINTR)) + n = TLS_WANT_POLLOUT; + } + if (n <= 0) + return n; + /* adjust buffer */ + adjust = *bufpos - n; + if (adjust > 0) + memmove(buf, buf + n, adjust); + *bufpos -= n; + return n; +} + +ssize_t +fillbuf(int fd, unsigned char *buf, size_t *bufpos, struct tls *tls) +{ + size_t num = BUFSIZE - *bufpos; + ssize_t n; + + if (tls) { + n = tls_read(tls, buf + *bufpos, num); + if (n == -1) + errx(1, "tls read failed (%s)", tls_error(tls)); + } else { + n = read(fd, buf + *bufpos, num); + /* don't treat EAGAIN, EINTR as error */ + if (n == -1 && (errno == EAGAIN || errno == EINTR)) + n = TLS_WANT_POLLIN; + } + if (n <= 0) + return n; + *bufpos += n; + return n; +} + +/* + * fdpass() + * Pass the connected file descriptor to stdout and exit. 
+ */ +void +fdpass(int nfd) +{ + struct msghdr mh; + union { + struct cmsghdr hdr; + char buf[CMSG_SPACE(sizeof(int))]; + } cmsgbuf; + struct cmsghdr *cmsg; + struct iovec iov; + char c = '\0'; + ssize_t r; + struct pollfd pfd; + + /* Avoid obvious stupidity */ + if (isatty(STDOUT_FILENO)) + errx(1, "Cannot pass file descriptor to tty"); + + memset(&mh, 0, sizeof(mh)); + memset(&cmsgbuf, 0, sizeof(cmsgbuf)); + memset(&iov, 0, sizeof(iov)); + + mh.msg_control = (caddr_t)&cmsgbuf.buf; + mh.msg_controllen = sizeof(cmsgbuf.buf); + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + *(int *)CMSG_DATA(cmsg) = nfd; + + iov.iov_base = &c; + iov.iov_len = 1; + mh.msg_iov = &iov; + mh.msg_iovlen = 1; + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = STDOUT_FILENO; + pfd.events = POLLOUT; + for (;;) { + r = sendmsg(STDOUT_FILENO, &mh, 0); + if (r == -1) { + if (errno == EAGAIN || errno == EINTR) { + if (poll(&pfd, 1, -1) == -1) + err(1, "poll"); + continue; + } + err(1, "sendmsg"); + } else if (r != 1) + errx(1, "sendmsg: unexpected return value %zd", r); + else + break; + } + exit(0); +} + +/* Deal with RFC 854 WILL/WONT DO/DONT negotiation. */ +void +atelnet(int nfd, unsigned char *buf, unsigned int size) +{ + unsigned char *p, *end; + unsigned char obuf[4]; + + if (size < 3) + return; + end = buf + size - 2; + + for (p = buf; p < end; p++) { + if (*p != IAC) + continue; + + obuf[0] = IAC; + p++; + if ((*p == WILL) || (*p == WONT)) + obuf[1] = DONT; + else if ((*p == DO) || (*p == DONT)) + obuf[1] = WONT; + else + continue; + + p++; + obuf[2] = *p; + if (atomicio(vwrite, nfd, obuf, 3) != 3) + warn("Write Error!"); + } +} + + +int +strtoport(char *portstr, int udp) +{ + struct servent *entry; + const char *errstr; + char *proto; + int port = -1; + + proto = udp ? 
"udp" : "tcp"; + + port = strtonum(portstr, 1, PORT_MAX, &errstr); + if (errstr == NULL) + return port; + if (errno != EINVAL) + errx(1, "port number %s: %s", errstr, portstr); + if ((entry = getservbyname(portstr, proto)) == NULL) + errx(1, "service \"%s\" unknown", portstr); + return ntohs(entry->s_port); +} + +/* + * build_ports() + * Build an array of ports in portlist[], listing each port + * that we should try to connect to. + */ +void +build_ports(char *p) +{ + char *n; + int hi, lo, cp; + int x = 0; + + if (isdigit((unsigned char)*p) && (n = strchr(p, '-')) != NULL) { + *n = '\0'; + n++; + + /* Make sure the ports are in order: lowest->highest. */ + hi = strtoport(n, uflag); + lo = strtoport(p, uflag); + if (lo > hi) { + cp = hi; + hi = lo; + lo = cp; + } + + /* + * Initialize portlist with a random permutation. Based on + * Knuth, as in ip_randomid() in sys/netinet/ip_id.c. + */ + if (rflag) { + for (x = 0; x <= hi - lo; x++) { + cp = arc4random_uniform(x + 1); + portlist[x] = portlist[cp]; + if (asprintf(&portlist[cp], "%d", x + lo) == -1) + err(1, "asprintf"); + } + } else { /* Load ports sequentially. */ + for (cp = lo; cp <= hi; cp++) { + if (asprintf(&portlist[x], "%d", cp) == -1) + err(1, "asprintf"); + x++; + } + } + } else { + char *tmp; + + hi = strtoport(p, uflag); + if (asprintf(&tmp, "%d", hi) != -1) + portlist[0] = tmp; + else + err(1, NULL); + } +} + +/* + * udptest() + * Do a few writes to see if the UDP port is there. + * Fails once PF state table is full. 
+ */ +int +udptest(int s) +{ + int i, ret; + + for (i = 0; i <= 3; i++) { + if (write(s, "X", 1) == 1) + ret = 1; + else + ret = -1; + } + return ret; +} + +void +set_common_sockopts(int s, int af) +{ + int x = 1; + +#ifdef TCP_MD5SIG + if (Sflag) { + if (setsockopt(s, IPPROTO_TCP, TCP_MD5SIG, + &x, sizeof(x)) == -1) + err(1, NULL); + } +#endif + if (Dflag) { + if (setsockopt(s, SOL_SOCKET, SO_DEBUG, + &x, sizeof(x)) == -1) + err(1, NULL); + } + if (Tflag != -1) { + if (af == AF_INET && setsockopt(s, IPPROTO_IP, + IP_TOS, &Tflag, sizeof(Tflag)) == -1) + err(1, "set IP ToS"); + +#ifdef IPV6_TCLASS + else if (af == AF_INET6 && setsockopt(s, IPPROTO_IPV6, + IPV6_TCLASS, &Tflag, sizeof(Tflag)) == -1) + err(1, "set IPv6 traffic class"); +#else + else if (af == AF_INET6) { + errno = ENOPROTOOPT; + err(1, "set IPv6 traffic class not supported"); + } +#endif + } + if (Iflag) { + if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, + &Iflag, sizeof(Iflag)) == -1) + err(1, "set TCP receive buffer size"); + } + if (Oflag) { + if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, + &Oflag, sizeof(Oflag)) == -1) + err(1, "set TCP send buffer size"); + } + + if (ttl != -1) { + if (af == AF_INET && setsockopt(s, IPPROTO_IP, + IP_TTL, &ttl, sizeof(ttl))) + err(1, "set IP TTL"); + + else if (af == AF_INET6 && setsockopt(s, IPPROTO_IPV6, + IPV6_UNICAST_HOPS, &ttl, sizeof(ttl))) + err(1, "set IPv6 unicast hops"); + } + + if (minttl != -1) { +#ifdef IP_MINTTL + if (af == AF_INET && setsockopt(s, IPPROTO_IP, + IP_MINTTL, &minttl, sizeof(minttl))) + err(1, "set IP min TTL"); +#endif + +#ifdef IPV6_MINHOPCOUNT + if (af == AF_INET6 && setsockopt(s, IPPROTO_IPV6, + IPV6_MINHOPCOUNT, &minttl, sizeof(minttl))) + err(1, "set IPv6 min hop count"); +#endif + } +} + +int +process_tos_opt(char *s, int *val) +{ + /* DiffServ Codepoints and other TOS mappings */ + const struct toskeywords { + const char *keyword; + int val; + } *t, toskeywords[] = { + { "af11", IPTOS_DSCP_AF11 }, + { "af12", IPTOS_DSCP_AF12 }, + { "af13", 
IPTOS_DSCP_AF13 }, + { "af21", IPTOS_DSCP_AF21 }, + { "af22", IPTOS_DSCP_AF22 }, + { "af23", IPTOS_DSCP_AF23 }, + { "af31", IPTOS_DSCP_AF31 }, + { "af32", IPTOS_DSCP_AF32 }, + { "af33", IPTOS_DSCP_AF33 }, + { "af41", IPTOS_DSCP_AF41 }, + { "af42", IPTOS_DSCP_AF42 }, + { "af43", IPTOS_DSCP_AF43 }, + { "critical", IPTOS_PREC_CRITIC_ECP }, + { "cs0", IPTOS_DSCP_CS0 }, + { "cs1", IPTOS_DSCP_CS1 }, + { "cs2", IPTOS_DSCP_CS2 }, + { "cs3", IPTOS_DSCP_CS3 }, + { "cs4", IPTOS_DSCP_CS4 }, + { "cs5", IPTOS_DSCP_CS5 }, + { "cs6", IPTOS_DSCP_CS6 }, + { "cs7", IPTOS_DSCP_CS7 }, + { "ef", IPTOS_DSCP_EF }, + { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, + { "lowdelay", IPTOS_LOWDELAY }, + { "netcontrol", IPTOS_PREC_NETCONTROL }, + { "reliability", IPTOS_RELIABILITY }, + { "throughput", IPTOS_THROUGHPUT }, + { NULL, -1 }, + }; + + for (t = toskeywords; t->keyword != NULL; t++) { + if (strcmp(s, t->keyword) == 0) { + *val = t->val; + return 1; + } + } + + return 0; +} + +int +process_tls_opt(char *s, int *flags) +{ + size_t len; + char *v; + + const struct tlskeywords { + const char *keyword; + int flag; + char **value; + } *t, tlskeywords[] = { + { "ciphers", -1, &tls_ciphers }, + { "clientcert", TLS_CCERT, NULL }, + { "muststaple", TLS_MUSTSTAPLE, NULL }, + { "noverify", TLS_NOVERIFY, NULL }, + { "noname", TLS_NONAME, NULL }, + { "protocols", -1, &tls_protocols }, + { NULL, -1, NULL }, + }; + + len = strlen(s); + if ((v = strchr(s, '=')) != NULL) { + len = v - s; + v++; + } + + for (t = tlskeywords; t->keyword != NULL; t++) { + if (strlen(t->keyword) == len && + strncmp(s, t->keyword, len) == 0) { + if (t->value != NULL) { + if (v == NULL) + errx(1, "invalid tls value `%s'", s); + *t->value = v; + } else { + *flags |= t->flag; + } + return 1; + } + } + return 0; +} + +void +save_peer_cert(struct tls *tls_ctx, FILE *fp) +{ + const char *pem; + size_t plen; + + if ((pem = tls_peer_cert_chain_pem(tls_ctx, &plen)) == NULL) + errx(1, "Can't get peer certificate"); + if (fprintf(fp, 
"%.*s", (int)plen, pem) < 0) + err(1, "unable to save peer cert"); + if (fflush(fp) != 0) + err(1, "unable to flush peer cert"); +} + +void +report_tls(struct tls * tls_ctx, char * host) +{ + time_t t; + const char *ocsp_url; + + fprintf(stderr, "TLS handshake negotiated %s/%s with host %s\n", + tls_conn_version(tls_ctx), tls_conn_cipher(tls_ctx), host); + fprintf(stderr, "Peer name: %s\n", + tls_expectname ? tls_expectname : host); + if (tls_peer_cert_subject(tls_ctx)) + fprintf(stderr, "Subject: %s\n", + tls_peer_cert_subject(tls_ctx)); + if (tls_peer_cert_issuer(tls_ctx)) + fprintf(stderr, "Issuer: %s\n", + tls_peer_cert_issuer(tls_ctx)); + if ((t = tls_peer_cert_notbefore(tls_ctx)) != -1) + fprintf(stderr, "Valid From: %s", ctime(&t)); + if ((t = tls_peer_cert_notafter(tls_ctx)) != -1) + fprintf(stderr, "Valid Until: %s", ctime(&t)); + if (tls_peer_cert_hash(tls_ctx)) + fprintf(stderr, "Cert Hash: %s\n", + tls_peer_cert_hash(tls_ctx)); + ocsp_url = tls_peer_ocsp_url(tls_ctx); + if (ocsp_url != NULL) + fprintf(stderr, "OCSP URL: %s\n", ocsp_url); + switch (tls_peer_ocsp_response_status(tls_ctx)) { + case TLS_OCSP_RESPONSE_SUCCESSFUL: + fprintf(stderr, "OCSP Stapling: %s\n", + tls_peer_ocsp_result(tls_ctx) == NULL ? "" : + tls_peer_ocsp_result(tls_ctx)); + fprintf(stderr, + " response_status=%d cert_status=%d crl_reason=%d\n", + tls_peer_ocsp_response_status(tls_ctx), + tls_peer_ocsp_cert_status(tls_ctx), + tls_peer_ocsp_crl_reason(tls_ctx)); + t = tls_peer_ocsp_this_update(tls_ctx); + fprintf(stderr, " this update: %s", + t != -1 ? ctime(&t) : "\n"); + t = tls_peer_ocsp_next_update(tls_ctx); + fprintf(stderr, " next update: %s", + t != -1 ? ctime(&t) : "\n"); + t = tls_peer_ocsp_revocation_time(tls_ctx); + fprintf(stderr, " revocation: %s", + t != -1 ? 
ctime(&t) : "\n"); + break; + case -1: + break; + default: + fprintf(stderr, "OCSP Stapling: failure - response_status %d (%s)\n", + tls_peer_ocsp_response_status(tls_ctx), + tls_peer_ocsp_result(tls_ctx) == NULL ? "" : + tls_peer_ocsp_result(tls_ctx)); + break; + + } +} + +void +report_sock(const char *msg, const struct sockaddr *sa, socklen_t salen, + char *path) +{ + char host[NI_MAXHOST], port[NI_MAXSERV]; + int herr; + int flags = NI_NUMERICSERV; + + if (path != NULL) { + fprintf(stderr, "%s on %s\n", msg, path); + return; + } + + if (nflag) + flags |= NI_NUMERICHOST; + + herr = getnameinfo(sa, salen, host, sizeof(host), port, sizeof(port), + flags); + switch (herr) { + case 0: + break; + case EAI_SYSTEM: + err(1, "getnameinfo"); + default: + errx(1, "getnameinfo: %s", gai_strerror(herr)); + } + + fprintf(stderr, "%s on %s %s\n", msg, host, port); +} + +void +help(void) +{ + usage(0); + fprintf(stderr, "\tCommand Summary:\n\ + \t-4 Use IPv4\n\ + \t-6 Use IPv6\n\ + \t-C certfile Public key file\n\ + \t-c Use TLS\n\ + \t-D Enable the debug socket option\n\ + \t-d Detach from stdin\n\ + \t-e name\t Required name in peer certificate\n\ + \t-F Pass socket fd\n\ + \t-H hash\t Hash string of peer certificate\n\ + \t-h This help text\n\ + \t-I length TCP receive buffer length\n\ + \t-i interval Delay interval for lines sent, ports scanned\n\ + \t-K keyfile Private key file\n\ + \t-k Keep inbound sockets open for multiple connects\n\ + \t-l Listen mode, for inbound connects\n\ + \t-M ttl Outgoing TTL / Hop Limit\n\ + \t-m minttl Minimum incoming TTL / Hop Limit\n\ + \t-N Shutdown the network socket after EOF on stdin\n\ + \t-n Suppress name/port resolutions\n\ + \t-O length TCP send buffer length\n\ + \t-o staplefile Staple file\n\ + \t-P proxyuser\tUsername for proxy authentication\n\ + \t-p port\t Specify local port for remote connects\n\ + \t-R CAfile CA bundle\n\ + \t-r Randomize remote ports\n" +#ifdef TCP_MD5SIG + "\ + \t-S Enable the TCP MD5 signature option\n" 
+#endif + "\ + \t-s sourceaddr Local source address\n\ + \t-T keyword TOS value or TLS options\n\ + \t-t Answer TELNET negotiation\n\ + \t-U Use UNIX domain socket\n\ + \t-u UDP mode\n" +#ifdef SO_RTABLE + "\ + \t-V rtable Specify alternate routing table\n" +#endif + "\ + \t-v Verbose\n\ + \t-W recvlimit Terminate after receiving a number of packets\n\ + \t-w timeout Timeout for connects and final net reads\n\ + \t-X proto Proxy protocol: \"4\", \"5\" (SOCKS) or \"connect\"\n\ + \t-x addr[:port]\tSpecify proxy address and port\n\ + \t-Z Peer certificate file\n\ + \t-z Zero-I/O mode [used for scanning]\n\ + Port numbers can be individual or ranges: lo-hi [inclusive]\n"); + exit(1); +} + +void +usage(int ret) +{ + fprintf(stderr, + "usage: nc [-46cDdFhklNnrStUuvz] [-C certfile] [-e name] " + "[-H hash] [-I length]\n" + "\t [-i interval] [-K keyfile] [-M ttl] [-m minttl] [-O length]\n" + "\t [-o staplefile] [-P proxy_username] [-p source_port] " + "[-R CAfile]\n" + "\t [-s sourceaddr] [-T keyword] [-V rtable] [-W recvlimit] " + "[-w timeout]\n" + "\t [-X proxy_protocol] [-x proxy_address[:port]] " + "[-Z peercertfile]\n" + "\t [destination] [port]\n"); + if (ret) + exit(1); +} diff --git a/usr.bin/nc/socks.c b/usr.bin/nc/socks.c new file mode 100644 index 0000000..9766be7 --- /dev/null +++ b/usr.bin/nc/socks.c @@ -0,0 +1,400 @@ +/* $OpenBSD: socks.c,v 1.30 2019/11/04 17:33:28 millert Exp $ */ + +/* + * Copyright (c) 1999 Niklas Hallqvist. All rights reserved. + * Copyright (c) 2004, 2005 Damien Miller. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <err.h> +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <resolv.h> +#include <readpassphrase.h> +#include "atomicio.h" + +#define SOCKS_PORT "1080" +#define HTTP_PROXY_PORT "3128" +#define HTTP_MAXHDRS 64 +#define SOCKS_V5 5 +#define SOCKS_V4 4 +#define SOCKS_NOAUTH 0 +#define SOCKS_NOMETHOD 0xff +#define SOCKS_CONNECT 1 +#define SOCKS_IPV4 1 +#define SOCKS_DOMAIN 3 +#define SOCKS_IPV6 4 + +int remote_connect(const char *, const char *, struct addrinfo, char *); +int socks_connect(const char *, const char *, struct addrinfo, + const char *, const char *, struct addrinfo, int, + const char *); + +static int +decode_addrport(const char *h, const char *p, struct sockaddr *addr, + socklen_t addrlen, int v4only, int numeric) +{ + int r; + struct addrinfo hints, *res; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = v4only 
? PF_INET : PF_UNSPEC; + hints.ai_flags = numeric ? AI_NUMERICHOST : 0; + hints.ai_socktype = SOCK_STREAM; + r = getaddrinfo(h, p, &hints, &res); + /* Don't fatal when attempting to convert a numeric address */ + if (r != 0) { + if (!numeric) { + errx(1, "getaddrinfo(\"%.64s\", \"%.64s\"): %s", h, p, + gai_strerror(r)); + } + return (-1); + } + if (addrlen < res->ai_addrlen) { + freeaddrinfo(res); + errx(1, "internal error: addrlen < res->ai_addrlen"); + } + memcpy(addr, res->ai_addr, res->ai_addrlen); + freeaddrinfo(res); + return (0); +} + +static int +proxy_read_line(int fd, char *buf, size_t bufsz) +{ + size_t off; + + for(off = 0;;) { + if (off >= bufsz) + errx(1, "proxy read too long"); + if (atomicio(read, fd, buf + off, 1) != 1) + err(1, "proxy read"); + /* Skip CR */ + if (buf[off] == '\r') + continue; + if (buf[off] == '\n') { + buf[off] = '\0'; + break; + } + off++; + } + return (off); +} + +static void +getproxypass(const char *proxyuser, const char *proxyhost, + char *pw, size_t pwlen) +{ + char prompt[512]; + + snprintf(prompt, sizeof(prompt), "Proxy password for %s@%s: ", + proxyuser, proxyhost); + if (readpassphrase(prompt, pw, pwlen, RPP_REQUIRE_TTY) == NULL) + errx(1, "Unable to read proxy passphrase"); +} + +/* + * Error strings adapted from the generally accepted SOCKSv4 spec: + * + * http://ftp.icm.edu.pl/packages/socks/socks4/SOCKS4.protocol + */ +static const char * +socks4_strerror(int e) +{ + switch (e) { + case 90: + return "Succeeded"; + case 91: + return "Request rejected or failed"; + case 92: + return "SOCKS server cannot connect to identd on the client"; + case 93: + return "Client program and identd report different user-ids"; + default: + return "Unknown error"; + } +} + +/* + * Error strings taken almost directly from RFC 1928. 
+ */ +static const char * +socks5_strerror(int e) +{ + switch (e) { + case 0: + return "Succeeded"; + case 1: + return "General SOCKS server failure"; + case 2: + return "Connection not allowed by ruleset"; + case 3: + return "Network unreachable"; + case 4: + return "Host unreachable"; + case 5: + return "Connection refused"; + case 6: + return "TTL expired"; + case 7: + return "Command not supported"; + case 8: + return "Address type not supported"; + default: + return "Unknown error"; + } +} + +int +socks_connect(const char *host, const char *port, + struct addrinfo hints __attribute__ ((__unused__)), + const char *proxyhost, const char *proxyport, struct addrinfo proxyhints, + int socksv, const char *proxyuser) +{ + int proxyfd, r, authretry = 0; + size_t hlen, wlen; + unsigned char buf[1024]; + size_t cnt; + struct sockaddr_storage addr; + struct sockaddr_in *in4 = (struct sockaddr_in *)&addr; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&addr; + in_port_t serverport; + + if (proxyport == NULL) + proxyport = (socksv == -1) ? 
HTTP_PROXY_PORT : SOCKS_PORT; + + /* Abuse API to lookup port */ + if (decode_addrport("0.0.0.0", port, (struct sockaddr *)&addr, + sizeof(addr), 1, 1) == -1) + errx(1, "unknown port \"%.64s\"", port); + serverport = in4->sin_port; + + again: + if (authretry++ > 3) + errx(1, "Too many authentication failures"); + + proxyfd = remote_connect(proxyhost, proxyport, proxyhints, NULL); + + if (proxyfd < 0) + return (-1); + + if (socksv == 5) { + if (decode_addrport(host, port, (struct sockaddr *)&addr, + sizeof(addr), 0, 1) == -1) + addr.ss_family = 0; /* used in switch below */ + + /* Version 5, one method: no authentication */ + buf[0] = SOCKS_V5; + buf[1] = 1; + buf[2] = SOCKS_NOAUTH; + cnt = atomicio(vwrite, proxyfd, buf, 3); + if (cnt != 3) + err(1, "write failed (%zu/3)", cnt); + + cnt = atomicio(read, proxyfd, buf, 2); + if (cnt != 2) + err(1, "read failed (%zu/3)", cnt); + + if (buf[1] == SOCKS_NOMETHOD) + errx(1, "authentication method negotiation failed"); + + switch (addr.ss_family) { + case 0: + /* Version 5, connect: domain name */ + + /* Max domain name length is 255 bytes */ + hlen = strlen(host); + if (hlen > 255) + errx(1, "host name too long for SOCKS5"); + buf[0] = SOCKS_V5; + buf[1] = SOCKS_CONNECT; + buf[2] = 0; + buf[3] = SOCKS_DOMAIN; + buf[4] = hlen; + memcpy(buf + 5, host, hlen); + memcpy(buf + 5 + hlen, &serverport, sizeof serverport); + wlen = 7 + hlen; + break; + case AF_INET: + /* Version 5, connect: IPv4 address */ + buf[0] = SOCKS_V5; + buf[1] = SOCKS_CONNECT; + buf[2] = 0; + buf[3] = SOCKS_IPV4; + memcpy(buf + 4, &in4->sin_addr, sizeof in4->sin_addr); + memcpy(buf + 8, &in4->sin_port, sizeof in4->sin_port); + wlen = 10; + break; + case AF_INET6: + /* Version 5, connect: IPv6 address */ + buf[0] = SOCKS_V5; + buf[1] = SOCKS_CONNECT; + buf[2] = 0; + buf[3] = SOCKS_IPV6; + memcpy(buf + 4, &in6->sin6_addr, sizeof in6->sin6_addr); + memcpy(buf + 20, &in6->sin6_port, + sizeof in6->sin6_port); + wlen = 22; + break; + default: + errx(1, "internal 
error: silly AF"); + } + + cnt = atomicio(vwrite, proxyfd, buf, wlen); + if (cnt != wlen) + err(1, "write failed (%zu/%zu)", cnt, wlen); + + cnt = atomicio(read, proxyfd, buf, 4); + if (cnt != 4) + err(1, "read failed (%zu/4)", cnt); + if (buf[1] != 0) { + errx(1, "connection failed, SOCKSv5 error: %s", + socks5_strerror(buf[1])); + } + switch (buf[3]) { + case SOCKS_IPV4: + cnt = atomicio(read, proxyfd, buf + 4, 6); + if (cnt != 6) + err(1, "read failed (%zu/6)", cnt); + break; + case SOCKS_IPV6: + cnt = atomicio(read, proxyfd, buf + 4, 18); + if (cnt != 18) + err(1, "read failed (%zu/18)", cnt); + break; + default: + errx(1, "connection failed, unsupported address type"); + } + } else if (socksv == 4) { + /* This will exit on lookup failure */ + decode_addrport(host, port, (struct sockaddr *)&addr, + sizeof(addr), 1, 0); + + /* Version 4 */ + buf[0] = SOCKS_V4; + buf[1] = SOCKS_CONNECT; /* connect */ + memcpy(buf + 2, &in4->sin_port, sizeof in4->sin_port); + memcpy(buf + 4, &in4->sin_addr, sizeof in4->sin_addr); + buf[8] = 0; /* empty username */ + wlen = 9; + + cnt = atomicio(vwrite, proxyfd, buf, wlen); + if (cnt != wlen) + err(1, "write failed (%zu/%zu)", cnt, wlen); + + cnt = atomicio(read, proxyfd, buf, 8); + if (cnt != 8) + err(1, "read failed (%zu/8)", cnt); + if (buf[1] != 90) { + errx(1, "connection failed, SOCKSv4 error: %s", + socks4_strerror(buf[1])); + } + } else if (socksv == -1) { + /* HTTP proxy CONNECT */ + + /* Disallow bad chars in hostname */ + if (strcspn(host, "\r\n\t []:") != strlen(host)) + errx(1, "Invalid hostname"); + + /* Try to be sane about numeric IPv6 addresses */ + if (strchr(host, ':') != NULL) { + r = snprintf(buf, sizeof(buf), + "CONNECT [%s]:%d HTTP/1.0\r\n", + host, ntohs(serverport)); + } else { + r = snprintf(buf, sizeof(buf), + "CONNECT %s:%d HTTP/1.0\r\n", + host, ntohs(serverport)); + } + if (r < 0 || (size_t)r >= sizeof(buf)) + errx(1, "hostname too long"); + r = strlen(buf); + + cnt = atomicio(vwrite, proxyfd, buf, r); 
+ if (cnt != r) + err(1, "write failed (%zu/%d)", cnt, r); + + if (authretry > 1) { + char proxypass[256]; + char resp[1024]; + + getproxypass(proxyuser, proxyhost, + proxypass, sizeof proxypass); + r = snprintf(buf, sizeof(buf), "%s:%s", + proxyuser, proxypass); + explicit_bzero(proxypass, sizeof proxypass); + if (r == -1 || (size_t)r >= sizeof(buf) || + b64_ntop(buf, strlen(buf), resp, + sizeof(resp)) == -1) + errx(1, "Proxy username/password too long"); + r = snprintf(buf, sizeof(buf), "Proxy-Authorization: " + "Basic %s\r\n", resp); + if (r < 0 || (size_t)r >= sizeof(buf)) + errx(1, "Proxy auth response too long"); + r = strlen(buf); + if ((cnt = atomicio(vwrite, proxyfd, buf, r)) != r) + err(1, "write failed (%zu/%d)", cnt, r); + explicit_bzero(proxypass, sizeof proxypass); + explicit_bzero(buf, sizeof buf); + } + + /* Terminate headers */ + if ((cnt = atomicio(vwrite, proxyfd, "\r\n", 2)) != 2) + err(1, "write failed (%zu/2)", cnt); + + /* Read status reply */ + proxy_read_line(proxyfd, buf, sizeof(buf)); + if (proxyuser != NULL && + (strncmp(buf, "HTTP/1.0 407 ", 12) == 0 || + strncmp(buf, "HTTP/1.1 407 ", 12) == 0)) { + if (authretry > 1) { + fprintf(stderr, "Proxy authentication " + "failed\n"); + } + close(proxyfd); + goto again; + } else if (strncmp(buf, "HTTP/1.0 200 ", 12) != 0 && + strncmp(buf, "HTTP/1.1 200 ", 12) != 0) + errx(1, "Proxy error: \"%s\"", buf); + + /* Headers continue until we hit an empty line */ + for (r = 0; r < HTTP_MAXHDRS; r++) { + proxy_read_line(proxyfd, buf, sizeof(buf)); + if (*buf == '\0') + break; + } + if (*buf != '\0') + errx(1, "Too many proxy headers received"); + } else + errx(1, "Unknown proxy protocol %d", socksv); + + return (proxyfd); +} diff --git a/usr.bin/patch/CVS/Entries b/usr.bin/patch/CVS/Entries new file mode 100644 index 0000000..aab1841 --- /dev/null +++ b/usr.bin/patch/CVS/Entries @@ -0,0 +1,16 @@ +/Makefile/1.5/Fri Oct 16 07:33:47 2015// +/backupfile.c/1.21/Tue Nov 26 13:19:07 2013// 
+/backupfile.h/1.6/Mon Jul 28 18:35:36 2003// +/common.h/1.30/Mon Dec 2 22:17:32 2019// +/ed.c/1.4/Mon Dec 2 22:17:32 2019// +/ed.h/1.1/Fri Oct 16 07:33:47 2015// +/inp.c/1.49/Fri Jun 28 13:35:02 2019// +/inp.h/1.8/Fri Aug 15 08:00:51 2003// +/mkpath.c/1.4/Tue May 20 01:25:23 2014// +/patch.1/1.32/Fri Jun 22 15:37:15 2018// +/patch.c/1.69/Mon Dec 2 22:17:32 2019// +/pch.c/1.62/Mon Dec 2 22:23:19 2019// +/pch.h/1.13/Wed Dec 11 20:10:17 2019// +/util.c/1.45/Mon Dec 2 22:17:32 2019// +/util.h/1.18/Sat Aug 17 14:25:06 2019// +D diff --git a/usr.bin/patch/CVS/Repository b/usr.bin/patch/CVS/Repository new file mode 100644 index 0000000..f6f2ebb --- /dev/null +++ b/usr.bin/patch/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/patch diff --git a/usr.bin/patch/CVS/Root b/usr.bin/patch/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/patch/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/patch/Makefile b/usr.bin/patch/Makefile new file mode 100644 index 0000000..7ff9ec2 --- /dev/null +++ b/usr.bin/patch/Makefile @@ -0,0 +1,6 @@ +# $OpenBSD: Makefile,v 1.5 2015/10/16 07:33:47 tobias Exp $ + +PROG= patch +SRCS= patch.c pch.c inp.c util.c backupfile.c mkpath.c ed.c + +.include <bsd.prog.mk> diff --git a/usr.bin/patch/backupfile.c b/usr.bin/patch/backupfile.c new file mode 100644 index 0000000..243d00e --- /dev/null +++ b/usr.bin/patch/backupfile.c @@ -0,0 +1,237 @@ +/* $OpenBSD: backupfile.c,v 1.21 2013/11/26 13:19:07 deraadt Exp $ */ + +/* + * backupfile.c -- make Emacs style backup file names Copyright (C) 1990 Free + * Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * without restriction. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* + * David MacKenzie <djm@ai.mit.edu>. Some algorithms adapted from GNU Emacs. 
+ */ + +#include <ctype.h> +#include <dirent.h> +#include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "backupfile.h" + + +#define ISDIGIT(c) (isascii (c) && isdigit (c)) + +/* Which type of backup file names are generated. */ +enum backup_type backup_type = none; + +/* + * The extension added to file names to produce a simple (as opposed to + * numbered) backup file name. + */ +char *simple_backup_suffix = "~"; + +static char *concat(const char *, const char *); +static char *make_version_name(const char *, int); +static int max_backup_version(const char *, const char *); +static int version_number(const char *, const char *, size_t); +static int argmatch(const char *, const char **); +static void invalid_arg(const char *, const char *, int); + +/* + * Return the name of the new backup file for file FILE, allocated with + * malloc. Return 0 if out of memory. FILE must not end with a '/' unless it + * is the root directory. Do not call this function if backup_type == none. + */ +char * +find_backup_file_name(const char *file) +{ + char *path, *base_versions; + int highest_backup; + + if (backup_type == simple) + return concat(file, simple_backup_suffix); + path = strdup(file); + if (path == NULL) + return NULL; + base_versions = concat(basename(path), ".~"); + if (base_versions == NULL) { + free(path); + return NULL; + } + highest_backup = max_backup_version(base_versions, dirname(path)); + free(base_versions); + free(path); + if (backup_type == numbered_existing && highest_backup == 0) + return concat(file, simple_backup_suffix); + return make_version_name(file, highest_backup + 1); +} + +/* + * Return the number of the highest-numbered backup file for file FILE in + * directory DIR. If there are no numbered backups of FILE in DIR, or an + * error occurs reading DIR, return 0. FILE should already have ".~" appended + * to it. 
+ */
+static int
+max_backup_version(const char *file, const char *dir)
+{
+	DIR *dirp;
+	struct dirent *dp;
+	int highest_version, this_version;
+	size_t file_name_length;
+
+	dirp = opendir(dir);
+	if (dirp == NULL)
+		return 0;
+
+	highest_version = 0;
+	file_name_length = strlen(file);
+
+	while ((dp = readdir(dirp)) != NULL) {
+		/* too short to be FILE followed by at least one more byte */
+		if (strlen(dp->d_name) <= file_name_length)
+			continue;
+
+		this_version = version_number(file, dp->d_name, file_name_length);
+		if (this_version > highest_version)
+			highest_version = this_version;
+	}
+	closedir(dirp);
+	return highest_version;
+}
+
+/*
+ * Return a string, allocated with malloc, containing "FILE.~VERSION~".
+ * Return NULL if out of memory.
+ */
+static char *
+make_version_name(const char *file, int version)
+{
+	char *backup_name;
+
+	if (asprintf(&backup_name, "%s.~%d~", file, version) == -1)
+		return NULL;
+	return backup_name;
+}
+
+/*
+ * If BACKUP is a numbered backup of BASE, return its version number;
+ * otherwise return 0. BASE_LENGTH is the length of BASE. BASE should
+ * already have ".~" appended to it.
+ *
+ * BACKUP counts as a numbered backup only if it is exactly BASE, one or
+ * more decimal digits and a closing '~'. The digits come straight from a
+ * directory entry, so a run too long to fit in an int is rejected (0 is
+ * returned) rather than being allowed to overflow.
+ */
+static int
+version_number(const char *base, const char *backup, size_t base_length)
+{
+	/*
+	 * Largest version accepted. POSIX guarantees that int can hold
+	 * 2147483647; stopping one short also keeps the "+ 1" applied by
+	 * find_backup_file_name() in range.
+	 */
+	enum { VERSION_LIMIT = 2147483646 };
+	int version, digit;
+	const char *p;
+
+	if (strncmp(base, backup, base_length) != 0 ||
+	    !ISDIGIT((unsigned char)backup[base_length]))
+		return 0;
+
+	version = 0;
+	for (p = &backup[base_length]; ISDIGIT((unsigned char)*p); ++p) {
+		digit = *p - '0';
+		/* refuse before the multiply-add could overflow */
+		if (version > (VERSION_LIMIT - digit) / 10)
+			return 0;
+		version = version * 10 + digit;
+	}
+
+	/* the digits must be followed by '~' and nothing else */
+	if (p[0] != '~' || p[1] != '\0')
+		return 0;
+	return version;
+}
+
+/*
+ * Return the newly-allocated concatenation of STR1 and STR2. If out of
+ * memory, return 0.
+ */
+static char *
+concat(const char *str1, const char *str2)
+{
+	char *joined;
+
+	return asprintf(&joined, "%s%s", str1, str2) == -1 ? NULL : joined;
+}
+
+/*
+ * Look ARG up in the NULL-terminated string array OPTLIST, accepting
+ * abbreviations.
+ *
+ * Returns the index of the entry ARG names. An exact match wins
+ * immediately; otherwise ARG must be a prefix of exactly one entry.
+ * Returns -1 when nothing matches and -2 when ARG is a prefix of more
+ * than one entry.
+ */
+static int
+argmatch(const char *arg, const char **optlist)
+{
+	const size_t arglen = strlen(arg);
+	int candidate = -1;	/* first entry ARG is a proper prefix of */
+	int ambiguous = 0;	/* set once a second such entry shows up */
+	int i;
+
+	for (i = 0; optlist[i] != NULL; i++) {
+		if (strncmp(optlist[i], arg, arglen) != 0)
+			continue;
+
+		/*
+		 * The first arglen bytes agree, so the entry is an exact
+		 * match precisely when it ends right there.
+		 */
+		if (optlist[i][arglen] == '\0')
+			return i;
+
+		if (candidate == -1)
+			candidate = i;
+		else
+			ambiguous = 1;
+	}
+
+	return ambiguous ? -2 : candidate;
+}
+
+/*
+ * Report a failed argmatch() lookup on stderr. KIND says what sort of
+ * thing was being looked up, VALUE is the offending text and PROBLEM is
+ * the value argmatch() returned (-1 invalid, anything else is taken to
+ * mean ambiguous).
+ */
+static void
+invalid_arg(const char *kind, const char *value, int problem)
+{
+	fputs("patch: ", stderr);
+	fputs(problem == -1 ? "invalid" : "ambiguous", stderr);
+	fprintf(stderr, " %s `%s'\n", kind, value);
+}
+
+/* Names accepted by get_version(); backup_types[] runs in parallel. */
+static const char *backup_args[] = {
+	"never", "simple",
+	"nil", "existing",
+	"t", "numbered",
+	0
+};
+
+static enum backup_type backup_types[] = {
+	simple, simple,
+	numbered_existing, numbered_existing,
+	numbered, numbered
+};
+
+/*
+ * Return the type of backup indicated by VERSION. Unique abbreviations are
+ * accepted.
+ */
+enum backup_type
+get_version(const char *version)
+{
+	int match;
+
+	/* a missing or empty name selects numbered_existing */
+	if (version != NULL && *version != '\0') {
+		match = argmatch(version, backup_args);
+		if (match < 0) {
+			/* unknown or ambiguous name: complain and give up */
+			invalid_arg("version control type", version, match);
+			exit(2);
+		}
+		return backup_types[match];
+	}
+	return numbered_existing;
+}
diff --git a/usr.bin/patch/backupfile.h b/usr.bin/patch/backupfile.h
new file mode 100644
index 0000000..7c20ddb
--- /dev/null
+++ b/usr.bin/patch/backupfile.h
@@ -0,0 +1,38 @@
+/* $OpenBSD: backupfile.h,v 1.6 2003/07/28 18:35:36 otto Exp $ */
+
+/*
+ * backupfile.h -- declarations for making Emacs style backup file names
+ * Copyright (C) 1990 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * without restriction.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* When to make backup files. */
+enum backup_type {
+	/* Never make backups. */
+	none,
+
+	/* Make simple backups of every file. */
+	simple,
+
+	/*
+	 * Make numbered backups of files that already have numbered backups,
+	 * and simple backups of the others.
+	 */
+	numbered_existing,
+
+	/* Make numbered backups of every file. */
+	numbered
+};
+
+extern enum backup_type backup_type;
+extern char *simple_backup_suffix;
+
+char *find_backup_file_name(const char *file);
+enum backup_type get_version(const char *version);
diff --git a/usr.bin/patch/common.h b/usr.bin/patch/common.h
new file mode 100644
index 0000000..8165d5b
--- /dev/null
+++ b/usr.bin/patch/common.h
@@ -0,0 +1,111 @@
+/* $OpenBSD: common.h,v 1.30 2019/12/02 22:17:32 jca Exp $ */
+
+/*
+ * patch - a program to apply diffs to original files
+ *
+ * Copyright 1986, Larry Wall
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following condition is met:
+ * 1.
Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#include <sys/types.h> + +#include <limits.h> +#include <stdbool.h> + +#define DEBUGGING + +/* constants */ + +#define MAXHUNKSIZE 100000 /* is this enough lines? 
*/ +#define INITHUNKMAX 125 /* initial dynamic allocation size */ +#define INITLINELEN 8192 +#define BUFFERSIZE 1024 +#define LINENUM_MAX LONG_MAX + +#define ORIGEXT ".orig" +#define REJEXT ".rej" + +/* handy definitions */ + +#define strNE(s1,s2) (strcmp(s1, s2)) +#define strEQ(s1,s2) (!strcmp(s1, s2)) +#define strnNE(s1,s2,l) (strncmp(s1, s2, l)) +#define strnEQ(s1,s2,l) (!strncmp(s1, s2, l)) + +/* typedefs */ + +typedef long LINENUM; /* must be signed */ + +/* globals */ + +extern mode_t filemode; + +extern char *buf; /* general purpose buffer */ +extern size_t bufsz; /* general purpose buffer size */ + +extern bool using_plan_a; /* try to keep everything in memory */ +extern bool out_of_mem; /* ran out of memory in plan a */ + +#define MAXFILEC 2 + +extern char *filearg[MAXFILEC]; +extern bool ok_to_create_file; +extern char *outname; +extern char *origprae; + +extern char *TMPOUTNAME; +extern char *TMPINNAME; +extern char *TMPREJNAME; +extern char *TMPPATNAME; +extern bool toutkeep; +extern bool trejkeep; + +#ifdef DEBUGGING +extern int debug; +#endif + +extern bool force; +extern bool batch; +extern bool verbose; +extern bool reverse; +extern bool noreverse; +extern bool skip_rest_of_patch; +extern int strippath; +extern bool canonicalize; +/* TRUE if -C was specified on command line. 
*/ +extern bool check_only; +extern bool warn_on_invalid_line; +extern bool last_line_missing_eol; + + +#define CONTEXT_DIFF 1 +#define NORMAL_DIFF 2 +#define ED_DIFF 3 +#define NEW_CONTEXT_DIFF 4 +#define UNI_DIFF 5 + +extern int diff_type; +extern char *revision; /* prerequisite revision, if any */ +extern LINENUM input_lines; /* how long is input file in lines */ + +extern int posix; + diff --git a/usr.bin/patch/ed.c b/usr.bin/patch/ed.c new file mode 100644 index 0000000..3b83cb3 --- /dev/null +++ b/usr.bin/patch/ed.c @@ -0,0 +1,336 @@ +/* $OpenBSD: ed.c,v 1.4 2019/12/02 22:17:32 jca Exp $ */ + +/* + * Copyright (c) 2015 Tobias Stoeckmann <tobias@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "util.h"
+#include "pch.h"
+#include "inp.h"
+
+/* states of finite state machine */
+#define FSM_CMD 1
+#define FSM_A 2
+#define FSM_C 3
+#define FSM_D 4
+#define FSM_I 5
+#define FSM_S 6
+
+#define SRC_INP 1 /* line's origin is input file */
+#define SRC_PCH 2 /* line's origin is patch file */
+
+#define S_PATTERN "/.//"
+
+static void init_lines(void);
+static void free_lines(void);
+static struct ed_line *get_line(LINENUM);
+static struct ed_line *create_line(off_t);
+static int valid_addr(LINENUM, LINENUM);
+static int get_command(void);
+static void write_lines(char *);
+
+LIST_HEAD(ed_head, ed_line) head;
+struct ed_line {
+	LIST_ENTRY(ed_line) entries;
+	int src;
+	unsigned long subst;
+	union {
+		LINENUM lineno;
+		off_t seek;
+	} pos;
+};
+
+static LINENUM first_addr;
+static LINENUM second_addr;
+static LINENUM line_count;
+static struct ed_line *cline; /* current line */
+
+/*
+ * Apply the ed script found in the patch file.
+ *
+ * The input file is first represented as a list with one node per line.
+ * Each script command then edits that list: c/d unlink nodes, a/i/c text
+ * lines add nodes that remember where in the patch file their text lives,
+ * and s bumps the node's substitution counter. Once the script ends the
+ * list is written to TMPOUTNAME and, unless this is only a check run,
+ * moved over outname.
+ */
+void
+do_ed_script(void)
+{
+	struct ed_line *lp;
+	off_t where;
+	LINENUM count;
+	int state = FSM_CMD;
+
+	init_lines();
+	cline = NULL;
+
+	for (;;) {
+		where = ftello(pfp);
+		if (pgetline(&buf, &bufsz, pfp) == -1)
+			break;
+		p_input_line++;
+
+		if (state != FSM_CMD) {
+			/* collecting text for a/c/i; a lone "." ends it */
+			if (strcmp(buf, ".\n") == 0) {
+				state = FSM_CMD;
+				continue;
+			}
+
+			lp = create_line(where);
+			if (cline == NULL)
+				LIST_INSERT_HEAD(&head, lp, entries);
+			else if (state == FSM_A)
+				LIST_INSERT_AFTER(cline, lp, entries);
+			else
+				LIST_INSERT_BEFORE(cline, lp, entries);
+			cline = lp;
+			line_count++;
+			/* anything further goes after the line just added */
+			state = FSM_A;
+			continue;
+		}
+
+		state = get_command();
+		if (state == -1)
+			break;
+
+		if (state == FSM_C || state == FSM_D) {
+			/* delete lines in specified range */
+			count = (second_addr == -1) ?
+			    1 : second_addr - first_addr + 1;
+			while (count-- > 0) {
+				lp = LIST_NEXT(cline, entries);
+				LIST_REMOVE(cline, entries);
+				free(cline);
+				cline = lp;
+				line_count--;
+			}
+			cline = get_line(first_addr - 1);
+			/* "c" goes on to read replacement text, "d" is done */
+			state = (state == FSM_C) ? FSM_A : FSM_CMD;
+		} else if (state == FSM_S) {
+			cline->subst++;
+			state = FSM_CMD;
+		}
+		/* FSM_A and FSM_I simply switch to text collection */
+	}
+
+	next_intuit_at(where, p_input_line);
+
+	if (skip_rest_of_patch) {
+		free_lines();
+		return;
+	}
+
+	write_lines(TMPOUTNAME);
+	free_lines();
+
+	ignore_signals();
+	if (!check_only) {
+		if (move_file(TMPOUTNAME, outname) < 0) {
+			toutkeep = true;
+			chmod(TMPOUTNAME, filemode);
+		} else {
+			chmod(outname, filemode);
+		}
+	}
+	set_signals(1);
+}
+
+/*
+ * Parse the command held in buf.
+ *
+ * On success first_addr/second_addr are set (second_addr is -1 when no
+ * range was given), cline points at the line first_addr names (NULL for
+ * address 0) and the matching FSM_* state is returned. -1 is returned
+ * when the text does not look like a supported command, which is taken as
+ * the end of the script. Addresses outside the current line list and
+ * unsupported forms of a/i/s are fatal.
+ */
+static int
+get_command(void)
+{
+	char *p = buf;
+	LINENUM lowest = 0;	/* smallest first address the command allows */
+	int cmd, fsm;
+
+	/* maybe garbage encountered at end of patch */
+	if (!isdigit((unsigned char)*p))
+		return -1;
+
+	first_addr = strtolinenum(buf, &p);
+	if (*p == ',')
+		second_addr = strtolinenum(p + 1, &p);
+	else
+		second_addr = -1;
+
+	cmd = *p++;
+	switch (cmd) {
+	case 'a':
+	case 'i':
+		if (second_addr != -1)
+			fatal("invalid address at line %ld: %s",
+			    p_input_line, buf);
+		fsm = (cmd == 'a') ? FSM_A : FSM_I;
+		break;
+	case 'c':
+	case 'd':
+		fsm = (cmd == 'c') ? FSM_C : FSM_D;
+		lowest = 1;
+		break;
+	case 's':
+		if (second_addr != -1)
+			fatal("unsupported address range at line %ld: %s",
+			    p_input_line, buf);
+		if (strncmp(p, S_PATTERN, sizeof(S_PATTERN) - 1) != 0)
+			fatal("unsupported substitution at "
+			    "line %ld: %s", p_input_line, buf);
+		p += sizeof(S_PATTERN) - 1;
+		fsm = FSM_S;
+		lowest = 1;
+		break;
+	default:
+		return -1;
+	}
+
+	/* nothing may follow the command */
+	if (*p != '\n')
+		return -1;
+
+	if (!valid_addr(first_addr, lowest) ||
+	    (second_addr != -1 && !valid_addr(second_addr, first_addr)))
+		fatal("invalid address at line %ld: %s", p_input_line, buf);
+
+	cline = get_line(first_addr);
+
+	return fsm;
+}
+
+/*
+ * Write the edited line list to FILENAME. Text comes from the input file
+ * or is re-read from the patch file, depending on each node's origin; the
+ * first `subst' characters of a line are dropped. The patch file offset
+ * is saved on entry and restored before returning.
+ */
+static void
+write_lines(char *filename)
+{
+	struct ed_line *line;
+	FILE *ofp;
+	char *p;
+	off_t saved;
+
+	saved = ftello(pfp);
+	ofp = fopen(filename, "w");
+	if (ofp == NULL)
+		pfatal("can't create %s", filename);
+
+	LIST_FOREACH(line, &head, entries) {
+		switch (line->src) {
+		case SRC_INP:
+			p = ifetch(line->pos.lineno, 0);
+			/* Note: string is not NUL terminated. */
+			while (*p != '\n') {
+				if (line->subst != 0)
+					line->subst--;
+				else
+					putc(*p, ofp);
+				p++;
+			}
+			putc('\n', ofp);
+			break;
+		case SRC_PCH:
+			fseeko(pfp, line->pos.seek, SEEK_SET);
+			if (pgetline(&buf, &bufsz, pfp) == -1)
+				fatal("unexpected end of file");
+			p = buf;
+			if (line->subst != 0) {
+				while (*p != '\0' && *p != '\n') {
+					if (line->subst-- == 0)
+						break;
+					p++;
+				}
+			}
+			fputs(p, ofp);
+			if (strchr(p, '\n') == NULL)
+				putc('\n', ofp);
+			break;
+		default:
+			break;
+		}
+	}
+	fclose(ofp);
+
+	/* restore patch file position to match p_input_line */
+	fseeko(pfp, saved, SEEK_SET);
+}
+
+/* initialize list with input file */
+static void
+init_lines(void)
+{
+	struct ed_line *node;
+	LINENUM n;
+
+	LIST_INIT(&head);
+	/* insert back to front so the list ends up in file order */
+	for (n = input_lines; n > 0; n--) {
+		node = malloc(sizeof(*node));
+		if (node == NULL)
+			fatal("cannot allocate memory");
+		node->src = SRC_INP;
+		node->subst = 0;
+		node->pos.lineno = n;
+		LIST_INSERT_HEAD(&head, node, entries);
+	}
+	line_count = input_lines;
+}
+
+/* release every node of the line list */
+static void
+free_lines(void)
+{
+	struct ed_line *node;
+
+	while ((node = LIST_FIRST(&head)) != NULL) {
+		LIST_REMOVE(node, entries);
+		free(node);
+	}
+}
+
+/*
+ * Return the node for line LINENO (counted from 1), or NULL if LINENO is
+ * 0 or the list has no such line.
+ */
+static struct ed_line *
+get_line(LINENUM lineno)
+{
+	struct ed_line *node;
+	LINENUM seen = 0;
+
+	if (lineno == 0)
+		return NULL;
+
+	LIST_FOREACH(node, &head, entries) {
+		seen++;
+		if (seen == lineno)
+			return node;
+	}
+
+	return NULL;
+}
+
+/* allocate a node for a text line stored at offset SEEK of the patch file */
+static struct ed_line *
+create_line(off_t seek)
+{
+	struct ed_line *node;
+
+	node = malloc(sizeof(*node));
+	if (node == NULL)
+		fatal("cannot allocate memory");
+	node->src = SRC_PCH;
+	node->subst = 0;
+	node->pos.seek = seek;
+
+	return node;
+}
+
+/* nonzero if LINENO lies within [MIN, line_count] */
+static int
+valid_addr(LINENUM lineno, LINENUM min)
+{
+	if (lineno < min)
+		return 0;
+	return lineno <= line_count;
+}
diff --git a/usr.bin/patch/ed.h b/usr.bin/patch/ed.h
new file mode 100644
index 0000000..86bf9b9
--- /dev/null
+++ b/usr.bin/patch/ed.h
@@ -0,0 +1,19 @@
+/* $OpenBSD: ed.h,v 1.1 2015/10/16 07:33:47 tobias Exp $ */
+
+/*
+ * Copyright (c) 2015 Tobias Stoeckmann
<tobias@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +void do_ed_script(void); diff --git a/usr.bin/patch/inp.c b/usr.bin/patch/inp.c new file mode 100644 index 0000000..3583814 --- /dev/null +++ b/usr.bin/patch/inp.c @@ -0,0 +1,430 @@ +/* $OpenBSD: inp.c,v 1.49 2019/06/28 13:35:02 deraadt Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#include <sys/stat.h> +#include <sys/mman.h> + +#include <ctype.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "pch.h" +#include "inp.h" + + +/* Input-file-with-indexable-lines abstract type */ + +static off_t i_size; /* size of the input file */ +static char *i_womp; /* plan a buffer for entire file */ +static char **i_ptr; /* pointers to lines in i_womp */ + +static int tifd = -1; /* plan b virtual string array */ +static char *tibuf[2]; /* plan b buffers */ +static LINENUM tiline[2] = {-1, -1}; /* 1st line in each buffer */ +static size_t lines_per_buf; /* how many lines per buffer */ +static size_t tibuflen; /* plan b buffer length */ +static size_t tireclen; /* length of records in tmp file */ + +static bool rev_in_string(const char *); +static bool reallocate_lines(size_t *); + +/* returns false if insufficient memory */ +static bool plan_a(const char *); + +static void plan_b(const char *); + +/* New patch--prepare to edit another file. 
*/
+
+void
+re_input(void)
+{
+	if (!using_plan_a) {
+		/* tear down plan b state */
+		using_plan_a = true;	/* maybe the next one is smaller */
+		close(tifd);
+		tifd = -1;
+		free(tibuf[0]);
+		free(tibuf[1]);
+		tibuf[0] = tibuf[1] = NULL;
+		tiline[0] = tiline[1] = -1;
+		tireclen = 0;
+		return;
+	}
+
+	/* tear down plan a state */
+	free(i_ptr);
+	i_ptr = NULL;
+	if (i_womp != NULL) {
+		munmap(i_womp, i_size);
+		i_womp = NULL;
+	}
+	i_size = 0;
+}
+
+/* Build the line index for FILENAME: plan a if it works, else plan b. */
+
+void
+scan_input(const char *filename)
+{
+	if (!plan_a(filename))
+		plan_b(filename);
+	if (verbose) {
+		say("Patching file %s using Plan %s...\n", filename,
+		    (using_plan_a ? "A" : "B"));
+	}
+}
+
+/*
+ * Grow i_ptr to one and a half times *LINES_ALLOCATEDP entries (plus two
+ * spare slots) and store the new count. On failure all plan a state is
+ * released, the count is zeroed and false is returned.
+ */
+static bool
+reallocate_lines(size_t *lines_allocatedp)
+{
+	const size_t wanted = *lines_allocatedp * 3 / 2;
+	char **grown;
+
+	grown = reallocarray(i_ptr, wanted + 2, sizeof(char *));
+	if (grown != NULL) {
+		i_ptr = grown;
+		*lines_allocatedp = wanted;
+		return true;
+	}
+
+	/* shucks, it was a near thing */
+	munmap(i_womp, i_size);
+	i_womp = NULL;
+	free(i_ptr);
+	i_ptr = NULL;
+	*lines_allocatedp = 0;
+	return false;
+}
+
+/*
+ * Shared tail of plan_a() and plan_b(): act on the outcome of looking for
+ * the prerequisite revision. FOUND says whether it was seen in the input.
+ * Only called when a revision was asked for.
+ */
+static void
+inp_check_revision(bool found)
+{
+	if (found) {
+		if (verbose)
+			say("Good. This file appears to be the %s version.\n",
+			    revision);
+		return;
+	}
+
+	if (force) {
+		if (verbose)
+			say("Warning: this file doesn't appear "
+			    "to be the %s version--patching anyway.\n",
+			    revision);
+	} else if (batch) {
+		fatal("this file doesn't appear to be the "
+		    "%s version--aborting.\n",
+		    revision);
+	} else {
+		ask("This file doesn't appear to be the "
+		    "%s version--patch anyway? [n] ",
+		    revision);
+		if (*buf != 'y')
+			fatal("aborted\n");
+	}
+}
+
+/*
+ * Plan a: map the whole file and index its lines in memory. Returns
+ * false if that is not possible, so that the caller falls back on plan b.
+ */
+
+static bool
+plan_a(const char *filename)
+{
+	int ifd, statfailed;
+	char *fixed, *cp;
+	struct stat filestat;
+	off_t off;
+	ptrdiff_t taillen;
+	size_t iline, lines_allocated;
+
+#ifdef DEBUGGING
+	if (debug & 8)
+		return false;
+#endif
+
+	if (filename == NULL || *filename == '\0')
+		return false;
+
+	statfailed = stat(filename, &filestat);
+	if (statfailed && ok_to_create_file) {
+		int fd;
+
+		if (verbose)
+			say("(Creating file %s...)\n", filename);
+
+		/*
+		 * in check_patch case, we still display `Creating file' even
+		 * though we're not. The rule is that -C should be as similar
+		 * to normal patch behavior as possible
+		 */
+		if (check_only)
+			return true;
+		makedirs(filename, true);
+		fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY, 0666);
+		if (fd != -1)
+			close(fd);
+
+		statfailed = stat(filename, &filestat);
+	}
+	if (statfailed)
+		fatal("can't find %s\n", filename);
+	filemode = filestat.st_mode;
+	if (!S_ISREG(filemode))
+		fatal("%s is not a normal file--can't patch\n", filename);
+	i_size = filestat.st_size;
+	if (out_of_mem) {
+		set_hunkmax();	/* make sure dynamic arrays are allocated */
+		out_of_mem = false;
+		return false;	/* force plan b because plan a bombed */
+	}
+	if (i_size > SIZE_MAX) {
+		say("block too large to mmap\n");
+		return false;
+	}
+	ifd = open(filename, O_RDONLY);
+	if (ifd == -1)
+		pfatal("can't open file %s", filename);
+
+	if (i_size) {
+		i_womp = mmap(NULL, i_size, PROT_READ, MAP_PRIVATE, ifd, 0);
+		if (i_womp == MAP_FAILED) {
+			perror("mmap failed");
+			i_womp = NULL;
+			close(ifd);
+			return false;
+		}
+	} else {
+		/* nothing to map for an empty file */
+		i_womp = NULL;
+	}
+
+	close(ifd);
+	if (i_size)
+		madvise(i_womp, i_size, MADV_SEQUENTIAL);
+
+	/* estimate the number of lines */
+	lines_allocated = i_size / 25;
+	if (lines_allocated < 100)
+		lines_allocated = 100;
+
+	if (!reallocate_lines(&lines_allocated))
+		return false;
+
+	/* now scan the buffer and build pointer array */
+	iline = 1;
+	i_ptr[iline] = i_womp;
+	/* test for NUL too, to maintain the behavior of the original code */
+	for (cp = i_womp, off = 0; off < i_size && *cp != '\0'; cp++, off++) {
+		if (*cp != '\n')
+			continue;
+		if (iline == lines_allocated &&
+		    !reallocate_lines(&lines_allocated))
+			return false;
+		/* these are NOT NUL terminated */
+		i_ptr[++iline] = cp + 1;
+	}
+	/* if the last line contains no EOL, append one */
+	if (i_size > 0 && i_womp[i_size - 1] != '\n') {
+		last_line_missing_eol = true;
+		/* fix last line: use a private copy that ends in '\n' */
+		taillen = cp - i_ptr[iline];
+		fixed = malloc(taillen + 1);
+		if (fixed == NULL) {
+			free(i_ptr);
+			i_ptr = NULL;
+			munmap(i_womp, i_size);
+			i_womp = NULL;
+			return false;
+		}
+
+		memcpy(fixed, i_ptr[iline], taillen);
+		fixed[taillen] = '\n';
+		i_ptr[iline] = fixed;
+		/* count the extra line and make it point to some valid mem */
+		i_ptr[++iline] = "";
+	} else {
+		last_line_missing_eol = false;
+	}
+
+	input_lines = iline - 1;
+
+	/* now check for revision, if any */
+
+	if (revision != NULL)
+		inp_check_revision(i_womp != NULL && rev_in_string(i_womp));
+
+	return true;	/* plan a will work */
+}
+
+/*
+ * Plan b: keep (virtually) nothing in memory. The input is copied into a
+ * temporary file of fixed-length records, which ifetch() later pages in
+ * through the two tibuf buffers.
+ */
+
+static void
+plan_b(const char *filename)
+{
+	FILE *ifp;
+	size_t i = 0, j, len, maxlen = 1;
+	char *lbuf = NULL, *p;
+	bool found_revision = (revision == NULL);
+
+	using_plan_a = false;
+	ifp = fopen(filename, "r");
+	if (ifp == NULL)
+		pfatal("can't open file %s", filename);
+	(void) unlink(TMPINNAME);
+	tifd = open(TMPINNAME, O_EXCL | O_CREAT | O_WRONLY, 0666);
+	if (tifd == -1)
+		pfatal("can't open file %s", TMPINNAME);
+
+	/* first pass: find the longest line and look for the revision */
+	while ((p = fgetln(ifp, &len)) != NULL) {
+		if (p[len - 1] == '\n') {
+			p[len - 1] = '\0';
+		} else {
+			/* EOF without EOL, copy and add the NUL */
+			lbuf = malloc(len + 1);
+			if (lbuf == NULL)
+				fatal("out of memory\n");
+			memcpy(lbuf, p, len);
+			lbuf[len] = '\0';
+			p = lbuf;
+
+			last_line_missing_eol = true;
+			len++;
+		}
+		if (revision != NULL && !found_revision && rev_in_string(p))
+			found_revision = true;
+		if (len > maxlen)
+			maxlen = len;	/* find longest line */
+	}
+	free(lbuf);
+	if (ferror(ifp))
+		pfatal("can't read file %s", filename);
+
+	if (revision != NULL)
+		inp_check_revision(found_revision);
+
+	/* second pass: write the lines out as maxlen-sized records */
+	fseek(ifp, 0L, SEEK_SET);	/* rewind file */
+	tireclen = maxlen;
+	tibuflen = maxlen > BUFFERSIZE ? maxlen : BUFFERSIZE;
+	lines_per_buf = tibuflen / maxlen;
+	tibuf[0] = malloc(tibuflen + 1);
+	if (tibuf[0] == NULL)
+		fatal("out of memory\n");
+	tibuf[1] = malloc(tibuflen + 1);
+	if (tibuf[1] == NULL)
+		fatal("out of memory\n");
+	for (i = 1;; i++) {
+		p = tibuf[0] + maxlen * (i % lines_per_buf);
+		if (i % lines_per_buf == 0) {	/* new block */
+			if (write(tifd, tibuf[0], tibuflen) !=
+			    (ssize_t) tibuflen)
+				pfatal("can't write temp file");
+		}
+		if (fgets(p, maxlen + 1, ifp) == NULL) {
+			input_lines = i - 1;
+			/* flush the partly filled last block */
+			if (i % lines_per_buf != 0) {
+				if (write(tifd, tibuf[0], tibuflen) !=
+				    (ssize_t) tibuflen)
+					pfatal("can't write temp file");
+			}
+			break;
+		}
+		j = strlen(p);
+		/* These are '\n' terminated strings, so no need to add a NUL */
+		if (j == 0 || p[j - 1] != '\n')
+			p[j] = '\n';
+	}
+	fclose(ifp);
+	close(tifd);
+	tifd = open(TMPINNAME, O_RDONLY);
+	if (tifd == -1)
+		pfatal("can't reopen file %s", TMPINNAME);
+}
+
+/*
+ * Fetch a line from the input file, \n terminated, not necessarily \0.
+ */
+char *
+ifetch(LINENUM line, int whichbuf)
+{
+	LINENUM offline, baseline;
+
+	if (line < 1 || line > input_lines) {
+		/* complain once only, then stay quiet */
+		if (warn_on_invalid_line) {
+			say("No such line %ld in input file, ignoring\n", line);
+			warn_on_invalid_line = false;
+		}
+		return NULL;
+	}
+
+	if (using_plan_a)
+		return i_ptr[line];
+
+	/* plan b: find the block of the temp file that holds the line */
+	offline = line % lines_per_buf;
+	baseline = line - offline;
+
+	if (tiline[0] == baseline) {
+		whichbuf = 0;
+	} else if (tiline[1] == baseline) {
+		whichbuf = 1;
+	} else {
+		/* in neither buffer: load it into the one the caller named */
+		tiline[whichbuf] = baseline;
+
+		if (lseek(tifd, (off_t) (baseline / lines_per_buf *
+		    tibuflen), SEEK_SET) == -1)
+			pfatal("cannot seek in the temporary input file");
+
+		if (read(tifd, tibuf[whichbuf], tibuflen)
+		    != (ssize_t) tibuflen)
+			pfatal("error reading tmp file %s", TMPINNAME);
+	}
+	return tibuf[whichbuf] + (tireclen * offline);
+}
+
+/*
+ * True if STRING contains the wanted revision as a word of its own:
+ * followed by white space and either at the very start of STRING or
+ * preceded by white space. Always true when no revision was asked for.
+ */
+static bool
+rev_in_string(const char *string)
+{
+	const char *cp;
+	size_t revlen;
+
+	if (revision == NULL)
+		return true;
+	revlen = strlen(revision);
+
+	/* at the very start of the text ... */
+	if (strnEQ(string, revision, revlen) &&
+	    isspace((unsigned char)string[revlen]))
+		return true;
+
+	/* ... or somewhere later, right after a white space character */
+	for (cp = string; *cp != '\0'; cp++) {
+		if (!isspace((unsigned char)*cp))
+			continue;
+		if (strnEQ(cp + 1, revision, revlen) &&
+		    isspace((unsigned char)cp[revlen + 1]))
+			return true;
+	}
+	return false;
+}
diff --git a/usr.bin/patch/inp.h b/usr.bin/patch/inp.h
new file mode 100644
index 0000000..aa66208
--- /dev/null
+++ b/usr.bin/patch/inp.h
@@ -0,0 +1,31 @@
+/* $OpenBSD: inp.h,v 1.8 2003/08/15 08:00:51 otto Exp $ */
+
+/*
+ * patch - a program to apply diffs to original files
+ *
+ * Copyright 1986, Larry Wall
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following condition is met:
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this condition and the following disclaimer.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +void re_input(void); +void scan_input(const char *); +char *ifetch(LINENUM, int); diff --git a/usr.bin/patch/mkpath.c b/usr.bin/patch/mkpath.c new file mode 100644 index 0000000..63d53ec --- /dev/null +++ b/usr.bin/patch/mkpath.c @@ -0,0 +1,77 @@ +/* $OpenBSD: mkpath.c,v 1.4 2014/05/20 01:25:23 guenther Exp $ */ +/* + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <err.h> +#include <errno.h> +#include <string.h> + +#include "common.h" +#include "util.h" + +/* Code taken directly from mkdir(1). + + * mkpath -- create directories. 
+ * path - path
+ *
+ * Every leading component of PATH, and PATH itself, is checked in turn
+ * and created with mode 0777 if it does not exist yet. While a prefix is
+ * examined the string is cut short at the following '/', which is put
+ * back before moving on, so on success PATH is left exactly as passed in.
+ *
+ * Returns 0 on success. Returns -1, after printing a warning, when a
+ * component cannot be examined or created or exists but is not a
+ * directory; PATH is then left cut short after the failing component.
+ */
+int
+mkpath(char *path)
+{
+	struct stat sb;
+	char *slash;
+	int done;
+
+	slash = path;
+
+	for (;;) {
+		/* step over the separators, then over one component */
+		slash += strspn(slash, "/");
+		slash += strcspn(slash, "/");
+
+		done = (*slash == '\0');
+		*slash = '\0';
+
+		if (stat(path, &sb)) {
+			if (errno != ENOENT || (mkdir(path, 0777) &&
+			    errno != EEXIST)) {
+				warn("%s", path);
+				return (-1);
+			}
+		} else if (!S_ISDIR(sb.st_mode)) {
+			warnc(ENOTDIR, "%s", path);
+			return (-1);
+		}
+
+		/*
+		 * On the last component *slash is the string's own
+		 * terminator; writing '/' there would leave the caller's
+		 * buffer unterminated, so stop before restoring.
+		 */
+		if (done)
+			break;
+
+		*slash = '/';
+	}
+
+	return (0);
+}
+
diff --git a/usr.bin/patch/patch.1 b/usr.bin/patch/patch.1
new file mode 100644
index 0000000..8d915b8
--- /dev/null
+++ b/usr.bin/patch/patch.1
@@ -0,0 +1,680 @@
+.\" $OpenBSD: patch.1,v 1.32 2018/06/22 15:37:15 zhuk Exp $
+.\" Copyright 1986, Larry Wall
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following condition
+.\" is met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this condition and the following disclaimer.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\" +.Dd $Mdocdate: June 22 2018 $ +.Dt PATCH 1 +.Os +.Sh NAME +.Nm patch +.Nd apply a diff file to an original +.Sh SYNOPSIS +.Nm patch +.Bk -words +.Op Fl bCcEeflNnRstuv +.Op Fl B Ar backup-prefix +.Op Fl D Ar symbol +.Op Fl d Ar directory +.Op Fl F Ar max-fuzz +.Op Fl i Ar patchfile +.Op Fl o Ar out-file +.Op Fl p Ar strip-count +.Op Fl r Ar rej-name +.Op Fl V Cm t | nil | never +.Op Fl x Ar number +.Op Fl z Ar backup-ext +.Op Fl Fl posix +.Op Ar origfile Op Ar patchfile +.Ek +.Nm patch +.Pf \*(Lt Ar patchfile +.Sh DESCRIPTION +.Nm +will take a patch file containing any of the four forms of difference +listing produced by the +.Xr diff 1 +program and apply those differences to an original file, +producing a patched version. +If +.Ar patchfile +is omitted, or is a hyphen, the patch will be read from the standard input. +.Pp +.Nm +will attempt to determine the type of the diff listing, unless overruled by a +.Fl c , +.Fl e , +.Fl n , +or +.Fl u +option. +.Pp +If the +.Ar patchfile +contains more than one patch, +.Nm +will try to apply each of them as if they came from separate patch files. +This means, among other things, that it is assumed that the name of the file +to patch must be determined for each diff listing, and that the garbage before +each diff listing will be examined for interesting things such as file names +and revision level (see the section on +.Sx Filename Determination +below). +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Xo +.Fl B Ar backup-prefix , +.Fl Fl prefix Ar backup-prefix +.Xc +Causes the next argument to be interpreted as a prefix to the backup file +name. +If this argument is specified, any argument to +.Fl z +will be ignored. +.It Fl b , Fl Fl backup +Save a backup copy of the file before it is modified. +By default the original file is saved with a backup extension of +.Qq .orig +unless the file already has a numbered backup, in which case a numbered +backup is made. 
+This is equivalent to specifying +.Qo Fl V Cm existing Qc . +This option is currently the default, unless +.Fl -posix +is specified. +.It Fl C , Fl Fl check , Fl Fl dry-run +Checks that the patch would apply cleanly, but does not modify anything. +.It Fl c , Fl Fl context +Forces +.Nm +to interpret the patch file as a context diff. +.It Xo +.Fl D Ar symbol , +.Fl Fl ifdef Ar symbol +.Xc +Causes +.Nm +to use the +.Qq #ifdef...#endif +construct to mark changes. +The argument following will be used as the differentiating symbol. +Note that, unlike the C compiler, there must be a space between the +.Fl D +and the argument. +.It Xo +.Fl d Ar directory , +.Fl Fl directory Ar directory +.Xc +Causes +.Nm +to interpret the next argument as a directory, +and change working directory to it before doing anything else. +.It Fl E , Fl Fl remove-empty-files +Causes +.Nm +to remove output files that are empty after the patches have been applied. +This option is useful when applying patches that create or remove files. +.It Fl e , Fl Fl ed +Forces +.Nm +to interpret the patch file as an +.Xr ed 1 +script. +.It Xo +.Fl F Ar max-fuzz , +.Fl Fl fuzz Ar max-fuzz +.Xc +Sets the maximum fuzz factor. +This option only applies to context diffs, and causes +.Nm +to ignore up to that many lines in looking for places to install a hunk. +Note that a larger fuzz factor increases the odds of a faulty patch. +The default fuzz factor is 2, and it may not be set to more than +the number of lines of context in the context diff, ordinarily 3. +.It Fl f , Fl Fl force +Forces +.Nm +to assume that the user knows exactly what he or she is doing, and to not +ask any questions. +It assumes the following: +skip patches for which a file to patch can't be found; +patch files even though they have the wrong version for the +.Qq Prereq: +line in the patch; +and assume that patches are not reversed even if they look like they are. +This option does not suppress commentary; use +.Fl s +for that. 
+.It Xo +.Fl i Ar patchfile , +.Fl Fl input Ar patchfile +.Xc +Causes the next argument to be interpreted as the input file name +(i.e. a patchfile). +This option may be specified multiple times. +.It Fl l , Fl Fl ignore-whitespace +Causes the pattern matching to be done loosely, in case the tabs and +spaces have been munged in your input file. +Any sequence of whitespace in the pattern line will match any sequence +in the input file. +Normal characters must still match exactly. +Each line of the context must still match a line in the input file. +.It Fl N , Fl Fl forward +Causes +.Nm +to ignore patches that it thinks are reversed or already applied. +See also +.Fl R . +.It Fl n , Fl Fl normal +Forces +.Nm +to interpret the patch file as a normal diff. +.It Xo +.Fl o Ar out-file , +.Fl Fl output Ar out-file +.Xc +Causes the next argument to be interpreted as the output file name. +.It Xo +.Fl p Ar strip-count , +.Fl Fl strip Ar strip-count +.Xc +Sets the pathname strip count, +which controls how pathnames found in the patch file are treated, +in case you keep your files in a different directory than the person who sent +out the patch. +The strip count specifies how many slashes are to be stripped from +the front of the pathname. +(Any intervening directory names also go away.) +For example, supposing the file name in the patch file was +.Pa /u/howard/src/blurfl/blurfl.c : +.Pp +Setting +.Fl p Ns Ar 0 +gives the entire pathname unmodified. +.Pp +.Fl p Ns Ar 1 +gives +.Pp +.D1 Pa u/howard/src/blurfl/blurfl.c +.Pp +without the leading slash. +.Pp +.Fl p Ns Ar 4 +gives +.Pp +.D1 Pa blurfl/blurfl.c +.Pp +Not specifying +.Fl p +at all just gives you +.Pa blurfl.c , +unless all of the directories in the leading path +.Pq Pa u/howard/src/blurfl +exist and that path is relative, +in which case you get the entire pathname unmodified. +Whatever you end up with is looked for either in the current directory, +or the directory specified by the +.Fl d +option. 
+.It Fl R , Fl Fl reverse +Tells +.Nm +that this patch was created with the old and new files swapped. +(Yes, I'm afraid that does happen occasionally, human nature being what it +is.) +.Nm +will attempt to swap each hunk around before applying it. +Rejects will come out in the swapped format. +The +.Fl R +option will not work with ed diff scripts because there is too little +information to reconstruct the reverse operation. +.Pp +If the first hunk of a patch fails, +.Nm +will reverse the hunk to see if it can be applied that way. +If it can, you will be asked if you want to have the +.Fl R +option set. +If it can't, the patch will continue to be applied normally. +(Note: this method cannot detect a reversed patch if it is a normal diff +and if the first command is an append (i.e. it should have been a delete) +since appends always succeed, due to the fact that a null context will match +anywhere. +Luckily, most patches add or change lines rather than delete them, so most +reversed normal diffs will begin with a delete, which will fail, triggering +the heuristic.) +.It Xo +.Fl r Ar rej-name , +.Fl Fl reject-file Ar rej-name +.Xc +Causes the next argument to be interpreted as the reject file name. +.It Xo +.Fl s , Fl Fl quiet , +.Fl Fl silent +.Xc +Makes +.Nm +do its work silently, unless an error occurs. +.It Fl t , Fl Fl batch +Similar to +.Fl f , +in that it suppresses questions, but makes some different assumptions: +skip patches for which a file to patch can't be found (the same as +.Fl f ) ; +skip patches for which the file has the wrong version for the +.Qq Prereq: +line in the patch; +and assume that patches are reversed if they look like they are. +.It Fl u , Fl Fl unified +Forces +.Nm +to interpret the patch file as a unified context diff (a unidiff). +.It Xo +.Fl V Cm t | nil | never , +.Fl Fl version-control Cm t | nil | never +.Xc +Causes the next argument to be interpreted as a method for creating +backup file names. 
+The type of backups made can also be given in the +.Ev PATCH_VERSION_CONTROL +or +.Ev VERSION_CONTROL +environment variables, which are overridden by this option. +The +.Fl B +option overrides this option, causing the prefix to always be used for +making backup file names. +The values of the +.Ev PATCH_VERSION_CONTROL +and +.Ev VERSION_CONTROL +environment variables and the argument to the +.Fl V +option are like the GNU Emacs +.Dq version-control +variable; they also recognize synonyms that are more descriptive. +The valid values are (unique abbreviations are accepted): +.Bl -tag -width Ds -offset indent +.It Cm t , numbered +Always make numbered backups. +.It Cm nil , existing +Make numbered backups of files that already have them, +simple backups of the others. +.It Cm never , simple +Always make simple backups. +.El +.It Fl v , Fl Fl version +Causes +.Nm +to print out its revision header and patch level. +.It Xo +.Fl x Ar number , +.Fl Fl debug Ar number +.Xc +Sets internal debugging flags, and is of interest only to +.Nm +patchers. +.It Xo +.Fl z Ar backup-ext , +.Fl Fl suffix Ar backup-ext +.Xc +Causes the next argument to be interpreted as the backup extension, to be +used in place of +.Qq .orig . +.It Fl Fl posix +Enables strict +.St -p1003.1-2008 +conformance, specifically: +.Bl -enum +.It +Backup files are not created unless the +.Fl b +option is specified. +.It +If unspecified, the file name used is the first of the old, new and +index files that exists. +.El +.El +.Ss Patch Application +.Nm +will try to skip any leading garbage, apply the diff, +and then skip any trailing garbage. +Thus you could feed an article or message containing a +diff listing to +.Nm patch , +and it should work. +If the entire diff is indented by a consistent amount, +this will be taken into account. 
+.Pp +With context diffs, and to a lesser extent with normal diffs, +.Nm +can detect when the line numbers mentioned in the patch are incorrect, +and will attempt to find the correct place to apply each hunk of the patch. +As a first guess, it takes the line number mentioned for the hunk, plus or +minus any offset used in applying the previous hunk. +If that is not the correct place, +.Nm +will scan both forwards and backwards for a set of lines matching the context +given in the hunk. +First +.Nm +looks for a place where all lines of the context match. +If no such place is found, and it's a context diff, and the maximum fuzz factor +is set to 1 or more, then another scan takes place ignoring the first and last +line of context. +If that fails, and the maximum fuzz factor is set to 2 or more, +the first two and last two lines of context are ignored, +and another scan is made. +.Pq The default maximum fuzz factor is 2. +.Pp +If +.Nm +cannot find a place to install that hunk of the patch, it will put the hunk +out to a reject file, which normally is the name of the output file plus +.Qq .rej . +(Note that the rejected hunk will come out in context diff form whether the +input patch was a context diff or a normal diff. +If the input was a normal diff, many of the contexts will simply be null.) +The line numbers on the hunks in the reject file may be different than +in the patch file: they reflect the approximate location patch thinks the +failed hunks belong in the new file rather than the old one. +.Pp +As each hunk is completed, you will be told whether the hunk succeeded or +failed, and which line (in the new file) +.Nm +thought the hunk should go on. +If this is different from the line number specified in the diff, +you will be told the offset. +A single large offset MAY be an indication that a hunk was installed in the +wrong place. +You will also be told if a fuzz factor was used to make the match, in which +case you should also be slightly suspicious. 
+.Ss Filename Determination +If no original file is specified on the command line, +.Nm +will try to figure out from the leading garbage what the name of the file +to edit is. +When checking a prospective file name, pathname components are stripped +as specified by the +.Fl p +option and the file's existence and writability are checked relative +to the current working directory (or the directory specified by the +.Fl d +option). +.Pp +If the diff is a context or unified diff, +.Nm +is able to determine the old and new file names from the diff header. +For context diffs, the +.Dq old +file is specified in the line beginning with +.Qq *** +and the +.Dq new +file is specified in the line beginning with +.Qq --- . +For a unified diff, the +.Dq old +file is specified in the line beginning with +.Qq --- +and the +.Dq new +file is specified in the line beginning with +.Qq +++ . +If there is an +.Qq Index: +line in the leading garbage (regardless of the diff type), +.Nm +will use the file name from that line as the +.Dq index +file. +.Pp +.Nm +will choose the file name by performing the following steps, with the first +match used: +.Bl -enum +.It +If +.Nm +is operating in strict +.St -p1003.1-2008 +mode, the first of the +.Dq old , +.Dq new +and +.Dq index +file names that exist is used. +Otherwise, +.Nm +will examine either the +.Dq old +and +.Dq new +file names or, for a non-context diff, the +.Dq index +file name, and choose the file name with the fewest path components, +the shortest basename, and the shortest total file name length (in that order). +.It +If no suitable file was found to patch, the patch file is a context or +unified diff, and the old file was zero length, the new file name is +created and used. +.It +If the file name still cannot be determined, +.Nm +will prompt the user for the file name to use. 
+.El +.Pp +Additionally, if the leading garbage contains a +.Qq Prereq:\ \& +line, +.Nm +will take the first word from the prerequisites line (normally a version +number) and check the input file to see if that word can be found. +If not, +.Nm +will ask for confirmation before proceeding. +.Pp +The upshot of all this is that you should be able to say, while in a news +interface, the following: +.Pp +.Dl | patch -d /usr/src/local/blurfl +.Pp +and patch a file in the blurfl directory directly from the article containing +the patch. +.Ss Backup Files +By default, the patched version is put in place of the original, with +the original file backed up to the same name with the extension +.Qq .orig , +or as specified by the +.Fl B , +.Fl V , +or +.Fl z +options. +The extension used for making backup files may also be specified in the +.Ev SIMPLE_BACKUP_SUFFIX +environment variable, which is overridden by the options above. +.Pp +If the backup file is a symbolic or hard link to the original file, +.Nm +creates a new backup file name by changing the first lowercase letter +in the last component of the file's name into uppercase. +If there are no more lowercase letters in the name, +it removes the first character from the name. +It repeats this process until it comes up with a +backup file that does not already exist or is not linked to the original file. +.Pp +You may also specify where you want the output to go with the +.Fl o +option; if that file already exists, it is backed up first. +.Ss Notes For Patch Senders +There are several things you should bear in mind if you are going to +be sending out patches: +.Pp +First, you can save people a lot of grief by keeping a +.Pa patchlevel.h +file which is patched to increment the patch level as the first diff in the +patch file you send out. +If you put a +.Qq Prereq: +line in with the patch, it won't let them apply +patches out of order without some warning. 
+.Pp +Second, make sure you've specified the file names right, either in a +context diff header, or with an +.Qq Index: +line. +If you are patching something in a subdirectory, be sure to tell the patch +user to specify a +.Fl p +option as needed. +.Pp +Third, you can create a file by sending out a diff that compares a +null file to the file you want to create. +This will only work if the file you want to create doesn't exist already in +the target directory. +.Pp +Fourth, take care not to send out reversed patches, since it makes people wonder +whether they already applied the patch. +.Pp +Fifth, while you may be able to get away with putting 582 diff listings into +one file, it is probably wiser to group related patches into separate files in +case something goes haywire. +.Sh ENVIRONMENT +.Bl -tag -width "PATCH_VERSION_CONTROL" -compact +.It Ev POSIXLY_CORRECT +When set, +.Nm +behaves as if the +.Fl Fl posix +option has been specified. +.It Ev SIMPLE_BACKUP_SUFFIX +Extension to use for backup file names instead of +.Qq .orig . +.It Ev TMPDIR +Directory to put temporary files in; default is +.Pa /tmp . +.It Ev PATCH_VERSION_CONTROL +Selects when numbered backup files are made. +.It Ev VERSION_CONTROL +Same as +.Ev PATCH_VERSION_CONTROL . +.El +.Sh FILES +.Bl -tag -width "$TMPDIR/patch*" -compact +.It Pa $TMPDIR/patch* +.Nm +temporary files +.It Pa /dev/tty +used to read input when +.Nm +prompts the user +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +Successful completion. +.It 1 +One or more lines were written to a reject file. +.It \*(Gt1 +An error occurred. +.El +.Pp +When applying a set of patches in a loop it behooves you to check this +exit status so you don't apply a later patch to a partially patched file. +.Sh DIAGNOSTICS +Too many to list here, but generally indicative that +.Nm +couldn't parse your patch file. +.Pp +The message +.Qq Hmm... 
+indicates that there is unprocessed text in the patch file and that +.Nm +is attempting to intuit whether there is a patch in that text and, if so, +what kind of patch it is. +.Sh SEE ALSO +.Xr diff 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification, +except as detailed above for the +.Fl -posix +option. +.Pp +The flags +.Op Fl BCEFfstVvxz +and +.Op Fl -posix +are extensions to that specification. +.Sh AUTHORS +.An Larry Wall +with many other contributors. +.Sh CAVEATS +.Nm +cannot tell if the line numbers are off in an ed script, and can only detect +bad line numbers in a normal diff when it finds a +.Qq change +or a +.Qq delete +command. +A context diff using fuzz factor 3 may have the same problem. +Until a suitable interactive interface is added, you should probably do +a context diff in these cases to see if the changes made sense. +Of course, compiling without errors is a pretty good indication that the patch +worked, but not always. +.Pp +.Nm +usually produces the correct results, even when it has to do a lot of +guessing. +However, the results are guaranteed to be correct only when the patch is +applied to exactly the same version of the file that the patch was +generated from. +.Sh BUGS +Could be smarter about partial matches, excessively deviant offsets and +swapped code, but that would take an extra pass. +.Pp +Check patch mode +.Pq Fl C +will fail if you try to check several patches in succession that build on +each other. +The entire +.Nm +code would have to be restructured to keep temporary files around so that it +can handle this situation. +.Pp +If code has been duplicated (for instance with #ifdef OLDCODE ... #else ... +#endif), +.Nm +is incapable of patching both versions and, if it works at all, will likely +patch the wrong one, and tell you that it succeeded to boot. +.Pp +If you apply a patch you've already applied, +.Nm +will think it is a reversed patch, and offer to un-apply the patch. 
+This could be construed as a feature. diff --git a/usr.bin/patch/patch.c b/usr.bin/patch/patch.c new file mode 100644 index 0000000..088ea1f --- /dev/null +++ b/usr.bin/patch/patch.c @@ -0,0 +1,1064 @@ +/* $OpenBSD: patch.c,v 1.69 2019/12/02 22:17:32 jca Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <ctype.h> +#include <getopt.h> +#include <limits.h> +#include <paths.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include "common.h" +#include "util.h" +#include "pch.h" +#include "inp.h" +#include "backupfile.h" +#include "ed.h" + +mode_t filemode = 0644; + +char *buf; /* general purpose buffer */ +size_t bufsz; /* general purpose buffer size */ + +bool using_plan_a = true; /* try to keep everything in memory */ +bool out_of_mem = false; /* ran out of memory in plan a */ + +#define MAXFILEC 2 + +char *filearg[MAXFILEC]; +bool ok_to_create_file = false; +char *outname = NULL; +char *origprae = NULL; +char *TMPOUTNAME; +char *TMPINNAME; +char *TMPREJNAME; +char *TMPPATNAME; +bool toutkeep = false; +bool trejkeep = false; +bool warn_on_invalid_line; +bool last_line_missing_eol; + +#ifdef DEBUGGING +int debug = 0; +#endif + +bool force = false; +bool batch = false; +bool verbose = true; +bool reverse = false; +bool noreverse = false; +bool skip_rest_of_patch = false; +int strippath = 957; +bool canonicalize = false; +bool check_only = false; +int diff_type = 0; +char *revision = NULL; /* prerequisite revision, if any */ +LINENUM input_lines = 0; /* how long is input file in lines */ +int posix = 0; /* strict POSIX mode? 
*/ + +static void reinitialize_almost_everything(void); +static void get_some_switches(void); +static LINENUM locate_hunk(LINENUM); +static void abort_context_hunk(void); +static void rej_line(int, LINENUM); +static void abort_hunk(void); +static void apply_hunk(LINENUM); +static void init_output(const char *); +static void init_reject(const char *); +static void copy_till(LINENUM, bool); +static void spew_output(void); +static void dump_line(LINENUM, bool); +static bool patch_match(LINENUM, LINENUM, LINENUM); +static bool similar(const char *, const char *, int); +static __dead void usage(void); + +/* true if -E was specified on command line. */ +static bool remove_empty_files = false; + +/* true if -R was specified on command line. */ +static bool reverse_flag_specified = false; + +/* buffer holding the name of the rejected patch file. */ +static char rejname[PATH_MAX]; + +/* how many input lines have been irrevocably output */ +static LINENUM last_frozen_line = 0; + +static int Argc; /* guess */ +static char **Argv; +static int Argc_last; /* for restarting plan_b */ +static char **Argv_last; + +static FILE *ofp = NULL; /* output file pointer */ +static FILE *rejfp = NULL; /* reject file pointer */ + +static int filec = 0; /* how many file arguments? */ +static LINENUM last_offset = 0; +static LINENUM maxfuzz = 2; + +/* patch using ifdef, ifndef, etc. */ +static bool do_defines = false; +/* #ifdef xyzzy */ +static char if_defined[128]; +/* #ifndef xyzzy */ +static char not_defined[128]; +/* #else */ +static const char else_defined[] = "#else\n"; +/* #endif xyzzy */ +static char end_defined[128]; + + +/* Apply a set of diffs as appropriate. 
*/ + +int +main(int argc, char *argv[]) +{ + int error = 0, hunk, failed, i, fd; + bool patch_seen; + LINENUM where = 0, newwhere, fuzz, mymaxfuzz; + const char *tmpdir; + char *v; + + if (pledge("stdio rpath wpath cpath tmppath fattr", NULL) == -1) { + perror("pledge"); + my_exit(2); + } + + bufsz = INITLINELEN; + if ((buf = malloc(bufsz)) == NULL) + pfatal("allocating input buffer"); + buf[0] = '\0'; + + setvbuf(stdout, NULL, _IOLBF, 0); + setvbuf(stderr, NULL, _IOLBF, 0); + for (i = 0; i < MAXFILEC; i++) + filearg[i] = NULL; + + /* Cons up the names of the temporary files. */ + if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') + tmpdir = _PATH_TMP; + for (i = strlen(tmpdir) - 1; i > 0 && tmpdir[i] == '/'; i--) + ; + i++; + if (asprintf(&TMPOUTNAME, "%.*s/patchoXXXXXXXXXX", i, tmpdir) == -1) + fatal("cannot allocate memory"); + if ((fd = mkstemp(TMPOUTNAME)) == -1) + pfatal("can't create %s", TMPOUTNAME); + close(fd); + + if (asprintf(&TMPINNAME, "%.*s/patchiXXXXXXXXXX", i, tmpdir) == -1) + fatal("cannot allocate memory"); + if ((fd = mkstemp(TMPINNAME)) == -1) + pfatal("can't create %s", TMPINNAME); + close(fd); + + if (asprintf(&TMPREJNAME, "%.*s/patchrXXXXXXXXXX", i, tmpdir) == -1) + fatal("cannot allocate memory"); + if ((fd = mkstemp(TMPREJNAME)) == -1) + pfatal("can't create %s", TMPREJNAME); + close(fd); + + if (asprintf(&TMPPATNAME, "%.*s/patchpXXXXXXXXXX", i, tmpdir) == -1) + fatal("cannot allocate memory"); + if ((fd = mkstemp(TMPPATNAME)) == -1) + pfatal("can't create %s", TMPPATNAME); + close(fd); + + v = getenv("SIMPLE_BACKUP_SUFFIX"); + if (v) + simple_backup_suffix = v; + else + simple_backup_suffix = ORIGEXT; + + /* parse switches */ + Argc = argc; + Argv = argv; + get_some_switches(); + + if (backup_type == none) { + if ((v = getenv("PATCH_VERSION_CONTROL")) == NULL) + v = getenv("VERSION_CONTROL"); + if (v != NULL || !posix) + backup_type = get_version(v); /* OK to pass NULL. 
*/ + } + + /* make sure we clean up /tmp in case of disaster */ + set_signals(0); + + patch_seen = false; + for (open_patch_file(filearg[1]); there_is_another_patch(); + reinitialize_almost_everything()) { + /* for each patch in patch file */ + + patch_seen = true; + + warn_on_invalid_line = true; + + if (outname == NULL) + outname = xstrdup(filearg[0]); + + /* initialize the patched file */ + if (!skip_rest_of_patch) + init_output(TMPOUTNAME); + + /* initialize reject file */ + init_reject(TMPREJNAME); + + /* find out where all the lines are */ + if (!skip_rest_of_patch) + scan_input(filearg[0]); + + /* for ed script just up and do it and exit */ + if (diff_type == ED_DIFF) { + do_ed_script(); + continue; + } + + /* from here on, open no standard i/o files, because malloc */ + /* might misfire and we can't catch it easily */ + + /* apply each hunk of patch */ + hunk = 0; + failed = 0; + out_of_mem = false; + while (another_hunk()) { + hunk++; + fuzz = 0; + mymaxfuzz = pch_context(); + if (maxfuzz < mymaxfuzz) + mymaxfuzz = maxfuzz; + if (!skip_rest_of_patch) { + do { + where = locate_hunk(fuzz); + if (hunk == 1 && where == 0 && !force) { + /* dwim for reversed patch? */ + if (!pch_swap()) { + if (fuzz == 0) + say("Not enough memory to try swapped hunk! Assuming unswapped.\n"); + continue; + } + reverse = !reverse; + /* try again */ + where = locate_hunk(fuzz); + if (where == 0) { + /* didn't find it swapped */ + if (!pch_swap()) + /* put it back to normal */ + fatal("lost hunk on alloc error!\n"); + reverse = !reverse; + } else if (noreverse) { + if (!pch_swap()) + /* put it back to normal */ + fatal("lost hunk on alloc error!\n"); + reverse = !reverse; + say("Ignoring previously applied (or reversed) patch.\n"); + skip_rest_of_patch = true; + } else if (batch) { + if (verbose) + say("%seversed (or previously applied) patch detected! %s -R.", + reverse ? "R" : "Unr", + reverse ? 
"Assuming" : "Ignoring"); + } else { + ask("%seversed (or previously applied) patch detected! %s -R? [y] ", + reverse ? "R" : "Unr", + reverse ? "Assume" : "Ignore"); + if (*buf == 'n') { + ask("Apply anyway? [n] "); + if (*buf != 'y') + skip_rest_of_patch = true; + where = 0; + reverse = !reverse; + if (!pch_swap()) + /* put it back to normal */ + fatal("lost hunk on alloc error!\n"); + } + } + } + } while (!skip_rest_of_patch && where == 0 && + ++fuzz <= mymaxfuzz); + + if (skip_rest_of_patch) { /* just got decided */ + fclose(ofp); + ofp = NULL; + } + } + newwhere = pch_newfirst() + last_offset; + if (skip_rest_of_patch) { + abort_hunk(); + failed++; + if (verbose) + say("Hunk #%d ignored at %ld.\n", + hunk, newwhere); + } else if (where == 0) { + abort_hunk(); + failed++; + if (verbose) + say("Hunk #%d failed at %ld.\n", + hunk, newwhere); + } else { + apply_hunk(where); + if (verbose) { + say("Hunk #%d succeeded at %ld", + hunk, newwhere); + if (fuzz != 0) + say(" with fuzz %ld", fuzz); + if (last_offset) + say(" (offset %ld line%s)", + last_offset, + last_offset == 1L ? 
"" : "s"); + say(".\n"); + } + } + } + + if (out_of_mem && using_plan_a) { + Argc = Argc_last; + Argv = Argv_last; + say("\n\nRan out of memory using Plan A--trying again...\n\n"); + if (ofp) + fclose(ofp); + ofp = NULL; + if (rejfp) + fclose(rejfp); + rejfp = NULL; + continue; + } + if (hunk == 0) + fatal("Internal error: hunk should not be 0\n"); + + /* finish spewing out the new file */ + if (!skip_rest_of_patch) + spew_output(); + + /* and put the output where desired */ + ignore_signals(); + if (!skip_rest_of_patch) { + struct stat statbuf; + char *realout = outname; + + if (!check_only) { + if (move_file(TMPOUTNAME, outname) < 0) { + toutkeep = true; + realout = TMPOUTNAME; + chmod(TMPOUTNAME, filemode); + } else + chmod(outname, filemode); + + if (remove_empty_files && + stat(realout, &statbuf) == 0 && + statbuf.st_size == 0) { + if (verbose) + say("Removing %s (empty after patching).\n", + realout); + unlink(realout); + } + } + } + fclose(rejfp); + rejfp = NULL; + if (failed) { + error = 1; + if (*rejname == '\0') { + if (strlcpy(rejname, outname, + sizeof(rejname)) >= sizeof(rejname)) + fatal("filename %s is too long\n", outname); + if (strlcat(rejname, REJEXT, + sizeof(rejname)) >= sizeof(rejname)) + fatal("filename %s is too long\n", outname); + } + if (!check_only) + say("%d out of %d hunks %s--saving rejects to %s\n", + failed, hunk, skip_rest_of_patch ? "ignored" : "failed", rejname); + else + say("%d out of %d hunks %s\n", + failed, hunk, skip_rest_of_patch ? "ignored" : "failed"); + if (!check_only && move_file(TMPREJNAME, rejname) < 0) + trejkeep = true; + } + set_signals(1); + } + + if (!patch_seen) + error = 2; + + my_exit(error); + /* NOTREACHED */ +} + +/* Prepare to find the next patch to do in the patch file. 
*/ + +static void +reinitialize_almost_everything(void) +{ + re_patch(); + re_input(); + + input_lines = 0; + last_frozen_line = 0; + + filec = 0; + if (!out_of_mem) { + free(filearg[0]); + filearg[0] = NULL; + } + + free(outname); + outname = NULL; + + last_offset = 0; + diff_type = 0; + + free(revision); + revision = NULL; + + reverse = reverse_flag_specified; + skip_rest_of_patch = false; + + get_some_switches(); +} + +/* Process switches and filenames. */ + +static void +get_some_switches(void) +{ + const char *options = "b::B:cCd:D:eEfF:i:lnNo:p:r:RstuvV:x:z:"; + static struct option longopts[] = { + {"backup", no_argument, 0, 'b'}, + {"batch", no_argument, 0, 't'}, + {"check", no_argument, 0, 'C'}, + {"context", no_argument, 0, 'c'}, + {"debug", required_argument, 0, 'x'}, + {"directory", required_argument, 0, 'd'}, + {"dry-run", no_argument, 0, 'C'}, + {"ed", no_argument, 0, 'e'}, + {"force", no_argument, 0, 'f'}, + {"forward", no_argument, 0, 'N'}, + {"fuzz", required_argument, 0, 'F'}, + {"ifdef", required_argument, 0, 'D'}, + {"input", required_argument, 0, 'i'}, + {"ignore-whitespace", no_argument, 0, 'l'}, + {"normal", no_argument, 0, 'n'}, + {"output", required_argument, 0, 'o'}, + {"prefix", required_argument, 0, 'B'}, + {"quiet", no_argument, 0, 's'}, + {"reject-file", required_argument, 0, 'r'}, + {"remove-empty-files", no_argument, 0, 'E'}, + {"reverse", no_argument, 0, 'R'}, + {"silent", no_argument, 0, 's'}, + {"strip", required_argument, 0, 'p'}, + {"suffix", required_argument, 0, 'z'}, + {"unified", no_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"version-control", required_argument, 0, 'V'}, + {"posix", no_argument, &posix, 1}, + {NULL, 0, 0, 0} + }; + int ch; + + rejname[0] = '\0'; + Argc_last = Argc; + Argv_last = Argv; + if (!Argc) + return; + optreset = optind = 1; + while ((ch = getopt_long(Argc, Argv, options, longopts, NULL)) != -1) { + switch (ch) { + case 'b': + if (backup_type == none) + backup_type = numbered_existing; 
+ if (optarg == NULL) + break; + if (verbose) + say("Warning, the ``-b suffix'' option has been" + " obsoleted by the -z option.\n"); + /* FALLTHROUGH */ + case 'z': + /* must directly follow 'b' case for backwards compat */ + simple_backup_suffix = xstrdup(optarg); + break; + case 'B': + origprae = xstrdup(optarg); + break; + case 'c': + diff_type = CONTEXT_DIFF; + break; + case 'C': + check_only = true; + break; + case 'd': + if (chdir(optarg) == -1) + pfatal("can't cd to %s", optarg); + break; + case 'D': + do_defines = true; + if (!isalpha((unsigned char)*optarg) && *optarg != '_') + fatal("argument to -D is not an identifier\n"); + snprintf(if_defined, sizeof if_defined, + "#ifdef %s\n", optarg); + snprintf(not_defined, sizeof not_defined, + "#ifndef %s\n", optarg); + snprintf(end_defined, sizeof end_defined, + "#endif /* %s */\n", optarg); + break; + case 'e': + diff_type = ED_DIFF; + break; + case 'E': + remove_empty_files = true; + break; + case 'f': + force = true; + break; + case 'F': + maxfuzz = atoi(optarg); + break; + case 'i': + if (++filec == MAXFILEC) + fatal("too many file arguments\n"); + filearg[filec] = xstrdup(optarg); + break; + case 'l': + canonicalize = true; + break; + case 'n': + diff_type = NORMAL_DIFF; + break; + case 'N': + noreverse = true; + break; + case 'o': + outname = xstrdup(optarg); + break; + case 'p': + strippath = atoi(optarg); + break; + case 'r': + if (strlcpy(rejname, optarg, + sizeof(rejname)) >= sizeof(rejname)) + fatal("argument for -r is too long\n"); + break; + case 'R': + reverse = true; + reverse_flag_specified = true; + break; + case 's': + verbose = false; + break; + case 't': + batch = true; + break; + case 'u': + diff_type = UNI_DIFF; + break; + case 'v': + version(); + break; + case 'V': + backup_type = get_version(optarg); + break; +#ifdef DEBUGGING + case 'x': + debug = atoi(optarg); + break; +#endif + default: + if (ch != '\0') + usage(); + break; + } + } + Argc -= optind; + Argv += optind; + + if (Argc > 0) 
{ + filearg[0] = xstrdup(*Argv++); + Argc--; + while (Argc > 0) { + if (++filec == MAXFILEC) + fatal("too many file arguments\n"); + filearg[filec] = xstrdup(*Argv++); + Argc--; + } + } + + if (getenv("POSIXLY_CORRECT") != NULL) + posix = 1; +} + +static __dead void +usage(void) +{ + fprintf(stderr, +"usage: patch [-bCcEeflNnRstuv] [-B backup-prefix] [-D symbol] [-d directory]\n" +" [-F max-fuzz] [-i patchfile] [-o out-file] [-p strip-count]\n" +" [-r rej-name] [-V t | nil | never] [-x number] [-z backup-ext]\n" +" [--posix] [origfile [patchfile]]\n" +" patch <patchfile\n"); + my_exit(2); +} + +/* + * Attempt to find the right place to apply this hunk of patch. + */ +static LINENUM +locate_hunk(LINENUM fuzz) +{ + LINENUM first_guess = pch_first() + last_offset; + LINENUM offset; + LINENUM pat_lines = pch_ptrn_lines(); + LINENUM max_pos_offset = input_lines - first_guess - pat_lines + 1; + LINENUM max_neg_offset = first_guess - last_frozen_line - 1 + pch_context(); + + if (pat_lines == 0) { /* null range matches always */ + if (verbose && fuzz == 0 && (diff_type == CONTEXT_DIFF + || diff_type == NEW_CONTEXT_DIFF + || diff_type == UNI_DIFF)) { + say("Empty context always matches.\n"); + } + return (first_guess); + } + if (max_neg_offset >= first_guess) /* do not try lines < 0 */ + max_neg_offset = first_guess - 1; + if (first_guess <= input_lines && patch_match(first_guess, 0, fuzz)) + return first_guess; + for (offset = 1; ; offset++) { + bool check_after = (offset <= max_pos_offset); + bool check_before = (offset <= max_neg_offset); + + if (check_after && patch_match(first_guess, offset, fuzz)) { +#ifdef DEBUGGING + if (debug & 1) + say("Offset changing from %ld to %ld\n", + last_offset, offset); +#endif + last_offset = offset; + return first_guess + offset; + } else if (check_before && patch_match(first_guess, -offset, fuzz)) { +#ifdef DEBUGGING + if (debug & 1) + say("Offset changing from %ld to %ld\n", + last_offset, -offset); +#endif + last_offset = -offset; + 
return first_guess - offset; + } else if (!check_before && !check_after) + return 0; + } +} + +/* We did not find the pattern, dump out the hunk so they can handle it. */ + +static void +abort_context_hunk(void) +{ + LINENUM i; + const LINENUM pat_end = pch_end(); + /* + * add in last_offset to guess the same as the previous successful + * hunk + */ + const LINENUM oldfirst = pch_first() + last_offset; + const LINENUM newfirst = pch_newfirst() + last_offset; + const LINENUM oldlast = oldfirst + pch_ptrn_lines() - 1; + const LINENUM newlast = newfirst + pch_repl_lines() - 1; + const char *stars = (diff_type >= NEW_CONTEXT_DIFF ? " ****" : ""); + const char *minuses = (diff_type >= NEW_CONTEXT_DIFF ? " ----" : " -----"); + + fprintf(rejfp, "***************\n"); + for (i = 0; i <= pat_end; i++) { + switch (pch_char(i)) { + case '*': + if (oldlast < oldfirst) + fprintf(rejfp, "*** 0%s\n", stars); + else if (oldlast == oldfirst) + fprintf(rejfp, "*** %ld%s\n", oldfirst, stars); + else + fprintf(rejfp, "*** %ld,%ld%s\n", oldfirst, + oldlast, stars); + break; + case '=': + if (newlast < newfirst) + fprintf(rejfp, "--- 0%s\n", minuses); + else if (newlast == newfirst) + fprintf(rejfp, "--- %ld%s\n", newfirst, minuses); + else + fprintf(rejfp, "--- %ld,%ld%s\n", newfirst, + newlast, minuses); + break; + case '\n': + fprintf(rejfp, "%s", pfetch(i)); + break; + case ' ': + case '-': + case '+': + case '!': + fprintf(rejfp, "%c %s", pch_char(i), pfetch(i)); + break; + default: + fatal("fatal internal error in abort_context_hunk\n"); + } + } +} + +static void +rej_line(int ch, LINENUM i) +{ + size_t len; + const char *line = pfetch(i); + + len = strlen(line); + + fprintf(rejfp, "%c%s", ch, line); + if (len == 0 || line[len-1] != '\n') + fprintf(rejfp, "\n\\ No newline at end of file\n"); +} + +static void +abort_hunk(void) +{ + LINENUM i, j, split; + int ch1, ch2; + const LINENUM pat_end = pch_end(); + const LINENUM oldfirst = pch_first() + last_offset; + const LINENUM newfirst 
= pch_newfirst() + last_offset; + + if (diff_type != UNI_DIFF) { + abort_context_hunk(); + return; + } + split = -1; + for (i = 0; i <= pat_end; i++) { + if (pch_char(i) == '=') { + split = i; + break; + } + } + if (split == -1) { + fprintf(rejfp, "malformed hunk: no split found\n"); + return; + } + i = 0; + j = split + 1; + fprintf(rejfp, "@@ -%ld,%ld +%ld,%ld @@\n", + pch_ptrn_lines() ? oldfirst : 0, + pch_ptrn_lines(), newfirst, pch_repl_lines()); + while (i < split || j <= pat_end) { + ch1 = i < split ? pch_char(i) : -1; + ch2 = j <= pat_end ? pch_char(j) : -1; + if (ch1 == '-') { + rej_line('-', i); + i++; + } else if (ch1 == ' ' && ch2 == ' ') { + rej_line(' ', i); + i++; + j++; + } else if (ch1 == '!' && ch2 == '!') { + while (i < split && ch1 == '!') { + rej_line('-', i); + i++; + ch1 = i < split ? pch_char(i) : -1; + } + while (j <= pat_end && ch2 == '!') { + rej_line('+', j); + j++; + ch2 = j <= pat_end ? pch_char(j) : -1; + } + } else if (ch1 == '*') { + i++; + } else if (ch2 == '+' || ch2 == ' ') { + rej_line(ch2, j); + j++; + } else { + fprintf(rejfp, "internal error on (%ld %ld %ld)\n", + i, split, j); + rej_line(ch1, i); + rej_line(ch2, j); + return; + } + } +} + +/* We found where to apply it (we hope), so do it. 
*/ + +static void +apply_hunk(LINENUM where) +{ + LINENUM old = 1; + const LINENUM lastline = pch_ptrn_lines(); + LINENUM new = lastline + 1; +#define OUTSIDE 0 +#define IN_IFNDEF 1 +#define IN_IFDEF 2 +#define IN_ELSE 3 + int def_state = OUTSIDE; + const LINENUM pat_end = pch_end(); + + where--; + while (pch_char(new) == '=' || pch_char(new) == '\n') + new++; + + while (old <= lastline) { + if (pch_char(old) == '-') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == OUTSIDE) { + fputs(not_defined, ofp); + def_state = IN_IFNDEF; + } else if (def_state == IN_IFDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + fputs(pfetch(old), ofp); + } + last_frozen_line++; + old++; + } else if (new > pat_end) { + break; + } else if (pch_char(new) == '+') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == IN_IFNDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } else if (def_state == OUTSIDE) { + fputs(if_defined, ofp); + def_state = IN_IFDEF; + } + } + fputs(pfetch(new), ofp); + new++; + } else if (pch_char(new) != pch_char(old)) { + say("Out-of-sync patch, lines %ld,%ld--mangled text or line numbers, maybe?\n", + pch_hunk_beg() + old, + pch_hunk_beg() + new); +#ifdef DEBUGGING + say("oldchar = '%c', newchar = '%c'\n", + pch_char(old), pch_char(new)); +#endif + my_exit(2); + } else if (pch_char(new) == '!') { + copy_till(where + old - 1, false); + if (do_defines) { + fputs(not_defined, ofp); + def_state = IN_IFNDEF; + } + while (pch_char(old) == '!') { + if (do_defines) { + fputs(pfetch(old), ofp); + } + last_frozen_line++; + old++; + } + if (do_defines) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + while (pch_char(new) == '!') { + fputs(pfetch(new), ofp); + new++; + } + } else { + if (pch_char(new) != ' ') + fatal("Internal error: expected ' '\n"); + old++; + new++; + if (do_defines && def_state != OUTSIDE) { + fputs(end_defined, ofp); + def_state = OUTSIDE; + } + } + } + if (new <= 
pat_end && pch_char(new) == '+') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == OUTSIDE) { + fputs(if_defined, ofp); + def_state = IN_IFDEF; + } else if (def_state == IN_IFNDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + } + while (new <= pat_end && pch_char(new) == '+') { + fputs(pfetch(new), ofp); + new++; + } + } + if (do_defines && def_state != OUTSIDE) { + fputs(end_defined, ofp); + } +} + +/* + * Open the new file. + */ +static void +init_output(const char *name) +{ + ofp = fopen(name, "w"); + if (ofp == NULL) + pfatal("can't create %s", name); +} + +/* + * Open a file to put hunks we can't locate. + */ +static void +init_reject(const char *name) +{ + rejfp = fopen(name, "w"); + if (rejfp == NULL) + pfatal("can't create %s", name); +} + +/* + * Copy input file to output, up to wherever hunk is to be applied. + * If endoffile is true, treat the last line specially since it may + * lack a newline. + */ +static void +copy_till(LINENUM lastline, bool endoffile) +{ + if (last_frozen_line > lastline) + fatal("misordered hunks! output would be garbled\n"); + while (last_frozen_line < lastline) { + if (++last_frozen_line == lastline && endoffile) + dump_line(last_frozen_line, !last_line_missing_eol); + else + dump_line(last_frozen_line, true); + } +} + +/* + * Finish copying the input file to the output file. + */ +static void +spew_output(void) +{ +#ifdef DEBUGGING + if (debug & 256) + say("il=%ld lfl=%ld\n", input_lines, last_frozen_line); +#endif + if (input_lines) + copy_till(input_lines, true); /* dump remainder of file */ + fclose(ofp); + ofp = NULL; +} + +/* + * Copy one line from input to output. + */ +static void +dump_line(LINENUM line, bool write_newline) +{ + char *s; + + s = ifetch(line, 0); + if (s == NULL) + return; + /* Note: string is not NUL terminated. */ + for (; *s != '\n'; s++) + putc(*s, ofp); + if (write_newline) + putc('\n', ofp); +} + +/* + * Does the patch pattern match at line base+offset? 
+ */ +static bool +patch_match(LINENUM base, LINENUM offset, LINENUM fuzz) +{ + LINENUM pline = 1 + fuzz; + LINENUM iline; + LINENUM pat_lines = pch_ptrn_lines() - fuzz; + const char *ilineptr; + const char *plineptr; + short plinelen; + + for (iline = base + offset + fuzz; pline <= pat_lines; pline++, iline++) { + ilineptr = ifetch(iline, offset >= 0); + if (ilineptr == NULL) + return false; + plineptr = pfetch(pline); + plinelen = pch_line_len(pline); + if (canonicalize) { + if (!similar(ilineptr, plineptr, plinelen)) + return false; + } else if (strnNE(ilineptr, plineptr, plinelen)) + return false; + if (iline == input_lines) { + /* + * We are looking at the last line of the file. + * If the file has no eol, the patch line should + * not have one either and vice-versa. Note that + * plinelen > 0. + */ + if (last_line_missing_eol) { + if (plineptr[plinelen - 1] == '\n') + return false; + } else { + if (plineptr[plinelen - 1] != '\n') + return false; + } + } + } + return true; +} + +/* + * Do two lines match with canonicalized white space? + */ +static bool +similar(const char *a, const char *b, int len) +{ + while (len) { + if (isspace((unsigned char)*b)) { /* whitespace (or \n) to match? 
*/ + if (!isspace((unsigned char)*a)) + return false; /* no corresponding whitespace */ + while (len && isspace((unsigned char)*b) && *b != '\n') + b++, len--; /* skip pattern whitespace */ + while (isspace((unsigned char)*a) && *a != '\n') + a++; /* skip target whitespace */ + if (*a == '\n' || *b == '\n') + return (*a == *b); /* should end in sync */ + } else if (*a++ != *b++) /* match non-whitespace chars */ + return false; + else + len--; /* probably not necessary */ + } + return true; /* actually, this is not reached */ + /* since there is always a \n */ +} diff --git a/usr.bin/patch/pch.c b/usr.bin/patch/pch.c new file mode 100644 index 0000000..89db753 --- /dev/null +++ b/usr.bin/patch/pch.c @@ -0,0 +1,1522 @@ +/* $OpenBSD: pch.c,v 1.62 2019/12/02 22:23:19 jca Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <libgen.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "pch.h" + +/* Patch (diff listing) abstract type. */ + +FILE *pfp = NULL; /* patch file pointer */ +LINENUM p_input_line = 0; /* current line # from patch file */ + +static off_t p_filesize; /* size of the patch file */ +static LINENUM p_first; /* 1st line number */ +static LINENUM p_newfirst; /* 1st line number of replacement */ +static LINENUM p_ptrn_lines; /* # lines in pattern */ +static LINENUM p_repl_lines; /* # lines in replacement text */ +static LINENUM p_end = -1; /* last line in hunk */ +static LINENUM p_max; /* max allowed value of p_end */ +static LINENUM p_context = 3; /* # of context lines */ +static char **p_line = NULL;/* the text of the hunk */ +static short *p_len = NULL; /* length of each line */ +static char *p_char = NULL; /* +, -, and ! 
*/ +static int hunkmax = INITHUNKMAX; /* size of above arrays to begin with */ +static int p_indent; /* indent to patch */ +static off_t p_base; /* where to intuit this time */ +static LINENUM p_bline; /* line # of p_base */ +static off_t p_start; /* where intuit found a patch */ +static LINENUM p_sline; /* and the line number for it */ +static LINENUM p_hunk_beg; /* line number of current hunk */ +static LINENUM p_efake = -1; /* end of faked up lines--don't free */ +static LINENUM p_bfake = -1; /* beg of faked up lines */ +static char *bestguess = NULL; /* guess at correct filename */ + +static void grow_hunkmax(void); +static int intuit_diff_type(void); +static void skip_to(off_t, LINENUM); +static char *best_name(const struct file_name *, bool); +static char *posix_name(const struct file_name *, bool); +static size_t num_components(const char *); + +/* + * Prepare to look for the next patch in the patch file. + */ +void +re_patch(void) +{ + p_first = 0; + p_newfirst = 0; + p_ptrn_lines = 0; + p_repl_lines = 0; + p_end = (LINENUM) - 1; + p_max = 0; + p_indent = 0; +} + +/* + * Open the patch file at the beginning of time. + */ +void +open_patch_file(const char *filename) +{ + struct stat filestat; + + if (filename == NULL || *filename == '\0' || strEQ(filename, "-")) { + pfp = fopen(TMPPATNAME, "w"); + if (pfp == NULL) + pfatal("can't create %s", TMPPATNAME); + while (getline(&buf, &bufsz, stdin) != -1) + fputs(buf, pfp); + fclose(pfp); + filename = TMPPATNAME; + } + pfp = fopen(filename, "r"); + if (pfp == NULL) + pfatal("patch file %s not found", filename); + if (fstat(fileno(pfp), &filestat)) + pfatal("can't stat %s", filename); + p_filesize = filestat.st_size; + next_intuit_at(0, 1L); /* start at the beginning */ + set_hunkmax(); +} + +/* + * Make sure our dynamically realloced tables are malloced to begin with. 
+ */ +void +set_hunkmax(void) +{ + if (p_line == NULL) + p_line = calloc((size_t) hunkmax, sizeof(char *)); + if (p_len == NULL) + p_len = calloc((size_t) hunkmax, sizeof(short)); + if (p_char == NULL) + p_char = calloc((size_t) hunkmax, sizeof(char)); +} + +/* + * Enlarge the arrays containing the current hunk of patch. + */ +static void +grow_hunkmax(void) +{ + int new_hunkmax; + char **new_p_line; + short *new_p_len; + char *new_p_char; + + new_hunkmax = hunkmax * 2; + + if (p_line == NULL || p_len == NULL || p_char == NULL) + fatal("Internal memory allocation error\n"); + + new_p_line = reallocarray(p_line, new_hunkmax, sizeof(char *)); + if (new_p_line == NULL) + free(p_line); + + new_p_len = reallocarray(p_len, new_hunkmax, sizeof(short)); + if (new_p_len == NULL) + free(p_len); + + new_p_char = recallocarray(p_char, hunkmax, new_hunkmax, sizeof(char)); + if (new_p_char == NULL) + free(p_char); + + p_char = new_p_char; + p_len = new_p_len; + p_line = new_p_line; + + if (p_line != NULL && p_len != NULL && p_char != NULL) { + hunkmax = new_hunkmax; + return; + } + + if (!using_plan_a) + fatal("out of memory\n"); + out_of_mem = true; /* whatever is null will be allocated again */ + /* from within plan_a(), of all places */ +} + +/* True if the remainder of the patch file contains a diff of some sort. */ + +bool +there_is_another_patch(void) +{ + bool exists = false; + + if (p_base != 0 && p_base >= p_filesize) { + if (verbose) + say("done\n"); + return false; + } + if (verbose) + say("Hmm..."); + diff_type = intuit_diff_type(); + if (!diff_type) { + if (p_base != 0) { + if (verbose) + say(" Ignoring the trailing garbage.\ndone\n"); + } else + say(" I can't seem to find a patch in there anywhere.\n"); + return false; + } + if (verbose) + say(" %sooks like %s to me...\n", + (p_base == 0 ? "L" : "The next patch l"), + diff_type == UNI_DIFF ? "a unified diff" : + diff_type == CONTEXT_DIFF ? "a context diff" : + diff_type == NEW_CONTEXT_DIFF ? 
"a new-style context diff" : + diff_type == NORMAL_DIFF ? "a normal diff" : + "an ed script"); + if (p_indent && verbose) + say("(Patch is indented %d space%s.)\n", p_indent, + p_indent == 1 ? "" : "s"); + skip_to(p_start, p_sline); + while (filearg[0] == NULL) { + if (force || batch) { + say("No file to patch. Skipping...\n"); + filearg[0] = xstrdup(bestguess); + skip_rest_of_patch = true; + return true; + } + ask("File to patch: "); + if (*buf != '\n') { + free(bestguess); + bestguess = xstrdup(buf); + filearg[0] = fetchname(buf, &exists, 0); + } + if (!exists) { + int def_skip = *bestguess == '\0'; + ask("No file found--skip this patch? [%c] ", + def_skip ? 'y' : 'n'); + if (*buf == 'n' || (!def_skip && *buf != 'y')) + continue; + if (verbose) + say("Skipping patch...\n"); + free(filearg[0]); + filearg[0] = fetchname(bestguess, &exists, 0); + skip_rest_of_patch = true; + return true; + } + } + return true; +} + +/* Determine what kind of diff is in the remaining part of the patch file. */ + +static int +intuit_diff_type(void) +{ + off_t this_line = 0, previous_line; + off_t first_command_line = -1; + LINENUM fcl_line = -1; + bool last_line_was_command = false, this_is_a_command = false; + bool stars_last_line = false, stars_this_line = false; + char *s, *t; + int indent, retval; + struct file_name names[MAX_FILE]; + int piece_of_git = 0; + + memset(names, 0, sizeof(names)); + ok_to_create_file = false; + fseeko(pfp, p_base, SEEK_SET); + p_input_line = p_bline - 1; + for (;;) { + previous_line = this_line; + last_line_was_command = this_is_a_command; + stars_last_line = stars_this_line; + this_line = ftello(pfp); + indent = 0; + p_input_line++; + if (getline(&buf, &bufsz, pfp) == -1) { + if (first_command_line >= 0) { + /* nothing but deletes!? 
*/ + p_start = first_command_line; + p_sline = fcl_line; + retval = ED_DIFF; + goto scan_exit; + } else { + p_start = this_line; + p_sline = p_input_line; + retval = 0; + goto scan_exit; + } + } + for (s = buf; *s == ' ' || *s == '\t' || *s == 'X'; s++) { + if (*s == '\t') + indent += 8 - (indent % 8); + else + indent++; + } + for (t = s; isdigit((unsigned char)*t) || *t == ','; t++) + ; + this_is_a_command = (isdigit((unsigned char)*s) && + (*t == 'd' || *t == 'c' || *t == 'a')); + if (first_command_line < 0 && this_is_a_command) { + first_command_line = this_line; + fcl_line = p_input_line; + p_indent = indent; /* assume this for now */ + } + if (!stars_last_line && strnEQ(s, "*** ", 4)) + names[OLD_FILE].path = fetchname(s + 4, + &names[OLD_FILE].exists, strippath); + else if (strnEQ(s, "--- ", 4)) { + size_t off = 4; + if (piece_of_git && strippath == 957 && + strnEQ(s, "--- a/", 6)) + off = 6; + names[NEW_FILE].path = fetchname(s + off, + &names[NEW_FILE].exists, strippath); + } else if (strnEQ(s, "+++ ", 4)) { + /* pretend it is the old name */ + size_t off = 4; + if (piece_of_git && strippath == 957 && + strnEQ(s, "+++ b/", 6)) + off = 6; + names[OLD_FILE].path = fetchname(s + off, + &names[OLD_FILE].exists, strippath); + } else if (strnEQ(s, "Index:", 6)) + names[INDEX_FILE].path = fetchname(s + 6, + &names[INDEX_FILE].exists, strippath); + else if (strnEQ(s, "Prereq:", 7)) { + for (t = s + 7; isspace((unsigned char)*t); t++) + ; + revision = xstrdup(t); + for (t = revision; + *t && !isspace((unsigned char)*t); t++) + ; + *t = '\0'; + if (*revision == '\0') { + free(revision); + revision = NULL; + } + } else if (strnEQ(s, "diff --git a/", 13)) + piece_of_git = 1; + if ((!diff_type || diff_type == ED_DIFF) && + first_command_line >= 0 && + strEQ(s, ".\n")) { + p_indent = indent; + p_start = first_command_line; + p_sline = fcl_line; + retval = ED_DIFF; + goto scan_exit; + } + if ((!diff_type || diff_type == UNI_DIFF) && strnEQ(s, "@@ -", 4)) { + if (strnEQ(s 
+ 4, "0,0", 3)) + ok_to_create_file = true; + p_indent = indent; + p_start = this_line; + p_sline = p_input_line; + retval = UNI_DIFF; + goto scan_exit; + } + stars_this_line = strnEQ(s, "********", 8); + if ((!diff_type || diff_type == CONTEXT_DIFF) && stars_last_line && + strnEQ(s, "*** ", 4)) { + if (strtolinenum(s + 4, &s) == 0) + ok_to_create_file = true; + /* + * If this is a new context diff the character just + * at the end of the line is a '*'. + */ + while (*s && *s != '\n') + s++; + p_indent = indent; + p_start = previous_line; + p_sline = p_input_line - 1; + retval = (*(s - 1) == '*' ? NEW_CONTEXT_DIFF : CONTEXT_DIFF); + goto scan_exit; + } + if ((!diff_type || diff_type == NORMAL_DIFF) && + last_line_was_command && + (strnEQ(s, "< ", 2) || strnEQ(s, "> ", 2))) { + p_start = previous_line; + p_sline = p_input_line - 1; + p_indent = indent; + retval = NORMAL_DIFF; + goto scan_exit; + } + } +scan_exit: + if (retval == UNI_DIFF) { + /* unswap old and new */ + struct file_name tmp = names[OLD_FILE]; + names[OLD_FILE] = names[NEW_FILE]; + names[NEW_FILE] = tmp; + } + if (filearg[0] == NULL) { + if (posix) + filearg[0] = posix_name(names, ok_to_create_file); + else { + /* Ignore the Index: name for context diffs, like GNU */ + if (names[OLD_FILE].path != NULL || + names[NEW_FILE].path != NULL) { + free(names[INDEX_FILE].path); + names[INDEX_FILE].path = NULL; + } + filearg[0] = best_name(names, ok_to_create_file); + } + } + + free(bestguess); + bestguess = NULL; + if (filearg[0] != NULL) + bestguess = xstrdup(filearg[0]); + else if (!ok_to_create_file) { + /* + * We don't want to create a new file but we need a + * filename to set bestguess. Avoid setting filearg[0] + * so the file is not created automatically. 
+ */ + if (posix) + bestguess = posix_name(names, true); + else + bestguess = best_name(names, true); + } + free(names[OLD_FILE].path); + free(names[NEW_FILE].path); + free(names[INDEX_FILE].path); + return retval; +} + +/* + * Remember where this patch ends so we know where to start up again. + */ +void +next_intuit_at(off_t file_pos, LINENUM file_line) +{ + p_base = file_pos; + p_bline = file_line; +} + +/* + * Basically a verbose fseeko() to the actual diff listing. + */ +static void +skip_to(off_t file_pos, LINENUM file_line) +{ + int ret; + + if (p_base > file_pos) + fatal("Internal error: seek %lld>%lld\n", + (long long)p_base, (long long)file_pos); + if (verbose && p_base < file_pos) { + fseeko(pfp, p_base, SEEK_SET); + say("The text leading up to this was:\n--------------------------\n"); + while (ftello(pfp) < file_pos) { + ret = getline(&buf, &bufsz, pfp); + if (ret == -1) + fatal("Unexpected end of file\n"); + say("|%s", buf); + } + say("--------------------------\n"); + } else + fseeko(pfp, file_pos, SEEK_SET); + p_input_line = file_line - 1; +} + +/* Make this a function for better debugging. */ +static void +malformed(void) +{ + fatal("malformed patch at line %ld: %s", p_input_line, buf); + /* about as informative as "Syntax error" in C */ +} + +/* + * True if the line has been discarded (i.e. it is a line saying + * "\ No newline at end of file".) + */ +static bool +remove_special_line(void) +{ + int c; + + c = fgetc(pfp); + if (c == '\\') { + do { + c = fgetc(pfp); + } while (c != EOF && c != '\n'); + + return true; + } + if (c != EOF) + fseeko(pfp, -1, SEEK_CUR); + + return false; +} + +/* + * True if there is more of the current diff listing to process. 
+ */ +bool +another_hunk(void) +{ + off_t line_beginning; /* file pos of the current line */ + LINENUM repl_beginning; /* index of --- line */ + LINENUM fillcnt; /* #lines of missing ptrn or repl */ + LINENUM fillsrc; /* index of first line to copy */ + LINENUM filldst; /* index of first missing line */ + bool ptrn_spaces_eaten; /* ptrn was slightly misformed */ + bool repl_could_be_missing; /* no + or ! lines in this hunk */ + bool repl_missing; /* we are now backtracking */ + off_t repl_backtrack_position; /* file pos of first repl line */ + LINENUM repl_patch_line; /* input line number for same */ + LINENUM ptrn_copiable; /* # of copiable lines in ptrn */ + char *s; + int context = 0; + int ret; + + while (p_end >= 0) { + if (p_end == p_efake) + p_end = p_bfake; /* don't free twice */ + else + free(p_line[p_end]); + p_end--; + } + p_efake = -1; + + p_max = hunkmax; /* gets reduced when --- found */ + if (diff_type == CONTEXT_DIFF || diff_type == NEW_CONTEXT_DIFF) { + line_beginning = ftello(pfp); + repl_beginning = 0; + fillcnt = 0; + fillsrc = 0; + ptrn_spaces_eaten = false; + repl_could_be_missing = true; + repl_missing = false; + repl_backtrack_position = 0; + repl_patch_line = 0; + ptrn_copiable = 0; + + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1 || strnNE(buf, "********", 8)) { + next_intuit_at(line_beginning, p_input_line); + return false; + } + p_context = 100; + p_hunk_beg = p_input_line + 1; + while (p_end < p_max) { + line_beginning = ftello(pfp); + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1) { + if (p_max - p_end < 4) { + /* assume blank lines got chopped */ + strlcpy(buf, " \n", bufsz); + } else { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("unexpected end of file in patch\n"); + } + } + p_end++; + if (p_end >= hunkmax) + fatal("Internal error: hunk larger than hunk " + "buffer size"); + p_char[p_end] = *buf; + p_line[p_end] = NULL; + switch 
(*buf) { + case '*': + if (strnEQ(buf, "********", 8)) { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } else + fatal("unexpected end of hunk " + "at line %ld\n", + p_input_line); + } + if (p_end != 0) { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("unexpected *** at line %ld: %s", + p_input_line, buf); + } + context = 0; + p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + for (s = buf; + *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + if (strnEQ(s, "0,0", 3)) + memmove(s, s + 2, strlen(s + 2) + 1); + p_first = strtolinenum(s, &s); + if (*s == ',') { + for (; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_ptrn_lines = strtolinenum(s, &s) - p_first + 1; + if (p_ptrn_lines < 0) + malformed(); + } else if (p_first) + p_ptrn_lines = 1; + else { + p_ptrn_lines = 0; + p_first = 1; + } + if (p_first >= LINENUM_MAX - p_ptrn_lines || + p_ptrn_lines >= LINENUM_MAX - 6) + malformed(); + + /* we need this much at least */ + p_max = p_ptrn_lines + 6; + while (p_max >= hunkmax) + grow_hunkmax(); + p_max = hunkmax; + break; + case '-': + if (buf[1] == '-') { + if (repl_beginning || + (p_end != p_ptrn_lines + 1 + + (p_char[p_end - 1] == '\n'))) { + if (p_end == 1) { + /* + * `old' lines were omitted; + * set up to fill them in + * from 'new' context lines. + */ + p_end = p_ptrn_lines + 1; + fillsrc = p_end + 1; + filldst = 1; + fillcnt = p_ptrn_lines; + } else { + if (repl_beginning) { + if (repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("duplicate \"---\" at line %ld--check line numbers at line %ld\n", + p_input_line, p_hunk_beg + repl_beginning); + } else { + fatal("%s \"---\" at line %ld--check line numbers at line %ld\n", + (p_end <= p_ptrn_lines + ? 
"Premature" + : "Overdue"), + p_input_line, p_hunk_beg); + } + } + } + repl_beginning = p_end; + repl_backtrack_position = ftello(pfp); + repl_patch_line = p_input_line; + p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + p_char[p_end] = '='; + for (s = buf; + *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_newfirst = strtolinenum(s, &s); + if (*s == ',') { + for (; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_repl_lines = strtolinenum(s, &s) - + p_newfirst + 1; + if (p_repl_lines < 0) + malformed(); + } else if (p_newfirst) + p_repl_lines = 1; + else { + p_repl_lines = 0; + p_newfirst = 1; + } + if (p_newfirst >= LINENUM_MAX - p_repl_lines || + p_repl_lines >= LINENUM_MAX - p_end) + malformed(); + p_max = p_repl_lines + p_end; + if (p_max > MAXHUNKSIZE) + fatal("hunk too large (%ld lines) at line %ld: %s", + p_max, p_input_line, buf); + while (p_max >= hunkmax) + grow_hunkmax(); + if (p_repl_lines != ptrn_copiable && + (p_context != 0 || p_repl_lines != 1)) + repl_could_be_missing = false; + break; + } + goto change_line; + case '+': + case '!': + repl_could_be_missing = false; + change_line: + if (buf[1] == '\n' && canonicalize) + strlcpy(buf + 1, " \n", bufsz - 1); + if (!isspace((unsigned char)buf[1]) && + buf[1] != '>' && buf[1] != '<' && + repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + if (context >= 0) { + if (context < p_context) + p_context = context; + context = -1000; + } + p_line[p_end] = savestr(buf + 2); + if (out_of_mem) { + p_end--; + return false; + } + if (p_end == p_ptrn_lines) { + if (remove_special_line()) { + int len; + + len = strlen(p_line[p_end]) - 1; + (p_line[p_end])[len] = 0; + } + } + break; + case '\t': + case '\n': /* assume the 2 spaces got eaten */ + if (repl_beginning && repl_could_be_missing && + (!ptrn_spaces_eaten || + diff_type == NEW_CONTEXT_DIFF)) { + repl_missing = true; + goto hunk_done; + } + 
p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + if (p_end != p_ptrn_lines + 1) { + ptrn_spaces_eaten |= (repl_beginning != 0); + context++; + if (!repl_beginning) + ptrn_copiable++; + p_char[p_end] = ' '; + } + break; + case ' ': + if (!isspace((unsigned char)buf[1]) && + repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + context++; + if (!repl_beginning) + ptrn_copiable++; + p_line[p_end] = savestr(buf + 2); + if (out_of_mem) { + p_end--; + return false; + } + break; + default: + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + malformed(); + } + /* set up p_len for strncmp() so we don't have to */ + /* assume null termination */ + if (p_line[p_end]) + p_len[p_end] = strlen(p_line[p_end]); + else + p_len[p_end] = 0; + } + +hunk_done: + if (p_end >= 0 && !repl_beginning) + fatal("no --- found in patch at line %ld\n", pch_hunk_beg()); + + if (repl_missing) { + + /* reset state back to just after --- */ + p_input_line = repl_patch_line; + for (p_end--; p_end > repl_beginning; p_end--) + free(p_line[p_end]); + fseeko(pfp, repl_backtrack_position, SEEK_SET); + + /* redundant 'new' context lines were omitted - set */ + /* up to fill them in from the old file context */ + if (!p_context && p_repl_lines == 1) { + p_repl_lines = 0; + p_max--; + } + fillsrc = 1; + filldst = repl_beginning + 1; + fillcnt = p_repl_lines; + p_end = p_max; + } else if (!p_context && fillcnt == 1) { + /* the first hunk was a null hunk with no context */ + /* and we were expecting one line -- fix it up. 
*/ + while (filldst < p_end) { + p_line[filldst] = p_line[filldst + 1]; + p_char[filldst] = p_char[filldst + 1]; + p_len[filldst] = p_len[filldst + 1]; + filldst++; + } +#if 0 + repl_beginning--; /* this doesn't need to be fixed */ +#endif + p_end--; + p_first++; /* do append rather than insert */ + fillcnt = 0; + p_ptrn_lines = 0; + } + if (diff_type == CONTEXT_DIFF && + (fillcnt || (p_first > 1 && ptrn_copiable > 2 * p_context))) { + if (verbose) + say("%s\n%s\n%s\n", + "(Fascinating--this is really a new-style context diff but without", + "the telltale extra asterisks on the *** line that usually indicate", + "the new style...)"); + diff_type = NEW_CONTEXT_DIFF; + } + /* if there were omitted context lines, fill them in now */ + if (fillcnt) { + p_bfake = filldst; /* remember where not to free() */ + p_efake = filldst + fillcnt - 1; + while (fillcnt-- > 0) { + while (fillsrc <= p_end && p_char[fillsrc] != ' ') + fillsrc++; + if (fillsrc > p_end) + fatal("replacement text or line numbers mangled in hunk at line %ld\n", + p_hunk_beg); + p_line[filldst] = p_line[fillsrc]; + p_char[filldst] = p_char[fillsrc]; + p_len[filldst] = p_len[fillsrc]; + fillsrc++; + filldst++; + } + while (fillsrc <= p_end && fillsrc != repl_beginning && + p_char[fillsrc] != ' ') + fillsrc++; +#ifdef DEBUGGING + if (debug & 64) + printf("fillsrc %ld, filldst %ld, rb %ld, e+1 %ld\n", + fillsrc, filldst, repl_beginning, p_end + 1); +#endif + if (fillsrc != p_end + 1 && fillsrc != repl_beginning) + malformed(); + if (filldst != p_end + 1 && filldst != repl_beginning) + malformed(); + } + if (p_line[p_end] != NULL) { + if (remove_special_line()) { + p_len[p_end] -= 1; + (p_line[p_end])[p_len[p_end]] = 0; + } + } + } else if (diff_type == UNI_DIFF) { + off_t line_beginning = ftello(pfp); /* file pos of the current line */ + LINENUM fillsrc; /* index of old lines */ + LINENUM filldst; /* index of new lines */ + char ch; + + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1 || 
strnNE(buf, "@@ -", 4)) { + next_intuit_at(line_beginning, p_input_line); + return false; + } + s = buf + 4; + if (!*s) + malformed(); + p_first = strtolinenum(s, &s); + if (*s == ',') { + p_ptrn_lines = strtolinenum(s + 1, &s); + } else + p_ptrn_lines = 1; + if (*s == ' ') + s++; + if (*s != '+' || !*++s) + malformed(); + p_newfirst = strtolinenum(s, &s); + if (*s == ',') { + p_repl_lines = strtolinenum(s + 1, &s); + } else + p_repl_lines = 1; + if (*s == ' ') + s++; + if (*s != '@') + malformed(); + if (p_first >= LINENUM_MAX - p_ptrn_lines || + p_newfirst > LINENUM_MAX - p_repl_lines || + p_ptrn_lines >= LINENUM_MAX - p_repl_lines - 1) + malformed(); + if (!p_ptrn_lines) + p_first++; /* do append rather than insert */ + p_max = p_ptrn_lines + p_repl_lines + 1; + while (p_max >= hunkmax) + grow_hunkmax(); + fillsrc = 1; + filldst = fillsrc + p_ptrn_lines; + p_end = filldst + p_repl_lines; + snprintf(buf, bufsz, "*** %ld,%ld ****\n", p_first, + p_first + p_ptrn_lines - 1); + p_line[0] = savestr(buf); + if (out_of_mem) { + p_end = -1; + return false; + } + p_char[0] = '*'; + snprintf(buf, bufsz, "--- %ld,%ld ----\n", p_newfirst, + p_newfirst + p_repl_lines - 1); + p_line[filldst] = savestr(buf); + if (out_of_mem) { + p_end = 0; + return false; + } + p_char[filldst++] = '='; + p_context = 100; + context = 0; + p_hunk_beg = p_input_line + 1; + while (fillsrc <= p_ptrn_lines || filldst <= p_end) { + line_beginning = ftello(pfp); + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1) { + if (p_max - filldst < 3) { + /* assume blank lines got chopped */ + strlcpy(buf, " \n", bufsz); + } else { + fatal("unexpected end of file in patch\n"); + } + } + if (*buf == '\t' || *buf == '\n') { + ch = ' '; /* assume the space got eaten */ + s = savestr(buf); + } else { + ch = *buf; + s = savestr(buf + 1); + } + if (out_of_mem) { + while (--filldst > p_ptrn_lines) + free(p_line[filldst]); + p_end = fillsrc - 1; + return false; + } + switch (ch) { + case '-': + if 
(fillsrc > p_ptrn_lines) { + free(s); + p_end = filldst - 1; + malformed(); + } + p_char[fillsrc] = ch; + p_line[fillsrc] = s; + p_len[fillsrc++] = strlen(s); + if (fillsrc > p_ptrn_lines) { + if (remove_special_line()) { + p_len[fillsrc - 1] -= 1; + s[p_len[fillsrc - 1]] = 0; + } + } + break; + case '=': + ch = ' '; + /* FALL THROUGH */ + case ' ': + if (fillsrc > p_ptrn_lines) { + free(s); + while (--filldst > p_ptrn_lines) + free(p_line[filldst]); + p_end = fillsrc - 1; + malformed(); + } + context++; + p_char[fillsrc] = ch; + p_line[fillsrc] = s; + p_len[fillsrc++] = strlen(s); + s = savestr(s); + if (out_of_mem) { + while (--filldst > p_ptrn_lines) + free(p_line[filldst]); + p_end = fillsrc - 1; + return false; + } + if (fillsrc > p_ptrn_lines) { + if (remove_special_line()) { + p_len[fillsrc - 1] -= 1; + s[p_len[fillsrc - 1]] = 0; + } + } + /* FALL THROUGH */ + case '+': + if (filldst > p_end) { + free(s); + while (--filldst > p_ptrn_lines) + free(p_line[filldst]); + p_end = fillsrc - 1; + malformed(); + } + p_char[filldst] = ch; + p_line[filldst] = s; + p_len[filldst++] = strlen(s); + if (fillsrc > p_ptrn_lines) { + if (remove_special_line()) { + p_len[filldst - 1] -= 1; + s[p_len[filldst - 1]] = 0; + } + } + break; + default: + p_end = filldst; + malformed(); + } + if (ch != ' ' && context > 0) { + if (context < p_context) + p_context = context; + context = -1000; + } + } /* while */ + } else { /* normal diff--fake it up */ + char hunk_type; + int i; + LINENUM min, max; + off_t line_beginning = ftello(pfp); + + p_context = 0; + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1 || !isdigit((unsigned char)*buf)) { + next_intuit_at(line_beginning, p_input_line); + return false; + } + p_first = strtolinenum(buf, &s); + if (*s == ',') { + p_ptrn_lines = strtolinenum(s + 1, &s) - p_first + 1; + if (p_ptrn_lines < 0) + malformed(); + } else + p_ptrn_lines = (*s != 'a'); + if (p_first >= LINENUM_MAX - p_ptrn_lines) + malformed(); + hunk_type = 
*s; + if (hunk_type == 'a') + p_first++; /* do append rather than insert */ + min = strtolinenum(s + 1, &s); + if (*s == ',') + max = strtolinenum(s + 1, &s); + else + max = min; + if (min < 0 || min > max || max - min == LINENUM_MAX) + malformed(); + if (hunk_type == 'd') + min++; + p_newfirst = min; + p_repl_lines = max - min + 1; + if (p_newfirst > LINENUM_MAX - p_repl_lines || + p_ptrn_lines >= LINENUM_MAX - p_repl_lines - 1) + malformed(); + p_end = p_ptrn_lines + p_repl_lines + 1; + if (p_end > MAXHUNKSIZE) + fatal("hunk too large (%ld lines) at line %ld: %s", + p_end, p_input_line, buf); + while (p_end >= hunkmax) + grow_hunkmax(); + snprintf(buf, bufsz, "*** %ld,%ld\n", p_first, + p_first + p_ptrn_lines - 1); + p_line[0] = savestr(buf); + if (out_of_mem) { + p_end = -1; + return false; + } + p_char[0] = '*'; + for (i = 1; i <= p_ptrn_lines; i++) { + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '<') + fatal("< expected at line %ld of patch\n", + p_input_line); + p_line[i] = savestr(buf + 2); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_len[i] = strlen(p_line[i]); + p_char[i] = '-'; + } + + if (remove_special_line()) { + p_len[i - 1] -= 1; + (p_line[i - 1])[p_len[i - 1]] = 0; + } + if (hunk_type == 'c') { + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '-') + fatal("--- expected at line %ld of patch\n", + p_input_line); + } + snprintf(buf, bufsz, "--- %ld,%ld\n", min, max); + p_line[i] = savestr(buf); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_char[i] = '='; + for (i++; i <= p_end; i++) { + ret = pgetline(&buf, &bufsz, pfp); + p_input_line++; + if (ret == -1) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '>') + fatal("> expected at line %ld of patch\n", + 
p_input_line); + p_line[i] = savestr(buf + 2); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_len[i] = strlen(p_line[i]); + p_char[i] = '+'; + } + + if (remove_special_line()) { + p_len[i - 1] -= 1; + (p_line[i - 1])[p_len[i - 1]] = 0; + } + } + if (reverse) /* backwards patch? */ + if (!pch_swap()) + say("Not enough memory to swap next hunk!\n"); +#ifdef DEBUGGING + if (debug & 2) { + int i; + char special; + + for (i = 0; i <= p_end; i++) { + if (i == p_ptrn_lines) + special = '^'; + else + special = ' '; + fprintf(stderr, "%3d %c %c %s", i, p_char[i], + special, p_line[i]); + fflush(stderr); + } + } +#endif + if (p_end + 1 < hunkmax)/* paranoia reigns supreme... */ + p_char[p_end + 1] = '^'; /* add a stopper for apply_hunk */ + return true; +} + +/* + * Input a line from the patch file, worrying about indentation. + * + * One line is read with getline() into *bf, which may be reallocated (with + * *sz updated). When p_indent is non-zero, leading blanks, tabs and 'X' + * characters covering up to p_indent columns are then removed in place; + * a tab advances to the next multiple of eight columns. The value + * returned is getline()'s result, so -1 means nothing was read. + */ +int +pgetline(char **bf, size_t *sz, FILE *fp) +{ + char *s; + int indent = 0; + int ret; + + ret = getline(bf, sz, fp); + + if (p_indent && ret != -1) { + /* use the caller's buffer: getline() may have moved it */ + for (s = *bf; + indent < p_indent && (*s == ' ' || *s == '\t' || *s == 'X'); + s++) { + if (*s == '\t') + indent += 8 - (indent % 8); /* next tab stop */ + else + indent++; + } + /* the regions overlap, so shift the text down with memmove() */ + if (s != *bf) + memmove(*bf, s, strlen(s) + 1); + } + return ret; +} + +/* + * Reverse the old and new portions of the current hunk. + */ +bool +pch_swap(void) +{ + char **tp_line; /* the text of the hunk */ + short *tp_len; /* length of each line */ + char *tp_char; /* +, -, and ! 
*/ + LINENUM i; + LINENUM n; + bool blankline = false; + char *s; + + i = p_first; + p_first = p_newfirst; + p_newfirst = i; + + /* make a scratch copy */ + + tp_line = p_line; + tp_len = p_len; + tp_char = p_char; + p_line = NULL; /* force set_hunkmax to allocate again */ + p_len = NULL; + p_char = NULL; + set_hunkmax(); + if (p_line == NULL || p_len == NULL || p_char == NULL) { + + free(p_line); + p_line = tp_line; + free(p_len); + p_len = tp_len; + free(p_char); + p_char = tp_char; + return false; /* not enough memory to swap hunk! */ + } + /* now turn the new into the old */ + + i = p_ptrn_lines + 1; + if (tp_char[i] == '\n') { /* account for possible blank line */ + blankline = true; + i++; + } + if (p_efake >= 0) { /* fix non-freeable ptr range */ + if (p_efake <= i) + n = p_end - i + 1; + else + n = -i; + p_efake += n; + p_bfake += n; + } + for (n = 0; i <= p_end; i++, n++) { + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + if (p_char[n] == '+') + p_char[n] = '-'; + p_len[n] = tp_len[i]; + } + if (blankline) { + i = p_ptrn_lines + 1; + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + p_len[n] = tp_len[i]; + n++; + } + if (p_char[0] != '=') + fatal("Malformed patch at line %ld: expected '=' found '%c'\n", + p_input_line, p_char[0]); + p_char[0] = '*'; + for (s = p_line[0]; *s; s++) + if (*s == '-') + *s = '*'; + + /* now turn the old into the new */ + + if (p_char[0] != '*') + fatal("Malformed patch at line %ld: expected '*' found '%c'\n", + p_input_line, p_char[0]); + tp_char[0] = '='; + for (s = tp_line[0]; *s; s++) + if (*s == '*') + *s = '-'; + for (i = 0; n <= p_end; i++, n++) { + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + if (p_char[n] == '-') + p_char[n] = '+'; + p_len[n] = tp_len[i]; + } + + if (i != p_ptrn_lines + 1) + fatal("Malformed patch at line %ld: expected %ld lines, " + "got %ld\n", + p_input_line, p_ptrn_lines + 1, i); + + i = p_ptrn_lines; + p_ptrn_lines = p_repl_lines; + p_repl_lines = i; + + free(tp_line); + free(tp_len); 
+ free(tp_char); + + return true; +} + +/* + * Return the specified line position in the old file of the old context. + * Plain accessor for p_first. + */ +LINENUM +pch_first(void) +{ + return p_first; +} + +/* + * Return the number of lines of old context. + * Plain accessor for p_ptrn_lines, the size of the old side of the hunk. + */ +LINENUM +pch_ptrn_lines(void) +{ + return p_ptrn_lines; +} + +/* + * Return the probable line position in the new file of the first line. + * Plain accessor for p_newfirst. + */ +LINENUM +pch_newfirst(void) +{ + return p_newfirst; +} + +/* + * Return the number of lines in the replacement text including context. + * Plain accessor for p_repl_lines. + */ +LINENUM +pch_repl_lines(void) +{ + return p_repl_lines; +} + +/* + * Return the number of lines in the whole hunk. + * Plain accessor for p_end. Elsewhere in this file p_end is used as the + * index of the last hunk slot (loops run while i <= p_end) and slot 0 holds + * the old-side header line, so treat the value as a last index. + */ +LINENUM +pch_end(void) +{ + return p_end; +} + +/* + * Return the number of context lines before the first changed line. + * Plain accessor for p_context. + */ +LINENUM +pch_context(void) +{ + return p_context; +} + +/* + * Return the length of a particular patch line. + * The length comes from p_len[] and is a short; line is not range-checked. + */ +short +pch_line_len(LINENUM line) +{ + return p_len[line]; +} + +/* + * Return the control character (+, -, *, !, etc) for a patch line. + * Read from p_char[]; line is not range-checked. + */ +char +pch_char(LINENUM line) +{ + return p_char[line]; +} + +/* + * Return a pointer to a particular patch line. + * The pointer is the hunk's own p_line[] entry, not a copy; line is not + * range-checked. + */ +char * +pfetch(LINENUM line) +{ + return p_line[line]; +} + +/* + * Return where in the patch file this hunk began, for error messages. + * Plain accessor for p_hunk_beg. + */ +LINENUM +pch_hunk_beg(void) +{ + return p_hunk_beg; +} + +/* + * Choose the name of the file to be patched based on POSIX rules. + * NOTE: the POSIX rules are amazingly stupid and we only follow them + * if the user specified --posix or set POSIXLY_CORRECT. + */ +static char * +posix_name(const struct file_name *names, bool assume_exists) +{ + char *path = NULL; + int i; + + /* + * POSIX states that the filename will be chosen from one + * of the old, new and index names (in that order) if + * the file exists relative to CWD after -p stripping. 
+ */ + for (i = 0; i < MAX_FILE; i++) { + if (names[i].path != NULL && names[i].exists) { + path = names[i].path; + break; + } + } + if (path == NULL && !assume_exists) { + /* + * No files found, check to see if the diff could be + * creating a new file. + */ + if (path == NULL && ok_to_create_file && + names[NEW_FILE].path != NULL) + path = names[NEW_FILE].path; + } + + return path ? xstrdup(path) : NULL; +} + +/* + * Pick the preferred candidate among names[]; see the ordering below. + * Entries without a path are skipped, as are entries that do not exist + * unless assume_exists is set. Returns the chosen path pointer itself + * (not a copy), or NULL when no entry qualifies. + */ +static char * +compare_names(const struct file_name *names, bool assume_exists) +{ + size_t min_components, min_baselen, min_len, tmp; + char *best = NULL; + char *path; + int i; + + /* + * The "best" name is the one with the fewest number of path + * components, the shortest basename length, and the shortest + * overall length (in that order). We only use the Index: file + * if neither of the old or new files could be intuited from + * the diff header. + */ + min_components = min_baselen = min_len = SIZE_MAX; + for (i = INDEX_FILE; i >= OLD_FILE; i--) { + path = names[i].path; + if (path == NULL || (!names[i].exists && !assume_exists)) + continue; + if ((tmp = num_components(path)) > min_components) + continue; + if (tmp < min_components) { + min_components = tmp; + /* + * Strictly better on the first key: the minima recorded + * for the lower keys belong to beaten candidates. + */ + min_baselen = min_len = SIZE_MAX; + best = path; + } + if ((tmp = strlen(basename(path))) > min_baselen) + continue; + if (tmp < min_baselen) { + min_baselen = tmp; + min_len = SIZE_MAX; /* same reasoning, last key */ + best = path; + } + if ((tmp = strlen(path)) > min_len) + continue; + min_len = tmp; + best = path; + } + return best; +} + +/* + * Choose the name of the file to be patched based on the "best" one + * available. + */ +static char * +best_name(const struct file_name *names, bool assume_exists) +{ + char *best; + + best = compare_names(names, assume_exists); + + /* No match? Check to see if the diff could be creating a new file. */ + if (best == NULL && ok_to_create_file) + best = names[NEW_FILE].path; + + return best ? 
xstrdup(best) : NULL; +} + +/* + * Count the path components of PATH that are introduced by a slash: + * each run of one or more consecutive slashes counts once. + */ +static size_t +num_components(const char *path) +{ + size_t n; + const char *cp; + + /* + * Do not advance cp again after skipping the slashes: when the path + * ends in a slash cp already rests on the terminating NUL, and one + * more step would make strchr() read past the end of the string. + */ + for (n = 0, cp = path; (cp = strchr(cp, '/')) != NULL; n++) { + while (*cp == '/') + cp++; /* skip consecutive slashes */ + } + return n; +} + +/* + * Convert number at NPTR into LINENUM and save address of first + * character that is not a digit in ENDPTR. If conversion is not + * possible, call fatal. + */ +LINENUM +strtolinenum(char *nptr, char **endptr) +{ + LINENUM rv; + char c; + char *p; + const char *errstr; + + for (p = nptr; isdigit((unsigned char)*p); p++) + ; + + if (p == nptr) + malformed(); + + c = *p; + *p = '\0'; + + rv = strtonum(nptr, 0, LINENUM_MAX, &errstr); + if (errstr != NULL) + fatal("invalid line number at line %ld: `%s' is %s\n", + p_input_line, nptr, errstr); + + *p = c; + *endptr = p; + + return rv; +} diff --git a/usr.bin/patch/pch.h b/usr.bin/patch/pch.h new file mode 100644 index 0000000..3367056 --- /dev/null +++ b/usr.bin/patch/pch.h @@ -0,0 +1,60 @@ +/* $OpenBSD: pch.h,v 1.13 2019/12/11 20:10:17 jca Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#define OLD_FILE 0 +#define NEW_FILE 1 +#define INDEX_FILE 2 +#define MAX_FILE 3 + +struct file_name { + char *path; + bool exists; +}; + +extern FILE *pfp; +extern LINENUM p_input_line; + +void re_patch(void); +void open_patch_file(const char *); +void set_hunkmax(void); +bool there_is_another_patch(void); +void next_intuit_at(off_t, LINENUM); +bool another_hunk(void); +int pgetline(char **, size_t *, FILE *); +bool pch_swap(void); +LINENUM pch_first(void); +LINENUM pch_ptrn_lines(void); +LINENUM pch_newfirst(void); +LINENUM pch_repl_lines(void); +LINENUM pch_end(void); +LINENUM pch_context(void); +short pch_line_len(LINENUM); +char pch_char(LINENUM); +char *pfetch(LINENUM); +LINENUM pch_hunk_beg(void); +LINENUM strtolinenum(char *, char **); diff --git a/usr.bin/patch/util.c b/usr.bin/patch/util.c new file mode 100644 index 0000000..f079b51 --- /dev/null +++ b/usr.bin/patch/util.c @@ -0,0 +1,425 @@ +/* $OpenBSD: util.c,v 1.45 2019/12/02 22:17:32 jca Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <paths.h> +#include <signal.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "backupfile.h" + +/* Rename a file, copying it if necessary. */ + +int +move_file(const char *from, const char *to) +{ + int fromfd; + ssize_t i; + + /* to stdout? 
*/ + + if (strEQ(to, "-")) { +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to stdout.\n", from); +#endif + fromfd = open(from, O_RDONLY); + if (fromfd == -1) + pfatal("internal error, can't reopen %s", from); + while ((i = read(fromfd, buf, bufsz)) > 0) + if (write(STDOUT_FILENO, buf, i) != i) + pfatal("write failed"); + close(fromfd); + return 0; + } + if (backup_file(to) < 0) { + say("Can't backup %s, output is in %s: %s\n", to, from, + strerror(errno)); + return -1; + } +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to %s.\n", from, to); +#endif + if (rename(from, to) == -1) { + if (errno != EXDEV || copy_file(from, to) < 0) { + say("Can't create %s, output is in %s: %s\n", + to, from, strerror(errno)); + return -1; + } + } + return 0; +} + +/* Backup the original file. */ + +int +backup_file(const char *orig) +{ + struct stat filestat; + char bakname[PATH_MAX], *s, *simplename; + dev_t orig_device; + ino_t orig_inode; + + if (backup_type == none || stat(orig, &filestat) != 0) + return 0; /* nothing to do */ + orig_device = filestat.st_dev; + orig_inode = filestat.st_ino; + + if (origprae) { + if (strlcpy(bakname, origprae, sizeof(bakname)) >= sizeof(bakname) || + strlcat(bakname, orig, sizeof(bakname)) >= sizeof(bakname)) + fatal("filename %s too long for buffer\n", origprae); + } else { + if ((s = find_backup_file_name(orig)) == NULL) + fatal("out of memory\n"); + if (strlcpy(bakname, s, sizeof(bakname)) >= sizeof(bakname)) + fatal("filename %s too long for buffer\n", s); + free(s); + } + + if ((simplename = strrchr(bakname, '/')) != NULL) + simplename = simplename + 1; + else + simplename = bakname; + + /* + * Find a backup name that is not the same file. Change the + * first lowercase char into uppercase; if that isn't + * sufficient, chop off the first char and try again. + */ + while (stat(bakname, &filestat) == 0 && + orig_device == filestat.st_dev && orig_inode == filestat.st_ino) { + /* Skip initial non-lowercase chars. 
*/ + for (s = simplename; *s && !islower((unsigned char)*s); s++) + ; + if (*s) + *s = toupper((unsigned char)*s); + else + memmove(simplename, simplename + 1, + strlen(simplename + 1) + 1); + } +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to %s.\n", orig, bakname); +#endif + if (rename(orig, bakname) == -1) { + if (errno != EXDEV || copy_file(orig, bakname) < 0) + return -1; + } + return 0; +} + +/* + * Copy a file. + */ +int +copy_file(const char *from, const char *to) +{ + int tofd, fromfd; + ssize_t i; + + tofd = open(to, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (tofd == -1) + return -1; + fromfd = open(from, O_RDONLY, 0); + if (fromfd == -1) + pfatal("internal error, can't reopen %s", from); + while ((i = read(fromfd, buf, bufsz)) > 0) + if (write(tofd, buf, i) != i) + pfatal("write to %s failed", to); + close(fromfd); + close(tofd); + return 0; +} + +/* + * Allocate a unique area for a string. + */ +char * +savestr(const char *s) +{ + char *rv; + + if (!s) + s = "Oops"; + rv = strdup(s); + if (rv == NULL) { + if (using_plan_a) + out_of_mem = true; + else + fatal("out of memory\n"); + } + return rv; +} + +/* + * Allocate a unique area for a string. Call fatal if out of memory. + */ +char * +xstrdup(const char *s) +{ + char *rv; + + if (!s) + s = "Oops"; + rv = strdup(s); + if (rv == NULL) + fatal("out of memory\n"); + return rv; +} + +/* + * Vanilla terminal output (buffered). + */ +void +say(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + fflush(stdout); +} + +/* + * Terminal output, pun intended. + */ +void +fatal(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "patch: **** "); + vfprintf(stderr, fmt, ap); + va_end(ap); + my_exit(2); +} + +/* + * Say something from patch, something from the system, then silence . . . + */ +void +pfatal(const char *fmt, ...) 
+{ + va_list ap; + int errnum = errno; + + fprintf(stderr, "patch: **** "); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, ": %s\n", strerror(errnum)); + my_exit(2); +} + +/* + * Get a response from the user via /dev/tty + */ +void +ask(const char *fmt, ...) +{ + va_list ap; + ssize_t nr; + static int ttyfd = -1; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + fflush(stdout); + if (ttyfd < 0) + ttyfd = open(_PATH_TTY, O_RDONLY); + if (ttyfd >= 0) { + if ((nr = read(ttyfd, buf, bufsz)) > 0 && + buf[nr - 1] == '\n') + buf[nr - 1] = '\0'; + } + if (ttyfd == -1 || nr <= 0) { + /* no tty or error reading, pretend user entered 'return' */ + putchar('\n'); + buf[0] = '\0'; + } +} + +/* + * How to handle certain events when not in a critical region. + */ +void +set_signals(int reset) +{ + static sig_t hupval, intval; + + if (!reset) { + hupval = signal(SIGHUP, SIG_IGN); + if (hupval != SIG_IGN) + hupval = my_sigexit; + intval = signal(SIGINT, SIG_IGN); + if (intval != SIG_IGN) + intval = my_sigexit; + } + signal(SIGHUP, hupval); + signal(SIGINT, intval); +} + +/* + * How to handle certain events when in a critical region. + */ +void +ignore_signals(void) +{ + signal(SIGHUP, SIG_IGN); + signal(SIGINT, SIG_IGN); +} + +/* + * Make sure we'll have the directories to create a file. If `striplast' is + * true, ignore the last element of `filename'. + */ + +void +makedirs(const char *filename, bool striplast) +{ + char *tmpbuf; + + if ((tmpbuf = strdup(filename)) == NULL) + fatal("out of memory\n"); + + if (striplast) { + char *s = strrchr(tmpbuf, '/'); + if (s == NULL) { + free(tmpbuf); + return; /* nothing to be done */ + } + *s = '\0'; + } + if (mkpath(tmpbuf) != 0) + pfatal("creation of %s failed", tmpbuf); + free(tmpbuf); +} + +/* + * Make filenames more reasonable. 
+ */ +/* + * The name is taken from AT after leading white space and runs to the + * first white-space character (to the first tab when AT contains one). + * Up to STRIP_LEADING leading path components are dropped. Returns a + * newly allocated string, with *EXISTS telling whether stat() succeeds on + * it, or NULL (leaving *EXISTS untouched) when AT is NULL, empty or names + * the null device. + */ +char * +fetchname(const char *at, bool *exists, int strip_leading) +{ + char *fullname, *name, *t; + int sleading, tab; + struct stat filestat; + + if (at == NULL || *at == '\0') + return NULL; + while (isspace((unsigned char)*at)) + at++; +#ifdef DEBUGGING + if (debug & 128) + say("fetchname %s %d\n", at, strip_leading); +#endif + /* So files can be created by diffing against /dev/null. */ + if (strnEQ(at, _PATH_DEVNULL, sizeof(_PATH_DEVNULL) - 1)) + return NULL; + /* + * xstrdup() rather than savestr(): the copy is used immediately, and + * savestr() may hand back NULL instead of failing. + */ + name = fullname = t = xstrdup(at); + + tab = strchr(t, '\t') != NULL; + /* Strip off up to `strip_leading' path components and NUL terminate. */ + for (sleading = strip_leading; *t != '\0' && ((tab && *t != '\t') || + !isspace((unsigned char)*t)); t++) { + if (t[0] == '/' && t[1] != '/' && t[1] != '\0') + if (--sleading >= 0) + name = t + 1; + } + *t = '\0'; + + /* + * If no -p option was given (957 is the default value!), we were + * given a relative pathname, and the leading directories that we + * just stripped off all exist, put them back on. + */ + if (strip_leading == 957 && name != fullname && *fullname != '/') { + name[-1] = '\0'; + if (stat(fullname, &filestat) == 0 && S_ISDIR(filestat.st_mode)) { + name[-1] = '/'; + name = fullname; + } + } + name = xstrdup(name); /* the result is passed to stat() just below */ + free(fullname); + + *exists = stat(name, &filestat) == 0; + return name; +} + +void +version(void) +{ + fprintf(stderr, "Patch version 2.0-12u8-OpenBSD\n"); + my_exit(EXIT_SUCCESS); +} + +void +my_cleanup(void) +{ + unlink(TMPINNAME); + if (!toutkeep) + unlink(TMPOUTNAME); + if (!trejkeep) + unlink(TMPREJNAME); + unlink(TMPPATNAME); +} + +/* + * Exit with cleanup. + */ +void +my_exit(int status) +{ + my_cleanup(); + exit(status); +} + +/* + * Exit with cleanup, from a signal handler. 
+ */ +void +my_sigexit(int signo) +{ + my_cleanup(); /* same cleanup step that my_exit() performs */ + _exit(2); /* always status 2; signo itself is not consulted */ +} diff --git a/usr.bin/patch/util.h b/usr.bin/patch/util.h new file mode 100644 index 0000000..a21929b --- /dev/null +++ b/usr.bin/patch/util.h @@ -0,0 +1,51 @@ +/* $OpenBSD: util.h,v 1.18 2019/08/17 14:25:06 deraadt Exp $ */ + +/* + * patch - a program to apply diffs to original files + * + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + */ + +char *fetchname(const char *, bool *, int); +int backup_file(const char *); +int move_file(const char *, const char *); +int copy_file(const char *, const char *); +void say(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void fatal(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void pfatal(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void ask(const char *, ...) 
+ __attribute__((__format__(__printf__, 1, 2))); +char *savestr(const char *); +char *xstrdup(const char *); +void set_signals(int); +void ignore_signals(void); +void makedirs(const char *, bool); +void version(void); +void my_exit(int) __attribute__((noreturn)); +void my_sigexit(int) __attribute__((noreturn)); + +/* in mkpath.c */ +extern int mkpath(char *); diff --git a/usr.bin/signify/CVS/Entries b/usr.bin/signify/CVS/Entries new file mode 100644 index 0000000..7feef06 --- /dev/null +++ b/usr.bin/signify/CVS/Entries @@ -0,0 +1,16 @@ +/Makefile/1.12/Sun May 28 21:59:56 2017// +/crypto_api.c/1.1/Wed Jan 8 03:59:46 2014// +/crypto_api.h/1.2/Tue Jan 16 21:42:40 2018// +/fe25519.c/1.1/Tue Jul 22 00:41:19 2014// +/fe25519.h/1.1/Tue Jul 22 00:41:19 2014// +/ge25519.h/1.2/Mon Feb 16 18:26:26 2015// +/ge25519_base.data/1.1/Tue Jul 22 00:41:19 2014// +/mod_ed25519.c/1.1/Wed Jan 8 05:00:01 2014// +/mod_ge25519.c/1.2/Wed Jan 8 05:51:35 2014// +/sc25519.c/1.1/Tue Jul 22 00:41:19 2014// +/sc25519.h/1.1/Tue Jul 22 00:41:19 2014// +/signify.1/1.50/Sun Apr 5 06:34:20 2020// +/signify.c/1.135/Tue Jan 21 12:13:21 2020// +/signify.h/1.2/Sat Mar 23 07:10:06 2019// +/zsig.c/1.18/Sun Dec 22 06:37:25 2019// +D diff --git a/usr.bin/signify/CVS/Repository b/usr.bin/signify/CVS/Repository new file mode 100644 index 0000000..17ac2fe --- /dev/null +++ b/usr.bin/signify/CVS/Repository @@ -0,0 +1 @@ +src/usr.bin/signify diff --git a/usr.bin/signify/CVS/Root b/usr.bin/signify/CVS/Root new file mode 100644 index 0000000..3811072 --- /dev/null +++ b/usr.bin/signify/CVS/Root @@ -0,0 +1 @@ +/cvs diff --git a/usr.bin/signify/Makefile b/usr.bin/signify/Makefile new file mode 100644 index 0000000..de94571 --- /dev/null +++ b/usr.bin/signify/Makefile @@ -0,0 +1,16 @@ +# $OpenBSD: Makefile,v 1.12 2017/05/28 21:59:56 tedu Exp $ + +SRCS= signify.c +SRCS+= zsig.c +SRCS+= fe25519.c sc25519.c +SRCS+= mod_ed25519.c mod_ge25519.c +SRCS+= crypto_api.c + +PROG= signify + +LDADD= -lutil +DPADD= ${LIBUTIL} + 
+COPTS+= -Wall + +.include <bsd.prog.mk> diff --git a/usr.bin/signify/crypto_api.c b/usr.bin/signify/crypto_api.c new file mode 100644 index 0000000..d9d8791 --- /dev/null +++ b/usr.bin/signify/crypto_api.c @@ -0,0 +1,30 @@ +/* $OpenBSD: crypto_api.c,v 1.1 2014/01/08 03:59:46 tedu Exp $ */ +/* + * Public domain. Author: Ted Unangst <tedu@openbsd.org> + * API compatible reimplementation of functions from nacl + */ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <string.h> +#include <sha2.h> + +#include "crypto_api.h" + +int +crypto_hash_sha512(unsigned char *out, const unsigned char *in, + unsigned long long inlen) +{ + SHA2_CTX ctx; + + SHA512Init(&ctx); + SHA512Update(&ctx, in, inlen); + SHA512Final(out, &ctx); + return 0; +} + +int +crypto_verify_32(const unsigned char *x, const unsigned char *y) +{ + return timingsafe_bcmp(x, y, 32) ? -1 : 0; +} diff --git a/usr.bin/signify/crypto_api.h b/usr.bin/signify/crypto_api.h new file mode 100644 index 0000000..540a7c2 --- /dev/null +++ b/usr.bin/signify/crypto_api.h @@ -0,0 +1,39 @@ +/* $OpenBSD: crypto_api.h,v 1.2 2018/01/16 21:42:40 naddy Exp $ */ + +/* + * Assembled from generated headers and source files by Markus Friedl. + * Placed in the public domain. 
+ */ + +#ifndef crypto_api_h +#define crypto_api_h + +#include <stdint.h> +#include <stdlib.h> + +typedef int32_t crypto_int32; +typedef uint32_t crypto_uint32; + +#define randombytes(buf, buf_len) arc4random_buf((buf), (buf_len)) + +int crypto_hashblocks_sha512(unsigned char *, const unsigned char *, + unsigned long long); + +#define crypto_hash_sha512_BYTES 64U + +int crypto_hash_sha512(unsigned char *, const unsigned char *, + unsigned long long); + +int crypto_verify_32(const unsigned char *, const unsigned char *); + +#define crypto_sign_ed25519_SECRETKEYBYTES 64U +#define crypto_sign_ed25519_PUBLICKEYBYTES 32U +#define crypto_sign_ed25519_BYTES 64U + +int crypto_sign_ed25519(unsigned char *, unsigned long long *, + const unsigned char *, unsigned long long, const unsigned char *); +int crypto_sign_ed25519_open(unsigned char *, unsigned long long *, + const unsigned char *, unsigned long long, const unsigned char *); +int crypto_sign_ed25519_keypair(unsigned char *, unsigned char *); + +#endif /* crypto_api_h */ diff --git a/usr.bin/signify/fe25519.c b/usr.bin/signify/fe25519.c new file mode 100644 index 0000000..5a6b77d --- /dev/null +++ b/usr.bin/signify/fe25519.c @@ -0,0 +1,335 @@ +/* $OpenBSD: fe25519.c,v 1.1 2014/07/22 00:41:19 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/fe25519.c + */ + +#define WINDOWSIZE 1 /* Should be 1,2, or 4 */ +#define WINDOWMASK ((1<<WINDOWSIZE)-1) + +#include "fe25519.h" + +static crypto_uint32 equal(crypto_uint32 a,crypto_uint32 b) /* 16-bit inputs */ +{ + crypto_uint32 x = a ^ b; /* 0: yes; 1..65535: no */ + x -= 1; /* 4294967295: yes; 0..65534: no */ + x >>= 31; /* 1: yes; 0: no */ + return x; +} + +static crypto_uint32 ge(crypto_uint32 a,crypto_uint32 b) /* 16-bit inputs */ +{ + unsigned int x = a; + x -= (unsigned int) b; /* 0..65535: yes; 4294901761..4294967295: no */ + x >>= 31; /* 0: yes; 1: no */ + x ^= 1; /* 1: yes; 0: no */ + return x; +} + +static crypto_uint32 times19(crypto_uint32 a) +{ + return (a << 4) + (a << 1) + a; +} + +static crypto_uint32 times38(crypto_uint32 a) +{ + return (a << 5) + (a << 2) + (a << 1); +} + +static void reduce_add_sub(fe25519 *r) +{ + crypto_uint32 t; + int i,rep; + + for(rep=0;rep<4;rep++) + { + t = r->v[31] >> 7; + r->v[31] &= 127; + t = times19(t); + r->v[0] += t; + for(i=0;i<31;i++) + { + t = r->v[i] >> 8; + r->v[i+1] += t; + r->v[i] &= 255; + } + } +} + +static void reduce_mul(fe25519 *r) +{ + crypto_uint32 t; + int i,rep; + + for(rep=0;rep<2;rep++) + { + t = r->v[31] >> 7; + r->v[31] &= 127; + t = times19(t); + r->v[0] += t; + for(i=0;i<31;i++) + { + t = r->v[i] >> 8; + r->v[i+1] += t; + r->v[i] &= 255; + } + } +} + +/* reduction modulo 2^255-19 */ +void fe25519_freeze(fe25519 *r) +{ + int i; + crypto_uint32 m = equal(r->v[31],127); + for(i=30;i>0;i--) + m &= equal(r->v[i],255); + m &= ge(r->v[0],237); + + m = -m; + + r->v[31] -= m&127; + for(i=30;i>0;i--) + r->v[i] -= m&255; + r->v[0] -= m&237; +} + +void fe25519_unpack(fe25519 *r, const unsigned char x[32]) +{ + int i; + for(i=0;i<32;i++) r->v[i] = x[i]; + r->v[31] &= 127; +} + +/* Assumes input x being reduced below 2^255 */ +void fe25519_pack(unsigned char r[32], const fe25519 *x) +{ + int i; + fe25519 y = *x; + fe25519_freeze(&y); + 
for(i=0;i<32;i++) + r[i] = y.v[i]; +} + +int fe25519_iszero(const fe25519 *x) +{ + int i; + int r; + fe25519 t = *x; + fe25519_freeze(&t); + r = equal(t.v[0],0); + for(i=1;i<32;i++) + r &= equal(t.v[i],0); + return r; +} + +int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y) +{ + int i; + fe25519 t1 = *x; + fe25519 t2 = *y; + fe25519_freeze(&t1); + fe25519_freeze(&t2); + for(i=0;i<32;i++) + if(t1.v[i] != t2.v[i]) return 0; + return 1; +} + +void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b) +{ + int i; + crypto_uint32 mask = b; + mask = -mask; + for(i=0;i<32;i++) r->v[i] ^= mask & (x->v[i] ^ r->v[i]); +} + +unsigned char fe25519_getparity(const fe25519 *x) +{ + fe25519 t = *x; + fe25519_freeze(&t); + return t.v[0] & 1; +} + +void fe25519_setone(fe25519 *r) +{ + int i; + r->v[0] = 1; + for(i=1;i<32;i++) r->v[i]=0; +} + +void fe25519_setzero(fe25519 *r) +{ + int i; + for(i=0;i<32;i++) r->v[i]=0; +} + +void fe25519_neg(fe25519 *r, const fe25519 *x) +{ + fe25519 t; + int i; + for(i=0;i<32;i++) t.v[i]=x->v[i]; + fe25519_setzero(r); + fe25519_sub(r, r, &t); +} + +void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i; + for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i]; + reduce_add_sub(r); +} + +void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i; + crypto_uint32 t[32]; + t[0] = x->v[0] + 0x1da; + t[31] = x->v[31] + 0xfe; + for(i=1;i<31;i++) t[i] = x->v[i] + 0x1fe; + for(i=0;i<32;i++) r->v[i] = t[i] - y->v[i]; + reduce_add_sub(r); +} + +void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i,j; + crypto_uint32 t[63]; + for(i=0;i<63;i++)t[i] = 0; + + for(i=0;i<32;i++) + for(j=0;j<32;j++) + t[i+j] += x->v[i] * y->v[j]; + + for(i=32;i<63;i++) + r->v[i-32] = t[i-32] + times38(t[i]); + r->v[31] = t[31]; /* result now in r[0]...r[31] */ + + reduce_mul(r); +} + +void fe25519_square(fe25519 *r, const fe25519 *x) +{ + fe25519_mul(r, x, x); +} + +void fe25519_invert(fe25519 *r, const fe25519 *x) 
+{ + fe25519 z2; + fe25519 z9; + fe25519 z11; + fe25519 z2_5_0; + fe25519 z2_10_0; + fe25519 z2_20_0; + fe25519 z2_50_0; + fe25519 z2_100_0; + fe25519 t0; + fe25519 t1; + int i; + + /* 2 */ fe25519_square(&z2,x); + /* 4 */ fe25519_square(&t1,&z2); + /* 8 */ fe25519_square(&t0,&t1); + /* 9 */ fe25519_mul(&z9,&t0,x); + /* 11 */ fe25519_mul(&z11,&z9,&z2); + /* 22 */ fe25519_square(&t0,&z11); + /* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t0,&z9); + + /* 2^6 - 2^1 */ fe25519_square(&t0,&z2_5_0); + /* 2^7 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^8 - 2^3 */ fe25519_square(&t0,&t1); + /* 2^9 - 2^4 */ fe25519_square(&t1,&t0); + /* 2^10 - 2^5 */ fe25519_square(&t0,&t1); + /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t0,&z2_5_0); + + /* 2^11 - 2^1 */ fe25519_square(&t0,&z2_10_0); + /* 2^12 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t1,&z2_10_0); + + /* 2^21 - 2^1 */ fe25519_square(&t0,&z2_20_0); + /* 2^22 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^40 - 2^0 */ fe25519_mul(&t0,&t1,&z2_20_0); + + /* 2^41 - 2^1 */ fe25519_square(&t1,&t0); + /* 2^42 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); } + /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t0,&z2_10_0); + + /* 2^51 - 2^1 */ fe25519_square(&t0,&z2_50_0); + /* 2^52 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t1,&z2_50_0); + + /* 2^101 - 2^1 */ fe25519_square(&t1,&z2_100_0); + /* 2^102 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); } + /* 2^200 - 2^0 */ fe25519_mul(&t1,&t0,&z2_100_0); + + /* 2^201 - 2^1 */ 
fe25519_square(&t0,&t1); + /* 2^202 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^250 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0); + + /* 2^251 - 2^1 */ fe25519_square(&t1,&t0); + /* 2^252 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^253 - 2^3 */ fe25519_square(&t1,&t0); + /* 2^254 - 2^4 */ fe25519_square(&t0,&t1); + /* 2^255 - 2^5 */ fe25519_square(&t1,&t0); + /* 2^255 - 21 */ fe25519_mul(r,&t1,&z11); +} + +void fe25519_pow2523(fe25519 *r, const fe25519 *x) +{ + fe25519 z2; + fe25519 z9; + fe25519 z11; + fe25519 z2_5_0; + fe25519 z2_10_0; + fe25519 z2_20_0; + fe25519 z2_50_0; + fe25519 z2_100_0; + fe25519 t; + int i; + + /* 2 */ fe25519_square(&z2,x); + /* 4 */ fe25519_square(&t,&z2); + /* 8 */ fe25519_square(&t,&t); + /* 9 */ fe25519_mul(&z9,&t,x); + /* 11 */ fe25519_mul(&z11,&z9,&z2); + /* 22 */ fe25519_square(&t,&z11); + /* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t,&z9); + + /* 2^6 - 2^1 */ fe25519_square(&t,&z2_5_0); + /* 2^10 - 2^5 */ for (i = 1;i < 5;i++) { fe25519_square(&t,&t); } + /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t,&z2_5_0); + + /* 2^11 - 2^1 */ fe25519_square(&t,&z2_10_0); + /* 2^20 - 2^10 */ for (i = 1;i < 10;i++) { fe25519_square(&t,&t); } + /* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t,&z2_10_0); + + /* 2^21 - 2^1 */ fe25519_square(&t,&z2_20_0); + /* 2^40 - 2^20 */ for (i = 1;i < 20;i++) { fe25519_square(&t,&t); } + /* 2^40 - 2^0 */ fe25519_mul(&t,&t,&z2_20_0); + + /* 2^41 - 2^1 */ fe25519_square(&t,&t); + /* 2^50 - 2^10 */ for (i = 1;i < 10;i++) { fe25519_square(&t,&t); } + /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t,&z2_10_0); + + /* 2^51 - 2^1 */ fe25519_square(&t,&z2_50_0); + /* 2^100 - 2^50 */ for (i = 1;i < 50;i++) { fe25519_square(&t,&t); } + /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t,&z2_50_0); + + /* 2^101 - 2^1 */ fe25519_square(&t,&z2_100_0); + /* 2^200 - 2^100 */ for (i = 1;i < 100;i++) { fe25519_square(&t,&t); } + /* 2^200 - 2^0 */ 
fe25519_mul(&t,&t,&z2_100_0); + + /* 2^201 - 2^1 */ fe25519_square(&t,&t); + /* 2^250 - 2^50 */ for (i = 1;i < 50;i++) { fe25519_square(&t,&t); } + /* 2^250 - 2^0 */ fe25519_mul(&t,&t,&z2_50_0); + + /* 2^251 - 2^1 */ fe25519_square(&t,&t); + /* 2^252 - 2^2 */ fe25519_square(&t,&t); + /* 2^252 - 3 */ fe25519_mul(r,&t,x); +} diff --git a/usr.bin/signify/fe25519.h b/usr.bin/signify/fe25519.h new file mode 100644 index 0000000..b2852aa --- /dev/null +++ b/usr.bin/signify/fe25519.h @@ -0,0 +1,70 @@ +/* $OpenBSD: fe25519.h,v 1.1 2014/07/22 00:41:19 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. + * Copied from supercop-20130419/crypto_sign/ed25519/ref/fe25519.h + */ + +#ifndef FE25519_H +#define FE25519_H + +#include "crypto_api.h" + +#define fe25519 crypto_sign_ed25519_ref_fe25519 +#define fe25519_freeze crypto_sign_ed25519_ref_fe25519_freeze +#define fe25519_unpack crypto_sign_ed25519_ref_fe25519_unpack +#define fe25519_pack crypto_sign_ed25519_ref_fe25519_pack +#define fe25519_iszero crypto_sign_ed25519_ref_fe25519_iszero +#define fe25519_iseq_vartime crypto_sign_ed25519_ref_fe25519_iseq_vartime +#define fe25519_cmov crypto_sign_ed25519_ref_fe25519_cmov +#define fe25519_setone crypto_sign_ed25519_ref_fe25519_setone +#define fe25519_setzero crypto_sign_ed25519_ref_fe25519_setzero +#define fe25519_neg crypto_sign_ed25519_ref_fe25519_neg +#define fe25519_getparity crypto_sign_ed25519_ref_fe25519_getparity +#define fe25519_add crypto_sign_ed25519_ref_fe25519_add +#define fe25519_sub crypto_sign_ed25519_ref_fe25519_sub +#define fe25519_mul crypto_sign_ed25519_ref_fe25519_mul +#define fe25519_square crypto_sign_ed25519_ref_fe25519_square +#define fe25519_invert crypto_sign_ed25519_ref_fe25519_invert +#define fe25519_pow2523 crypto_sign_ed25519_ref_fe25519_pow2523 + +typedef struct +{ + crypto_uint32 v[32]; +} +fe25519; + +void fe25519_freeze(fe25519 *r); + +void fe25519_unpack(fe25519 *r, 
const unsigned char x[32]); + +void fe25519_pack(unsigned char r[32], const fe25519 *x); + +int fe25519_iszero(const fe25519 *x); + +int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y); + +void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b); + +void fe25519_setone(fe25519 *r); + +void fe25519_setzero(fe25519 *r); + +void fe25519_neg(fe25519 *r, const fe25519 *x); + +unsigned char fe25519_getparity(const fe25519 *x); + +void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_square(fe25519 *r, const fe25519 *x); + +void fe25519_invert(fe25519 *r, const fe25519 *x); + +void fe25519_pow2523(fe25519 *r, const fe25519 *x); + +#endif diff --git a/usr.bin/signify/ge25519.h b/usr.bin/signify/ge25519.h new file mode 100644 index 0000000..165844e --- /dev/null +++ b/usr.bin/signify/ge25519.h @@ -0,0 +1,43 @@ +/* $OpenBSD: ge25519.h,v 1.2 2015/02/16 18:26:26 miod Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/ge25519.h + */ + +#ifndef GE25519_H +#define GE25519_H + +#include "fe25519.h" +#include "sc25519.h" + +#define ge25519 crypto_sign_ed25519_ref_ge25519 +#define ge25519_base crypto_sign_ed25519_ref_ge25519_base +#define ge25519_unpackneg_vartime crypto_sign_ed25519_ref_unpackneg_vartime +#define ge25519_pack crypto_sign_ed25519_ref_pack +#define ge25519_isneutral_vartime crypto_sign_ed25519_ref_isneutral_vartime +#define ge25519_double_scalarmult_vartime crypto_sign_ed25519_ref_double_scalarmult_vartime +#define ge25519_scalarmult_base crypto_sign_ed25519_ref_scalarmult_base + +typedef struct +{ + fe25519 x; + fe25519 y; + fe25519 z; + fe25519 t; +} ge25519; + +extern const ge25519 ge25519_base; + +int ge25519_unpackneg_vartime(ge25519 *r, const unsigned char p[32]); + +void ge25519_pack(unsigned char r[32], const ge25519 *p); + +int ge25519_isneutral_vartime(const ge25519 *p); + +void ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const sc25519 *s1, const ge25519 *p2, const sc25519 *s2); + +void ge25519_scalarmult_base(ge25519 *r, const sc25519 *s); + +#endif diff --git a/usr.bin/signify/ge25519_base.data b/usr.bin/signify/ge25519_base.data new file mode 100644 index 0000000..8f6a62b --- /dev/null +++ b/usr.bin/signify/ge25519_base.data @@ -0,0 +1,858 @@ +/* $OpenBSD: ge25519_base.data,v 1.1 2014/07/22 00:41:19 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/ge25519_base.data + */ + +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21}} , + {{0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66}}}, +{{{0x0e, 0xce, 0x43, 0x28, 0x4e, 0xa1, 0xc5, 0x83, 0x5f, 0xa4, 0xd7, 0x15, 0x45, 0x8e, 0x0d, 0x08, 0xac, 0xe7, 0x33, 0x18, 0x7d, 0x3b, 0x04, 0x3d, 0x6c, 0x04, 0x5a, 0x9f, 0x4c, 0x38, 0xab, 0x36}} , + {{0xc9, 0xa3, 0xf8, 0x6a, 0xae, 0x46, 0x5f, 0x0e, 0x56, 0x51, 0x38, 0x64, 0x51, 0x0f, 0x39, 0x97, 0x56, 0x1f, 0xa2, 0xc9, 0xe8, 0x5e, 0xa2, 0x1d, 0xc2, 0x29, 0x23, 0x09, 0xf3, 0xcd, 0x60, 0x22}}}, +{{{0x5c, 0xe2, 0xf8, 0xd3, 0x5f, 0x48, 0x62, 0xac, 0x86, 0x48, 0x62, 0x81, 0x19, 0x98, 0x43, 0x63, 0x3a, 0xc8, 0xda, 0x3e, 0x74, 0xae, 0xf4, 0x1f, 0x49, 0x8f, 0x92, 0x22, 0x4a, 0x9c, 0xae, 0x67}} , + {{0xd4, 0xb4, 0xf5, 0x78, 0x48, 0x68, 0xc3, 0x02, 0x04, 0x03, 0x24, 0x67, 0x17, 0xec, 0x16, 0x9f, 0xf7, 0x9e, 0x26, 0x60, 0x8e, 0xa1, 0x26, 0xa1, 0xab, 0x69, 0xee, 0x77, 0xd1, 0xb1, 0x67, 0x12}}}, +{{{0x70, 0xf8, 0xc9, 0xc4, 0x57, 0xa6, 0x3a, 0x49, 0x47, 0x15, 0xce, 0x93, 0xc1, 0x9e, 0x73, 0x1a, 0xf9, 0x20, 0x35, 0x7a, 0xb8, 0xd4, 0x25, 0x83, 0x46, 0xf1, 0xcf, 0x56, 0xdb, 0xa8, 0x3d, 0x20}} , + {{0x2f, 0x11, 0x32, 0xca, 0x61, 0xab, 0x38, 0xdf, 0xf0, 0x0f, 0x2f, 0xea, 0x32, 0x28, 0xf2, 0x4c, 0x6c, 0x71, 0xd5, 0x80, 
0x85, 0xb8, 0x0e, 0x47, 0xe1, 0x95, 0x15, 0xcb, 0x27, 0xe8, 0xd0, 0x47}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xc8, 0x84, 0xa5, 0x08, 0xbc, 0xfd, 0x87, 0x3b, 0x99, 0x8b, 0x69, 0x80, 0x7b, 0xc6, 0x3a, 0xeb, 0x93, 0xcf, 0x4e, 0xf8, 0x5c, 0x2d, 0x86, 0x42, 0xb6, 0x71, 0xd7, 0x97, 0x5f, 0xe1, 0x42, 0x67}} , + {{0xb4, 0xb9, 0x37, 0xfc, 0xa9, 0x5b, 0x2f, 0x1e, 0x93, 0xe4, 0x1e, 0x62, 0xfc, 0x3c, 0x78, 0x81, 0x8f, 0xf3, 0x8a, 0x66, 0x09, 0x6f, 0xad, 0x6e, 0x79, 0x73, 0xe5, 0xc9, 0x00, 0x06, 0xd3, 0x21}}}, +{{{0xf8, 0xf9, 0x28, 0x6c, 0x6d, 0x59, 0xb2, 0x59, 0x74, 0x23, 0xbf, 0xe7, 0x33, 0x8d, 0x57, 0x09, 0x91, 0x9c, 0x24, 0x08, 0x15, 0x2b, 0xe2, 0xb8, 0xee, 0x3a, 0xe5, 0x27, 0x06, 0x86, 0xa4, 0x23}} , + {{0xeb, 0x27, 0x67, 0xc1, 0x37, 0xab, 0x7a, 0xd8, 0x27, 0x9c, 0x07, 0x8e, 0xff, 0x11, 0x6a, 0xb0, 0x78, 0x6e, 0xad, 0x3a, 0x2e, 0x0f, 0x98, 0x9f, 0x72, 0xc3, 0x7f, 0x82, 0xf2, 0x96, 0x96, 0x70}}}, +{{{0x81, 0x6b, 0x88, 0xe8, 0x1e, 0xc7, 0x77, 0x96, 0x0e, 0xa1, 0xa9, 0x52, 0xe0, 0xd8, 0x0e, 0x61, 0x9e, 0x79, 0x2d, 0x95, 0x9c, 0x8d, 0x96, 0xe0, 0x06, 0x40, 0x5d, 0x87, 0x28, 0x5f, 0x98, 0x70}} , + {{0xf1, 0x79, 0x7b, 0xed, 0x4f, 0x44, 0xb2, 0xe7, 0x08, 0x0d, 0xc2, 0x08, 0x12, 0xd2, 0x9f, 0xdf, 0xcd, 0x93, 0x20, 0x8a, 0xcf, 0x33, 0xca, 0x6d, 0x89, 0xb9, 0x77, 0xc8, 0x93, 0x1b, 0x4e, 0x60}}}, +{{{0x26, 0x4f, 0x7e, 0x97, 0xf6, 0x40, 0xdd, 0x4f, 0xfc, 0x52, 0x78, 0xf9, 0x90, 0x31, 0x03, 0xe6, 0x7d, 0x56, 0x39, 0x0b, 0x1d, 0x56, 0x82, 0x85, 0xf9, 0x1a, 0x42, 0x17, 0x69, 0x6c, 0xcf, 0x39}} , + {{0x69, 0xd2, 0x06, 0x3a, 0x4f, 0x39, 0x2d, 0xf9, 0x38, 0x40, 0x8c, 0x4c, 0xe7, 0x05, 0x12, 0xb4, 0x78, 0x8b, 0xf8, 0xc0, 0xec, 
0x93, 0xde, 0x7a, 0x6b, 0xce, 0x2c, 0xe1, 0x0e, 0xa9, 0x34, 0x44}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x0b, 0xa4, 0x3c, 0xb0, 0x0f, 0x7a, 0x51, 0xf1, 0x78, 0xd6, 0xd9, 0x6a, 0xfd, 0x46, 0xe8, 0xb8, 0xa8, 0x79, 0x1d, 0x87, 0xf9, 0x90, 0xf2, 0x9c, 0x13, 0x29, 0xf8, 0x0b, 0x20, 0x64, 0xfa, 0x05}} , + {{0x26, 0x09, 0xda, 0x17, 0xaf, 0x95, 0xd6, 0xfb, 0x6a, 0x19, 0x0d, 0x6e, 0x5e, 0x12, 0xf1, 0x99, 0x4c, 0xaa, 0xa8, 0x6f, 0x79, 0x86, 0xf4, 0x72, 0x28, 0x00, 0x26, 0xf9, 0xea, 0x9e, 0x19, 0x3d}}}, +{{{0x87, 0xdd, 0xcf, 0xf0, 0x5b, 0x49, 0xa2, 0x5d, 0x40, 0x7a, 0x23, 0x26, 0xa4, 0x7a, 0x83, 0x8a, 0xb7, 0x8b, 0xd2, 0x1a, 0xbf, 0xea, 0x02, 0x24, 0x08, 0x5f, 0x7b, 0xa9, 0xb1, 0xbe, 0x9d, 0x37}} , + {{0xfc, 0x86, 0x4b, 0x08, 0xee, 0xe7, 0xa0, 0xfd, 0x21, 0x45, 0x09, 0x34, 0xc1, 0x61, 0x32, 0x23, 0xfc, 0x9b, 0x55, 0x48, 0x53, 0x99, 0xf7, 0x63, 0xd0, 0x99, 0xce, 0x01, 0xe0, 0x9f, 0xeb, 0x28}}}, +{{{0x47, 0xfc, 0xab, 0x5a, 0x17, 0xf0, 0x85, 0x56, 0x3a, 0x30, 0x86, 0x20, 0x28, 0x4b, 0x8e, 0x44, 0x74, 0x3a, 0x6e, 0x02, 0xf1, 0x32, 0x8f, 0x9f, 0x3f, 0x08, 0x35, 0xe9, 0xca, 0x16, 0x5f, 0x6e}} , + {{0x1c, 0x59, 0x1c, 0x65, 0x5d, 0x34, 0xa4, 0x09, 0xcd, 0x13, 0x9c, 0x70, 0x7d, 0xb1, 0x2a, 0xc5, 0x88, 0xaf, 0x0b, 0x60, 0xc7, 0x9f, 0x34, 0x8d, 0xd6, 0xb7, 0x7f, 0xea, 0x78, 0x65, 0x8d, 0x77}}}, +{{{0x56, 0xa5, 0xc2, 0x0c, 0xdd, 0xbc, 0xb8, 0x20, 0x6d, 0x57, 0x61, 0xb5, 0xfb, 0x78, 0xb5, 0xd4, 0x49, 0x54, 0x90, 0x26, 0xc1, 0xcb, 0xe9, 0xe6, 0xbf, 0xec, 0x1d, 0x4e, 0xed, 0x07, 0x7e, 0x5e}} , + {{0xc7, 0xf6, 0x6c, 0x56, 0x31, 0x20, 0x14, 0x0e, 0xa8, 0xd9, 0x27, 0xc1, 0x9a, 0x3d, 0x1b, 0x7d, 0x0e, 0x26, 0xd3, 0x81, 0xaa, 0xeb, 
0xf5, 0x6b, 0x79, 0x02, 0xf1, 0x51, 0x5c, 0x75, 0x55, 0x0f}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x0a, 0x34, 0xcd, 0x82, 0x3c, 0x33, 0x09, 0x54, 0xd2, 0x61, 0x39, 0x30, 0x9b, 0xfd, 0xef, 0x21, 0x26, 0xd4, 0x70, 0xfa, 0xee, 0xf9, 0x31, 0x33, 0x73, 0x84, 0xd0, 0xb3, 0x81, 0xbf, 0xec, 0x2e}} , + {{0xe8, 0x93, 0x8b, 0x00, 0x64, 0xf7, 0x9c, 0xb8, 0x74, 0xe0, 0xe6, 0x49, 0x48, 0x4d, 0x4d, 0x48, 0xb6, 0x19, 0xa1, 0x40, 0xb7, 0xd9, 0x32, 0x41, 0x7c, 0x82, 0x37, 0xa1, 0x2d, 0xdc, 0xd2, 0x54}}}, +{{{0x68, 0x2b, 0x4a, 0x5b, 0xd5, 0xc7, 0x51, 0x91, 0x1d, 0xe1, 0x2a, 0x4b, 0xc4, 0x47, 0xf1, 0xbc, 0x7a, 0xb3, 0xcb, 0xc8, 0xb6, 0x7c, 0xac, 0x90, 0x05, 0xfd, 0xf3, 0xf9, 0x52, 0x3a, 0x11, 0x6b}} , + {{0x3d, 0xc1, 0x27, 0xf3, 0x59, 0x43, 0x95, 0x90, 0xc5, 0x96, 0x79, 0xf5, 0xf4, 0x95, 0x65, 0x29, 0x06, 0x9c, 0x51, 0x05, 0x18, 0xda, 0xb8, 0x2e, 0x79, 0x7e, 0x69, 0x59, 0x71, 0x01, 0xeb, 0x1a}}}, +{{{0x15, 0x06, 0x49, 0xb6, 0x8a, 0x3c, 0xea, 0x2f, 0x34, 0x20, 0x14, 0xc3, 0xaa, 0xd6, 0xaf, 0x2c, 0x3e, 0xbd, 0x65, 0x20, 0xe2, 0x4d, 0x4b, 0x3b, 0xeb, 0x9f, 0x4a, 0xc3, 0xad, 0xa4, 0x3b, 0x60}} , + {{0xbc, 0x58, 0xe6, 0xc0, 0x95, 0x2a, 0x2a, 0x81, 0x9a, 0x7a, 0xf3, 0xd2, 0x06, 0xbe, 0x48, 0xbc, 0x0c, 0xc5, 0x46, 0xe0, 0x6a, 0xd4, 0xac, 0x0f, 0xd9, 0xcc, 0x82, 0x34, 0x2c, 0xaf, 0xdb, 0x1f}}}, +{{{0xf7, 0x17, 0x13, 0xbd, 0xfb, 0xbc, 0xd2, 0xec, 0x45, 0xb3, 0x15, 0x31, 0xe9, 0xaf, 0x82, 0x84, 0x3d, 0x28, 0xc6, 0xfc, 0x11, 0xf5, 0x41, 0xb5, 0x8b, 0xd3, 0x12, 0x76, 0x52, 0xe7, 0x1a, 0x3c}} , + {{0x4e, 0x36, 0x11, 0x07, 0xa2, 0x15, 0x20, 0x51, 0xc4, 0x2a, 0xc3, 0x62, 0x8b, 0x5e, 0x7f, 0xa6, 0x0f, 0xf9, 0x45, 0x85, 0x6c, 0x11, 0x86, 
0xb7, 0x7e, 0xe5, 0xd7, 0xf9, 0xc3, 0x91, 0x1c, 0x05}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xea, 0xd6, 0xde, 0x29, 0x3a, 0x00, 0xb9, 0x02, 0x59, 0xcb, 0x26, 0xc4, 0xba, 0x99, 0xb1, 0x97, 0x2f, 0x8e, 0x00, 0x92, 0x26, 0x4f, 0x52, 0xeb, 0x47, 0x1b, 0x89, 0x8b, 0x24, 0xc0, 0x13, 0x7d}} , + {{0xd5, 0x20, 0x5b, 0x80, 0xa6, 0x80, 0x20, 0x95, 0xc3, 0xe9, 0x9f, 0x8e, 0x87, 0x9e, 0x1e, 0x9e, 0x7a, 0xc7, 0xcc, 0x75, 0x6c, 0xa5, 0xf1, 0x91, 0x1a, 0xa8, 0x01, 0x2c, 0xab, 0x76, 0xa9, 0x59}}}, +{{{0xde, 0xc9, 0xb1, 0x31, 0x10, 0x16, 0xaa, 0x35, 0x14, 0x6a, 0xd4, 0xb5, 0x34, 0x82, 0x71, 0xd2, 0x4a, 0x5d, 0x9a, 0x1f, 0x53, 0x26, 0x3c, 0xe5, 0x8e, 0x8d, 0x33, 0x7f, 0xff, 0xa9, 0xd5, 0x17}} , + {{0x89, 0xaf, 0xf6, 0xa4, 0x64, 0xd5, 0x10, 0xe0, 0x1d, 0xad, 0xef, 0x44, 0xbd, 0xda, 0x83, 0xac, 0x7a, 0xa8, 0xf0, 0x1c, 0x07, 0xf9, 0xc3, 0x43, 0x6c, 0x3f, 0xb7, 0xd3, 0x87, 0x22, 0x02, 0x73}}}, +{{{0x64, 0x1d, 0x49, 0x13, 0x2f, 0x71, 0xec, 0x69, 0x87, 0xd0, 0x42, 0xee, 0x13, 0xec, 0xe3, 0xed, 0x56, 0x7b, 0xbf, 0xbd, 0x8c, 0x2f, 0x7d, 0x7b, 0x9d, 0x28, 0xec, 0x8e, 0x76, 0x2f, 0x6f, 0x08}} , + {{0x22, 0xf5, 0x5f, 0x4d, 0x15, 0xef, 0xfc, 0x4e, 0x57, 0x03, 0x36, 0x89, 0xf0, 0xeb, 0x5b, 0x91, 0xd6, 0xe2, 0xca, 0x01, 0xa5, 0xee, 0x52, 0xec, 0xa0, 0x3c, 0x8f, 0x33, 0x90, 0x5a, 0x94, 0x72}}}, +{{{0x8a, 0x4b, 0xe7, 0x38, 0xbc, 0xda, 0xc2, 0xb0, 0x85, 0xe1, 0x4a, 0xfe, 0x2d, 0x44, 0x84, 0xcb, 0x20, 0x6b, 0x2d, 0xbf, 0x11, 0x9c, 0xd7, 0xbe, 0xd3, 0x3e, 0x5f, 0xbf, 0x68, 0xbc, 0xa8, 0x07}} , + {{0x01, 0x89, 0x28, 0x22, 0x6a, 0x78, 0xaa, 0x29, 0x03, 0xc8, 0x74, 0x95, 0x03, 0x3e, 0xdc, 0xbd, 0x07, 0x13, 0xa8, 0xa2, 0x20, 0x2d, 0xb3, 0x18, 
0x70, 0x42, 0xfd, 0x7a, 0xc4, 0xd7, 0x49, 0x72}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x02, 0xff, 0x32, 0x2b, 0x5c, 0x93, 0x54, 0x32, 0xe8, 0x57, 0x54, 0x1a, 0x8b, 0x33, 0x60, 0x65, 0xd3, 0x67, 0xa4, 0xc1, 0x26, 0xc4, 0xa4, 0x34, 0x1f, 0x9b, 0xa7, 0xa9, 0xf4, 0xd9, 0x4f, 0x5b}} , + {{0x46, 0x8d, 0xb0, 0x33, 0x54, 0x26, 0x5b, 0x68, 0xdf, 0xbb, 0xc5, 0xec, 0xc2, 0xf9, 0x3c, 0x5a, 0x37, 0xc1, 0x8e, 0x27, 0x47, 0xaa, 0x49, 0x5a, 0xf8, 0xfb, 0x68, 0x04, 0x23, 0xd1, 0xeb, 0x40}}}, +{{{0x65, 0xa5, 0x11, 0x84, 0x8a, 0x67, 0x9d, 0x9e, 0xd1, 0x44, 0x68, 0x7a, 0x34, 0xe1, 0x9f, 0xa3, 0x54, 0xcd, 0x07, 0xca, 0x79, 0x1f, 0x54, 0x2f, 0x13, 0x70, 0x4e, 0xee, 0xa2, 0xfa, 0xe7, 0x5d}} , + {{0x36, 0xec, 0x54, 0xf8, 0xce, 0xe4, 0x85, 0xdf, 0xf6, 0x6f, 0x1d, 0x90, 0x08, 0xbc, 0xe8, 0xc0, 0x92, 0x2d, 0x43, 0x6b, 0x92, 0xa9, 0x8e, 0xab, 0x0a, 0x2e, 0x1c, 0x1e, 0x64, 0x23, 0x9f, 0x2c}}}, +{{{0xa7, 0xd6, 0x2e, 0xd5, 0xcc, 0xd4, 0xcb, 0x5a, 0x3b, 0xa7, 0xf9, 0x46, 0x03, 0x1d, 0xad, 0x2b, 0x34, 0x31, 0x90, 0x00, 0x46, 0x08, 0x82, 0x14, 0xc4, 0xe0, 0x9c, 0xf0, 0xe3, 0x55, 0x43, 0x31}} , + {{0x60, 0xd6, 0xdd, 0x78, 0xe6, 0xd4, 0x22, 0x42, 0x1f, 0x00, 0xf9, 0xb1, 0x6a, 0x63, 0xe2, 0x92, 0x59, 0xd1, 0x1a, 0xb7, 0x00, 0x54, 0x29, 0xc9, 0xc1, 0xf6, 0x6f, 0x7a, 0xc5, 0x3c, 0x5f, 0x65}}}, +{{{0x27, 0x4f, 0xd0, 0x72, 0xb1, 0x11, 0x14, 0x27, 0x15, 0x94, 0x48, 0x81, 0x7e, 0x74, 0xd8, 0x32, 0xd5, 0xd1, 0x11, 0x28, 0x60, 0x63, 0x36, 0x32, 0x37, 0xb5, 0x13, 0x1c, 0xa0, 0x37, 0xe3, 0x74}} , + {{0xf1, 0x25, 0x4e, 0x11, 0x96, 0x67, 0xe6, 0x1c, 0xc2, 0xb2, 0x53, 0xe2, 0xda, 0x85, 0xee, 0xb2, 0x9f, 0x59, 0xf3, 0xba, 0xbd, 0xfa, 0xcf, 0x6e, 0xf9, 
0xda, 0xa4, 0xb3, 0x02, 0x8f, 0x64, 0x08}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x34, 0x94, 0xf2, 0x64, 0x54, 0x47, 0x37, 0x07, 0x40, 0x8a, 0x20, 0xba, 0x4a, 0x55, 0xd7, 0x3f, 0x47, 0xba, 0x25, 0x23, 0x14, 0xb0, 0x2c, 0xe8, 0x55, 0xa8, 0xa6, 0xef, 0x51, 0xbd, 0x6f, 0x6a}} , + {{0x71, 0xd6, 0x16, 0x76, 0xb2, 0x06, 0xea, 0x79, 0xf5, 0xc4, 0xc3, 0x52, 0x7e, 0x61, 0xd1, 0xe1, 0xad, 0x70, 0x78, 0x1d, 0x16, 0x11, 0xf8, 0x7c, 0x2b, 0xfc, 0x55, 0x9f, 0x52, 0xf8, 0xf5, 0x16}}}, +{{{0x34, 0x96, 0x9a, 0xf6, 0xc5, 0xe0, 0x14, 0x03, 0x24, 0x0e, 0x4c, 0xad, 0x9e, 0x9a, 0x70, 0x23, 0x96, 0xb2, 0xf1, 0x2e, 0x9d, 0xc3, 0x32, 0x9b, 0x54, 0xa5, 0x73, 0xde, 0x88, 0xb1, 0x3e, 0x24}} , + {{0xf6, 0xe2, 0x4c, 0x1f, 0x5b, 0xb2, 0xaf, 0x82, 0xa5, 0xcf, 0x81, 0x10, 0x04, 0xef, 0xdb, 0xa2, 0xcc, 0x24, 0xb2, 0x7e, 0x0b, 0x7a, 0xeb, 0x01, 0xd8, 0x52, 0xf4, 0x51, 0x89, 0x29, 0x79, 0x37}}}, +{{{0x74, 0xde, 0x12, 0xf3, 0x68, 0xb7, 0x66, 0xc3, 0xee, 0x68, 0xdc, 0x81, 0xb5, 0x55, 0x99, 0xab, 0xd9, 0x28, 0x63, 0x6d, 0x8b, 0x40, 0x69, 0x75, 0x6c, 0xcd, 0x5c, 0x2a, 0x7e, 0x32, 0x7b, 0x29}} , + {{0x02, 0xcc, 0x22, 0x74, 0x4d, 0x19, 0x07, 0xc0, 0xda, 0xb5, 0x76, 0x51, 0x2a, 0xaa, 0xa6, 0x0a, 0x5f, 0x26, 0xd4, 0xbc, 0xaf, 0x48, 0x88, 0x7f, 0x02, 0xbc, 0xf2, 0xe1, 0xcf, 0xe9, 0xdd, 0x15}}}, +{{{0xed, 0xb5, 0x9a, 0x8c, 0x9a, 0xdd, 0x27, 0xf4, 0x7f, 0x47, 0xd9, 0x52, 0xa7, 0xcd, 0x65, 0xa5, 0x31, 0x22, 0xed, 0xa6, 0x63, 0x5b, 0x80, 0x4a, 0xad, 0x4d, 0xed, 0xbf, 0xee, 0x49, 0xb3, 0x06}} , + {{0xf8, 0x64, 0x8b, 0x60, 0x90, 0xe9, 0xde, 0x44, 0x77, 0xb9, 0x07, 0x36, 0x32, 0xc2, 0x50, 0xf5, 0x65, 0xdf, 0x48, 0x4c, 0x37, 0xaa, 0x68, 0xab, 0x9a, 0x1f, 
0x3e, 0xff, 0x89, 0x92, 0xa0, 0x07}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x7d, 0x4f, 0x9c, 0x19, 0xc0, 0x4a, 0x31, 0xec, 0xf9, 0xaa, 0xeb, 0xb2, 0x16, 0x9c, 0xa3, 0x66, 0x5f, 0xd1, 0xd4, 0xed, 0xb8, 0x92, 0x1c, 0xab, 0xda, 0xea, 0xd9, 0x57, 0xdf, 0x4c, 0x2a, 0x48}} , + {{0x4b, 0xb0, 0x4e, 0x6e, 0x11, 0x3b, 0x51, 0xbd, 0x6a, 0xfd, 0xe4, 0x25, 0xa5, 0x5f, 0x11, 0x3f, 0x98, 0x92, 0x51, 0x14, 0xc6, 0x5f, 0x3c, 0x0b, 0xa8, 0xf7, 0xc2, 0x81, 0x43, 0xde, 0x91, 0x73}}}, +{{{0x3c, 0x8f, 0x9f, 0x33, 0x2a, 0x1f, 0x43, 0x33, 0x8f, 0x68, 0xff, 0x1f, 0x3d, 0x73, 0x6b, 0xbf, 0x68, 0xcc, 0x7d, 0x13, 0x6c, 0x24, 0x4b, 0xcc, 0x4d, 0x24, 0x0d, 0xfe, 0xde, 0x86, 0xad, 0x3b}} , + {{0x79, 0x51, 0x81, 0x01, 0xdc, 0x73, 0x53, 0xe0, 0x6e, 0x9b, 0xea, 0x68, 0x3f, 0x5c, 0x14, 0x84, 0x53, 0x8d, 0x4b, 0xc0, 0x9f, 0x9f, 0x89, 0x2b, 0x8c, 0xba, 0x86, 0xfa, 0xf2, 0xcd, 0xe3, 0x2d}}}, +{{{0x06, 0xf9, 0x29, 0x5a, 0xdb, 0x3d, 0x84, 0x52, 0xab, 0xcc, 0x6b, 0x60, 0x9d, 0xb7, 0x4a, 0x0e, 0x36, 0x63, 0x91, 0xad, 0xa0, 0x95, 0xb0, 0x97, 0x89, 0x4e, 0xcf, 0x7d, 0x3c, 0xe5, 0x7c, 0x28}} , + {{0x2e, 0x69, 0x98, 0xfd, 0xc6, 0xbd, 0xcc, 0xca, 0xdf, 0x9a, 0x44, 0x7e, 0x9d, 0xca, 0x89, 0x6d, 0xbf, 0x27, 0xc2, 0xf8, 0xcd, 0x46, 0x00, 0x2b, 0xb5, 0x58, 0x4e, 0xb7, 0x89, 0x09, 0xe9, 0x2d}}}, +{{{0x54, 0xbe, 0x75, 0xcb, 0x05, 0xb0, 0x54, 0xb7, 0xe7, 0x26, 0x86, 0x4a, 0xfc, 0x19, 0xcf, 0x27, 0x46, 0xd4, 0x22, 0x96, 0x5a, 0x11, 0xe8, 0xd5, 0x1b, 0xed, 0x71, 0xc5, 0x5d, 0xc8, 0xaf, 0x45}} , + {{0x40, 0x7b, 0x77, 0x57, 0x49, 0x9e, 0x80, 0x39, 0x23, 0xee, 0x81, 0x0b, 0x22, 0xcf, 0xdb, 0x7a, 0x2f, 0x14, 0xb8, 0x57, 0x8f, 0xa1, 0x39, 0x1e, 0x77, 0xfc, 0x0b, 
0xa6, 0xbf, 0x8a, 0x0c, 0x6c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x77, 0x3a, 0xd4, 0xd8, 0x27, 0xcf, 0xe8, 0xa1, 0x72, 0x9d, 0xca, 0xdd, 0x0d, 0x96, 0xda, 0x79, 0xed, 0x56, 0x42, 0x15, 0x60, 0xc7, 0x1c, 0x6b, 0x26, 0x30, 0xf6, 0x6a, 0x95, 0x67, 0xf3, 0x0a}} , + {{0xc5, 0x08, 0xa4, 0x2b, 0x2f, 0xbd, 0x31, 0x81, 0x2a, 0xa6, 0xb6, 0xe4, 0x00, 0x91, 0xda, 0x3d, 0xb2, 0xb0, 0x96, 0xce, 0x8a, 0xd2, 0x8d, 0x70, 0xb3, 0xd3, 0x34, 0x01, 0x90, 0x8d, 0x10, 0x21}}}, +{{{0x33, 0x0d, 0xe7, 0xba, 0x4f, 0x07, 0xdf, 0x8d, 0xea, 0x7d, 0xa0, 0xc5, 0xd6, 0xb1, 0xb0, 0xe5, 0x57, 0x1b, 0x5b, 0xf5, 0x45, 0x13, 0x14, 0x64, 0x5a, 0xeb, 0x5c, 0xfc, 0x54, 0x01, 0x76, 0x2b}} , + {{0x02, 0x0c, 0xc2, 0xaf, 0x96, 0x36, 0xfe, 0x4a, 0xe2, 0x54, 0x20, 0x6a, 0xeb, 0xb2, 0x9f, 0x62, 0xd7, 0xce, 0xa2, 0x3f, 0x20, 0x11, 0x34, 0x37, 0xe0, 0x42, 0xed, 0x6f, 0xf9, 0x1a, 0xc8, 0x7d}}}, +{{{0xd8, 0xb9, 0x11, 0xe8, 0x36, 0x3f, 0x42, 0xc1, 0xca, 0xdc, 0xd3, 0xf1, 0xc8, 0x23, 0x3d, 0x4f, 0x51, 0x7b, 0x9d, 0x8d, 0xd8, 0xe4, 0xa0, 0xaa, 0xf3, 0x04, 0xd6, 0x11, 0x93, 0xc8, 0x35, 0x45}} , + {{0x61, 0x36, 0xd6, 0x08, 0x90, 0xbf, 0xa7, 0x7a, 0x97, 0x6c, 0x0f, 0x84, 0xd5, 0x33, 0x2d, 0x37, 0xc9, 0x6a, 0x80, 0x90, 0x3d, 0x0a, 0xa2, 0xaa, 0xe1, 0xb8, 0x84, 0xba, 0x61, 0x36, 0xdd, 0x69}}}, +{{{0x6b, 0xdb, 0x5b, 0x9c, 0xc6, 0x92, 0xbc, 0x23, 0xaf, 0xc5, 0xb8, 0x75, 0xf8, 0x42, 0xfa, 0xd6, 0xb6, 0x84, 0x94, 0x63, 0x98, 0x93, 0x48, 0x78, 0x38, 0xcd, 0xbb, 0x18, 0x34, 0xc3, 0xdb, 0x67}} , + {{0x96, 0xf3, 0x3a, 0x09, 0x56, 0xb0, 0x6f, 0x7c, 0x51, 0x1e, 0x1b, 0x39, 0x48, 0xea, 0xc9, 0x0c, 0x25, 0xa2, 0x7a, 0xca, 0xe7, 0x92, 0xfc, 0x59, 0x30, 0xa3, 0x89, 0x85, 
0xdf, 0x6f, 0x43, 0x38}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x79, 0x84, 0x44, 0x19, 0xbd, 0xe9, 0x54, 0xc4, 0xc0, 0x6e, 0x2a, 0xa8, 0xa8, 0x9b, 0x43, 0xd5, 0x71, 0x22, 0x5f, 0xdc, 0x01, 0xfa, 0xdf, 0xb3, 0xb8, 0x47, 0x4b, 0x0a, 0xa5, 0x44, 0xea, 0x29}} , + {{0x05, 0x90, 0x50, 0xaf, 0x63, 0x5f, 0x9d, 0x9e, 0xe1, 0x9d, 0x38, 0x97, 0x1f, 0x6c, 0xac, 0x30, 0x46, 0xb2, 0x6a, 0x19, 0xd1, 0x4b, 0xdb, 0xbb, 0x8c, 0xda, 0x2e, 0xab, 0xc8, 0x5a, 0x77, 0x6c}}}, +{{{0x2b, 0xbe, 0xaf, 0xa1, 0x6d, 0x2f, 0x0b, 0xb1, 0x8f, 0xe3, 0xe0, 0x38, 0xcd, 0x0b, 0x41, 0x1b, 0x4a, 0x15, 0x07, 0xf3, 0x6f, 0xdc, 0xb8, 0xe9, 0xde, 0xb2, 0xa3, 0x40, 0x01, 0xa6, 0x45, 0x1e}} , + {{0x76, 0x0a, 0xda, 0x8d, 0x2c, 0x07, 0x3f, 0x89, 0x7d, 0x04, 0xad, 0x43, 0x50, 0x6e, 0xd2, 0x47, 0xcb, 0x8a, 0xe6, 0x85, 0x1a, 0x24, 0xf3, 0xd2, 0x60, 0xfd, 0xdf, 0x73, 0xa4, 0x0d, 0x73, 0x0e}}}, +{{{0xfd, 0x67, 0x6b, 0x71, 0x9b, 0x81, 0x53, 0x39, 0x39, 0xf4, 0xb8, 0xd5, 0xc3, 0x30, 0x9b, 0x3b, 0x7c, 0xa3, 0xf0, 0xd0, 0x84, 0x21, 0xd6, 0xbf, 0xb7, 0x4c, 0x87, 0x13, 0x45, 0x2d, 0xa7, 0x55}} , + {{0x5d, 0x04, 0xb3, 0x40, 0x28, 0x95, 0x2d, 0x30, 0x83, 0xec, 0x5e, 0xe4, 0xff, 0x75, 0xfe, 0x79, 0x26, 0x9d, 0x1d, 0x36, 0xcd, 0x0a, 0x15, 0xd2, 0x24, 0x14, 0x77, 0x71, 0xd7, 0x8a, 0x1b, 0x04}}}, +{{{0x5d, 0x93, 0xc9, 0xbe, 0xaa, 0x90, 0xcd, 0x9b, 0xfb, 0x73, 0x7e, 0xb0, 0x64, 0x98, 0x57, 0x44, 0x42, 0x41, 0xb1, 0xaf, 0xea, 0xc1, 0xc3, 0x22, 0xff, 0x60, 0x46, 0xcb, 0x61, 0x81, 0x70, 0x61}} , + {{0x0d, 0x82, 0xb9, 0xfe, 0x21, 0xcd, 0xc4, 0xf5, 0x98, 0x0c, 0x4e, 0x72, 0xee, 0x87, 0x49, 0xf8, 0xa1, 0x95, 0xdf, 0x8f, 0x2d, 0xbd, 0x21, 0x06, 0x7c, 0x15, 0xe8, 0x12, 0x6d, 
0x93, 0xd6, 0x38}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x91, 0xf7, 0x51, 0xd9, 0xef, 0x7d, 0x42, 0x01, 0x13, 0xe9, 0xb8, 0x7f, 0xa6, 0x49, 0x17, 0x64, 0x21, 0x80, 0x83, 0x2c, 0x63, 0x4c, 0x60, 0x09, 0x59, 0x91, 0x92, 0x77, 0x39, 0x51, 0xf4, 0x48}} , + {{0x60, 0xd5, 0x22, 0x83, 0x08, 0x2f, 0xff, 0x99, 0x3e, 0x69, 0x6d, 0x88, 0xda, 0xe7, 0x5b, 0x52, 0x26, 0x31, 0x2a, 0xe5, 0x89, 0xde, 0x68, 0x90, 0xb6, 0x22, 0x5a, 0xbd, 0xd3, 0x85, 0x53, 0x31}}}, +{{{0xd8, 0xce, 0xdc, 0xf9, 0x3c, 0x4b, 0xa2, 0x1d, 0x2c, 0x2f, 0x36, 0xbe, 0x7a, 0xfc, 0xcd, 0xbc, 0xdc, 0xf9, 0x30, 0xbd, 0xff, 0x05, 0xc7, 0xe4, 0x8e, 0x17, 0x62, 0xf8, 0x4d, 0xa0, 0x56, 0x79}} , + {{0x82, 0xe7, 0xf6, 0xba, 0x53, 0x84, 0x0a, 0xa3, 0x34, 0xff, 0x3c, 0xa3, 0x6a, 0xa1, 0x37, 0xea, 0xdd, 0xb6, 0x95, 0xb3, 0x78, 0x19, 0x76, 0x1e, 0x55, 0x2f, 0x77, 0x2e, 0x7f, 0xc1, 0xea, 0x5e}}}, +{{{0x83, 0xe1, 0x6e, 0xa9, 0x07, 0x33, 0x3e, 0x83, 0xff, 0xcb, 0x1c, 0x9f, 0xb1, 0xa3, 0xb4, 0xc9, 0xe1, 0x07, 0x97, 0xff, 0xf8, 0x23, 0x8f, 0xce, 0x40, 0xfd, 0x2e, 0x5e, 0xdb, 0x16, 0x43, 0x2d}} , + {{0xba, 0x38, 0x02, 0xf7, 0x81, 0x43, 0x83, 0xa3, 0x20, 0x4f, 0x01, 0x3b, 0x8a, 0x04, 0x38, 0x31, 0xc6, 0x0f, 0xc8, 0xdf, 0xd7, 0xfa, 0x2f, 0x88, 0x3f, 0xfc, 0x0c, 0x76, 0xc4, 0xa6, 0x45, 0x72}}}, +{{{0xbb, 0x0c, 0xbc, 0x6a, 0xa4, 0x97, 0x17, 0x93, 0x2d, 0x6f, 0xde, 0x72, 0x10, 0x1c, 0x08, 0x2c, 0x0f, 0x80, 0x32, 0x68, 0x27, 0xd4, 0xab, 0xdd, 0xc5, 0x58, 0x61, 0x13, 0x6d, 0x11, 0x1e, 0x4d}} , + {{0x1a, 0xb9, 0xc9, 0x10, 0xfb, 0x1e, 0x4e, 0xf4, 0x84, 0x4b, 0x8a, 0x5e, 0x7b, 0x4b, 0xe8, 0x43, 0x8c, 0x8f, 0x00, 0xb5, 0x54, 0x13, 0xc5, 0x5c, 0xb6, 0x35, 0x4e, 0x9d, 0xe4, 0x5b, 
0x41, 0x6d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x15, 0x7d, 0x12, 0x48, 0x82, 0x14, 0x42, 0xcd, 0x32, 0xd4, 0x4b, 0xc1, 0x72, 0x61, 0x2a, 0x8c, 0xec, 0xe2, 0xf8, 0x24, 0x45, 0x94, 0xe3, 0xbe, 0xdd, 0x67, 0xa8, 0x77, 0x5a, 0xae, 0x5b, 0x4b}} , + {{0xcb, 0x77, 0x9a, 0x20, 0xde, 0xb8, 0x23, 0xd9, 0xa0, 0x0f, 0x8c, 0x7b, 0xa5, 0xcb, 0xae, 0xb6, 0xec, 0x42, 0x67, 0x0e, 0x58, 0xa4, 0x75, 0x98, 0x21, 0x71, 0x84, 0xb3, 0xe0, 0x76, 0x94, 0x73}}}, +{{{0xdf, 0xfc, 0x69, 0x28, 0x23, 0x3f, 0x5b, 0xf8, 0x3b, 0x24, 0x37, 0xf3, 0x1d, 0xd5, 0x22, 0x6b, 0xd0, 0x98, 0xa8, 0x6c, 0xcf, 0xff, 0x06, 0xe1, 0x13, 0xdf, 0xb9, 0xc1, 0x0c, 0xa9, 0xbf, 0x33}} , + {{0xd9, 0x81, 0xda, 0xb2, 0x4f, 0x82, 0x9d, 0x43, 0x81, 0x09, 0xf1, 0xd2, 0x01, 0xef, 0xac, 0xf4, 0x2d, 0x7d, 0x01, 0x09, 0xf1, 0xff, 0xa5, 0x9f, 0xe5, 0xca, 0x27, 0x63, 0xdb, 0x20, 0xb1, 0x53}}}, +{{{0x67, 0x02, 0xe8, 0xad, 0xa9, 0x34, 0xd4, 0xf0, 0x15, 0x81, 0xaa, 0xc7, 0x4d, 0x87, 0x94, 0xea, 0x75, 0xe7, 0x4c, 0x94, 0x04, 0x0e, 0x69, 0x87, 0xe7, 0x51, 0x91, 0x10, 0x03, 0xc7, 0xbe, 0x56}} , + {{0x32, 0xfb, 0x86, 0xec, 0x33, 0x6b, 0x2e, 0x51, 0x2b, 0xc8, 0xfa, 0x6c, 0x70, 0x47, 0x7e, 0xce, 0x05, 0x0c, 0x71, 0xf3, 0xb4, 0x56, 0xa6, 0xdc, 0xcc, 0x78, 0x07, 0x75, 0xd0, 0xdd, 0xb2, 0x6a}}}, +{{{0xc6, 0xef, 0xb9, 0xc0, 0x2b, 0x22, 0x08, 0x1e, 0x71, 0x70, 0xb3, 0x35, 0x9c, 0x7a, 0x01, 0x92, 0x44, 0x9a, 0xf6, 0xb0, 0x58, 0x95, 0xc1, 0x9b, 0x02, 0xed, 0x2d, 0x7c, 0x34, 0x29, 0x49, 0x44}} , + {{0x45, 0x62, 0x1d, 0x2e, 0xff, 0x2a, 0x1c, 0x21, 0xa4, 0x25, 0x7b, 0x0d, 0x8c, 0x15, 0x39, 0xfc, 0x8f, 0x7c, 0xa5, 0x7d, 0x1e, 0x25, 0xa3, 0x45, 0xd6, 0xab, 0xbd, 0xcb, 0xc5, 0x5e, 0x78, 
0x77}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xd0, 0xd3, 0x42, 0xed, 0x1d, 0x00, 0x3c, 0x15, 0x2c, 0x9c, 0x77, 0x81, 0xd2, 0x73, 0xd1, 0x06, 0xd5, 0xc4, 0x7f, 0x94, 0xbb, 0x92, 0x2d, 0x2c, 0x4b, 0x45, 0x4b, 0xe9, 0x2a, 0x89, 0x6b, 0x2b}} , + {{0xd2, 0x0c, 0x88, 0xc5, 0x48, 0x4d, 0xea, 0x0d, 0x4a, 0xc9, 0x52, 0x6a, 0x61, 0x79, 0xe9, 0x76, 0xf3, 0x85, 0x52, 0x5c, 0x1b, 0x2c, 0xe1, 0xd6, 0xc4, 0x0f, 0x18, 0x0e, 0x4e, 0xf6, 0x1c, 0x7f}}}, +{{{0xb4, 0x04, 0x2e, 0x42, 0xcb, 0x1f, 0x2b, 0x11, 0x51, 0x7b, 0x08, 0xac, 0xaa, 0x3e, 0x9e, 0x52, 0x60, 0xb7, 0xc2, 0x61, 0x57, 0x8c, 0x84, 0xd5, 0x18, 0xa6, 0x19, 0xfc, 0xb7, 0x75, 0x91, 0x1b}} , + {{0xe8, 0x68, 0xca, 0x44, 0xc8, 0x38, 0x38, 0xcc, 0x53, 0x0a, 0x32, 0x35, 0xcc, 0x52, 0xcb, 0x0e, 0xf7, 0xc5, 0xe7, 0xec, 0x3d, 0x85, 0xcc, 0x58, 0xe2, 0x17, 0x47, 0xff, 0x9f, 0xa5, 0x30, 0x17}}}, +{{{0xe3, 0xae, 0xc8, 0xc1, 0x71, 0x75, 0x31, 0x00, 0x37, 0x41, 0x5c, 0x0e, 0x39, 0xda, 0x73, 0xa0, 0xc7, 0x97, 0x36, 0x6c, 0x5b, 0xf2, 0xee, 0x64, 0x0a, 0x3d, 0x89, 0x1e, 0x1d, 0x49, 0x8c, 0x37}} , + {{0x4c, 0xe6, 0xb0, 0xc1, 0xa5, 0x2a, 0x82, 0x09, 0x08, 0xad, 0x79, 0x9c, 0x56, 0xf6, 0xf9, 0xc1, 0xd7, 0x7c, 0x39, 0x7f, 0x93, 0xca, 0x11, 0x55, 0xbf, 0x07, 0x1b, 0x82, 0x29, 0x69, 0x95, 0x5c}}}, +{{{0x87, 0xee, 0xa6, 0x56, 0x9e, 0xc2, 0x9a, 0x56, 0x24, 0x42, 0x85, 0x4d, 0x98, 0x31, 0x1e, 0x60, 0x4d, 0x87, 0x85, 0x04, 0xae, 0x46, 0x12, 0xf9, 0x8e, 0x7f, 0xe4, 0x7f, 0xf6, 0x1c, 0x37, 0x01}} , + {{0x73, 0x4c, 0xb6, 0xc5, 0xc4, 0xe9, 0x6c, 0x85, 0x48, 0x4a, 0x5a, 0xac, 0xd9, 0x1f, 0x43, 0xf8, 0x62, 0x5b, 0xee, 0x98, 0x2a, 0x33, 0x8e, 0x79, 0xce, 0x61, 0x06, 0x35, 0xd8, 0xd7, 0xca, 0x71}}}, 
+{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x72, 0xd3, 0xae, 0xa6, 0xca, 0x8f, 0xcd, 0xcc, 0x78, 0x8e, 0x19, 0x4d, 0xa7, 0xd2, 0x27, 0xe9, 0xa4, 0x3c, 0x16, 0x5b, 0x84, 0x80, 0xf9, 0xd0, 0xcc, 0x6a, 0x1e, 0xca, 0x1e, 0x67, 0xbd, 0x63}} , + {{0x7b, 0x6e, 0x2a, 0xd2, 0x87, 0x48, 0xff, 0xa1, 0xca, 0xe9, 0x15, 0x85, 0xdc, 0xdb, 0x2c, 0x39, 0x12, 0x91, 0xa9, 0x20, 0xaa, 0x4f, 0x29, 0xf4, 0x15, 0x7a, 0xd2, 0xf5, 0x32, 0xcc, 0x60, 0x04}}}, +{{{0xe5, 0x10, 0x47, 0x3b, 0xfa, 0x90, 0xfc, 0x30, 0xb5, 0xea, 0x6f, 0x56, 0x8f, 0xfb, 0x0e, 0xa7, 0x3b, 0xc8, 0xb2, 0xff, 0x02, 0x7a, 0x33, 0x94, 0x93, 0x2a, 0x03, 0xe0, 0x96, 0x3a, 0x6c, 0x0f}} , + {{0x5a, 0x63, 0x67, 0xe1, 0x9b, 0x47, 0x78, 0x9f, 0x38, 0x79, 0xac, 0x97, 0x66, 0x1d, 0x5e, 0x51, 0xee, 0x24, 0x42, 0xe8, 0x58, 0x4b, 0x8a, 0x03, 0x75, 0x86, 0x37, 0x86, 0xe2, 0x97, 0x4e, 0x3d}}}, +{{{0x3f, 0x75, 0x8e, 0xb4, 0xff, 0xd8, 0xdd, 0xd6, 0x37, 0x57, 0x9d, 0x6d, 0x3b, 0xbd, 0xd5, 0x60, 0x88, 0x65, 0x9a, 0xb9, 0x4a, 0x68, 0x84, 0xa2, 0x67, 0xdd, 0x17, 0x25, 0x97, 0x04, 0x8b, 0x5e}} , + {{0xbb, 0x40, 0x5e, 0xbc, 0x16, 0x92, 0x05, 0xc4, 0xc0, 0x4e, 0x72, 0x90, 0x0e, 0xab, 0xcf, 0x8a, 0xed, 0xef, 0xb9, 0x2d, 0x3b, 0xf8, 0x43, 0x5b, 0xba, 0x2d, 0xeb, 0x2f, 0x52, 0xd2, 0xd1, 0x5a}}}, +{{{0x40, 0xb4, 0xab, 0xe6, 0xad, 0x9f, 0x46, 0x69, 0x4a, 0xb3, 0x8e, 0xaa, 0xea, 0x9c, 0x8a, 0x20, 0x16, 0x5d, 0x8c, 0x13, 0xbd, 0xf6, 0x1d, 0xc5, 0x24, 0xbd, 0x90, 0x2a, 0x1c, 0xc7, 0x13, 0x3b}} , + {{0x54, 0xdc, 0x16, 0x0d, 0x18, 0xbe, 0x35, 0x64, 0x61, 0x52, 0x02, 0x80, 0xaf, 0x05, 0xf7, 0xa6, 0x42, 0xd3, 0x8f, 0x2e, 0x79, 0x26, 0xa8, 0xbb, 0xb2, 0x17, 0x48, 0xb2, 0x7a, 0x0a, 0x89, 0x14}}}, +{{{0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x20, 0xa8, 0x88, 0xe3, 0x91, 0xc0, 0x6e, 0xbb, 0x8a, 0x27, 0x82, 0x51, 0x83, 0xb2, 0x28, 0xa9, 0x83, 0xeb, 0xa6, 0xa9, 0x4d, 0x17, 0x59, 0x22, 0x54, 0x00, 0x50, 0x45, 0xcb, 0x48, 0x4b, 0x18}} , + {{0x33, 0x7c, 0xe7, 0x26, 0xba, 0x4d, 0x32, 0xfe, 0x53, 0xf4, 0xfa, 0x83, 0xe3, 0xa5, 0x79, 0x66, 0x73, 0xef, 0x80, 0x23, 0x68, 0xc2, 0x60, 0xdd, 0xa9, 0x33, 0xdc, 0x03, 0x7a, 0xe0, 0xe0, 0x3e}}}, +{{{0x34, 0x5c, 0x13, 0xfb, 0xc0, 0xe3, 0x78, 0x2b, 0x54, 0x58, 0x22, 0x9b, 0x76, 0x81, 0x7f, 0x93, 0x9c, 0x25, 0x3c, 0xd2, 0xe9, 0x96, 0x21, 0x26, 0x08, 0xf5, 0xed, 0x95, 0x11, 0xae, 0x04, 0x5a}} , + {{0xb9, 0xe8, 0xc5, 0x12, 0x97, 0x1f, 0x83, 0xfe, 0x3e, 0x94, 0x99, 0xd4, 0x2d, 0xf9, 0x52, 0x59, 0x5c, 0x82, 0xa6, 0xf0, 0x75, 0x7e, 0xe8, 0xec, 0xcc, 0xac, 0x18, 0x21, 0x09, 0x67, 0x66, 0x67}}}, +{{{0xb3, 0x40, 0x29, 0xd1, 0xcb, 0x1b, 0x08, 0x9e, 0x9c, 0xb7, 0x53, 0xb9, 0x3b, 0x71, 0x08, 0x95, 0x12, 0x1a, 0x58, 0xaf, 0x7e, 0x82, 0x52, 0x43, 0x4f, 0x11, 0x39, 0xf4, 0x93, 0x1a, 0x26, 0x05}} , + {{0x6e, 0x44, 0xa3, 0xf9, 0x64, 0xaf, 0xe7, 0x6d, 0x7d, 0xdf, 0x1e, 0xac, 0x04, 0xea, 0x3b, 0x5f, 0x9b, 0xe8, 0x24, 0x9d, 0x0e, 0xe5, 0x2e, 0x3e, 0xdf, 0xa9, 0xf7, 0xd4, 0x50, 0x71, 0xf0, 0x78}}}, +{{{0x3e, 0xa8, 0x38, 0xc2, 0x57, 0x56, 0x42, 0x9a, 0xb1, 0xe2, 0xf8, 0x45, 0xaa, 0x11, 0x48, 0x5f, 0x17, 0xc4, 0x54, 0x27, 0xdc, 0x5d, 0xaa, 0xdd, 0x41, 0xbc, 0xdf, 0x81, 0xb9, 0x53, 0xee, 0x52}} , + {{0xc3, 0xf1, 0xa7, 0x6d, 0xb3, 0x5f, 0x92, 0x6f, 0xcc, 0x91, 0xb8, 0x95, 0x05, 0xdf, 0x3c, 0x64, 0x57, 0x39, 0x61, 0x51, 0xad, 0x8c, 0x38, 0x7b, 0xc8, 0xde, 0x00, 0x34, 0xbe, 0xa1, 0xb0, 0x7e}}}, +{{{0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x25, 0x24, 0x1d, 0x8a, 0x67, 0x20, 0xee, 0x42, 0xeb, 0x38, 0xed, 0x0b, 0x8b, 0xcd, 0x46, 0x9d, 0x5e, 0x6b, 0x1e, 0x24, 0x9d, 0x12, 0x05, 0x1a, 0xcc, 0x05, 0x4e, 0x92, 0x38, 0xe1, 0x1f, 0x50}} , + {{0x4e, 0xee, 0x1c, 0x91, 0xe6, 0x11, 0xbd, 0x8e, 0x55, 0x1a, 0x18, 0x75, 0x66, 0xaf, 0x4d, 0x7b, 0x0f, 0xae, 0x6d, 0x85, 0xca, 0x82, 0x58, 0x21, 0x9c, 0x18, 0xe0, 0xed, 0xec, 0x22, 0x80, 0x2f}}}, +{{{0x68, 0x3b, 0x0a, 0x39, 0x1d, 0x6a, 0x15, 0x57, 0xfc, 0xf0, 0x63, 0x54, 0xdb, 0x39, 0xdb, 0xe8, 0x5c, 0x64, 0xff, 0xa0, 0x09, 0x4f, 0x3b, 0xb7, 0x32, 0x60, 0x99, 0x94, 0xfd, 0x94, 0x82, 0x2d}} , + {{0x24, 0xf6, 0x5a, 0x44, 0xf1, 0x55, 0x2c, 0xdb, 0xea, 0x7c, 0x84, 0x7c, 0x01, 0xac, 0xe3, 0xfd, 0xc9, 0x27, 0xc1, 0x5a, 0xb9, 0xde, 0x4f, 0x5a, 0x90, 0xdd, 0xc6, 0x67, 0xaa, 0x6f, 0x8a, 0x3a}}}, +{{{0x78, 0x52, 0x87, 0xc9, 0x97, 0x63, 0xb1, 0xdd, 0x54, 0x5f, 0xc1, 0xf8, 0xf1, 0x06, 0xa6, 0xa8, 0xa3, 0x88, 0x82, 0xd4, 0xcb, 0xa6, 0x19, 0xdd, 0xd1, 0x11, 0x87, 0x08, 0x17, 0x4c, 0x37, 0x2a}} , + {{0xa1, 0x0c, 0xf3, 0x08, 0x43, 0xd9, 0x24, 0x1e, 0x83, 0xa7, 0xdf, 0x91, 0xca, 0xbd, 0x69, 0x47, 0x8d, 0x1b, 0xe2, 0xb9, 0x4e, 0xb5, 0xe1, 0x76, 0xb3, 0x1c, 0x93, 0x03, 0xce, 0x5f, 0xb3, 0x5a}}}, +{{{0x1d, 0xda, 0xe4, 0x61, 0x03, 0x50, 0xa9, 0x8b, 0x68, 0x18, 0xef, 0xb2, 0x1c, 0x84, 0x3b, 0xa2, 0x44, 0x95, 0xa3, 0x04, 0x3b, 0xd6, 0x99, 0x00, 0xaf, 0x76, 0x42, 0x67, 0x02, 0x7d, 0x85, 0x56}} , + {{0xce, 0x72, 0x0e, 0x29, 0x84, 0xb2, 0x7d, 0xd2, 0x45, 0xbe, 0x57, 0x06, 0xed, 0x7f, 0xcf, 0xed, 0xcd, 0xef, 0x19, 0xd6, 0xbc, 0x15, 0x79, 0x64, 0xd2, 0x18, 0xe3, 0x20, 0x67, 0x3a, 0x54, 0x0b}}}, +{{{0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x52, 0xfd, 0x04, 0xc5, 0xfb, 0x99, 0xe7, 0xe8, 0xfb, 0x8c, 0xe1, 0x42, 0x03, 0xef, 0x9d, 0xd9, 0x9e, 0x4d, 0xf7, 0x80, 0xcf, 0x2e, 0xcc, 0x9b, 0x45, 0xc9, 0x7b, 0x7a, 0xbc, 0x37, 0xa8, 0x52}} , + {{0x96, 0x11, 0x41, 0x8a, 0x47, 0x91, 0xfe, 0xb6, 0xda, 0x7a, 0x54, 0x63, 0xd1, 0x14, 0x35, 0x05, 0x86, 0x8c, 0xa9, 0x36, 0x3f, 0xf2, 0x85, 0x54, 0x4e, 0x92, 0xd8, 0x85, 0x01, 0x46, 0xd6, 0x50}}}, +{{{0x53, 0xcd, 0xf3, 0x86, 0x40, 0xe6, 0x39, 0x42, 0x95, 0xd6, 0xcb, 0x45, 0x1a, 0x20, 0xc8, 0x45, 0x4b, 0x32, 0x69, 0x04, 0xb1, 0xaf, 0x20, 0x46, 0xc7, 0x6b, 0x23, 0x5b, 0x69, 0xee, 0x30, 0x3f}} , + {{0x70, 0x83, 0x47, 0xc0, 0xdb, 0x55, 0x08, 0xa8, 0x7b, 0x18, 0x6d, 0xf5, 0x04, 0x5a, 0x20, 0x0c, 0x4a, 0x8c, 0x60, 0xae, 0xae, 0x0f, 0x64, 0x55, 0x55, 0x2e, 0xd5, 0x1d, 0x53, 0x31, 0x42, 0x41}}}, +{{{0xca, 0xfc, 0x88, 0x6b, 0x96, 0x78, 0x0a, 0x8b, 0x83, 0xdc, 0xbc, 0xaf, 0x40, 0xb6, 0x8d, 0x7f, 0xef, 0xb4, 0xd1, 0x3f, 0xcc, 0xa2, 0x74, 0xc9, 0xc2, 0x92, 0x55, 0x00, 0xab, 0xdb, 0xbf, 0x4f}} , + {{0x93, 0x1c, 0x06, 0x2d, 0x66, 0x65, 0x02, 0xa4, 0x97, 0x18, 0xfd, 0x00, 0xe7, 0xab, 0x03, 0xec, 0xce, 0xc1, 0xbf, 0x37, 0xf8, 0x13, 0x53, 0xa5, 0xe5, 0x0c, 0x3a, 0xa8, 0x55, 0xb9, 0xff, 0x68}}}, +{{{0xe4, 0xe6, 0x6d, 0x30, 0x7d, 0x30, 0x35, 0xc2, 0x78, 0x87, 0xf9, 0xfc, 0x6b, 0x5a, 0xc3, 0xb7, 0x65, 0xd8, 0x2e, 0xc7, 0xa5, 0x0c, 0xc6, 0xdc, 0x12, 0xaa, 0xd6, 0x4f, 0xc5, 0x38, 0xbc, 0x0e}} , + {{0xe2, 0x3c, 0x76, 0x86, 0x38, 0xf2, 0x7b, 0x2c, 0x16, 0x78, 0x8d, 0xf5, 0xa4, 0x15, 0xda, 0xdb, 0x26, 0x85, 0xa0, 0x56, 0xdd, 0x1d, 0xe3, 0xb3, 0xfd, 0x40, 0xef, 0xf2, 0xd9, 0xa1, 0xb3, 0x04}}}, +{{{0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xdb, 0x49, 0x0e, 0xe6, 0x58, 0x10, 0x7a, 0x52, 0xda, 0xb5, 0x7d, 0x37, 0x6a, 0x3e, 0xa1, 0x78, 0xce, 0xc7, 0x1c, 0x24, 0x23, 0xdb, 0x7d, 0xfb, 0x8c, 0x8d, 0xdc, 0x30, 0x67, 0x69, 0x75, 0x3b}} , + {{0xa9, 0xea, 0x6d, 0x16, 0x16, 0x60, 0xf4, 0x60, 0x87, 0x19, 0x44, 0x8c, 0x4a, 0x8b, 0x3e, 0xfb, 0x16, 0x00, 0x00, 0x54, 0xa6, 0x9e, 0x9f, 0xef, 0xcf, 0xd9, 0xd2, 0x4c, 0x74, 0x31, 0xd0, 0x34}}}, +{{{0xa4, 0xeb, 0x04, 0xa4, 0x8c, 0x8f, 0x71, 0x27, 0x95, 0x85, 0x5d, 0x55, 0x4b, 0xb1, 0x26, 0x26, 0xc8, 0xae, 0x6a, 0x7d, 0xa2, 0x21, 0xca, 0xce, 0x38, 0xab, 0x0f, 0xd0, 0xd5, 0x2b, 0x6b, 0x00}} , + {{0xe5, 0x67, 0x0c, 0xf1, 0x3a, 0x9a, 0xea, 0x09, 0x39, 0xef, 0xd1, 0x30, 0xbc, 0x33, 0xba, 0xb1, 0x6a, 0xc5, 0x27, 0x08, 0x7f, 0x54, 0x80, 0x3d, 0xab, 0xf6, 0x15, 0x7a, 0xc2, 0x40, 0x73, 0x72}}}, +{{{0x84, 0x56, 0x82, 0xb6, 0x12, 0x70, 0x7f, 0xf7, 0xf0, 0xbd, 0x5b, 0xa9, 0xd5, 0xc5, 0x5f, 0x59, 0xbf, 0x7f, 0xb3, 0x55, 0x22, 0x02, 0xc9, 0x44, 0x55, 0x87, 0x8f, 0x96, 0x98, 0x64, 0x6d, 0x15}} , + {{0xb0, 0x8b, 0xaa, 0x1e, 0xec, 0xc7, 0xa5, 0x8f, 0x1f, 0x92, 0x04, 0xc6, 0x05, 0xf6, 0xdf, 0xa1, 0xcc, 0x1f, 0x81, 0xf5, 0x0e, 0x9c, 0x57, 0xdc, 0xe3, 0xbb, 0x06, 0x87, 0x1e, 0xfe, 0x23, 0x6c}}}, +{{{0xd8, 0x2b, 0x5b, 0x16, 0xea, 0x20, 0xf1, 0xd3, 0x68, 0x8f, 0xae, 0x5b, 0xd0, 0xa9, 0x1a, 0x19, 0xa8, 0x36, 0xfb, 0x2b, 0x57, 0x88, 0x7d, 0x90, 0xd5, 0xa6, 0xf3, 0xdc, 0x38, 0x89, 0x4e, 0x1f}} , + {{0xcc, 0x19, 0xda, 0x9b, 0x3b, 0x43, 0x48, 0x21, 0x2e, 0x23, 0x4d, 0x3d, 0xae, 0xf8, 0x8c, 0xfc, 0xdd, 0xa6, 0x74, 0x37, 0x65, 0xca, 0xee, 0x1a, 0x19, 0x8e, 0x9f, 0x64, 0x6f, 0x0c, 0x8b, 0x5a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x25, 0xb9, 0xc2, 0xf0, 0x72, 0xb8, 0x15, 0x16, 0xcc, 0x8d, 0x3c, 0x6f, 0x25, 0xed, 0xf4, 0x46, 0x2e, 0x0c, 0x60, 0x0f, 0xe2, 0x84, 0x34, 0x55, 0x89, 0x59, 0x34, 0x1b, 0xf5, 0x8d, 0xfe, 0x08}} , + {{0xf8, 0xab, 0x93, 0xbc, 0x44, 0xba, 0x1b, 0x75, 0x4b, 0x49, 0x6f, 0xd0, 0x54, 0x2e, 0x63, 0xba, 0xb5, 0xea, 0xed, 0x32, 0x14, 0xc9, 0x94, 0xd8, 0xc5, 0xce, 0xf4, 0x10, 0x68, 0xe0, 0x38, 0x27}}}, +{{{0x74, 0x1c, 0x14, 0x9b, 0xd4, 0x64, 0x61, 0x71, 0x5a, 0xb6, 0x21, 0x33, 0x4f, 0xf7, 0x8e, 0xba, 0xa5, 0x48, 0x9a, 0xc7, 0xfa, 0x9a, 0xf0, 0xb4, 0x62, 0xad, 0xf2, 0x5e, 0xcc, 0x03, 0x24, 0x1a}} , + {{0xf5, 0x76, 0xfd, 0xe4, 0xaf, 0xb9, 0x03, 0x59, 0xce, 0x63, 0xd2, 0x3b, 0x1f, 0xcd, 0x21, 0x0c, 0xad, 0x44, 0xa5, 0x97, 0xac, 0x80, 0x11, 0x02, 0x9b, 0x0c, 0xe5, 0x8b, 0xcd, 0xfb, 0x79, 0x77}}}, +{{{0x15, 0xbe, 0x9a, 0x0d, 0xba, 0x38, 0x72, 0x20, 0x8a, 0xf5, 0xbe, 0x59, 0x93, 0x79, 0xb7, 0xf6, 0x6a, 0x0c, 0x38, 0x27, 0x1a, 0x60, 0xf4, 0x86, 0x3b, 0xab, 0x5a, 0x00, 0xa0, 0xce, 0x21, 0x7d}} , + {{0x6c, 0xba, 0x14, 0xc5, 0xea, 0x12, 0x9e, 0x2e, 0x82, 0x63, 0xce, 0x9b, 0x4a, 0xe7, 0x1d, 0xec, 0xf1, 0x2e, 0x51, 0x1c, 0xf4, 0xd0, 0x69, 0x15, 0x42, 0x9d, 0xa3, 0x3f, 0x0e, 0xbf, 0xe9, 0x5c}}}, +{{{0xe4, 0x0d, 0xf4, 0xbd, 0xee, 0x31, 0x10, 0xed, 0xcb, 0x12, 0x86, 0xad, 0xd4, 0x2f, 0x90, 0x37, 0x32, 0xc3, 0x0b, 0x73, 0xec, 0x97, 0x85, 0xa4, 0x01, 0x1c, 0x76, 0x35, 0xfe, 0x75, 0xdd, 0x71}} , + {{0x11, 0xa4, 0x88, 0x9f, 0x3e, 0x53, 0x69, 0x3b, 0x1b, 0xe0, 0xf7, 0xba, 0x9b, 0xad, 0x4e, 0x81, 0x5f, 0xb5, 0x5c, 0xae, 0xbe, 0x67, 0x86, 0x37, 0x34, 0x8e, 0x07, 0x32, 0x45, 0x4a, 0x67, 0x39}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x90, 0x70, 0x58, 0x20, 0x03, 0x1e, 0x67, 0xb2, 0xc8, 0x9b, 0x58, 0xc5, 0xb1, 0xeb, 0x2d, 0x4a, 0xde, 0x82, 0x8c, 0xf2, 0xd2, 0x14, 0xb8, 0x70, 0x61, 0x4e, 0x73, 0xd6, 0x0b, 0x6b, 0x0d, 0x30}} , + {{0x81, 0xfc, 0x55, 0x5c, 0xbf, 0xa7, 0xc4, 0xbd, 0xe2, 0xf0, 0x4b, 0x8f, 0xe9, 0x7d, 0x99, 0xfa, 0xd3, 0xab, 0xbc, 0xc7, 0x83, 0x2b, 0x04, 0x7f, 0x0c, 0x19, 0x43, 0x03, 0x3d, 0x07, 0xca, 0x40}}}, +{{{0xf9, 0xc8, 0xbe, 0x8c, 0x16, 0x81, 0x39, 0x96, 0xf6, 0x17, 0x58, 0xc8, 0x30, 0x58, 0xfb, 0xc2, 0x03, 0x45, 0xd2, 0x52, 0x76, 0xe0, 0x6a, 0x26, 0x28, 0x5c, 0x88, 0x59, 0x6a, 0x5a, 0x54, 0x42}} , + {{0x07, 0xb5, 0x2e, 0x2c, 0x67, 0x15, 0x9b, 0xfb, 0x83, 0x69, 0x1e, 0x0f, 0xda, 0xd6, 0x29, 0xb1, 0x60, 0xe0, 0xb2, 0xba, 0x69, 0xa2, 0x9e, 0xbd, 0xbd, 0xe0, 0x1c, 0xbd, 0xcd, 0x06, 0x64, 0x70}}}, +{{{0x41, 0xfa, 0x8c, 0xe1, 0x89, 0x8f, 0x27, 0xc8, 0x25, 0x8f, 0x6f, 0x5f, 0x55, 0xf8, 0xde, 0x95, 0x6d, 0x2f, 0x75, 0x16, 0x2b, 0x4e, 0x44, 0xfd, 0x86, 0x6e, 0xe9, 0x70, 0x39, 0x76, 0x97, 0x7e}} , + {{0x17, 0x62, 0x6b, 0x14, 0xa1, 0x7c, 0xd0, 0x79, 0x6e, 0xd8, 0x8a, 0xa5, 0x6d, 0x8c, 0x93, 0xd2, 0x3f, 0xec, 0x44, 0x8d, 0x6e, 0x91, 0x01, 0x8c, 0x8f, 0xee, 0x01, 0x8f, 0xc0, 0xb4, 0x85, 0x0e}}}, +{{{0x02, 0x3a, 0x70, 0x41, 0xe4, 0x11, 0x57, 0x23, 0xac, 0xe6, 0xfc, 0x54, 0x7e, 0xcd, 0xd7, 0x22, 0xcb, 0x76, 0x9f, 0x20, 0xce, 0xa0, 0x73, 0x76, 0x51, 0x3b, 0xa4, 0xf8, 0xe3, 0x62, 0x12, 0x6c}} , + {{0x7f, 0x00, 0x9c, 0x26, 0x0d, 0x6f, 0x48, 0x7f, 0x3a, 0x01, 0xed, 0xc5, 0x96, 0xb0, 0x1f, 0x4f, 0xa8, 0x02, 0x62, 0x27, 0x8a, 0x50, 0x8d, 0x9a, 0x8b, 0x52, 0x0f, 0x1e, 0xcf, 0x41, 0x38, 0x19}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xf5, 0x6c, 0xd4, 0x2f, 0x0f, 0x69, 0x0f, 0x87, 0x3f, 0x61, 0x65, 0x1e, 0x35, 0x34, 0x85, 0xba, 0x02, 0x30, 0xac, 0x25, 0x3d, 0xe2, 0x62, 0xf1, 0xcc, 0xe9, 0x1b, 0xc2, 0xef, 0x6a, 0x42, 0x57}} , + {{0x34, 0x1f, 0x2e, 0xac, 0xd1, 0xc7, 0x04, 0x52, 0x32, 0x66, 0xb2, 0x33, 0x73, 0x21, 0x34, 0x54, 0xf7, 0x71, 0xed, 0x06, 0xb0, 0xff, 0xa6, 0x59, 0x6f, 0x8a, 0x4e, 0xfb, 0x02, 0xb0, 0x45, 0x6b}}}, +{{{0xf5, 0x48, 0x0b, 0x03, 0xc5, 0x22, 0x7d, 0x80, 0x08, 0x53, 0xfe, 0x32, 0xb1, 0xa1, 0x8a, 0x74, 0x6f, 0xbd, 0x3f, 0x85, 0xf4, 0xcf, 0xf5, 0x60, 0xaf, 0x41, 0x7e, 0x3e, 0x46, 0xa3, 0x5a, 0x20}} , + {{0xaa, 0x35, 0x87, 0x44, 0x63, 0x66, 0x97, 0xf8, 0x6e, 0x55, 0x0c, 0x04, 0x3e, 0x35, 0x50, 0xbf, 0x93, 0x69, 0xd2, 0x8b, 0x05, 0x55, 0x99, 0xbe, 0xe2, 0x53, 0x61, 0xec, 0xe8, 0x08, 0x0b, 0x32}}}, +{{{0xb3, 0x10, 0x45, 0x02, 0x69, 0x59, 0x2e, 0x97, 0xd9, 0x64, 0xf8, 0xdb, 0x25, 0x80, 0xdc, 0xc4, 0xd5, 0x62, 0x3c, 0xed, 0x65, 0x91, 0xad, 0xd1, 0x57, 0x81, 0x94, 0xaa, 0xa1, 0x29, 0xfc, 0x68}} , + {{0xdd, 0xb5, 0x7d, 0xab, 0x5a, 0x21, 0x41, 0x53, 0xbb, 0x17, 0x79, 0x0d, 0xd1, 0xa8, 0x0c, 0x0c, 0x20, 0x88, 0x09, 0xe9, 0x84, 0xe8, 0x25, 0x11, 0x67, 0x7a, 0x8b, 0x1a, 0xe4, 0x5d, 0xe1, 0x5d}}}, +{{{0x37, 0xea, 0xfe, 0x65, 0x3b, 0x25, 0xe8, 0xe1, 0xc2, 0xc5, 0x02, 0xa4, 0xbe, 0x98, 0x0a, 0x2b, 0x61, 0xc1, 0x9b, 0xe2, 0xd5, 0x92, 0xe6, 0x9e, 0x7d, 0x1f, 0xca, 0x43, 0x88, 0x8b, 0x2c, 0x59}} , + {{0xe0, 0xb5, 0x00, 0x1d, 0x2a, 0x6f, 0xaf, 0x79, 0x86, 0x2f, 0xa6, 0x5a, 0x93, 0xd1, 0xfe, 0xae, 0x3a, 0xee, 0xdb, 0x7c, 0x61, 0xbe, 0x7c, 0x01, 0xf9, 0xfe, 0x52, 0xdc, 0xd8, 0x52, 0xa3, 0x42}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x22, 0xaf, 0x13, 0x37, 0xbd, 0x37, 0x71, 0xac, 0x04, 0x46, 0x63, 0xac, 0xa4, 0x77, 0xed, 0x25, 0x38, 0xe0, 0x15, 0xa8, 0x64, 0x00, 0x0d, 0xce, 0x51, 0x01, 0xa9, 0xbc, 0x0f, 0x03, 0x1c, 0x04}} , + {{0x89, 0xf9, 0x80, 0x07, 0xcf, 0x3f, 0xb3, 0xe9, 0xe7, 0x45, 0x44, 0x3d, 0x2a, 0x7c, 0xe9, 0xe4, 0x16, 0x5c, 0x5e, 0x65, 0x1c, 0xc7, 0x7d, 0xc6, 0x7a, 0xfb, 0x43, 0xee, 0x25, 0x76, 0x46, 0x72}}}, +{{{0x02, 0xa2, 0xed, 0xf4, 0x8f, 0x6b, 0x0b, 0x3e, 0xeb, 0x35, 0x1a, 0xd5, 0x7e, 0xdb, 0x78, 0x00, 0x96, 0x8a, 0xa0, 0xb4, 0xcf, 0x60, 0x4b, 0xd4, 0xd5, 0xf9, 0x2d, 0xbf, 0x88, 0xbd, 0x22, 0x62}} , + {{0x13, 0x53, 0xe4, 0x82, 0x57, 0xfa, 0x1e, 0x8f, 0x06, 0x2b, 0x90, 0xba, 0x08, 0xb6, 0x10, 0x54, 0x4f, 0x7c, 0x1b, 0x26, 0xed, 0xda, 0x6b, 0xdd, 0x25, 0xd0, 0x4e, 0xea, 0x42, 0xbb, 0x25, 0x03}}}, +{{{0x51, 0x16, 0x50, 0x7c, 0xd5, 0x5d, 0xf6, 0x99, 0xe8, 0x77, 0x72, 0x4e, 0xfa, 0x62, 0xcb, 0x76, 0x75, 0x0c, 0xe2, 0x71, 0x98, 0x92, 0xd5, 0xfa, 0x45, 0xdf, 0x5c, 0x6f, 0x1e, 0x9e, 0x28, 0x69}} , + {{0x0d, 0xac, 0x66, 0x6d, 0xc3, 0x8b, 0xba, 0x16, 0xb5, 0xe2, 0xa0, 0x0d, 0x0c, 0xbd, 0xa4, 0x8e, 0x18, 0x6c, 0xf2, 0xdc, 0xf9, 0xdc, 0x4a, 0x86, 0x25, 0x95, 0x14, 0xcb, 0xd8, 0x1a, 0x04, 0x0f}}}, +{{{0x97, 0xa5, 0xdb, 0x8b, 0x2d, 0xaa, 0x42, 0x11, 0x09, 0xf2, 0x93, 0xbb, 0xd9, 0x06, 0x84, 0x4e, 0x11, 0xa8, 0xa0, 0x25, 0x2b, 0xa6, 0x5f, 0xae, 0xc4, 0xb4, 0x4c, 0xc8, 0xab, 0xc7, 0x3b, 0x02}} , + {{0xee, 0xc9, 0x29, 0x0f, 0xdf, 0x11, 0x85, 0xed, 0xce, 0x0d, 0x62, 0x2c, 0x8f, 0x4b, 0xf9, 0x04, 0xe9, 0x06, 0x72, 0x1d, 0x37, 0x20, 0x50, 0xc9, 0x14, 0xeb, 0xec, 0x39, 0xa7, 0x97, 0x2b, 0x4d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x69, 0xd1, 0x39, 0xbd, 0xfb, 0x33, 0xbe, 0xc4, 0xf0, 0x5c, 0xef, 0xf0, 0x56, 0x68, 0xfc, 0x97, 0x47, 0xc8, 0x72, 0xb6, 0x53, 0xa4, 0x0a, 0x98, 0xa5, 0xb4, 0x37, 0x71, 0xcf, 0x66, 0x50, 0x6d}} , + {{0x17, 0xa4, 0x19, 0x52, 0x11, 0x47, 0xb3, 0x5c, 0x5b, 0xa9, 0x2e, 0x22, 0xb4, 0x00, 0x52, 0xf9, 0x57, 0x18, 0xb8, 0xbe, 0x5a, 0xe3, 0xab, 0x83, 0xc8, 0x87, 0x0a, 0x2a, 0xd8, 0x8c, 0xbb, 0x54}}}, +{{{0xa9, 0x62, 0x93, 0x85, 0xbe, 0xe8, 0x73, 0x4a, 0x0e, 0xb0, 0xb5, 0x2d, 0x94, 0x50, 0xaa, 0xd3, 0xb2, 0xea, 0x9d, 0x62, 0x76, 0x3b, 0x07, 0x34, 0x4e, 0x2d, 0x70, 0xc8, 0x9a, 0x15, 0x66, 0x6b}} , + {{0xc5, 0x96, 0xca, 0xc8, 0x22, 0x1a, 0xee, 0x5f, 0xe7, 0x31, 0x60, 0x22, 0x83, 0x08, 0x63, 0xce, 0xb9, 0x32, 0x44, 0x58, 0x5d, 0x3a, 0x9b, 0xe4, 0x04, 0xd5, 0xef, 0x38, 0xef, 0x4b, 0xdd, 0x19}}}, +{{{0x4d, 0xc2, 0x17, 0x75, 0xa1, 0x68, 0xcd, 0xc3, 0xc6, 0x03, 0x44, 0xe3, 0x78, 0x09, 0x91, 0x47, 0x3f, 0x0f, 0xe4, 0x92, 0x58, 0xfa, 0x7d, 0x1f, 0x20, 0x94, 0x58, 0x5e, 0xbc, 0x19, 0x02, 0x6f}} , + {{0x20, 0xd6, 0xd8, 0x91, 0x54, 0xa7, 0xf3, 0x20, 0x4b, 0x34, 0x06, 0xfa, 0x30, 0xc8, 0x6f, 0x14, 0x10, 0x65, 0x74, 0x13, 0x4e, 0xf0, 0x69, 0x26, 0xce, 0xcf, 0x90, 0xf4, 0xd0, 0xc5, 0xc8, 0x64}}}, +{{{0x26, 0xa2, 0x50, 0x02, 0x24, 0x72, 0xf1, 0xf0, 0x4e, 0x2d, 0x93, 0xd5, 0x08, 0xe7, 0xae, 0x38, 0xf7, 0x18, 0xa5, 0x32, 0x34, 0xc2, 0xf0, 0xa6, 0xec, 0xb9, 0x61, 0x7b, 0x64, 0x99, 0xac, 0x71}} , + {{0x25, 0xcf, 0x74, 0x55, 0x1b, 0xaa, 0xa9, 0x38, 0x41, 0x40, 0xd5, 0x95, 0x95, 0xab, 0x1c, 0x5e, 0xbc, 0x41, 0x7e, 0x14, 0x30, 0xbe, 0x13, 0x89, 0xf4, 0xe5, 0xeb, 0x28, 0xc0, 0xc2, 0x96, 0x3a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x2b, 0x77, 0x45, 0xec, 0x67, 0x76, 0x32, 0x4c, 0xb9, 0xdf, 0x25, 0x32, 0x6b, 0xcb, 0xe7, 0x14, 0x61, 0x43, 0xee, 0xba, 0x9b, 0x71, 0xef, 0xd2, 0x48, 0x65, 0xbb, 0x1b, 0x8a, 0x13, 0x1b, 0x22}} , + {{0x84, 0xad, 0x0c, 0x18, 0x38, 0x5a, 0xba, 0xd0, 0x98, 0x59, 0xbf, 0x37, 0xb0, 0x4f, 0x97, 0x60, 0x20, 0xb3, 0x9b, 0x97, 0xf6, 0x08, 0x6c, 0xa4, 0xff, 0xfb, 0xb7, 0xfa, 0x95, 0xb2, 0x51, 0x79}}}, +{{{0x28, 0x5c, 0x3f, 0xdb, 0x6b, 0x18, 0x3b, 0x5c, 0xd1, 0x04, 0x28, 0xde, 0x85, 0x52, 0x31, 0xb5, 0xbb, 0xf6, 0xa9, 0xed, 0xbe, 0x28, 0x4f, 0xb3, 0x7e, 0x05, 0x6a, 0xdb, 0x95, 0x0d, 0x1b, 0x1c}} , + {{0xd5, 0xc5, 0xc3, 0x9a, 0x0a, 0xd0, 0x31, 0x3e, 0x07, 0x36, 0x8e, 0xc0, 0x8a, 0x62, 0xb1, 0xca, 0xd6, 0x0e, 0x1e, 0x9d, 0xef, 0xab, 0x98, 0x4d, 0xbb, 0x6c, 0x05, 0xe0, 0xe4, 0x5d, 0xbd, 0x57}}}, +{{{0xcc, 0x21, 0x27, 0xce, 0xfd, 0xa9, 0x94, 0x8e, 0xe1, 0xab, 0x49, 0xe0, 0x46, 0x26, 0xa1, 0xa8, 0x8c, 0xa1, 0x99, 0x1d, 0xb4, 0x27, 0x6d, 0x2d, 0xc8, 0x39, 0x30, 0x5e, 0x37, 0x52, 0xc4, 0x6e}} , + {{0xa9, 0x85, 0xf4, 0xe7, 0xb0, 0x15, 0x33, 0x84, 0x1b, 0x14, 0x1a, 0x02, 0xd9, 0x3b, 0xad, 0x0f, 0x43, 0x6c, 0xea, 0x3e, 0x0f, 0x7e, 0xda, 0xdd, 0x6b, 0x4c, 0x7f, 0x6e, 0xd4, 0x6b, 0xbf, 0x0f}}}, +{{{0x47, 0x9f, 0x7c, 0x56, 0x7c, 0x43, 0x91, 0x1c, 0xbb, 0x4e, 0x72, 0x3e, 0x64, 0xab, 0xa0, 0xa0, 0xdf, 0xb4, 0xd8, 0x87, 0x3a, 0xbd, 0xa8, 0x48, 0xc9, 0xb8, 0xef, 0x2e, 0xad, 0x6f, 0x84, 0x4f}} , + {{0x2d, 0x2d, 0xf0, 0x1b, 0x7e, 0x2a, 0x6c, 0xf8, 0xa9, 0x6a, 0xe1, 0xf0, 0x99, 0xa1, 0x67, 0x9a, 0xd4, 0x13, 0xca, 0xca, 0xba, 0x27, 0x92, 0xaa, 0xa1, 0x5d, 0x50, 0xde, 0xcc, 0x40, 0x26, 0x0a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x9f, 0x3e, 0xf2, 0xb2, 0x90, 0xce, 0xdb, 0x64, 0x3e, 0x03, 0xdd, 0x37, 0x36, 0x54, 0x70, 0x76, 0x24, 0xb5, 0x69, 0x03, 0xfc, 0xa0, 0x2b, 0x74, 0xb2, 0x05, 0x0e, 0xcc, 0xd8, 0x1f, 0x6a, 0x1f}} , + {{0x19, 0x5e, 0x60, 0x69, 0x58, 0x86, 0xa0, 0x31, 0xbd, 0x32, 0xe9, 0x2c, 0x5c, 0xd2, 0x85, 0xba, 0x40, 0x64, 0xa8, 0x74, 0xf8, 0x0e, 0x1c, 0xb3, 0xa9, 0x69, 0xe8, 0x1e, 0x40, 0x64, 0x99, 0x77}}}, +{{{0x6c, 0x32, 0x4f, 0xfd, 0xbb, 0x5c, 0xbb, 0x8d, 0x64, 0x66, 0x4a, 0x71, 0x1f, 0x79, 0xa3, 0xad, 0x8d, 0xf9, 0xd4, 0xec, 0xcf, 0x67, 0x70, 0xfa, 0x05, 0x4a, 0x0f, 0x6e, 0xaf, 0x87, 0x0a, 0x6f}} , + {{0xc6, 0x36, 0x6e, 0x6c, 0x8c, 0x24, 0x09, 0x60, 0xbe, 0x26, 0xd2, 0x4c, 0x5e, 0x17, 0xca, 0x5f, 0x1d, 0xcc, 0x87, 0xe8, 0x42, 0x6a, 0xcb, 0xcb, 0x7d, 0x92, 0x05, 0x35, 0x81, 0x13, 0x60, 0x6b}}}, +{{{0xf4, 0x15, 0xcd, 0x0f, 0x0a, 0xaf, 0x4e, 0x6b, 0x51, 0xfd, 0x14, 0xc4, 0x2e, 0x13, 0x86, 0x74, 0x44, 0xcb, 0x66, 0x6b, 0xb6, 0x9d, 0x74, 0x56, 0x32, 0xac, 0x8d, 0x8e, 0x8c, 0x8c, 0x8c, 0x39}} , + {{0xca, 0x59, 0x74, 0x1a, 0x11, 0xef, 0x6d, 0xf7, 0x39, 0x5c, 0x3b, 0x1f, 0xfa, 0xe3, 0x40, 0x41, 0x23, 0x9e, 0xf6, 0xd1, 0x21, 0xa2, 0xbf, 0xad, 0x65, 0x42, 0x6b, 0x59, 0x8a, 0xe8, 0xc5, 0x7f}}}, +{{{0x64, 0x05, 0x7a, 0x84, 0x4a, 0x13, 0xc3, 0xf6, 0xb0, 0x6e, 0x9a, 0x6b, 0x53, 0x6b, 0x32, 0xda, 0xd9, 0x74, 0x75, 0xc4, 0xba, 0x64, 0x3d, 0x3b, 0x08, 0xdd, 0x10, 0x46, 0xef, 0xc7, 0x90, 0x1f}} , + {{0x7b, 0x2f, 0x3a, 0xce, 0xc8, 0xa1, 0x79, 0x3c, 0x30, 0x12, 0x44, 0x28, 0xf6, 0xbc, 0xff, 0xfd, 0xf4, 0xc0, 0x97, 0xb0, 0xcc, 0xc3, 0x13, 0x7a, 0xb9, 0x9a, 0x16, 0xe4, 0xcb, 0x4c, 0x34, 0x63}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x07, 0x4e, 0xd3, 0x2d, 0x09, 0x33, 0x0e, 0xd2, 0x0d, 0xbe, 0x3e, 0xe7, 0xe4, 0xaa, 0xb7, 0x00, 0x8b, 0xe8, 0xad, 0xaa, 0x7a, 0x8d, 0x34, 0x28, 0xa9, 0x81, 0x94, 0xc5, 0xe7, 0x42, 0xac, 0x47}} , + {{0x24, 0x89, 0x7a, 0x8f, 0xb5, 0x9b, 0xf0, 0xc2, 0x03, 0x64, 0xd0, 0x1e, 0xf5, 0xa4, 0xb2, 0xf3, 0x74, 0xe9, 0x1a, 0x16, 0xfd, 0xcb, 0x15, 0xea, 0xeb, 0x10, 0x6c, 0x35, 0xd1, 0xc1, 0xa6, 0x28}}}, +{{{0xcc, 0xd5, 0x39, 0xfc, 0xa5, 0xa4, 0xad, 0x32, 0x15, 0xce, 0x19, 0xe8, 0x34, 0x2b, 0x1c, 0x60, 0x91, 0xfc, 0x05, 0xa9, 0xb3, 0xdc, 0x80, 0x29, 0xc4, 0x20, 0x79, 0x06, 0x39, 0xc0, 0xe2, 0x22}} , + {{0xbb, 0xa8, 0xe1, 0x89, 0x70, 0x57, 0x18, 0x54, 0x3c, 0xf6, 0x0d, 0x82, 0x12, 0x05, 0x87, 0x96, 0x06, 0x39, 0xe3, 0xf8, 0xb3, 0x95, 0xe5, 0xd7, 0x26, 0xbf, 0x09, 0x5a, 0x94, 0xf9, 0x1c, 0x63}}}, +{{{0x2b, 0x8c, 0x2d, 0x9a, 0x8b, 0x84, 0xf2, 0x56, 0xfb, 0xad, 0x2e, 0x7f, 0xb7, 0xfc, 0x30, 0xe1, 0x35, 0x89, 0xba, 0x4d, 0xa8, 0x6d, 0xce, 0x8c, 0x8b, 0x30, 0xe0, 0xda, 0x29, 0x18, 0x11, 0x17}} , + {{0x19, 0xa6, 0x5a, 0x65, 0x93, 0xc3, 0xb5, 0x31, 0x22, 0x4f, 0xf3, 0xf6, 0x0f, 0xeb, 0x28, 0xc3, 0x7c, 0xeb, 0xce, 0x86, 0xec, 0x67, 0x76, 0x6e, 0x35, 0x45, 0x7b, 0xd8, 0x6b, 0x92, 0x01, 0x65}}}, +{{{0x3d, 0xd5, 0x9a, 0x64, 0x73, 0x36, 0xb1, 0xd6, 0x86, 0x98, 0x42, 0x3f, 0x8a, 0xf1, 0xc7, 0xf5, 0x42, 0xa8, 0x9c, 0x52, 0xa8, 0xdc, 0xf9, 0x24, 0x3f, 0x4a, 0xa1, 0xa4, 0x5b, 0xe8, 0x62, 0x1a}} , + {{0xc5, 0xbd, 0xc8, 0x14, 0xd5, 0x0d, 0xeb, 0xe1, 0xa5, 0xe6, 0x83, 0x11, 0x09, 0x00, 0x1d, 0x55, 0x83, 0x51, 0x7e, 0x75, 0x00, 0x81, 0xb9, 0xcb, 0xd8, 0xc5, 0xe5, 0xa1, 0xd9, 0x17, 0x6d, 0x1f}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xea, 0xf9, 0xe4, 0xe9, 0xe1, 0x52, 0x3f, 0x51, 0x19, 0x0d, 0xdd, 0xd9, 0x9d, 0x93, 0x31, 0x87, 0x23, 0x09, 0xd5, 0x83, 0xeb, 0x92, 0x09, 0x76, 0x6e, 0xe3, 0xf8, 0xc0, 0xa2, 0x66, 0xb5, 0x36}} , + {{0x3a, 0xbb, 0x39, 0xed, 0x32, 0x02, 0xe7, 0x43, 0x7a, 0x38, 0x14, 0x84, 0xe3, 0x44, 0xd2, 0x5e, 0x94, 0xdd, 0x78, 0x89, 0x55, 0x4c, 0x73, 0x9e, 0xe1, 0xe4, 0x3e, 0x43, 0xd0, 0x4a, 0xde, 0x1b}}}, +{{{0xb2, 0xe7, 0x8f, 0xe3, 0xa3, 0xc5, 0xcb, 0x72, 0xee, 0x79, 0x41, 0xf8, 0xdf, 0xee, 0x65, 0xc5, 0x45, 0x77, 0x27, 0x3c, 0xbd, 0x58, 0xd3, 0x75, 0xe2, 0x04, 0x4b, 0xbb, 0x65, 0xf3, 0xc8, 0x0f}} , + {{0x24, 0x7b, 0x93, 0x34, 0xb5, 0xe2, 0x74, 0x48, 0xcd, 0xa0, 0x0b, 0x92, 0x97, 0x66, 0x39, 0xf4, 0xb0, 0xe2, 0x5d, 0x39, 0x6a, 0x5b, 0x45, 0x17, 0x78, 0x1e, 0xdb, 0x91, 0x81, 0x1c, 0xf9, 0x16}}}, +{{{0x16, 0xdf, 0xd1, 0x5a, 0xd5, 0xe9, 0x4e, 0x58, 0x95, 0x93, 0x5f, 0x51, 0x09, 0xc3, 0x2a, 0xc9, 0xd4, 0x55, 0x48, 0x79, 0xa4, 0xa3, 0xb2, 0xc3, 0x62, 0xaa, 0x8c, 0xe8, 0xad, 0x47, 0x39, 0x1b}} , + {{0x46, 0xda, 0x9e, 0x51, 0x3a, 0xe6, 0xd1, 0xa6, 0xbb, 0x4d, 0x7b, 0x08, 0xbe, 0x8c, 0xd5, 0xf3, 0x3f, 0xfd, 0xf7, 0x44, 0x80, 0x2d, 0x53, 0x4b, 0xd0, 0x87, 0x68, 0xc1, 0xb5, 0xd8, 0xf7, 0x07}}}, +{{{0xf4, 0x10, 0x46, 0xbe, 0xb7, 0xd2, 0xd1, 0xce, 0x5e, 0x76, 0xa2, 0xd7, 0x03, 0xdc, 0xe4, 0x81, 0x5a, 0xf6, 0x3c, 0xde, 0xae, 0x7a, 0x9d, 0x21, 0x34, 0xa5, 0xf6, 0xa9, 0x73, 0xe2, 0x8d, 0x60}} , + {{0xfa, 0x44, 0x71, 0xf6, 0x41, 0xd8, 0xc6, 0x58, 0x13, 0x37, 0xeb, 0x84, 0x0f, 0x96, 0xc7, 0xdc, 0xc8, 0xa9, 0x7a, 0x83, 0xb2, 0x2f, 0x31, 0xb1, 0x1a, 0xd8, 0x98, 0x3f, 0x11, 0xd0, 0x31, 0x3b}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x81, 0xd5, 0x34, 0x16, 0x01, 0xa3, 0x93, 0xea, 0x52, 0x94, 0xec, 0x93, 0xb7, 0x81, 0x11, 0x2d, 0x58, 0xf9, 0xb5, 0x0a, 0xaa, 0x4f, 0xf6, 0x2e, 0x3f, 0x36, 0xbf, 0x33, 0x5a, 0xe7, 0xd1, 0x08}} , + {{0x1a, 0xcf, 0x42, 0xae, 0xcc, 0xb5, 0x77, 0x39, 0xc4, 0x5b, 0x5b, 0xd0, 0x26, 0x59, 0x27, 0xd0, 0x55, 0x71, 0x12, 0x9d, 0x88, 0x3d, 0x9c, 0xea, 0x41, 0x6a, 0xf0, 0x50, 0x93, 0x93, 0xdd, 0x47}}}, +{{{0x6f, 0xc9, 0x51, 0x6d, 0x1c, 0xaa, 0xf5, 0xa5, 0x90, 0x3f, 0x14, 0xe2, 0x6e, 0x8e, 0x64, 0xfd, 0xac, 0xe0, 0x4e, 0x22, 0xe5, 0xc1, 0xbc, 0x29, 0x0a, 0x6a, 0x9e, 0xa1, 0x60, 0xcb, 0x2f, 0x0b}} , + {{0xdc, 0x39, 0x32, 0xf3, 0xa1, 0x44, 0xe9, 0xc5, 0xc3, 0x78, 0xfb, 0x95, 0x47, 0x34, 0x35, 0x34, 0xe8, 0x25, 0xde, 0x93, 0xc6, 0xb4, 0x76, 0x6d, 0x86, 0x13, 0xc6, 0xe9, 0x68, 0xb5, 0x01, 0x63}}}, +{{{0x1f, 0x9a, 0x52, 0x64, 0x97, 0xd9, 0x1c, 0x08, 0x51, 0x6f, 0x26, 0x9d, 0xaa, 0x93, 0x33, 0x43, 0xfa, 0x77, 0xe9, 0x62, 0x9b, 0x5d, 0x18, 0x75, 0xeb, 0x78, 0xf7, 0x87, 0x8f, 0x41, 0xb4, 0x4d}} , + {{0x13, 0xa8, 0x82, 0x3e, 0xe9, 0x13, 0xad, 0xeb, 0x01, 0xca, 0xcf, 0xda, 0xcd, 0xf7, 0x6c, 0xc7, 0x7a, 0xdc, 0x1e, 0x6e, 0xc8, 0x4e, 0x55, 0x62, 0x80, 0xea, 0x78, 0x0c, 0x86, 0xb9, 0x40, 0x51}}}, +{{{0x27, 0xae, 0xd3, 0x0d, 0x4c, 0x8f, 0x34, 0xea, 0x7d, 0x3c, 0xe5, 0x8a, 0xcf, 0x5b, 0x92, 0xd8, 0x30, 0x16, 0xb4, 0xa3, 0x75, 0xff, 0xeb, 0x27, 0xc8, 0x5c, 0x6c, 0xc2, 0xee, 0x6c, 0x21, 0x0b}} , + {{0xc3, 0xba, 0x12, 0x53, 0x2a, 0xaa, 0x77, 0xad, 0x19, 0x78, 0x55, 0x8a, 0x2e, 0x60, 0x87, 0xc2, 0x6e, 0x91, 0x38, 0x91, 0x3f, 0x7a, 0xc5, 0x24, 0x8f, 0x51, 0xc5, 0xde, 0xb0, 0x53, 0x30, 0x56}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x02, 0xfe, 0x54, 0x12, 0x18, 0xca, 0x7d, 0xa5, 0x68, 0x43, 0xa3, 0x6d, 0x14, 0x2a, 0x6a, 0xa5, 0x8e, 0x32, 0xe7, 0x63, 0x4f, 0xe3, 0xc6, 0x44, 0x3e, 0xab, 0x63, 0xca, 0x17, 0x86, 0x74, 0x3f}} , + {{0x1e, 0x64, 0xc1, 0x7d, 0x52, 0xdc, 0x13, 0x5a, 0xa1, 0x9c, 0x4e, 0xee, 0x99, 0x28, 0xbb, 0x4c, 0xee, 0xac, 0xa9, 0x1b, 0x89, 0xa2, 0x38, 0x39, 0x7b, 0xc4, 0x0f, 0x42, 0xe6, 0x89, 0xed, 0x0f}}}, +{{{0xf3, 0x3c, 0x8c, 0x80, 0x83, 0x10, 0x8a, 0x37, 0x50, 0x9c, 0xb4, 0xdf, 0x3f, 0x8c, 0xf7, 0x23, 0x07, 0xd6, 0xff, 0xa0, 0x82, 0x6c, 0x75, 0x3b, 0xe4, 0xb5, 0xbb, 0xe4, 0xe6, 0x50, 0xf0, 0x08}} , + {{0x62, 0xee, 0x75, 0x48, 0x92, 0x33, 0xf2, 0xf4, 0xad, 0x15, 0x7a, 0xa1, 0x01, 0x46, 0xa9, 0x32, 0x06, 0x88, 0xb6, 0x36, 0x47, 0x35, 0xb9, 0xb4, 0x42, 0x85, 0x76, 0xf0, 0x48, 0x00, 0x90, 0x38}}}, +{{{0x51, 0x15, 0x9d, 0xc3, 0x95, 0xd1, 0x39, 0xbb, 0x64, 0x9d, 0x15, 0x81, 0xc1, 0x68, 0xd0, 0xb6, 0xa4, 0x2c, 0x7d, 0x5e, 0x02, 0x39, 0x00, 0xe0, 0x3b, 0xa4, 0xcc, 0xca, 0x1d, 0x81, 0x24, 0x10}} , + {{0xe7, 0x29, 0xf9, 0x37, 0xd9, 0x46, 0x5a, 0xcd, 0x70, 0xfe, 0x4d, 0x5b, 0xbf, 0xa5, 0xcf, 0x91, 0xf4, 0xef, 0xee, 0x8a, 0x29, 0xd0, 0xe7, 0xc4, 0x25, 0x92, 0x8a, 0xff, 0x36, 0xfc, 0xe4, 0x49}}}, +{{{0xbd, 0x00, 0xb9, 0x04, 0x7d, 0x35, 0xfc, 0xeb, 0xd0, 0x0b, 0x05, 0x32, 0x52, 0x7a, 0x89, 0x24, 0x75, 0x50, 0xe1, 0x63, 0x02, 0x82, 0x8e, 0xe7, 0x85, 0x0c, 0xf2, 0x56, 0x44, 0x37, 0x83, 0x25}} , + {{0x8f, 0xa1, 0xce, 0xcb, 0x60, 0xda, 0x12, 0x02, 0x1e, 0x29, 0x39, 0x2a, 0x03, 0xb7, 0xeb, 0x77, 0x40, 0xea, 0xc9, 0x2b, 0x2c, 0xd5, 0x7d, 0x7e, 0x2c, 0xc7, 0x5a, 0xfd, 0xff, 0xc4, 0xd1, 0x62}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x1d, 0x88, 0x98, 0x5b, 0x4e, 0xfc, 0x41, 0x24, 0x05, 0xe6, 0x50, 0x2b, 0xae, 0x96, 0x51, 0xd9, 0x6b, 0x72, 0xb2, 0x33, 0x42, 0x98, 0x68, 0xbb, 0x10, 0x5a, 0x7a, 0x8c, 0x9d, 0x07, 0xb4, 0x05}} , + {{0x2f, 0x61, 0x9f, 0xd7, 0xa8, 0x3f, 0x83, 0x8c, 0x10, 0x69, 0x90, 0xe6, 0xcf, 0xd2, 0x63, 0xa3, 0xe4, 0x54, 0x7e, 0xe5, 0x69, 0x13, 0x1c, 0x90, 0x57, 0xaa, 0xe9, 0x53, 0x22, 0x43, 0x29, 0x23}}}, +{{{0xe5, 0x1c, 0xf8, 0x0a, 0xfd, 0x2d, 0x7e, 0xf5, 0xf5, 0x70, 0x7d, 0x41, 0x6b, 0x11, 0xfe, 0xbe, 0x99, 0xd1, 0x55, 0x29, 0x31, 0xbf, 0xc0, 0x97, 0x6c, 0xd5, 0x35, 0xcc, 0x5e, 0x8b, 0xd9, 0x69}} , + {{0x8e, 0x4e, 0x9f, 0x25, 0xf8, 0x81, 0x54, 0x2d, 0x0e, 0xd5, 0x54, 0x81, 0x9b, 0xa6, 0x92, 0xce, 0x4b, 0xe9, 0x8f, 0x24, 0x3b, 0xca, 0xe0, 0x44, 0xab, 0x36, 0xfe, 0xfb, 0x87, 0xd4, 0x26, 0x3e}}}, +{{{0x0f, 0x93, 0x9c, 0x11, 0xe7, 0xdb, 0xf1, 0xf0, 0x85, 0x43, 0x28, 0x15, 0x37, 0xdd, 0xde, 0x27, 0xdf, 0xad, 0x3e, 0x49, 0x4f, 0xe0, 0x5b, 0xf6, 0x80, 0x59, 0x15, 0x3c, 0x85, 0xb7, 0x3e, 0x12}} , + {{0xf5, 0xff, 0xcc, 0xf0, 0xb4, 0x12, 0x03, 0x5f, 0xc9, 0x84, 0xcb, 0x1d, 0x17, 0xe0, 0xbc, 0xcc, 0x03, 0x62, 0xa9, 0x8b, 0x94, 0xa6, 0xaa, 0x18, 0xcb, 0x27, 0x8d, 0x49, 0xa6, 0x17, 0x15, 0x07}}}, +{{{0xd9, 0xb6, 0xd4, 0x9d, 0xd4, 0x6a, 0xaf, 0x70, 0x07, 0x2c, 0x10, 0x9e, 0xbd, 0x11, 0xad, 0xe4, 0x26, 0x33, 0x70, 0x92, 0x78, 0x1c, 0x74, 0x9f, 0x75, 0x60, 0x56, 0xf4, 0x39, 0xa8, 0xa8, 0x62}} , + {{0x3b, 0xbf, 0x55, 0x35, 0x61, 0x8b, 0x44, 0x97, 0xe8, 0x3a, 0x55, 0xc1, 0xc8, 0x3b, 0xfd, 0x95, 0x29, 0x11, 0x60, 0x96, 0x1e, 0xcb, 0x11, 0x9d, 0xc2, 0x03, 0x8a, 0x1b, 0xc6, 0xd6, 0x45, 0x3d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x7e, 0x0e, 0x50, 0xb2, 0xcc, 0x0d, 0x6b, 0xa6, 0x71, 0x5b, 0x42, 0xed, 0xbd, 0xaf, 0xac, 0xf0, 0xfc, 0x12, 0xa2, 0x3f, 0x4e, 0xda, 0xe8, 0x11, 0xf3, 0x23, 0xe1, 0x04, 0x62, 0x03, 0x1c, 0x4e}} , + {{0xc8, 0xb1, 0x1b, 0x6f, 0x73, 0x61, 0x3d, 0x27, 0x0d, 0x7d, 0x7a, 0x25, 0x5f, 0x73, 0x0e, 0x2f, 0x93, 0xf6, 0x24, 0xd8, 0x4f, 0x90, 0xac, 0xa2, 0x62, 0x0a, 0xf0, 0x61, 0xd9, 0x08, 0x59, 0x6a}}}, +{{{0x6f, 0x2d, 0x55, 0xf8, 0x2f, 0x8e, 0xf0, 0x18, 0x3b, 0xea, 0xdd, 0x26, 0x72, 0xd1, 0xf5, 0xfe, 0xe5, 0xb8, 0xe6, 0xd3, 0x10, 0x48, 0x46, 0x49, 0x3a, 0x9f, 0x5e, 0x45, 0x6b, 0x90, 0xe8, 0x7f}} , + {{0xd3, 0x76, 0x69, 0x33, 0x7b, 0xb9, 0x40, 0x70, 0xee, 0xa6, 0x29, 0x6b, 0xdd, 0xd0, 0x5d, 0x8d, 0xc1, 0x3e, 0x4a, 0xea, 0x37, 0xb1, 0x03, 0x02, 0x03, 0x35, 0xf1, 0x28, 0x9d, 0xff, 0x00, 0x13}}}, +{{{0x7a, 0xdb, 0x12, 0xd2, 0x8a, 0x82, 0x03, 0x1b, 0x1e, 0xaf, 0xf9, 0x4b, 0x9c, 0xbe, 0xae, 0x7c, 0xe4, 0x94, 0x2a, 0x23, 0xb3, 0x62, 0x86, 0xe7, 0xfd, 0x23, 0xaa, 0x99, 0xbd, 0x2b, 0x11, 0x6c}} , + {{0x8d, 0xa6, 0xd5, 0xac, 0x9d, 0xcc, 0x68, 0x75, 0x7f, 0xc3, 0x4d, 0x4b, 0xdd, 0x6c, 0xbb, 0x11, 0x5a, 0x60, 0xe5, 0xbd, 0x7d, 0x27, 0x8b, 0xda, 0xb4, 0x95, 0xf6, 0x03, 0x27, 0xa4, 0x92, 0x3f}}}, +{{{0x22, 0xd6, 0xb5, 0x17, 0x84, 0xbf, 0x12, 0xcc, 0x23, 0x14, 0x4a, 0xdf, 0x14, 0x31, 0xbc, 0xa1, 0xac, 0x6e, 0xab, 0xfa, 0x57, 0x11, 0x53, 0xb3, 0x27, 0xe6, 0xf9, 0x47, 0x33, 0x44, 0x34, 0x1e}} , + {{0x79, 0xfc, 0xa6, 0xb4, 0x0b, 0x35, 0x20, 0xc9, 0x4d, 0x22, 0x84, 0xc4, 0xa9, 0x20, 0xec, 0x89, 0x94, 0xba, 0x66, 0x56, 0x48, 0xb9, 0x87, 0x7f, 0xca, 0x1e, 0x06, 0xed, 0xa5, 0x55, 0x59, 0x29}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x56, 0xe1, 0xf5, 0xf1, 0xd5, 0xab, 0xa8, 0x2b, 0xae, 0x89, 0xf3, 0xcf, 0x56, 0x9f, 0xf2, 0x4b, 0x31, 0xbc, 0x18, 0xa9, 0x06, 0x5b, 0xbe, 0xb4, 0x61, 0xf8, 0xb2, 0x06, 0x9c, 0x81, 0xab, 0x4c}} , + {{0x1f, 0x68, 0x76, 0x01, 0x16, 0x38, 0x2b, 0x0f, 0x77, 0x97, 0x92, 0x67, 0x4e, 0x86, 0x6a, 0x8b, 0xe5, 0xe8, 0x0c, 0xf7, 0x36, 0x39, 0xb5, 0x33, 0xe6, 0xcf, 0x5e, 0xbd, 0x18, 0xfb, 0x10, 0x1f}}}, +{{{0x83, 0xf0, 0x0d, 0x63, 0xef, 0x53, 0x6b, 0xb5, 0x6b, 0xf9, 0x83, 0xcf, 0xde, 0x04, 0x22, 0x9b, 0x2c, 0x0a, 0xe0, 0xa5, 0xd8, 0xc7, 0x9c, 0xa5, 0xa3, 0xf6, 0x6f, 0xcf, 0x90, 0x6b, 0x68, 0x7c}} , + {{0x33, 0x15, 0xd7, 0x7f, 0x1a, 0xd5, 0x21, 0x58, 0xc4, 0x18, 0xa5, 0xf0, 0xcc, 0x73, 0xa8, 0xfd, 0xfa, 0x18, 0xd1, 0x03, 0x91, 0x8d, 0x52, 0xd2, 0xa3, 0xa4, 0xd3, 0xb1, 0xea, 0x1d, 0x0f, 0x00}}}, +{{{0xcc, 0x48, 0x83, 0x90, 0xe5, 0xfd, 0x3f, 0x84, 0xaa, 0xf9, 0x8b, 0x82, 0x59, 0x24, 0x34, 0x68, 0x4f, 0x1c, 0x23, 0xd9, 0xcc, 0x71, 0xe1, 0x7f, 0x8c, 0xaf, 0xf1, 0xee, 0x00, 0xb6, 0xa0, 0x77}} , + {{0xf5, 0x1a, 0x61, 0xf7, 0x37, 0x9d, 0x00, 0xf4, 0xf2, 0x69, 0x6f, 0x4b, 0x01, 0x85, 0x19, 0x45, 0x4d, 0x7f, 0x02, 0x7c, 0x6a, 0x05, 0x47, 0x6c, 0x1f, 0x81, 0x20, 0xd4, 0xe8, 0x50, 0x27, 0x72}}}, +{{{0x2c, 0x3a, 0xe5, 0xad, 0xf4, 0xdd, 0x2d, 0xf7, 0x5c, 0x44, 0xb5, 0x5b, 0x21, 0xa3, 0x89, 0x5f, 0x96, 0x45, 0xca, 0x4d, 0xa4, 0x21, 0x99, 0x70, 0xda, 0xc4, 0xc4, 0xa0, 0xe5, 0xf4, 0xec, 0x0a}} , + {{0x07, 0x68, 0x21, 0x65, 0xe9, 0x08, 0xa0, 0x0b, 0x6a, 0x4a, 0xba, 0xb5, 0x80, 0xaf, 0xd0, 0x1b, 0xc5, 0xf5, 0x4b, 0x73, 0x50, 0x60, 0x2d, 0x71, 0x69, 0x61, 0x0e, 0xc0, 0x20, 0x40, 0x30, 0x19}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xd0, 0x75, 0x57, 0x3b, 0xeb, 0x5c, 0x14, 0x56, 0x50, 0xc9, 0x4f, 0xb8, 0xb8, 0x1e, 0xa3, 0xf4, 0xab, 0xf5, 0xa9, 0x20, 0x15, 0x94, 0x82, 0xda, 0x96, 0x1c, 0x9b, 0x59, 0x8c, 0xff, 0xf4, 0x51}} , + {{0xc1, 0x3a, 0x86, 0xd7, 0xb0, 0x06, 0x84, 0x7f, 0x1b, 0xbd, 0xd4, 0x07, 0x78, 0x80, 0x2e, 0xb1, 0xb4, 0xee, 0x52, 0x38, 0xee, 0x9a, 0xf9, 0xf6, 0xf3, 0x41, 0x6e, 0xd4, 0x88, 0x95, 0xac, 0x35}}}, +{{{0x41, 0x97, 0xbf, 0x71, 0x6a, 0x9b, 0x72, 0xec, 0xf3, 0xf8, 0x6b, 0xe6, 0x0e, 0x6c, 0x69, 0xa5, 0x2f, 0x68, 0x52, 0xd8, 0x61, 0x81, 0xc0, 0x63, 0x3f, 0xa6, 0x3c, 0x13, 0x90, 0xe6, 0x8d, 0x56}} , + {{0xe8, 0x39, 0x30, 0x77, 0x23, 0xb1, 0xfd, 0x1b, 0x3d, 0x3e, 0x74, 0x4d, 0x7f, 0xae, 0x5b, 0x3a, 0xb4, 0x65, 0x0e, 0x3a, 0x43, 0xdc, 0xdc, 0x41, 0x47, 0xe6, 0xe8, 0x92, 0x09, 0x22, 0x48, 0x4c}}}, +{{{0x85, 0x57, 0x9f, 0xb5, 0xc8, 0x06, 0xb2, 0x9f, 0x47, 0x3f, 0xf0, 0xfa, 0xe6, 0xa9, 0xb1, 0x9b, 0x6f, 0x96, 0x7d, 0xf9, 0xa4, 0x65, 0x09, 0x75, 0x32, 0xa6, 0x6c, 0x7f, 0x47, 0x4b, 0x2f, 0x4f}} , + {{0x34, 0xe9, 0x59, 0x93, 0x9d, 0x26, 0x80, 0x54, 0xf2, 0xcc, 0x3c, 0xc2, 0x25, 0x85, 0xe3, 0x6a, 0xc1, 0x62, 0x04, 0xa7, 0x08, 0x32, 0x6d, 0xa1, 0x39, 0x84, 0x8a, 0x3b, 0x87, 0x5f, 0x11, 0x13}}}, +{{{0xda, 0x03, 0x34, 0x66, 0xc4, 0x0c, 0x73, 0x6e, 0xbc, 0x24, 0xb5, 0xf9, 0x70, 0x81, 0x52, 0xe9, 0xf4, 0x7c, 0x23, 0xdd, 0x9f, 0xb8, 0x46, 0xef, 0x1d, 0x22, 0x55, 0x7d, 0x71, 0xc4, 0x42, 0x33}} , + {{0xc5, 0x37, 0x69, 0x5b, 0xa8, 0xc6, 0x9d, 0xa4, 0xfc, 0x61, 0x6e, 0x68, 0x46, 0xea, 0xd7, 0x1c, 0x67, 0xd2, 0x7d, 0xfa, 0xf1, 0xcc, 0x54, 0x8d, 0x36, 0x35, 0xc9, 0x00, 0xdf, 0x6c, 0x67, 0x50}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x9a, 0x4d, 0x42, 0x29, 0x5d, 0xa4, 0x6b, 0x6f, 0xa8, 0x8a, 0x4d, 0x91, 0x7b, 0xd2, 0xdf, 0x36, 0xef, 0x01, 0x22, 0xc5, 0xcc, 0x8d, 0xeb, 0x58, 0x3d, 0xb3, 0x50, 0xfc, 0x8b, 0x97, 0x96, 0x33}} , + {{0x93, 0x33, 0x07, 0xc8, 0x4a, 0xca, 0xd0, 0xb1, 0xab, 0xbd, 0xdd, 0xa7, 0x7c, 0xac, 0x3e, 0x45, 0xcb, 0xcc, 0x07, 0x91, 0xbf, 0x35, 0x9d, 0xcb, 0x7d, 0x12, 0x3c, 0x11, 0x59, 0x13, 0xcf, 0x5c}}}, +{{{0x45, 0xb8, 0x41, 0xd7, 0xab, 0x07, 0x15, 0x00, 0x8e, 0xce, 0xdf, 0xb2, 0x43, 0x5c, 0x01, 0xdc, 0xf4, 0x01, 0x51, 0x95, 0x10, 0x5a, 0xf6, 0x24, 0x24, 0xa0, 0x19, 0x3a, 0x09, 0x2a, 0xaa, 0x3f}} , + {{0xdc, 0x8e, 0xeb, 0xc6, 0xbf, 0xdd, 0x11, 0x7b, 0xe7, 0x47, 0xe6, 0xce, 0xe7, 0xb6, 0xc5, 0xe8, 0x8a, 0xdc, 0x4b, 0x57, 0x15, 0x3b, 0x66, 0xca, 0x89, 0xa3, 0xfd, 0xac, 0x0d, 0xe1, 0x1d, 0x7a}}}, +{{{0x89, 0xef, 0xbf, 0x03, 0x75, 0xd0, 0x29, 0x50, 0xcb, 0x7d, 0xd6, 0xbe, 0xad, 0x5f, 0x7b, 0x00, 0x32, 0xaa, 0x98, 0xed, 0x3f, 0x8f, 0x92, 0xcb, 0x81, 0x56, 0x01, 0x63, 0x64, 0xa3, 0x38, 0x39}} , + {{0x8b, 0xa4, 0xd6, 0x50, 0xb4, 0xaa, 0x5d, 0x64, 0x64, 0x76, 0x2e, 0xa1, 0xa6, 0xb3, 0xb8, 0x7c, 0x7a, 0x56, 0xf5, 0x5c, 0x4e, 0x84, 0x5c, 0xfb, 0xdd, 0xca, 0x48, 0x8b, 0x48, 0xb9, 0xba, 0x34}}}, +{{{0xc5, 0xe3, 0xe8, 0xae, 0x17, 0x27, 0xe3, 0x64, 0x60, 0x71, 0x47, 0x29, 0x02, 0x0f, 0x92, 0x5d, 0x10, 0x93, 0xc8, 0x0e, 0xa1, 0xed, 0xba, 0xa9, 0x96, 0x1c, 0xc5, 0x76, 0x30, 0xcd, 0xf9, 0x30}} , + {{0x95, 0xb0, 0xbd, 0x8c, 0xbc, 0xa7, 0x4f, 0x7e, 0xfd, 0x4e, 0x3a, 0xbf, 0x5f, 0x04, 0x79, 0x80, 0x2b, 0x5a, 0x9f, 0x4f, 0x68, 0x21, 0x19, 0x71, 0xc6, 0x20, 0x01, 0x42, 0xaa, 0xdf, 0xae, 0x2c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x90, 0x6e, 0x7e, 0x4b, 0x71, 0x93, 0xc0, 0x72, 0xed, 0xeb, 0x71, 0x24, 0x97, 0x26, 0x9c, 0xfe, 0xcb, 0x3e, 0x59, 0x19, 0xa8, 0x0f, 0x75, 0x7d, 0xbe, 0x18, 0xe6, 0x96, 0x1e, 0x95, 0x70, 0x60}} , + {{0x89, 0x66, 0x3e, 0x1d, 0x4c, 0x5f, 0xfe, 0xc0, 0x04, 0x43, 0xd6, 0x44, 0x19, 0xb5, 0xad, 0xc7, 0x22, 0xdc, 0x71, 0x28, 0x64, 0xde, 0x41, 0x38, 0x27, 0x8f, 0x2c, 0x6b, 0x08, 0xb8, 0xb8, 0x7b}}}, +{{{0x3d, 0x70, 0x27, 0x9d, 0xd9, 0xaf, 0xb1, 0x27, 0xaf, 0xe3, 0x5d, 0x1e, 0x3a, 0x30, 0x54, 0x61, 0x60, 0xe8, 0xc3, 0x26, 0x3a, 0xbc, 0x7e, 0xf5, 0x81, 0xdd, 0x64, 0x01, 0x04, 0xeb, 0xc0, 0x1e}} , + {{0xda, 0x2c, 0xa4, 0xd1, 0xa1, 0xc3, 0x5c, 0x6e, 0x32, 0x07, 0x1f, 0xb8, 0x0e, 0x19, 0x9e, 0x99, 0x29, 0x33, 0x9a, 0xae, 0x7a, 0xed, 0x68, 0x42, 0x69, 0x7c, 0x07, 0xb3, 0x38, 0x2c, 0xf6, 0x3d}}}, +{{{0x64, 0xaa, 0xb5, 0x88, 0x79, 0x65, 0x38, 0x8c, 0x94, 0xd6, 0x62, 0x37, 0x7d, 0x64, 0xcd, 0x3a, 0xeb, 0xff, 0xe8, 0x81, 0x09, 0xc7, 0x6a, 0x50, 0x09, 0x0d, 0x28, 0x03, 0x0d, 0x9a, 0x93, 0x0a}} , + {{0x42, 0xa3, 0xf1, 0xc5, 0xb4, 0x0f, 0xd8, 0xc8, 0x8d, 0x15, 0x31, 0xbd, 0xf8, 0x07, 0x8b, 0xcd, 0x08, 0x8a, 0xfb, 0x18, 0x07, 0xfe, 0x8e, 0x52, 0x86, 0xef, 0xbe, 0xec, 0x49, 0x52, 0x99, 0x08}}}, +{{{0x0f, 0xa9, 0xd5, 0x01, 0xaa, 0x48, 0x4f, 0x28, 0x66, 0x32, 0x1a, 0xba, 0x7c, 0xea, 0x11, 0x80, 0x17, 0x18, 0x9b, 0x56, 0x88, 0x25, 0x06, 0x69, 0x12, 0x2c, 0xea, 0x56, 0x69, 0x41, 0x24, 0x19}} , + {{0xde, 0x21, 0xf0, 0xda, 0x8a, 0xfb, 0xb1, 0xb8, 0xcd, 0xc8, 0x6a, 0x82, 0x19, 0x73, 0xdb, 0xc7, 0xcf, 0x88, 0xeb, 0x96, 0xee, 0x6f, 0xfb, 0x06, 0xd2, 0xcd, 0x7d, 0x7b, 0x12, 0x28, 0x8e, 0x0c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x93, 0x44, 0x97, 0xce, 0x28, 0xff, 0x3a, 0x40, 0xc4, 0xf5, 0xf6, 0x9b, 0xf4, 0x6b, 0x07, 0x84, 0xfb, 0x98, 0xd8, 0xec, 0x8c, 0x03, 0x57, 0xec, 0x49, 0xed, 0x63, 0xb6, 0xaa, 0xff, 0x98, 0x28}} , + {{0x3d, 0x16, 0x35, 0xf3, 0x46, 0xbc, 0xb3, 0xf4, 0xc6, 0xb6, 0x4f, 0xfa, 0xf4, 0xa0, 0x13, 0xe6, 0x57, 0x45, 0x93, 0xb9, 0xbc, 0xd6, 0x59, 0xe7, 0x77, 0x94, 0x6c, 0xab, 0x96, 0x3b, 0x4f, 0x09}}}, +{{{0x5a, 0xf7, 0x6b, 0x01, 0x12, 0x4f, 0x51, 0xc1, 0x70, 0x84, 0x94, 0x47, 0xb2, 0x01, 0x6c, 0x71, 0xd7, 0xcc, 0x17, 0x66, 0x0f, 0x59, 0x5d, 0x5d, 0x10, 0x01, 0x57, 0x11, 0xf5, 0xdd, 0xe2, 0x34}} , + {{0x26, 0xd9, 0x1f, 0x5c, 0x58, 0xac, 0x8b, 0x03, 0xd2, 0xc3, 0x85, 0x0f, 0x3a, 0xc3, 0x7f, 0x6d, 0x8e, 0x86, 0xcd, 0x52, 0x74, 0x8f, 0x55, 0x77, 0x17, 0xb7, 0x8e, 0xb7, 0x88, 0xea, 0xda, 0x1b}}}, +{{{0xb6, 0xea, 0x0e, 0x40, 0x93, 0x20, 0x79, 0x35, 0x6a, 0x61, 0x84, 0x5a, 0x07, 0x6d, 0xf9, 0x77, 0x6f, 0xed, 0x69, 0x1c, 0x0d, 0x25, 0x76, 0xcc, 0xf0, 0xdb, 0xbb, 0xc5, 0xad, 0xe2, 0x26, 0x57}} , + {{0xcf, 0xe8, 0x0e, 0x6b, 0x96, 0x7d, 0xed, 0x27, 0xd1, 0x3c, 0xa9, 0xd9, 0x50, 0xa9, 0x98, 0x84, 0x5e, 0x86, 0xef, 0xd6, 0xf0, 0xf8, 0x0e, 0x89, 0x05, 0x2f, 0xd9, 0x5f, 0x15, 0x5f, 0x73, 0x79}}}, +{{{0xc8, 0x5c, 0x16, 0xfe, 0xed, 0x9f, 0x26, 0x56, 0xf6, 0x4b, 0x9f, 0xa7, 0x0a, 0x85, 0xfe, 0xa5, 0x8c, 0x87, 0xdd, 0x98, 0xce, 0x4e, 0xc3, 0x58, 0x55, 0xb2, 0x7b, 0x3d, 0xd8, 0x6b, 0xb5, 0x4c}} , + {{0x65, 0x38, 0xa0, 0x15, 0xfa, 0xa7, 0xb4, 0x8f, 0xeb, 0xc4, 0x86, 0x9b, 0x30, 0xa5, 0x5e, 0x4d, 0xea, 0x8a, 0x9a, 0x9f, 0x1a, 0xd8, 0x5b, 0x53, 0x14, 0x19, 0x25, 0x63, 0xb4, 0x6f, 0x1f, 0x5d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xac, 0x8f, 0xbc, 0x1e, 0x7d, 0x8b, 0x5a, 0x0b, 0x8d, 0xaf, 0x76, 0x2e, 0x71, 0xe3, 0x3b, 0x6f, 0x53, 0x2f, 0x3e, 0x90, 0x95, 0xd4, 0x35, 0x14, 0x4f, 0x8c, 0x3c, 0xce, 0x57, 0x1c, 0x76, 0x49}} , + {{0xa8, 0x50, 0xe1, 0x61, 0x6b, 0x57, 0x35, 0xeb, 0x44, 0x0b, 0x0c, 0x6e, 0xf9, 0x25, 0x80, 0x74, 0xf2, 0x8f, 0x6f, 0x7a, 0x3e, 0x7f, 0x2d, 0xf3, 0x4e, 0x09, 0x65, 0x10, 0x5e, 0x03, 0x25, 0x32}}}, +{{{0xa9, 0x60, 0xdc, 0x0f, 0x64, 0xe5, 0x1d, 0xe2, 0x8d, 0x4f, 0x79, 0x2f, 0x0e, 0x24, 0x02, 0x00, 0x05, 0x77, 0x43, 0x25, 0x3d, 0x6a, 0xc7, 0xb7, 0xbf, 0x04, 0x08, 0x65, 0xf4, 0x39, 0x4b, 0x65}} , + {{0x96, 0x19, 0x12, 0x6b, 0x6a, 0xb7, 0xe3, 0xdc, 0x45, 0x9b, 0xdb, 0xb4, 0xa8, 0xae, 0xdc, 0xa8, 0x14, 0x44, 0x65, 0x62, 0xce, 0x34, 0x9a, 0x84, 0x18, 0x12, 0x01, 0xf1, 0xe2, 0x7b, 0xce, 0x50}}}, +{{{0x41, 0x21, 0x30, 0x53, 0x1b, 0x47, 0x01, 0xb7, 0x18, 0xd8, 0x82, 0x57, 0xbd, 0xa3, 0x60, 0xf0, 0x32, 0xf6, 0x5b, 0xf0, 0x30, 0x88, 0x91, 0x59, 0xfd, 0x90, 0xa2, 0xb9, 0x55, 0x93, 0x21, 0x34}} , + {{0x97, 0x67, 0x9e, 0xeb, 0x6a, 0xf9, 0x6e, 0xd6, 0x73, 0xe8, 0x6b, 0x29, 0xec, 0x63, 0x82, 0x00, 0xa8, 0x99, 0x1c, 0x1d, 0x30, 0xc8, 0x90, 0x52, 0x90, 0xb6, 0x6a, 0x80, 0x4e, 0xff, 0x4b, 0x51}}}, +{{{0x0f, 0x7d, 0x63, 0x8c, 0x6e, 0x5c, 0xde, 0x30, 0xdf, 0x65, 0xfa, 0x2e, 0xb0, 0xa3, 0x25, 0x05, 0x54, 0xbd, 0x25, 0xba, 0x06, 0xae, 0xdf, 0x8b, 0xd9, 0x1b, 0xea, 0x38, 0xb3, 0x05, 0x16, 0x09}} , + {{0xc7, 0x8c, 0xbf, 0x64, 0x28, 0xad, 0xf8, 0xa5, 0x5a, 0x6f, 0xc9, 0xba, 0xd5, 0x7f, 0xd5, 0xd6, 0xbd, 0x66, 0x2f, 0x3d, 0xaa, 0x54, 0xf6, 0xba, 0x32, 0x22, 0x9a, 0x1e, 0x52, 0x05, 0xf4, 0x1d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xaa, 0x1f, 0xbb, 0xeb, 0xfe, 0xe4, 0x87, 0xfc, 0xb1, 0x2c, 0xb7, 0x88, 0xf4, 0xc6, 0xb9, 0xf5, 0x24, 0x46, 0xf2, 0xa5, 0x9f, 0x8f, 0x8a, 0x93, 0x70, 0x69, 0xd4, 0x56, 0xec, 0xfd, 0x06, 0x46}} , + {{0x4e, 0x66, 0xcf, 0x4e, 0x34, 0xce, 0x0c, 0xd9, 0xa6, 0x50, 0xd6, 0x5e, 0x95, 0xaf, 0xe9, 0x58, 0xfa, 0xee, 0x9b, 0xb8, 0xa5, 0x0f, 0x35, 0xe0, 0x43, 0x82, 0x6d, 0x65, 0xe6, 0xd9, 0x00, 0x0f}}}, +{{{0x7b, 0x75, 0x3a, 0xfc, 0x64, 0xd3, 0x29, 0x7e, 0xdd, 0x49, 0x9a, 0x59, 0x53, 0xbf, 0xb4, 0xa7, 0x52, 0xb3, 0x05, 0xab, 0xc3, 0xaf, 0x16, 0x1a, 0x85, 0x42, 0x32, 0xa2, 0x86, 0xfa, 0x39, 0x43}} , + {{0x0e, 0x4b, 0xa3, 0x63, 0x8a, 0xfe, 0xa5, 0x58, 0xf1, 0x13, 0xbd, 0x9d, 0xaa, 0x7f, 0x76, 0x40, 0x70, 0x81, 0x10, 0x75, 0x99, 0xbb, 0xbe, 0x0b, 0x16, 0xe9, 0xba, 0x62, 0x34, 0xcc, 0x07, 0x6d}}}, +{{{0xc3, 0xf1, 0xc6, 0x93, 0x65, 0xee, 0x0b, 0xbc, 0xea, 0x14, 0xf0, 0xc1, 0xf8, 0x84, 0x89, 0xc2, 0xc9, 0xd7, 0xea, 0x34, 0xca, 0xa7, 0xc4, 0x99, 0xd5, 0x50, 0x69, 0xcb, 0xd6, 0x21, 0x63, 0x7c}} , + {{0x99, 0xeb, 0x7c, 0x31, 0x73, 0x64, 0x67, 0x7f, 0x0c, 0x66, 0xaa, 0x8c, 0x69, 0x91, 0xe2, 0x26, 0xd3, 0x23, 0xe2, 0x76, 0x5d, 0x32, 0x52, 0xdf, 0x5d, 0xc5, 0x8f, 0xb7, 0x7c, 0x84, 0xb3, 0x70}}}, +{{{0xeb, 0x01, 0xc7, 0x36, 0x97, 0x4e, 0xb6, 0xab, 0x5f, 0x0d, 0x2c, 0xba, 0x67, 0x64, 0x55, 0xde, 0xbc, 0xff, 0xa6, 0xec, 0x04, 0xd3, 0x8d, 0x39, 0x56, 0x5e, 0xee, 0xf8, 0xe4, 0x2e, 0x33, 0x62}} , + {{0x65, 0xef, 0xb8, 0x9f, 0xc8, 0x4b, 0xa7, 0xfd, 0x21, 0x49, 0x9b, 0x92, 0x35, 0x82, 0xd6, 0x0a, 0x9b, 0xf2, 0x79, 0xf1, 0x47, 0x2f, 0x6a, 0x7e, 0x9f, 0xcf, 0x18, 0x02, 0x3c, 0xfb, 0x1b, 0x3e}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x2f, 0x8b, 0xc8, 0x40, 0x51, 0xd1, 0xac, 0x1a, 0x0b, 0xe4, 0xa9, 0xa2, 0x42, 0x21, 0x19, 0x2f, 0x7b, 0x97, 0xbf, 0xf7, 0x57, 0x6d, 0x3f, 0x3d, 0x4f, 0x0f, 0xe2, 0xb2, 0x81, 0x00, 0x9e, 0x7b}} , + {{0x8c, 0x85, 0x2b, 0xc4, 0xfc, 0xf1, 0xab, 0xe8, 0x79, 0x22, 0xc4, 0x84, 0x17, 0x3a, 0xfa, 0x86, 0xa6, 0x7d, 0xf9, 0xf3, 0x6f, 0x03, 0x57, 0x20, 0x4d, 0x79, 0xf9, 0x6e, 0x71, 0x54, 0x38, 0x09}}}, +{{{0x40, 0x29, 0x74, 0xa8, 0x2f, 0x5e, 0xf9, 0x79, 0xa4, 0xf3, 0x3e, 0xb9, 0xfd, 0x33, 0x31, 0xac, 0x9a, 0x69, 0x88, 0x1e, 0x77, 0x21, 0x2d, 0xf3, 0x91, 0x52, 0x26, 0x15, 0xb2, 0xa6, 0xcf, 0x7e}} , + {{0xc6, 0x20, 0x47, 0x6c, 0xa4, 0x7d, 0xcb, 0x63, 0xea, 0x5b, 0x03, 0xdf, 0x3e, 0x88, 0x81, 0x6d, 0xce, 0x07, 0x42, 0x18, 0x60, 0x7e, 0x7b, 0x55, 0xfe, 0x6a, 0xf3, 0xda, 0x5c, 0x8b, 0x95, 0x10}}}, +{{{0x62, 0xe4, 0x0d, 0x03, 0xb4, 0xd7, 0xcd, 0xfa, 0xbd, 0x46, 0xdf, 0x93, 0x71, 0x10, 0x2c, 0xa8, 0x3b, 0xb6, 0x09, 0x05, 0x70, 0x84, 0x43, 0x29, 0xa8, 0x59, 0xf5, 0x8e, 0x10, 0xe4, 0xd7, 0x20}} , + {{0x57, 0x82, 0x1c, 0xab, 0xbf, 0x62, 0x70, 0xe8, 0xc4, 0xcf, 0xf0, 0x28, 0x6e, 0x16, 0x3c, 0x08, 0x78, 0x89, 0x85, 0x46, 0x0f, 0xf6, 0x7f, 0xcf, 0xcb, 0x7e, 0xb8, 0x25, 0xe9, 0x5a, 0xfa, 0x03}}}, +{{{0xfb, 0x95, 0x92, 0x63, 0x50, 0xfc, 0x62, 0xf0, 0xa4, 0x5e, 0x8c, 0x18, 0xc2, 0x17, 0x24, 0xb7, 0x78, 0xc2, 0xa9, 0xe7, 0x6a, 0x32, 0xd6, 0x29, 0x85, 0xaf, 0xcb, 0x8d, 0x91, 0x13, 0xda, 0x6b}} , + {{0x36, 0x0a, 0xc2, 0xb6, 0x4b, 0xa5, 0x5d, 0x07, 0x17, 0x41, 0x31, 0x5f, 0x62, 0x46, 0xf8, 0x92, 0xf9, 0x66, 0x48, 0x73, 0xa6, 0x97, 0x0d, 0x7d, 0x88, 0xee, 0x62, 0xb1, 0x03, 0xa8, 0x3f, 0x2c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x4a, 0xb1, 0x70, 0x8a, 0xa9, 0xe8, 0x63, 0x79, 0x00, 0xe2, 0x25, 0x16, 0xca, 0x4b, 0x0f, 0xa4, 0x66, 0xad, 0x19, 0x9f, 0x88, 0x67, 0x0c, 0x8b, 0xc2, 0x4a, 0x5b, 0x2b, 0x6d, 0x95, 0xaf, 0x19}} , + {{0x8b, 0x9d, 0xb6, 0xcc, 0x60, 0xb4, 0x72, 0x4f, 0x17, 0x69, 0x5a, 0x4a, 0x68, 0x34, 0xab, 0xa1, 0x45, 0x32, 0x3c, 0x83, 0x87, 0x72, 0x30, 0x54, 0x77, 0x68, 0xae, 0xfb, 0xb5, 0x8b, 0x22, 0x5e}}}, +{{{0xf1, 0xb9, 0x87, 0x35, 0xc5, 0xbb, 0xb9, 0xcf, 0xf5, 0xd6, 0xcd, 0xd5, 0x0c, 0x7c, 0x0e, 0xe6, 0x90, 0x34, 0xfb, 0x51, 0x42, 0x1e, 0x6d, 0xac, 0x9a, 0x46, 0xc4, 0x97, 0x29, 0x32, 0xbf, 0x45}} , + {{0x66, 0x9e, 0xc6, 0x24, 0xc0, 0xed, 0xa5, 0x5d, 0x88, 0xd4, 0xf0, 0x73, 0x97, 0x7b, 0xea, 0x7f, 0x42, 0xff, 0x21, 0xa0, 0x9b, 0x2f, 0x9a, 0xfd, 0x53, 0x57, 0x07, 0x84, 0x48, 0x88, 0x9d, 0x52}}}, +{{{0xc6, 0x96, 0x48, 0x34, 0x2a, 0x06, 0xaf, 0x94, 0x3d, 0xf4, 0x1a, 0xcf, 0xf2, 0xc0, 0x21, 0xc2, 0x42, 0x5e, 0xc8, 0x2f, 0x35, 0xa2, 0x3e, 0x29, 0xfa, 0x0c, 0x84, 0xe5, 0x89, 0x72, 0x7c, 0x06}} , + {{0x32, 0x65, 0x03, 0xe5, 0x89, 0xa6, 0x6e, 0xb3, 0x5b, 0x8e, 0xca, 0xeb, 0xfe, 0x22, 0x56, 0x8b, 0x5d, 0x14, 0x4b, 0x4d, 0xf9, 0xbe, 0xb5, 0xf5, 0xe6, 0x5c, 0x7b, 0x8b, 0xf4, 0x13, 0x11, 0x34}}}, +{{{0x07, 0xc6, 0x22, 0x15, 0xe2, 0x9c, 0x60, 0xa2, 0x19, 0xd9, 0x27, 0xae, 0x37, 0x4e, 0xa6, 0xc9, 0x80, 0xa6, 0x91, 0x8f, 0x12, 0x49, 0xe5, 0x00, 0x18, 0x47, 0xd1, 0xd7, 0x28, 0x22, 0x63, 0x39}} , + {{0xe8, 0xe2, 0x00, 0x7e, 0xf2, 0x9e, 0x1e, 0x99, 0x39, 0x95, 0x04, 0xbd, 0x1e, 0x67, 0x7b, 0xb2, 0x26, 0xac, 0xe6, 0xaa, 0xe2, 0x46, 0xd5, 0xe4, 0xe8, 0x86, 0xbd, 0xab, 0x7c, 0x55, 0x59, 0x6f}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x24, 0x64, 0x6e, 0x9b, 0x35, 0x71, 0x78, 0xce, 0x33, 0x03, 0x21, 0x33, 0x36, 0xf1, 0x73, 0x9b, 0xb9, 0x15, 0x8b, 0x2c, 0x69, 0xcf, 0x4d, 0xed, 0x4f, 0x4d, 0x57, 0x14, 0x13, 0x82, 0xa4, 0x4d}} , + {{0x65, 0x6e, 0x0a, 0xa4, 0x59, 0x07, 0x17, 0xf2, 0x6b, 0x4a, 0x1f, 0x6e, 0xf6, 0xb5, 0xbc, 0x62, 0xe4, 0xb6, 0xda, 0xa2, 0x93, 0xbc, 0x29, 0x05, 0xd2, 0xd2, 0x73, 0x46, 0x03, 0x16, 0x40, 0x31}}}, +{{{0x4c, 0x73, 0x6d, 0x15, 0xbd, 0xa1, 0x4d, 0x5c, 0x13, 0x0b, 0x24, 0x06, 0x98, 0x78, 0x1c, 0x5b, 0xeb, 0x1f, 0x18, 0x54, 0x43, 0xd9, 0x55, 0x66, 0xda, 0x29, 0x21, 0xe8, 0xb8, 0x3c, 0x42, 0x22}} , + {{0xb4, 0xcd, 0x08, 0x6f, 0x15, 0x23, 0x1a, 0x0b, 0x22, 0xed, 0xd1, 0xf1, 0xa7, 0xc7, 0x73, 0x45, 0xf3, 0x9e, 0xce, 0x76, 0xb7, 0xf6, 0x39, 0xb6, 0x8e, 0x79, 0xbe, 0xe9, 0x9b, 0xcf, 0x7d, 0x62}}}, +{{{0x92, 0x5b, 0xfc, 0x72, 0xfd, 0xba, 0xf1, 0xfd, 0xa6, 0x7c, 0x95, 0xe3, 0x61, 0x3f, 0xe9, 0x03, 0xd4, 0x2b, 0xd4, 0x20, 0xd9, 0xdb, 0x4d, 0x32, 0x3e, 0xf5, 0x11, 0x64, 0xe3, 0xb4, 0xbe, 0x32}} , + {{0x86, 0x17, 0x90, 0xe7, 0xc9, 0x1f, 0x10, 0xa5, 0x6a, 0x2d, 0x39, 0xd0, 0x3b, 0xc4, 0xa6, 0xe9, 0x59, 0x13, 0xda, 0x1a, 0xe6, 0xa0, 0xb9, 0x3c, 0x50, 0xb8, 0x40, 0x7c, 0x15, 0x36, 0x5a, 0x42}}}, +{{{0xb4, 0x0b, 0x32, 0xab, 0xdc, 0x04, 0x51, 0x55, 0x21, 0x1e, 0x0b, 0x75, 0x99, 0x89, 0x73, 0x35, 0x3a, 0x91, 0x2b, 0xfe, 0xe7, 0x49, 0xea, 0x76, 0xc1, 0xf9, 0x46, 0xb9, 0x53, 0x02, 0x23, 0x04}} , + {{0xfc, 0x5a, 0x1e, 0x1d, 0x74, 0x58, 0x95, 0xa6, 0x8f, 0x7b, 0x97, 0x3e, 0x17, 0x3b, 0x79, 0x2d, 0xa6, 0x57, 0xef, 0x45, 0x02, 0x0b, 0x4d, 0x6e, 0x9e, 0x93, 0x8d, 0x2f, 0xd9, 0x9d, 0xdb, 0x04}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xc0, 0xd7, 0x56, 0x97, 0x58, 0x91, 0xde, 0x09, 0x4f, 0x9f, 0xbe, 0x63, 0xb0, 0x83, 0x86, 0x43, 0x5d, 0xbc, 0xe0, 0xf3, 0xc0, 0x75, 0xbf, 0x8b, 0x8e, 0xaa, 0xf7, 0x8b, 0x64, 0x6e, 0xb0, 0x63}} , + {{0x16, 0xae, 0x8b, 0xe0, 0x9b, 0x24, 0x68, 0x5c, 0x44, 0xc2, 0xd0, 0x08, 0xb7, 0x7b, 0x62, 0xfd, 0x7f, 0xd8, 0xd4, 0xb7, 0x50, 0xfd, 0x2c, 0x1b, 0xbf, 0x41, 0x95, 0xd9, 0x8e, 0xd8, 0x17, 0x1b}}}, +{{{0x86, 0x55, 0x37, 0x8e, 0xc3, 0x38, 0x48, 0x14, 0xb5, 0x97, 0xd2, 0xa7, 0x54, 0x45, 0xf1, 0x35, 0x44, 0x38, 0x9e, 0xf1, 0x1b, 0xb6, 0x34, 0x00, 0x3c, 0x96, 0xee, 0x29, 0x00, 0xea, 0x2c, 0x0b}} , + {{0xea, 0xda, 0x99, 0x9e, 0x19, 0x83, 0x66, 0x6d, 0xe9, 0x76, 0x87, 0x50, 0xd1, 0xfd, 0x3c, 0x60, 0x87, 0xc6, 0x41, 0xd9, 0x8e, 0xdb, 0x5e, 0xde, 0xaa, 0x9a, 0xd3, 0x28, 0xda, 0x95, 0xea, 0x47}}}, +{{{0xd0, 0x80, 0xba, 0x19, 0xae, 0x1d, 0xa9, 0x79, 0xf6, 0x3f, 0xac, 0x5d, 0x6f, 0x96, 0x1f, 0x2a, 0xce, 0x29, 0xb2, 0xff, 0x37, 0xf1, 0x94, 0x8f, 0x0c, 0xb5, 0x28, 0xba, 0x9a, 0x21, 0xf6, 0x66}} , + {{0x02, 0xfb, 0x54, 0xb8, 0x05, 0xf3, 0x81, 0x52, 0x69, 0x34, 0x46, 0x9d, 0x86, 0x76, 0x8f, 0xd7, 0xf8, 0x6a, 0x66, 0xff, 0xe6, 0xa7, 0x90, 0xf7, 0x5e, 0xcd, 0x6a, 0x9b, 0x55, 0xfc, 0x9d, 0x48}}}, +{{{0xbd, 0xaa, 0x13, 0xe6, 0xcd, 0x45, 0x4a, 0xa4, 0x59, 0x0a, 0x64, 0xb1, 0x98, 0xd6, 0x34, 0x13, 0x04, 0xe6, 0x97, 0x94, 0x06, 0xcb, 0xd4, 0x4e, 0xbb, 0x96, 0xcd, 0xd1, 0x57, 0xd1, 0xe3, 0x06}} , + {{0x7a, 0x6c, 0x45, 0x27, 0xc4, 0x93, 0x7f, 0x7d, 0x7c, 0x62, 0x50, 0x38, 0x3a, 0x6b, 0xb5, 0x88, 0xc6, 0xd9, 0xf1, 0x78, 0x19, 0xb9, 0x39, 0x93, 0x3d, 0xc9, 0xe0, 0x9c, 0x3c, 0xce, 0xf5, 0x72}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x24, 0xea, 0x23, 0x7d, 0x56, 0x2c, 0xe2, 0x59, 0x0e, 0x85, 0x60, 0x04, 0x88, 0x5a, 0x74, 0x1e, 0x4b, 0xef, 0x13, 0xda, 0x4c, 0xff, 0x83, 0x45, 0x85, 0x3f, 0x08, 0x95, 0x2c, 0x20, 0x13, 0x1f}} , + {{0x48, 0x5f, 0x27, 0x90, 0x5c, 0x02, 0x42, 0xad, 0x78, 0x47, 0x5c, 0xb5, 0x7e, 0x08, 0x85, 0x00, 0xfa, 0x7f, 0xfd, 0xfd, 0xe7, 0x09, 0x11, 0xf2, 0x7e, 0x1b, 0x38, 0x6c, 0x35, 0x6d, 0x33, 0x66}}}, +{{{0x93, 0x03, 0x36, 0x81, 0xac, 0xe4, 0x20, 0x09, 0x35, 0x4c, 0x45, 0xb2, 0x1e, 0x4c, 0x14, 0x21, 0xe6, 0xe9, 0x8a, 0x7b, 0x8d, 0xfe, 0x1e, 0xc6, 0x3e, 0xc1, 0x35, 0xfa, 0xe7, 0x70, 0x4e, 0x1d}} , + {{0x61, 0x2e, 0xc2, 0xdd, 0x95, 0x57, 0xd1, 0xab, 0x80, 0xe8, 0x63, 0x17, 0xb5, 0x48, 0xe4, 0x8a, 0x11, 0x9e, 0x72, 0xbe, 0x85, 0x8d, 0x51, 0x0a, 0xf2, 0x9f, 0xe0, 0x1c, 0xa9, 0x07, 0x28, 0x7b}}}, +{{{0xbb, 0x71, 0x14, 0x5e, 0x26, 0x8c, 0x3d, 0xc8, 0xe9, 0x7c, 0xd3, 0xd6, 0xd1, 0x2f, 0x07, 0x6d, 0xe6, 0xdf, 0xfb, 0x79, 0xd6, 0x99, 0x59, 0x96, 0x48, 0x40, 0x0f, 0x3a, 0x7b, 0xb2, 0xa0, 0x72}} , + {{0x4e, 0x3b, 0x69, 0xc8, 0x43, 0x75, 0x51, 0x6c, 0x79, 0x56, 0xe4, 0xcb, 0xf7, 0xa6, 0x51, 0xc2, 0x2c, 0x42, 0x0b, 0xd4, 0x82, 0x20, 0x1c, 0x01, 0x08, 0x66, 0xd7, 0xbf, 0x04, 0x56, 0xfc, 0x02}}}, +{{{0x24, 0xe8, 0xb7, 0x60, 0xae, 0x47, 0x80, 0xfc, 0xe5, 0x23, 0xe7, 0xc2, 0xc9, 0x85, 0xe6, 0x98, 0xa0, 0x29, 0x4e, 0xe1, 0x84, 0x39, 0x2d, 0x95, 0x2c, 0xf3, 0x45, 0x3c, 0xff, 0xaf, 0x27, 0x4c}} , + {{0x6b, 0xa6, 0xf5, 0x4b, 0x11, 0xbd, 0xba, 0x5b, 0x9e, 0xc4, 0xa4, 0x51, 0x1e, 0xbe, 0xd0, 0x90, 0x3a, 0x9c, 0xc2, 0x26, 0xb6, 0x1e, 0xf1, 0x95, 0x7d, 0xc8, 0x6d, 0x52, 0xe6, 0x99, 0x2c, 0x5f}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x85, 0xe0, 0x24, 0x32, 0xb4, 0xd1, 0xef, 0xfc, 0x69, 0xa2, 0xbf, 0x8f, 0x72, 0x2c, 0x95, 0xf6, 0xe4, 0x6e, 0x7d, 0x90, 0xf7, 0x57, 0x81, 0xa0, 0xf7, 0xda, 0xef, 0x33, 0x07, 0xe3, 0x6b, 0x78}} , + {{0x36, 0x27, 0x3e, 0xc6, 0x12, 0x07, 0xab, 0x4e, 0xbe, 0x69, 0x9d, 0xb3, 0xbe, 0x08, 0x7c, 0x2a, 0x47, 0x08, 0xfd, 0xd4, 0xcd, 0x0e, 0x27, 0x34, 0x5b, 0x98, 0x34, 0x2f, 0x77, 0x5f, 0x3a, 0x65}}}, +{{{0x13, 0xaa, 0x2e, 0x4c, 0xf0, 0x22, 0xb8, 0x6c, 0xb3, 0x19, 0x4d, 0xeb, 0x6b, 0xd0, 0xa4, 0xc6, 0x9c, 0xdd, 0xc8, 0x5b, 0x81, 0x57, 0x89, 0xdf, 0x33, 0xa9, 0x68, 0x49, 0x80, 0xe4, 0xfe, 0x21}} , + {{0x00, 0x17, 0x90, 0x30, 0xe9, 0xd3, 0x60, 0x30, 0x31, 0xc2, 0x72, 0x89, 0x7a, 0x36, 0xa5, 0xbd, 0x39, 0x83, 0x85, 0x50, 0xa1, 0x5d, 0x6c, 0x41, 0x1d, 0xb5, 0x2c, 0x07, 0x40, 0x77, 0x0b, 0x50}}}, +{{{0x64, 0x34, 0xec, 0xc0, 0x9e, 0x44, 0x41, 0xaf, 0xa0, 0x36, 0x05, 0x6d, 0xea, 0x30, 0x25, 0x46, 0x35, 0x24, 0x9d, 0x86, 0xbd, 0x95, 0xf1, 0x6a, 0x46, 0xd7, 0x94, 0x54, 0xf9, 0x3b, 0xbd, 0x5d}} , + {{0x77, 0x5b, 0xe2, 0x37, 0xc7, 0xe1, 0x7c, 0x13, 0x8c, 0x9f, 0x7b, 0x7b, 0x2a, 0xce, 0x42, 0xa3, 0xb9, 0x2a, 0x99, 0xa8, 0xc0, 0xd8, 0x3c, 0x86, 0xb0, 0xfb, 0xe9, 0x76, 0x77, 0xf7, 0xf5, 0x56}}}, +{{{0xdf, 0xb3, 0x46, 0x11, 0x6e, 0x13, 0xb7, 0x28, 0x4e, 0x56, 0xdd, 0xf1, 0xac, 0xad, 0x58, 0xc3, 0xf8, 0x88, 0x94, 0x5e, 0x06, 0x98, 0xa1, 0xe4, 0x6a, 0xfb, 0x0a, 0x49, 0x5d, 0x8a, 0xfe, 0x77}} , + {{0x46, 0x02, 0xf5, 0xa5, 0xaf, 0xc5, 0x75, 0x6d, 0xba, 0x45, 0x35, 0x0a, 0xfe, 0xc9, 0xac, 0x22, 0x91, 0x8d, 0x21, 0x95, 0x33, 0x03, 0xc0, 0x8a, 0x16, 0xf3, 0x39, 0xe0, 0x01, 0x0f, 0x53, 0x3c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x34, 0x75, 0x37, 0x1f, 0x34, 0x4e, 0xa9, 0x1d, 0x68, 0x67, 0xf8, 0x49, 0x98, 0x96, 0xfc, 0x4c, 0x65, 0x97, 0xf7, 0x02, 0x4a, 0x52, 0x6c, 0x01, 0xbd, 0x48, 0xbb, 0x1b, 0xed, 0xa4, 0xe2, 0x53}} , + {{0x59, 0xd5, 0x9b, 0x5a, 0xa2, 0x90, 0xd3, 0xb8, 0x37, 0x4c, 0x55, 0x82, 0x28, 0x08, 0x0f, 0x7f, 0xaa, 0x81, 0x65, 0xe0, 0x0c, 0x52, 0xc9, 0xa3, 0x32, 0x27, 0x64, 0xda, 0xfd, 0x34, 0x23, 0x5a}}}, +{{{0xb5, 0xb0, 0x0c, 0x4d, 0xb3, 0x7b, 0x23, 0xc8, 0x1f, 0x8a, 0x39, 0x66, 0xe6, 0xba, 0x4c, 0x10, 0x37, 0xca, 0x9c, 0x7c, 0x05, 0x9e, 0xff, 0xc0, 0xf8, 0x8e, 0xb1, 0x8f, 0x6f, 0x67, 0x18, 0x26}} , + {{0x4b, 0x41, 0x13, 0x54, 0x23, 0x1a, 0xa4, 0x4e, 0xa9, 0x8b, 0x1e, 0x4b, 0xfc, 0x15, 0x24, 0xbb, 0x7e, 0xcb, 0xb6, 0x1e, 0x1b, 0xf5, 0xf2, 0xc8, 0x56, 0xec, 0x32, 0xa2, 0x60, 0x5b, 0xa0, 0x2a}}}, +{{{0xa4, 0x29, 0x47, 0x86, 0x2e, 0x92, 0x4f, 0x11, 0x4f, 0xf3, 0xb2, 0x5c, 0xd5, 0x3e, 0xa6, 0xb9, 0xc8, 0xe2, 0x33, 0x11, 0x1f, 0x01, 0x8f, 0xb0, 0x9b, 0xc7, 0xa5, 0xff, 0x83, 0x0f, 0x1e, 0x28}} , + {{0x1d, 0x29, 0x7a, 0xa1, 0xec, 0x8e, 0xb5, 0xad, 0xea, 0x02, 0x68, 0x60, 0x74, 0x29, 0x1c, 0xa5, 0xcf, 0xc8, 0x3b, 0x7d, 0x8b, 0x2b, 0x7c, 0xad, 0xa4, 0x40, 0x17, 0x51, 0x59, 0x7c, 0x2e, 0x5d}}}, +{{{0x0a, 0x6c, 0x4f, 0xbc, 0x3e, 0x32, 0xe7, 0x4a, 0x1a, 0x13, 0xc1, 0x49, 0x38, 0xbf, 0xf7, 0xc2, 0xd3, 0x8f, 0x6b, 0xad, 0x52, 0xf7, 0xcf, 0xbc, 0x27, 0xcb, 0x40, 0x67, 0x76, 0xcd, 0x6d, 0x56}} , + {{0xe5, 0xb0, 0x27, 0xad, 0xbe, 0x9b, 0xf2, 0xb5, 0x63, 0xde, 0x3a, 0x23, 0x95, 0xb7, 0x0a, 0x7e, 0xf3, 0x9e, 0x45, 0x6f, 0x19, 0x39, 0x75, 0x8f, 0x39, 0x3d, 0x0f, 0xc0, 0x9f, 0xf1, 0xe9, 0x51}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + 
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x88, 0xaa, 0x14, 0x24, 0x86, 0x94, 0x11, 0x12, 0x3e, 0x1a, 0xb5, 0xcc, 0xbb, 0xe0, 0x9c, 0xd5, 0x9c, 0x6d, 0xba, 0x58, 0x72, 0x8d, 0xfb, 0x22, 0x7b, 0x9f, 0x7c, 0x94, 0x30, 0xb3, 0x51, 0x21}} , + {{0xf6, 0x74, 0x3d, 0xf2, 0xaf, 0xd0, 0x1e, 0x03, 0x7c, 0x23, 0x6b, 0xc9, 0xfc, 0x25, 0x70, 0x90, 0xdc, 0x9a, 0xa4, 0xfb, 0x49, 0xfc, 0x3d, 0x0a, 0x35, 0x38, 0x6f, 0xe4, 0x7e, 0x50, 0x01, 0x2a}}}, +{{{0xd6, 0xe3, 0x96, 0x61, 0x3a, 0xfd, 0xef, 0x9b, 0x1f, 0x90, 0xa4, 0x24, 0x14, 0x5b, 0xc8, 0xde, 0x50, 0xb1, 0x1d, 0xaf, 0xe8, 0x55, 0x8a, 0x87, 0x0d, 0xfe, 0xaa, 0x3b, 0x82, 0x2c, 0x8d, 0x7b}} , + {{0x85, 0x0c, 0xaf, 0xf8, 0x83, 0x44, 0x49, 0xd9, 0x45, 0xcf, 0xf7, 0x48, 0xd9, 0x53, 0xb4, 0xf1, 0x65, 0xa0, 0xe1, 0xc3, 0xb3, 0x15, 0xed, 0x89, 0x9b, 0x4f, 0x62, 0xb3, 0x57, 0xa5, 0x45, 0x1c}}}, +{{{0x8f, 0x12, 0xea, 0xaf, 0xd1, 0x1f, 0x79, 0x10, 0x0b, 0xf6, 0xa3, 0x7b, 0xea, 0xac, 0x8b, 0x57, 0x32, 0x62, 0xe7, 0x06, 0x12, 0x51, 0xa0, 0x3b, 0x43, 0x5e, 0xa4, 0x20, 0x78, 0x31, 0xce, 0x0d}} , + {{0x84, 0x7c, 0xc2, 0xa6, 0x91, 0x23, 0xce, 0xbd, 0xdc, 0xf9, 0xce, 0xd5, 0x75, 0x30, 0x22, 0xe6, 0xf9, 0x43, 0x62, 0x0d, 0xf7, 0x75, 0x9d, 0x7f, 0x8c, 0xff, 0x7d, 0xe4, 0x72, 0xac, 0x9f, 0x1c}}}, +{{{0x88, 0xc1, 0x99, 0xd0, 0x3c, 0x1c, 0x5d, 0xb4, 0xef, 0x13, 0x0f, 0x90, 0xb9, 0x36, 0x2f, 0x95, 0x95, 0xc6, 0xdc, 0xde, 0x0a, 0x51, 0xe2, 0x8d, 0xf3, 0xbc, 0x51, 0xec, 0xdf, 0xb1, 0xa2, 0x5f}} , + {{0x2e, 0x68, 0xa1, 0x23, 0x7d, 0x9b, 0x40, 0x69, 0x85, 0x7b, 0x42, 0xbf, 0x90, 0x4b, 0xd6, 0x40, 0x2f, 0xd7, 0x52, 0x52, 0xb2, 0x21, 0xde, 0x64, 0xbd, 0x88, 0xc3, 0x6d, 0xa5, 0xfa, 0x81, 0x3f}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xfb, 0xfd, 0x47, 0x7b, 0x8a, 0x66, 0x9e, 0x79, 0x2e, 0x64, 0x82, 0xef, 0xf7, 0x21, 0xec, 0xf6, 0xd8, 0x86, 0x09, 0x31, 0x7c, 0xdd, 0x03, 0x6a, 0x58, 0xa0, 0x77, 0xb7, 0x9b, 0x8c, 0x87, 0x1f}} , + {{0x55, 0x47, 0xe4, 0xa8, 0x3d, 0x55, 0x21, 0x34, 0xab, 0x1d, 0xae, 0xe0, 0xf4, 0xea, 0xdb, 0xc5, 0xb9, 0x58, 0xbf, 0xc4, 0x2a, 0x89, 0x31, 0x1a, 0xf4, 0x2d, 0xe1, 0xca, 0x37, 0x99, 0x47, 0x59}}}, +{{{0xc7, 0xca, 0x63, 0xc1, 0x49, 0xa9, 0x35, 0x45, 0x55, 0x7e, 0xda, 0x64, 0x32, 0x07, 0x50, 0xf7, 0x32, 0xac, 0xde, 0x75, 0x58, 0x9b, 0x11, 0xb2, 0x3a, 0x1f, 0xf5, 0xf7, 0x79, 0x04, 0xe6, 0x08}} , + {{0x46, 0xfa, 0x22, 0x4b, 0xfa, 0xe1, 0xfe, 0x96, 0xfc, 0x67, 0xba, 0x67, 0x97, 0xc4, 0xe7, 0x1b, 0x86, 0x90, 0x5f, 0xee, 0xf4, 0x5b, 0x11, 0xb2, 0xcd, 0xad, 0xee, 0xc2, 0x48, 0x6c, 0x2b, 0x1b}}}, +{{{0xe3, 0x39, 0x62, 0xb4, 0x4f, 0x31, 0x04, 0xc9, 0xda, 0xd5, 0x73, 0x51, 0x57, 0xc5, 0xb8, 0xf3, 0xa3, 0x43, 0x70, 0xe4, 0x61, 0x81, 0x84, 0xe2, 0xbb, 0xbf, 0x4f, 0x9e, 0xa4, 0x5e, 0x74, 0x06}} , + {{0x29, 0xac, 0xff, 0x27, 0xe0, 0x59, 0xbe, 0x39, 0x9c, 0x0d, 0x83, 0xd7, 0x10, 0x0b, 0x15, 0xb7, 0xe1, 0xc2, 0x2c, 0x30, 0x73, 0x80, 0x3a, 0x7d, 0x5d, 0xab, 0x58, 0x6b, 0xc1, 0xf0, 0xf4, 0x22}}}, +{{{0xfe, 0x7f, 0xfb, 0x35, 0x7d, 0xc6, 0x01, 0x23, 0x28, 0xc4, 0x02, 0xac, 0x1f, 0x42, 0xb4, 0x9d, 0xfc, 0x00, 0x94, 0xa5, 0xee, 0xca, 0xda, 0x97, 0x09, 0x41, 0x77, 0x87, 0x5d, 0x7b, 0x87, 0x78}} , + {{0xf5, 0xfb, 0x90, 0x2d, 0x81, 0x19, 0x9e, 0x2f, 0x6d, 0x85, 0x88, 0x8c, 0x40, 0x5c, 0x77, 0x41, 0x4d, 0x01, 0x19, 0x76, 0x60, 0xe8, 0x4c, 0x48, 0xe4, 0x33, 0x83, 0x32, 0x6c, 0xb4, 0x41, 0x03}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xff, 0x10, 0xc2, 0x09, 0x4f, 0x6e, 0xf4, 0xd2, 0xdf, 0x7e, 0xca, 0x7b, 0x1c, 0x1d, 0xba, 0xa3, 0xb6, 0xda, 0x67, 0x33, 0xd4, 0x87, 0x36, 0x4b, 0x11, 0x20, 0x05, 0xa6, 0x29, 0xc1, 0x87, 0x17}} , + {{0xf6, 0x96, 0xca, 0x2f, 0xda, 0x38, 0xa7, 0x1b, 0xfc, 0xca, 0x7d, 0xfe, 0x08, 0x89, 0xe2, 0x47, 0x2b, 0x6a, 0x5d, 0x4b, 0xfa, 0xa1, 0xb4, 0xde, 0xb6, 0xc2, 0x31, 0x51, 0xf5, 0xe0, 0xa4, 0x0b}}}, +{{{0x5c, 0xe5, 0xc6, 0x04, 0x8e, 0x2b, 0x57, 0xbe, 0x38, 0x85, 0x23, 0xcb, 0xb7, 0xbe, 0x4f, 0xa9, 0xd3, 0x6e, 0x12, 0xaa, 0xd5, 0xb2, 0x2e, 0x93, 0x29, 0x9a, 0x4a, 0x88, 0x18, 0x43, 0xf5, 0x01}} , + {{0x50, 0xfc, 0xdb, 0xa2, 0x59, 0x21, 0x8d, 0xbd, 0x7e, 0x33, 0xae, 0x2f, 0x87, 0x1a, 0xd0, 0x97, 0xc7, 0x0d, 0x4d, 0x63, 0x01, 0xef, 0x05, 0x84, 0xec, 0x40, 0xdd, 0xa8, 0x0a, 0x4f, 0x70, 0x0b}}}, +{{{0x41, 0x69, 0x01, 0x67, 0x5c, 0xd3, 0x8a, 0xc5, 0xcf, 0x3f, 0xd1, 0x57, 0xd1, 0x67, 0x3e, 0x01, 0x39, 0xb5, 0xcb, 0x81, 0x56, 0x96, 0x26, 0xb6, 0xc2, 0xe7, 0x5c, 0xfb, 0x63, 0x97, 0x58, 0x06}} , + {{0x0c, 0x0e, 0xf3, 0xba, 0xf0, 0xe5, 0xba, 0xb2, 0x57, 0x77, 0xc6, 0x20, 0x9b, 0x89, 0x24, 0xbe, 0xf2, 0x9c, 0x8a, 0xba, 0x69, 0xc1, 0xf1, 0xb0, 0x4f, 0x2a, 0x05, 0x9a, 0xee, 0x10, 0x7e, 0x36}}}, +{{{0x3f, 0x26, 0xe9, 0x40, 0xe9, 0x03, 0xad, 0x06, 0x69, 0x91, 0xe0, 0xd1, 0x89, 0x60, 0x84, 0x79, 0xde, 0x27, 0x6d, 0xe6, 0x76, 0xbd, 0xea, 0xe6, 0xae, 0x48, 0xc3, 0x67, 0xc0, 0x57, 0xcd, 0x2f}} , + {{0x7f, 0xc1, 0xdc, 0xb9, 0xc7, 0xbc, 0x86, 0x3d, 0x55, 0x4b, 0x28, 0x7a, 0xfb, 0x4d, 0xc7, 0xf8, 0xbc, 0x67, 0x2a, 0x60, 0x4d, 0x8f, 0x07, 0x0b, 0x1a, 0x17, 0xbf, 0xfa, 0xac, 0xa7, 0x3d, 0x1a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x91, 0x3f, 0xed, 0x5e, 0x18, 0x78, 0x3f, 0x23, 0x2c, 0x0d, 0x8c, 0x44, 0x00, 0xe8, 0xfb, 0xe9, 0x8e, 0xd6, 0xd1, 0x36, 0x58, 0x57, 0x9e, 0xae, 0x4b, 0x5c, 0x0b, 0x07, 0xbc, 0x6b, 0x55, 0x2b}} , + {{0x6f, 0x4d, 0x17, 0xd7, 0xe1, 0x84, 0xd9, 0x78, 0xb1, 0x90, 0xfd, 0x2e, 0xb3, 0xb5, 0x19, 0x3f, 0x1b, 0xfa, 0xc0, 0x68, 0xb3, 0xdd, 0x00, 0x2e, 0x89, 0xbd, 0x7e, 0x80, 0x32, 0x13, 0xa0, 0x7b}}}, +{{{0x1a, 0x6f, 0x40, 0xaf, 0x44, 0x44, 0xb0, 0x43, 0x8f, 0x0d, 0xd0, 0x1e, 0xc4, 0x0b, 0x19, 0x5d, 0x8e, 0xfe, 0xc1, 0xf3, 0xc5, 0x5c, 0x91, 0xf8, 0x04, 0x4e, 0xbe, 0x90, 0xb4, 0x47, 0x5c, 0x3f}} , + {{0xb0, 0x3b, 0x2c, 0xf3, 0xfe, 0x32, 0x71, 0x07, 0x3f, 0xaa, 0xba, 0x45, 0x60, 0xa8, 0x8d, 0xea, 0x54, 0xcb, 0x39, 0x10, 0xb4, 0xf2, 0x8b, 0xd2, 0x14, 0x82, 0x42, 0x07, 0x8e, 0xe9, 0x7c, 0x53}}}, +{{{0xb0, 0xae, 0xc1, 0x8d, 0xc9, 0x8f, 0xb9, 0x7a, 0x77, 0xef, 0xba, 0x79, 0xa0, 0x3c, 0xa8, 0xf5, 0x6a, 0xe2, 0x3f, 0x5d, 0x00, 0xe3, 0x4b, 0x45, 0x24, 0x7b, 0x43, 0x78, 0x55, 0x1d, 0x2b, 0x1e}} , + {{0x01, 0xb8, 0xd6, 0x16, 0x67, 0xa0, 0x15, 0xb9, 0xe1, 0x58, 0xa4, 0xa7, 0x31, 0x37, 0x77, 0x2f, 0x8b, 0x12, 0x9f, 0xf4, 0x3f, 0xc7, 0x36, 0x66, 0xd2, 0xa8, 0x56, 0xf7, 0x7f, 0x74, 0xc6, 0x41}}}, +{{{0x5d, 0xf8, 0xb4, 0xa8, 0x30, 0xdd, 0xcc, 0x38, 0xa5, 0xd3, 0xca, 0xd8, 0xd1, 0xf8, 0xb2, 0x31, 0x91, 0xd4, 0x72, 0x05, 0x57, 0x4a, 0x3b, 0x82, 0x4a, 0xc6, 0x68, 0x20, 0xe2, 0x18, 0x41, 0x61}} , + {{0x19, 0xd4, 0x8d, 0x47, 0x29, 0x12, 0x65, 0xb0, 0x11, 0x78, 0x47, 0xb5, 0xcb, 0xa3, 0xa5, 0xfa, 0x05, 0x85, 0x54, 0xa9, 0x33, 0x97, 0x8d, 0x2b, 0xc2, 0xfe, 0x99, 0x35, 0x28, 0xe5, 0xeb, 0x63}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xb1, 0x3f, 0x3f, 0xef, 0xd8, 0xf4, 0xfc, 0xb3, 0xa0, 0x60, 0x50, 0x06, 0x2b, 0x29, 0x52, 0x70, 0x15, 0x0b, 0x24, 0x24, 0xf8, 0x5f, 0x79, 0x18, 0xcc, 0xff, 0x89, 0x99, 0x84, 0xa1, 0xae, 0x13}} , + {{0x44, 0x1f, 0xb8, 0xc2, 0x01, 0xc1, 0x30, 0x19, 0x55, 0x05, 0x60, 0x10, 0xa4, 0x6c, 0x2d, 0x67, 0x70, 0xe5, 0x25, 0x1b, 0xf2, 0xbf, 0xdd, 0xfb, 0x70, 0x2b, 0xa1, 0x8c, 0x9c, 0x94, 0x84, 0x08}}}, +{{{0xe7, 0xc4, 0x43, 0x4d, 0xc9, 0x2b, 0x69, 0x5d, 0x1d, 0x3c, 0xaf, 0xbb, 0x43, 0x38, 0x4e, 0x98, 0x3d, 0xed, 0x0d, 0x21, 0x03, 0xfd, 0xf0, 0x99, 0x47, 0x04, 0xb0, 0x98, 0x69, 0x55, 0x72, 0x0f}} , + {{0x5e, 0xdf, 0x15, 0x53, 0x3b, 0x86, 0x80, 0xb0, 0xf1, 0x70, 0x68, 0x8f, 0x66, 0x7c, 0x0e, 0x49, 0x1a, 0xd8, 0x6b, 0xfe, 0x4e, 0xef, 0xca, 0x47, 0xd4, 0x03, 0xc1, 0x37, 0x50, 0x9c, 0xc1, 0x16}}}, +{{{0xcd, 0x24, 0xc6, 0x3e, 0x0c, 0x82, 0x9b, 0x91, 0x2b, 0x61, 0x4a, 0xb2, 0x0f, 0x88, 0x55, 0x5f, 0x5a, 0x57, 0xff, 0xe5, 0x74, 0x0b, 0x13, 0x43, 0x00, 0xd8, 0x6b, 0xcf, 0xd2, 0x15, 0x03, 0x2c}} , + {{0xdc, 0xff, 0x15, 0x61, 0x2f, 0x4a, 0x2f, 0x62, 0xf2, 0x04, 0x2f, 0xb5, 0x0c, 0xb7, 0x1e, 0x3f, 0x74, 0x1a, 0x0f, 0xd7, 0xea, 0xcd, 0xd9, 0x7d, 0xf6, 0x12, 0x0e, 0x2f, 0xdb, 0x5a, 0x3b, 0x16}}}, +{{{0x1b, 0x37, 0x47, 0xe3, 0xf5, 0x9e, 0xea, 0x2c, 0x2a, 0xe7, 0x82, 0x36, 0xf4, 0x1f, 0x81, 0x47, 0x92, 0x4b, 0x69, 0x0e, 0x11, 0x8c, 0x5d, 0x53, 0x5b, 0x81, 0x27, 0x08, 0xbc, 0xa0, 0xae, 0x25}} , + {{0x69, 0x32, 0xa1, 0x05, 0x11, 0x42, 0x00, 0xd2, 0x59, 0xac, 0x4d, 0x62, 0x8b, 0x13, 0xe2, 0x50, 0x5d, 0xa0, 0x9d, 0x9b, 0xfd, 0xbb, 0x12, 0x41, 0x75, 0x41, 0x9e, 0xcc, 0xdc, 0xc7, 0xdc, 0x5d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xd9, 0xe3, 0x38, 0x06, 0x46, 0x70, 0x82, 0x5e, 0x28, 0x49, 0x79, 0xff, 0x25, 0xd2, 0x4e, 0x29, 0x8d, 0x06, 0xb0, 0x23, 0xae, 0x9b, 0x66, 0xe4, 0x7d, 0xc0, 0x70, 0x91, 0xa3, 0xfc, 0xec, 0x4e}} , + {{0x62, 0x12, 0x37, 0x6a, 0x30, 0xf6, 0x1e, 0xfb, 0x14, 0x5c, 0x0d, 0x0e, 0xb7, 0x81, 0x6a, 0xe7, 0x08, 0x05, 0xac, 0xaa, 0x38, 0x46, 0xe2, 0x73, 0xea, 0x4b, 0x07, 0x81, 0x43, 0x7c, 0x9e, 0x5e}}}, +{{{0xfc, 0xf9, 0x21, 0x4f, 0x2e, 0x76, 0x9b, 0x1f, 0x28, 0x60, 0x77, 0x43, 0x32, 0x9d, 0xbe, 0x17, 0x30, 0x2a, 0xc6, 0x18, 0x92, 0x66, 0x62, 0x30, 0x98, 0x40, 0x11, 0xa6, 0x7f, 0x18, 0x84, 0x28}} , + {{0x3f, 0xab, 0xd3, 0xf4, 0x8a, 0x76, 0xa1, 0x3c, 0xca, 0x2d, 0x49, 0xc3, 0xea, 0x08, 0x0b, 0x85, 0x17, 0x2a, 0xc3, 0x6c, 0x08, 0xfd, 0x57, 0x9f, 0x3d, 0x5f, 0xdf, 0x67, 0x68, 0x42, 0x00, 0x32}}}, +{{{0x51, 0x60, 0x1b, 0x06, 0x4f, 0x8a, 0x21, 0xba, 0x38, 0xa8, 0xba, 0xd6, 0x40, 0xf6, 0xe9, 0x9b, 0x76, 0x4d, 0x56, 0x21, 0x5b, 0x0a, 0x9b, 0x2e, 0x4f, 0x3d, 0x81, 0x32, 0x08, 0x9f, 0x97, 0x5b}} , + {{0xe5, 0x44, 0xec, 0x06, 0x9d, 0x90, 0x79, 0x9f, 0xd3, 0xe0, 0x79, 0xaf, 0x8f, 0x10, 0xfd, 0xdd, 0x04, 0xae, 0x27, 0x97, 0x46, 0x33, 0x79, 0xea, 0xb8, 0x4e, 0xca, 0x5a, 0x59, 0x57, 0xe1, 0x0e}}}, +{{{0x1a, 0xda, 0xf3, 0xa5, 0x41, 0x43, 0x28, 0xfc, 0x7e, 0xe7, 0x71, 0xea, 0xc6, 0x3b, 0x59, 0xcc, 0x2e, 0xd3, 0x40, 0xec, 0xb3, 0x13, 0x6f, 0x44, 0xcd, 0x13, 0xb2, 0x37, 0xf2, 0x6e, 0xd9, 0x1c}} , + {{0xe3, 0xdb, 0x60, 0xcd, 0x5c, 0x4a, 0x18, 0x0f, 0xef, 0x73, 0x36, 0x71, 0x8c, 0xf6, 0x11, 0xb4, 0xd8, 0xce, 0x17, 0x5e, 0x4f, 0x26, 0x77, 0x97, 0x5f, 0xcb, 0xef, 0x91, 0xeb, 0x6a, 0x62, 0x7a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x18, 0x4a, 0xa2, 0x97, 0x08, 0x81, 0x2d, 0x83, 0xc4, 0xcc, 0xf0, 0x83, 0x7e, 0xec, 0x0d, 0x95, 0x4c, 0x5b, 0xfb, 0xfa, 0x98, 0x80, 0x4a, 0x66, 0x56, 0x0c, 0x51, 0xb3, 0xf2, 0x04, 0x5d, 0x27}} , + {{0x3b, 0xb9, 0xb8, 0x06, 0x5a, 0x2e, 0xfe, 0xc3, 0x82, 0x37, 0x9c, 0xa3, 0x11, 0x1f, 0x9c, 0xa6, 0xda, 0x63, 0x48, 0x9b, 0xad, 0xde, 0x2d, 0xa6, 0xbc, 0x6e, 0x32, 0xda, 0x27, 0x65, 0xdd, 0x57}}}, +{{{0x84, 0x4f, 0x37, 0x31, 0x7d, 0x2e, 0xbc, 0xad, 0x87, 0x07, 0x2a, 0x6b, 0x37, 0xfc, 0x5f, 0xeb, 0x4e, 0x75, 0x35, 0xa6, 0xde, 0xab, 0x0a, 0x19, 0x3a, 0xb7, 0xb1, 0xef, 0x92, 0x6a, 0x3b, 0x3c}} , + {{0x3b, 0xb2, 0x94, 0x6d, 0x39, 0x60, 0xac, 0xee, 0xe7, 0x81, 0x1a, 0x3b, 0x76, 0x87, 0x5c, 0x05, 0x94, 0x2a, 0x45, 0xb9, 0x80, 0xe9, 0x22, 0xb1, 0x07, 0xcb, 0x40, 0x9e, 0x70, 0x49, 0x6d, 0x12}}}, +{{{0xfd, 0x18, 0x78, 0x84, 0xa8, 0x4c, 0x7d, 0x6e, 0x59, 0xa6, 0xe5, 0x74, 0xf1, 0x19, 0xa6, 0x84, 0x2e, 0x51, 0xc1, 0x29, 0x13, 0xf2, 0x14, 0x6b, 0x5d, 0x53, 0x51, 0xf7, 0xef, 0xbf, 0x01, 0x22}} , + {{0xa4, 0x4b, 0x62, 0x4c, 0xe6, 0xfd, 0x72, 0x07, 0xf2, 0x81, 0xfc, 0xf2, 0xbd, 0x12, 0x7c, 0x68, 0x76, 0x2a, 0xba, 0xf5, 0x65, 0xb1, 0x1f, 0x17, 0x0a, 0x38, 0xb0, 0xbf, 0xc0, 0xf8, 0xf4, 0x2a}}}, +{{{0x55, 0x60, 0x55, 0x5b, 0xe4, 0x1d, 0x71, 0x4c, 0x9d, 0x5b, 0x9f, 0x70, 0xa6, 0x85, 0x9a, 0x2c, 0xa0, 0xe2, 0x32, 0x48, 0xce, 0x9e, 0x2a, 0xa5, 0x07, 0x3b, 0xc7, 0x6c, 0x86, 0x77, 0xde, 0x3c}} , + {{0xf7, 0x18, 0x7a, 0x96, 0x7e, 0x43, 0x57, 0xa9, 0x55, 0xfc, 0x4e, 0xb6, 0x72, 0x00, 0xf2, 0xe4, 0xd7, 0x52, 0xd3, 0xd3, 0xb6, 0x85, 0xf6, 0x71, 0xc7, 0x44, 0x3f, 0x7f, 0xd7, 0xb3, 0xf2, 0x79}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x46, 0xca, 0xa7, 0x55, 0x7b, 0x79, 0xf3, 0xca, 0x5a, 0x65, 0xf6, 0xed, 0x50, 0x14, 0x7b, 0xe4, 0xc4, 0x2a, 0x65, 0x9e, 0xe2, 0xf9, 0xca, 0xa7, 0x22, 0x26, 0x53, 0xcb, 0x21, 0x5b, 0xa7, 0x31}} , + {{0x90, 0xd7, 0xc5, 0x26, 0x08, 0xbd, 0xb0, 0x53, 0x63, 0x58, 0xc3, 0x31, 0x5e, 0x75, 0x46, 0x15, 0x91, 0xa6, 0xf8, 0x2f, 0x1a, 0x08, 0x65, 0x88, 0x2f, 0x98, 0x04, 0xf1, 0x7c, 0x6e, 0x00, 0x77}}}, +{{{0x81, 0x21, 0x61, 0x09, 0xf6, 0x4e, 0xf1, 0x92, 0xee, 0x63, 0x61, 0x73, 0x87, 0xc7, 0x54, 0x0e, 0x42, 0x4b, 0xc9, 0x47, 0xd1, 0xb8, 0x7e, 0x91, 0x75, 0x37, 0x99, 0x28, 0xb8, 0xdd, 0x7f, 0x50}} , + {{0x89, 0x8f, 0xc0, 0xbe, 0x5d, 0xd6, 0x9f, 0xa0, 0xf0, 0x9d, 0x81, 0xce, 0x3a, 0x7b, 0x98, 0x58, 0xbb, 0xd7, 0x78, 0xc8, 0x3f, 0x13, 0xf1, 0x74, 0x19, 0xdf, 0xf8, 0x98, 0x89, 0x5d, 0xfa, 0x5f}}}, +{{{0x9e, 0x35, 0x85, 0x94, 0x47, 0x1f, 0x90, 0x15, 0x26, 0xd0, 0x84, 0xed, 0x8a, 0x80, 0xf7, 0x63, 0x42, 0x86, 0x27, 0xd7, 0xf4, 0x75, 0x58, 0xdc, 0x9c, 0xc0, 0x22, 0x7e, 0x20, 0x35, 0xfd, 0x1f}} , + {{0x68, 0x0e, 0x6f, 0x97, 0xba, 0x70, 0xbb, 0xa3, 0x0e, 0xe5, 0x0b, 0x12, 0xf4, 0xa2, 0xdc, 0x47, 0xf8, 0xe6, 0xd0, 0x23, 0x6c, 0x33, 0xa8, 0x99, 0x46, 0x6e, 0x0f, 0x44, 0xba, 0x76, 0x48, 0x0f}}}, +{{{0xa3, 0x2a, 0x61, 0x37, 0xe2, 0x59, 0x12, 0x0e, 0x27, 0xba, 0x64, 0x43, 0xae, 0xc0, 0x42, 0x69, 0x79, 0xa4, 0x1e, 0x29, 0x8b, 0x15, 0xeb, 0xf8, 0xaf, 0xd4, 0xa2, 0x68, 0x33, 0xb5, 0x7a, 0x24}} , + {{0x2c, 0x19, 0x33, 0xdd, 0x1b, 0xab, 0xec, 0x01, 0xb0, 0x23, 0xf8, 0x42, 0x2b, 0x06, 0x88, 0xea, 0x3d, 0x2d, 0x00, 0x2a, 0x78, 0x45, 0x4d, 0x38, 0xed, 0x2e, 0x2e, 0x44, 0x49, 0xed, 0xcb, 0x33}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xa0, 0x68, 0xe8, 0x41, 0x8f, 0x91, 0xf8, 0x11, 0x13, 0x90, 0x2e, 0xa7, 0xab, 0x30, 0xef, 0xad, 0xa0, 0x61, 0x00, 0x88, 0xef, 0xdb, 0xce, 0x5b, 0x5c, 0xbb, 0x62, 0xc8, 0x56, 0xf9, 0x00, 0x73}} , + {{0x3f, 0x60, 0xc1, 0x82, 0x2d, 0xa3, 0x28, 0x58, 0x24, 0x9e, 0x9f, 0xe3, 0x70, 0xcc, 0x09, 0x4e, 0x1a, 0x3f, 0x11, 0x11, 0x15, 0x07, 0x3c, 0xa4, 0x41, 0xe0, 0x65, 0xa3, 0x0a, 0x41, 0x6d, 0x11}}}, +{{{0x31, 0x40, 0x01, 0x52, 0x56, 0x94, 0x5b, 0x28, 0x8a, 0xaa, 0x52, 0xee, 0xd8, 0x0a, 0x05, 0x8d, 0xcd, 0xb5, 0xaa, 0x2e, 0x38, 0xaa, 0xb7, 0x87, 0xf7, 0x2b, 0xfb, 0x04, 0xcb, 0x84, 0x3d, 0x54}} , + {{0x20, 0xef, 0x59, 0xde, 0xa4, 0x2b, 0x93, 0x6e, 0x2e, 0xec, 0x42, 0x9a, 0xd4, 0x2d, 0xf4, 0x46, 0x58, 0x27, 0x2b, 0x18, 0x8f, 0x83, 0x3d, 0x69, 0x9e, 0xd4, 0x3e, 0xb6, 0xc5, 0xfd, 0x58, 0x03}}}, +{{{0x33, 0x89, 0xc9, 0x63, 0x62, 0x1c, 0x17, 0xb4, 0x60, 0xc4, 0x26, 0x68, 0x09, 0xc3, 0x2e, 0x37, 0x0f, 0x7b, 0xb4, 0x9c, 0xb6, 0xf9, 0xfb, 0xd4, 0x51, 0x78, 0xc8, 0x63, 0xea, 0x77, 0x47, 0x07}} , + {{0x32, 0xb4, 0x18, 0x47, 0x79, 0xcb, 0xd4, 0x5a, 0x07, 0x14, 0x0f, 0xa0, 0xd5, 0xac, 0xd0, 0x41, 0x40, 0xab, 0x61, 0x23, 0xe5, 0x2a, 0x2a, 0x6f, 0xf7, 0xa8, 0xd4, 0x76, 0xef, 0xe7, 0x45, 0x6c}}}, +{{{0xa1, 0x5e, 0x60, 0x4f, 0xfb, 0xe1, 0x70, 0x6a, 0x1f, 0x55, 0x4f, 0x09, 0xb4, 0x95, 0x33, 0x36, 0xc6, 0x81, 0x01, 0x18, 0x06, 0x25, 0x27, 0xa4, 0xb4, 0x24, 0xa4, 0x86, 0x03, 0x4c, 0xac, 0x02}} , + {{0x77, 0x38, 0xde, 0xd7, 0x60, 0x48, 0x07, 0xf0, 0x74, 0xa8, 0xff, 0x54, 0xe5, 0x30, 0x43, 0xff, 0x77, 0xfb, 0x21, 0x07, 0xff, 0xb2, 0x07, 0x6b, 0xe4, 0xe5, 0x30, 0xfc, 0x19, 0x6c, 0xa3, 0x01}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x13, 0xc5, 0x2c, 0xac, 0xd3, 0x83, 0x82, 0x7c, 0x29, 0xf7, 0x05, 0xa5, 0x00, 0xb6, 0x1f, 0x86, 0x55, 0xf4, 0xd6, 0x2f, 0x0c, 0x99, 0xd0, 0x65, 0x9b, 0x6b, 0x46, 0x0d, 0x43, 0xf8, 0x16, 0x28}} , + {{0x1e, 0x7f, 0xb4, 0x74, 0x7e, 0xb1, 0x89, 0x4f, 0x18, 0x5a, 0xab, 0x64, 0x06, 0xdf, 0x45, 0x87, 0xe0, 0x6a, 0xc6, 0xf0, 0x0e, 0xc9, 0x24, 0x35, 0x38, 0xea, 0x30, 0x54, 0xb4, 0xc4, 0x52, 0x54}}}, +{{{0xe9, 0x9f, 0xdc, 0x3f, 0xc1, 0x89, 0x44, 0x74, 0x27, 0xe4, 0xc1, 0x90, 0xff, 0x4a, 0xa7, 0x3c, 0xee, 0xcd, 0xf4, 0x1d, 0x25, 0x94, 0x7f, 0x63, 0x16, 0x48, 0xbc, 0x64, 0xfe, 0x95, 0xc4, 0x0c}} , + {{0x8b, 0x19, 0x75, 0x6e, 0x03, 0x06, 0x5e, 0x6a, 0x6f, 0x1a, 0x8c, 0xe3, 0xd3, 0x28, 0xf2, 0xe0, 0xb9, 0x7a, 0x43, 0x69, 0xe6, 0xd3, 0xc0, 0xfe, 0x7e, 0x97, 0xab, 0x6c, 0x7b, 0x8e, 0x13, 0x42}}}, +{{{0xd4, 0xca, 0x70, 0x3d, 0xab, 0xfb, 0x5f, 0x5e, 0x00, 0x0c, 0xcc, 0x77, 0x22, 0xf8, 0x78, 0x55, 0xae, 0x62, 0x35, 0xfb, 0x9a, 0xc6, 0x03, 0xe4, 0x0c, 0xee, 0xab, 0xc7, 0xc0, 0x89, 0x87, 0x54}} , + {{0x32, 0xad, 0xae, 0x85, 0x58, 0x43, 0xb8, 0xb1, 0xe6, 0x3e, 0x00, 0x9c, 0x78, 0x88, 0x56, 0xdb, 0x9c, 0xfc, 0x79, 0xf6, 0xf9, 0x41, 0x5f, 0xb7, 0xbc, 0x11, 0xf9, 0x20, 0x36, 0x1c, 0x53, 0x2b}}}, +{{{0x5a, 0x20, 0x5b, 0xa1, 0xa5, 0x44, 0x91, 0x24, 0x02, 0x63, 0x12, 0x64, 0xb8, 0x55, 0xf6, 0xde, 0x2c, 0xdb, 0x47, 0xb8, 0xc6, 0x0a, 0xc3, 0x00, 0x78, 0x93, 0xd8, 0xf5, 0xf5, 0x18, 0x28, 0x0a}} , + {{0xd6, 0x1b, 0x9a, 0x6c, 0xe5, 0x46, 0xea, 0x70, 0x96, 0x8d, 0x4e, 0x2a, 0x52, 0x21, 0x26, 0x4b, 0xb1, 0xbb, 0x0f, 0x7c, 0xa9, 0x9b, 0x04, 0xbb, 0x51, 0x08, 0xf1, 0x9a, 0xa4, 0x76, 0x7c, 0x18}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xfa, 0x94, 0xf7, 0x40, 0xd0, 0xd7, 0xeb, 0xa9, 0x82, 0x36, 0xd5, 0x15, 0xb9, 0x33, 0x7a, 0xbf, 0x8a, 0xf2, 0x63, 0xaa, 0x37, 0xf5, 0x59, 0xac, 0xbd, 0xbb, 0x32, 0x36, 0xbe, 0x73, 0x99, 0x38}} , + {{0x2c, 0xb3, 0xda, 0x7a, 0xd8, 0x3d, 0x99, 0xca, 0xd2, 0xf4, 0xda, 0x99, 0x8e, 0x4f, 0x98, 0xb7, 0xf4, 0xae, 0x3e, 0x9f, 0x8e, 0x35, 0x60, 0xa4, 0x33, 0x75, 0xa4, 0x04, 0x93, 0xb1, 0x6b, 0x4d}}}, +{{{0x97, 0x9d, 0xa8, 0xcd, 0x97, 0x7b, 0x9d, 0xb9, 0xe7, 0xa5, 0xef, 0xfd, 0xa8, 0x42, 0x6b, 0xc3, 0x62, 0x64, 0x7d, 0xa5, 0x1b, 0xc9, 0x9e, 0xd2, 0x45, 0xb9, 0xee, 0x03, 0xb0, 0xbf, 0xc0, 0x68}} , + {{0xed, 0xb7, 0x84, 0x2c, 0xf6, 0xd3, 0xa1, 0x6b, 0x24, 0x6d, 0x87, 0x56, 0x97, 0x59, 0x79, 0x62, 0x9f, 0xac, 0xed, 0xf3, 0xc9, 0x89, 0x21, 0x2e, 0x04, 0xb3, 0xcc, 0x2f, 0xbe, 0xd6, 0x0a, 0x4b}}}, +{{{0x39, 0x61, 0x05, 0xed, 0x25, 0x89, 0x8b, 0x5d, 0x1b, 0xcb, 0x0c, 0x55, 0xf4, 0x6a, 0x00, 0x8a, 0x46, 0xe8, 0x1e, 0xc6, 0x83, 0xc8, 0x5a, 0x76, 0xdb, 0xcc, 0x19, 0x7a, 0xcc, 0x67, 0x46, 0x0b}} , + {{0x53, 0xcf, 0xc2, 0xa1, 0xad, 0x6a, 0xf3, 0xcd, 0x8f, 0xc9, 0xde, 0x1c, 0xf8, 0x6c, 0x8f, 0xf8, 0x76, 0x42, 0xe7, 0xfe, 0xb2, 0x72, 0x21, 0x0a, 0x66, 0x74, 0x8f, 0xb7, 0xeb, 0xe4, 0x6f, 0x01}}}, +{{{0x22, 0x8c, 0x6b, 0xbe, 0xfc, 0x4d, 0x70, 0x62, 0x6e, 0x52, 0x77, 0x99, 0x88, 0x7e, 0x7b, 0x57, 0x7a, 0x0d, 0xfe, 0xdc, 0x72, 0x92, 0xf1, 0x68, 0x1d, 0x97, 0xd7, 0x7c, 0x8d, 0x53, 0x10, 0x37}} , + {{0x53, 0x88, 0x77, 0x02, 0xca, 0x27, 0xa8, 0xe5, 0x45, 0xe2, 0xa8, 0x48, 0x2a, 0xab, 0x18, 0xca, 0xea, 0x2d, 0x2a, 0x54, 0x17, 0x37, 0x32, 0x09, 0xdc, 0xe0, 0x4a, 0xb7, 0x7d, 0x82, 0x10, 0x7d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x8a, 0x64, 0x1e, 0x14, 0x0a, 0x57, 0xd4, 0xda, 0x5c, 0x96, 0x9b, 0x01, 0x4c, 0x67, 0xbf, 0x8b, 0x30, 0xfe, 0x08, 0xdb, 0x0d, 0xd5, 0xa8, 0xd7, 0x09, 0x11, 0x85, 0xa2, 0xd3, 0x45, 0xfb, 0x7e}} , + {{0xda, 0x8c, 0xc2, 0xd0, 0xac, 0x18, 0xe8, 0x52, 0x36, 0xd4, 0x21, 0xa3, 0xdd, 0x57, 0x22, 0x79, 0xb7, 0xf8, 0x71, 0x9d, 0xc6, 0x91, 0x70, 0x86, 0x56, 0xbf, 0xa1, 0x11, 0x8b, 0x19, 0xe1, 0x0f}}}, +{{{0x18, 0x32, 0x98, 0x2c, 0x8f, 0x91, 0xae, 0x12, 0xf0, 0x8c, 0xea, 0xf3, 0x3c, 0xb9, 0x5d, 0xe4, 0x69, 0xed, 0xb2, 0x47, 0x18, 0xbd, 0xce, 0x16, 0x52, 0x5c, 0x23, 0xe2, 0xa5, 0x25, 0x52, 0x5d}} , + {{0xb9, 0xb1, 0xe7, 0x5d, 0x4e, 0xbc, 0xee, 0xbb, 0x40, 0x81, 0x77, 0x82, 0x19, 0xab, 0xb5, 0xc6, 0xee, 0xab, 0x5b, 0x6b, 0x63, 0x92, 0x8a, 0x34, 0x8d, 0xcd, 0xee, 0x4f, 0x49, 0xe5, 0xc9, 0x7e}}}, +{{{0x21, 0xac, 0x8b, 0x22, 0xcd, 0xc3, 0x9a, 0xe9, 0x5e, 0x78, 0xbd, 0xde, 0xba, 0xad, 0xab, 0xbf, 0x75, 0x41, 0x09, 0xc5, 0x58, 0xa4, 0x7d, 0x92, 0xb0, 0x7f, 0xf2, 0xa1, 0xd1, 0xc0, 0xb3, 0x6d}} , + {{0x62, 0x4f, 0xd0, 0x75, 0x77, 0xba, 0x76, 0x77, 0xd7, 0xb8, 0xd8, 0x92, 0x6f, 0x98, 0x34, 0x3d, 0xd6, 0x4e, 0x1c, 0x0f, 0xf0, 0x8f, 0x2e, 0xf1, 0xb3, 0xbd, 0xb1, 0xb9, 0xec, 0x99, 0xb4, 0x07}}}, +{{{0x60, 0x57, 0x2e, 0x9a, 0x72, 0x1d, 0x6b, 0x6e, 0x58, 0x33, 0x24, 0x8c, 0x48, 0x39, 0x46, 0x8e, 0x89, 0x6a, 0x88, 0x51, 0x23, 0x62, 0xb5, 0x32, 0x09, 0x36, 0xe3, 0x57, 0xf5, 0x98, 0xde, 0x6f}} , + {{0x8b, 0x2c, 0x00, 0x48, 0x4a, 0xf9, 0x5b, 0x87, 0x69, 0x52, 0xe5, 0x5b, 0xd1, 0xb1, 0xe5, 0x25, 0x25, 0xe0, 0x9c, 0xc2, 0x13, 0x44, 0xe8, 0xb9, 0x0a, 0x70, 0xad, 0xbd, 0x0f, 0x51, 0x94, 0x69}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xa2, 0xdc, 0xab, 0xa9, 0x25, 0x2d, 0xac, 0x5f, 0x03, 0x33, 0x08, 0xe7, 0x7e, 0xfe, 0x95, 0x36, 0x3c, 0x5b, 0x3a, 0xd3, 0x05, 0x82, 0x1c, 0x95, 0x2d, 0xd8, 0x77, 0x7e, 0x02, 0xd9, 0x5b, 0x70}} , + {{0xc2, 0xfe, 0x1b, 0x0c, 0x67, 0xcd, 0xd6, 0xe0, 0x51, 0x8e, 0x2c, 0xe0, 0x79, 0x88, 0xf0, 0xcf, 0x41, 0x4a, 0xad, 0x23, 0xd4, 0x46, 0xca, 0x94, 0xa1, 0xc3, 0xeb, 0x28, 0x06, 0xfa, 0x17, 0x14}}}, +{{{0x7b, 0xaa, 0x70, 0x0a, 0x4b, 0xfb, 0xf5, 0xbf, 0x80, 0xc5, 0xcf, 0x08, 0x7a, 0xdd, 0xa1, 0xf4, 0x9d, 0x54, 0x50, 0x53, 0x23, 0x77, 0x23, 0xf5, 0x34, 0xa5, 0x22, 0xd1, 0x0d, 0x96, 0x2e, 0x47}} , + {{0xcc, 0xb7, 0x32, 0x89, 0x57, 0xd0, 0x98, 0x75, 0xe4, 0x37, 0x99, 0xa9, 0xe8, 0xba, 0xed, 0xba, 0xeb, 0xc7, 0x4f, 0x15, 0x76, 0x07, 0x0c, 0x4c, 0xef, 0x9f, 0x52, 0xfc, 0x04, 0x5d, 0x58, 0x10}}}, +{{{0xce, 0x82, 0xf0, 0x8f, 0x79, 0x02, 0xa8, 0xd1, 0xda, 0x14, 0x09, 0x48, 0xee, 0x8a, 0x40, 0x98, 0x76, 0x60, 0x54, 0x5a, 0xde, 0x03, 0x24, 0xf5, 0xe6, 0x2f, 0xe1, 0x03, 0xbf, 0x68, 0x82, 0x7f}} , + {{0x64, 0xe9, 0x28, 0xc7, 0xa4, 0xcf, 0x2a, 0xf9, 0x90, 0x64, 0x72, 0x2c, 0x8b, 0xeb, 0xec, 0xa0, 0xf2, 0x7d, 0x35, 0xb5, 0x90, 0x4d, 0x7f, 0x5b, 0x4a, 0x49, 0xe4, 0xb8, 0x3b, 0xc8, 0xa1, 0x2f}}}, +{{{0x8b, 0xc5, 0xcc, 0x3d, 0x69, 0xa6, 0xa1, 0x18, 0x44, 0xbc, 0x4d, 0x77, 0x37, 0xc7, 0x86, 0xec, 0x0c, 0xc9, 0xd6, 0x44, 0xa9, 0x23, 0x27, 0xb9, 0x03, 0x34, 0xa7, 0x0a, 0xd5, 0xc7, 0x34, 0x37}} , + {{0xf9, 0x7e, 0x3e, 0x66, 0xee, 0xf9, 0x99, 0x28, 0xff, 0xad, 0x11, 0xd8, 0xe2, 0x66, 0xc5, 0xcd, 0x0f, 0x0d, 0x0b, 0x6a, 0xfc, 0x7c, 0x24, 0xa8, 0x4f, 0xa8, 0x5e, 0x80, 0x45, 0x8b, 0x6c, 0x41}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xef, 0x1e, 0xec, 0xf7, 0x8d, 0x77, 0xf2, 0xea, 0xdb, 0x60, 0x03, 0x21, 0xc0, 0xff, 0x5e, 0x67, 0xc3, 0x71, 0x0b, 0x21, 0xb4, 0x41, 0xa0, 0x68, 0x38, 0xc6, 0x01, 0xa3, 0xd3, 0x51, 0x3c, 0x3c}} , + {{0x92, 0xf8, 0xd6, 0x4b, 0xef, 0x42, 0x13, 0xb2, 0x4a, 0xc4, 0x2e, 0x72, 0x3f, 0xc9, 0x11, 0xbd, 0x74, 0x02, 0x0e, 0xf5, 0x13, 0x9d, 0x83, 0x1a, 0x1b, 0xd5, 0x54, 0xde, 0xc4, 0x1e, 0x16, 0x6c}}}, +{{{0x27, 0x52, 0xe4, 0x63, 0xaa, 0x94, 0xe6, 0xc3, 0x28, 0x9c, 0xc6, 0x56, 0xac, 0xfa, 0xb6, 0xbd, 0xe2, 0xcc, 0x76, 0xc6, 0x27, 0x27, 0xa2, 0x8e, 0x78, 0x2b, 0x84, 0x72, 0x10, 0xbd, 0x4e, 0x2a}} , + {{0xea, 0xa7, 0x23, 0xef, 0x04, 0x61, 0x80, 0x50, 0xc9, 0x6e, 0xa5, 0x96, 0xd1, 0xd1, 0xc8, 0xc3, 0x18, 0xd7, 0x2d, 0xfd, 0x26, 0xbd, 0xcb, 0x7b, 0x92, 0x51, 0x0e, 0x4a, 0x65, 0x57, 0xb8, 0x49}}}, +{{{0xab, 0x55, 0x36, 0xc3, 0xec, 0x63, 0x55, 0x11, 0x55, 0xf6, 0xa5, 0xc7, 0x01, 0x5f, 0xfe, 0x79, 0xd8, 0x0a, 0xf7, 0x03, 0xd8, 0x98, 0x99, 0xf5, 0xd0, 0x00, 0x54, 0x6b, 0x66, 0x28, 0xf5, 0x25}} , + {{0x7a, 0x8d, 0xa1, 0x5d, 0x70, 0x5d, 0x51, 0x27, 0xee, 0x30, 0x65, 0x56, 0x95, 0x46, 0xde, 0xbd, 0x03, 0x75, 0xb4, 0x57, 0x59, 0x89, 0xeb, 0x02, 0x9e, 0xcc, 0x89, 0x19, 0xa7, 0xcb, 0x17, 0x67}}}, +{{{0x6a, 0xeb, 0xfc, 0x9a, 0x9a, 0x10, 0xce, 0xdb, 0x3a, 0x1c, 0x3c, 0x6a, 0x9d, 0xea, 0x46, 0xbc, 0x45, 0x49, 0xac, 0xe3, 0x41, 0x12, 0x7c, 0xf0, 0xf7, 0x4f, 0xf9, 0xf7, 0xff, 0x2c, 0x89, 0x04}} , + {{0x30, 0x31, 0x54, 0x1a, 0x46, 0xca, 0xe6, 0xc6, 0xcb, 0xe2, 0xc3, 0xc1, 0x8b, 0x75, 0x81, 0xbe, 0xee, 0xf8, 0xa3, 0x11, 0x1c, 0x25, 0xa3, 0xa7, 0x35, 0x51, 0x55, 0xe2, 0x25, 0xaa, 0xe2, 0x3a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xb4, 0x48, 0x10, 0x9f, 0x8a, 0x09, 0x76, 0xfa, 0xf0, 0x7a, 0xb0, 0x70, 0xf7, 0x83, 0x80, 0x52, 0x84, 0x2b, 0x26, 0xa2, 0xc4, 0x5d, 0x4f, 0xba, 0xb1, 0xc8, 0x40, 0x0d, 0x78, 0x97, 0xc4, 0x60}} , + {{0xd4, 0xb1, 0x6c, 0x08, 0xc7, 0x40, 0x38, 0x73, 0x5f, 0x0b, 0xf3, 0x76, 0x5d, 0xb2, 0xa5, 0x2f, 0x57, 0x57, 0x07, 0xed, 0x08, 0xa2, 0x6c, 0x4f, 0x08, 0x02, 0xb5, 0x0e, 0xee, 0x44, 0xfa, 0x22}}}, +{{{0x0f, 0x00, 0x3f, 0xa6, 0x04, 0x19, 0x56, 0x65, 0x31, 0x7f, 0x8b, 0xeb, 0x0d, 0xe1, 0x47, 0x89, 0x97, 0x16, 0x53, 0xfa, 0x81, 0xa7, 0xaa, 0xb2, 0xbf, 0x67, 0xeb, 0x72, 0x60, 0x81, 0x0d, 0x48}} , + {{0x7e, 0x13, 0x33, 0xcd, 0xa8, 0x84, 0x56, 0x1e, 0x67, 0xaf, 0x6b, 0x43, 0xac, 0x17, 0xaf, 0x16, 0xc0, 0x52, 0x99, 0x49, 0x5b, 0x87, 0x73, 0x7e, 0xb5, 0x43, 0xda, 0x6b, 0x1d, 0x0f, 0x2d, 0x55}}}, +{{{0xe9, 0x58, 0x1f, 0xff, 0x84, 0x3f, 0x93, 0x1c, 0xcb, 0xe1, 0x30, 0x69, 0xa5, 0x75, 0x19, 0x7e, 0x14, 0x5f, 0xf8, 0xfc, 0x09, 0xdd, 0xa8, 0x78, 0x9d, 0xca, 0x59, 0x8b, 0xd1, 0x30, 0x01, 0x13}} , + {{0xff, 0x76, 0x03, 0xc5, 0x4b, 0x89, 0x99, 0x70, 0x00, 0x59, 0x70, 0x9c, 0xd5, 0xd9, 0x11, 0x89, 0x5a, 0x46, 0xfe, 0xef, 0xdc, 0xd9, 0x55, 0x2b, 0x45, 0xa7, 0xb0, 0x2d, 0xfb, 0x24, 0xc2, 0x29}}}, +{{{0x38, 0x06, 0xf8, 0x0b, 0xac, 0x82, 0xc4, 0x97, 0x2b, 0x90, 0xe0, 0xf7, 0xa8, 0xab, 0x6c, 0x08, 0x80, 0x66, 0x90, 0x46, 0xf7, 0x26, 0x2d, 0xf8, 0xf1, 0xc4, 0x6b, 0x4a, 0x82, 0x98, 0x8e, 0x37}} , + {{0x8e, 0xb4, 0xee, 0xb8, 0xd4, 0x3f, 0xb2, 0x1b, 0xe0, 0x0a, 0x3d, 0x75, 0x34, 0x28, 0xa2, 0x8e, 0xc4, 0x92, 0x7b, 0xfe, 0x60, 0x6e, 0x6d, 0xb8, 0x31, 0x1d, 0x62, 0x0d, 0x78, 0x14, 0x42, 0x11}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x5e, 0xa8, 0xd8, 0x04, 0x9b, 0x73, 0xc9, 0xc9, 0xdc, 0x0d, 0x73, 0xbf, 0x0a, 0x0a, 0x73, 0xff, 0x18, 0x1f, 0x9c, 0x51, 0xaa, 0xc6, 0xf1, 0x83, 0x25, 0xfd, 0xab, 0xa3, 0x11, 0xd3, 0x01, 0x24}} , + {{0x4d, 0xe3, 0x7e, 0x38, 0x62, 0x5e, 0x64, 0xbb, 0x2b, 0x53, 0xb5, 0x03, 0x68, 0xc4, 0xf2, 0x2b, 0x5a, 0x03, 0x32, 0x99, 0x4a, 0x41, 0x9a, 0xe1, 0x1a, 0xae, 0x8c, 0x48, 0xf3, 0x24, 0x32, 0x65}}}, +{{{0xe8, 0xdd, 0xad, 0x3a, 0x8c, 0xea, 0xf4, 0xb3, 0xb2, 0xe5, 0x73, 0xf2, 0xed, 0x8b, 0xbf, 0xed, 0xb1, 0x0c, 0x0c, 0xfb, 0x2b, 0xf1, 0x01, 0x48, 0xe8, 0x26, 0x03, 0x8e, 0x27, 0x4d, 0x96, 0x72}} , + {{0xc8, 0x09, 0x3b, 0x60, 0xc9, 0x26, 0x4d, 0x7c, 0xf2, 0x9c, 0xd4, 0xa1, 0x3b, 0x26, 0xc2, 0x04, 0x33, 0x44, 0x76, 0x3c, 0x02, 0xbb, 0x11, 0x42, 0x0c, 0x22, 0xb7, 0xc6, 0xe1, 0xac, 0xb4, 0x0e}}}, +{{{0x6f, 0x85, 0xe7, 0xef, 0xde, 0x67, 0x30, 0xfc, 0xbf, 0x5a, 0xe0, 0x7b, 0x7a, 0x2a, 0x54, 0x6b, 0x5d, 0x62, 0x85, 0xa1, 0xf8, 0x16, 0x88, 0xec, 0x61, 0xb9, 0x96, 0xb5, 0xef, 0x2d, 0x43, 0x4d}} , + {{0x7c, 0x31, 0x33, 0xcc, 0xe4, 0xcf, 0x6c, 0xff, 0x80, 0x47, 0x77, 0xd1, 0xd8, 0xe9, 0x69, 0x97, 0x98, 0x7f, 0x20, 0x57, 0x1d, 0x1d, 0x4f, 0x08, 0x27, 0xc8, 0x35, 0x57, 0x40, 0xc6, 0x21, 0x0c}}}, +{{{0xd2, 0x8e, 0x9b, 0xfa, 0x42, 0x8e, 0xdf, 0x8f, 0xc7, 0x86, 0xf9, 0xa4, 0xca, 0x70, 0x00, 0x9d, 0x21, 0xbf, 0xec, 0x57, 0x62, 0x30, 0x58, 0x8c, 0x0d, 0x35, 0xdb, 0x5d, 0x8b, 0x6a, 0xa0, 0x5a}} , + {{0xc1, 0x58, 0x7c, 0x0d, 0x20, 0xdd, 0x11, 0x26, 0x5f, 0x89, 0x3b, 0x97, 0x58, 0xf8, 0x8b, 0xe3, 0xdf, 0x32, 0xe2, 0xfc, 0xd8, 0x67, 0xf2, 0xa5, 0x37, 0x1e, 0x6d, 0xec, 0x7c, 0x27, 0x20, 0x79}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xd0, 0xe9, 0xc0, 0xfa, 0x95, 0x45, 0x23, 0x96, 0xf1, 0x2c, 0x79, 0x25, 0x14, 0xce, 0x40, 0x14, 0x44, 0x2c, 0x36, 0x50, 0xd9, 0x63, 0x56, 0xb7, 0x56, 0x3b, 0x9e, 0xa7, 0xef, 0x89, 0xbb, 0x0e}} , + {{0xce, 0x7f, 0xdc, 0x0a, 0xcc, 0x82, 0x1c, 0x0a, 0x78, 0x71, 0xe8, 0x74, 0x8d, 0x01, 0x30, 0x0f, 0xa7, 0x11, 0x4c, 0xdf, 0x38, 0xd7, 0xa7, 0x0d, 0xf8, 0x48, 0x52, 0x00, 0x80, 0x7b, 0x5f, 0x0e}}}, +{{{0x25, 0x83, 0xe6, 0x94, 0x7b, 0x81, 0xb2, 0x91, 0xae, 0x0e, 0x05, 0xc9, 0xa3, 0x68, 0x2d, 0xd9, 0x88, 0x25, 0x19, 0x2a, 0x61, 0x61, 0x21, 0x97, 0x15, 0xa1, 0x35, 0xa5, 0x46, 0xc8, 0xa2, 0x0e}} , + {{0x1b, 0x03, 0x0d, 0x8b, 0x5a, 0x1b, 0x97, 0x4b, 0xf2, 0x16, 0x31, 0x3d, 0x1f, 0x33, 0xa0, 0x50, 0x3a, 0x18, 0xbe, 0x13, 0xa1, 0x76, 0xc1, 0xba, 0x1b, 0xf1, 0x05, 0x7b, 0x33, 0xa8, 0x82, 0x3b}}}, +{{{0xba, 0x36, 0x7b, 0x6d, 0xa9, 0xea, 0x14, 0x12, 0xc5, 0xfa, 0x91, 0x00, 0xba, 0x9b, 0x99, 0xcc, 0x56, 0x02, 0xe9, 0xa0, 0x26, 0x40, 0x66, 0x8c, 0xc4, 0xf8, 0x85, 0x33, 0x68, 0xe7, 0x03, 0x20}} , + {{0x50, 0x5b, 0xff, 0xa9, 0xb2, 0xf1, 0xf1, 0x78, 0xcf, 0x14, 0xa4, 0xa9, 0xfc, 0x09, 0x46, 0x94, 0x54, 0x65, 0x0d, 0x9c, 0x5f, 0x72, 0x21, 0xe2, 0x97, 0xa5, 0x2d, 0x81, 0xce, 0x4a, 0x5f, 0x79}}}, +{{{0x3d, 0x5f, 0x5c, 0xd2, 0xbc, 0x7d, 0x77, 0x0e, 0x2a, 0x6d, 0x22, 0x45, 0x84, 0x06, 0xc4, 0xdd, 0xc6, 0xa6, 0xc6, 0xd7, 0x49, 0xad, 0x6d, 0x87, 0x91, 0x0e, 0x3a, 0x67, 0x1d, 0x2c, 0x1d, 0x56}} , + {{0xfe, 0x7a, 0x74, 0xcf, 0xd4, 0xd2, 0xe5, 0x19, 0xde, 0xd0, 0xdb, 0x70, 0x23, 0x69, 0xe6, 0x6d, 0xec, 0xec, 0xcc, 0x09, 0x33, 0x6a, 0x77, 0xdc, 0x6b, 0x22, 0x76, 0x5d, 0x92, 0x09, 0xac, 0x2d}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x23, 0x15, 0x17, 0xeb, 0xd3, 0xdb, 0x12, 0x5e, 0x01, 0xf0, 0x91, 0xab, 0x2c, 0x41, 0xce, 0xac, 0xed, 0x1b, 0x4b, 0x2d, 0xbc, 0xdb, 0x17, 0x66, 0x89, 0x46, 0xad, 0x4b, 0x1e, 0x6f, 0x0b, 0x14}} , + {{0x11, 0xce, 0xbf, 0xb6, 0x77, 0x2d, 0x48, 0x22, 0x18, 0x4f, 0xa3, 0x5d, 0x4a, 0xb0, 0x70, 0x12, 0x3e, 0x54, 0xd7, 0xd8, 0x0e, 0x2b, 0x27, 0xdc, 0x53, 0xff, 0xca, 0x8c, 0x59, 0xb3, 0x4e, 0x44}}}, +{{{0x07, 0x76, 0x61, 0x0f, 0x66, 0xb2, 0x21, 0x39, 0x7e, 0xc0, 0xec, 0x45, 0x28, 0x82, 0xa1, 0x29, 0x32, 0x44, 0x35, 0x13, 0x5e, 0x61, 0x5e, 0x54, 0xcb, 0x7c, 0xef, 0xf6, 0x41, 0xcf, 0x9f, 0x0a}} , + {{0xdd, 0xf9, 0xda, 0x84, 0xc3, 0xe6, 0x8a, 0x9f, 0x24, 0xd2, 0x96, 0x5d, 0x39, 0x6f, 0x58, 0x8c, 0xc1, 0x56, 0x93, 0xab, 0xb5, 0x79, 0x3b, 0xd2, 0xa8, 0x73, 0x16, 0xed, 0xfa, 0xb4, 0x2f, 0x73}}}, +{{{0x8b, 0xb1, 0x95, 0xe5, 0x92, 0x50, 0x35, 0x11, 0x76, 0xac, 0xf4, 0x4d, 0x24, 0xc3, 0x32, 0xe6, 0xeb, 0xfe, 0x2c, 0x87, 0xc4, 0xf1, 0x56, 0xc4, 0x75, 0x24, 0x7a, 0x56, 0x85, 0x5a, 0x3a, 0x13}} , + {{0x0d, 0x16, 0xac, 0x3c, 0x4a, 0x58, 0x86, 0x3a, 0x46, 0x7f, 0x6c, 0xa3, 0x52, 0x6e, 0x37, 0xe4, 0x96, 0x9c, 0xe9, 0x5c, 0x66, 0x41, 0x67, 0xe4, 0xfb, 0x79, 0x0c, 0x05, 0xf6, 0x64, 0xd5, 0x7c}}}, +{{{0x28, 0xc1, 0xe1, 0x54, 0x73, 0xf2, 0xbf, 0x76, 0x74, 0x19, 0x19, 0x1b, 0xe4, 0xb9, 0xa8, 0x46, 0x65, 0x73, 0xf3, 0x77, 0x9b, 0x29, 0x74, 0x5b, 0xc6, 0x89, 0x6c, 0x2c, 0x7c, 0xf8, 0xb3, 0x0f}} , + {{0xf7, 0xd5, 0xe9, 0x74, 0x5d, 0xb8, 0x25, 0x16, 0xb5, 0x30, 0xbc, 0x84, 0xc5, 0xf0, 0xad, 0xca, 0x12, 0x28, 0xbc, 0x9d, 0xd4, 0xfa, 0x82, 0xe6, 0xe3, 0xbf, 0xa2, 0x15, 0x2c, 0xd4, 0x34, 0x10}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x61, 0xb1, 0x46, 0xba, 0x0e, 0x31, 0xa5, 0x67, 0x6c, 0x7f, 0xd6, 0xd9, 0x27, 0x85, 0x0f, 0x79, 0x14, 0xc8, 0x6c, 0x2f, 0x5f, 0x5b, 0x9c, 0x35, 0x3d, 0x38, 0x86, 0x77, 0x65, 0x55, 0x6a, 0x7b}} , + {{0xd3, 0xb0, 0x3a, 0x66, 0x60, 0x1b, 0x43, 0xf1, 0x26, 0x58, 0x99, 0x09, 0x8f, 0x2d, 0xa3, 0x14, 0x71, 0x85, 0xdb, 0xed, 0xf6, 0x26, 0xd5, 0x61, 0x9a, 0x73, 0xac, 0x0e, 0xea, 0xac, 0xb7, 0x0c}}}, +{{{0x5e, 0xf4, 0xe5, 0x17, 0x0e, 0x10, 0x9f, 0xe7, 0x43, 0x5f, 0x67, 0x5c, 0xac, 0x4b, 0xe5, 0x14, 0x41, 0xd2, 0xbf, 0x48, 0xf5, 0x14, 0xb0, 0x71, 0xc6, 0x61, 0xc1, 0xb2, 0x70, 0x58, 0xd2, 0x5a}} , + {{0x2d, 0xba, 0x16, 0x07, 0x92, 0x94, 0xdc, 0xbd, 0x50, 0x2b, 0xc9, 0x7f, 0x42, 0x00, 0xba, 0x61, 0xed, 0xf8, 0x43, 0xed, 0xf5, 0xf9, 0x40, 0x60, 0xb2, 0xb0, 0x82, 0xcb, 0xed, 0x75, 0xc7, 0x65}}}, +{{{0x80, 0xba, 0x0d, 0x09, 0x40, 0xa7, 0x39, 0xa6, 0x67, 0x34, 0x7e, 0x66, 0xbe, 0x56, 0xfb, 0x53, 0x78, 0xc4, 0x46, 0xe8, 0xed, 0x68, 0x6c, 0x7f, 0xce, 0xe8, 0x9f, 0xce, 0xa2, 0x64, 0x58, 0x53}} , + {{0xe8, 0xc1, 0xa9, 0xc2, 0x7b, 0x59, 0x21, 0x33, 0xe2, 0x43, 0x73, 0x2b, 0xac, 0x2d, 0xc1, 0x89, 0x3b, 0x15, 0xe2, 0xd5, 0xc0, 0x97, 0x8a, 0xfd, 0x6f, 0x36, 0x33, 0xb7, 0xb9, 0xc3, 0x88, 0x09}}}, +{{{0xd0, 0xb6, 0x56, 0x30, 0x5c, 0xae, 0xb3, 0x75, 0x44, 0xa4, 0x83, 0x51, 0x6e, 0x01, 0x65, 0xef, 0x45, 0x76, 0xe6, 0xf5, 0xa2, 0x0d, 0xd4, 0x16, 0x3b, 0x58, 0x2f, 0xf2, 0x2f, 0x36, 0x18, 0x3f}} , + {{0xfd, 0x2f, 0xe0, 0x9b, 0x1e, 0x8c, 0xc5, 0x18, 0xa9, 0xca, 0xd4, 0x2b, 0x35, 0xb6, 0x95, 0x0a, 0x9f, 0x7e, 0xfb, 0xc4, 0xef, 0x88, 0x7b, 0x23, 0x43, 0xec, 0x2f, 0x0d, 0x0f, 0x7a, 0xfc, 0x5c}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x8d, 0xd2, 0xda, 0xc7, 0x44, 0xd6, 0x7a, 0xdb, 0x26, 0x7d, 0x1d, 0xb8, 0xe1, 0xde, 0x9d, 0x7a, 0x7d, 0x17, 0x7e, 0x1c, 0x37, 0x04, 0x8d, 0x2d, 0x7c, 0x5e, 0x18, 0x38, 0x1e, 0xaf, 0xc7, 0x1b}} , + {{0x33, 0x48, 0x31, 0x00, 0x59, 0xf6, 0xf2, 0xca, 0x0f, 0x27, 0x1b, 0x63, 0x12, 0x7e, 0x02, 0x1d, 0x49, 0xc0, 0x5d, 0x79, 0x87, 0xef, 0x5e, 0x7a, 0x2f, 0x1f, 0x66, 0x55, 0xd8, 0x09, 0xd9, 0x61}}}, +{{{0x54, 0x83, 0x02, 0x18, 0x82, 0x93, 0x99, 0x07, 0xd0, 0xa7, 0xda, 0xd8, 0x75, 0x89, 0xfa, 0xf2, 0xd9, 0xa3, 0xb8, 0x6b, 0x5a, 0x35, 0x28, 0xd2, 0x6b, 0x59, 0xc2, 0xf8, 0x45, 0xe2, 0xbc, 0x06}} , + {{0x65, 0xc0, 0xa3, 0x88, 0x51, 0x95, 0xfc, 0x96, 0x94, 0x78, 0xe8, 0x0d, 0x8b, 0x41, 0xc9, 0xc2, 0x58, 0x48, 0x75, 0x10, 0x2f, 0xcd, 0x2a, 0xc9, 0xa0, 0x6d, 0x0f, 0xdd, 0x9c, 0x98, 0x26, 0x3d}}}, +{{{0x2f, 0x66, 0x29, 0x1b, 0x04, 0x89, 0xbd, 0x7e, 0xee, 0x6e, 0xdd, 0xb7, 0x0e, 0xef, 0xb0, 0x0c, 0xb4, 0xfc, 0x7f, 0xc2, 0xc9, 0x3a, 0x3c, 0x64, 0xef, 0x45, 0x44, 0xaf, 0x8a, 0x90, 0x65, 0x76}} , + {{0xa1, 0x4c, 0x70, 0x4b, 0x0e, 0xa0, 0x83, 0x70, 0x13, 0xa4, 0xaf, 0xb8, 0x38, 0x19, 0x22, 0x65, 0x09, 0xb4, 0x02, 0x4f, 0x06, 0xf8, 0x17, 0xce, 0x46, 0x45, 0xda, 0x50, 0x7c, 0x8a, 0xd1, 0x4e}}}, +{{{0xf7, 0xd4, 0x16, 0x6c, 0x4e, 0x95, 0x9d, 0x5d, 0x0f, 0x91, 0x2b, 0x52, 0xfe, 0x5c, 0x34, 0xe5, 0x30, 0xe6, 0xa4, 0x3b, 0xf3, 0xf3, 0x34, 0x08, 0xa9, 0x4a, 0xa0, 0xb5, 0x6e, 0xb3, 0x09, 0x0a}} , + {{0x26, 0xd9, 0x5e, 0xa3, 0x0f, 0xeb, 0xa2, 0xf3, 0x20, 0x3b, 0x37, 0xd4, 0xe4, 0x9e, 0xce, 0x06, 0x3d, 0x53, 0xed, 0xae, 0x2b, 0xeb, 0xb6, 0x24, 0x0a, 0x11, 0xa3, 0x0f, 0xd6, 0x7f, 0xa4, 0x3a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xdb, 0x9f, 0x2c, 0xfc, 0xd6, 0xb2, 0x1e, 0x2e, 0x52, 0x7a, 0x06, 0x87, 0x2d, 0x86, 0x72, 0x2b, 0x6d, 0x90, 0x77, 0x46, 0x43, 0xb5, 0x7a, 0xf8, 0x60, 0x7d, 0x91, 0x60, 0x5b, 0x9d, 0x9e, 0x07}} , + {{0x97, 0x87, 0xc7, 0x04, 0x1c, 0x38, 0x01, 0x39, 0x58, 0xc7, 0x85, 0xa3, 0xfc, 0x64, 0x00, 0x64, 0x25, 0xa2, 0xbf, 0x50, 0x94, 0xca, 0x26, 0x31, 0x45, 0x0a, 0x24, 0xd2, 0x51, 0x29, 0x51, 0x16}}}, +{{{0x4d, 0x4a, 0xd7, 0x98, 0x71, 0x57, 0xac, 0x7d, 0x8b, 0x37, 0xbd, 0x63, 0xff, 0x87, 0xb1, 0x49, 0x95, 0x20, 0x7c, 0xcf, 0x7c, 0x59, 0xc4, 0x91, 0x9c, 0xef, 0xd0, 0xdb, 0x60, 0x09, 0x9d, 0x46}} , + {{0xcb, 0x78, 0x94, 0x90, 0xe4, 0x45, 0xb3, 0xf6, 0xd9, 0xf6, 0x57, 0x74, 0xd5, 0xf8, 0x83, 0x4f, 0x39, 0xc9, 0xbd, 0x88, 0xc2, 0x57, 0x21, 0x1f, 0x24, 0x32, 0x68, 0xf8, 0xc7, 0x21, 0x5f, 0x0b}}}, +{{{0x2a, 0x36, 0x68, 0xfc, 0x5f, 0xb6, 0x4f, 0xa5, 0xe3, 0x9d, 0x24, 0x2f, 0xc0, 0x93, 0x61, 0xcf, 0xf8, 0x0a, 0xed, 0xe1, 0xdb, 0x27, 0xec, 0x0e, 0x14, 0x32, 0x5f, 0x8e, 0xa1, 0x62, 0x41, 0x16}} , + {{0x95, 0x21, 0x01, 0xce, 0x95, 0x5b, 0x0e, 0x57, 0xc7, 0xb9, 0x62, 0xb5, 0x28, 0xca, 0x11, 0xec, 0xb4, 0x46, 0x06, 0x73, 0x26, 0xff, 0xfb, 0x66, 0x7d, 0xee, 0x5f, 0xb2, 0x56, 0xfd, 0x2a, 0x08}}}, +{{{0x92, 0x67, 0x77, 0x56, 0xa1, 0xff, 0xc4, 0xc5, 0x95, 0xf0, 0xe3, 0x3a, 0x0a, 0xca, 0x94, 0x4d, 0x9e, 0x7e, 0x3d, 0xb9, 0x6e, 0xb6, 0xb0, 0xce, 0xa4, 0x30, 0x89, 0x99, 0xe9, 0xad, 0x11, 0x59}} , + {{0xf6, 0x48, 0x95, 0xa1, 0x6f, 0x5f, 0xb7, 0xa5, 0xbb, 0x30, 0x00, 0x1c, 0xd2, 0x8a, 0xd6, 0x25, 0x26, 0x1b, 0xb2, 0x0d, 0x37, 0x6a, 0x05, 0xf4, 0x9d, 0x3e, 0x17, 0x2a, 0x43, 0xd2, 0x3a, 0x06}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x32, 0x99, 0x93, 0xd1, 0x9a, 0x72, 0xf3, 0xa9, 0x16, 0xbd, 0xb4, 0x4c, 0xdd, 0xf9, 0xd4, 0xb2, 0x64, 0x9a, 0xd3, 0x05, 0xe4, 0xa3, 0x73, 0x1c, 0xcb, 0x7e, 0x57, 0x67, 0xff, 0x04, 0xb3, 0x10}} , + {{0xb9, 0x4b, 0xa4, 0xad, 0xd0, 0x6d, 0x61, 0x23, 0xb4, 0xaf, 0x34, 0xa9, 0xaa, 0x65, 0xec, 0xd9, 0x69, 0xe3, 0x85, 0xcd, 0xcc, 0xe7, 0xb0, 0x9b, 0x41, 0xc1, 0x1c, 0xf9, 0xa0, 0xfa, 0xb7, 0x13}}}, +{{{0x04, 0xfd, 0x88, 0x3c, 0x0c, 0xd0, 0x09, 0x52, 0x51, 0x4f, 0x06, 0x19, 0xcc, 0xc3, 0xbb, 0xde, 0x80, 0xc5, 0x33, 0xbc, 0xf9, 0xf3, 0x17, 0x36, 0xdd, 0xc6, 0xde, 0xe8, 0x9b, 0x5d, 0x79, 0x1b}} , + {{0x65, 0x0a, 0xbe, 0x51, 0x57, 0xad, 0x50, 0x79, 0x08, 0x71, 0x9b, 0x07, 0x95, 0x8f, 0xfb, 0xae, 0x4b, 0x38, 0xba, 0xcf, 0x53, 0x2a, 0x86, 0x1e, 0xc0, 0x50, 0x5c, 0x67, 0x1b, 0xf6, 0x87, 0x6c}}}, +{{{0x4f, 0x00, 0xb2, 0x66, 0x55, 0xed, 0x4a, 0xed, 0x8d, 0xe1, 0x66, 0x18, 0xb2, 0x14, 0x74, 0x8d, 0xfd, 0x1a, 0x36, 0x0f, 0x26, 0x5c, 0x8b, 0x89, 0xf3, 0xab, 0xf2, 0xf3, 0x24, 0x67, 0xfd, 0x70}} , + {{0xfd, 0x4e, 0x2a, 0xc1, 0x3a, 0xca, 0x8f, 0x00, 0xd8, 0xec, 0x74, 0x67, 0xef, 0x61, 0xe0, 0x28, 0xd0, 0x96, 0xf4, 0x48, 0xde, 0x81, 0xe3, 0xef, 0xdc, 0xaa, 0x7d, 0xf3, 0xb6, 0x55, 0xa6, 0x65}}}, +{{{0xeb, 0xcb, 0xc5, 0x70, 0x91, 0x31, 0x10, 0x93, 0x0d, 0xc8, 0xd0, 0xef, 0x62, 0xe8, 0x6f, 0x82, 0xe3, 0x69, 0x3d, 0x91, 0x7f, 0x31, 0xe1, 0x26, 0x35, 0x3c, 0x4a, 0x2f, 0xab, 0xc4, 0x9a, 0x5e}} , + {{0xab, 0x1b, 0xb5, 0xe5, 0x2b, 0xc3, 0x0e, 0x29, 0xb0, 0xd0, 0x73, 0xe6, 0x4f, 0x64, 0xf2, 0xbc, 0xe4, 0xe4, 0xe1, 0x9a, 0x52, 0x33, 0x2f, 0xbd, 0xcc, 0x03, 0xee, 0x8a, 0xfa, 0x00, 0x5f, 0x50}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xf6, 0xdb, 0x0d, 0x22, 0x3d, 0xb5, 0x14, 0x75, 0x31, 0xf0, 0x81, 0xe2, 0xb9, 0x37, 0xa2, 0xa9, 0x84, 0x11, 0x9a, 0x07, 0xb5, 0x53, 0x89, 0x78, 0xa9, 0x30, 0x27, 0xa1, 0xf1, 0x4e, 0x5c, 0x2e}} , + {{0x8b, 0x00, 0x54, 0xfb, 0x4d, 0xdc, 0xcb, 0x17, 0x35, 0x40, 0xff, 0xb7, 0x8c, 0xfe, 0x4a, 0xe4, 0x4e, 0x99, 0x4e, 0xa8, 0x74, 0x54, 0x5d, 0x5c, 0x96, 0xa3, 0x12, 0x55, 0x36, 0x31, 0x17, 0x5c}}}, +{{{0xce, 0x24, 0xef, 0x7b, 0x86, 0xf2, 0x0f, 0x77, 0xe8, 0x5c, 0x7d, 0x87, 0x38, 0x2d, 0xef, 0xaf, 0xf2, 0x8c, 0x72, 0x2e, 0xeb, 0xb6, 0x55, 0x4b, 0x6e, 0xf1, 0x4e, 0x8a, 0x0e, 0x9a, 0x6c, 0x4c}} , + {{0x25, 0xea, 0x86, 0xc2, 0xd1, 0x4f, 0xb7, 0x3e, 0xa8, 0x5c, 0x8d, 0x66, 0x81, 0x25, 0xed, 0xc5, 0x4c, 0x05, 0xb9, 0xd8, 0xd6, 0x70, 0xbe, 0x73, 0x82, 0xe8, 0xa1, 0xe5, 0x1e, 0x71, 0xd5, 0x26}}}, +{{{0x4e, 0x6d, 0xc3, 0xa7, 0x4f, 0x22, 0x45, 0x26, 0xa2, 0x7e, 0x16, 0xf7, 0xf7, 0x63, 0xdc, 0x86, 0x01, 0x2a, 0x71, 0x38, 0x5c, 0x33, 0xc3, 0xce, 0x30, 0xff, 0xf9, 0x2c, 0x91, 0x71, 0x8a, 0x72}} , + {{0x8c, 0x44, 0x09, 0x28, 0xd5, 0x23, 0xc9, 0x8f, 0xf3, 0x84, 0x45, 0xc6, 0x9a, 0x5e, 0xff, 0xd2, 0xc7, 0x57, 0x93, 0xa3, 0xc1, 0x69, 0xdd, 0x62, 0x0f, 0xda, 0x5c, 0x30, 0x59, 0x5d, 0xe9, 0x4c}}}, +{{{0x92, 0x7e, 0x50, 0x27, 0x72, 0xd7, 0x0c, 0xd6, 0x69, 0x96, 0x81, 0x35, 0x84, 0x94, 0x35, 0x8b, 0x6c, 0xaa, 0x62, 0x86, 0x6e, 0x1c, 0x15, 0xf3, 0x6c, 0xb3, 0xff, 0x65, 0x1b, 0xa2, 0x9b, 0x59}} , + {{0xe2, 0xa9, 0x65, 0x88, 0xc4, 0x50, 0xfa, 0xbb, 0x3b, 0x6e, 0x5f, 0x44, 0x01, 0xca, 0x97, 0xd4, 0xdd, 0xf6, 0xcd, 0x3f, 0x3f, 0xe5, 0x97, 0x67, 0x2b, 0x8c, 0x66, 0x0f, 0x35, 0x9b, 0xf5, 0x07}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xf1, 0x59, 0x27, 0xd8, 0xdb, 0x5a, 0x11, 0x5e, 0x82, 0xf3, 0x38, 0xff, 0x1c, 0xed, 0xfe, 0x3f, 0x64, 0x54, 0x3f, 0x7f, 0xd1, 0x81, 0xed, 0xef, 0x65, 0xc5, 0xcb, 0xfd, 0xe1, 0x80, 0xcd, 0x11}} , + {{0xe0, 0xdb, 0x22, 0x28, 0xe6, 0xff, 0x61, 0x9d, 0x41, 0x14, 0x2d, 0x3b, 0x26, 0x22, 0xdf, 0xf1, 0x34, 0x81, 0xe9, 0x45, 0xee, 0x0f, 0x98, 0x8b, 0xa6, 0x3f, 0xef, 0xf7, 0x43, 0x19, 0xf1, 0x43}}}, +{{{0xee, 0xf3, 0x00, 0xa1, 0x50, 0xde, 0xc0, 0xb6, 0x01, 0xe3, 0x8c, 0x3c, 0x4d, 0x31, 0xd2, 0xb0, 0x58, 0xcd, 0xed, 0x10, 0x4a, 0x7a, 0xef, 0x80, 0xa9, 0x19, 0x32, 0xf3, 0xd8, 0x33, 0x8c, 0x06}} , + {{0xcb, 0x7d, 0x4f, 0xff, 0x30, 0xd8, 0x12, 0x3b, 0x39, 0x1c, 0x06, 0xf9, 0x4c, 0x34, 0x35, 0x71, 0xb5, 0x16, 0x94, 0x67, 0xdf, 0xee, 0x11, 0xde, 0xa4, 0x1d, 0x88, 0x93, 0x35, 0xa9, 0x32, 0x10}}}, +{{{0xe9, 0xc3, 0xbc, 0x7b, 0x5c, 0xfc, 0xb2, 0xf9, 0xc9, 0x2f, 0xe5, 0xba, 0x3a, 0x0b, 0xab, 0x64, 0x38, 0x6f, 0x5b, 0x4b, 0x93, 0xda, 0x64, 0xec, 0x4d, 0x3d, 0xa0, 0xf5, 0xbb, 0xba, 0x47, 0x48}} , + {{0x60, 0xbc, 0x45, 0x1f, 0x23, 0xa2, 0x3b, 0x70, 0x76, 0xe6, 0x97, 0x99, 0x4f, 0x77, 0x54, 0x67, 0x30, 0x9a, 0xe7, 0x66, 0xd6, 0xcd, 0x2e, 0x51, 0x24, 0x2c, 0x42, 0x4a, 0x11, 0xfe, 0x6f, 0x7e}}}, +{{{0x87, 0xc0, 0xb1, 0xf0, 0xa3, 0x6f, 0x0c, 0x93, 0xa9, 0x0a, 0x72, 0xef, 0x5c, 0xbe, 0x65, 0x35, 0xa7, 0x6a, 0x4e, 0x2c, 0xbf, 0x21, 0x23, 0xe8, 0x2f, 0x97, 0xc7, 0x3e, 0xc8, 0x17, 0xac, 0x1e}} , + {{0x7b, 0xef, 0x21, 0xe5, 0x40, 0xcc, 0x1e, 0xdc, 0xd6, 0xbd, 0x97, 0x7a, 0x7c, 0x75, 0x86, 0x7a, 0x25, 0x5a, 0x6e, 0x7c, 0xe5, 0x51, 0x3c, 0x1b, 0x5b, 0x82, 0x9a, 0x07, 0x60, 0xa1, 0x19, 0x04}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x96, 0x88, 0xa6, 0xab, 0x8f, 0xe3, 0x3a, 0x49, 0xf8, 0xfe, 0x34, 0xe7, 0x6a, 0xb2, 0xfe, 0x40, 0x26, 0x74, 0x57, 0x4c, 0xf6, 0xd4, 0x99, 0xce, 0x5d, 0x7b, 0x2f, 0x67, 0xd6, 0x5a, 0xe4, 0x4e}} , + {{0x5c, 0x82, 0xb3, 0xbd, 0x55, 0x25, 0xf6, 0x6a, 0x93, 0xa4, 0x02, 0xc6, 0x7d, 0x5c, 0xb1, 0x2b, 0x5b, 0xff, 0xfb, 0x56, 0xf8, 0x01, 0x41, 0x90, 0xc6, 0xb6, 0xac, 0x4f, 0xfe, 0xa7, 0x41, 0x70}}}, +{{{0xdb, 0xfa, 0x9b, 0x2c, 0xd4, 0x23, 0x67, 0x2c, 0x8a, 0x63, 0x6c, 0x07, 0x26, 0x48, 0x4f, 0xc2, 0x03, 0xd2, 0x53, 0x20, 0x28, 0xed, 0x65, 0x71, 0x47, 0xa9, 0x16, 0x16, 0x12, 0xbc, 0x28, 0x33}} , + {{0x39, 0xc0, 0xfa, 0xfa, 0xcd, 0x33, 0x43, 0xc7, 0x97, 0x76, 0x9b, 0x93, 0x91, 0x72, 0xeb, 0xc5, 0x18, 0x67, 0x4c, 0x11, 0xf0, 0xf4, 0xe5, 0x73, 0xb2, 0x5c, 0x1b, 0xc2, 0x26, 0x3f, 0xbf, 0x2b}}}, +{{{0x86, 0xe6, 0x8c, 0x1d, 0xdf, 0xca, 0xfc, 0xd5, 0xf8, 0x3a, 0xc3, 0x44, 0x72, 0xe6, 0x78, 0x9d, 0x2b, 0x97, 0xf8, 0x28, 0x45, 0xb4, 0x20, 0xc9, 0x2a, 0x8c, 0x67, 0xaa, 0x11, 0xc5, 0x5b, 0x2f}} , + {{0x17, 0x0f, 0x86, 0x52, 0xd7, 0x9d, 0xc3, 0x44, 0x51, 0x76, 0x32, 0x65, 0xb4, 0x37, 0x81, 0x99, 0x46, 0x37, 0x62, 0xed, 0xcf, 0x64, 0x9d, 0x72, 0x40, 0x7a, 0x4c, 0x0b, 0x76, 0x2a, 0xfb, 0x56}}}, +{{{0x33, 0xa7, 0x90, 0x7c, 0xc3, 0x6f, 0x17, 0xa5, 0xa0, 0x67, 0x72, 0x17, 0xea, 0x7e, 0x63, 0x14, 0x83, 0xde, 0xc1, 0x71, 0x2d, 0x41, 0x32, 0x7a, 0xf3, 0xd1, 0x2b, 0xd8, 0x2a, 0xa6, 0x46, 0x36}} , + {{0xac, 0xcc, 0x6b, 0x7c, 0xf9, 0xb8, 0x8b, 0x08, 0x5c, 0xd0, 0x7d, 0x8f, 0x73, 0xea, 0x20, 0xda, 0x86, 0xca, 0x00, 0xc7, 0xad, 0x73, 0x4d, 0xe9, 0xe8, 0xa9, 0xda, 0x1f, 0x03, 0x06, 0xdd, 0x24}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x9c, 0xb2, 0x61, 0x0a, 0x98, 0x2a, 0xa5, 0xd7, 0xee, 0xa9, 0xac, 0x65, 0xcb, 0x0a, 0x1e, 0xe2, 0xbe, 0xdc, 0x85, 0x59, 0x0f, 0x9c, 0xa6, 0x57, 0x34, 0xa5, 0x87, 0xeb, 0x7b, 0x1e, 0x0c, 0x3c}} , + {{0x2f, 0xbd, 0x84, 0x63, 0x0d, 0xb5, 0xa0, 0xf0, 0x4b, 0x9e, 0x93, 0xc6, 0x34, 0x9a, 0x34, 0xff, 0x73, 0x19, 0x2f, 0x6e, 0x54, 0x45, 0x2c, 0x92, 0x31, 0x76, 0x34, 0xf1, 0xb2, 0x26, 0xe8, 0x74}}}, +{{{0x0a, 0x67, 0x90, 0x6d, 0x0c, 0x4c, 0xcc, 0xc0, 0xe6, 0xbd, 0xa7, 0x5e, 0x55, 0x8c, 0xcd, 0x58, 0x9b, 0x11, 0xa2, 0xbb, 0x4b, 0xb1, 0x43, 0x04, 0x3c, 0x55, 0xed, 0x23, 0xfe, 0xcd, 0xb1, 0x53}} , + {{0x05, 0xfb, 0x75, 0xf5, 0x01, 0xaf, 0x38, 0x72, 0x58, 0xfc, 0x04, 0x29, 0x34, 0x7a, 0x67, 0xa2, 0x08, 0x50, 0x6e, 0xd0, 0x2b, 0x73, 0xd5, 0xb8, 0xe4, 0x30, 0x96, 0xad, 0x45, 0xdf, 0xa6, 0x5c}}}, +{{{0x0d, 0x88, 0x1a, 0x90, 0x7e, 0xdc, 0xd8, 0xfe, 0xc1, 0x2f, 0x5d, 0x67, 0xee, 0x67, 0x2f, 0xed, 0x6f, 0x55, 0x43, 0x5f, 0x87, 0x14, 0x35, 0x42, 0xd3, 0x75, 0xae, 0xd5, 0xd3, 0x85, 0x1a, 0x76}} , + {{0x87, 0xc8, 0xa0, 0x6e, 0xe1, 0xb0, 0xad, 0x6a, 0x4a, 0x34, 0x71, 0xed, 0x7c, 0xd6, 0x44, 0x03, 0x65, 0x4a, 0x5c, 0x5c, 0x04, 0xf5, 0x24, 0x3f, 0xb0, 0x16, 0x5e, 0x8c, 0xb2, 0xd2, 0xc5, 0x20}}}, +{{{0x98, 0x83, 0xc2, 0x37, 0xa0, 0x41, 0xa8, 0x48, 0x5c, 0x5f, 0xbf, 0xc8, 0xfa, 0x24, 0xe0, 0x59, 0x2c, 0xbd, 0xf6, 0x81, 0x7e, 0x88, 0xe6, 0xca, 0x04, 0xd8, 0x5d, 0x60, 0xbb, 0x74, 0xa7, 0x0b}} , + {{0x21, 0x13, 0x91, 0xbf, 0x77, 0x7a, 0x33, 0xbc, 0xe9, 0x07, 0x39, 0x0a, 0xdd, 0x7d, 0x06, 0x10, 0x9a, 0xee, 0x47, 0x73, 0x1b, 0x15, 0x5a, 0xfb, 0xcd, 0x4d, 0xd0, 0xd2, 0x3a, 0x01, 0xba, 0x54}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x48, 0xd5, 0x39, 0x4a, 0x0b, 0x20, 0x6a, 0x43, 0xa0, 0x07, 0x82, 0x5e, 0x49, 0x7c, 0xc9, 0x47, 0xf1, 0x7c, 0x37, 0xb9, 0x23, 0xef, 0x6b, 0x46, 0x45, 0x8c, 0x45, 0x76, 0xdf, 0x14, 0x6b, 0x6e}} , + {{0x42, 0xc9, 0xca, 0x29, 0x4c, 0x76, 0x37, 0xda, 0x8a, 0x2d, 0x7c, 0x3a, 0x58, 0xf2, 0x03, 0xb4, 0xb5, 0xb9, 0x1a, 0x13, 0x2d, 0xde, 0x5f, 0x6b, 0x9d, 0xba, 0x52, 0xc9, 0x5d, 0xb3, 0xf3, 0x30}}}, +{{{0x4c, 0x6f, 0xfe, 0x6b, 0x0c, 0x62, 0xd7, 0x48, 0x71, 0xef, 0xb1, 0x85, 0x79, 0xc0, 0xed, 0x24, 0xb1, 0x08, 0x93, 0x76, 0x8e, 0xf7, 0x38, 0x8e, 0xeb, 0xfe, 0x80, 0x40, 0xaf, 0x90, 0x64, 0x49}} , + {{0x4a, 0x88, 0xda, 0xc1, 0x98, 0x44, 0x3c, 0x53, 0x4e, 0xdb, 0x4b, 0xb9, 0x12, 0x5f, 0xcd, 0x08, 0x04, 0xef, 0x75, 0xe7, 0xb1, 0x3a, 0xe5, 0x07, 0xfa, 0xca, 0x65, 0x7b, 0x72, 0x10, 0x64, 0x7f}}}, +{{{0x3d, 0x81, 0xf0, 0xeb, 0x16, 0xfd, 0x58, 0x33, 0x8d, 0x7c, 0x1a, 0xfb, 0x20, 0x2c, 0x8a, 0xee, 0x90, 0xbb, 0x33, 0x6d, 0x45, 0xe9, 0x8e, 0x99, 0x85, 0xe1, 0x08, 0x1f, 0xc5, 0xf1, 0xb5, 0x46}} , + {{0xe4, 0xe7, 0x43, 0x4b, 0xa0, 0x3f, 0x2b, 0x06, 0xba, 0x17, 0xae, 0x3d, 0xe6, 0xce, 0xbd, 0xb8, 0xed, 0x74, 0x11, 0x35, 0xec, 0x96, 0xfe, 0x31, 0xe3, 0x0e, 0x7a, 0x4e, 0xc9, 0x1d, 0xcb, 0x20}}}, +{{{0xe0, 0x67, 0xe9, 0x7b, 0xdb, 0x96, 0x5c, 0xb0, 0x32, 0xd0, 0x59, 0x31, 0x90, 0xdc, 0x92, 0x97, 0xac, 0x09, 0x38, 0x31, 0x0f, 0x7e, 0xd6, 0x5d, 0xd0, 0x06, 0xb6, 0x1f, 0xea, 0xf0, 0x5b, 0x07}} , + {{0x81, 0x9f, 0xc7, 0xde, 0x6b, 0x41, 0x22, 0x35, 0x14, 0x67, 0x77, 0x3e, 0x90, 0x81, 0xb0, 0xd9, 0x85, 0x4c, 0xca, 0x9b, 0x3f, 0x04, 0x59, 0xd6, 0xaa, 0x17, 0xc3, 0x88, 0x34, 0x37, 0xba, 0x43}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x4c, 0xb6, 0x69, 0xc8, 0x81, 0x95, 0x94, 0x33, 0x92, 0x34, 0xe9, 0x3c, 0x84, 0x0d, 0x3d, 0x5a, 0x37, 0x9c, 0x22, 0xa0, 0xaa, 0x65, 0xce, 0xb4, 0xc2, 0x2d, 0x66, 0x67, 0x02, 0xff, 0x74, 0x10}} , + {{0x22, 0xb0, 0xd5, 0xe6, 0xc7, 0xef, 0xb1, 0xa7, 0x13, 0xda, 0x60, 0xb4, 0x80, 0xc1, 0x42, 0x7d, 0x10, 0x70, 0x97, 0x04, 0x4d, 0xda, 0x23, 0x89, 0xc2, 0x0e, 0x68, 0xcb, 0xde, 0xe0, 0x9b, 0x29}}}, +{{{0x33, 0xfe, 0x42, 0x2a, 0x36, 0x2b, 0x2e, 0x36, 0x64, 0x5c, 0x8b, 0xcc, 0x81, 0x6a, 0x15, 0x08, 0xa1, 0x27, 0xe8, 0x57, 0xe5, 0x78, 0x8e, 0xf2, 0x58, 0x19, 0x12, 0x42, 0xae, 0xc4, 0x63, 0x3e}} , + {{0x78, 0x96, 0x9c, 0xa7, 0xca, 0x80, 0xae, 0x02, 0x85, 0xb1, 0x7c, 0x04, 0x5c, 0xc1, 0x5b, 0x26, 0xc1, 0xba, 0xed, 0xa5, 0x59, 0x70, 0x85, 0x8c, 0x8c, 0xe8, 0x87, 0xac, 0x6a, 0x28, 0x99, 0x35}}}, +{{{0x9f, 0x04, 0x08, 0x28, 0xbe, 0x87, 0xda, 0x80, 0x28, 0x38, 0xde, 0x9f, 0xcd, 0xe4, 0xe3, 0x62, 0xfb, 0x2e, 0x46, 0x8d, 0x01, 0xb3, 0x06, 0x51, 0xd4, 0x19, 0x3b, 0x11, 0xfa, 0xe2, 0xad, 0x1e}} , + {{0xa0, 0x20, 0x99, 0x69, 0x0a, 0xae, 0xa3, 0x70, 0x4e, 0x64, 0x80, 0xb7, 0x85, 0x9c, 0x87, 0x54, 0x43, 0x43, 0x55, 0x80, 0x6d, 0x8d, 0x7c, 0xa9, 0x64, 0xca, 0x6c, 0x2e, 0x21, 0xd8, 0xc8, 0x6c}}}, +{{{0x91, 0x4a, 0x07, 0xad, 0x08, 0x75, 0xc1, 0x4f, 0xa4, 0xb2, 0xc3, 0x6f, 0x46, 0x3e, 0xb1, 0xce, 0x52, 0xab, 0x67, 0x09, 0x54, 0x48, 0x6b, 0x6c, 0xd7, 0x1d, 0x71, 0x76, 0xcb, 0xff, 0xdd, 0x31}} , + {{0x36, 0x88, 0xfa, 0xfd, 0xf0, 0x36, 0x6f, 0x07, 0x74, 0x88, 0x50, 0xd0, 0x95, 0x38, 0x4a, 0x48, 0x2e, 0x07, 0x64, 0x97, 0x11, 0x76, 0x01, 0x1a, 0x27, 0x4d, 0x8e, 0x25, 0x9a, 0x9b, 0x1c, 0x22}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00}}}, +{{{0xbe, 0x57, 0xbd, 0x0e, 0x0f, 0xac, 0x5e, 0x76, 0xa3, 0x71, 0xad, 0x2b, 0x10, 0x45, 0x02, 0xec, 0x59, 0xd5, 0x5d, 0xa9, 0x44, 0xcc, 0x25, 0x4c, 0xb3, 0x3c, 0x5b, 0x69, 0x07, 0x55, 0x26, 0x6b}} , + {{0x30, 0x6b, 0xd4, 0xa7, 0x51, 0x29, 0xe3, 0xf9, 0x7a, 0x75, 0x2a, 0x82, 0x2f, 0xd6, 0x1d, 0x99, 0x2b, 0x80, 0xd5, 0x67, 0x1e, 0x15, 0x9d, 0xca, 0xfd, 0xeb, 0xac, 0x97, 0x35, 0x09, 0x7f, 0x3f}}}, +{{{0x35, 0x0d, 0x34, 0x0a, 0xb8, 0x67, 0x56, 0x29, 0x20, 0xf3, 0x19, 0x5f, 0xe2, 0x83, 0x42, 0x73, 0x53, 0xa8, 0xc5, 0x02, 0x19, 0x33, 0xb4, 0x64, 0xbd, 0xc3, 0x87, 0x8c, 0xd7, 0x76, 0xed, 0x25}} , + {{0x47, 0x39, 0x37, 0x76, 0x0d, 0x1d, 0x0c, 0xf5, 0x5a, 0x6d, 0x43, 0x88, 0x99, 0x15, 0xb4, 0x52, 0x0f, 0x2a, 0xb3, 0xb0, 0x3f, 0xa6, 0xb3, 0x26, 0xb3, 0xc7, 0x45, 0xf5, 0x92, 0x5f, 0x9b, 0x17}}}, +{{{0x9d, 0x23, 0xbd, 0x15, 0xfe, 0x52, 0x52, 0x15, 0x26, 0x79, 0x86, 0xba, 0x06, 0x56, 0x66, 0xbb, 0x8c, 0x2e, 0x10, 0x11, 0xd5, 0x4a, 0x18, 0x52, 0xda, 0x84, 0x44, 0xf0, 0x3e, 0xe9, 0x8c, 0x35}} , + {{0xad, 0xa0, 0x41, 0xec, 0xc8, 0x4d, 0xb9, 0xd2, 0x6e, 0x96, 0x4e, 0x5b, 0xc5, 0xc2, 0xa0, 0x1b, 0xcf, 0x0c, 0xbf, 0x17, 0x66, 0x57, 0xc1, 0x17, 0x90, 0x45, 0x71, 0xc2, 0xe1, 0x24, 0xeb, 0x27}}}, +{{{0x2c, 0xb9, 0x42, 0xa4, 0xaf, 0x3b, 0x42, 0x0e, 0xc2, 0x0f, 0xf2, 0xea, 0x83, 0xaf, 0x9a, 0x13, 0x17, 0xb0, 0xbd, 0x89, 0x17, 0xe3, 0x72, 0xcb, 0x0e, 0x76, 0x7e, 0x41, 0x63, 0x04, 0x88, 0x71}} , + {{0x75, 0x78, 0x38, 0x86, 0x57, 0xdd, 0x9f, 0xee, 0x54, 0x70, 0x65, 0xbf, 0xf1, 0x2c, 0xe0, 0x39, 0x0d, 0xe3, 0x89, 0xfd, 0x8e, 0x93, 0x4f, 0x43, 0xdc, 0xd5, 0x5b, 0xde, 0xf9, 0x98, 0xe5, 0x7b}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00}}}, +{{{0xe7, 0x3b, 0x65, 0x11, 0xdf, 0xb2, 0xf2, 0x63, 0x94, 0x12, 0x6f, 0x5c, 0x9e, 0x77, 0xc1, 0xb6, 0xd8, 0xab, 0x58, 0x7a, 0x1d, 0x95, 0x73, 0xdd, 0xe7, 0xe3, 0x6f, 0xf2, 0x03, 0x1d, 0xdb, 0x76}} , + {{0xae, 0x06, 0x4e, 0x2c, 0x52, 0x1b, 0xbc, 0x5a, 0x5a, 0xa5, 0xbe, 0x27, 0xbd, 0xeb, 0xe1, 0x14, 0x17, 0x68, 0x26, 0x07, 0x03, 0xd1, 0x18, 0x0b, 0xdf, 0xf1, 0x06, 0x5c, 0xa6, 0x1b, 0xb9, 0x24}}}, +{{{0xc5, 0x66, 0x80, 0x13, 0x0e, 0x48, 0x8c, 0x87, 0x31, 0x84, 0xb4, 0x60, 0xed, 0xc5, 0xec, 0xb6, 0xc5, 0x05, 0x33, 0x5f, 0x2f, 0x7d, 0x40, 0xb6, 0x32, 0x1d, 0x38, 0x74, 0x1b, 0xf1, 0x09, 0x3d}} , + {{0xd4, 0x69, 0x82, 0xbc, 0x8d, 0xf8, 0x34, 0x36, 0x75, 0x55, 0x18, 0x55, 0x58, 0x3c, 0x79, 0xaf, 0x26, 0x80, 0xab, 0x9b, 0x95, 0x00, 0xf1, 0xcb, 0xda, 0xc1, 0x9f, 0xf6, 0x2f, 0xa2, 0xf4, 0x45}}}, +{{{0x17, 0xbe, 0xeb, 0x85, 0xed, 0x9e, 0xcd, 0x56, 0xf5, 0x17, 0x45, 0x42, 0xb4, 0x1f, 0x44, 0x4c, 0x05, 0x74, 0x15, 0x47, 0x00, 0xc6, 0x6a, 0x3d, 0x24, 0x09, 0x0d, 0x58, 0xb1, 0x42, 0xd7, 0x04}} , + {{0x8d, 0xbd, 0xa3, 0xc4, 0x06, 0x9b, 0x1f, 0x90, 0x58, 0x60, 0x74, 0xb2, 0x00, 0x3b, 0x3c, 0xd2, 0xda, 0x82, 0xbb, 0x10, 0x90, 0x69, 0x92, 0xa9, 0xb4, 0x30, 0x81, 0xe3, 0x7c, 0xa8, 0x89, 0x45}}}, +{{{0x3f, 0xdc, 0x05, 0xcb, 0x41, 0x3c, 0xc8, 0x23, 0x04, 0x2c, 0x38, 0x99, 0xe3, 0x68, 0x55, 0xf9, 0xd3, 0x32, 0xc7, 0xbf, 0xfa, 0xd4, 0x1b, 0x5d, 0xde, 0xdc, 0x10, 0x42, 0xc0, 0x42, 0xd9, 0x75}} , + {{0x2d, 0xab, 0x35, 0x4e, 0x87, 0xc4, 0x65, 0x97, 0x67, 0x24, 0xa4, 0x47, 0xad, 0x3f, 0x8e, 0xf3, 0xcb, 0x31, 0x17, 0x77, 0xc5, 0xe2, 0xd7, 0x8f, 0x3c, 0xc1, 0xcd, 0x56, 0x48, 0xc1, 0x6c, 0x69}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00}}}, +{{{0x14, 0xae, 0x5f, 0x88, 0x7b, 0xa5, 0x90, 0xdf, 0x10, 0xb2, 0x8b, 0x5e, 0x24, 0x17, 0xc3, 0xa3, 0xd4, 0x0f, 0x92, 0x61, 0x1a, 0x19, 0x5a, 0xad, 0x76, 0xbd, 0xd8, 0x1c, 0xdd, 0xe0, 0x12, 0x6d}} , + {{0x8e, 0xbd, 0x70, 0x8f, 0x02, 0xa3, 0x24, 0x4d, 0x5a, 0x67, 0xc4, 0xda, 0xf7, 0x20, 0x0f, 0x81, 0x5b, 0x7a, 0x05, 0x24, 0x67, 0x83, 0x0b, 0x2a, 0x80, 0xe7, 0xfd, 0x74, 0x4b, 0x9e, 0x5c, 0x0d}}}, +{{{0x94, 0xd5, 0x5f, 0x1f, 0xa2, 0xfb, 0xeb, 0xe1, 0x07, 0x34, 0xf8, 0x20, 0xad, 0x81, 0x30, 0x06, 0x2d, 0xa1, 0x81, 0x95, 0x36, 0xcf, 0x11, 0x0b, 0xaf, 0xc1, 0x2b, 0x9a, 0x6c, 0x55, 0xc1, 0x16}} , + {{0x36, 0x4f, 0xf1, 0x5e, 0x74, 0x35, 0x13, 0x28, 0xd7, 0x11, 0xcf, 0xb8, 0xde, 0x93, 0xb3, 0x05, 0xb8, 0xb5, 0x73, 0xe9, 0xeb, 0xad, 0x19, 0x1e, 0x89, 0x0f, 0x8b, 0x15, 0xd5, 0x8c, 0xe3, 0x23}}}, +{{{0x33, 0x79, 0xe7, 0x18, 0xe6, 0x0f, 0x57, 0x93, 0x15, 0xa0, 0xa7, 0xaa, 0xc4, 0xbf, 0x4f, 0x30, 0x74, 0x95, 0x5e, 0x69, 0x4a, 0x5b, 0x45, 0xe4, 0x00, 0xeb, 0x23, 0x74, 0x4c, 0xdf, 0x6b, 0x45}} , + {{0x97, 0x29, 0x6c, 0xc4, 0x42, 0x0b, 0xdd, 0xc0, 0x29, 0x5c, 0x9b, 0x34, 0x97, 0xd0, 0xc7, 0x79, 0x80, 0x63, 0x74, 0xe4, 0x8e, 0x37, 0xb0, 0x2b, 0x7c, 0xe8, 0x68, 0x6c, 0xc3, 0x82, 0x97, 0x57}}}, +{{{0x22, 0xbe, 0x83, 0xb6, 0x4b, 0x80, 0x6b, 0x43, 0x24, 0x5e, 0xef, 0x99, 0x9b, 0xa8, 0xfc, 0x25, 0x8d, 0x3b, 0x03, 0x94, 0x2b, 0x3e, 0xe7, 0x95, 0x76, 0x9b, 0xcc, 0x15, 0xdb, 0x32, 0xe6, 0x66}} , + {{0x84, 0xf0, 0x4a, 0x13, 0xa6, 0xd6, 0xfa, 0x93, 0x46, 0x07, 0xf6, 0x7e, 0x5c, 0x6d, 0x5e, 0xf6, 0xa6, 0xe7, 0x48, 0xf0, 0x06, 0xea, 0xff, 0x90, 0xc1, 0xcc, 0x4c, 0x19, 0x9c, 0x3c, 0x4e, 0x53}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00}}}, +{{{0x2a, 0x50, 0xe3, 0x07, 0x15, 0x59, 0xf2, 0x8b, 0x81, 0xf2, 0xf3, 0xd3, 0x6c, 0x99, 0x8c, 0x70, 0x67, 0xec, 0xcc, 0xee, 0x9e, 0x59, 0x45, 0x59, 0x7d, 0x47, 0x75, 0x69, 0xf5, 0x24, 0x93, 0x5d}} , + {{0x6a, 0x4f, 0x1b, 0xbe, 0x6b, 0x30, 0xcf, 0x75, 0x46, 0xe3, 0x7b, 0x9d, 0xfc, 0xcd, 0xd8, 0x5c, 0x1f, 0xb4, 0xc8, 0xe2, 0x24, 0xec, 0x1a, 0x28, 0x05, 0x32, 0x57, 0xfd, 0x3c, 0x5a, 0x98, 0x10}}}, +{{{0xa3, 0xdb, 0xf7, 0x30, 0xd8, 0xc2, 0x9a, 0xe1, 0xd3, 0xce, 0x22, 0xe5, 0x80, 0x1e, 0xd9, 0xe4, 0x1f, 0xab, 0xc0, 0x71, 0x1a, 0x86, 0x0e, 0x27, 0x99, 0x5b, 0xfa, 0x76, 0x99, 0xb0, 0x08, 0x3c}} , + {{0x2a, 0x93, 0xd2, 0x85, 0x1b, 0x6a, 0x5d, 0xa6, 0xee, 0xd1, 0xd1, 0x33, 0xbd, 0x6a, 0x36, 0x73, 0x37, 0x3a, 0x44, 0xb4, 0xec, 0xa9, 0x7a, 0xde, 0x83, 0x40, 0xd7, 0xdf, 0x28, 0xba, 0xa2, 0x30}}}, +{{{0xd3, 0xb5, 0x6d, 0x05, 0x3f, 0x9f, 0xf3, 0x15, 0x8d, 0x7c, 0xca, 0xc9, 0xfc, 0x8a, 0x7c, 0x94, 0xb0, 0x63, 0x36, 0x9b, 0x78, 0xd1, 0x91, 0x1f, 0x93, 0xd8, 0x57, 0x43, 0xde, 0x76, 0xa3, 0x43}} , + {{0x9b, 0x35, 0xe2, 0xa9, 0x3d, 0x32, 0x1e, 0xbb, 0x16, 0x28, 0x70, 0xe9, 0x45, 0x2f, 0x8f, 0x70, 0x7f, 0x08, 0x7e, 0x53, 0xc4, 0x7a, 0xbf, 0xf7, 0xe1, 0xa4, 0x6a, 0xd8, 0xac, 0x64, 0x1b, 0x11}}}, +{{{0xb2, 0xeb, 0x47, 0x46, 0x18, 0x3e, 0x1f, 0x99, 0x0c, 0xcc, 0xf1, 0x2c, 0xe0, 0xe7, 0x8f, 0xe0, 0x01, 0x7e, 0x65, 0xb8, 0x0c, 0xd0, 0xfb, 0xc8, 0xb9, 0x90, 0x98, 0x33, 0x61, 0x3b, 0xd8, 0x27}} , + {{0xa0, 0xbe, 0x72, 0x3a, 0x50, 0x4b, 0x74, 0xab, 0x01, 0xc8, 0x93, 0xc5, 0xe4, 0xc7, 0x08, 0x6c, 0xb4, 0xca, 0xee, 0xeb, 0x8e, 0xd7, 0x4e, 0x26, 0xc6, 0x1d, 0xe2, 0x71, 0xaf, 0x89, 0xa0, 0x2a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, 
+{{{0x98, 0x0b, 0xe4, 0xde, 0xdb, 0xa8, 0xfa, 0x82, 0x74, 0x06, 0x52, 0x6d, 0x08, 0x52, 0x8a, 0xff, 0x62, 0xc5, 0x6a, 0x44, 0x0f, 0x51, 0x8c, 0x1f, 0x6e, 0xb6, 0xc6, 0x2c, 0x81, 0xd3, 0x76, 0x46}} , + {{0xf4, 0x29, 0x74, 0x2e, 0x80, 0xa7, 0x1a, 0x8f, 0xf6, 0xbd, 0xd6, 0x8e, 0xbf, 0xc1, 0x95, 0x2a, 0xeb, 0xa0, 0x7f, 0x45, 0xa0, 0x50, 0x14, 0x05, 0xb1, 0x57, 0x4c, 0x74, 0xb7, 0xe2, 0x89, 0x7d}}}, +{{{0x07, 0xee, 0xa7, 0xad, 0xb7, 0x09, 0x0b, 0x49, 0x4e, 0xbf, 0xca, 0xe5, 0x21, 0xe6, 0xe6, 0xaf, 0xd5, 0x67, 0xf3, 0xce, 0x7e, 0x7c, 0x93, 0x7b, 0x5a, 0x10, 0x12, 0x0e, 0x6c, 0x06, 0x11, 0x75}} , + {{0xd5, 0xfc, 0x86, 0xa3, 0x3b, 0xa3, 0x3e, 0x0a, 0xfb, 0x0b, 0xf7, 0x36, 0xb1, 0x5b, 0xda, 0x70, 0xb7, 0x00, 0xa7, 0xda, 0x88, 0x8f, 0x84, 0xa8, 0xbc, 0x1c, 0x39, 0xb8, 0x65, 0xf3, 0x4d, 0x60}}}, +{{{0x96, 0x9d, 0x31, 0xf4, 0xa2, 0xbe, 0x81, 0xb9, 0xa5, 0x59, 0x9e, 0xba, 0x07, 0xbe, 0x74, 0x58, 0xd8, 0xeb, 0xc5, 0x9f, 0x3d, 0xd1, 0xf4, 0xae, 0xce, 0x53, 0xdf, 0x4f, 0xc7, 0x2a, 0x89, 0x4d}} , + {{0x29, 0xd8, 0xf2, 0xaa, 0xe9, 0x0e, 0xf7, 0x2e, 0x5f, 0x9d, 0x8a, 0x5b, 0x09, 0xed, 0xc9, 0x24, 0x22, 0xf4, 0x0f, 0x25, 0x8f, 0x1c, 0x84, 0x6e, 0x34, 0x14, 0x6c, 0xea, 0xb3, 0x86, 0x5d, 0x04}}}, +{{{0x07, 0x98, 0x61, 0xe8, 0x6a, 0xd2, 0x81, 0x49, 0x25, 0xd5, 0x5b, 0x18, 0xc7, 0x35, 0x52, 0x51, 0xa4, 0x46, 0xad, 0x18, 0x0d, 0xc9, 0x5f, 0x18, 0x91, 0x3b, 0xb4, 0xc0, 0x60, 0x59, 0x8d, 0x66}} , + {{0x03, 0x1b, 0x79, 0x53, 0x6e, 0x24, 0xae, 0x57, 0xd9, 0x58, 0x09, 0x85, 0x48, 0xa2, 0xd3, 0xb5, 0xe2, 0x4d, 0x11, 0x82, 0xe6, 0x86, 0x3c, 0xe9, 0xb1, 0x00, 0x19, 0xc2, 0x57, 0xf7, 0x66, 0x7a}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x0f, 
0xe3, 0x89, 0x03, 0xd7, 0x22, 0x95, 0x9f, 0xca, 0xb4, 0x8d, 0x9e, 0x6d, 0x97, 0xff, 0x8d, 0x21, 0x59, 0x07, 0xef, 0x03, 0x2d, 0x5e, 0xf8, 0x44, 0x46, 0xe7, 0x85, 0x80, 0xc5, 0x89, 0x50}} , + {{0x8b, 0xd8, 0x53, 0x86, 0x24, 0x86, 0x29, 0x52, 0x01, 0xfa, 0x20, 0xc3, 0x4e, 0x95, 0xcb, 0xad, 0x7b, 0x34, 0x94, 0x30, 0xb7, 0x7a, 0xfa, 0x96, 0x41, 0x60, 0x2b, 0xcb, 0x59, 0xb9, 0xca, 0x50}}}, +{{{0xc2, 0x5b, 0x9b, 0x78, 0x23, 0x1b, 0x3a, 0x88, 0x94, 0x5f, 0x0a, 0x9b, 0x98, 0x2b, 0x6e, 0x53, 0x11, 0xf6, 0xff, 0xc6, 0x7d, 0x42, 0xcc, 0x02, 0x80, 0x40, 0x0d, 0x1e, 0xfb, 0xaf, 0x61, 0x07}} , + {{0xb0, 0xe6, 0x2f, 0x81, 0x70, 0xa1, 0x2e, 0x39, 0x04, 0x7c, 0xc4, 0x2c, 0x87, 0x45, 0x4a, 0x5b, 0x69, 0x97, 0xac, 0x6d, 0x2c, 0x10, 0x42, 0x7c, 0x3b, 0x15, 0x70, 0x60, 0x0e, 0x11, 0x6d, 0x3a}}}, +{{{0x9b, 0x18, 0x80, 0x5e, 0xdb, 0x05, 0xbd, 0xc6, 0xb7, 0x3c, 0xc2, 0x40, 0x4d, 0x5d, 0xce, 0x97, 0x8a, 0x34, 0x15, 0xab, 0x28, 0x5d, 0x10, 0xf0, 0x37, 0x0c, 0xcc, 0x16, 0xfa, 0x1f, 0x33, 0x0d}} , + {{0x19, 0xf9, 0x35, 0xaa, 0x59, 0x1a, 0x0c, 0x5c, 0x06, 0xfc, 0x6a, 0x0b, 0x97, 0x53, 0x36, 0xfc, 0x2a, 0xa5, 0x5a, 0x9b, 0x30, 0xef, 0x23, 0xaf, 0x39, 0x5d, 0x9a, 0x6b, 0x75, 0x57, 0x48, 0x0b}}}, +{{{0x26, 0xdc, 0x76, 0x3b, 0xfc, 0xf9, 0x9c, 0x3f, 0x89, 0x0b, 0x62, 0x53, 0xaf, 0x83, 0x01, 0x2e, 0xbc, 0x6a, 0xc6, 0x03, 0x0d, 0x75, 0x2a, 0x0d, 0xe6, 0x94, 0x54, 0xcf, 0xb3, 0xe5, 0x96, 0x25}} , + {{0xfe, 0x82, 0xb1, 0x74, 0x31, 0x8a, 0xa7, 0x6f, 0x56, 0xbd, 0x8d, 0xf4, 0xe0, 0x94, 0x51, 0x59, 0xde, 0x2c, 0x5a, 0xf4, 0x84, 0x6b, 0x4a, 0x88, 0x93, 0xc0, 0x0c, 0x9a, 0xac, 0xa7, 0xa0, 0x68}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x25, 0x0d, 
0xd6, 0xc7, 0x23, 0x47, 0x10, 0xad, 0xc7, 0x08, 0x5c, 0x87, 0x87, 0x93, 0x98, 0x18, 0xb8, 0xd3, 0x9c, 0xac, 0x5a, 0x3d, 0xc5, 0x75, 0xf8, 0x49, 0x32, 0x14, 0xcc, 0x51, 0x96, 0x24}} , + {{0x65, 0x9c, 0x5d, 0xf0, 0x37, 0x04, 0xf0, 0x34, 0x69, 0x2a, 0xf0, 0xa5, 0x64, 0xca, 0xde, 0x2b, 0x5b, 0x15, 0x10, 0xd2, 0xab, 0x06, 0xdd, 0xc4, 0xb0, 0xb6, 0x5b, 0xc1, 0x17, 0xdf, 0x8f, 0x02}}}, +{{{0xbd, 0x59, 0x3d, 0xbf, 0x5c, 0x31, 0x44, 0x2c, 0x32, 0x94, 0x04, 0x60, 0x84, 0x0f, 0xad, 0x00, 0xb6, 0x8f, 0xc9, 0x1d, 0xcc, 0x5c, 0xa2, 0x49, 0x0e, 0x50, 0x91, 0x08, 0x9a, 0x43, 0x55, 0x05}} , + {{0x5d, 0x93, 0x55, 0xdf, 0x9b, 0x12, 0x19, 0xec, 0x93, 0x85, 0x42, 0x9e, 0x66, 0x0f, 0x9d, 0xaf, 0x99, 0xaf, 0x26, 0x89, 0xbc, 0x61, 0xfd, 0xff, 0xce, 0x4b, 0xf4, 0x33, 0x95, 0xc9, 0x35, 0x58}}}, +{{{0x12, 0x55, 0xf9, 0xda, 0xcb, 0x44, 0xa7, 0xdc, 0x57, 0xe2, 0xf9, 0x9a, 0xe6, 0x07, 0x23, 0x60, 0x54, 0xa7, 0x39, 0xa5, 0x9b, 0x84, 0x56, 0x6e, 0xaa, 0x8b, 0x8f, 0xb0, 0x2c, 0x87, 0xaf, 0x67}} , + {{0x00, 0xa9, 0x4c, 0xb2, 0x12, 0xf8, 0x32, 0xa8, 0x7a, 0x00, 0x4b, 0x49, 0x32, 0xba, 0x1f, 0x5d, 0x44, 0x8e, 0x44, 0x7a, 0xdc, 0x11, 0xfb, 0x39, 0x08, 0x57, 0x87, 0xa5, 0x12, 0x42, 0x93, 0x0e}}}, +{{{0x17, 0xb4, 0xae, 0x72, 0x59, 0xd0, 0xaa, 0xa8, 0x16, 0x8b, 0x63, 0x11, 0xb3, 0x43, 0x04, 0xda, 0x0c, 0xa8, 0xb7, 0x68, 0xdd, 0x4e, 0x54, 0xe7, 0xaf, 0x5d, 0x5d, 0x05, 0x76, 0x36, 0xec, 0x0d}} , + {{0x6d, 0x7c, 0x82, 0x32, 0x38, 0x55, 0x57, 0x74, 0x5b, 0x7d, 0xc3, 0xc4, 0xfb, 0x06, 0x29, 0xf0, 0x13, 0x55, 0x54, 0xc6, 0xa7, 0xdc, 0x4c, 0x9f, 0x98, 0x49, 0x20, 0xa8, 0xc3, 0x8d, 0xfa, 0x48}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x87, 0x47, 0x9d, 
0xe9, 0x25, 0xd5, 0xe3, 0x47, 0x78, 0xdf, 0x85, 0xa7, 0x85, 0x5e, 0x7a, 0x4c, 0x5f, 0x79, 0x1a, 0xf3, 0xa2, 0xb2, 0x28, 0xa0, 0x9c, 0xdd, 0x30, 0x40, 0xd4, 0x38, 0xbd, 0x28}} , + {{0xfc, 0xbb, 0xd5, 0x78, 0x6d, 0x1d, 0xd4, 0x99, 0xb4, 0xaa, 0x44, 0x44, 0x7a, 0x1b, 0xd8, 0xfe, 0xb4, 0x99, 0xb9, 0xcc, 0xe7, 0xc4, 0xd3, 0x3a, 0x73, 0x83, 0x41, 0x5c, 0x40, 0xd7, 0x2d, 0x55}}}, +{{{0x26, 0xe1, 0x7b, 0x5f, 0xe5, 0xdc, 0x3f, 0x7d, 0xa1, 0xa7, 0x26, 0x44, 0x22, 0x23, 0xc0, 0x8f, 0x7d, 0xf1, 0xb5, 0x11, 0x47, 0x7b, 0x19, 0xd4, 0x75, 0x6f, 0x1e, 0xa5, 0x27, 0xfe, 0xc8, 0x0e}} , + {{0xd3, 0x11, 0x3d, 0xab, 0xef, 0x2c, 0xed, 0xb1, 0x3d, 0x7c, 0x32, 0x81, 0x6b, 0xfe, 0xf8, 0x1c, 0x3c, 0x7b, 0xc0, 0x61, 0xdf, 0xb8, 0x75, 0x76, 0x7f, 0xaa, 0xd8, 0x93, 0xaf, 0x3d, 0xe8, 0x3d}}}, +{{{0xfd, 0x5b, 0x4e, 0x8d, 0xb6, 0x7e, 0x82, 0x9b, 0xef, 0xce, 0x04, 0x69, 0x51, 0x52, 0xff, 0xef, 0xa0, 0x52, 0xb5, 0x79, 0x17, 0x5e, 0x2f, 0xde, 0xd6, 0x3c, 0x2d, 0xa0, 0x43, 0xb4, 0x0b, 0x19}} , + {{0xc0, 0x61, 0x48, 0x48, 0x17, 0xf4, 0x9e, 0x18, 0x51, 0x2d, 0xea, 0x2f, 0xf2, 0xf2, 0xe0, 0xa3, 0x14, 0xb7, 0x8b, 0x3a, 0x30, 0xf5, 0x81, 0xc1, 0x5d, 0x71, 0x39, 0x62, 0x55, 0x1f, 0x60, 0x5a}}}, +{{{0xe5, 0x89, 0x8a, 0x76, 0x6c, 0xdb, 0x4d, 0x0a, 0x5b, 0x72, 0x9d, 0x59, 0x6e, 0x63, 0x63, 0x18, 0x7c, 0xe3, 0xfa, 0xe2, 0xdb, 0xa1, 0x8d, 0xf4, 0xa5, 0xd7, 0x16, 0xb2, 0xd0, 0xb3, 0x3f, 0x39}} , + {{0xce, 0x60, 0x09, 0x6c, 0xf5, 0x76, 0x17, 0x24, 0x80, 0x3a, 0x96, 0xc7, 0x94, 0x2e, 0xf7, 0x6b, 0xef, 0xb5, 0x05, 0x96, 0xef, 0xd3, 0x7b, 0x51, 0xda, 0x05, 0x44, 0x67, 0xbc, 0x07, 0x21, 0x4e}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xe9, 0x73, 0x6f, 0x21, 
0xb9, 0xde, 0x22, 0x7d, 0xeb, 0x97, 0x31, 0x10, 0xa3, 0xea, 0xe1, 0xc6, 0x37, 0xeb, 0x8f, 0x43, 0x58, 0xde, 0x41, 0x64, 0x0e, 0x3e, 0x07, 0x99, 0x3d, 0xf1, 0xdf, 0x1e}} , + {{0xf8, 0xad, 0x43, 0xc2, 0x17, 0x06, 0xe2, 0xe4, 0xa9, 0x86, 0xcd, 0x18, 0xd7, 0x78, 0xc8, 0x74, 0x66, 0xd2, 0x09, 0x18, 0xa5, 0xf1, 0xca, 0xa6, 0x62, 0x92, 0xc1, 0xcb, 0x00, 0xeb, 0x42, 0x2e}}}, +{{{0x7b, 0x34, 0x24, 0x4c, 0xcf, 0x38, 0xe5, 0x6c, 0x0a, 0x01, 0x2c, 0x22, 0x0b, 0x24, 0x38, 0xad, 0x24, 0x7e, 0x19, 0xf0, 0x6c, 0xf9, 0x31, 0xf4, 0x35, 0x11, 0xf6, 0x46, 0x33, 0x3a, 0x23, 0x59}} , + {{0x20, 0x0b, 0xa1, 0x08, 0x19, 0xad, 0x39, 0x54, 0xea, 0x3e, 0x23, 0x09, 0xb6, 0xe2, 0xd2, 0xbc, 0x4d, 0xfc, 0x9c, 0xf0, 0x13, 0x16, 0x22, 0x3f, 0xb9, 0xd2, 0x11, 0x86, 0x90, 0x55, 0xce, 0x3c}}}, +{{{0xc4, 0x0b, 0x4b, 0x62, 0x99, 0x37, 0x84, 0x3f, 0x74, 0xa2, 0xf9, 0xce, 0xe2, 0x0b, 0x0f, 0x2a, 0x3d, 0xa3, 0xe3, 0xdb, 0x5a, 0x9d, 0x93, 0xcc, 0xa5, 0xef, 0x82, 0x91, 0x1d, 0xe6, 0x6c, 0x68}} , + {{0xa3, 0x64, 0x17, 0x9b, 0x8b, 0xc8, 0x3a, 0x61, 0xe6, 0x9d, 0xc6, 0xed, 0x7b, 0x03, 0x52, 0x26, 0x9d, 0x3a, 0xb3, 0x13, 0xcc, 0x8a, 0xfd, 0x2c, 0x1a, 0x1d, 0xed, 0x13, 0xd0, 0x55, 0x57, 0x0e}}}, +{{{0x1a, 0xea, 0xbf, 0xfd, 0x4a, 0x3c, 0x8e, 0xec, 0x29, 0x7e, 0x77, 0x77, 0x12, 0x99, 0xd7, 0x84, 0xf9, 0x55, 0x7f, 0xf1, 0x8b, 0xb4, 0xd2, 0x95, 0xa3, 0x8d, 0xf0, 0x8a, 0xa7, 0xeb, 0x82, 0x4b}} , + {{0x2c, 0x28, 0xf4, 0x3a, 0xf6, 0xde, 0x0a, 0xe0, 0x41, 0x44, 0x23, 0xf8, 0x3f, 0x03, 0x64, 0x9f, 0xc3, 0x55, 0x4c, 0xc6, 0xc1, 0x94, 0x1c, 0x24, 0x5d, 0x5f, 0x92, 0x45, 0x96, 0x57, 0x37, 0x14}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xc1, 0xcd, 0x90, 0x66, 0xb9, 
0x76, 0xa0, 0x5b, 0xa5, 0x85, 0x75, 0x23, 0xf9, 0x89, 0xa5, 0x82, 0xb2, 0x6f, 0xb1, 0xeb, 0xc4, 0x69, 0x6f, 0x18, 0x5a, 0xed, 0x94, 0x3d, 0x9d, 0xd9, 0x2c, 0x1a}} , + {{0x35, 0xb0, 0xe6, 0x73, 0x06, 0xb7, 0x37, 0xe0, 0xf8, 0xb0, 0x22, 0xe8, 0xd2, 0xed, 0x0b, 0xef, 0xe6, 0xc6, 0x5a, 0x99, 0x9e, 0x1a, 0x9f, 0x04, 0x97, 0xe4, 0x4d, 0x0b, 0xbe, 0xba, 0x44, 0x40}}}, +{{{0xc1, 0x56, 0x96, 0x91, 0x5f, 0x1f, 0xbb, 0x54, 0x6f, 0x88, 0x89, 0x0a, 0xb2, 0xd6, 0x41, 0x42, 0x6a, 0x82, 0xee, 0x14, 0xaa, 0x76, 0x30, 0x65, 0x0f, 0x67, 0x39, 0xa6, 0x51, 0x7c, 0x49, 0x24}} , + {{0x35, 0xa3, 0x78, 0xd1, 0x11, 0x0f, 0x75, 0xd3, 0x70, 0x46, 0xdb, 0x20, 0x51, 0xcb, 0x92, 0x80, 0x54, 0x10, 0x74, 0x36, 0x86, 0xa9, 0xd7, 0xa3, 0x08, 0x78, 0xf1, 0x01, 0x29, 0xf8, 0x80, 0x3b}}}, +{{{0xdb, 0xa7, 0x9d, 0x9d, 0xbf, 0xa0, 0xcc, 0xed, 0x53, 0xa2, 0xa2, 0x19, 0x39, 0x48, 0x83, 0x19, 0x37, 0x58, 0xd1, 0x04, 0x28, 0x40, 0xf7, 0x8a, 0xc2, 0x08, 0xb7, 0xa5, 0x42, 0xcf, 0x53, 0x4c}} , + {{0xa7, 0xbb, 0xf6, 0x8e, 0xad, 0xdd, 0xf7, 0x90, 0xdd, 0x5f, 0x93, 0x89, 0xae, 0x04, 0x37, 0xe6, 0x9a, 0xb7, 0xe8, 0xc0, 0xdf, 0x16, 0x2a, 0xbf, 0xc4, 0x3a, 0x3c, 0x41, 0xd5, 0x89, 0x72, 0x5a}}}, +{{{0x1f, 0x96, 0xff, 0x34, 0x2c, 0x13, 0x21, 0xcb, 0x0a, 0x89, 0x85, 0xbe, 0xb3, 0x70, 0x9e, 0x1e, 0xde, 0x97, 0xaf, 0x96, 0x30, 0xf7, 0x48, 0x89, 0x40, 0x8d, 0x07, 0xf1, 0x25, 0xf0, 0x30, 0x58}} , + {{0x1e, 0xd4, 0x93, 0x57, 0xe2, 0x17, 0xe7, 0x9d, 0xab, 0x3c, 0x55, 0x03, 0x82, 0x2f, 0x2b, 0xdb, 0x56, 0x1e, 0x30, 0x2e, 0x24, 0x47, 0x6e, 0xe6, 0xff, 0x33, 0x24, 0x2c, 0x75, 0x51, 0xd4, 0x67}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0x2b, 0x06, 0xd9, 0xa1, 0x5d, 0xe1, 
0xf4, 0xd1, 0x1e, 0x3c, 0x9a, 0xc6, 0x29, 0x2b, 0x13, 0x13, 0x78, 0xc0, 0xd8, 0x16, 0x17, 0x2d, 0x9e, 0xa9, 0xc9, 0x79, 0x57, 0xab, 0x24, 0x91, 0x92, 0x19}} , + {{0x69, 0xfb, 0xa1, 0x9c, 0xa6, 0x75, 0x49, 0x7d, 0x60, 0x73, 0x40, 0x42, 0xc4, 0x13, 0x0a, 0x95, 0x79, 0x1e, 0x04, 0x83, 0x94, 0x99, 0x9b, 0x1e, 0x0c, 0xe8, 0x1f, 0x54, 0xef, 0xcb, 0xc0, 0x52}}}, +{{{0x14, 0x89, 0x73, 0xa1, 0x37, 0x87, 0x6a, 0x7a, 0xcf, 0x1d, 0xd9, 0x2e, 0x1a, 0x67, 0xed, 0x74, 0xc0, 0xf0, 0x9c, 0x33, 0xdd, 0xdf, 0x08, 0xbf, 0x7b, 0xd1, 0x66, 0xda, 0xe6, 0xc9, 0x49, 0x08}} , + {{0xe9, 0xdd, 0x5e, 0x55, 0xb0, 0x0a, 0xde, 0x21, 0x4c, 0x5a, 0x2e, 0xd4, 0x80, 0x3a, 0x57, 0x92, 0x7a, 0xf1, 0xc4, 0x2c, 0x40, 0xaf, 0x2f, 0xc9, 0x92, 0x03, 0xe5, 0x5a, 0xbc, 0xdc, 0xf4, 0x09}}}, +{{{0xf3, 0xe1, 0x2b, 0x7c, 0x05, 0x86, 0x80, 0x93, 0x4a, 0xad, 0xb4, 0x8f, 0x7e, 0x99, 0x0c, 0xfd, 0xcd, 0xef, 0xd1, 0xff, 0x2c, 0x69, 0x34, 0x13, 0x41, 0x64, 0xcf, 0x3b, 0xd0, 0x90, 0x09, 0x1e}} , + {{0x9d, 0x45, 0xd6, 0x80, 0xe6, 0x45, 0xaa, 0xf4, 0x15, 0xaa, 0x5c, 0x34, 0x87, 0x99, 0xa2, 0x8c, 0x26, 0x84, 0x62, 0x7d, 0xb6, 0x29, 0xc0, 0x52, 0xea, 0xf5, 0x81, 0x18, 0x0f, 0x35, 0xa9, 0x0e}}}, +{{{0xe7, 0x20, 0x72, 0x7c, 0x6d, 0x94, 0x5f, 0x52, 0x44, 0x54, 0xe3, 0xf1, 0xb2, 0xb0, 0x36, 0x46, 0x0f, 0xae, 0x92, 0xe8, 0x70, 0x9d, 0x6e, 0x79, 0xb1, 0xad, 0x37, 0xa9, 0x5f, 0xc0, 0xde, 0x03}} , + {{0x15, 0x55, 0x37, 0xc6, 0x1c, 0x27, 0x1c, 0x6d, 0x14, 0x4f, 0xca, 0xa4, 0xc4, 0x88, 0x25, 0x46, 0x39, 0xfc, 0x5a, 0xe5, 0xfe, 0x29, 0x11, 0x69, 0xf5, 0x72, 0x84, 0x4d, 0x78, 0x9f, 0x94, 0x15}}}, +{{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}}, +{{{0xec, 0xd3, 0xff, 0x57, 0x0b, 0xb0, 0xb2, 
0xdc, 0xf8, 0x4f, 0xe2, 0x12, 0xd5, 0x36, 0xbe, 0x6b, 0x09, 0x43, 0x6d, 0xa3, 0x4d, 0x90, 0x2d, 0xb8, 0x74, 0xe8, 0x71, 0x45, 0x19, 0x8b, 0x0c, 0x6a}} , + {{0xb8, 0x42, 0x1c, 0x03, 0xad, 0x2c, 0x03, 0x8e, 0xac, 0xd7, 0x98, 0x29, 0x13, 0xc6, 0x02, 0x29, 0xb5, 0xd4, 0xe7, 0xcf, 0xcc, 0x8b, 0x83, 0xec, 0x35, 0xc7, 0x9c, 0x74, 0xb7, 0xad, 0x85, 0x5f}}}, +{{{0x78, 0x84, 0xe1, 0x56, 0x45, 0x69, 0x68, 0x5a, 0x4f, 0xb8, 0xb1, 0x29, 0xff, 0x33, 0x03, 0x31, 0xb7, 0xcb, 0x96, 0x25, 0xe6, 0xe6, 0x41, 0x98, 0x1a, 0xbb, 0x03, 0x56, 0xf2, 0xb2, 0x91, 0x34}} , + {{0x2c, 0x6c, 0xf7, 0x66, 0xa4, 0x62, 0x6b, 0x39, 0xb3, 0xba, 0x65, 0xd3, 0x1c, 0xf8, 0x11, 0xaa, 0xbe, 0xdc, 0x80, 0x59, 0x87, 0xf5, 0x7b, 0xe5, 0xe3, 0xb3, 0x3e, 0x39, 0xda, 0xbe, 0x88, 0x09}}}, +{{{0x8b, 0xf1, 0xa0, 0xf5, 0xdc, 0x29, 0xb4, 0xe2, 0x07, 0xc6, 0x7a, 0x00, 0xd0, 0x89, 0x17, 0x51, 0xd4, 0xbb, 0xd4, 0x22, 0xea, 0x7e, 0x7d, 0x7c, 0x24, 0xea, 0xf2, 0xe8, 0x22, 0x12, 0x95, 0x06}} , + {{0xda, 0x7c, 0xa4, 0x0c, 0xf4, 0xba, 0x6e, 0xe1, 0x89, 0xb5, 0x59, 0xca, 0xf1, 0xc0, 0x29, 0x36, 0x09, 0x44, 0xe2, 0x7f, 0xd1, 0x63, 0x15, 0x99, 0xea, 0x25, 0xcf, 0x0c, 0x9d, 0xc0, 0x44, 0x6f}}}, +{{{0x1d, 0x86, 0x4e, 0xcf, 0xf7, 0x37, 0x10, 0x25, 0x8f, 0x12, 0xfb, 0x19, 0xfb, 0xe0, 0xed, 0x10, 0xc8, 0xe2, 0xf5, 0x75, 0xb1, 0x33, 0xc0, 0x96, 0x0d, 0xfb, 0x15, 0x6c, 0x0d, 0x07, 0x5f, 0x05}} , + {{0x69, 0x3e, 0x47, 0x97, 0x2c, 0xaf, 0x52, 0x7c, 0x78, 0x83, 0xad, 0x1b, 0x39, 0x82, 0x2f, 0x02, 0x6f, 0x47, 0xdb, 0x2a, 0xb0, 0xe1, 0x91, 0x99, 0x55, 0xb8, 0x99, 0x3a, 0xa0, 0x44, 0x11, 0x51}}} diff --git a/usr.bin/signify/mod_ed25519.c b/usr.bin/signify/mod_ed25519.c new file mode 100644 index 0000000..aac481a --- /dev/null +++ b/usr.bin/signify/mod_ed25519.c @@ -0,0 +1,143 @@ +/* $OpenBSD: mod_ed25519.c,v 1.1 2014/01/08 05:00:01 tedu Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/ed25519.c + */ + +#include "crypto_api.h" + +#include "ge25519.h" +/* Fill playground with sm[0..31], pk[0..31], sm[64..smlen-1] and write crypto_hash_sha512 of those smlen bytes to hram. playground needs room for smlen bytes; crypto_sign_ed25519 passes sm itself as playground. */ +static void get_hram(unsigned char *hram, const unsigned char *sm, const unsigned char *pk, unsigned char *playground, unsigned long long smlen) +{ + unsigned long long i; + + for (i = 0;i < 32;++i) playground[i] = sm[i]; + for (i = 32;i < 64;++i) playground[i] = pk[i-32]; + for (i = 64;i < smlen;++i) playground[i] = sm[i]; + + crypto_hash_sha512(hram,playground,smlen); +} +/* Key generation: fills sk[0..31] via randombytes, derives the scalar from the first 32 bytes of crypto_hash_sha512(sk) after bit-masking, packs ge25519_scalarmult_base of it into pk and copies pk to sk[32..63]. Always returns 0. */ +#ifndef VERIFYONLY +int crypto_sign_ed25519_keypair( + unsigned char *pk, + unsigned char *sk + ) +{ + sc25519 scsk; + ge25519 gepk; + unsigned char extsk[64]; + int i; + + randombytes(sk, 32); + crypto_hash_sha512(extsk, sk, 32); + extsk[0] &= 248; + extsk[31] &= 127; + extsk[31] |= 64; +/* masks above: clear bits 0-2 of byte 0, clear bit 7 and set bit 6 of byte 31 */ + sc25519_from32bytes(&scsk,extsk); + + ge25519_scalarmult_base(&gepk, &scsk); + ge25519_pack(pk, &gepk); + for(i=0;i<32;i++) + sk[32 + i] = pk[i]; + return 0; +} +/* Sign m[0..mlen-1] with the 64-byte key sk (sk+32 is handed to get_hram as the public key): writes 32 bytes r, 32 bytes s, then the message to sm and sets *smlen = mlen+64, so sm needs mlen+64 bytes. Returns 0. */ +int crypto_sign_ed25519( + unsigned char *sm,unsigned long long *smlen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *sk + ) +{ + sc25519 sck, scs, scsk; + ge25519 ger; + unsigned char r[32]; + unsigned char s[32]; + unsigned char extsk[64]; + unsigned long long i; + unsigned char hmg[crypto_hash_sha512_BYTES]; + unsigned char hram[crypto_hash_sha512_BYTES]; + + crypto_hash_sha512(extsk, sk, 32); + extsk[0] &= 248; + extsk[31] &= 127; + extsk[31] |= 64; + + *smlen = mlen+64; + for(i=0;i<mlen;i++) + sm[64 + i] = m[i]; + for(i=0;i<32;i++) + sm[32 + i] = extsk[32+i]; +/* extsk[32..63] now sits directly in front of the message copy, so one hash call covers both */ + crypto_hash_sha512(hmg, sm+32, mlen+32); /* Generate k as h(extsk[32],...,extsk[63],m) */ + + /* Computation of R */ + sc25519_from64bytes(&sck, hmg); + ge25519_scalarmult_base(&ger, &sck); + ge25519_pack(r, &ger); + + /* Computation of s */ + for(i=0;i<32;i++) + sm[i] = r[i]; +/* sm doubles as playground here; get_hram overwrites sm[32..63] with the public key sk[32..63] before hashing */ + get_hram(hram, sm, sk+32, sm, mlen+64); + + sc25519_from64bytes(&scs, hram); + sc25519_from32bytes(&scsk, extsk); + sc25519_mul(&scs, &scs, &scsk); + + 
sc25519_add(&scs, &scs, &sck); + + sc25519_to32bytes(s,&scs); /* cat s */ + for(i=0;i<32;i++) + sm[32 + i] = s[i]; + + return 0; +} +#endif +/* Verify the signed message sm[0..smlen-1] against public key pk. On success copies the smlen-64 message bytes to m, sets *mlen and returns 0. On failure the result is nonzero and *mlen stays (unsigned long long)-1; m is zeroed only when the final crypto_verify_32 comparison fails. m is used as get_hram scratch space, so it must have room for smlen bytes. */ int crypto_sign_ed25519_open( + unsigned char *m,unsigned long long *mlen, + const unsigned char *sm,unsigned long long smlen, + const unsigned char *pk + ) +{ + unsigned long long i; /* same width as smlen: a narrower unsigned int counter wraps before reaching smlen-64 once that exceeds UINT_MAX, and the loops below would never terminate */ + int ret; + unsigned char t2[32]; + ge25519 get1, get2; + sc25519 schram, scs; + unsigned char hram[crypto_hash_sha512_BYTES]; + + *mlen = (unsigned long long) -1; + if (smlen < 64) return -1; + + if (ge25519_unpackneg_vartime(&get1, pk)) return -1; + + get_hram(hram,sm,pk,m,smlen); + + sc25519_from64bytes(&schram, hram); + + sc25519_from32bytes(&scs, sm+32); + + ge25519_double_scalarmult_vartime(&get2, &get1, &schram, &ge25519_base, &scs); + ge25519_pack(t2, &get2); + + ret = crypto_verify_32(sm, t2); + + if (!ret) + { + for(i=0;i<smlen-64;i++) + m[i] = sm[i + 64]; + *mlen = smlen-64; + } + else + { + for(i=0;i<smlen-64;i++) + m[i] = 0; + } + return ret; +} diff --git a/usr.bin/signify/mod_ge25519.c b/usr.bin/signify/mod_ge25519.c new file mode 100644 index 0000000..16da322 --- /dev/null +++ b/usr.bin/signify/mod_ge25519.c @@ -0,0 +1,327 @@ +/* $OpenBSD: mod_ge25519.c,v 1.2 2014/01/08 05:51:35 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/ge25519.c + */ + +#include "fe25519.h" +#include "sc25519.h" +#include "ge25519.h" + +/* + * Arithmetic on the twisted Edwards curve -x^2 + y^2 = 1 + dx^2y^2 + * with d = -(121665/121666) = 37095705934669439343138083508754565189542113879843219016388785533085940283555 + * Base point: (15112221349535400772501151409588531511454012693041857206046113283949847762202,46316835694926478169428394003475163141307993866256225615783033603165251855960); + */ + +/* d */ +static const fe25519 ge25519_ecd = {{0xA3, 0x78, 0x59, 0x13, 0xCA, 0x4D, 0xEB, 0x75, 0xAB, 0xD8, 0x41, 0x41, 0x4D, 0x0A, 0x70, 0x00, + 0x98, 0xE8, 0x79, 0x77, 0x79, 0x40, 0xC7, 0x8C, 0x73, 0xFE, 0x6F, 0x2B, 0xEE, 0x6C, 0x03, 0x52}}; +/* 2*d */ +static const fe25519 ge25519_ec2d = {{0x59, 0xF1, 0xB2, 0x26, 0x94, 0x9B, 0xD6, 0xEB, 0x56, 0xB1, 0x83, 0x82, 0x9A, 0x14, 0xE0, 0x00, + 0x30, 0xD1, 0xF3, 0xEE, 0xF2, 0x80, 0x8E, 0x19, 0xE7, 0xFC, 0xDF, 0x56, 0xDC, 0xD9, 0x06, 0x24}}; +/* sqrt(-1) */ +static const fe25519 ge25519_sqrtm1 = {{0xB0, 0xA0, 0x0E, 0x4A, 0x27, 0x1B, 0xEE, 0xC4, 0x78, 0xE4, 0x2F, 0xAD, 0x06, 0x18, 0x43, 0x2F, + 0xA7, 0xD7, 0xFB, 0x3D, 0x99, 0x00, 0x4D, 0x2B, 0x0B, 0xDF, 0xC1, 0x4F, 0x80, 0x24, 0x83, 0x2B}}; + +#define ge25519_p3 ge25519 +/* Intermediate result of add_p1p1/dbl_p1p1; note the member order x, z, y, t. p1p1_to_p2/p1p1_to_p3 turn it into x*t, y*z, z*t (plus x*y for t). */ +typedef struct +{ + fe25519 x; + fe25519 z; + fe25519 y; + fe25519 t; +} ge25519_p1p1; +/* Point without the t coordinate. This file casts ge25519_p3 pointers to ge25519_p2 pointers, so the layout presumably mirrors the leading x, y, z members of ge25519 - TODO confirm against ge25519.h. */ +typedef struct +{ + fe25519 x; + fe25519 y; + fe25519 z; +} ge25519_p2; +/* Affine point (x, y); element type of the precomputed base-multiples table. */ +typedef struct +{ + fe25519 x; + fe25519 y; +} ge25519_aff; + + +/* Packed coordinates of the base point */ +const ge25519 ge25519_base = {{{0x1A, 0xD5, 0x25, 0x8F, 0x60, 0x2D, 0x56, 0xC9, 0xB2, 0xA7, 0x25, 0x95, 0x60, 0xC7, 0x2C, 0x69, + 0x5C, 0xDC, 0xD6, 0xFD, 0x31, 0xE2, 0xA4, 0xC0, 0xFE, 0x53, 0x6E, 0xCD, 0xD3, 0x36, 0x69, 0x21}}, + {{0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66}}, + {{0x01, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, + {{0xA3, 0xDD, 0xB7, 0xA5, 0xB3, 0x8A, 0xDE, 0x6D, 0xF5, 0x52, 0x51, 0x77, 0x80, 0x9F, 0xF0, 0x20, + 0x7D, 0xE3, 0xAB, 0x64, 0x8E, 0x4E, 0xEA, 0x66, 0x65, 0x76, 0x8B, 0xD7, 0x0F, 0x5F, 0x87, 0x67}}}; + +#ifndef VERIFYONLY +/* Multiples of the base point in affine representation */ +static const ge25519_aff ge25519_base_multiples_affine[425] = { +#include "ge25519_base.data" +}; +#endif +/* r = (p.x*p.t, p.y*p.z, p.z*p.t) */ +static void p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p) +{ + fe25519_mul(&r->x, &p->x, &p->t); + fe25519_mul(&r->y, &p->y, &p->z); + fe25519_mul(&r->z, &p->z, &p->t); +} +/* Fills x, y, z through p1p1_to_p2 (via a pointer cast of r), then sets r->t = p.x*p.y. */ +static void p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p) +{ + p1p1_to_p2((ge25519_p2 *)r, p); + fe25519_mul(&r->t, &p->x, &p->y); +} + +#ifndef VERIFYONLY /* In-place r = r + q for an affine q, following the A..H steps annotated below; the four final products (E*F, H*G, G*F, E*H) are written straight back into r. */ +static void ge25519_mixadd2(ge25519_p3 *r, const ge25519_aff *q) +{ + fe25519 a,b,t1,t2,c,d,e,f,g,h,qt; + fe25519_mul(&qt, &q->x, &q->y); + fe25519_sub(&a, &r->y, &r->x); /* A = (Y1-X1)*(Y2-X2) */ + fe25519_add(&b, &r->y, &r->x); /* B = (Y1+X1)*(Y2+X2) */ + fe25519_sub(&t1, &q->y, &q->x); + fe25519_add(&t2, &q->y, &q->x); + fe25519_mul(&a, &a, &t1); + fe25519_mul(&b, &b, &t2); + fe25519_sub(&e, &b, &a); /* E = B-A */ + fe25519_add(&h, &b, &a); /* H = B+A */ + fe25519_mul(&c, &r->t, &qt); /* C = T1*k*T2 */ + fe25519_mul(&c, &c, &ge25519_ec2d); + fe25519_add(&d, &r->z, &r->z); /* D = Z1*2 */ + fe25519_sub(&f, &d, &c); /* F = D-C */ + fe25519_add(&g, &d, &c); /* G = D+C */ + fe25519_mul(&r->x, &e, &f); + fe25519_mul(&r->y, &h, &g); + fe25519_mul(&r->z, &g, &f); + fe25519_mul(&r->t, &e, &h); +} +#endif +/* r = p + q, left in p1p1 form: r->x = E, r->t = F, r->z = G, r->y = H. */ +static void add_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_p3 *q) +{ + fe25519 a, b, c, d, t; + + fe25519_sub(&a, &p->y, &p->x); /* A = (Y1-X1)*(Y2-X2) */ + fe25519_sub(&t, &q->y, &q->x); + fe25519_mul(&a, &a, &t); + fe25519_add(&b, &p->x, &p->y); /* 
B = (Y1+X1)*(Y2+X2) */ + fe25519_add(&t, &q->x, &q->y); + fe25519_mul(&b, &b, &t); + fe25519_mul(&c, &p->t, &q->t); /* C = T1*k*T2 */ + fe25519_mul(&c, &c, &ge25519_ec2d); + fe25519_mul(&d, &p->z, &q->z); /* D = Z1*2*Z2 */ + fe25519_add(&d, &d, &d); + fe25519_sub(&r->x, &b, &a); /* E = B-A */ + fe25519_sub(&r->t, &d, &c); /* F = D-C */ + fe25519_add(&r->z, &d, &c); /* G = D+C */ + fe25519_add(&r->y, &b, &a); /* H = B+A */ +} +/* Doubling of p with the result left in p1p1 form; only x, y and z of p are read. */ +/* See http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd */ +static void dbl_p1p1(ge25519_p1p1 *r, const ge25519_p2 *p) +{ + fe25519 a,b,c,d; + fe25519_square(&a, &p->x); + fe25519_square(&b, &p->y); + fe25519_square(&c, &p->z); + fe25519_add(&c, &c, &c); + fe25519_neg(&d, &a); + + fe25519_add(&r->x, &p->x, &p->y); + fe25519_square(&r->x, &r->x); + fe25519_sub(&r->x, &r->x, &a); + fe25519_sub(&r->x, &r->x, &b); + fe25519_add(&r->z, &d, &b); + fe25519_sub(&r->t, &r->z, &c); + fe25519_sub(&r->y, &d, &b); +} + +#ifndef VERIFYONLY +/* Constant-time version of: if(b) r = p */ +static void cmov_aff(ge25519_aff *r, const ge25519_aff *p, unsigned char b) +{ + fe25519_cmov(&r->x, &p->x, b); + fe25519_cmov(&r->y, &p->y, b); +} +/* Returns 1 if b == c, else 0, computed without a branch. */ +static unsigned char equal(signed char b,signed char c) +{ + unsigned char ub = b; + unsigned char uc = c; + unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */ + crypto_uint32 y = x; /* 0: yes; 1..255: no */ + y -= 1; /* 4294967295: yes; 0..254: no */ + y >>= 31; /* 1: yes; 0: no */ + return y; +} +/* Returns 1 if b < 0, else 0, by reading bit 63 after widening; relies on unsigned long long being 64 bits wide. */ +static unsigned char negative(signed char b) +{ + unsigned long long x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */ + x >>= 63; /* 1: yes; 0: no */ + return x; +} +/* Select the table entry for window position pos and digit b: entry 5*pos+|b| (entry 5*pos+4 is taken only for b == -4), then negate x when b is negative. Every candidate entry is passed through cmov_aff regardless of b. */ +static void choose_t(ge25519_aff *t, unsigned long long pos, signed char b) +{ + /* constant time */ + fe25519 v; + *t = ge25519_base_multiples_affine[5*pos+0]; + cmov_aff(t, &ge25519_base_multiples_affine[5*pos+1],equal(b,1) | equal(b,-1)); + cmov_aff(t, 
&ge25519_base_multiples_affine[5*pos+2],equal(b,2) | equal(b,-2)); + cmov_aff(t, &ge25519_base_multiples_affine[5*pos+3],equal(b,3) | equal(b,-3)); + cmov_aff(t, &ge25519_base_multiples_affine[5*pos+4],equal(b,-4)); + fe25519_neg(&v, &t->x); + fe25519_cmov(&t->x, &v, negative(b)); +} +#endif +/* r = (x, y, z, t) = (0, 1, 1, 0) */ +static void setneutral(ge25519 *r) +{ + fe25519_setzero(&r->x); + fe25519_setone(&r->y); + fe25519_setone(&r->z); + fe25519_setzero(&r->t); +} + +/* ******************************************************************** + * EXPORTED FUNCTIONS + ******************************************************************** */ +/* Unpacks p into r: y comes from fe25519_unpack(p), x is recovered as a square root of (y^2-1)/(d*y^2+1), and the root whose parity differs from the top bit of p[31] is kept (hence "neg"); z = 1 and t = x*y. */ +/* return 0 on success, -1 otherwise */ +int ge25519_unpackneg_vartime(ge25519_p3 *r, const unsigned char p[32]) +{ + unsigned char par; + fe25519 t, chk, num, den, den2, den4, den6; + fe25519_setone(&r->z); + par = p[31] >> 7; + fe25519_unpack(&r->y, p); + fe25519_square(&num, &r->y); /* x = y^2 */ + fe25519_mul(&den, &num, &ge25519_ecd); /* den = dy^2 */ + fe25519_sub(&num, &num, &r->z); /* x = y^2-1 */ + fe25519_add(&den, &r->z, &den); /* den = dy^2+1 */ + + /* Computation of sqrt(num/den) */ + /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */ + fe25519_square(&den2, &den); + fe25519_square(&den4, &den2); + fe25519_mul(&den6, &den4, &den2); + fe25519_mul(&t, &den6, &num); + fe25519_mul(&t, &t, &den); + + fe25519_pow2523(&t, &t); + /* 2. computation of r->x = t * num * den^3 */ + fe25519_mul(&t, &t, &num); + fe25519_mul(&t, &t, &den); + fe25519_mul(&t, &t, &den); + fe25519_mul(&r->x, &t, &den); + + /* 3. Check whether sqrt computation gave correct result, multiply by sqrt(-1) if not: */ + fe25519_square(&chk, &r->x); + fe25519_mul(&chk, &chk, &den); + if (!fe25519_iseq_vartime(&chk, &num)) + fe25519_mul(&r->x, &r->x, &ge25519_sqrtm1); + + /* 4. 
Now we have one of the two square roots, except if input was not a square */ + fe25519_square(&chk, &r->x); + fe25519_mul(&chk, &chk, &den); + if (!fe25519_iseq_vartime(&chk, &num)) + return -1; + + /* 5. Choose the desired square root according to parity: */ + if(fe25519_getparity(&r->x) != (1-par)) + fe25519_neg(&r->x, &r->x); + + fe25519_mul(&r->t, &r->x, &r->y); + return 0; +} +/* r = packed y/z, with the parity of x/z XORed into the top bit of r[31]. */ +void ge25519_pack(unsigned char r[32], const ge25519_p3 *p) +{ + fe25519 tx, ty, zi; + fe25519_invert(&zi, &p->z); + fe25519_mul(&tx, &p->x, &zi); + fe25519_mul(&ty, &p->y, &zi); + fe25519_pack(r, &ty); + r[31] ^= fe25519_getparity(&tx) << 7; +} +/* Returns 1 if x is zero and y equals z, else 0. */ +int ge25519_isneutral_vartime(const ge25519_p3 *p) +{ + int ret = 1; + if(!fe25519_iszero(&p->x)) ret = 0; + if(!fe25519_iseq_vartime(&p->y, &p->z)) ret = 0; + return ret; +} + +/* computes [s1]p1 + [s2]p2 */ +void ge25519_double_scalarmult_vartime(ge25519_p3 *r, const ge25519_p3 *p1, const sc25519 *s1, const ge25519_p3 *p2, const sc25519 *s2) +{ + ge25519_p1p1 tp1p1; + ge25519_p3 pre[16]; /* pre[4*j + i] = [i]p1 + [j]p2 for i, j in 0..3 */ + unsigned char b[127]; + int i; + + /* precomputation s2 s1 */ + setneutral(pre); /* 00 00 */ + pre[1] = *p1; /* 00 01 */ + dbl_p1p1(&tp1p1,(ge25519_p2 *)p1); p1p1_to_p3( &pre[2], &tp1p1); /* 00 10 */ + add_p1p1(&tp1p1,&pre[1], &pre[2]); p1p1_to_p3( &pre[3], &tp1p1); /* 00 11 */ + pre[4] = *p2; /* 01 00 */ + add_p1p1(&tp1p1,&pre[1], &pre[4]); p1p1_to_p3( &pre[5], &tp1p1); /* 01 01 */ + add_p1p1(&tp1p1,&pre[2], &pre[4]); p1p1_to_p3( &pre[6], &tp1p1); /* 01 10 */ + add_p1p1(&tp1p1,&pre[3], &pre[4]); p1p1_to_p3( &pre[7], &tp1p1); /* 01 11 */ + dbl_p1p1(&tp1p1,(ge25519_p2 *)p2); p1p1_to_p3( &pre[8], &tp1p1); /* 10 00 */ + add_p1p1(&tp1p1,&pre[1], &pre[8]); p1p1_to_p3( &pre[9], &tp1p1); /* 10 01 */ + dbl_p1p1(&tp1p1,(ge25519_p2 *)&pre[5]); p1p1_to_p3(&pre[10], &tp1p1); /* 10 10 */ + add_p1p1(&tp1p1,&pre[3], &pre[8]); p1p1_to_p3(&pre[11], &tp1p1); /* 10 11 */ + add_p1p1(&tp1p1,&pre[4], &pre[8]); p1p1_to_p3(&pre[12], &tp1p1); /* 11 00 */ + 
add_p1p1(&tp1p1,&pre[1],&pre[12]); p1p1_to_p3(&pre[13], &tp1p1); /* 11 01 */ + add_p1p1(&tp1p1,&pre[2],&pre[12]); p1p1_to_p3(&pre[14], &tp1p1); /* 11 10 */ + add_p1p1(&tp1p1,&pre[3],&pre[12]); p1p1_to_p3(&pre[15], &tp1p1); /* 11 11 */ +/* after the call below each b[k] holds two bits of s1 in bits 0-1 and the matching two bits of s2 in bits 2-3, so it indexes pre[] directly */ + sc25519_2interleave2(b,s1,s2); + + /* scalar multiplication */ + *r = pre[b[126]]; + for(i=125;i>=0;i--) + { + dbl_p1p1(&tp1p1, (ge25519_p2 *)r); /* two doublings per 2-bit digit */ + p1p1_to_p2((ge25519_p2 *) r, &tp1p1); + dbl_p1p1(&tp1p1, (ge25519_p2 *)r); + if(b[i]!=0) + { + p1p1_to_p3(r, &tp1p1); + add_p1p1(&tp1p1, r, &pre[b[i]]); + } + if(i != 0) p1p1_to_p2((ge25519_p2 *)r, &tp1p1); /* t is only materialised on the last iteration */ + else p1p1_to_p3(r, &tp1p1); + } +} +/* r = [s] * base point: the 85 signed digits from sc25519_window3 each select a precomputed affine multiple via choose_t, and the selections are summed with ge25519_mixadd2. Not compiled when VERIFYONLY is defined. */ +#ifndef VERIFYONLY +void ge25519_scalarmult_base(ge25519_p3 *r, const sc25519 *s) +{ + signed char b[85]; + int i; + ge25519_aff t; + sc25519_window3(b,s); + + choose_t((ge25519_aff *)r, 0, b[0]); + fe25519_setone(&r->z); + fe25519_mul(&r->t, &r->x, &r->y); + for(i=1;i<85;i++) + { + choose_t(&t, (unsigned long long) i, b[i]); + ge25519_mixadd2(r, &t); + } +} +#endif diff --git a/usr.bin/signify/sc25519.c b/usr.bin/signify/sc25519.c new file mode 100644 index 0000000..0e324eb --- /dev/null +++ b/usr.bin/signify/sc25519.c @@ -0,0 +1,306 @@ +/* $OpenBSD: sc25519.c,v 1.1 2014/07/22 00:41:19 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/sc25519.c + */ + +#include "sc25519.h" + +/*Arithmetic modulo the group order m = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989 */ +/* m as 32 byte-sized limbs, least significant first */ +static const crypto_uint32 m[32] = {0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2, 0xDE, 0xF9, 0xDE, 0x14, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; +/* 33-limb constant multiplied with the high limbs of x in barrett_reduce; presumably the Barrett constant floor(b^(2k)/m) from HAC 14.42 - TODO confirm */ +static const crypto_uint32 mu[33] = {0x1B, 0x13, 0x2C, 0x0A, 0xA3, 0xE5, 0x9C, 0xED, 0xA7, 0x29, 0x63, 0x08, 0x5D, 0x21, 0x06, 0x21, + 0xEB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F}; +/* Returns 1 if a < b, else 0, computed without a branch. */ +static crypto_uint32 lt(crypto_uint32 a,crypto_uint32 b) /* 16-bit inputs */ +{ + unsigned int x = a; + x -= (unsigned int) b; /* 0..65535: no; 4294901761..4294967295: yes */ + x >>= 31; /* 0: no; 1: yes */ + return x; +} +/* Computes t = r - m limb by limb; if the subtraction did not borrow, r is replaced by t, otherwise r is left unchanged. The choice is made with a mask rather than a branch. */ +/* Reduce coefficients of r before calling reduce_add_sub */ +static void reduce_add_sub(sc25519 *r) +{ + crypto_uint32 pb = 0; + crypto_uint32 b; + crypto_uint32 mask; + int i; + unsigned char t[32]; + + for(i=0;i<32;i++) + { + pb += m[i]; + b = lt(r->v[i],pb); + t[i] = r->v[i]-pb+(b<<8); + pb = b; + } + mask = b - 1; /* all ones when there was no final borrow, zero otherwise */ + for(i=0;i<32;i++) + r->v[i] ^= mask & (r->v[i] ^ t[i]); +} +/* Reduces the 64-limb value x modulo m and stores the 32-limb result in r. */ +/* Reduce coefficients of x before calling barrett_reduce */ +static void barrett_reduce(sc25519 *r, const crypto_uint32 x[64]) +{ + /* See HAC, Alg. 
14.42 */ + int i,j; + crypto_uint32 q2[66]; + crypto_uint32 *q3 = q2 + 33; + crypto_uint32 r1[33]; + crypto_uint32 r2[33]; + crypto_uint32 carry; + crypto_uint32 pb = 0; + crypto_uint32 b; + + for (i = 0;i < 66;++i) q2[i] = 0; + for (i = 0;i < 33;++i) r2[i] = 0; +/* q2 = mu * (limbs 31..63 of x); only product columns at index 31 and above are accumulated */ + for(i=0;i<33;i++) + for(j=0;j<33;j++) + if(i+j >= 31) q2[i+j] += mu[i]*x[j+31]; + carry = q2[31] >> 8; + q2[32] += carry; + carry = q2[32] >> 8; + q2[33] += carry; +/* r1 = low 33 limbs of x; r2 = low 33 limbs of m * q3 */ + for(i=0;i<33;i++)r1[i] = x[i]; + for(i=0;i<32;i++) + for(j=0;j<33;j++) + if(i+j < 33) r2[i+j] += m[i]*q3[j]; + + for(i=0;i<32;i++) + { + carry = r2[i] >> 8; + r2[i+1] += carry; + r2[i] &= 0xff; + } +/* r = r1 - r2 over the low 32 limbs, propagating the borrow through pb */ + for(i=0;i<32;i++) + { + pb += r2[i]; + b = lt(r1[i],pb); + r->v[i] = r1[i]-pb+(b<<8); + pb = b; + } + + /* XXX: Can it really happen that r<0?, See HAC, Alg 14.42, Step 3 + * If so: Handle it here! + */ +/* up to two conditional subtractions of m */ + reduce_add_sub(r); + reduce_add_sub(r); +} +/* r = x mod m; the 32 input bytes are zero-extended to 64 limbs for barrett_reduce. */ +void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]) +{ + int i; + crypto_uint32 t[64]; + for(i=0;i<32;i++) t[i] = x[i]; + for(i=32;i<64;++i) t[i] = 0; + barrett_reduce(r, t); +} +/* Plain copy of 16 bytes into r->v; no reduction. */ +void shortsc25519_from16bytes(shortsc25519 *r, const unsigned char x[16]) +{ + int i; + for(i=0;i<16;i++) r->v[i] = x[i]; +} +/* r = x mod m for a 64-byte x. */ +void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]) +{ + int i; + crypto_uint32 t[64]; + for(i=0;i<64;i++) t[i] = x[i]; + barrett_reduce(r, t); +} +/* Widens x: the low 16 limbs are copied, the high 16 limbs are zeroed. */ +void sc25519_from_shortsc(sc25519 *r, const shortsc25519 *x) +{ + int i; + for(i=0;i<16;i++) + r->v[i] = x->v[i]; + for(i=0;i<16;i++) + r->v[16+i] = 0; +} +/* Copies the 32 limbs of x out as bytes. */ +void sc25519_to32bytes(unsigned char r[32], const sc25519 *x) +{ + int i; + for(i=0;i<32;i++) r[i] = x->v[i]; +} +/* Returns 1 if all 32 limbs are zero; returns 0 at the first nonzero limb. */ +int sc25519_iszero_vartime(const sc25519 *x) +{ + int i; + for(i=0;i<32;i++) + if(x->v[i] != 0) return 0; + return 1; +} +/* Returns 1 if limbs 16..31 are all zero, else 0. */ +int sc25519_isshort_vartime(const sc25519 *x) +{ + int i; + for(i=31;i>15;i--) + if(x->v[i] != 0) return 0; + return 1; +} +/* Returns 1 if x < y, comparing limbs from index 31 downwards, else 0. */ +int sc25519_lt_vartime(const sc25519 *x, const sc25519 *y) +{ + int i; + for(i=31;i>=0;i--) + { + 
if(x->v[i] < y->v[i]) return 1; + if(x->v[i] > y->v[i]) return 0; + } + return 0; +} + +void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y) +{ + int i, carry; + for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i]; + for(i=0;i<31;i++) + { + carry = r->v[i] >> 8; + r->v[i+1] += carry; + r->v[i] &= 0xff; + } + reduce_add_sub(r); +} + +void sc25519_sub_nored(sc25519 *r, const sc25519 *x, const sc25519 *y) +{ + crypto_uint32 b = 0; + crypto_uint32 t; + int i; + for(i=0;i<32;i++) + { + t = x->v[i] - y->v[i] - b; + r->v[i] = t & 255; + b = (t >> 8) & 1; + } +} + +void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y) +{ + int i,j,carry; + crypto_uint32 t[64]; + for(i=0;i<64;i++)t[i] = 0; + + for(i=0;i<32;i++) + for(j=0;j<32;j++) + t[i+j] += x->v[i] * y->v[j]; + + /* Reduce coefficients */ + for(i=0;i<63;i++) + { + carry = t[i] >> 8; + t[i+1] += carry; + t[i] &= 0xff; + } + + barrett_reduce(r, t); +} + +void sc25519_mul_shortsc(sc25519 *r, const sc25519 *x, const shortsc25519 *y) +{ + sc25519 t; + sc25519_from_shortsc(&t, y); + sc25519_mul(r, x, &t); +} + +void sc25519_window3(signed char r[85], const sc25519 *s) +{ + char carry; + int i; + for(i=0;i<10;i++) + { + r[8*i+0] = s->v[3*i+0] & 7; + r[8*i+1] = (s->v[3*i+0] >> 3) & 7; + r[8*i+2] = (s->v[3*i+0] >> 6) & 7; + r[8*i+2] ^= (s->v[3*i+1] << 2) & 7; + r[8*i+3] = (s->v[3*i+1] >> 1) & 7; + r[8*i+4] = (s->v[3*i+1] >> 4) & 7; + r[8*i+5] = (s->v[3*i+1] >> 7) & 7; + r[8*i+5] ^= (s->v[3*i+2] << 1) & 7; + r[8*i+6] = (s->v[3*i+2] >> 2) & 7; + r[8*i+7] = (s->v[3*i+2] >> 5) & 7; + } + r[8*i+0] = s->v[3*i+0] & 7; + r[8*i+1] = (s->v[3*i+0] >> 3) & 7; + r[8*i+2] = (s->v[3*i+0] >> 6) & 7; + r[8*i+2] ^= (s->v[3*i+1] << 2) & 7; + r[8*i+3] = (s->v[3*i+1] >> 1) & 7; + r[8*i+4] = (s->v[3*i+1] >> 4) & 7; + + /* Making it signed */ + carry = 0; + for(i=0;i<84;i++) + { + r[i] += carry; + r[i+1] += r[i] >> 3; + r[i] &= 7; + carry = r[i] >> 2; + r[i] -= carry<<3; + } + r[84] += carry; +} + +void sc25519_window5(signed char 
r[51], const sc25519 *s) +{ + char carry; + int i; + for(i=0;i<6;i++) + { + r[8*i+0] = s->v[5*i+0] & 31; + r[8*i+1] = (s->v[5*i+0] >> 5) & 31; + r[8*i+1] ^= (s->v[5*i+1] << 3) & 31; + r[8*i+2] = (s->v[5*i+1] >> 2) & 31; + r[8*i+3] = (s->v[5*i+1] >> 7) & 31; + r[8*i+3] ^= (s->v[5*i+2] << 1) & 31; + r[8*i+4] = (s->v[5*i+2] >> 4) & 31; + r[8*i+4] ^= (s->v[5*i+3] << 4) & 31; + r[8*i+5] = (s->v[5*i+3] >> 1) & 31; + r[8*i+6] = (s->v[5*i+3] >> 6) & 31; + r[8*i+6] ^= (s->v[5*i+4] << 2) & 31; + r[8*i+7] = (s->v[5*i+4] >> 3) & 31; + } + r[8*i+0] = s->v[5*i+0] & 31; + r[8*i+1] = (s->v[5*i+0] >> 5) & 31; + r[8*i+1] ^= (s->v[5*i+1] << 3) & 31; + r[8*i+2] = (s->v[5*i+1] >> 2) & 31; + + /* Making it signed */ + carry = 0; + for(i=0;i<50;i++) + { + r[i] += carry; + r[i+1] += r[i] >> 5; + r[i] &= 31; + carry = r[i] >> 4; + r[i] -= carry<<5; + } + r[50] += carry; +} + +void sc25519_2interleave2(unsigned char r[127], const sc25519 *s1, const sc25519 *s2) +{ + int i; + for(i=0;i<31;i++) + { + r[4*i] = ( s1->v[i] & 3) ^ (( s2->v[i] & 3) << 2); + r[4*i+1] = ((s1->v[i] >> 2) & 3) ^ (((s2->v[i] >> 2) & 3) << 2); + r[4*i+2] = ((s1->v[i] >> 4) & 3) ^ (((s2->v[i] >> 4) & 3) << 2); + r[4*i+3] = ((s1->v[i] >> 6) & 3) ^ (((s2->v[i] >> 6) & 3) << 2); + } + r[124] = ( s1->v[31] & 3) ^ (( s2->v[31] & 3) << 2); + r[125] = ((s1->v[31] >> 2) & 3) ^ (((s2->v[31] >> 2) & 3) << 2); + r[126] = ((s1->v[31] >> 4) & 3) ^ (((s2->v[31] >> 4) & 3) << 2); +} diff --git a/usr.bin/signify/sc25519.h b/usr.bin/signify/sc25519.h new file mode 100644 index 0000000..5211fa8 --- /dev/null +++ b/usr.bin/signify/sc25519.h @@ -0,0 +1,80 @@ +/* $OpenBSD: sc25519.h,v 1.1 2014/07/22 00:41:19 deraadt Exp $ */ + +/* + * Public Domain, Authors: Daniel J. Bernstein, Niels Duif, Tanja Lange, + * Peter Schwabe, Bo-Yin Yang. 
+ * Copied from supercop-20130419/crypto_sign/ed25519/ref/sc25519.h + */ + +#ifndef SC25519_H +#define SC25519_H + +#include "crypto_api.h" + +#define sc25519 crypto_sign_ed25519_ref_sc25519 +#define shortsc25519 crypto_sign_ed25519_ref_shortsc25519 +#define sc25519_from32bytes crypto_sign_ed25519_ref_sc25519_from32bytes +#define shortsc25519_from16bytes crypto_sign_ed25519_ref_shortsc25519_from16bytes +#define sc25519_from64bytes crypto_sign_ed25519_ref_sc25519_from64bytes +#define sc25519_from_shortsc crypto_sign_ed25519_ref_sc25519_from_shortsc +#define sc25519_to32bytes crypto_sign_ed25519_ref_sc25519_to32bytes +#define sc25519_iszero_vartime crypto_sign_ed25519_ref_sc25519_iszero_vartime +#define sc25519_isshort_vartime crypto_sign_ed25519_ref_sc25519_isshort_vartime +#define sc25519_lt_vartime crypto_sign_ed25519_ref_sc25519_lt_vartime +#define sc25519_add crypto_sign_ed25519_ref_sc25519_add +#define sc25519_sub_nored crypto_sign_ed25519_ref_sc25519_sub_nored +#define sc25519_mul crypto_sign_ed25519_ref_sc25519_mul +#define sc25519_mul_shortsc crypto_sign_ed25519_ref_sc25519_mul_shortsc +#define sc25519_window3 crypto_sign_ed25519_ref_sc25519_window3 +#define sc25519_window5 crypto_sign_ed25519_ref_sc25519_window5 +#define sc25519_2interleave2 crypto_sign_ed25519_ref_sc25519_2interleave2 + +typedef struct +{ + crypto_uint32 v[32]; +} +sc25519; + +typedef struct +{ + crypto_uint32 v[16]; +} +shortsc25519; + +void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]); + +void shortsc25519_from16bytes(shortsc25519 *r, const unsigned char x[16]); + +void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]); + +void sc25519_from_shortsc(sc25519 *r, const shortsc25519 *x); + +void sc25519_to32bytes(unsigned char r[32], const sc25519 *x); + +int sc25519_iszero_vartime(const sc25519 *x); + +int sc25519_isshort_vartime(const sc25519 *x); + +int sc25519_lt_vartime(const sc25519 *x, const sc25519 *y); + +void sc25519_add(sc25519 *r, const sc25519 *x, const 
sc25519 *y); + +void sc25519_sub_nored(sc25519 *r, const sc25519 *x, const sc25519 *y); + +void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y); + +void sc25519_mul_shortsc(sc25519 *r, const sc25519 *x, const shortsc25519 *y); + +/* Convert s into a representation of the form \sum_{i=0}^{84} r[i] 2^{3i} + * with r[i] in {-4,...,3} + */ +void sc25519_window3(signed char r[85], const sc25519 *s); + +/* Convert s into a representation of the form \sum_{i=0}^{50} r[i] 2^{5i} + * with r[i] in {-16,...,15} + */ +void sc25519_window5(signed char r[51], const sc25519 *s); + +void sc25519_2interleave2(unsigned char r[127], const sc25519 *s1, const sc25519 *s2); + +#endif diff --git a/usr.bin/signify/signify.1 b/usr.bin/signify/signify.1 new file mode 100644 index 0000000..ab612d5 --- /dev/null +++ b/usr.bin/signify/signify.1 @@ -0,0 +1,206 @@ +.\" $OpenBSD: signify.1,v 1.50 2020/04/05 06:34:20 deraadt Exp $ +.\" +.\"Copyright (c) 2013 Marc Espie <espie@openbsd.org> +.\"Copyright (c) 2013 Ted Unangst <tedu@openbsd.org> +.\" +.\"Permission to use, copy, modify, and distribute this software for any +.\"purpose with or without fee is hereby granted, provided that the above +.\"copyright notice and this permission notice appear in all copies. +.\" +.\"THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\"WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\"MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\"ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\"WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\"ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\"OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.Dd $Mdocdate: April 5 2020 $ +.Dt SIGNIFY 1 +.Os +.Sh NAME +.Nm signify +.Nd cryptographically sign and verify files +.Sh SYNOPSIS +.Nm signify +.Fl C +.Op Fl q +.Op Fl p Ar pubkey +.Op Fl t Ar keytype +.Fl x Ar sigfile +.Op Ar +.Nm signify +.Fl G +.Op Fl n +.Op Fl c Ar comment +.Fl p Ar pubkey +.Fl s Ar seckey +.Nm signify +.Fl S +.Op Fl enz +.Op Fl x Ar sigfile +.Fl s Ar seckey +.Fl m Ar message +.Nm signify +.Fl V +.Op Fl eqz +.Op Fl p Ar pubkey +.Op Fl t Ar keytype +.Op Fl x Ar sigfile +.Fl m Ar message +.Sh DESCRIPTION +The +.Nm +utility creates and verifies cryptographic signatures. +A signature verifies the integrity of a +.Ar message . +The mode of operation is selected with the following options: +.Bl -tag -width Dsssigfile +.It Fl C +Verify a signed checksum list, and then verify the checksum for +each file. +If no files are specified, all of them are checked. +.Ar sigfile +should be the signed output of +.Xr sha256 1 . +.It Fl G +Generate a new key pair. +Keynames should follow the convention of +.Pa keyname.pub +and +.Pa keyname.sec +for the public and secret keys, respectively. +.It Fl S +Sign the specified message file and create a signature. +.It Fl V +Verify the message and signature match. +.El +.Pp +The other options are as follows: +.Bl -tag -width Dsssignature +.It Fl c Ar comment +Specify the comment to be added during key generation. +.It Fl e +When signing, embed the message after the signature. +When verifying, extract the message from the signature. +(This requires that the signature was created using +.Fl e +and creates a new message file as output.) +.It Fl m Ar message +When signing, the file containing the message to sign. +When verifying, the file containing the message to verify. +When verifying with +.Fl e , +the file to create. +.It Fl n +When generating a key pair, do not ask for a passphrase. +Otherwise, +.Nm +will prompt the user for a passphrase to protect the secret key. 
+When signing with +.Fl z , +store a zero time stamp in the +.Xr gzip 1 +header. +.It Fl p Ar pubkey +Public key produced by +.Fl G , +and used by +.Fl V +to check a signature. +.It Fl q +Quiet mode. +Suppress informational output. +.It Fl s Ar seckey +Secret (private) key produced by +.Fl G , +and used by +.Fl S +to sign a message. +.It Fl t Ar keytype +When deducing the correct key to check a signature, make sure +the actual key matches +.Pa /etc/signify/*-keytype.pub . +.It Fl x Ar sigfile +The signature file to create or verify. +The default is +.Ar message Ns .sig . +.It Fl z +Sign and verify +.Xr gzip 1 +archives, where the signing data +is embedded in the +.Xr gzip 1 +header. +.El +.Pp +The key and signature files created by +.Nm +have the same format. +The first line of the file is a free form text comment that may be edited, +so long as it does not exceed a single line. +Signature comments will be generated based on the name of the secret +key used for signing. +This comment can then be used as a hint for the name of the public key +when verifying. +The second line of the file is the actual key or signature base64 encoded. +.Sh EXIT STATUS +.Ex -std signify +It may fail because of one of the following reasons: +.Pp +.Bl -bullet -compact +.It +Some necessary files do not exist. +.It +Entered passphrase is incorrect. +.It +The message file was corrupted and its signature does not match. +.It +The message file is too large. 
+.El +.Sh EXAMPLES +Create a new key pair: +.Dl $ signify -G -p newkey.pub -s newkey.sec +.Pp +Sign a file, specifying a signature name: +.Dl $ signify -S -s key.sec -m message.txt -x msg.sig +.Pp +Verify a signature, using the default signature name: +.Dl $ signify -V -p key.pub -m generalsorders.txt +.Pp +Verify a release directory containing +.Pa SHA256.sig +and a full set of release files: +.Bd -literal -offset indent -compact +$ signify -C -p /etc/signify/openbsd-68-base.pub -x SHA256.sig +.Ed +.Pp +Verify a bsd.rd before an upgrade: +.Bd -literal -offset indent -compact +$ signify -C -p /etc/signify/openbsd-68-base.pub -x SHA256.sig bsd.rd +.Ed +.Pp +Sign a gzip archive: +.Bd -literal -offset indent -compact +$ signify -Sz -s key-arc.sec -m in.tgz -x out.tgz +.Ed +.Pp +Verify a gzip pipeline: +.Bd -literal -offset indent -compact +$ ftp url | signify -Vz -t arc | tar ztf - +.Ed +.Sh SEE ALSO +.Xr fw_update 1 , +.Xr gzip 1 , +.Xr pkg_add 1 , +.Xr sha256 1 , +.Xr sysupgrade 8 +.Sh HISTORY +The +.Nm +command first appeared in +.Ox 5.5 . +.Sh AUTHORS +.An -nosplit +.An Ted Unangst Aq Mt tedu@openbsd.org +and +.An Marc Espie Aq Mt espie@openbsd.org . diff --git a/usr.bin/signify/signify.c b/usr.bin/signify/signify.c new file mode 100644 index 0000000..6ebf33b --- /dev/null +++ b/usr.bin/signify/signify.c @@ -0,0 +1,918 @@ +/* $OpenBSD: signify.c,v 1.135 2020/01/21 12:13:21 tb Exp $ */ +/* + * Copyright (c) 2013 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/cdefs.h> +#include <sys/stat.h> + +#include <netinet/in.h> +#include <resolv.h> + +#include <limits.h> +#include <stdint.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <ohash.h> +#include <err.h> +#include <unistd.h> +#include <readpassphrase.h> +#include <util.h> +#include <sha2.h> + +#include "crypto_api.h" +#include "signify.h" + +#define SIGBYTES crypto_sign_ed25519_BYTES +#define SECRETBYTES crypto_sign_ed25519_SECRETKEYBYTES +#define PUBLICBYTES crypto_sign_ed25519_PUBLICKEYBYTES + +#define PKALG "Ed" +#define KDFALG "BK" +#define KEYNUMLEN 8 + +#define COMMENTHDR "untrusted comment: " +#define COMMENTHDRLEN 19 +#define COMMENTMAXLEN 1024 +#define VERIFYWITH "verify with " + +struct enckey { + uint8_t pkalg[2]; + uint8_t kdfalg[2]; + uint32_t kdfrounds; + uint8_t salt[16]; + uint8_t checksum[8]; + uint8_t keynum[KEYNUMLEN]; + uint8_t seckey[SECRETBYTES]; +}; + +struct pubkey { + uint8_t pkalg[2]; + uint8_t keynum[KEYNUMLEN]; + uint8_t pubkey[PUBLICBYTES]; +}; + +struct sig { + uint8_t pkalg[2]; + uint8_t keynum[KEYNUMLEN]; + uint8_t sig[SIGBYTES]; +}; + +static void __dead +usage(const char *error) +{ + if (error) + fprintf(stderr, "%s\n", error); + fprintf(stderr, "usage:" +#ifndef VERIFYONLY + "\t%1$s -C [-q] [-p pubkey] [-t keytype] -x sigfile [file ...]\n" + "\t%1$s -G [-n] [-c comment] -p pubkey -s seckey\n" + "\t%1$s -S [-enz] [-x sigfile] -s seckey -m message\n" +#endif + "\t%1$s -V [-eqz] [-p pubkey] [-t keytype] [-x sigfile] -m message\n", + getprogname()); + exit(1); +} + +int +xopen(const char *fname, int oflags, mode_t mode) +{ + 
struct stat sb; + int fd; + + if (strcmp(fname, "-") == 0) { + if ((oflags & O_WRONLY)) + fd = dup(STDOUT_FILENO); + else + fd = dup(STDIN_FILENO); + if (fd == -1) + err(1, "dup failed"); + } else { + fd = open(fname, oflags, mode); + if (fd == -1) + err(1, "can't open %s for %s", fname, + (oflags & O_WRONLY) ? "writing" : "reading"); + } + if (fstat(fd, &sb) == -1 || S_ISDIR(sb.st_mode)) + errx(1, "not a valid file: %s", fname); + return fd; +} + +void * +xmalloc(size_t len) +{ + void *p; + + if (!(p = malloc(len))) + err(1, "malloc %zu", len); + return p; +} + +static size_t +parseb64file(const char *filename, char *b64, void *buf, size_t buflen, + char *comment) +{ + char *commentend, *b64end; + + commentend = strchr(b64, '\n'); + if (!commentend || commentend - b64 <= COMMENTHDRLEN || + memcmp(b64, COMMENTHDR, COMMENTHDRLEN) != 0) + errx(1, "invalid comment in %s; must start with '%s'", + filename, COMMENTHDR); + *commentend = '\0'; + if (comment) { + if (strlcpy(comment, b64 + COMMENTHDRLEN, + COMMENTMAXLEN) >= COMMENTMAXLEN) + errx(1, "comment too long"); + } + if (!(b64end = strchr(commentend + 1, '\n'))) + errx(1, "missing new line after base64 in %s", filename); + *b64end = '\0'; + if (b64_pton(commentend + 1, buf, buflen) != buflen) + errx(1, "unable to parse %s", filename); + if (memcmp(buf, PKALG, 2) != 0) + errx(1, "unsupported file %s", filename); + *commentend = '\n'; + *b64end = '\n'; + return b64end - b64 + 1; +} + +static void +readb64file(const char *filename, void *buf, size_t buflen, char *comment) +{ + char b64[2048]; + int rv, fd; + + fd = xopen(filename, O_RDONLY | O_NOFOLLOW, 0); + if ((rv = read(fd, b64, sizeof(b64) - 1)) == -1) + err(1, "read from %s", filename); + b64[rv] = '\0'; + parseb64file(filename, b64, buf, buflen, comment); + explicit_bzero(b64, sizeof(b64)); + close(fd); +} + +static uint8_t * +readmsg(const char *filename, unsigned long long *msglenp) +{ + unsigned long long msglen = 0; + uint8_t *msg = NULL; + struct stat sb; 
+ ssize_t x, space; + int fd; + const unsigned long long maxmsgsize = 1UL << 30; + + fd = xopen(filename, O_RDONLY | O_NOFOLLOW, 0); + if (fstat(fd, &sb) == 0 && S_ISREG(sb.st_mode)) { + if (sb.st_size > maxmsgsize) + errx(1, "msg too large in %s", filename); + space = sb.st_size + 1; + } else { + space = 64 * 1024 - 1; + } + + msg = xmalloc(space + 1); + while (1) { + if (space == 0) { + if (msglen * 2 > maxmsgsize) + errx(1, "msg too large in %s", filename); + space = msglen; + if (!(msg = realloc(msg, msglen + space + 1))) + err(1, "realloc"); + } + if ((x = read(fd, msg + msglen, space)) == -1) + err(1, "read from %s", filename); + if (x == 0) + break; + space -= x; + msglen += x; + } + + msg[msglen] = '\0'; + close(fd); + + *msglenp = msglen; + return msg; +} + +void +writeall(int fd, const void *buf, size_t buflen, const char *filename) +{ + ssize_t x; + + while (buflen != 0) { + if ((x = write(fd, buf, buflen)) == -1) + err(1, "write to %s", filename); + buflen -= x; + buf = (char *)buf + x; + } +} + +#ifndef VERIFYONLY +static char * +createheader(const char *comment, const void *buf, size_t buflen) +{ + char *header; + char b64[1024]; + + if (b64_ntop(buf, buflen, b64, sizeof(b64)) == -1) + errx(1, "base64 encode failed"); + if (asprintf(&header, "%s%s\n%s\n", COMMENTHDR, comment, b64) == -1) + err(1, "asprintf failed"); + explicit_bzero(b64, sizeof(b64)); + return header; +} + +static void +writekeyfile(const char *filename, const char *comment, const void *buf, + size_t buflen, int oflags, mode_t mode) +{ + char *header; + int fd; + + fd = xopen(filename, O_CREAT|oflags|O_NOFOLLOW|O_WRONLY, mode); + header = createheader(comment, buf, buflen); + writeall(fd, header, strlen(header), filename); + freezero(header, strlen(header)); + close(fd); +} + +static void +kdf(uint8_t *salt, size_t saltlen, int rounds, int allowstdin, int confirm, + uint8_t *key, size_t keylen) +{ + char pass[1024]; + int rppflags = RPP_ECHO_OFF; + const char *errstr = NULL; + + if 
(rounds == 0) { + memset(key, 0, keylen); + return; + } + + if (allowstdin && !isatty(STDIN_FILENO)) + rppflags |= RPP_STDIN; + if (!readpassphrase("passphrase: ", pass, sizeof(pass), rppflags)) + errx(1, "unable to read passphrase"); + if (strlen(pass) == 0) + errx(1, "please provide a password"); + if (confirm && !(rppflags & RPP_STDIN)) { + char pass2[1024]; + if (!readpassphrase("confirm passphrase: ", pass2, + sizeof(pass2), rppflags)) + errstr = "unable to read passphrase"; + if (!errstr && strcmp(pass, pass2) != 0) + errstr = "passwords don't match"; + explicit_bzero(pass2, sizeof(pass2)); + } + if (!errstr && bcrypt_pbkdf(pass, strlen(pass), salt, saltlen, key, + keylen, rounds) == -1) + errstr = "bcrypt pbkdf"; + explicit_bzero(pass, sizeof(pass)); + if (errstr) + errx(1, "%s", errstr); +} + +static void +signmsg(uint8_t *seckey, uint8_t *msg, unsigned long long msglen, + uint8_t *sig) +{ + unsigned long long siglen; + uint8_t *sigbuf; + + sigbuf = xmalloc(msglen + SIGBYTES); + crypto_sign_ed25519(sigbuf, &siglen, msg, msglen, seckey); + memcpy(sig, sigbuf, SIGBYTES); + free(sigbuf); +} + +static void +generate(const char *pubkeyfile, const char *seckeyfile, int rounds, + const char *comment) +{ + uint8_t digest[SHA512_DIGEST_LENGTH]; + struct pubkey pubkey; + struct enckey enckey; + uint8_t xorkey[sizeof(enckey.seckey)]; + uint8_t keynum[KEYNUMLEN]; + char commentbuf[COMMENTMAXLEN]; + SHA2_CTX ctx; + int i, nr; + + crypto_sign_ed25519_keypair(pubkey.pubkey, enckey.seckey); + arc4random_buf(keynum, sizeof(keynum)); + + SHA512Init(&ctx); + SHA512Update(&ctx, enckey.seckey, sizeof(enckey.seckey)); + SHA512Final(digest, &ctx); + + memcpy(enckey.pkalg, PKALG, 2); + memcpy(enckey.kdfalg, KDFALG, 2); + enckey.kdfrounds = htonl(rounds); + memcpy(enckey.keynum, keynum, KEYNUMLEN); + arc4random_buf(enckey.salt, sizeof(enckey.salt)); + kdf(enckey.salt, sizeof(enckey.salt), rounds, 1, 1, xorkey, sizeof(xorkey)); + memcpy(enckey.checksum, digest, 
sizeof(enckey.checksum)); + for (i = 0; i < sizeof(enckey.seckey); i++) + enckey.seckey[i] ^= xorkey[i]; + explicit_bzero(digest, sizeof(digest)); + explicit_bzero(xorkey, sizeof(xorkey)); + + nr = snprintf(commentbuf, sizeof(commentbuf), "%s secret key", comment); + if (nr < 0 || nr >= sizeof(commentbuf)) + errx(1, "comment too long"); + writekeyfile(seckeyfile, commentbuf, &enckey, + sizeof(enckey), O_EXCL, 0600); + explicit_bzero(&enckey, sizeof(enckey)); + + memcpy(pubkey.pkalg, PKALG, 2); + memcpy(pubkey.keynum, keynum, KEYNUMLEN); + nr = snprintf(commentbuf, sizeof(commentbuf), "%s public key", comment); + if (nr < 0 || nr >= sizeof(commentbuf)) + errx(1, "comment too long"); + writekeyfile(pubkeyfile, commentbuf, &pubkey, + sizeof(pubkey), O_EXCL, 0666); +} + +static const char * +check_keyname_compliance(const char *pubkeyfile, const char *seckeyfile) +{ + const char *pos; + size_t len; + + /* basename may or may not modify input */ + pos = strrchr(seckeyfile, '/'); + if (pos != NULL) + seckeyfile = pos + 1; + + len = strlen(seckeyfile); + if (len < 5) /* ?.key */ + goto bad; + if (strcmp(seckeyfile + len - 4, ".sec") != 0) + goto bad; + if (pubkeyfile != NULL) { + pos = strrchr(pubkeyfile, '/'); + if (pos != NULL) + pubkeyfile = pos + 1; + + if (strlen(pubkeyfile) != len) + goto bad; + if (strcmp(pubkeyfile + len - 4, ".pub") != 0) + goto bad; + if (strncmp(pubkeyfile, seckeyfile, len - 4) != 0) + goto bad; + } + + return seckeyfile; +bad: + errx(1, "please use naming scheme of keyname.pub and keyname.sec"); +} + +uint8_t * +createsig(const char *seckeyfile, const char *msgfile, uint8_t *msg, + unsigned long long msglen) +{ + struct enckey enckey; + uint8_t xorkey[sizeof(enckey.seckey)]; + struct sig sig; + char *sighdr; + uint8_t digest[SHA512_DIGEST_LENGTH]; + int i, nr, rounds; + SHA2_CTX ctx; + char comment[COMMENTMAXLEN], sigcomment[COMMENTMAXLEN]; + + readb64file(seckeyfile, &enckey, sizeof(enckey), comment); + + if (strcmp(seckeyfile, "-") == 0) { + 
nr = snprintf(sigcomment, sizeof(sigcomment), + "signature from %s", comment); + } else { + const char *keyname = check_keyname_compliance(NULL, + seckeyfile); + nr = snprintf(sigcomment, sizeof(sigcomment), + VERIFYWITH "%.*s.pub", (int)strlen(keyname) - 4, keyname); + } + if (nr < 0 || nr >= sizeof(sigcomment)) + errx(1, "comment too long"); + + if (memcmp(enckey.kdfalg, KDFALG, 2) != 0) + errx(1, "unsupported KDF"); + rounds = ntohl(enckey.kdfrounds); + kdf(enckey.salt, sizeof(enckey.salt), rounds, strcmp(msgfile, "-") != 0, + 0, xorkey, sizeof(xorkey)); + for (i = 0; i < sizeof(enckey.seckey); i++) + enckey.seckey[i] ^= xorkey[i]; + explicit_bzero(xorkey, sizeof(xorkey)); + SHA512Init(&ctx); + SHA512Update(&ctx, enckey.seckey, sizeof(enckey.seckey)); + SHA512Final(digest, &ctx); + if (memcmp(enckey.checksum, digest, sizeof(enckey.checksum)) != 0) + errx(1, "incorrect passphrase"); + explicit_bzero(digest, sizeof(digest)); + + signmsg(enckey.seckey, msg, msglen, sig.sig); + memcpy(sig.keynum, enckey.keynum, KEYNUMLEN); + explicit_bzero(&enckey, sizeof(enckey)); + + memcpy(sig.pkalg, PKALG, 2); + + sighdr = createheader(sigcomment, &sig, sizeof(sig)); + return sighdr; +} + +static void +sign(const char *seckeyfile, const char *msgfile, const char *sigfile, + int embedded) +{ + uint8_t *msg; + char *sighdr; + int fd; + unsigned long long msglen; + + msg = readmsg(msgfile, &msglen); + + sighdr = createsig(seckeyfile, msgfile, msg, msglen); + + fd = xopen(sigfile, O_CREAT|O_TRUNC|O_NOFOLLOW|O_WRONLY, 0666); + writeall(fd, sighdr, strlen(sighdr), sigfile); + free(sighdr); + if (embedded) + writeall(fd, msg, msglen, sigfile); + close(fd); + + free(msg); +} +#endif + +static void +verifymsg(struct pubkey *pubkey, uint8_t *msg, unsigned long long msglen, + struct sig *sig, int quiet) +{ + uint8_t *sigbuf, *dummybuf; + unsigned long long siglen, dummylen; + + if (memcmp(pubkey->keynum, sig->keynum, KEYNUMLEN) != 0) + errx(1, "verification failed: checked against wrong 
key"); + + siglen = SIGBYTES + msglen; + sigbuf = xmalloc(siglen); + dummybuf = xmalloc(siglen); + memcpy(sigbuf, sig->sig, SIGBYTES); + memcpy(sigbuf + SIGBYTES, msg, msglen); + if (crypto_sign_ed25519_open(dummybuf, &dummylen, sigbuf, siglen, + pubkey->pubkey) == -1) + errx(1, "signature verification failed"); + if (!quiet) + printf("Signature Verified\n"); + free(sigbuf); + free(dummybuf); +} + +static void +check_keytype(const char *pubkeyfile, const char *keytype) +{ + const char *p; + size_t typelen; + + if (!(p = strrchr(pubkeyfile, '-'))) + goto bad; + p++; + typelen = strlen(keytype); + if (strncmp(p, keytype, typelen) != 0) + goto bad; + if (strcmp(p + typelen, ".pub") != 0) + goto bad; + return; + +bad: + errx(1, "incorrect keytype: %s is not %s", pubkeyfile, keytype); +} + +static void +readpubkey(const char *pubkeyfile, struct pubkey *pubkey, + const char *sigcomment, const char *keytype) +{ + const char *safepath = "/etc/signify"; + char keypath[PATH_MAX]; + + if (!pubkeyfile) { + pubkeyfile = strstr(sigcomment, VERIFYWITH); + if (pubkeyfile && strchr(pubkeyfile, '/') == NULL) { + pubkeyfile += strlen(VERIFYWITH); + if (keytype) + check_keytype(pubkeyfile, keytype); + if (snprintf(keypath, sizeof(keypath), "%s/%s", + safepath, pubkeyfile) >= sizeof(keypath)) + errx(1, "name too long %s", pubkeyfile); + pubkeyfile = keypath; + } else + usage("must specify pubkey"); + } + readb64file(pubkeyfile, pubkey, sizeof(*pubkey), NULL); +} + +static void +verifysimple(const char *pubkeyfile, const char *msgfile, const char *sigfile, + int quiet, const char *keytype) +{ + char sigcomment[COMMENTMAXLEN]; + struct sig sig; + struct pubkey pubkey; + unsigned long long msglen; + uint8_t *msg; + + msg = readmsg(msgfile, &msglen); + + readb64file(sigfile, &sig, sizeof(sig), sigcomment); + readpubkey(pubkeyfile, &pubkey, sigcomment, keytype); + + verifymsg(&pubkey, msg, msglen, &sig, quiet); + + free(msg); +} + +static uint8_t * +verifyembedded(const char *pubkeyfile, 
const char *sigfile, + int quiet, unsigned long long *msglenp, const char *keytype) +{ + char sigcomment[COMMENTMAXLEN]; + struct sig sig; + struct pubkey pubkey; + unsigned long long msglen, siglen; + uint8_t *msg; + + msg = readmsg(sigfile, &msglen); + + siglen = parseb64file(sigfile, msg, &sig, sizeof(sig), sigcomment); + readpubkey(pubkeyfile, &pubkey, sigcomment, keytype); + + msglen -= siglen; + memmove(msg, msg + siglen, msglen); + msg[msglen] = 0; + + verifymsg(&pubkey, msg, msglen, &sig, quiet); + + *msglenp = msglen; + return msg; +} + +static void +verify(const char *pubkeyfile, const char *msgfile, const char *sigfile, + int embedded, int quiet, const char *keytype) +{ + unsigned long long msglen; + uint8_t *msg; + int fd; + + if (embedded) { + msg = verifyembedded(pubkeyfile, sigfile, quiet, &msglen, + keytype); + fd = xopen(msgfile, O_CREAT|O_TRUNC|O_NOFOLLOW|O_WRONLY, 0666); + writeall(fd, msg, msglen, msgfile); + free(msg); + close(fd); + } else { + verifysimple(pubkeyfile, msgfile, sigfile, quiet, keytype); + } +} + +#ifndef VERIFYONLY +#define HASHBUFSIZE 224 +struct checksum { + char file[PATH_MAX]; + char hash[HASHBUFSIZE]; + char algo[32]; +}; + +static void * +ecalloc(size_t s1, size_t s2, void *data) +{ + void *p; + + if (!(p = calloc(s1, s2))) + err(1, "calloc"); + return p; +} + +static void +efree(void *p, void *data) +{ + free(p); +} + +static void +recodehash(char *hash, size_t len) +{ + uint8_t data[HASHBUFSIZE / 2]; + int i, rv; + + if (strlen(hash) == len) + return; + if ((rv = b64_pton(hash, data, sizeof(data))) == -1) + errx(1, "invalid base64 encoding"); + for (i = 0; i < rv; i++) + snprintf(hash + i * 2, HASHBUFSIZE - i * 2, "%2.2x", data[i]); +} + +static int +verifychecksum(struct checksum *c, int quiet) +{ + char buf[HASHBUFSIZE]; + + if (strcmp(c->algo, "SHA256") == 0) { + recodehash(c->hash, SHA256_DIGEST_STRING_LENGTH-1); + if (!SHA256File(c->file, buf)) + return 0; + } else if (strcmp(c->algo, "SHA512") == 0) { + 
recodehash(c->hash, SHA512_DIGEST_STRING_LENGTH-1); + if (!SHA512File(c->file, buf)) + return 0; + } else { + errx(1, "can't handle algorithm %s", c->algo); + } + if (strcmp(c->hash, buf) != 0) + return 0; + if (!quiet) + printf("%s: OK\n", c->file); + return 1; +} + +static void +verifychecksums(char *msg, int argc, char **argv, int quiet) +{ + struct ohash_info info = { 0, NULL, ecalloc, efree, NULL }; + struct ohash myh; + struct checksum c; + char *e, *line, *endline; + int hasfailed = 0; + int i, rv; + unsigned int slot; + + ohash_init(&myh, 6, &info); + if (argc) { + for (i = 0; i < argc; i++) { + slot = ohash_qlookup(&myh, argv[i]); + e = ohash_find(&myh, slot); + if (e == NULL) + ohash_insert(&myh, slot, argv[i]); + } + } + + line = msg; + while (line && *line) { + if ((endline = strchr(line, '\n'))) + *endline++ = '\0'; +#if PATH_MAX < 1024 || HASHBUFSIZE < 224 +#error sizes are wrong +#endif + rv = sscanf(line, "%31s (%1023[^)]) = %223s", + c.algo, c.file, c.hash); + if (rv != 3) + errx(1, "unable to parse checksum line %s", line); + line = endline; + if (argc) { + slot = ohash_qlookup(&myh, c.file); + e = ohash_find(&myh, slot); + if (e != NULL) { + if (verifychecksum(&c, quiet) != 0) + ohash_remove(&myh, slot); + } + } else { + if (verifychecksum(&c, quiet) == 0) { + slot = ohash_qlookup(&myh, c.file); + e = ohash_find(&myh, slot); + if (e == NULL) { + if (!(e = strdup(c.file))) + err(1, "strdup"); + ohash_insert(&myh, slot, e); + } + } + } + } + + for (e = ohash_first(&myh, &slot); e != NULL; e = ohash_next(&myh, &slot)) { + fprintf(stderr, "%s: FAIL\n", e); + hasfailed = 1; + if (argc == 0) + free(e); + } + ohash_delete(&myh); + if (hasfailed) + exit(1); +} + +static void +check(const char *pubkeyfile, const char *sigfile, const char *keytype, + int quiet, int argc, char **argv) +{ + unsigned long long msglen; + uint8_t *msg; + + msg = verifyembedded(pubkeyfile, sigfile, quiet, &msglen, keytype); + verifychecksums((char *)msg, argc, argv, quiet); + + 
free(msg); +} + +void * +verifyzdata(uint8_t *zdata, unsigned long long zdatalen, + const char *filename, const char *pubkeyfile, const char *keytype) +{ + struct sig sig; + char sigcomment[COMMENTMAXLEN]; + unsigned long long siglen; + struct pubkey pubkey; + + if (zdatalen < sizeof(sig)) + errx(1, "signature too short in %s", filename); + siglen = parseb64file(filename, zdata, &sig, sizeof(sig), + sigcomment); + readpubkey(pubkeyfile, &pubkey, sigcomment, keytype); + zdata += siglen; + zdatalen -= siglen; + verifymsg(&pubkey, zdata, zdatalen, &sig, 1); + return zdata; +} +#endif + +int +main(int argc, char **argv) +{ + const char *pubkeyfile = NULL, *seckeyfile = NULL, *msgfile = NULL, + *sigfile = NULL; + char sigfilebuf[PATH_MAX]; + const char *comment = "signify"; + char *keytype = NULL; + int ch; + int none = 0; + int embedded = 0; + int quiet = 0; + int gzip = 0; + enum { + NONE, + CHECK, + GENERATE, + SIGN, + VERIFY + } verb = NONE; + + if (pledge("stdio rpath wpath cpath tty", NULL) == -1) + err(1, "pledge"); + + while ((ch = getopt(argc, argv, "CGSVzc:em:np:qs:t:x:")) != -1) { + switch (ch) { +#ifndef VERIFYONLY + case 'C': + if (verb) + usage(NULL); + verb = CHECK; + break; + case 'G': + if (verb) + usage(NULL); + verb = GENERATE; + break; + case 'S': + if (verb) + usage(NULL); + verb = SIGN; + break; + case 'z': + gzip = 1; + break; +#endif + case 'V': + if (verb) + usage(NULL); + verb = VERIFY; + break; + case 'c': + comment = optarg; + break; + case 'e': + embedded = 1; + break; + case 'm': + msgfile = optarg; + break; + case 'n': + none = 1; + break; + case 'p': + pubkeyfile = optarg; + break; + case 'q': + quiet = 1; + break; + case 's': + seckeyfile = optarg; + break; + case 't': + keytype = optarg; + break; + case 'x': + sigfile = optarg; + break; + default: + usage(NULL); + break; + } + } + argc -= optind; + argv += optind; + + if (embedded && gzip) + errx(1, "can't combine -e and -z options"); + + if (setvbuf(stdout, NULL, _IOLBF, 0) != 0) + 
err(1, "setvbuf"); + +#ifndef VERIFYONLY + if (verb == CHECK) { + if (pledge("stdio rpath", NULL) == -1) + err(1, "pledge"); + if (!sigfile) + usage("must specify sigfile"); + check(pubkeyfile, sigfile, keytype, quiet, argc, argv); + return 0; + } +#endif + + if (argc != 0) + usage(NULL); + + if (!sigfile && msgfile) { + int nr; + if (strcmp(msgfile, "-") == 0) + usage("must specify sigfile with - message"); + nr = snprintf(sigfilebuf, sizeof(sigfilebuf), + "%s.sig", msgfile); + if (nr < 0 || nr >= sizeof(sigfilebuf)) + errx(1, "path too long"); + sigfile = sigfilebuf; + } + + switch (verb) { +#ifndef VERIFYONLY + case GENERATE: + /* no pledge */ + if (!pubkeyfile || !seckeyfile) + usage("must specify pubkey and seckey"); + check_keyname_compliance(pubkeyfile, seckeyfile); + generate(pubkeyfile, seckeyfile, none ? 0 : 42, comment); + break; + case SIGN: + /* no pledge */ + if (gzip) { + if (!msgfile || !seckeyfile || !sigfile) + usage("must specify message sigfile seckey"); + zsign(seckeyfile, msgfile, sigfile, none); + } else { + if (!msgfile || !seckeyfile) + usage("must specify message and seckey"); + sign(seckeyfile, msgfile, sigfile, embedded); + } + break; +#endif + case VERIFY: + if ((embedded || gzip) && + (msgfile && strcmp(msgfile, "-") != 0)) { + /* will need to create output file */ + if (pledge("stdio rpath wpath cpath", NULL) == -1) + err(1, "pledge"); + } else { + if (pledge("stdio rpath", NULL) == -1) + err(1, "pledge"); + } + if (gzip) { + zverify(pubkeyfile, msgfile, sigfile, keytype); + } else { + if (!msgfile) + usage("must specify message"); + verify(pubkeyfile, msgfile, sigfile, embedded, + quiet, keytype); + } + break; + default: + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); + usage(NULL); + break; + } + + return 0; +} diff --git a/usr.bin/signify/signify.h b/usr.bin/signify/signify.h new file mode 100644 index 0000000..db7df8f --- /dev/null +++ b/usr.bin/signify/signify.h @@ -0,0 +1,33 @@ +/* $OpenBSD: signify.h,v 1.2 2019/03/23 
07:10:06 tedu Exp $ */ +/* + * Copyright (c) 2016 Marc Espie <espie@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* common interface to signify.c/zsig.c */ +#ifndef signify_h +#define signify_h +extern void zverify(const char *, const char *, const char *, const char *); +extern void zsign(const char *, const char *, const char *, int); + +extern void *xmalloc(size_t); +extern void writeall(int, const void *, size_t, const char *); +extern int xopen(const char *, int, mode_t); +extern void *verifyzdata(uint8_t *, unsigned long long, + const char *, const char *, const char *); +extern uint8_t *createsig(const char *, const char *, uint8_t *, + unsigned long long); + + +#endif diff --git a/usr.bin/signify/zsig.c b/usr.bin/signify/zsig.c new file mode 100644 index 0000000..64293f4 --- /dev/null +++ b/usr.bin/signify/zsig.c @@ -0,0 +1,317 @@ +/* $OpenBSD: zsig.c,v 1.18 2019/12/22 06:37:25 espie Exp $ */ +/* + * Copyright (c) 2016 Marc Espie <espie@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef VERIFYONLY +#include <sys/cdefs.h> +#include <sys/compat.h> +#include <stdint.h> +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sha2.h> +#include <string.h> +#include <sys/stat.h> +#include <time.h> +#include <fcntl.h> +#include "signify.h" + +struct gzheader { + uint8_t flg; + uint32_t mtime; + uint8_t xflg; + uint8_t os; + uint8_t *name; + uint8_t *comment; + uint8_t *endcomment; + unsigned long long headerlength; + uint8_t *buffer; +}; + +#define FTEXT_FLAG 1 +#define FHCRC_FLAG 2 +#define FEXTRA_FLAG 4 +#define FNAME_FLAG 8 +#define FCOMMENT_FLAG 16 + +#define GZHEADERLENGTH 10 +#define MYBUFSIZE 65536LU + + +static uint8_t fake[10] = { 0x1f, 0x8b, 8, FCOMMENT_FLAG, 0, 0, 0, 0, 0, 3 }; + +static uint8_t * +readgz_header(struct gzheader *h, int fd) +{ + size_t sz = 1023; + uint8_t *p; + size_t pos = 0; + size_t len = 0; + int state = 0; + ssize_t n; + uint8_t *buf; + + buf = xmalloc(sz); + + while (1) { + if (len == sz) { + sz *= 2; + buf = realloc(buf, sz); + if (!buf) + err(1, "realloc"); + } + n = read(fd, buf+len, sz-len); + if (n == -1) + err(1, "read"); + /* incomplete info */ + if (n == 0) + errx(1, "gzheader truncated"); + len += n; + h->comment = NULL; + h->name = NULL; + + switch(state) { + case 0: /* check header proper */ + /* need ten bytes */ + if (len < GZHEADERLENGTH) + continue; + h->flg = buf[3]; + h->mtime = buf[4] | (buf[5] << 8U) | (buf[6] << 16U) | + (buf[7] << 24U); + 
h->xflg = buf[8]; + h->os = buf[9]; + /* magic gzip header */ + if (buf[0] != 0x1f || buf[1] != 0x8b || buf[2] != 8) + err(1, "invalid magic in gzheader"); + /* XXX special code that only caters to our needs */ + if (h->flg & ~ (FCOMMENT_FLAG | FNAME_FLAG)) + err(1, "invalid flags in gzheader"); + pos = GZHEADERLENGTH; + state++; + /*FALLTHRU*/ + case 1: + if (h->flg & FNAME_FLAG) { + p = memchr(buf+pos, 0, len - pos); + if (!p) + continue; + pos = (p - buf) + 1; + } + state++; + /*FALLTHRU*/ + case 2: + if (h->flg & FCOMMENT_FLAG) { + p = memchr(buf+pos, 0, len - pos); + if (!p) + continue; + h->comment = buf + pos; + h->endcomment = p; + pos = (p - buf) + 1; + } + if (h->flg & FNAME_FLAG) + h->name = buf + GZHEADERLENGTH; + h->headerlength = pos; + h->buffer = buf; + return buf + len; + } + + } +} + +static void +copy_blocks(int fdout, int fdin, const char *sha, const char *endsha, + size_t bufsize, uint8_t *bufend) +{ + uint8_t *buffer; + uint8_t *residual; + uint8_t output[SHA512_256_DIGEST_STRING_LENGTH]; + + buffer = xmalloc(bufsize); + residual = (uint8_t *)endsha + 1; + + while (1) { + /* get the next block */ + size_t n = 0; + /* if we have residual data, we use it */ + if (residual != bufend) { + /* how much can we copy */ + size_t len = bufend - residual; + n = len >= bufsize ? 
bufsize : len; + memcpy(buffer, residual, n); + residual += n; + } + /* if we're not done yet, try to obtain more until EOF */ + while (n != bufsize) { + ssize_t more = read(fdin, buffer+n, bufsize-n); + if (more == -1) + err(1, "read"); + n += more; + if (more == 0) + break; + } + SHA512_256Data(buffer, n, output); + if (endsha - sha < SHA512_256_DIGEST_STRING_LENGTH-1) + errx(4, "signature truncated"); + if (memcmp(output, sha, SHA512_256_DIGEST_STRING_LENGTH-1) != 0) + errx(4, "signature mismatch"); + if (sha[SHA512_256_DIGEST_STRING_LENGTH-1] != '\n') + errx(4, "signature mismatch"); + sha += SHA512_256_DIGEST_STRING_LENGTH; + writeall(fdout, buffer, n, "stdout"); + if (n != bufsize) + break; + } + free(buffer); +} + +void +zverify(const char *pubkeyfile, const char *msgfile, const char *sigfile, + const char *keytype) +{ + struct gzheader h; + size_t bufsize, len; + char *p; + uint8_t *bufend; + int fdin, fdout; + + /* by default, verification will love pipes */ + if (!sigfile) + sigfile = "-"; + if (!msgfile) + msgfile = "-"; + + fdin = xopen(sigfile, O_RDONLY | O_NOFOLLOW, 0); + + bufend = readgz_header(&h, fdin); + if (!(h.flg & FCOMMENT_FLAG)) + errx(1, "unsigned gzip archive"); + fake[8] = h.xflg; + len = h.endcomment-h.comment; + + p = verifyzdata(h.comment, len, sigfile, + pubkeyfile, keytype); + + bufsize = MYBUFSIZE; + +#define BEGINS_WITH(x, y) memcmp((x), (y), sizeof(y)-1) == 0 + + while (BEGINS_WITH(p, "algorithm=SHA512/256") || + BEGINS_WITH(p, "date=") || + BEGINS_WITH(p, "key=") || + sscanf(p, "blocksize=%zu\n", &bufsize) > 0) { + while (*(p++) != '\n') + continue; + } + + if (*p != '\n') + errx(1, "invalid signature"); + + fdout = xopen(msgfile, O_CREAT|O_TRUNC|O_NOFOLLOW|O_WRONLY, 0666); + writeall(fdout, fake, sizeof fake, msgfile); + writeall(fdout, h.comment, len+1, msgfile); + *(p++) = 0; + copy_blocks(fdout, fdin, p, h.endcomment, bufsize, bufend); + free(h.buffer); + close(fdout); + close(fdin); +} + +void +zsign(const char *seckeyfile, 
const char *msgfile, const char *sigfile, + int skipdate) +{ + size_t bufsize = MYBUFSIZE; + int fdin, fdout; + struct gzheader h; + struct stat sb; + size_t space; + char *msg; + char *p; + uint8_t *buffer; + uint8_t *sighdr; + char date[80]; + time_t clock; + + fdin = xopen(msgfile, O_RDONLY, 0); + if (fstat(fdin, &sb) == -1 || !S_ISREG(sb.st_mode)) + errx(1, "Sorry can only sign regular files"); + + readgz_header(&h, fdin); + /* we don't care about the header, actually */ + free(h.buffer); + + if (lseek(fdin, h.headerlength, SEEK_SET) == -1) + err(1, "seek in %s", msgfile); + + space = (sb.st_size / MYBUFSIZE+1) * SHA512_256_DIGEST_STRING_LENGTH + + 1024; /* long enough for extra header information */ + + msg = xmalloc(space); + buffer = xmalloc(bufsize); + if (skipdate) { + clock = 0; + } else { + time(&clock); + } + strftime(date, sizeof date, "%Y-%m-%dT%H:%M:%SZ", gmtime(&clock)); + snprintf(msg, space, + "date=%s\n" + "key=%s\n" + "algorithm=SHA512/256\n" + "blocksize=%zu\n\n", + date, seckeyfile, bufsize); + p = strchr(msg, 0); + + while (1) { + size_t n = read(fdin, buffer, bufsize); + if (n == -1) + err(1, "read from %s", msgfile); + if (n == 0) + break; + SHA512_256Data(buffer, n, p); + p += SHA512_256_DIGEST_STRING_LENGTH; + p[-1] = '\n'; + if (msg + space < p) + errx(1, "file too long %s", msgfile); + } + *p = 0; + + fdout = xopen(sigfile, O_CREAT|O_TRUNC|O_NOFOLLOW|O_WRONLY, 0666); + sighdr = createsig(seckeyfile, msgfile, msg, p-msg); + fake[8] = h.xflg; + + writeall(fdout, fake, sizeof fake, sigfile); + writeall(fdout, sighdr, strlen(sighdr), sigfile); + free(sighdr); + /* need the 0 ! */ + writeall(fdout, msg, p - msg + 1, sigfile); + free(msg); + + if (lseek(fdin, h.headerlength, SEEK_SET) == -1) + err(1, "seek in %s", msgfile); + + while (1) { + size_t n = read(fdin, buffer, bufsize); + if (n == -1) + err(1, "read from %s", msgfile); + if (n == 0) + break; + writeall(fdout, buffer, n, sigfile); + } + free(buffer); + close(fdout); +} +#endif |