4 files changed, 1264 insertions, 0 deletions
diff --git a/lib/libcrypto/arc4random/arc4random_linux.h b/lib/libcrypto/arc4random/arc4random_linux.h
new file mode 100644
index 0000000..5e1cf34
--- /dev/null
+++ b/lib/libcrypto/arc4random/arc4random_linux.h
@@ -0,0 +1,88 @@
+/*	$OpenBSD: arc4random_linux.h,v 1.12 2019/07/11 10:37:28 inoguchi Exp $	*/
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+/* File-local mutex; _ARC4_LOCK()/_ARC4_UNLOCK() lock and unlock it (the callers are outside this header). */
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK()   pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+/* _ARC4_ATFORK(f) registers f as the third (child-side) fork handler; the first two handlers are NULL. */
+#if defined(__GLIBC__) && !(defined(__UCLIBC__) && !defined(__ARCH_USE_MMU__))
+extern void *__dso_handle;
+extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *);
+#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle)
+#else
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+#endif
+/* Failure hook: raises SIGKILL against the caller, so it is not expected to return. */
+static inline void
+_getentropy_fail(void)
+{
+	raise(SIGKILL);
+}
+/* Fork flag: set by _rs_forkhandler(), tested and cleared by _rs_forkdetect(). */
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+	_rs_forked = 1;	/* handler is registered through _ARC4_ATFORK in _rs_allocate() */
+}
+/* Zero *rs (if allocated) on first use, when the pid changed, or when the fork flag is set. */
+static inline void
+_rs_forkdetect(void)
+{
+	static pid_t _rs_pid = 0;
+	pid_t pid = getpid();
+
+        /* XXX unusual calls to clone() can bypass checks */
+	if (_rs_pid == 0 || _rs_pid == 1 || _rs_pid != pid || _rs_forked) {	/* 0: nothing recorded yet; a recorded pid of 1 always forces a reset */
+		_rs_pid = pid;
+		_rs_forked = 0;
+		if (rs)
+			memset(rs, 0, sizeof(*rs));
+	}
+}
+/* Map anonymous private memory for *rsp and *rsxp and register the fork handler; returns 0, or -1 if mmap fails. */
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+	if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+	    MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+		return (-1);	/* note: *rsp is left holding MAP_FAILED on this path */
+
+	if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+	    MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+		munmap(*rsp, sizeof(**rsp));	/* undo the first mapping */
+		*rsp = NULL;
+		return (-1);
+	}
+
+	_ARC4_ATFORK(_rs_forkhandler);	/* registration result is not checked */
+	return (0);
+}
diff --git a/lib/libcrypto/md32_common.h b/lib/libcrypto/md32_common.h
new file mode 100644
index 0000000..0dca617
--- /dev/null
+++ b/lib/libcrypto/md32_common.h
@@ -0,0 +1,345 @@
+/* $OpenBSD: md32_common.h,v 1.22 2016/11/04 13:56:04 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+/*
+ * This is a generic 32 bit "collector" for message digest algorithms.
+ * Whenever needed it collects input character stream into chunks of
+ * 32 bit values and invokes a block function that performs actual hash
+ * calculations.
+ *
+ * Porting guide.
+ *
+ * Obligatory macros:
+ *
+ * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
+ *	this macro defines byte order of input stream.
+ * HASH_CBLOCK
+ *	size of a unit chunk HASH_BLOCK operates on.
+ * HASH_LONG
+ *	has to be at least 32 bit wide.
+ * HASH_CTX
+ *	context structure that at least contains following
+ *	members:
+ *		typedef struct {
+ *			...
+ *			HASH_LONG	Nl,Nh;
+ *			either {
+ *			HASH_LONG	data[HASH_LBLOCK];
+ *			unsigned char	data[HASH_CBLOCK];
+ *			};
+ *			unsigned int	num;
+ *			...
+ *			} HASH_CTX;
+ *	data[] vector is expected to be zeroed upon first call to
+ *	HASH_UPDATE.
+ * HASH_UPDATE
+ *	name of "Update" function, implemented here.
+ * HASH_TRANSFORM
+ *	name of "Transform" function, implemented here.
+ * HASH_FINAL
+ *	name of "Final" function, implemented here.
+ * HASH_BLOCK_DATA_ORDER
+ *	name of "block" function capable of treating *unaligned* input
+ *	message in original (data) byte order, implemented externally.
+ * HASH_MAKE_STRING
+ *	macro converting context variables to an ASCII hash string.
+ *
+ * MD5 example:
+ *
+ *	#define DATA_ORDER_IS_LITTLE_ENDIAN
+ *
+ *	#define HASH_LONG		MD5_LONG
+ *	#define HASH_CTX		MD5_CTX
+ *	#define HASH_CBLOCK		MD5_CBLOCK
+ *	#define HASH_UPDATE		MD5_Update
+ *	#define HASH_TRANSFORM		MD5_Transform
+ *	#define HASH_FINAL		MD5_Final
+ *	#define HASH_BLOCK_DATA_ORDER	md5_block_data_order
+ *
+ *					<appro@fy.chalmers.se>
+ */
+
+#include <stdint.h>
+
+#include <openssl/opensslconf.h>
+/* Compile-time checks: refuse to build unless the including file defined every obligatory macro. */
+#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+#error "DATA_ORDER must be defined!"
+#endif
+
+#ifndef HASH_CBLOCK
+#error "HASH_CBLOCK must be defined!"
+#endif
+#ifndef HASH_LONG
+#error "HASH_LONG must be defined!"
+#endif
+#ifndef HASH_CTX
+#error "HASH_CTX must be defined!"
+#endif
+
+#ifndef HASH_UPDATE
+#error "HASH_UPDATE must be defined!"
+#endif
+#ifndef HASH_TRANSFORM
+#error "HASH_TRANSFORM must be defined!"
+#endif
+#if !defined(HASH_FINAL) && !defined(HASH_NO_FINAL)
+#error "HASH_FINAL or HASH_NO_FINAL must be defined!"
+#endif
+
+#ifndef HASH_BLOCK_DATA_ORDER
+#error "HASH_BLOCK_DATA_ORDER must be defined!"
+#endif
+
+/*
+ * Rotate a left by n bits; n must be in 1..31 (n == 0 would shift right by
+ * 32, which is undefined for uint32_t). Compilers recognize this idiom and
+ * emit a CPU rotate instruction, e.g. GCC uses roll on amd64 at -O0.
+ */
+static inline uint32_t ROTATE(uint32_t a, uint32_t n)
+{
+	return (a<<n)|(a>>(32-n));
+}
+
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+/* HOST_c2l(c,l): load 4 bytes at c into l as a big-endian word and advance c by 4; HOST_l2c(l,c) is the matching store. */
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if (defined(__i386) || defined(__i386__) || \
+      defined(__x86_64) || defined(__x86_64__))
+    /*
+     * This gives ~30-40% performance improvement in SHA-256 compiled
+     * with gcc [on P4]. Well, first macro to be frank. We can pull
+     * this trick on x86* platforms only, because these CPUs can fetch
+     * unaligned data without raising an exception.
+     */
+#  define HOST_c2l(c,l)	({ unsigned int r=*((const unsigned int *)(c));	\
+				   asm ("bswapl %0":"=r"(r):"0"(r));	\
+				   (c)+=4; (l)=r;			})
+#  define HOST_l2c(l,c)	({ unsigned int r=(l);			\
+				   asm ("bswapl %0":"=r"(r):"0"(r));	\
+				   *((unsigned int *)(c))=r; (c)+=4;	})
+# endif
+#endif
+/* Portable byte-at-a-time fallbacks, used when no x86 variant was defined above. */
+#ifndef HOST_c2l
+#define HOST_c2l(c,l) do {l =(((unsigned long)(*((c)++)))<<24);	\
+			  l|=(((unsigned long)(*((c)++)))<<16);	\
+			  l|=(((unsigned long)(*((c)++)))<< 8);	\
+			  l|=(((unsigned long)(*((c)++)))    );	\
+		      } while (0)
+#endif
+#ifndef HOST_l2c
+#define HOST_l2c(l,c) do {*((c)++)=(unsigned char)(((l)>>24)&0xff);	\
+			  *((c)++)=(unsigned char)(((l)>>16)&0xff);	\
+			  *((c)++)=(unsigned char)(((l)>> 8)&0xff);	\
+			  *((c)++)=(unsigned char)(((l)    )&0xff);	\
+		      } while (0)
+#endif
+
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+/* Little-endian data order: on x86 the word is loaded/stored directly through a casted pointer, with no byte swap. */
+#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+#  define HOST_c2l(c,l)	((l)=*((const unsigned int *)(c)), (c)+=4)
+#  define HOST_l2c(l,c)	(*((unsigned int *)(c))=(l), (c)+=4)
+#endif
+/* Portable byte-at-a-time fallbacks for every other platform. */
+#ifndef HOST_c2l
+#define HOST_c2l(c,l) do {l =(((unsigned long)(*((c)++)))    );	\
+			  l|=(((unsigned long)(*((c)++)))<< 8);	\
+			  l|=(((unsigned long)(*((c)++)))<<16);	\
+			  l|=(((unsigned long)(*((c)++)))<<24);	\
+		      } while (0)
+#endif
+#ifndef HOST_l2c
+#define HOST_l2c(l,c) do {*((c)++)=(unsigned char)(((l)    )&0xff);	\
+			  *((c)++)=(unsigned char)(((l)>> 8)&0xff);	\
+			  *((c)++)=(unsigned char)(((l)>>16)&0xff);	\
+			  *((c)++)=(unsigned char)(((l)>>24)&0xff);	\
+		      } while (0)
+#endif
+
+#endif
+
+/*
+ * Time for some action:-)
+ */
+/* Absorb len bytes from data_ into hash context c, buffering any partial block in c->data; always returns 1. */
+int
+HASH_UPDATE(HASH_CTX *c, const void *data_, size_t len)
+{
+	const unsigned char *data = data_;
+	unsigned char *p;
+	HASH_LONG l;
+	size_t n;
+
+	if (len == 0)
+		return 1;
+	/* Add len*8 to the message bit count kept in Nh:Nl (Nl is masked to 32 bits). */
+	l = (c->Nl + (((HASH_LONG)len) << 3))&0xffffffffUL;
+	/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
+	 * Wei Dai <weidai@eskimo.com> for pointing it out. */
+	if (l < c->Nl) /* overflow */
+		c->Nh++;
+	c->Nh+=(HASH_LONG)(len>>29);	/* might cause compiler warning on 16-bit */
+	c->Nl = l;
+	/* First top up a partially filled buffer left over from an earlier call. */
+	n = c->num;
+	if (n != 0) {
+		p = (unsigned char *)c->data;
+
+		if (len >= HASH_CBLOCK || len + n >= HASH_CBLOCK) {
+			memcpy (p + n, data, HASH_CBLOCK - n);
+			HASH_BLOCK_DATA_ORDER (c, p, 1);
+			n = HASH_CBLOCK - n;
+			data += n;
+			len -= n;
+			c->num = 0;
+			memset (p,0,HASH_CBLOCK);	/* keep it zeroed */
+		} else {
+			memcpy (p + n, data, len);
+			c->num += (unsigned int)len;
+			return 1;
+		}
+	}
+	/* Hash as many whole blocks as possible straight from the caller's buffer. */
+	n = len/HASH_CBLOCK;
+	if (n > 0) {
+		HASH_BLOCK_DATA_ORDER (c, data, n);
+		n    *= HASH_CBLOCK;
+		data += n;
+		len -= n;
+	}
+	/* Stash the remaining tail (fewer than HASH_CBLOCK bytes) for the next call. */
+	if (len != 0) {
+		p = (unsigned char *)c->data;
+		c->num = (unsigned int)len;
+		memcpy (p, data, len);
+	}
+	return 1;
+}
+
+/* Run the block function on exactly one block at data; the buffer and the length counters in c are not touched here. */
+void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
+{
+	HASH_BLOCK_DATA_ORDER (c, data, 1);
+}
+
+/* Pad the buffered tail with 0x80 and zeros, append the 64-bit bit count, hash the last block(s), then emit the digest via HASH_MAKE_STRING. */
+#ifndef HASH_NO_FINAL
+int HASH_FINAL (unsigned char *md, HASH_CTX *c)
+{
+	unsigned char *p = (unsigned char *)c->data;
+	size_t n = c->num;
+
+	p[n] = 0x80; /* there is always room for one */
+	n++;
+	/* No room left for the 8-byte length: zero-fill, hash this block, and continue in a fresh one. */
+	if (n > (HASH_CBLOCK - 8)) {
+		memset (p + n, 0, HASH_CBLOCK - n);
+		n = 0;
+		HASH_BLOCK_DATA_ORDER (c, p, 1);
+	}
+	memset (p + n, 0, HASH_CBLOCK - 8 - n);
+	/* The bit count fills the last 8 bytes; the order of Nh and Nl follows the data order. */
+	p += HASH_CBLOCK - 8;
+#if   defined(DATA_ORDER_IS_BIG_ENDIAN)
+	HOST_l2c(c->Nh, p);
+	HOST_l2c(c->Nl, p);
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+	HOST_l2c(c->Nl, p);
+	HOST_l2c(c->Nh, p);
+#endif
+	p -= HASH_CBLOCK;
+	HASH_BLOCK_DATA_ORDER (c, p, 1);
+	c->num = 0;
+	memset (p, 0, HASH_CBLOCK);
+	/* HASH_MAKE_STRING is supplied by the including file and writes the result into md. */
+#ifndef HASH_MAKE_STRING
+#error "HASH_MAKE_STRING must be defined!"
+#else
+	HASH_MAKE_STRING(c, md);
+#endif
+
+	return 1;
+}
+#endif
+
+#ifndef MD32_REG_T
+#if defined(__alpha) || defined(__sparcv9) || defined(__mips)
+#define MD32_REG_T long
+/*
+ * This comment was originally written for MD5, which is why it
+ * discusses A-D. But it basically applies to all 32-bit digests,
+ * which is why it was moved to common header file.
+ *
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG. Doing so results in slight performance
+ * boost on LP64 architectures. The catch is we don't
+ * really care if 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows as we *save* only 32 LSBs in
+ * *either* case. Now declaring 'em long excuses the compiler
+ * from keeping 32 MSBs zeroed resulting in 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest it should say that this *prevents*
+ * performance degradation.
+ *				<appro@fy.chalmers.se>
+ */
+#else
+/*
+ * Above is not absolute and there are LP64 compilers that
+ * generate better code if MD32_REG_T is defined int. The above
+ * pre-processor condition reflects the circumstances under which
+ * the conclusion was made and is subject to further extension.
+ *				<appro@fy.chalmers.se>
+ */
+#define MD32_REG_T int
+#endif
+#endif
diff --git a/lib/libcrypto/sha/sha256.c b/lib/libcrypto/sha/sha256.c
new file mode 100644
index 0000000..9c05d3b
--- /dev/null
+++ b/lib/libcrypto/sha/sha256.c
@@ -0,0 +1,284 @@
+/* $OpenBSD: sha256.c,v 1.10 2019/01/21 23:20:31 jsg Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+/* Zero the whole context, load the SHA-224 initial hash words and set md_len to SHA224_DIGEST_LENGTH; returns 1. */
+int SHA224_Init(SHA256_CTX *c)
+	{
+	memset (c,0,sizeof(*c));
+	c->h[0]=0xc1059ed8UL;	c->h[1]=0x367cd507UL;
+	c->h[2]=0x3070dd17UL;	c->h[3]=0xf70e5939UL;
+	c->h[4]=0xffc00b31UL;	c->h[5]=0x68581511UL;
+	c->h[6]=0x64f98fa7UL;	c->h[7]=0xbefa4fa4UL;
+	c->md_len=SHA224_DIGEST_LENGTH;
+	return 1;
+	}
+/* Zero the whole context, load the SHA-256 initial hash words and set md_len to SHA256_DIGEST_LENGTH; returns 1. */
+int SHA256_Init(SHA256_CTX *c)
+	{
+	memset (c,0,sizeof(*c));
+	c->h[0]=0x6a09e667UL;	c->h[1]=0xbb67ae85UL;
+	c->h[2]=0x3c6ef372UL;	c->h[3]=0xa54ff53aUL;
+	c->h[4]=0x510e527fUL;	c->h[5]=0x9b05688cUL;
+	c->h[6]=0x1f83d9abUL;	c->h[7]=0x5be0cd19UL;
+	c->md_len=SHA256_DIGEST_LENGTH;
+	return 1;
+	}
+/* One-shot SHA-224 of n bytes at d; the digest goes to md, or to a function-static buffer shared by all callers when md is NULL. */
+unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA256_CTX c;
+	static unsigned char m[SHA224_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA224_Init(&c);
+	SHA256_Update(&c,d,n);
+	SHA256_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));	/* wipe the local context before returning */
+	return(md);
+	}
+/* One-shot SHA-256 of n bytes at d; the digest goes to md, or to a function-static buffer shared by all callers when md is NULL. */
+unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA256_CTX c;
+	static unsigned char m[SHA256_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA256_Init(&c);
+	SHA256_Update(&c,d,n);
+	SHA256_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));	/* wipe the local context before returning */
+	return(md);
+	}
+/* SHA-224 reuses the SHA-256 update and final routines; the two differ only in what their Init functions store. */
+int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
+{   return SHA256_Update (c,data,len);   }
+int SHA224_Final (unsigned char *md, SHA256_CTX *c)
+{   return SHA256_Final (md,c);   }
+/* Parameters for md32_common.h: big-endian data order, SHA256_CTX, and the names of the functions it generates. */
+#define	DATA_ORDER_IS_BIG_ENDIAN
+
+#define	HASH_LONG		SHA_LONG
+#define	HASH_CTX		SHA256_CTX
+#define	HASH_CBLOCK		SHA_CBLOCK
+/*
+ * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
+ * default: case below covers for it. It's not clear however if it's
+ * permitted to truncate to amount of bytes not divisible by 4. I bet not,
+ * but if it is, then default: case shall be extended. For reference.
+ * Idea behind separate cases for pre-defined lengths is to let the
+ * compiler decide if it's appropriate to unroll small loops.
+ */
+#define	HASH_MAKE_STRING(c,s)	do {	\
+	unsigned long ll;		\
+	unsigned int  nn;		\
+	switch ((c)->md_len)		\
+	{   case SHA224_DIGEST_LENGTH:	\
+		for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++)	\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	    case SHA256_DIGEST_LENGTH:	\
+		for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++)	\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	    default:			\
+		if ((c)->md_len > SHA256_DIGEST_LENGTH)	\
+		    return 0;				\
+		for (nn=0;nn<(c)->md_len/4;nn++)		\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	}				\
+	} while (0)
+/* Note: the default: case above can return 0 from the function the macro is expanded in (HASH_FINAL, i.e. SHA256_Final). */
+#define	HASH_UPDATE		SHA256_Update
+#define	HASH_TRANSFORM		SHA256_Transform
+#define	HASH_FINAL		SHA256_Final
+#define	HASH_BLOCK_DATA_ORDER	sha256_block_data_order
+#ifndef SHA256_ASM
+static
+#endif
+void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
+
+#include "md32_common.h"
+
+#ifndef SHA256_ASM
+static const SHA_LONG K256[64] = {	/* per-round additive constants, indexed by round number in sha256_block_data_order */
+	0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
+	0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
+	0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL,
+	0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL,
+	0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL,
+	0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL,
+	0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL,
+	0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL,
+	0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL,
+	0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL,
+	0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL,
+	0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL,
+	0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL,
+	0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL,
+	0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL,
+	0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL };
+
+/*
+ * FIPS specification refers to right rotations, while our ROTATE helper
+ * is a left one. This is why the rotation counts below differ from those
+ * in the FIPS document: each is 32-N.
+ */
+#define Sigma0(x)	(ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
+#define Sigma1(x)	(ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
+#define sigma0(x)	(ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
+#define sigma1(x)	(ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
+/* Ch: each bit of x selects the matching bit of y (1) or z (0). Maj: bitwise majority of x, y and z. */
+#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+#ifdef OPENSSL_SMALL_FOOTPRINT
+/* Compact SHA-256 compression: consumes num 64-byte blocks from in and updates ctx->h after each one. */
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+	{
+	unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+	SHA_LONG	X[16],l;
+	int i;
+	const unsigned char *data=in;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+	/* Rounds 0-15: message words are read from the input with HOST_c2l and kept in X. */
+	for (i=0;i<16;i++)
+		{
+		HOST_c2l(data,l); T1 = X[i] = l;
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+	/* Rounds 16-63: the message schedule is extended in place, using X as a 16-word ring. */
+	for (;i<64;i++)
+		{
+		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
+		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
+
+		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+	/* Add the working variables back into the chaining state. */
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			}
+}
+
+#else
+/* ROUND_00_15: one round on T1 (preloaded with the message word); callers rotate the a..h argument order instead of moving values. */
+#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
+	T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];	\
+	h = Sigma0(a) + Maj(a,b,c);			\
+	d += T1;	h += T1;		} while (0)
+/* ROUND_16_63: first extends the message schedule in the 16-word ring X, then performs ROUND_00_15. */
+#define	ROUND_16_63(i,a,b,c,d,e,f,g,h,X)	do {	\
+	s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);	\
+	s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);	\
+	T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];	\
+	ROUND_00_15(i,a,b,c,d,e,f,g,h);		} while (0)
+/* Unrolled SHA-256 compression: consumes num 64-byte blocks from in and updates ctx->h after each one. */
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+	{
+	unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
+	SHA_LONG	X[16];
+	int i;
+	const unsigned char *data=in;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+	/* Host is not little-endian, SHA_LONG is 4 bytes and the input is 4-byte aligned: read the words directly. */
+	if (BYTE_ORDER != LITTLE_ENDIAN &&
+	    sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
+		{
+		const SHA_LONG *W=(const SHA_LONG *)data;
+
+		T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+		T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+		T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+		T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+		T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+		T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+		T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+		T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+		T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+		T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+		T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+		T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+		T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+		T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+		T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+		T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+
+		data += SHA256_CBLOCK;
+		}
+	else
+		{
+		SHA_LONG l;
+
+		HOST_c2l(data,l); T1 = X[0] = l;  ROUND_00_15(0,a,b,c,d,e,f,g,h);
+		HOST_c2l(data,l); T1 = X[1] = l;  ROUND_00_15(1,h,a,b,c,d,e,f,g);
+		HOST_c2l(data,l); T1 = X[2] = l;  ROUND_00_15(2,g,h,a,b,c,d,e,f);
+		HOST_c2l(data,l); T1 = X[3] = l;  ROUND_00_15(3,f,g,h,a,b,c,d,e);
+		HOST_c2l(data,l); T1 = X[4] = l;  ROUND_00_15(4,e,f,g,h,a,b,c,d);
+		HOST_c2l(data,l); T1 = X[5] = l;  ROUND_00_15(5,d,e,f,g,h,a,b,c);
+		HOST_c2l(data,l); T1 = X[6] = l;  ROUND_00_15(6,c,d,e,f,g,h,a,b);
+		HOST_c2l(data,l); T1 = X[7] = l;  ROUND_00_15(7,b,c,d,e,f,g,h,a);
+		HOST_c2l(data,l); T1 = X[8] = l;  ROUND_00_15(8,a,b,c,d,e,f,g,h);
+		HOST_c2l(data,l); T1 = X[9] = l;  ROUND_00_15(9,h,a,b,c,d,e,f,g);
+		HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+		HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+		HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+		HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+		HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+		HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+		}
+	/* Rounds 16-63, eight per iteration so the rotated argument order is back at a..h each time round the loop. */
+	for (i=16;i<64;i+=8)
+		{
+		ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X);
+		ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X);
+		ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X);
+		ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X);
+		ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X);
+		ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X);
+		ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X);
+		ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X);
+		}
+	/* Add the working variables back into the chaining state. */
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			}
+	}
+
+#endif
+#endif /* SHA256_ASM */
+
+#endif /* OPENSSL_NO_SHA256 */
diff --git a/lib/libcrypto/sha/sha512.c b/lib/libcrypto/sha/sha512.c
new file mode 100644
index 0000000..6b95cfa
--- /dev/null
+++ b/lib/libcrypto/sha/sha512.c
@@ -0,0 +1,547 @@
+/* $OpenBSD: sha512.c,v 1.15 2016/11/04 13:56:05 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
+/*
+ * IMPLEMENTATION NOTES.
+ *
+ * As you might have noticed 32-bit hash algorithms:
+ *
+ * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
+ * - optimized versions implement two transform functions: one operating
+ *   on [aligned] data in host byte order and one - on data in input
+ *   stream byte order;
+ * - share common byte-order neutral collector and padding function
+ *   implementations, ../md32_common.h;
+ *
+ * None of the above applies to this SHA-512 implementation. Reasons
+ * [in reverse order] are:
+ *
+ * - it's the only 64-bit hash algorithm for the moment of this writing,
+ *   there is no need for common collector/padding implementation [yet];
+ * - by supporting only one transform function [which operates on
+ *   *aligned* data in input stream byte order, big-endian in this case]
+ *   we minimize burden of maintenance in two ways: a) collector/padding
+ *   function is simpler; b) only one transform function to stare at;
+ * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
+ *   apply a number of optimizations to mitigate potential performance
+ *   penalties caused by previous design decision;
+ *
+ * Caveat lector.
+ *
+ * Implementation relies on the fact that "long long" is 64-bit on
+ * both 32- and 64-bit platforms. If some compiler vendor comes up
+ * with 128-bit long long, adjustment to sha.h would be required.
+ * As this implementation relies on 64-bit integer type, it's totally
+ * inappropriate for platforms which don't support it, most notably
+ * 16-bit platforms.
+ *					<appro@fy.chalmers.se>
+ */
+
+#include <openssl/crypto.h>
+#include <openssl/opensslv.h>
+#include <openssl/sha.h>
+/* Defined unless the platform is strict-alignment without SHA512_ASM; when undefined, misaligned input is copied into the context buffer before hashing. */
+#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
+#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+#endif
+/* Load the SHA-384 initial hash words, clear the bit count and buffer fill, set md_len to SHA384_DIGEST_LENGTH; returns 1. */
+int SHA384_Init(SHA512_CTX *c)
+	{
+	c->h[0]=U64(0xcbbb9d5dc1059ed8);
+	c->h[1]=U64(0x629a292a367cd507);
+	c->h[2]=U64(0x9159015a3070dd17);
+	c->h[3]=U64(0x152fecd8f70e5939);
+	c->h[4]=U64(0x67332667ffc00b31);
+	c->h[5]=U64(0x8eb44a8768581511);
+	c->h[6]=U64(0xdb0c2e0d64f98fa7);
+	c->h[7]=U64(0x47b5481dbefa4fa4);
+
+        c->Nl=0;        c->Nh=0;
+        c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
+        return 1;
+	}
+/* Load the SHA-512 initial hash words, clear the bit count and buffer fill, set md_len to SHA512_DIGEST_LENGTH; returns 1. */
+int SHA512_Init(SHA512_CTX *c)
+	{
+	c->h[0]=U64(0x6a09e667f3bcc908);
+	c->h[1]=U64(0xbb67ae8584caa73b);
+	c->h[2]=U64(0x3c6ef372fe94f82b);
+	c->h[3]=U64(0xa54ff53a5f1d36f1);
+	c->h[4]=U64(0x510e527fade682d1);
+	c->h[5]=U64(0x9b05688c2b3e6c1f);
+	c->h[6]=U64(0x1f83d9abfb41bd6b);
+	c->h[7]=U64(0x5be0cd19137e2179);
+
+        c->Nl=0;        c->Nh=0;
+        c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
+        return 1;
+	}
+/* Block function prototype: file-local (static) unless SHA512_ASM is defined, in which case it has external linkage. */
+#ifndef SHA512_ASM
+static
+#endif
+void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
+/* Pad the buffered data, append the 16-byte bit count Nh:Nl, hash, and write the digest to md; returns 0 for NULL md or an unexpected md_len, else 1. */
+int SHA512_Final (unsigned char *md, SHA512_CTX *c)
+	{
+	unsigned char *p=(unsigned char *)c->u.p;
+	size_t n=c->num;
+
+	p[n]=0x80;	/* There is always room for one */
+	n++;
+	if (n > (sizeof(c->u)-16))
+		memset (p+n,0,sizeof(c->u)-n), n=0,
+		sha512_block_data_order (c,p,1);
+	/* Zero the rest up to the 16-byte length field, then store Nh:Nl with the most significant byte first. */
+	memset (p+n,0,sizeof(c->u)-16-n);
+#if BYTE_ORDER == BIG_ENDIAN
+	c->u.d[SHA_LBLOCK-2] = c->Nh;
+	c->u.d[SHA_LBLOCK-1] = c->Nl;
+#else
+	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
+	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
+	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
+	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
+	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
+	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
+	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
+	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
+	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
+	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
+	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
+	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
+	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
+	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
+	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
+	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
+#endif
+
+	sha512_block_data_order (c,p,1);
+	/* Note: the final block has already been hashed by the time a NULL md is rejected. */
+	if (md==0) return 0;
+	/* Serialize h[] into md, 8 bytes per word, most significant byte first. */
+	switch (c->md_len)
+		{
+		/* Let compiler decide if it's appropriate to unroll... */
+		case SHA384_DIGEST_LENGTH:
+			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
+				{
+				SHA_LONG64 t = c->h[n];
+
+				*(md++)	= (unsigned char)(t>>56);
+				*(md++)	= (unsigned char)(t>>48);
+				*(md++)	= (unsigned char)(t>>40);
+				*(md++)	= (unsigned char)(t>>32);
+				*(md++)	= (unsigned char)(t>>24);
+				*(md++)	= (unsigned char)(t>>16);
+				*(md++)	= (unsigned char)(t>>8);
+				*(md++)	= (unsigned char)(t);
+				}
+			break;
+		case SHA512_DIGEST_LENGTH:
+			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
+				{
+				SHA_LONG64 t = c->h[n];
+
+				*(md++)	= (unsigned char)(t>>56);
+				*(md++)	= (unsigned char)(t>>48);
+				*(md++)	= (unsigned char)(t>>40);
+				*(md++)	= (unsigned char)(t>>32);
+				*(md++)	= (unsigned char)(t>>24);
+				*(md++)	= (unsigned char)(t>>16);
+				*(md++)	= (unsigned char)(t>>8);
+				*(md++)	= (unsigned char)(t);
+				}
+			break;
+		/* ... as well as make sure md_len is not abused. */
+		default:	return 0;
+		}
+
+	return 1;
+	}
+/* SHA-384 finalization is SHA512_Final; the md_len stored by SHA384_Init selects the shorter output. */
+int SHA384_Final (unsigned char *md,SHA512_CTX *c)
+{   return SHA512_Final (md,c);   }
+/* Absorb len bytes from _data into context c, buffering any partial block in c->u; always returns 1. */
+int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
+	{
+	SHA_LONG64	l;
+	unsigned char  *p=c->u.p;
+	const unsigned char *data=(const unsigned char *)_data;
+
+	if (len==0) return  1;
+	/* Add len*8 to the bit count Nh:Nl, carrying into Nh when Nl wraps. */
+	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
+	if (l < c->Nl)		c->Nh++;
+	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
+	c->Nl=l;
+	/* Complete a partially filled buffer first. */
+	if (c->num != 0)
+		{
+		size_t n = sizeof(c->u) - c->num;
+
+		if (len < n)
+			{
+			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
+			return 1;
+			}
+		else	{
+			memcpy (p+c->num,data,n), c->num = 0;
+			len-=n, data+=n;
+			sha512_block_data_order (c,p,1);
+			}
+		}
+	/* Hash whole blocks; misaligned input is bounced through the buffer when the block function needs alignment. */
+	if (len >= sizeof(c->u))
+		{
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+		if ((size_t)data%sizeof(c->u.d[0]) != 0)
+			while (len >= sizeof(c->u))
+				memcpy (p,data,sizeof(c->u)),
+				sha512_block_data_order (c,p,1),
+				len  -= sizeof(c->u),
+				data += sizeof(c->u);
+		else
+#endif
+			sha512_block_data_order (c,data,len/sizeof(c->u)),
+			data += len,
+			len  %= sizeof(c->u),
+			data -= len;
+		}
+	/* Keep the remaining tail for the next call. */
+	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
+
+	return 1;
+	}
+/* SHA-384 update is SHA512_Update on the same context type. */
+int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
+{   return SHA512_Update (c,data,len);   }
+/* Hash one block at data; where the block function needs alignment, misaligned input is first copied into c->u.p. */
+void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
+	{
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+	if ((size_t)data%sizeof(c->u.d[0]) != 0)
+		memcpy(c->u.p,data,sizeof(c->u.p)),
+		data = c->u.p;
+#endif
+	sha512_block_data_order (c,data,1);
+	}
+/* One-shot SHA-384 of n bytes at d; the digest goes to md, or to a function-static buffer shared by all callers when md is NULL. */
+unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA512_CTX c;
+	static unsigned char m[SHA384_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA384_Init(&c);
+	SHA512_Update(&c,d,n);
+	SHA512_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));	/* wipe the local context before returning */
+	return(md);
+	}
+/* One-shot SHA-512 of n bytes at d; the digest goes to md, or to a function-static buffer shared by all callers when md is NULL. */
+unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA512_CTX c;
+	static unsigned char m[SHA512_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA512_Init(&c);
+	SHA512_Update(&c,d,n);
+	SHA512_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));	/* wipe the local context before returning */
+	return(md);
+	}
+
+#ifndef SHA512_ASM
+static const SHA_LONG64 K512[80] = {	/* per-round additive constants: entry i is consumed as K512[i] in round i */
+        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
+        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
+        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
+        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
+        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
+        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
+        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
+        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
+        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
+        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
+        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
+        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
+        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
+        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
+        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
+        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
+        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
+        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
+        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
+        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
+        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
+        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
+        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
+        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
+        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
+        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
+        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
+        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
+        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
+        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
+        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
+        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
+        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
+        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
+        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
+        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
+        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
+        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
+        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
+        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
+
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)	/* inline-asm shortcuts; whatever stays undefined here gets the portable C definition below */
+# if defined(__x86_64) || defined(__x86_64__)	/* x86-64: ROTR via rorq, PULL64 via a 64-bit load followed by bswapq */
+#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
+				asm ("rorq %1,%0"	\
+				: "=r"(ret)		\
+				: "J"(n),"0"(a)		\
+				: "cc"); ret;		})
+#   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
+				asm ("bswapq	%0"		\
+				: "=r"(ret)			\
+				: "0"(ret)); ret;		})
+# elif (defined(__i386) || defined(__i386__))	/* i386: PULL64 only; first 32-bit word in memory becomes the high half after bswapl; ROTR is left to the portable macro */
+#   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
+			 unsigned int hi=p[0],lo=p[1];		\
+				asm ("bswapl %0; bswapl %1;"	\
+				: "=r"(lo),"=r"(hi)		\
+				: "0"(lo),"1"(hi));		\
+				((SHA_LONG64)hi)<<32|lo;	})
+# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)	/* 64-bit PowerPC: ROTR via rotrdi; PULL64 is left to the portable macro */
+#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
+				asm ("rotrdi %0,%1,%2"	\
+				: "=r"(ret)		\
+				: "r"(a),"K"(n)); ret;	})
+# endif
+#endif
+
+#ifndef PULL64	/* portable big-endian 64-bit load: B(x,j) places byte j of x at bit offset (7-j)*8 */
+#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
+#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
+#endif
+
+#ifndef ROTR	/* portable rotate right for 64-bit unsigned x; s must be 1..63 so neither shift reaches the type width */
+#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
+#endif
+
+#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))	/* round function: applied to working variable a */
+#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))	/* round function: applied to working variable e */
+#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))	/* message schedule: applied to the word 15 positions back */
+#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))	/* message schedule: applied to the word 2 positions back */
+
+#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))	/* per bit: y where x is 1, z where x is 0 */
+#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))	/* per bit: the value shared by at least two of x, y, z */
+
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+/*
+ * This code should give better results on 32-bit CPU with less than
+ * ~24 registers, both size and performance wise...
+ */
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{	/* Mixes num consecutive input blocks starting at in into the state ctx->h[0..7]. */
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	A,E,T;
+	SHA_LONG64	X[9+80],*F;	/* F is a window into X that moves down one slot per round */
+	int i;
+
+			while (num--) {	/* one pass per input block */
+
+	F    = X+80;
+	A    = ctx->h[0];	F[1] = ctx->h[1];
+	F[2] = ctx->h[2];	F[3] = ctx->h[3];
+	E    = ctx->h[4];	F[5] = ctx->h[5];
+	F[6] = ctx->h[6];	F[7] = ctx->h[7];
+
+	for (i=0;i<16;i++,F--)	/* rounds 0..15: the schedule word is read from the input */
+		{
+		T = PULL64(W[i]);
+		F[0] = A;
+		F[4] = E;
+		F[8] = T;	/* keep the schedule word; k rounds later it is reachable as F[8+k] */
+		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+		E    = F[3] + T;
+		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
+		}
+
+	for (;i<80;i++,F--)	/* rounds 16..79: the schedule word is derived from earlier ones */
+		{
+		T    = sigma0(F[8+16-1]);	/* schedule word stored 15 rounds ago */
+		T   += sigma1(F[8+16-14]);	/* schedule word stored 2 rounds ago */
+		T   += F[8+16] + F[8+16-9];	/* schedule words stored 16 and 7 rounds ago */
+
+		F[0] = A;
+		F[4] = E;
+		F[8] = T;
+		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+		E    = F[3] + T;
+		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
+		}
+
+	ctx->h[0] += A;		ctx->h[1] += F[1];	/* add this block's result into the running state */
+	ctx->h[2] += F[2];	ctx->h[3] += F[3];
+	ctx->h[4] += E;		ctx->h[5] += F[5];
+	ctx->h[6] += F[6];	ctx->h[7] += F[7];
+
+			W+=SHA_LBLOCK;	/* step to the next block's words */
+			}
+	}
+
+#elif defined(OPENSSL_SMALL_FOOTPRINT)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{	/* Mixes num consecutive input blocks starting at in into the state ctx->h[0..7]; looped (non-unrolled) variant. */
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+	SHA_LONG64	X[16];	/* 16-word circular message schedule, indexed modulo 16 */
+	int i;
+
+			while (num--) {	/* one pass per input block */
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+	for (i=0;i<16;i++)	/* rounds 0..15: the schedule word is read from the input */
+		{
+#if BYTE_ORDER == BIG_ENDIAN
+		T1 = X[i] = W[i];	/* read the word as stored, no byte reordering */
+#else
+		T1 = X[i] = PULL64(W[i]);
+#endif
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	for (;i<80;i++)	/* rounds 16..79: X[i mod 16] is updated in place from earlier words */
+		{
+		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
+		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
+
+		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;	/* add this block's result into the running state */
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			W+=SHA_LBLOCK;	/* step to the next block's words */
+			}
+	}
+
+#else
+
+#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	/* one round with constant K512[i]; T1 (caller's local) must hold the schedule word on entry */ \
+	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
+	h = Sigma0(a) + Maj(a,b,c);			\
+	d += T1;	h += T1;		} while (0)	/* d and h now hold what the next round receives as e and a */
+
+#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	/* update slot j of the 16-word circular schedule X, then run round i+j; uses caller's s0, s1, T1 */ \
+	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
+	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
+	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
+	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{	/* Mixes num consecutive input blocks starting at in into the state ctx->h[0..7]; unrolled variant. */
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
+	SHA_LONG64	X[16];	/* 16-word circular message schedule */
+	int i;
+
+			while (num--) {	/* one pass per input block */
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+#if BYTE_ORDER == BIG_ENDIAN	/* words are read as stored; each call shifts the a..h argument list by one instead of moving values */
+	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#else	/* same 16 rounds, but each word goes through PULL64 first */
+	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#endif
+
+	for (i=16;i<80;i+=16)	/* rounds 16..79, sixteen per iteration; the second macro argument is the offset added to i */
+		{
+		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
+		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
+		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
+		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
+		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
+		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
+		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
+		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
+		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
+		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
+		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
+		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
+		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
+		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
+		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
+		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;	/* add this block's result into the running state */
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			W+=SHA_LBLOCK;	/* step to the next block's words */
+			}
+	}
+
+#endif
+
+#endif /* SHA512_ASM */
+
+#endif /* !OPENSSL_NO_SHA512 */