/*
 * Copyright (C) 2018 Denys Vlasenko
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
#include "tls.h"

typedef uint8_t byte;
typedef uint32_t word32;

#define XMEMSET memset
#define XMEMCPY memcpy

/* from wolfssl-3.15.3/wolfcrypt/src/aes.c */

#ifdef UNUSED
static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
{
	/* Multiply the sz by 8 */
//bbox: these sizes are never even close to 2^32/8
//	word32 szHi = (sz >> (8*sizeof(sz) - 3));
	sz <<= 3;

	/* copy over the words of the sz into the destination buffer */
//	buf[0] = (szHi >> 24) & 0xff;
//	buf[1] = (szHi >> 16) & 0xff;
//	buf[2] = (szHi >>  8) & 0xff;
//	buf[3] = szHi & 0xff;
	*(uint32_t*)(buf + 0) = 0;
//	buf[4] = (sz >> 24) & 0xff;
//	buf[5] = (sz >> 16) & 0xff;
//	buf[6] = (sz >>  8) & 0xff;
//	buf[7] = sz & 0xff;
	*(uint32_t*)(buf + 4) = SWAP_BE32(sz);
}
#endif

static void RIGHTSHIFTX(byte* x)
{
#define l ((unsigned long*)x)
#if 0
	// Generic byte-at-a-time algorithm
	int i;
	byte carryIn = (x[15] & 0x01) ? 0xE1 : 0;
	for (i = 0; i < AES_BLOCK_SIZE; i++) {
		byte carryOut = (x[i] << 7); // zero, or 0x80
		x[i] = (x[i] >> 1) ^ carryIn;
		carryIn = carryOut;
	}
#elif BB_BIG_ENDIAN
	// Big-endian can shift-right in larger than byte chunks
	// (we use the fact that 'x' is long-aligned)
	unsigned long carryIn = (x[15] & 0x01)
		? ((unsigned long)0xE1 << (LONG_BIT-8))
		: 0;
# if ULONG_MAX <= 0xffffffff
	int i;
	for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
		unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00
		l[i] = (l[i] >> 1) ^ carryIn;
		carryIn = carryOut;
	}
# else
	// 64-bit code: need to process only 2 words
	unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00
	l[0] = (l[0] >> 1) ^ carryIn;
	l[1] = (l[1] >> 1) ^ carryOut;
# endif
#else /* LITTLE_ENDIAN */
	// In order to use word-sized ops, little-endian needs to byteswap.
	// On x86, code size increase is ~10 bytes compared to byte-by-byte.
	unsigned long carryIn = (x[15] & 0x01)
		? ((unsigned long)0xE1 << (LONG_BIT-8))
		: 0;
# if ULONG_MAX <= 0xffffffff
	int i;
	for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
		unsigned long ti = SWAP_BE32(l[i]);
		unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00
		ti = (ti >> 1) ^ carryIn;
		l[i] = SWAP_BE32(ti);
		carryIn = carryOut;
	}
# else
	// 64-bit code: need to process only 2 words
	unsigned long tt = SWAP_BE64(l[0]);
	unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00
	tt = (tt >> 1) ^ carryIn;
	l[0] = SWAP_BE64(tt);
	tt = SWAP_BE64(l[1]);
	tt = (tt >> 1) ^ carryOut;
	l[1] = SWAP_BE64(tt);
# endif
#endif /* LITTLE_ENDIAN */
#undef l
}

// Caller guarantees X is aligned
static void GMULT(byte* X, byte* Y)
{
	byte Z[AES_BLOCK_SIZE] ALIGNED_long;
	//byte V[AES_BLOCK_SIZE] ALIGNED_long;
	int i;

	XMEMSET(Z, 0, AES_BLOCK_SIZE);
	//XMEMCPY(V, X, AES_BLOCK_SIZE);
	for (i = 0; i < AES_BLOCK_SIZE; i++) {
		uint32_t y = 0x800000 | Y[i];
		for (;;) { // for every bit in Y[i], from msb to lsb
			if (y & 0x80) {
				xorbuf_aligned_AES_BLOCK_SIZE(Z, X); // was V, not X
			}
			RIGHTSHIFTX(X); // was V, not X
			y = y << 1;
			if ((int32_t)y < 0) // if bit 0x80000000 set = if 8 iterations done
				break;
		}
	}
	XMEMCPY(X, Z, AES_BLOCK_SIZE);
}

//bbox:
// for TLS AES-GCM, a (which is AAD) is always 13 bytes long, and bbox code provides
// extra 3 zeroed bytes, making it a[16], or a[AES_BLOCK_SIZE].
// Resulting auth tag in s[] is also always AES_BLOCK_SIZE bytes.
//
// This allows some simplifications.
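/*
 * Reference sketch (kept out of the build): the textbook GF(2^128) multiply
 * from NIST SP 800-38D, shown only to document what GMULT() above computes.
 * GMULT_ref is an illustrative name, not part of this file. Unlike GMULT(),
 * it does not clobber the multiplicand: it works on a copy V, which is what
 * the "was V, not X" comments refer to. RIGHTSHIFTX() is the "multiply by x"
 * step: shift right by one bit and, if a bit fell off the end, fold in the
 * reduction polynomial x^128 + x^7 + x^2 + x + 1, which appears as 0xE1 in
 * byte 0 of GCM's bit-reflected representation.
 */
#if 0
static void GMULT_ref(byte* X, const byte* Y)
{
	byte Z[AES_BLOCK_SIZE] ALIGNED_long;
	byte V[AES_BLOCK_SIZE] ALIGNED_long;
	int i, j;

	XMEMSET(Z, 0, AES_BLOCK_SIZE);
	XMEMCPY(V, X, AES_BLOCK_SIZE);
	for (i = 0; i < AES_BLOCK_SIZE; i++) {
		byte y = Y[i];
		for (j = 0; j < 8; j++) { // bits of Y[i], msb to lsb
			if (y & 0x80) {
				int k;
				for (k = 0; k < AES_BLOCK_SIZE; k++)
					Z[k] ^= V[k]; // Z ^= V
			}
			RIGHTSHIFTX(V); // V = V * x, reduced mod the GCM polynomial
			y <<= 1;
		}
	}
	XMEMCPY(X, Z, AES_BLOCK_SIZE); // X = X * Y
}
#endif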
#define aSz 13
#define sSz AES_BLOCK_SIZE
void FAST_FUNC aesgcm_GHASH(byte* h,
		const byte* a, //unsigned aSz,
		const byte* c, unsigned cSz,
		byte* s //, unsigned sSz
)
{
	byte x[AES_BLOCK_SIZE] ALIGNED_long;
//	byte scratch[AES_BLOCK_SIZE] ALIGNED_long;
	unsigned blocks, partial;
	//was: byte* h = aes->H;

	//XMEMSET(x, 0, AES_BLOCK_SIZE);

	/* Hash in A, the Additional Authentication Data */
//	if (aSz != 0 && a != NULL) {
//		blocks = aSz / AES_BLOCK_SIZE;
//		partial = aSz % AES_BLOCK_SIZE;
//		while (blocks--) {
			//xorbuf(x, a, AES_BLOCK_SIZE);
			XMEMCPY(x, a, AES_BLOCK_SIZE);// memcpy(x,a) = memset(x,0)+xorbuf(x,a)
			GMULT(x, h);
//			a += AES_BLOCK_SIZE;
//		}
//		if (partial != 0) {
//			XMEMSET(scratch, 0, AES_BLOCK_SIZE);
//			XMEMCPY(scratch, a, partial);
//			xorbuf(x, scratch, AES_BLOCK_SIZE);
//			GMULT(x, h);
//		}
//	}

	/* Hash in C, the Ciphertext */
	if (cSz != 0 /*&& c != NULL*/) {
		blocks = cSz / AES_BLOCK_SIZE;
		partial = cSz % AES_BLOCK_SIZE;
		while (blocks--) {
			if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned
				xorbuf_aligned_AES_BLOCK_SIZE(x, c);
			else
				xorbuf(x, c, AES_BLOCK_SIZE);
			GMULT(x, h);
			c += AES_BLOCK_SIZE;
		}
		if (partial != 0) {
			//XMEMSET(scratch, 0, AES_BLOCK_SIZE);
			//XMEMCPY(scratch, c, partial);
			//xorbuf(x, scratch, AES_BLOCK_SIZE);
			xorbuf(x, c, partial);//same result as above
			GMULT(x, h);
		}
	}

	/* Hash in the lengths of A and C in bits */
	//FlattenSzInBits(&scratch[0], aSz);
	//FlattenSzInBits(&scratch[8], cSz);
	//xorbuf_aligned_AES_BLOCK_SIZE(x, scratch);
	// simpler:
#define P32(v) ((uint32_t*)v)
	//P32(x)[0] ^= 0;
	P32(x)[1] ^= SWAP_BE32(aSz * 8);
	//P32(x)[2] ^= 0;
	P32(x)[3] ^= SWAP_BE32(cSz * 8);
#undef P32
	GMULT(x, h);

	/* Copy the result into s. */
	XMEMCPY(s, x, sSz);
}
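/*
 * Usage sketch (kept out of the build): how a GCM caller would typically
 * combine aesgcm_GHASH() with AES to produce the authentication tag for one
 * record. This is illustrative only, not the calling code of this library:
 * example_gcm_tag() and aes_encrypt_one_block() are hypothetical names, the
 * latter standing for a single-block AES encryption under the record key,
 * and the 13-byte TLS AAD is assumed to be already padded with three zero
 * bytes as described above.
 */
#if 0
static void example_gcm_tag(
		const void *key,		/* hypothetical: expanded AES key */
		const byte nonce[12],		/* 96-bit GCM nonce (salt + explicit IV) */
		const byte aad[16],		/* 13 bytes of TLS AAD + 3 zero bytes */
		const byte *ciphertext, unsigned cSz,
		byte tag[AES_BLOCK_SIZE])
{
	byte H[AES_BLOCK_SIZE] ALIGNED_long;
	byte J0[AES_BLOCK_SIZE];
	byte EkJ0[AES_BLOCK_SIZE];
	int i;

	/* H = AES_K(0^128): the GHASH subkey */
	XMEMSET(H, 0, sizeof(H));
	aes_encrypt_one_block(key, H, H); /* hypothetical helper */

	/* J0 = nonce || 0x00000001 (96-bit nonce case) */
	XMEMCPY(J0, nonce, 12);
	J0[12] = J0[13] = J0[14] = 0;
	J0[15] = 1;
	aes_encrypt_one_block(key, J0, EkJ0); /* hypothetical helper */

	/* tag = GHASH_H(AAD, C) xor AES_K(J0) */
	aesgcm_GHASH(H, aad, ciphertext, cSz, tag);
	for (i = 0; i < AES_BLOCK_SIZE; i++)
		tag[i] ^= EkJ0[i];
}
#endif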