/* Mirror of https://github.com/Jimmy-Z/TWLbf.git (synced 2025-06-18 18:55:31 -04:00; 205 lines, 5.6 KiB, C). */
#include <stdint.h>

/* sha1_16
 * specialized to only take 16 bytes of input and spit out the first 16 bytes of the digest
 *
 * code dug out from mbed TLS 2.5.1
 * https://github.com/ARMmbed/mbedtls/blob/development/library/sha1.c
 *
 * also took a look at OpenSSL SHA1,
 * the C implementation is wrapped in some crazy macros
 * https://github.com/openssl/openssl/blob/master/crypto/sha/sha_locl.h
 * https://github.com/openssl/openssl/blob/master/crypto/include/internal/md32_common.h
 * they have some exotic SIMD ASM implementations including AVX2 and SHAEXT, impressive!
 * https://github.com/openssl/openssl/blob/master/crypto/sha/asm/sha1-x86_64.pl
 *
 * BTW about SHAEXT, Intel has a SHAEXT document dating back to Jul 2013
 * but it was only first implemented in Goldmont(Atom) in Apr 2016
 * and is strangely not available on Kaby Lake(Oct 2016) and Coffee Lake(Oct 2017)
 * for Intel desktop processors we'd have to wait until Cannonlake(expected H1 2018)
 * AMD has supported this since Ryzen(Feb 2017)
 *
 * anyway, in my tests OpenSSL is only a tiny bit faster than mbed TLS on 16 byte blocks
 * and slower than this specialized version
 */

#ifndef GET_UINT32_BE
/* Read the big-endian 32-bit word stored in bytes b[i..i+3] into n. */
#define GET_UINT32_BE(n,b,i)                            \
do {                                                    \
    (n) = ( (uint32_t) (b)[(i)    ] << 24 )             \
        | ( (uint32_t) (b)[(i) + 1] << 16 )             \
        | ( (uint32_t) (b)[(i) + 2] <<  8 )             \
        | ( (uint32_t) (b)[(i) + 3]       );            \
} while (0)
#endif

#ifndef PUT_UINT32_BE
/* Store the 32-bit word n into bytes b[i..i+3] in big-endian order. */
#define PUT_UINT32_BE(n,b,i)                            \
do {                                                    \
    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
} while (0)
#endif

/* SHA-1 initial chaining values. */
static const uint32_t
    h0 = 0x67452301,
    h1 = 0xEFCDAB89,
    h2 = 0x98BADCFE,
    h3 = 0x10325476,
    h4 = 0xC3D2E1F0;

/*
 * sha1_16 - SHA-1 specialized for exactly 16 bytes of input.
 *
 * out: receives the first 16 bytes of the digest (the last 4 bytes of the
 *      full 20-byte SHA-1 digest are never produced).
 * in:  the 16 message bytes to hash.
 *
 * Because the message length is fixed, the single 64-byte block is built
 * directly: four message words, the 0x80 padding byte, zeros, and the
 * 128-bit message length. The 80 rounds are kept fully unrolled on purpose;
 * this routine exists to be faster than a general-purpose SHA-1.
 */
void sha1_16(unsigned char out[16], const unsigned char in[16]) {
    uint32_t temp, W[16],
        A = h0, B = h1, C = h2, D = h3, E = h4;

    // only 16 bytes taken
    GET_UINT32_BE(W[0], in, 0);
    GET_UINT32_BE(W[1], in, 4);
    GET_UINT32_BE(W[2], in, 8);
    GET_UINT32_BE(W[3], in, 12);

    // padding (a single 1 bit right after the message) and
    // length (bit length in big endian: 16 bytes = 128 bits = 0x80)
    W[4] = 0x80000000u; W[5] = 0; W[6] = 0; W[7] = 0;
    W[8] = 0; W[9] = 0; W[10] = 0; W[11] = 0;
    W[12] = 0; W[13] = 0; W[14] = 0; W[15] = 0x80u;

/* rotate the 32-bit value x left by n bits (0 < n < 32) */
#define S(x,n) (((x) << (n)) | (((x) & 0xFFFFFFFF) >> (32 - (n))))

/* message schedule for round t >= 16, computed in place in the 16-word ring W */
#define R(t)                                                    \
(                                                               \
    temp = W[( (t) -  3 ) & 0x0F] ^ W[( (t) - 8 ) & 0x0F] ^     \
           W[( (t) - 14 ) & 0x0F] ^ W[  (t)       & 0x0F],      \
    ( W[(t) & 0x0F] = S(temp,1) )                               \
)

/* one round; the caller rotates the roles of A..E instead of moving values */
#define P(a,b,c,d,e,x)                                          \
do {                                                            \
    (e) += S(a,5) + F(b,c,d) + K + (x); (b) = S(b,30);          \
} while (0)

/* rounds 0..19 */
#define F(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define K 0x5A827999u

    P( A, B, C, D, E, W[0]  );
    P( E, A, B, C, D, W[1]  );
    P( D, E, A, B, C, W[2]  );
    P( C, D, E, A, B, W[3]  );
    P( B, C, D, E, A, W[4]  );
    P( A, B, C, D, E, W[5]  );
    P( E, A, B, C, D, W[6]  );
    P( D, E, A, B, C, W[7]  );
    P( C, D, E, A, B, W[8]  );
    P( B, C, D, E, A, W[9]  );
    P( A, B, C, D, E, W[10] );
    P( E, A, B, C, D, W[11] );
    P( D, E, A, B, C, W[12] );
    P( C, D, E, A, B, W[13] );
    P( B, C, D, E, A, W[14] );
    P( A, B, C, D, E, W[15] );
    P( E, A, B, C, D, R(16) );
    P( D, E, A, B, C, R(17) );
    P( C, D, E, A, B, R(18) );
    P( B, C, D, E, A, R(19) );

#undef K
#undef F

/* rounds 20..39 */
#define F(x,y,z) ((x) ^ (y) ^ (z))
#define K 0x6ED9EBA1u

    P( A, B, C, D, E, R(20) );
    P( E, A, B, C, D, R(21) );
    P( D, E, A, B, C, R(22) );
    P( C, D, E, A, B, R(23) );
    P( B, C, D, E, A, R(24) );
    P( A, B, C, D, E, R(25) );
    P( E, A, B, C, D, R(26) );
    P( D, E, A, B, C, R(27) );
    P( C, D, E, A, B, R(28) );
    P( B, C, D, E, A, R(29) );
    P( A, B, C, D, E, R(30) );
    P( E, A, B, C, D, R(31) );
    P( D, E, A, B, C, R(32) );
    P( C, D, E, A, B, R(33) );
    P( B, C, D, E, A, R(34) );
    P( A, B, C, D, E, R(35) );
    P( E, A, B, C, D, R(36) );
    P( D, E, A, B, C, R(37) );
    P( C, D, E, A, B, R(38) );
    P( B, C, D, E, A, R(39) );

#undef K
#undef F

/* rounds 40..59 */
#define F(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))
#define K 0x8F1BBCDCu

    P( A, B, C, D, E, R(40) );
    P( E, A, B, C, D, R(41) );
    P( D, E, A, B, C, R(42) );
    P( C, D, E, A, B, R(43) );
    P( B, C, D, E, A, R(44) );
    P( A, B, C, D, E, R(45) );
    P( E, A, B, C, D, R(46) );
    P( D, E, A, B, C, R(47) );
    P( C, D, E, A, B, R(48) );
    P( B, C, D, E, A, R(49) );
    P( A, B, C, D, E, R(50) );
    P( E, A, B, C, D, R(51) );
    P( D, E, A, B, C, R(52) );
    P( C, D, E, A, B, R(53) );
    P( B, C, D, E, A, R(54) );
    P( A, B, C, D, E, R(55) );
    P( E, A, B, C, D, R(56) );
    P( D, E, A, B, C, R(57) );
    P( C, D, E, A, B, R(58) );
    P( B, C, D, E, A, R(59) );

#undef K
#undef F

/* rounds 60..79 */
#define F(x,y,z) ((x) ^ (y) ^ (z))
#define K 0xCA62C1D6u

    P( A, B, C, D, E, R(60) );
    P( E, A, B, C, D, R(61) );
    P( D, E, A, B, C, R(62) );
    P( C, D, E, A, B, R(63) );
    P( B, C, D, E, A, R(64) );
    P( A, B, C, D, E, R(65) );
    P( E, A, B, C, D, R(66) );
    P( D, E, A, B, C, R(67) );
    P( C, D, E, A, B, R(68) );
    P( B, C, D, E, A, R(69) );
    P( A, B, C, D, E, R(70) );
    P( E, A, B, C, D, R(71) );
    P( D, E, A, B, C, R(72) );
    P( C, D, E, A, B, R(73) );
    P( B, C, D, E, A, R(74) );
    P( A, B, C, D, E, R(75) );
    P( E, A, B, C, D, R(76) );
    P( D, E, A, B, C, R(77) );
    P( C, D, E, A, B, R(78) );
    P( B, C, D, E, A, R(79) );

#undef K
#undef F

#undef S
#undef R
#undef P

    // only 16 bytes needed, so the feed-forward for E (E += h4) is skipped
    A += h0;
    B += h1;
    C += h2;
    D += h3;

    PUT_UINT32_BE(A, out, 0);
    PUT_UINT32_BE(B, out, 4);
    PUT_UINT32_BE(C, out, 8);
    PUT_UINT32_BE(D, out, 12);
}