#include #include #include "aes.h" /* AES 128 ECB dug out from mbed TLS 2.5.1 * https://github.com/ARMmbed/mbedtls/blob/development/include/mbedtls/aes.h * https://github.com/ARMmbed/mbedtls/blob/development/library/aes.c * * C style comments are mbed TLS comments * C++ style comments are mine */ // make VC happy #ifdef _MSC_VER #define DTCM_BSS #define ITCM_CODE #endif // it's interesting they mix unsigned char with uint32_t DTCM_BSS static unsigned char FSb[256]; DTCM_BSS static uint32_t FT0[256]; DTCM_BSS static uint32_t FT1[256]; DTCM_BSS static uint32_t FT2[256]; DTCM_BSS static uint32_t FT3[256]; // AES-CTR/CCM only uses encrypt, so R tables are not used #define NO_R_TABLES #ifndef NO_R_TABLES static unsigned char RSb[256]; static uint32_t RT0[256]; static uint32_t RT1[256]; static uint32_t RT2[256]; static uint32_t RT3[256]; #endif static uint32_t RCON[256]; /* * Tables generation code */ #define ROTL8(x) ( ( x << 8 ) & 0xFFFFFFFF ) | ( x >> 24 ) #define XTIME(x) ( ( x << 1 ) ^ ( ( x & 0x80 ) ? 0x1B : 0x00 ) ) #define MUL(x,y) ( ( x && y ) ? pow[(log[x]+log[y]) % 255] : 0 ) void aes_gen_tables(void) { #ifdef NO_R_TABLES unsigned char *RSb = memalign(32, 256); uint32_t *RT0 = memalign(32, 256 * sizeof(uint32_t)); uint32_t *RT1 = memalign(32, 256 * sizeof(uint32_t)); uint32_t *RT2 = memalign(32, 256 * sizeof(uint32_t)); uint32_t *RT3 = memalign(32, 256 * sizeof(uint32_t)); #endif int i, x, y, z; int pow[256]; int log[256]; /* * compute pow and log tables over GF(2^8) */ for (i = 0, x = 1; i < 256; i++) { pow[i] = x; log[x] = i; x = (x ^ XTIME(x)) & 0xFF; } /* * calculate the round constants */ for (i = 0, x = 1; i < 10; i++) { RCON[i] = (uint32_t)x; x = XTIME(x) & 0xFF; } /* * generate the forward and reverse S-boxes */ FSb[0x00] = 0x63; RSb[0x63] = 0x00; for (i = 1; i < 256; i++) { x = pow[255 - log[i]]; y = x; y = ((y << 1) | (y >> 7)) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF; x ^= y ^ 0x63; FSb[i] = (unsigned char)x; RSb[x] = (unsigned char)i; } /* * generate the forward and reverse tables */ for (i = 0; i < 256; i++) { x = FSb[i]; y = XTIME(x) & 0xFF; z = (y ^ x) & 0xFF; FT0[i] = ((uint32_t)y) ^ ((uint32_t)x << 8) ^ ((uint32_t)x << 16) ^ ((uint32_t)z << 24); FT1[i] = ROTL8(FT0[i]); FT2[i] = ROTL8(FT1[i]); FT3[i] = ROTL8(FT2[i]); x = RSb[i]; RT0[i] = ((uint32_t)MUL(0x0E, x)) ^ ((uint32_t)MUL(0x09, x) << 8) ^ ((uint32_t)MUL(0x0D, x) << 16) ^ ((uint32_t)MUL(0x0B, x) << 24); RT1[i] = ROTL8(RT0[i]); RT2[i] = ROTL8(RT1[i]); RT3[i] = ROTL8(RT2[i]); } #ifdef NO_R_TABLES free(RSb); free(RT0); free(RT1); free(RT2); free(RT3); #endif } // did a little counting to understand why original mbedTLS buf is [68] // in set key, they generated: // 128 bits key: 10 rounds of += 4, plus 4 after, 44 // 192 bits key: 8 rounds of += 6, plus 6 after, 56 // 256 bits key: 7 rounds of += 8, plus 8 after, 64 // and in ecb encrypt, it used: // 4 + 4 * 2 * 4 + 4 + 4 "++"s, 44 // 4 + 4 * 2 * 5 + 4 + 4 "++"s, 52 // 4 + 4 * 2 * 6 + 4 + 4 "++"s, 60 // so they generated several bytes more in 192 and 256 modes to simplify the loop // "able to hold 32 extra bytes" in their comment makes senses now void aes_set_key_enc_128_be(uint32_t rk[RK_LEN], const unsigned char *key) { uint32_t *RK = rk; GET_UINT32_BE(RK[0], key, 12); GET_UINT32_BE(RK[1], key, 8); GET_UINT32_BE(RK[2], key, 4); GET_UINT32_BE(RK[3], key, 0); for (unsigned i = 0; i < 10; ++i, RK += 4) { RK[4] = RK[0] ^ RCON[i] ^ ((uint32_t)FSb[(RK[3] >> 8) & 0xFF]) ^ ((uint32_t)FSb[(RK[3] >> 16) & 0xFF] << 8) ^ ((uint32_t)FSb[(RK[3] >> 24) & 0xFF] << 16) ^ ((uint32_t)FSb[(RK[3]) & 0xFF] << 24); RK[5] = RK[1] ^ RK[4]; RK[6] = RK[2] ^ RK[5]; RK[7] = RK[3] ^ RK[6]; } } #define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ { \ X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \ FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y3 >> 24 ) & 0xFF ]; \ \ X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \ FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y0 >> 24 ) & 0xFF ]; \ \ X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \ FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y1 >> 24 ) & 0xFF ]; \ \ X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \ FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y2 >> 24 ) & 0xFF ]; \ } DTCM_BSS uint32_t X0, X1, X2, X3, Y0, Y1, Y2, Y3; DTCM_BSS const uint32_t *RK; ITCM_CODE void aes_encrypt_128_be(const uint32_t rk[RK_LEN], const unsigned char input[16], unsigned char output[16]) { RK = rk; GET_UINT32_BE(X0, input, 12); GET_UINT32_BE(X1, input, 8); GET_UINT32_BE(X2, input, 4); GET_UINT32_BE(X3, input, 0); X0 ^= *RK++; X1 ^= *RK++; X2 ^= *RK++; X3 ^= *RK++; // loop unrolled AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); X0 = *RK++ ^ \ ((uint32_t)FSb[(Y0) & 0xFF]) ^ ((uint32_t)FSb[(Y1 >> 8) & 0xFF] << 8) ^ ((uint32_t)FSb[(Y2 >> 16) & 0xFF] << 16) ^ ((uint32_t)FSb[(Y3 >> 24) & 0xFF] << 24); X1 = *RK++ ^ \ ((uint32_t)FSb[(Y1) & 0xFF]) ^ ((uint32_t)FSb[(Y2 >> 8) & 0xFF] << 8) ^ ((uint32_t)FSb[(Y3 >> 16) & 0xFF] << 16) ^ ((uint32_t)FSb[(Y0 >> 24) & 0xFF] << 24); X2 = *RK++ ^ \ ((uint32_t)FSb[(Y2) & 0xFF]) ^ ((uint32_t)FSb[(Y3 >> 8) & 0xFF] << 8) ^ ((uint32_t)FSb[(Y0 >> 16) & 0xFF] << 16) ^ ((uint32_t)FSb[(Y1 >> 24) & 0xFF] << 24); // removed a ++ here X3 = *RK ^ \ ((uint32_t)FSb[(Y3) & 0xFF]) ^ ((uint32_t)FSb[(Y0 >> 8) & 0xFF] << 8) ^ ((uint32_t)FSb[(Y1 >> 16) & 0xFF] << 16) ^ ((uint32_t)FSb[(Y2 >> 24) & 0xFF] << 24); PUT_UINT32_BE(X0, output, 12); PUT_UINT32_BE(X1, output, 8); PUT_UINT32_BE(X2, output, 4); PUT_UINT32_BE(X3, output, 0); }