ITCM/DTCM

This commit is contained in:
JimmyZ 2017-09-22 14:21:07 +08:00
parent 7157619a9b
commit bec9cac916
8 changed files with 198 additions and 154 deletions

3
.gitignore vendored
View File

@ -2,3 +2,6 @@
*.nds *.nds
arm7/build arm7/build
arm9/build arm9/build
.*
*.sln
*.vcxproj*

View File

@ -1,4 +1,5 @@
#include <nds.h>
#include "aes.h" #include "aes.h"
/* AES 128 ECB dug out from mbed TLS 2.5.1 /* AES 128 ECB dug out from mbed TLS 2.5.1
@ -9,22 +10,28 @@
* C++ style comments are mine * C++ style comments are mine
*/ */
// x86/x86_64 CPU's are little endian // x86/x86_64 CPU's are little endian
// popular OpenCL platforms(AMD, NVIDIA) are all little endian too // popular OpenCL platforms(AMD, NVIDIA) are all little endian too
// so pointer cast instead of bit operations // so pointer cast instead of bit operations
// just make sure buffers are aligned // just make sure buffers are aligned
#define GET_UINT32_LE(n, b, i) \ #define GET_UINT32_LE(n, b, i) \
(n) = *(uint32_t*)(b + i) (n) = *(uint32_t*)(b + i)
#define PUT_UINT32_LE(n, b, i) \ #define PUT_UINT32_LE(n, b, i) \
*(uint32_t*)(b + i) = (n) *(uint32_t*)(b + i) = (n)
// make VC happy
#ifdef _MSC_VER
#define DTCM_BSS
#define ITCM_CODE
#endif
// it's interesting they mix unsigned char with uint32_t // it's interesting they mix unsigned char with uint32_t
static unsigned char FSb[256]; DTCM_BSS static unsigned char FSb[256];
static uint32_t FT0[256]; DTCM_BSS static uint32_t FT0[256];
static uint32_t FT1[256]; DTCM_BSS static uint32_t FT1[256];
static uint32_t FT2[256]; DTCM_BSS static uint32_t FT2[256];
static uint32_t FT3[256]; DTCM_BSS static uint32_t FT3[256];
static unsigned char RSb[256]; static unsigned char RSb[256];
static uint32_t RT0[256]; static uint32_t RT0[256];
static uint32_t RT1[256]; static uint32_t RT1[256];
@ -40,80 +47,80 @@ static uint32_t RCON[256];
#define XTIME(x) ( ( x << 1 ) ^ ( ( x & 0x80 ) ? 0x1B : 0x00 ) ) #define XTIME(x) ( ( x << 1 ) ^ ( ( x & 0x80 ) ? 0x1B : 0x00 ) )
#define MUL(x,y) ( ( x && y ) ? pow[(log[x]+log[y]) % 255] : 0 ) #define MUL(x,y) ( ( x && y ) ? pow[(log[x]+log[y]) % 255] : 0 )
void aes_gen_tables( void ) void aes_gen_tables(void)
{ {
int i, x, y, z; int i, x, y, z;
int pow[256]; int pow[256];
int log[256]; int log[256];
/* /*
* compute pow and log tables over GF(2^8) * compute pow and log tables over GF(2^8)
*/ */
for( i = 0, x = 1; i < 256; i++ ) for (i = 0, x = 1; i < 256; i++)
{ {
pow[i] = x; pow[i] = x;
log[x] = i; log[x] = i;
x = ( x ^ XTIME( x ) ) & 0xFF; x = (x ^ XTIME(x)) & 0xFF;
} }
/* /*
* calculate the round constants * calculate the round constants
*/ */
for( i = 0, x = 1; i < 10; i++ ) for (i = 0, x = 1; i < 10; i++)
{ {
RCON[i] = (uint32_t) x; RCON[i] = (uint32_t)x;
x = XTIME( x ) & 0xFF; x = XTIME(x) & 0xFF;
} }
/* /*
* generate the forward and reverse S-boxes * generate the forward and reverse S-boxes
*/ */
FSb[0x00] = 0x63; FSb[0x00] = 0x63;
RSb[0x63] = 0x00; RSb[0x63] = 0x00;
for( i = 1; i < 256; i++ ) for (i = 1; i < 256; i++)
{ {
x = pow[255 - log[i]]; x = pow[255 - log[i]];
y = x; y = ( ( y << 1 ) | ( y >> 7 ) ) & 0xFF; y = x; y = ((y << 1) | (y >> 7)) & 0xFF;
x ^= y; y = ( ( y << 1 ) | ( y >> 7 ) ) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
x ^= y; y = ( ( y << 1 ) | ( y >> 7 ) ) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
x ^= y; y = ( ( y << 1 ) | ( y >> 7 ) ) & 0xFF; x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
x ^= y ^ 0x63; x ^= y ^ 0x63;
FSb[i] = (unsigned char) x; FSb[i] = (unsigned char)x;
RSb[x] = (unsigned char) i; RSb[x] = (unsigned char)i;
} }
/* /*
* generate the forward and reverse tables * generate the forward and reverse tables
*/ */
for( i = 0; i < 256; i++ ) for (i = 0; i < 256; i++)
{ {
x = FSb[i]; x = FSb[i];
y = XTIME( x ) & 0xFF; y = XTIME(x) & 0xFF;
z = ( y ^ x ) & 0xFF; z = (y ^ x) & 0xFF;
FT0[i] = ( (uint32_t) y ) ^ FT0[i] = ((uint32_t)y) ^
( (uint32_t) x << 8 ) ^ ((uint32_t)x << 8) ^
( (uint32_t) x << 16 ) ^ ((uint32_t)x << 16) ^
( (uint32_t) z << 24 ); ((uint32_t)z << 24);
FT1[i] = ROTL8( FT0[i] ); FT1[i] = ROTL8(FT0[i]);
FT2[i] = ROTL8( FT1[i] ); FT2[i] = ROTL8(FT1[i]);
FT3[i] = ROTL8( FT2[i] ); FT3[i] = ROTL8(FT2[i]);
x = RSb[i]; x = RSb[i];
RT0[i] = ( (uint32_t) MUL( 0x0E, x ) ) ^ RT0[i] = ((uint32_t)MUL(0x0E, x)) ^
( (uint32_t) MUL( 0x09, x ) << 8 ) ^ ((uint32_t)MUL(0x09, x) << 8) ^
( (uint32_t) MUL( 0x0D, x ) << 16 ) ^ ((uint32_t)MUL(0x0D, x) << 16) ^
( (uint32_t) MUL( 0x0B, x ) << 24 ); ((uint32_t)MUL(0x0B, x) << 24);
RT1[i] = ROTL8( RT0[i] ); RT1[i] = ROTL8(RT0[i]);
RT2[i] = ROTL8( RT1[i] ); RT2[i] = ROTL8(RT1[i]);
RT3[i] = ROTL8( RT2[i] ); RT3[i] = ROTL8(RT2[i]);
} }
} }
// did a little counting to understand why buf is [68] // did a little counting to understand why buf is [68]
@ -137,15 +144,15 @@ void aes_set_key_enc_128(uint32_t rk[RK_LEN], const unsigned char *key) {
GET_UINT32_LE(RK[3], key, 12); GET_UINT32_LE(RK[3], key, 12);
for (unsigned i = 0; i < 10; ++i, RK += 4) { for (unsigned i = 0; i < 10; ++i, RK += 4) {
RK[4] = RK[0] ^ RCON[i] ^ RK[4] = RK[0] ^ RCON[i] ^
( (uint32_t) FSb[ ( RK[3] >> 8 ) & 0xFF ] ) ^ ((uint32_t)FSb[(RK[3] >> 8) & 0xFF]) ^
( (uint32_t) FSb[ ( RK[3] >> 16 ) & 0xFF ] << 8 ) ^ ((uint32_t)FSb[(RK[3] >> 16) & 0xFF] << 8) ^
( (uint32_t) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^ ((uint32_t)FSb[(RK[3] >> 24) & 0xFF] << 16) ^
( (uint32_t) FSb[ ( RK[3] ) & 0xFF ] << 24 ); ((uint32_t)FSb[(RK[3]) & 0xFF] << 24);
RK[5] = RK[1] ^ RK[4]; RK[5] = RK[1] ^ RK[4];
RK[6] = RK[2] ^ RK[5]; RK[6] = RK[2] ^ RK[5];
RK[7] = RK[3] ^ RK[6]; RK[7] = RK[3] ^ RK[6];
} }
} }
@ -172,17 +179,18 @@ void aes_set_key_enc_128(uint32_t rk[RK_LEN], const unsigned char *key) {
FT3[ ( Y2 >> 24 ) & 0xFF ]; \ FT3[ ( Y2 >> 24 ) & 0xFF ]; \
} }
void aes_encrypt_128( const uint32_t rk[RK_LEN], DTCM_BSS uint32_t X0, X1, X2, X3, Y0, Y1, Y2, Y3;
const unsigned char input[16], DTCM_BSS const uint32_t *RK;
unsigned char output[16] )
{
uint32_t X0, X1, X2, X3, Y0, Y1, Y2, Y3;
const uint32_t *RK = rk;
GET_UINT32_LE( X0, input, 0 ); ITCM_CODE void aes_encrypt_128(const uint32_t rk[RK_LEN],
GET_UINT32_LE( X1, input, 4 ); const unsigned char input[16], unsigned char output[16])
GET_UINT32_LE( X2, input, 8 ); {
GET_UINT32_LE( X3, input, 12 ); RK = rk;
GET_UINT32_LE(X0, input, 0);
GET_UINT32_LE(X1, input, 4);
GET_UINT32_LE(X2, input, 8);
GET_UINT32_LE(X3, input, 12);
X0 ^= *RK++; X0 ^= *RK++;
X1 ^= *RK++; X1 ^= *RK++;
@ -190,44 +198,44 @@ void aes_encrypt_128( const uint32_t rk[RK_LEN],
X3 ^= *RK++; X3 ^= *RK++;
// loop unrolled // loop unrolled
AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
X0 = *RK++ ^ \ X0 = *RK++ ^ \
( (uint32_t) FSb[ ( Y0 ) & 0xFF ] ) ^ ((uint32_t)FSb[(Y0) & 0xFF]) ^
( (uint32_t) FSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ ((uint32_t)FSb[(Y1 >> 8) & 0xFF] << 8) ^
( (uint32_t) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ ((uint32_t)FSb[(Y2 >> 16) & 0xFF] << 16) ^
( (uint32_t) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); ((uint32_t)FSb[(Y3 >> 24) & 0xFF] << 24);
X1 = *RK++ ^ \ X1 = *RK++ ^ \
( (uint32_t) FSb[ ( Y1 ) & 0xFF ] ) ^ ((uint32_t)FSb[(Y1) & 0xFF]) ^
( (uint32_t) FSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ ((uint32_t)FSb[(Y2 >> 8) & 0xFF] << 8) ^
( (uint32_t) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ ((uint32_t)FSb[(Y3 >> 16) & 0xFF] << 16) ^
( (uint32_t) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); ((uint32_t)FSb[(Y0 >> 24) & 0xFF] << 24);
X2 = *RK++ ^ \ X2 = *RK++ ^ \
( (uint32_t) FSb[ ( Y2 ) & 0xFF ] ) ^ ((uint32_t)FSb[(Y2) & 0xFF]) ^
( (uint32_t) FSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ ((uint32_t)FSb[(Y3 >> 8) & 0xFF] << 8) ^
( (uint32_t) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ ((uint32_t)FSb[(Y0 >> 16) & 0xFF] << 16) ^
( (uint32_t) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); ((uint32_t)FSb[(Y1 >> 24) & 0xFF] << 24);
// removed a ++ here // removed a ++ here
X3 = *RK ^ \ X3 = *RK ^ \
( (uint32_t) FSb[ ( Y3 ) & 0xFF ] ) ^ ((uint32_t)FSb[(Y3) & 0xFF]) ^
( (uint32_t) FSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ ((uint32_t)FSb[(Y0 >> 8) & 0xFF] << 8) ^
( (uint32_t) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ ((uint32_t)FSb[(Y1 >> 16) & 0xFF] << 16) ^
( (uint32_t) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); ((uint32_t)FSb[(Y2 >> 24) & 0xFF] << 24);
PUT_UINT32_LE( X0, output, 0 ); PUT_UINT32_LE(X0, output, 0);
PUT_UINT32_LE( X1, output, 4 ); PUT_UINT32_LE(X1, output, 4);
PUT_UINT32_LE( X2, output, 8 ); PUT_UINT32_LE(X2, output, 8);
PUT_UINT32_LE( X3, output, 12 ); PUT_UINT32_LE(X3, output, 12);
} }

View File

@ -147,7 +147,11 @@ static void dsi_make_key(u32 *key, u32 console_id_l, u32 console_id_h, int is3DS
byte_reverse_16_ip((u8*)key); byte_reverse_16_ip((u8*)key);
} }
static u32 rk[RK_LEN]; #ifdef _MSC_VER
#define DTCM_BSS
#endif
DTCM_BSS static u32 rk[RK_LEN];
static u32 ctr_base[4]; static u32 ctr_base[4];
int tables_generated = 0; int tables_generated = 0;
@ -181,14 +185,15 @@ void dsi_nand_crypt_init(const u8 *console_id, const u8 *emmc_cid, int is3DS) {
// crypt one AES block, in/out must be aligned to 32 bits // crypt one AES block, in/out must be aligned to 32 bits
// offset as block offset // offset as block offset
void dsi_nand_crypt_1(u8* out, const u8* in, u32 offset) { void dsi_nand_crypt_1(u8* out, const u8* in, u32 offset) {
u32 buf[4] = { ctr_base[0], ctr_base[1], ctr_base[2], ctr_base[3] }; u32 buf0[4] = { ctr_base[0], ctr_base[1], ctr_base[2], ctr_base[3] };
add_128_32(buf, offset); u32 buf1[4];
byte_reverse_16_ip((u8*)buf); add_128_32(buf0, offset);
byte_reverse_16((u8*)buf1, (u8*)buf0);
// iprintf("AES CTR:\n"); // iprintf("AES CTR:\n");
// print_bytes(buf, 16); // print_bytes(buf, 16);
aes_encrypt_128(rk, (u8*)buf, (u8*)buf); aes_encrypt_128(rk, (u8*)buf1, (u8*)buf1);
byte_reverse_16_ip((u8*)buf); byte_reverse_16((u8*)buf0, (u8*)buf1);
xor_128((u32*)out, (u32*)in, buf); xor_128((u32*)out, (u32*)in, buf0);
} }
void dsi_nand_crypt(u8* out, const u8* in, u32 offset, unsigned count) { void dsi_nand_crypt(u8* out, const u8* in, u32 offset, unsigned count) {

View File

@ -275,37 +275,47 @@ int main(int argc, const char * const argv[]) {
consoleSelect(&bottomScreen); consoleSelect(&bottomScreen);
int mode = MODE_IMAGE;
if (argc > 1) {
for (unsigned i = 1; i < argc; ++i) {
if (!strcmp(argv[i], "--image-test")) {
iprintf("image test mode\n");
mode = MODE_IMAGE_TEST;
}
else if (!strcmp(argv[i], "--direct-test")) {
iprintf("direct test mode\n");
mode = MODE_DIRECT_TEST;
}
}
}
u32 bat_reg = getBatteryLevel(); u32 bat_reg = getBatteryLevel();
if (!(bat_reg & 1)) { if (!(bat_reg & 1)) {
iprintf("battery level too low: %08" PRIx32 "\n", bat_reg); iprintf("battery level too low: %08" PRIx32 "\n", bat_reg);
exit_with_prompt(0); exit_with_prompt(0);
} }
iprintf("FAT init..."); int mode = MODE_IMAGE;
if (!fatInitDefault()) { if (argc > 1) {
iprintf("\x1b[3D failed!\n"); if (argc == 2 && !strcmp(argv[1], "image-test")) {
exit_with_prompt(-1); iprintf("image test mode\n");
} else { mode = MODE_IMAGE_TEST;
iprintf("\x1b[3D succeed\n"); } else if (argc == 2 && !strcmp(argv[1], "direct-test")) {
iprintf("direct test mode\n");
mode = MODE_DIRECT_TEST;
} else if (argc == 5 && !strcmp(argv[1], "aes-test")) {
iprintf("AES test default\n");
aes_test(atoi(argv[2]), argv[3], argv[4]);
setCpuClock(false);
iprintf("AES test clock low\n");
aes_test(atoi(argv[2]), argv[3], argv[4]);
setCpuClock(true);
iprintf("AES test clock true\n");
aes_test(atoi(argv[2]), argv[3], argv[4]);
exit_with_prompt(0);
}
} }
int ret; int ret;
iprintf("FAT init...");
cpuStartTiming(0);
ret = fatInitDefault();
u32 td = timerTicks2usec(cpuEndTiming());
if (!ret) {
iprintf("\x1b[3D failed!\n");
exit_with_prompt(-1);
} else {
iprintf("\x1b[3D succeed, %" PRIu32 "us\n", td);
}
if (mode == MODE_IMAGE_TEST) { if (mode == MODE_IMAGE_TEST) {
if ((ret = test_image_against_footer()) != 0) { if ((ret = test_image_against_footer()) != 0) {
exit_with_prompt(ret); exit_with_prompt(ret);

View File

@ -89,10 +89,10 @@ int get_ids() {
char *p_console_id_file = 0; char *p_console_id_file = 0;
size_t console_id_file_size; size_t console_id_file_size;
bool console_id_from_file = false; int console_id_from_file = 0;
if (load_file((void**)&p_console_id_file, &console_id_file_size, "console_id.txt", false, 0) == 0) { if (load_file((void**)&p_console_id_file, &console_id_file_size, "console_id.txt", 1, 0) == 0) {
if (console_id_file_size >= 16 && hex2bytes(console_id, 8, p_console_id_file) == 0) { if (console_id_file_size >= 16 && hex2bytes(console_id, 8, p_console_id_file) == 0) {
console_id_from_file = true; console_id_from_file = 1;
} }
free(p_console_id_file); free(p_console_id_file);
} }
@ -313,3 +313,19 @@ int restore() {
return -1; return -1;
} }
void aes_test(int loops, const char * s_console_id, const char * s_emmc_cid) {
hex2bytes(console_id, 8, s_console_id);
hex2bytes(emmc_cid, 16, s_emmc_cid);
dsi_nand_crypt_init(console_id, emmc_cid, 0);
cpuStartTiming(0);
for (int i = 0; i < loops; ++i) {
dsi_nand_crypt((u8*)dump_buf, (u8*)dump_buf,
i * (DUMP_BUF_SIZE / AES_BLOCK_SIZE), DUMP_BUF_SIZE / AES_BLOCK_SIZE);
}
u32 td = timerTicks2usec(cpuEndTiming());
printf("%" PRIu32 " us %u KB\n%.2f KB/s\n", td, (DUMP_BUF_SIZE * loops) >> 10,
1000.0f * DUMP_BUF_SIZE * loops / td);
}

View File

@ -15,3 +15,5 @@ int mount(int direct);
int backup(); int backup();
int restore(); int restore();
void aes_test(int loops, const char * s_console_id, const char * s_emmc_cid);

View File

@ -1,2 +1,2 @@
twlnf.nds twlnf.nds
--direct-test direct-test

View File

@ -1,2 +1,2 @@
twlnf.nds twlnf.nds
--image-test image-test