Implement NAND browsing

Code from twlnf.
The NAND write code is stripped for safety reasons.
This commit is contained in:
RocketRobz 2020-02-11 18:42:04 -07:00
parent 2bd4e57081
commit bf53a3c037
30 changed files with 5938 additions and 70 deletions

View File

@ -43,5 +43,6 @@ Once everything is downloaded and installed, `git clone` this repository, naviga
* [RocketRobz](https://github.com/RocketRobz): Creator of GodMode9i.
* [zacchi4k](https://github.com/zacchi4k): Creator of the GodMode9i logo used in v1.3.1 and onwards.
* [Edo9300](https://github.com/edo9300): Save reading code from his save manager tool.
* [JimmyZ](https://github.com/JimmyZ): NAND code from twlnf (with writing code stripped for safety reasons).
* [devkitPro](https://github.com/devkitPro): devkitARM, libnds, original nds-hb-menu code, and screenshot code.
* [d0k3](https://github.com/d0k3): Original GM9 app and name for the Nintendo 3DS, which this is inspired by.

View File

@ -93,7 +93,11 @@ int main() {
irqEnable( IRQ_VBLANK | IRQ_VCOUNT );
setPowerButtonCB(powerButtonCB);
for (int i = 0; i < 8; i++) {
*(u8*)(0x2FFFD00+i) = *(u8*)(0x4004D07-i); // Get ConsoleID
}
fifoSendValue32(FIFO_USER_03, *SCFG_EXT);
fifoSendValue32(FIFO_USER_07, *(u16*)(0x4004700));
fifoSendValue32(FIFO_USER_06, 1);
@ -103,6 +107,10 @@ int main() {
if ( 0 == (REG_KEYINPUT & (KEY_SELECT | KEY_START | KEY_L | KEY_R))) {
exitflag = true;
}
if (*(u32*)(0x2FFFD0C) == 0x454D4D43) {
sdmmc_nand_cid((u32*)0x2FFD7BC); // Get eMMC CID
*(u32*)(0x2FFFD0C) = 0;
}
resyncClock();
swiWaitForVBlank();
}

View File

@ -19,7 +19,7 @@ include $(DEVKITARM)/ds_rules
#---------------------------------------------------------------------------------
TARGET := GodMode9i
BUILD := build
SOURCES := source dldi-include ramdrive-include
SOURCES := source dldi-include ramdrive-include mbedtls
INCLUDES := include dldi-include ramdrive-include source
DATA := ../data
GRAPHICS := ../gfx

252
arm9/mbedtls/aes.c Normal file
View File

@ -0,0 +1,252 @@
#include <nds.h>
#include <malloc.h>
#include "aes.h"
/* AES 128 ECB dug out from mbed TLS 2.5.1
* https://github.com/ARMmbed/mbedtls/blob/development/include/mbedtls/aes.h
* https://github.com/ARMmbed/mbedtls/blob/development/library/aes.c
*
* C style comments are mbed TLS comments
* C++ style comments are mine
*/
// make VC happy
#ifdef _MSC_VER
#define DTCM_BSS
#define ITCM_CODE
#endif
// it's interesting they mix unsigned char with uint32_t
// Forward S-box and forward round tables, filled by aes_gen_tables().
// These are the only tables read on the encrypt path, hence DTCM_BSS.
DTCM_BSS static unsigned char FSb[256];
DTCM_BSS static uint32_t FT0[256];
DTCM_BSS static uint32_t FT1[256];
DTCM_BSS static uint32_t FT2[256];
DTCM_BSS static uint32_t FT3[256];
// AES-CTR/CCM only uses encrypt, so R tables are not used
#define NO_R_TABLES
#ifndef NO_R_TABLES
static unsigned char RSb[256];
static uint32_t RT0[256];
static uint32_t RT1[256];
static uint32_t RT2[256];
static uint32_t RT3[256];
#endif
// Round constants: only indices 0..9 are ever generated (aes_gen_tables)
// or consumed (aes_set_key_enc_128_be), so 10 entries are enough.
static uint32_t RCON[10];
/*
* Tables generation code
*/
// Helpers for aes_gen_tables(). Arguments and the whole expansion are
// parenthesized so the macros stay correct inside larger expressions.
// ROTL8: rotate a 32-bit word left by 8 bits.
#define ROTL8(x) ( ( ( (x) << 8 ) & 0xFFFFFFFF ) | ( (x) >> 24 ) )
// XTIME: shift left by one and XOR in 0x1B when bit 7 was set
// (callers mask the result with 0xFF).
#define XTIME(x) ( ( (x) << 1 ) ^ ( ( (x) & 0x80 ) ? 0x1B : 0x00 ) )
// MUL: product looked up through the pow[]/log[] arrays that must be in
// scope at the point of use; yields 0 when either operand is 0.
#define MUL(x,y) ( ( (x) && (y) ) ? pow[(log[(x)]+log[(y)]) % 255] : 0 )
// Fill the lookup tables read by aes_set_key_enc_128_be() and
// aes_encrypt_128_be(): RCON (round constants), FSb (forward S-box) and
// FT0..FT3 (forward round tables).
//
// The reverse tables (RSb, RT0..RT3) are only computed when NO_R_TABLES is
// not defined. With NO_R_TABLES nothing ever reads them, so no scratch
// memory is allocated for them and the function cannot fail.
void aes_gen_tables(void)
{
    int i, x, y, z;
    int pow[256];
    int log[256];

    /*
     * compute pow and log tables over GF(2^8)
     */
    for (i = 0, x = 1; i < 256; i++)
    {
        pow[i] = x;
        log[x] = i;
        x = (x ^ XTIME(x)) & 0xFF;
    }

    /*
     * calculate the round constants
     */
    for (i = 0, x = 1; i < 10; i++)
    {
        RCON[i] = (uint32_t)x;
        x = XTIME(x) & 0xFF;
    }

    /*
     * generate the forward (and, if enabled, reverse) S-boxes
     */
    FSb[0x00] = 0x63;
#ifndef NO_R_TABLES
    RSb[0x63] = 0x00;
#endif
    for (i = 1; i < 256; i++)
    {
        x = pow[255 - log[i]];
        y = x; y = ((y << 1) | (y >> 7)) & 0xFF;
        x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
        x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
        x ^= y; y = ((y << 1) | (y >> 7)) & 0xFF;
        x ^= y ^ 0x63;
        FSb[i] = (unsigned char)x;
#ifndef NO_R_TABLES
        RSb[x] = (unsigned char)i;
#endif
    }

    /*
     * generate the forward (and, if enabled, reverse) tables
     */
    for (i = 0; i < 256; i++)
    {
        x = FSb[i];
        y = XTIME(x) & 0xFF;
        z = (y ^ x) & 0xFF;
        FT0[i] = ((uint32_t)y) ^
            ((uint32_t)x << 8) ^
            ((uint32_t)x << 16) ^
            ((uint32_t)z << 24);
        FT1[i] = ROTL8(FT0[i]);
        FT2[i] = ROTL8(FT1[i]);
        FT3[i] = ROTL8(FT2[i]);
#ifndef NO_R_TABLES
        x = RSb[i];
        RT0[i] = ((uint32_t)MUL(0x0E, x)) ^
            ((uint32_t)MUL(0x09, x) << 8) ^
            ((uint32_t)MUL(0x0D, x) << 16) ^
            ((uint32_t)MUL(0x0B, x) << 24);
        RT1[i] = ROTL8(RT0[i]);
        RT2[i] = ROTL8(RT1[i]);
        RT3[i] = ROTL8(RT2[i]);
#endif
    }
}
// did a little counting to understand why original mbedTLS buf is [68]
// in set key, they generated:
// 128 bits key: 10 rounds of += 4, plus 4 after, 44
// 192 bits key: 8 rounds of += 6, plus 6 after, 56
// 256 bits key: 7 rounds of += 8, plus 8 after, 64
// and in ecb encrypt, it used:
// 4 + 4 * 2 * 4 + 4 + 4 "++"s, 44
// 4 + 4 * 2 * 5 + 4 + 4 "++"s, 52
// 4 + 4 * 2 * 6 + 4 + 4 "++"s, 60
// so they generated several bytes more in 192 and 256 modes to simplify the loop
// "able to hold 32 extra bytes" in their comment makes sense now
// Expand a 16-byte key into the RK_LEN (44) round-key words in rk.
// The key is loaded in reversed byte order (see GET_UINT32_BE): rk[0] comes
// from key bytes 12..15 and rk[3] from key bytes 0..3. Each of the 10
// iterations derives the next four words from the previous four, using the
// RCON and FSb tables filled by aes_gen_tables().
void aes_set_key_enc_128_be(uint32_t rk[RK_LEN], const unsigned char *key) {
    GET_UINT32_BE(rk[0], key, 12);
    GET_UINT32_BE(rk[1], key, 8);
    GET_UINT32_BE(rk[2], key, 4);
    GET_UINT32_BE(rk[3], key, 0);

    for (unsigned round = 0; round < 10; ++round) {
        uint32_t *w = rk + 4 * round;   // w[0..3]: current words, w[4..7]: next
        const uint32_t last = w[3];

        // byte-rotated S-box substitution of the last word
        uint32_t sub = (uint32_t)FSb[(last >> 8) & 0xFF];
        sub ^= (uint32_t)FSb[(last >> 16) & 0xFF] << 8;
        sub ^= (uint32_t)FSb[(last >> 24) & 0xFF] << 16;
        sub ^= (uint32_t)FSb[last & 0xFF] << 24;

        w[4] = w[0] ^ RCON[round] ^ sub;
        w[5] = w[1] ^ w[4];
        w[6] = w[2] ^ w[5];
        w[7] = w[3] ^ w[6];
    }
}
// One forward AES round: reads the state words Y0..Y3, consumes four round-key
// words through the in-scope pointer RK (advancing it by 4) and writes the new
// state to X0..X3 using the FT0..FT3 tables.
// Wrapped in do { } while( 0 ) so that "AES_FROUND(...);" is a single statement
// and stays safe inside un-braced if/else; arguments are parenthesized.
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
do { \
    (X0) = *RK++ ^ FT0[ ( (Y0) ) & 0xFF ] ^ \
        FT1[ ( (Y1) >> 8 ) & 0xFF ] ^ \
        FT2[ ( (Y2) >> 16 ) & 0xFF ] ^ \
        FT3[ ( (Y3) >> 24 ) & 0xFF ]; \
    \
    (X1) = *RK++ ^ FT0[ ( (Y1) ) & 0xFF ] ^ \
        FT1[ ( (Y2) >> 8 ) & 0xFF ] ^ \
        FT2[ ( (Y3) >> 16 ) & 0xFF ] ^ \
        FT3[ ( (Y0) >> 24 ) & 0xFF ]; \
    \
    (X2) = *RK++ ^ FT0[ ( (Y2) ) & 0xFF ] ^ \
        FT1[ ( (Y3) >> 8 ) & 0xFF ] ^ \
        FT2[ ( (Y0) >> 16 ) & 0xFF ] ^ \
        FT3[ ( (Y1) >> 24 ) & 0xFF ]; \
    \
    (X3) = *RK++ ^ FT0[ ( (Y3) ) & 0xFF ] ^ \
        FT1[ ( (Y0) >> 8 ) & 0xFF ] ^ \
        FT2[ ( (Y1) >> 16 ) & 0xFF ] ^ \
        FT3[ ( (Y2) >> 24 ) & 0xFF ]; \
} while( 0 )
// Working state of aes_encrypt_128_be(), kept at file scope instead of on the
// stack: X0..X3 / Y0..Y3 are the two alternating copies of the 128-bit state,
// RK walks through the round keys.
// DTCM_BSS / ITCM_CODE are presumably the libnds section attributes (they are
// defined away for MSVC near the top of this file) - TODO confirm.
// NOTE(review): because this state is shared and not static, the function is
// not reentrant and the short names are visible to the whole program.
DTCM_BSS uint32_t X0, X1, X2, X3, Y0, Y1, Y2, Y3;
DTCM_BSS const uint32_t *RK;
// Encrypt one 16-byte block with AES-128.
//   rk     - RK_LEN round-key words produced by aes_set_key_enc_128_be()
//   input  - 16 bytes of plaintext, read in reversed byte order
//            (bytes 12..15 become X0, bytes 0..3 become X3, see GET_UINT32_BE)
//   output - receives 16 bytes of ciphertext in the same reversed order
// The whole input is read before output is written.
// aes_gen_tables() must have filled FSb/FT0..FT3 beforehand.
ITCM_CODE void aes_encrypt_128_be(const uint32_t rk[RK_LEN],
const unsigned char input[16], unsigned char output[16])
{
RK = rk;
GET_UINT32_BE(X0, input, 12);
GET_UINT32_BE(X1, input, 8);
GET_UINT32_BE(X2, input, 4);
GET_UINT32_BE(X3, input, 0);
// initial round-key addition (round-key words 0..3)
X0 ^= *RK++;
X1 ^= *RK++;
X2 ^= *RK++;
X3 ^= *RK++;
// loop unrolled
// nine table-driven rounds; X and Y swap roles as source/destination each
// round, and every AES_FROUND consumes four more round-key words
AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
// final round: uses the plain S-box FSb instead of the FT tables and the
// last four round-key words (40..43)
X0 = *RK++ ^ \
((uint32_t)FSb[(Y0) & 0xFF]) ^
((uint32_t)FSb[(Y1 >> 8) & 0xFF] << 8) ^
((uint32_t)FSb[(Y2 >> 16) & 0xFF] << 16) ^
((uint32_t)FSb[(Y3 >> 24) & 0xFF] << 24);
X1 = *RK++ ^ \
((uint32_t)FSb[(Y1) & 0xFF]) ^
((uint32_t)FSb[(Y2 >> 8) & 0xFF] << 8) ^
((uint32_t)FSb[(Y3 >> 16) & 0xFF] << 16) ^
((uint32_t)FSb[(Y0 >> 24) & 0xFF] << 24);
X2 = *RK++ ^ \
((uint32_t)FSb[(Y2) & 0xFF]) ^
((uint32_t)FSb[(Y3 >> 8) & 0xFF] << 8) ^
((uint32_t)FSb[(Y0 >> 16) & 0xFF] << 16) ^
((uint32_t)FSb[(Y1 >> 24) & 0xFF] << 24);
// removed a ++ here
// (RK already points at the last round-key word, so it is not advanced)
X3 = *RK ^ \
((uint32_t)FSb[(Y3) & 0xFF]) ^
((uint32_t)FSb[(Y0 >> 8) & 0xFF] << 8) ^
((uint32_t)FSb[(Y1 >> 16) & 0xFF] << 16) ^
((uint32_t)FSb[(Y2 >> 24) & 0xFF] << 24);
// store the state back in the same reversed byte order it was loaded in
PUT_UINT32_BE(X0, output, 12);
PUT_UINT32_BE(X1, output, 8);
PUT_UINT32_BE(X2, output, 4);
PUT_UINT32_BE(X3, output, 0);
}

32
arm9/mbedtls/aes.h Normal file
View File

@ -0,0 +1,32 @@
#pragma once
#include <stdint.h>
#define RK_LEN 44 //round key length
// modified to work on reversed byte order input/output
// it could work by wrapping it between byte reversed I/O, to minimize modification to actual AES code
// this is just my OCD to eliminate some copy
// original mbedTLS AES GET/PUT_UINT32 macros on little endian I/O regardless of CPU endianness
// seems like Nintendo used big endian hardware AES with little endian CPU
// by byte reversing on I/O, this mimics Nintendo behavior on little endian CPU
// calling it BE is not very accurate, it becomes little endian on big endian CPU
// Load the four bytes b[i..i+3] into the 32-bit lvalue n with their order
// reversed in memory: byte 0 of n receives b[i+3], byte 3 receives b[i].
// n must be an addressable lvalue; b and i are evaluated several times.
// Wrapped in do { } while (0) so the macro is a single statement, and i is
// parenthesized so that expressions such as "c ? 4 : 0" index correctly.
#define GET_UINT32_BE(n, b, i) \
do { \
    ((uint8_t*)&(n))[0] = (b)[(i) + 3]; \
    ((uint8_t*)&(n))[1] = (b)[(i) + 2]; \
    ((uint8_t*)&(n))[2] = (b)[(i) + 1]; \
    ((uint8_t*)&(n))[3] = (b)[(i) + 0]; \
} while (0)
// Store the 32-bit lvalue n into b[i..i+3] with its bytes reversed in memory:
// b[i] receives byte 3 of n, b[i+3] receives byte 0 (inverse of GET_UINT32_BE).
// n must be an addressable lvalue; b and i are evaluated several times.
// Wrapped in do { } while (0) so the macro is a single statement, and i is
// parenthesized so that expressions such as "c ? 4 : 0" index correctly.
#define PUT_UINT32_BE(n, b, i) \
do { \
    (b)[(i) + 0] = ((uint8_t*)&(n))[3]; \
    (b)[(i) + 1] = ((uint8_t*)&(n))[2]; \
    (b)[(i) + 2] = ((uint8_t*)&(n))[1]; \
    (b)[(i) + 3] = ((uint8_t*)&(n))[0]; \
} while (0)
// Fill the S-box, round tables and round constants used by the two
// functions below; call it before either of them.
void aes_gen_tables(void);
// Expand the 16-byte key (read in reversed byte order) into the RK_LEN
// round-key words rk.
void aes_set_key_enc_128_be(uint32_t rk[RK_LEN], const unsigned char *key);
// Encrypt one 16-byte block with the round keys rk; input and output are both
// handled in reversed byte order (see GET_UINT32_BE / PUT_UINT32_BE).
void aes_encrypt_128_be(const uint32_t rk[RK_LEN], const unsigned char input[16], unsigned char output[16]);

2452
arm9/mbedtls/bignum.c Normal file

File diff suppressed because it is too large Load Diff

761
arm9/mbedtls/bignum.h Normal file
View File

@ -0,0 +1,761 @@
/**
* \file bignum.h
*
* \brief Multi-precision integer library
*
* Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of mbed TLS (https://tls.mbed.org)
*/
#ifndef MBEDTLS_BIGNUM_H
#define MBEDTLS_BIGNUM_H
#if !defined(MBEDTLS_CONFIG_FILE)
#include "config.h"
#else
#include MBEDTLS_CONFIG_FILE
#endif
#include <stddef.h>
#include <stdint.h>
#if defined(MBEDTLS_FS_IO)
#include <stdio.h>
#endif
#define MBEDTLS_ERR_MPI_FILE_IO_ERROR -0x0002 /**< An error occurred while reading from or writing to a file. */
#define MBEDTLS_ERR_MPI_BAD_INPUT_DATA -0x0004 /**< Bad input parameters to function. */
#define MBEDTLS_ERR_MPI_INVALID_CHARACTER -0x0006 /**< There is an invalid character in the digit string. */
#define MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL -0x0008 /**< The buffer is too small to write to. */
#define MBEDTLS_ERR_MPI_NEGATIVE_VALUE -0x000A /**< The input arguments are negative or result in illegal output. */
#define MBEDTLS_ERR_MPI_DIVISION_BY_ZERO -0x000C /**< The input argument for division is zero, which is not allowed. */
#define MBEDTLS_ERR_MPI_NOT_ACCEPTABLE -0x000E /**< The input arguments are not acceptable. */
#define MBEDTLS_ERR_MPI_ALLOC_FAILED -0x0010 /**< Memory allocation failed. */
/*
 * Evaluate f, store the result in the caller's local variable `ret` and jump
 * to the caller's `cleanup` label when the result is non-zero.
 * Both `ret` and `cleanup` must exist at the point of use.
 */
#define MBEDTLS_MPI_CHK(f) do { if( ( ret = (f) ) != 0 ) goto cleanup; } while( 0 )
/*
* Maximum size MPIs are allowed to grow to in number of limbs.
*/
#define MBEDTLS_MPI_MAX_LIMBS 10000
#if !defined(MBEDTLS_MPI_WINDOW_SIZE)
/*
* Maximum window size used for modular exponentiation. Default: 6
* Minimum value: 1. Maximum value: 6.
*
* Result is an array of ( 2 << MBEDTLS_MPI_WINDOW_SIZE ) MPIs used
* for the sliding window calculation. (So 64 by default)
*
* Reduction in size, reduces speed.
*/
#define MBEDTLS_MPI_WINDOW_SIZE 6 /**< Maximum windows size used. */
#endif /* !MBEDTLS_MPI_WINDOW_SIZE */
#if !defined(MBEDTLS_MPI_MAX_SIZE)
/*
* Maximum size of MPIs allowed in bits and bytes for user-MPIs.
* ( Default: 1024 bytes => 8192 bits, Maximum tested: 2048 bytes => 16384 bits )
*
* Note: Calculations can temporarily result in larger MPIs. So the number
* of limbs required (MBEDTLS_MPI_MAX_LIMBS) is higher.
*/
#define MBEDTLS_MPI_MAX_SIZE 1024 /**< Maximum number of bytes for usable MPIs. */
#endif /* !MBEDTLS_MPI_MAX_SIZE */
#define MBEDTLS_MPI_MAX_BITS ( 8 * MBEDTLS_MPI_MAX_SIZE ) /**< Maximum number of bits for usable MPIs. */
/*
* When reading from files with mbedtls_mpi_read_file() and writing to files with
* mbedtls_mpi_write_file() the buffer should have space
* for a (short) label, the MPI (in the provided radix), the newline
* characters and the '\0'.
*
* By default we assume at least a 10 char label, a minimum radix of 10
* (decimal) and a maximum of 4096 bit numbers (1234 decimal chars).
* Autosized at compile time for at least a 10 char label, a minimum radix
* of 10 (decimal) for a number of MBEDTLS_MPI_MAX_BITS size.
*
* This used to be statically sized to 1250 for a maximum of 4096 bit
* numbers (1234 decimal chars).
*
* Calculate using the formula:
* MBEDTLS_MPI_RW_BUFFER_SIZE = ceil(MBEDTLS_MPI_MAX_BITS / ln(10) * ln(2)) +
* LabelSize + 6
*/
#define MBEDTLS_MPI_MAX_BITS_SCALE100 ( 100 * MBEDTLS_MPI_MAX_BITS )
#define MBEDTLS_LN_2_DIV_LN_10_SCALE100 332
#define MBEDTLS_MPI_RW_BUFFER_SIZE ( ((MBEDTLS_MPI_MAX_BITS_SCALE100 + MBEDTLS_LN_2_DIV_LN_10_SCALE100 - 1) / MBEDTLS_LN_2_DIV_LN_10_SCALE100) + 10 + 6 )
/*
* Define the base integer type, architecture-wise.
*
* 32 or 64-bit integer types can be forced regardless of the underlying
* architecture by defining MBEDTLS_HAVE_INT32 or MBEDTLS_HAVE_INT64
* respectively and undefining MBEDTLS_HAVE_ASM.
*
* Double-width integers (e.g. 128-bit in 64-bit architectures) can be
* disabled by defining MBEDTLS_NO_UDBL_DIVISION.
*/
#if !defined(MBEDTLS_HAVE_INT32)
#if defined(_MSC_VER) && defined(_M_AMD64)
/* Always choose 64-bit when using MSC */
#if !defined(MBEDTLS_HAVE_INT64)
#define MBEDTLS_HAVE_INT64
#endif /* !MBEDTLS_HAVE_INT64 */
typedef int64_t mbedtls_mpi_sint;
typedef uint64_t mbedtls_mpi_uint;
#elif defined(__GNUC__) && ( \
defined(__amd64__) || defined(__x86_64__) || \
defined(__ppc64__) || defined(__powerpc64__) || \
defined(__ia64__) || defined(__alpha__) || \
( defined(__sparc__) && defined(__arch64__) ) || \
defined(__s390x__) || defined(__mips64) )
#if !defined(MBEDTLS_HAVE_INT64)
#define MBEDTLS_HAVE_INT64
#endif /* MBEDTLS_HAVE_INT64 */
typedef int64_t mbedtls_mpi_sint;
typedef uint64_t mbedtls_mpi_uint;
#if !defined(MBEDTLS_NO_UDBL_DIVISION)
/* mbedtls_t_udbl defined as 128-bit unsigned int */
typedef unsigned int mbedtls_t_udbl __attribute__((mode(TI)));
#define MBEDTLS_HAVE_UDBL
#endif /* !MBEDTLS_NO_UDBL_DIVISION */
#elif defined(__ARMCC_VERSION) && defined(__aarch64__)
/*
* __ARMCC_VERSION is defined for both armcc and armclang and
* __aarch64__ is only defined by armclang when compiling 64-bit code
*/
#if !defined(MBEDTLS_HAVE_INT64)
#define MBEDTLS_HAVE_INT64
#endif /* !MBEDTLS_HAVE_INT64 */
typedef int64_t mbedtls_mpi_sint;
typedef uint64_t mbedtls_mpi_uint;
#if !defined(MBEDTLS_NO_UDBL_DIVISION)
/* mbedtls_t_udbl defined as 128-bit unsigned int */
typedef __uint128_t mbedtls_t_udbl;
#define MBEDTLS_HAVE_UDBL
#endif /* !MBEDTLS_NO_UDBL_DIVISION */
#elif defined(MBEDTLS_HAVE_INT64)
/* Force 64-bit integers with unknown compiler */
typedef int64_t mbedtls_mpi_sint;
typedef uint64_t mbedtls_mpi_uint;
#endif
#endif /* !MBEDTLS_HAVE_INT32 */
#if !defined(MBEDTLS_HAVE_INT64)
/* Default to 32-bit compilation */
#if !defined(MBEDTLS_HAVE_INT32)
#define MBEDTLS_HAVE_INT32
#endif /* !MBEDTLS_HAVE_INT32 */
typedef int32_t mbedtls_mpi_sint;
typedef uint32_t mbedtls_mpi_uint;
#if !defined(MBEDTLS_NO_UDBL_DIVISION)
typedef uint64_t mbedtls_t_udbl;
#define MBEDTLS_HAVE_UDBL
#endif /* !MBEDTLS_NO_UDBL_DIVISION */
#endif /* !MBEDTLS_HAVE_INT64 */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief MPI structure
*
* A multi-precision integer stored as a sign plus an array of limbs.
* One limb is one mbedtls_mpi_uint, whose width (32 or 64 bits) is
* selected by the MBEDTLS_HAVE_INT32 / MBEDTLS_HAVE_INT64 logic above.
* Set up with mbedtls_mpi_init() and released with mbedtls_mpi_free().
*/
typedef struct
{
int s; /*!< integer sign (presumably +1 / -1; confirm in bignum.c) */
size_t n; /*!< total # of limbs */
mbedtls_mpi_uint *p; /*!< pointer to limbs */
}
mbedtls_mpi;
/**
* \brief Initialize one MPI (make internal references valid)
* This just makes it ready to be set or freed,
* but does not define a value for the MPI.
*
* \param X One MPI to initialize.
*/
void mbedtls_mpi_init( mbedtls_mpi *X );
/**
* \brief Unallocate one MPI
*
* \param X One MPI to unallocate.
*/
void mbedtls_mpi_free( mbedtls_mpi *X );
/**
* \brief Enlarge to the specified number of limbs
*
* \param X MPI to grow
* \param nblimbs The target number of limbs
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_grow( mbedtls_mpi *X, size_t nblimbs );
/**
* \brief Resize down, keeping at least the specified number of limbs
*
* \param X MPI to shrink
* \param nblimbs The minimum number of limbs to keep
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_shrink( mbedtls_mpi *X, size_t nblimbs );
/**
* \brief Copy the contents of Y into X
*
* \param X Destination MPI
* \param Y Source MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y );
/**
* \brief Swap the contents of X and Y
*
* \param X First MPI value
* \param Y Second MPI value
*/
void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y );
/**
* \brief Safe conditional assignment X = Y if assign is 1
*
* \param X MPI to conditionally assign to
* \param Y Value to be assigned
* \param assign 1: perform the assignment, 0: keep X's original value
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
*
* \note This function is equivalent to
* if( assign ) mbedtls_mpi_copy( X, Y );
* except that it avoids leaking any information about whether
* the assignment was done or not (the above code may leak
* information through branch prediction and/or memory access
* patterns analysis).
*/
int mbedtls_mpi_safe_cond_assign( mbedtls_mpi *X, const mbedtls_mpi *Y, unsigned char assign );
/**
* \brief Safe conditional swap X <-> Y if swap is 1
*
* \param X First mbedtls_mpi value
* \param Y Second mbedtls_mpi value
* \param assign 1: perform the swap, 0: keep X and Y's original values
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
*
* \note This function is equivalent to
* if( assign ) mbedtls_mpi_swap( X, Y );
* except that it avoids leaking any information about whether
* the assignment was done or not (the above code may leak
* information through branch prediction and/or memory access
* patterns analysis).
*/
int mbedtls_mpi_safe_cond_swap( mbedtls_mpi *X, mbedtls_mpi *Y, unsigned char assign );
/**
* \brief Set value from integer
*
* \param X MPI to set
* \param z Value to use
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z );
/**
* \brief Get a specific bit from X
*
* \param X MPI to use
* \param pos Zero-based index of the bit in X
*
* \return Either a 0 or a 1
*/
int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos );
/**
* \brief Set a bit of X to a specific value of 0 or 1
*
* \note Will grow X if necessary to set a bit to 1 in a not yet
* existing limb. Will not grow if bit should be set to 0
*
* \param X MPI to use
* \param pos Zero-based index of the bit in X
* \param val The value to set the bit to (0 or 1)
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_BAD_INPUT_DATA if val is not 0 or 1
*/
int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val );
/**
* \brief Return the number of zero-bits before the least significant
* '1' bit
*
* Note: Thus also the zero-based index of the least significant '1' bit
*
* \param X MPI to use
*/
size_t mbedtls_mpi_lsb( const mbedtls_mpi *X );
/**
* \brief Return the number of bits up to and including the most
* significant '1' bit
*
* Note: Thus also the one-based index of the most significant '1' bit
*
* \param X MPI to use
*/
size_t mbedtls_mpi_bitlen( const mbedtls_mpi *X );
/**
* \brief Return the total size in bytes
*
* \param X MPI to use
*/
size_t mbedtls_mpi_size( const mbedtls_mpi *X );
/**
* \brief Import from an ASCII string
*
* \param X Destination MPI
* \param radix Input numeric base
* \param s Null-terminated string buffer
*
* \return 0 if successful, or a MBEDTLS_ERR_MPI_XXX error code
*/
int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s );
/**
* \brief Export into an ASCII string
*
* \param X Source MPI
* \param radix Output numeric base
* \param buf Buffer to write the string to
* \param buflen Length of buf
* \param olen Length of the string written, including final NUL byte
*
* \return 0 if successful, or a MBEDTLS_ERR_MPI_XXX error code.
* *olen is always updated to reflect the amount
* of data that has (or would have) been written.
*
* \note Call this function with buflen = 0 to obtain the
* minimum required buffer size in *olen.
*/
int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix,
char *buf, size_t buflen, size_t *olen );
#if defined(MBEDTLS_FS_IO)
/**
* \brief Read MPI from a line in an opened file
*
* \param X Destination MPI
* \param radix Input numeric base
* \param fin Input file handle
*
* \return 0 if successful, MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if
* the file read buffer is too small or a
* MBEDTLS_ERR_MPI_XXX error code
*
* \note On success, this function advances the file stream
* to the end of the current line or to EOF.
*
* The function returns 0 on an empty line.
*
* Leading whitespaces are ignored, as is a
* '0x' prefix for radix 16.
*
*/
int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin );
/**
* \brief Write X into an opened file, or stdout if fout is NULL
*
* \param p Prefix, can be NULL
* \param X Source MPI
* \param radix Output numeric base
* \param fout Output file handle (can be NULL)
*
* \return 0 if successful, or a MBEDTLS_ERR_MPI_XXX error code
*
* \note Set fout == NULL to print X on the console.
*/
int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout );
#endif /* MBEDTLS_FS_IO */
/**
* \brief Import X from unsigned binary data, big endian
*
* \param X Destination MPI
* \param buf Input buffer
* \param buflen Input buffer size
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_read_binary( mbedtls_mpi *X, const unsigned char *buf, size_t buflen );
/**
* \brief Export X into unsigned binary data, big endian.
* Always fills the whole buffer, which will start with zeros
* if the number is smaller.
*
* \param X Source MPI
* \param buf Output buffer
* \param buflen Output buffer size
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if buf isn't large enough
*/
int mbedtls_mpi_write_binary( const mbedtls_mpi *X, unsigned char *buf, size_t buflen );
/**
* \brief Left-shift: X <<= count
*
* \param X MPI to shift
* \param count Amount to shift
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_shift_l( mbedtls_mpi *X, size_t count );
/**
* \brief Right-shift: X >>= count
*
* \param X MPI to shift
* \param count Amount to shift
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_shift_r( mbedtls_mpi *X, size_t count );
/**
* \brief Compare unsigned values
*
* \param X Left-hand MPI
* \param Y Right-hand MPI
*
* \return 1 if |X| is greater than |Y|,
* -1 if |X| is lesser than |Y| or
* 0 if |X| is equal to |Y|
*/
int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y );
/**
* \brief Compare signed values
*
* \param X Left-hand MPI
* \param Y Right-hand MPI
*
* \return 1 if X is greater than Y,
* -1 if X is lesser than Y or
* 0 if X is equal to Y
*/
int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y );
/**
* \brief Compare signed values
*
* \param X Left-hand MPI
* \param z The integer value to compare to
*
* \return 1 if X is greater than z,
* -1 if X is lesser than z or
* 0 if X is equal to z
*/
int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z );
/**
* \brief Unsigned addition: X = |A| + |B|
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Unsigned subtraction: X = |A| - |B|
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_NEGATIVE_VALUE if B is greater than A
*/
int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Signed addition: X = A + B
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Signed subtraction: X = A - B
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Signed addition: X = A + b
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param b The integer value to add
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b );
/**
* \brief Signed subtraction: X = A - b
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param b The integer value to subtract
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b );
/**
* \brief Baseline multiplication: X = A * B
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Baseline multiplication: X = A * b
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param b The unsigned integer value to multiply with
*
* \note b is unsigned
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b );
/**
* \brief Division by mbedtls_mpi: A = Q * B + R
*
* \param Q Destination MPI for the quotient
* \param R Destination MPI for the rest value
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if B == 0
*
* \note Either Q or R can be NULL.
*/
int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Division by int: A = Q * b + R
*
* \param Q Destination MPI for the quotient
* \param R Destination MPI for the rest value
* \param A Left-hand MPI
* \param b Integer to divide by
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if b == 0
*
* \note Either Q or R can be NULL.
*/
int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, mbedtls_mpi_sint b );
/**
* \brief Modulo: R = A mod B
*
* \param R Destination MPI for the rest value
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if B == 0,
* MBEDTLS_ERR_MPI_NEGATIVE_VALUE if B < 0
*/
int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Modulo: r = A mod b
*
* \param r Destination mbedtls_mpi_uint
* \param A Left-hand MPI
* \param b Integer to divide by
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if b == 0,
* MBEDTLS_ERR_MPI_NEGATIVE_VALUE if b < 0
*/
int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b );
/**
* \brief Sliding-window exponentiation: X = A^E mod N
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param E Exponent MPI
* \param N Modular MPI
* \param _RR Speed-up MPI used for recalculations
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_BAD_INPUT_DATA if N is negative or even or
* if E is negative
*
* \note _RR is used to avoid re-computing R*R mod N across
* multiple calls, which speeds up things a bit. It can
* be set to NULL if the extra performance is unneeded.
*/
int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *E, const mbedtls_mpi *N, mbedtls_mpi *_RR );
/**
* \brief Fill an MPI X with size bytes of random
*
* \param X Destination MPI
* \param size Size in bytes
* \param f_rng RNG function
* \param p_rng RNG parameter
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng );
/**
* \brief Greatest common divisor: G = gcd(A, B)
*
* \param G Destination MPI
* \param A Left-hand MPI
* \param B Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
*/
int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B );
/**
* \brief Modular inverse: X = A^-1 mod N
*
* \param X Destination MPI
* \param A Left-hand MPI
* \param N Right-hand MPI
*
* \return 0 if successful,
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_BAD_INPUT_DATA if N is <= 1,
* MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if A has no inverse mod N.
*/
int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N );
/**
* \brief Miller-Rabin primality test
*
* \param X MPI to check
* \param f_rng RNG function
* \param p_rng RNG parameter
*
* \return 0 if successful (probably prime),
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if X is not prime
*/
int mbedtls_mpi_is_prime( const mbedtls_mpi *X,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng );
/**
* \brief Prime number generation
*
* \param X Destination MPI
* \param nbits Required size of X in bits
* ( 3 <= nbits <= MBEDTLS_MPI_MAX_BITS )
* \param dh_flag If 1, then (X-1)/2 will be prime too
* \param f_rng RNG function
* \param p_rng RNG parameter
*
* \return 0 if successful (probably prime),
* MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
* MBEDTLS_ERR_MPI_BAD_INPUT_DATA if nbits is < 3
*/
int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int dh_flag,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng );
/**
* \brief Checkup routine
*
* \return 0 if successful, or 1 if the test failed
*/
int mbedtls_mpi_self_test( int verbose );
#ifdef __cplusplus
}
#endif
#endif /* bignum.h */

887
arm9/mbedtls/bn_mul.h Normal file
View File

@ -0,0 +1,887 @@
/**
* \file bn_mul.h
*
* \brief Multi-precision integer library
*
* Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of mbed TLS (https://tls.mbed.org)
*/
/*
* Multiply source vector [s] with b, add result
* to destination vector [d] and set carry c.
*
* Currently supports:
*
* . IA-32 (386+) . AMD64 / EM64T
* . IA-32 (SSE2) . Motorola 68000
* . PowerPC, 32-bit . MicroBlaze
* . PowerPC, 64-bit . TriCore
* . SPARC v8 . ARM v3+
* . Alpha . MIPS32
* . C, longlong . C, generic
*/
#ifndef MBEDTLS_BN_MUL_H
#define MBEDTLS_BN_MUL_H
#include "bignum.h"
#if defined(MBEDTLS_HAVE_ASM)
#ifndef asm
#define asm __asm
#endif
/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
#if defined(__GNUC__) && \
( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
#if defined(__i386__)
#define MULADDC_INIT \
asm( \
"movl %%ebx, %0 \n\t" \
"movl %5, %%esi \n\t" \
"movl %6, %%edi \n\t" \
"movl %7, %%ecx \n\t" \
"movl %8, %%ebx \n\t"
#define MULADDC_CORE \
"lodsl \n\t" \
"mull %%ebx \n\t" \
"addl %%ecx, %%eax \n\t" \
"adcl $0, %%edx \n\t" \
"addl (%%edi), %%eax \n\t" \
"adcl $0, %%edx \n\t" \
"movl %%edx, %%ecx \n\t" \
"stosl \n\t"
#if defined(MBEDTLS_HAVE_SSE2)
#define MULADDC_HUIT \
"movd %%ecx, %%mm1 \n\t" \
"movd %%ebx, %%mm0 \n\t" \
"movd (%%edi), %%mm3 \n\t" \
"paddq %%mm3, %%mm1 \n\t" \
"movd (%%esi), %%mm2 \n\t" \
"pmuludq %%mm0, %%mm2 \n\t" \
"movd 4(%%esi), %%mm4 \n\t" \
"pmuludq %%mm0, %%mm4 \n\t" \
"movd 8(%%esi), %%mm6 \n\t" \
"pmuludq %%mm0, %%mm6 \n\t" \
"movd 12(%%esi), %%mm7 \n\t" \
"pmuludq %%mm0, %%mm7 \n\t" \
"paddq %%mm2, %%mm1 \n\t" \
"movd 4(%%edi), %%mm3 \n\t" \
"paddq %%mm4, %%mm3 \n\t" \
"movd 8(%%edi), %%mm5 \n\t" \
"paddq %%mm6, %%mm5 \n\t" \
"movd 12(%%edi), %%mm4 \n\t" \
"paddq %%mm4, %%mm7 \n\t" \
"movd %%mm1, (%%edi) \n\t" \
"movd 16(%%esi), %%mm2 \n\t" \
"pmuludq %%mm0, %%mm2 \n\t" \
"psrlq $32, %%mm1 \n\t" \
"movd 20(%%esi), %%mm4 \n\t" \
"pmuludq %%mm0, %%mm4 \n\t" \
"paddq %%mm3, %%mm1 \n\t" \
"movd 24(%%esi), %%mm6 \n\t" \
"pmuludq %%mm0, %%mm6 \n\t" \
"movd %%mm1, 4(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"movd 28(%%esi), %%mm3 \n\t" \
"pmuludq %%mm0, %%mm3 \n\t" \
"paddq %%mm5, %%mm1 \n\t" \
"movd 16(%%edi), %%mm5 \n\t" \
"paddq %%mm5, %%mm2 \n\t" \
"movd %%mm1, 8(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"paddq %%mm7, %%mm1 \n\t" \
"movd 20(%%edi), %%mm5 \n\t" \
"paddq %%mm5, %%mm4 \n\t" \
"movd %%mm1, 12(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"paddq %%mm2, %%mm1 \n\t" \
"movd 24(%%edi), %%mm5 \n\t" \
"paddq %%mm5, %%mm6 \n\t" \
"movd %%mm1, 16(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"paddq %%mm4, %%mm1 \n\t" \
"movd 28(%%edi), %%mm5 \n\t" \
"paddq %%mm5, %%mm3 \n\t" \
"movd %%mm1, 20(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"paddq %%mm6, %%mm1 \n\t" \
"movd %%mm1, 24(%%edi) \n\t" \
"psrlq $32, %%mm1 \n\t" \
"paddq %%mm3, %%mm1 \n\t" \
"movd %%mm1, 28(%%edi) \n\t" \
"addl $32, %%edi \n\t" \
"addl $32, %%esi \n\t" \
"psrlq $32, %%mm1 \n\t" \
"movd %%mm1, %%ecx \n\t"
#define MULADDC_STOP \
"emms \n\t" \
"movl %4, %%ebx \n\t" \
"movl %%ecx, %1 \n\t" \
"movl %%edi, %2 \n\t" \
"movl %%esi, %3 \n\t" \
: "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
: "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
: "eax", "ecx", "edx", "esi", "edi" \
);
#else
#define MULADDC_STOP \
"movl %4, %%ebx \n\t" \
"movl %%ecx, %1 \n\t" \
"movl %%edi, %2 \n\t" \
"movl %%esi, %3 \n\t" \
: "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
: "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
: "eax", "ecx", "edx", "esi", "edi" \
);
#endif /* SSE2 */
#endif /* i386 */
#if defined(__amd64__) || defined (__x86_64__)
#define MULADDC_INIT \
asm( \
"xorq %%r8, %%r8 \n\t"
#define MULADDC_CORE \
"movq (%%rsi), %%rax \n\t" \
"mulq %%rbx \n\t" \
"addq $8, %%rsi \n\t" \
"addq %%rcx, %%rax \n\t" \
"movq %%r8, %%rcx \n\t" \
"adcq $0, %%rdx \n\t" \
"nop \n\t" \
"addq %%rax, (%%rdi) \n\t" \
"adcq %%rdx, %%rcx \n\t" \
"addq $8, %%rdi \n\t"
#define MULADDC_STOP \
: "+c" (c), "+D" (d), "+S" (s) \
: "b" (b) \
: "rax", "rdx", "r8" \
);
#endif /* AMD64 */
#if defined(__mc68020__) || defined(__mcpu32__)
#define MULADDC_INIT \
asm( \
"movl %3, %%a2 \n\t" \
"movl %4, %%a3 \n\t" \
"movl %5, %%d3 \n\t" \
"movl %6, %%d2 \n\t" \
"moveq #0, %%d0 \n\t"
#define MULADDC_CORE \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d4:%%d1 \n\t" \
"addl %%d3, %%d1 \n\t" \
"addxl %%d0, %%d4 \n\t" \
"moveq #0, %%d3 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"addxl %%d4, %%d3 \n\t"
#define MULADDC_STOP \
"movl %%d3, %0 \n\t" \
"movl %%a3, %1 \n\t" \
"movl %%a2, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "d0", "d1", "d2", "d3", "d4", "a2", "a3" \
);
#define MULADDC_HUIT \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d4:%%d1 \n\t" \
"addxl %%d3, %%d1 \n\t" \
"addxl %%d0, %%d4 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d3:%%d1 \n\t" \
"addxl %%d4, %%d1 \n\t" \
"addxl %%d0, %%d3 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d4:%%d1 \n\t" \
"addxl %%d3, %%d1 \n\t" \
"addxl %%d0, %%d4 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d3:%%d1 \n\t" \
"addxl %%d4, %%d1 \n\t" \
"addxl %%d0, %%d3 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d4:%%d1 \n\t" \
"addxl %%d3, %%d1 \n\t" \
"addxl %%d0, %%d4 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d3:%%d1 \n\t" \
"addxl %%d4, %%d1 \n\t" \
"addxl %%d0, %%d3 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d4:%%d1 \n\t" \
"addxl %%d3, %%d1 \n\t" \
"addxl %%d0, %%d4 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"movel %%a2@+, %%d1 \n\t" \
"mulul %%d2, %%d3:%%d1 \n\t" \
"addxl %%d4, %%d1 \n\t" \
"addxl %%d0, %%d3 \n\t" \
"addl %%d1, %%a3@+ \n\t" \
"addxl %%d0, %%d3 \n\t"
#endif /* MC68000 */
#if defined(__powerpc64__) || defined(__ppc64__)
#if defined(__MACH__) && defined(__APPLE__)
#define MULADDC_INIT \
asm( \
"ld r3, %3 \n\t" \
"ld r4, %4 \n\t" \
"ld r5, %5 \n\t" \
"ld r6, %6 \n\t" \
"addi r3, r3, -8 \n\t" \
"addi r4, r4, -8 \n\t" \
"addic r5, r5, 0 \n\t"
#define MULADDC_CORE \
"ldu r7, 8(r3) \n\t" \
"mulld r8, r7, r6 \n\t" \
"mulhdu r9, r7, r6 \n\t" \
"adde r8, r8, r5 \n\t" \
"ld r7, 8(r4) \n\t" \
"addze r5, r9 \n\t" \
"addc r8, r8, r7 \n\t" \
"stdu r8, 8(r4) \n\t"
#define MULADDC_STOP \
"addze r5, r5 \n\t" \
"addi r4, r4, 8 \n\t" \
"addi r3, r3, 8 \n\t" \
"std r5, %0 \n\t" \
"std r4, %1 \n\t" \
"std r3, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
);
#else /* __MACH__ && __APPLE__ */
#define MULADDC_INIT \
asm( \
"ld %%r3, %3 \n\t" \
"ld %%r4, %4 \n\t" \
"ld %%r5, %5 \n\t" \
"ld %%r6, %6 \n\t" \
"addi %%r3, %%r3, -8 \n\t" \
"addi %%r4, %%r4, -8 \n\t" \
"addic %%r5, %%r5, 0 \n\t"
#define MULADDC_CORE \
"ldu %%r7, 8(%%r3) \n\t" \
"mulld %%r8, %%r7, %%r6 \n\t" \
"mulhdu %%r9, %%r7, %%r6 \n\t" \
"adde %%r8, %%r8, %%r5 \n\t" \
"ld %%r7, 8(%%r4) \n\t" \
"addze %%r5, %%r9 \n\t" \
"addc %%r8, %%r8, %%r7 \n\t" \
"stdu %%r8, 8(%%r4) \n\t"
#define MULADDC_STOP \
"addze %%r5, %%r5 \n\t" \
"addi %%r4, %%r4, 8 \n\t" \
"addi %%r3, %%r3, 8 \n\t" \
"std %%r5, %0 \n\t" \
"std %%r4, %1 \n\t" \
"std %%r3, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
);
#endif /* __MACH__ && __APPLE__ */
#elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */
#if defined(__MACH__) && defined(__APPLE__)
#define MULADDC_INIT \
asm( \
"lwz r3, %3 \n\t" \
"lwz r4, %4 \n\t" \
"lwz r5, %5 \n\t" \
"lwz r6, %6 \n\t" \
"addi r3, r3, -4 \n\t" \
"addi r4, r4, -4 \n\t" \
"addic r5, r5, 0 \n\t"
#define MULADDC_CORE \
"lwzu r7, 4(r3) \n\t" \
"mullw r8, r7, r6 \n\t" \
"mulhwu r9, r7, r6 \n\t" \
"adde r8, r8, r5 \n\t" \
"lwz r7, 4(r4) \n\t" \
"addze r5, r9 \n\t" \
"addc r8, r8, r7 \n\t" \
"stwu r8, 4(r4) \n\t"
#define MULADDC_STOP \
"addze r5, r5 \n\t" \
"addi r4, r4, 4 \n\t" \
"addi r3, r3, 4 \n\t" \
"stw r5, %0 \n\t" \
"stw r4, %1 \n\t" \
"stw r3, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
);
#else /* __MACH__ && __APPLE__ */
#define MULADDC_INIT \
asm( \
"lwz %%r3, %3 \n\t" \
"lwz %%r4, %4 \n\t" \
"lwz %%r5, %5 \n\t" \
"lwz %%r6, %6 \n\t" \
"addi %%r3, %%r3, -4 \n\t" \
"addi %%r4, %%r4, -4 \n\t" \
"addic %%r5, %%r5, 0 \n\t"
#define MULADDC_CORE \
"lwzu %%r7, 4(%%r3) \n\t" \
"mullw %%r8, %%r7, %%r6 \n\t" \
"mulhwu %%r9, %%r7, %%r6 \n\t" \
"adde %%r8, %%r8, %%r5 \n\t" \
"lwz %%r7, 4(%%r4) \n\t" \
"addze %%r5, %%r9 \n\t" \
"addc %%r8, %%r8, %%r7 \n\t" \
"stwu %%r8, 4(%%r4) \n\t"
#define MULADDC_STOP \
"addze %%r5, %%r5 \n\t" \
"addi %%r4, %%r4, 4 \n\t" \
"addi %%r3, %%r3, 4 \n\t" \
"stw %%r5, %0 \n\t" \
"stw %%r4, %1 \n\t" \
"stw %%r3, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
);
#endif /* __MACH__ && __APPLE__ */
#endif /* PPC32 */
/*
* The Sparc(64) assembly is reported to be broken.
* Disable it for now, until we're able to fix it.
*/
#if 0 && defined(__sparc__)
#if defined(__sparc64__)
#define MULADDC_INIT \
asm( \
"ldx %3, %%o0 \n\t" \
"ldx %4, %%o1 \n\t" \
"ld %5, %%o2 \n\t" \
"ld %6, %%o3 \n\t"
#define MULADDC_CORE \
"ld [%%o0], %%o4 \n\t" \
"inc 4, %%o0 \n\t" \
"ld [%%o1], %%o5 \n\t" \
"umul %%o3, %%o4, %%o4 \n\t" \
"addcc %%o4, %%o2, %%o4 \n\t" \
"rd %%y, %%g1 \n\t" \
"addx %%g1, 0, %%g1 \n\t" \
"addcc %%o4, %%o5, %%o4 \n\t" \
"st %%o4, [%%o1] \n\t" \
"addx %%g1, 0, %%o2 \n\t" \
"inc 4, %%o1 \n\t"
#define MULADDC_STOP \
"st %%o2, %0 \n\t" \
"stx %%o1, %1 \n\t" \
"stx %%o0, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "g1", "o0", "o1", "o2", "o3", "o4", \
"o5" \
);
#else /* __sparc64__ */
#define MULADDC_INIT \
asm( \
"ld %3, %%o0 \n\t" \
"ld %4, %%o1 \n\t" \
"ld %5, %%o2 \n\t" \
"ld %6, %%o3 \n\t"
#define MULADDC_CORE \
"ld [%%o0], %%o4 \n\t" \
"inc 4, %%o0 \n\t" \
"ld [%%o1], %%o5 \n\t" \
"umul %%o3, %%o4, %%o4 \n\t" \
"addcc %%o4, %%o2, %%o4 \n\t" \
"rd %%y, %%g1 \n\t" \
"addx %%g1, 0, %%g1 \n\t" \
"addcc %%o4, %%o5, %%o4 \n\t" \
"st %%o4, [%%o1] \n\t" \
"addx %%g1, 0, %%o2 \n\t" \
"inc 4, %%o1 \n\t"
#define MULADDC_STOP \
"st %%o2, %0 \n\t" \
"st %%o1, %1 \n\t" \
"st %%o0, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "g1", "o0", "o1", "o2", "o3", "o4", \
"o5" \
);
#endif /* __sparc64__ */
#endif /* __sparc__ */
#if defined(__microblaze__) || defined(microblaze)
#define MULADDC_INIT \
asm( \
"lwi r3, %3 \n\t" \
"lwi r4, %4 \n\t" \
"lwi r5, %5 \n\t" \
"lwi r6, %6 \n\t" \
"andi r7, r6, 0xffff \n\t" \
"bsrli r6, r6, 16 \n\t"
#define MULADDC_CORE \
"lhui r8, r3, 0 \n\t" \
"addi r3, r3, 2 \n\t" \
"lhui r9, r3, 0 \n\t" \
"addi r3, r3, 2 \n\t" \
"mul r10, r9, r6 \n\t" \
"mul r11, r8, r7 \n\t" \
"mul r12, r9, r7 \n\t" \
"mul r13, r8, r6 \n\t" \
"bsrli r8, r10, 16 \n\t" \
"bsrli r9, r11, 16 \n\t" \
"add r13, r13, r8 \n\t" \
"add r13, r13, r9 \n\t" \
"bslli r10, r10, 16 \n\t" \
"bslli r11, r11, 16 \n\t" \
"add r12, r12, r10 \n\t" \
"addc r13, r13, r0 \n\t" \
"add r12, r12, r11 \n\t" \
"addc r13, r13, r0 \n\t" \
"lwi r10, r4, 0 \n\t" \
"add r12, r12, r10 \n\t" \
"addc r13, r13, r0 \n\t" \
"add r12, r12, r5 \n\t" \
"addc r5, r13, r0 \n\t" \
"swi r12, r4, 0 \n\t" \
"addi r4, r4, 4 \n\t"
/*
 * MicroBlaze epilogue: store the carry (r5) and the advanced destination and
 * source pointers (r4, r3) back into c, d and s, then close the asm statement
 * opened by MULADDC_INIT.
 *
 * Each clobbered register must be its own comma-separated string; adjacent
 * string literals are concatenated by the compiler ("r4" "r5" would become
 * the non-existent register "r4r5").
 */
#define MULADDC_STOP \
"swi r5, %0 \n\t" \
"swi r4, %1 \n\t" \
"swi r3, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r3", "r4", "r5", "r6", "r7", "r8", \
"r9", "r10", "r11", "r12", "r13" \
);
#endif /* MicroBlaze */
#if defined(__tricore__)
#define MULADDC_INIT \
asm( \
"ld.a %%a2, %3 \n\t" \
"ld.a %%a3, %4 \n\t" \
"ld.w %%d4, %5 \n\t" \
"ld.w %%d1, %6 \n\t" \
"xor %%d5, %%d5 \n\t"
#define MULADDC_CORE \
"ld.w %%d0, [%%a2+] \n\t" \
"madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \
"ld.w %%d0, [%%a3] \n\t" \
"addx %%d2, %%d2, %%d0 \n\t" \
"addc %%d3, %%d3, 0 \n\t" \
"mov %%d4, %%d3 \n\t" \
"st.w [%%a3+], %%d2 \n\t"
#define MULADDC_STOP \
"st.w %0, %%d4 \n\t" \
"st.a %1, %%a3 \n\t" \
"st.a %2, %%a2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "d0", "d1", "e2", "d4", "a2", "a3" \
);
#endif /* TriCore */
/*
* gcc -O0 by default uses r7 for the frame pointer, so it complains about our
* use of r7 below, unless -fomit-frame-pointer is passed. Unfortunately,
* passing that option is not easy when building with yotta.
*
* On the other hand, -fomit-frame-pointer is implied by any -Ox options with
* x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
* clang and armcc5 under the same conditions).
*
* So, only use the optimized assembly below for optimized build, which avoids
* the build error and is pretty reasonable anyway.
*/
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
#define MULADDC_CANNOT_USE_R7
#endif
#if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
#if defined(__thumb__) && !defined(__thumb2__)
#pragma message "using ARM THUMB MULADDC"
/*
 * Thumb (non-Thumb-2) variant, built from 16x16-bit multiplies.
 *
 * MULADDC_INIT opens the asm statement and loads r0 = s, r1 = d, r2 = c and
 * r3 = b (operands %3..%6 of the lists in MULADDC_STOP). b is split into its
 * high half (kept in r9) and low half (kept in r8).
 */
#define MULADDC_INIT \
asm( \
"ldr r0, %3 \n\t" \
"ldr r1, %4 \n\t" \
"ldr r2, %5 \n\t" \
"ldr r3, %6 \n\t" \
"lsr r7, r3, #16 \n\t" \
"mov r9, r7 \n\t" \
"lsl r7, r3, #16 \n\t" \
"lsr r7, r7, #16 \n\t" \
"mov r8, r7 \n\t"
/*
 * One step: load a word from [r0] (post-incremented), split it into 16-bit
 * halves, form the four partial products with the halves of b, add the
 * incoming carry r2 and the word at [r1], store the low word through r1
 * (post-incremented) and leave the new carry in r2.
 */
#define MULADDC_CORE \
"ldmia r0!, {r6} \n\t" \
"lsr r7, r6, #16 \n\t" \
"lsl r6, r6, #16 \n\t" \
"lsr r6, r6, #16 \n\t" \
"mov r4, r8 \n\t" \
"mul r4, r6 \n\t" \
"mov r3, r9 \n\t" \
"mul r6, r3 \n\t" \
"mov r5, r9 \n\t" \
"mul r5, r7 \n\t" \
"mov r3, r8 \n\t" \
"mul r7, r3 \n\t" \
"lsr r3, r6, #16 \n\t" \
"add r5, r5, r3 \n\t" \
"lsr r3, r7, #16 \n\t" \
"add r5, r5, r3 \n\t" \
"add r4, r4, r2 \n\t" \
"mov r2, #0 \n\t" \
"adc r5, r2 \n\t" \
"lsl r3, r6, #16 \n\t" \
"add r4, r4, r3 \n\t" \
"adc r5, r2 \n\t" \
"lsl r3, r7, #16 \n\t" \
"add r4, r4, r3 \n\t" \
"adc r5, r2 \n\t" \
"ldr r3, [r1] \n\t" \
"add r4, r4, r3 \n\t" \
"adc r2, r5 \n\t" \
"stmia r1!, {r4} \n\t"
/*
 * Epilogue: write r2, r1 and r0 back to c, d and s and close the asm
 * statement with its operand and clobber lists.
 */
#define MULADDC_STOP \
"str r2, %0 \n\t" \
"str r1, %1 \n\t" \
"str r0, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r0", "r1", "r2", "r3", "r4", "r5", \
"r6", "r7", "r8", "r9", "cc" \
);
#else
/*
 * ARM-mode (and Thumb-2) variant using the 32x32->64 multiply-accumulate.
 *
 * MULADDC_INIT opens the asm statement and loads r0 = s, r1 = d, r2 = c and
 * r3 = b (operands %3..%6 of the lists in MULADDC_STOP).
 */
#define MULADDC_INIT \
asm( \
"ldr r0, %3 \n\t" \
"ldr r1, %4 \n\t" \
"ldr r2, %5 \n\t" \
"ldr r3, %6 \n\t"
/*
 * One step: r4 = *s++, r6 = *d; umlal accumulates r3 * r4 into the 64-bit
 * pair r5:r2 (r5 cleared first, r2 holding the incoming carry); the low word
 * plus r6 is stored to *d++ and r2 becomes the new carry (high word plus the
 * carry flag of that addition).
 */
#define MULADDC_CORE \
"ldr r4, [r0], #4 \n\t" \
"mov r5, #0 \n\t" \
"ldr r6, [r1] \n\t" \
"umlal r2, r5, r3, r4 \n\t" \
"adds r7, r6, r2 \n\t" \
"adc r2, r5, #0 \n\t" \
"str r7, [r1], #4 \n\t"
/*
 * Epilogue: write r2, r1 and r0 back to c, d and s and close the asm
 * statement with its operand and clobber lists.
 */
#define MULADDC_STOP \
"str r2, %0 \n\t" \
"str r1, %1 \n\t" \
"str r0, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "r0", "r1", "r2", "r3", "r4", "r5", \
"r6", "r7", "cc" \
);
#endif /* Thumb */
#endif /* ARMv3 */
#if defined(__alpha__)
#define MULADDC_INIT \
asm( \
"ldq $1, %3 \n\t" \
"ldq $2, %4 \n\t" \
"ldq $3, %5 \n\t" \
"ldq $4, %6 \n\t"
#define MULADDC_CORE \
"ldq $6, 0($1) \n\t" \
"addq $1, 8, $1 \n\t" \
"mulq $6, $4, $7 \n\t" \
"umulh $6, $4, $6 \n\t" \
"addq $7, $3, $7 \n\t" \
"cmpult $7, $3, $3 \n\t" \
"ldq $5, 0($2) \n\t" \
"addq $7, $5, $7 \n\t" \
"cmpult $7, $5, $5 \n\t" \
"stq $7, 0($2) \n\t" \
"addq $2, 8, $2 \n\t" \
"addq $6, $3, $3 \n\t" \
"addq $5, $3, $3 \n\t"
#define MULADDC_STOP \
"stq $3, %0 \n\t" \
"stq $2, %1 \n\t" \
"stq $1, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "$1", "$2", "$3", "$4", "$5", "$6", "$7" \
);
#endif /* Alpha */
#if defined(__mips__) && !defined(__mips64)
#define MULADDC_INIT \
asm( \
"lw $10, %3 \n\t" \
"lw $11, %4 \n\t" \
"lw $12, %5 \n\t" \
"lw $13, %6 \n\t"
#define MULADDC_CORE \
"lw $14, 0($10) \n\t" \
"multu $13, $14 \n\t" \
"addi $10, $10, 4 \n\t" \
"mflo $14 \n\t" \
"mfhi $9 \n\t" \
"addu $14, $12, $14 \n\t" \
"lw $15, 0($11) \n\t" \
"sltu $12, $14, $12 \n\t" \
"addu $15, $14, $15 \n\t" \
"sltu $14, $15, $14 \n\t" \
"addu $12, $12, $9 \n\t" \
"sw $15, 0($11) \n\t" \
"addu $12, $12, $14 \n\t" \
"addi $11, $11, 4 \n\t"
#define MULADDC_STOP \
"sw $12, %0 \n\t" \
"sw $11, %1 \n\t" \
"sw $10, %2 \n\t" \
: "=m" (c), "=m" (d), "=m" (s) \
: "m" (s), "m" (d), "m" (c), "m" (b) \
: "$9", "$10", "$11", "$12", "$13", "$14", "$15" \
);
#endif /* MIPS */
#endif /* GNUC */
#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define MULADDC_INIT \
__asm mov esi, s \
__asm mov edi, d \
__asm mov ecx, c \
__asm mov ebx, b
#define MULADDC_CORE \
__asm lodsd \
__asm mul ebx \
__asm add eax, ecx \
__asm adc edx, 0 \
__asm add eax, [edi] \
__asm adc edx, 0 \
__asm mov ecx, edx \
__asm stosd
#if defined(MBEDTLS_HAVE_SSE2)
#define EMIT __asm _emit
#define MULADDC_HUIT \
EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
EMIT 0x0F EMIT 0x6E EMIT 0x1F \
EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
EMIT 0x0F EMIT 0x6E EMIT 0x16 \
EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
EMIT 0x0F EMIT 0x7E EMIT 0x0F \
EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
EMIT 0x0F EMIT 0x7E EMIT 0xC9
/*
 * MSVC/Watcom epilogue for the SSE2 path: emit bytes 0F 77 (the EMMS
 * instruction, matching the "emms" of the GNU i386 variant), then copy
 * ecx/edi/esi back into c, d and s.
 *
 * The last line deliberately has no trailing backslash: a continuation there
 * would splice the following preprocessor directive into this macro.
 */
#define MULADDC_STOP \
EMIT 0x0F EMIT 0x77 \
__asm mov c, ecx \
__asm mov d, edi \
__asm mov s, esi
#else
/*
 * MSVC/Watcom epilogue without SSE2: copy ecx/edi/esi back into c, d and s.
 *
 * The last line deliberately has no trailing backslash: a continuation there
 * would splice the following "#endif" into this macro and leave the
 * conditional unterminated.
 */
#define MULADDC_STOP \
__asm mov c, ecx \
__asm mov d, edi \
__asm mov s, esi
#endif /* SSE2 */
#endif /* MSVC */
#endif /* MBEDTLS_HAVE_ASM */
/*
 * Portable C fallback, used when none of the assembly variants above defined
 * MULADDC_CORE. The macros expand inside a caller that provides s (source
 * pointer), d (destination pointer), b (multiplier) and c (carry).
 */
#if !defined(MULADDC_CORE)
#if defined(MBEDTLS_HAVE_UDBL)
/* Double-width variant: opens a scope holding the wide product r and its two halves. */
#define MULADDC_INIT \
{ \
mbedtls_t_udbl r; \
mbedtls_mpi_uint r0, r1;
/*
 * One step: r = *s++ * b; split r into low (r0) and high (r1) limbs, add the
 * incoming carry c and *d to r0 while counting overflows into r1, then set
 * c = r1 and *d++ = r0.
 */
#define MULADDC_CORE \
r = *(s++) * (mbedtls_t_udbl) b; \
r0 = (mbedtls_mpi_uint) r; \
r1 = (mbedtls_mpi_uint)( r >> biL ); \
r0 += c; r1 += (r0 < c); \
r0 += *d; r1 += (r0 < *d); \
c = r1; *(d++) = r0;
/* Closes the scope opened by MULADDC_INIT. */
#define MULADDC_STOP \
}
#else
/* Single-width variant: opens a scope and splits b into half-limbs b0 (low) and b1 (high). */
#define MULADDC_INIT \
{ \
mbedtls_mpi_uint s0, s1, b0, b1; \
mbedtls_mpi_uint r0, r1, rx, ry; \
b0 = ( b << biH ) >> biH; \
b1 = ( b >> biH );
/*
 * One step: split *s++ into half-limbs, form the four partial products,
 * recombine them into low (r0) and high (r1) limbs with explicit carry
 * detection, add the incoming carry c and *d, then set c = r1 and *d++ = r0.
 */
#define MULADDC_CORE \
s0 = ( *s << biH ) >> biH; \
s1 = ( *s >> biH ); s++; \
rx = s0 * b1; r0 = s0 * b0; \
ry = s1 * b0; r1 = s1 * b1; \
r1 += ( rx >> biH ); \
r1 += ( ry >> biH ); \
rx <<= biH; ry <<= biH; \
r0 += rx; r1 += (r0 < rx); \
r0 += ry; r1 += (r0 < ry); \
r0 += c; r1 += (r0 < c); \
r0 += *d; r1 += (r0 < *d); \
c = r1; *(d++) = r0;
/* Closes the scope opened by MULADDC_INIT. */
#define MULADDC_STOP \
}
#endif /* C (generic) */
#endif /* C (longlong) */
#endif /* bn_mul.h */

4
arm9/mbedtls/config.h Normal file
View File

@ -0,0 +1,4 @@
/* Build-time feature selection for the vendored mbed TLS sources. */
/* NOTE(review): presumably enables the multi-precision integer code in bignum.c -- confirm */
#define MBEDTLS_BIGNUM_C
/* Makes bn_mul.h pick an inline-assembly MULADDC_* implementation instead of the portable C fallback. */
#define MBEDTLS_HAVE_ASM

7
arm9/mbedtls/readme.txt Normal file
View File

@ -0,0 +1,7 @@
aes.c/.h and rsa.c/.h are heavily modified/reduced.
bignum.c/.h and bn_mul.h only had some minor modifications:
- header locations moved from mbedtls/ to .
- some unused functions disabled by "#if 0 // unused":
  - ASCII I/O
  - everything below mbedtls_mpi_exp_mod

61
arm9/mbedtls/rsa.c Normal file
View File

@ -0,0 +1,61 @@
// mbedtls RSA public
// only the pubkey function for signatures verifying
// original rsa.c had too many extra functions not used and too many dependencies
#include <string.h>
#include "bignum.h"
#include "rsa.h"
/* Reset an RSA context to the all-zero state; call before rsa_set_pubkey(). */
void rsa_init(rsa_context_t *ctx) {
    memset(ctx, 0, sizeof *ctx);
}
// I don't know why mbedtls doesn't provide this
// instead, all callers set N/E/len manually
// this could be seen in mbedtls_rsa_self_test(rsa.c), main(dh_client.c) and main(rsa_verify.c)
//
// Load a public key into ctx from two binary buffers.
//   n_buf/n_len: modulus N
//   e_buf/e_len: public exponent E
// On success ctx->len is set to the size of N in bytes and 0 is returned.
// On failure the (non-zero) error code of the failing
// mbedtls_mpi_read_binary() call is returned unchanged, so callers can tell
// what went wrong; ctx must not be used for rsa_public() in that case.
int rsa_set_pubkey(rsa_context_t *ctx, const unsigned char * n_buf, size_t n_len,
    const unsigned char * e_buf, size_t e_len)
{
    int ret = mbedtls_mpi_read_binary(&ctx->N, n_buf, n_len);
    if (ret != 0) {
        return ret;
    }
    ret = mbedtls_mpi_read_binary(&ctx->E, e_buf, e_len);
    if (ret != 0) {
        return ret;
    }
    // round the bit length of N up to whole bytes
    ctx->len = (mbedtls_mpi_bitlen(&ctx->N) + 7) >> 3;
    // we should check the key now to be safe?
    // anyway usually we load known working keys, so it's omitted
    return 0;
}
// basically mbedtls_rsa_public
// Raw RSA public-key operation: output = input ^ E mod N, using the key in ctx.
//   input:  ctx->len bytes, read as one multi-precision integer
//   output: receives ctx->len bytes
// Returns 0 on success, otherwise MBEDTLS_ERR_RSA_PUBLIC_FAILED plus the
// underlying bignum error code.
int rsa_public(rsa_context_t *ctx, const unsigned char *input, unsigned char *output) {
int ret;
size_t olen;
mbedtls_mpi T;
mbedtls_mpi_init(&T);
// NOTE(review): MBEDTLS_MPI_CHK is defined outside this file; it is assumed to
// store the call's result in ret and jump to cleanup when it is non-zero -- confirm
MBEDTLS_MPI_CHK(mbedtls_mpi_read_binary(&T, input, ctx->len));
// reject inputs that are not smaller than the modulus
if (mbedtls_mpi_cmp_mpi(&T, &ctx->N) >= 0)
{
ret = MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
goto cleanup;
}
olen = ctx->len;
// ctx->RN is handed to mbedtls_mpi_exp_mod as its last (helper) argument
MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&T, &T, &ctx->E, &ctx->N, &ctx->RN));
MBEDTLS_MPI_CHK(mbedtls_mpi_write_binary(&T, output, olen));
cleanup:
// T is released on both the success and the failure path
mbedtls_mpi_free(&T);
if (ret != 0)
return(MBEDTLS_ERR_RSA_PUBLIC_FAILED + ret);
return(0);
}

18
arm9/mbedtls/rsa.h Normal file
View File

@ -0,0 +1,18 @@
/*
 * Minimal RSA public-key interface (signature verification only).
 *
 * "#pragma once" guards against double inclusion: rsa_context_t is a typedef
 * of an anonymous struct, so including this header twice in one translation
 * unit would otherwise be a conflicting redefinition.
 */
#pragma once
#define MBEDTLS_ERR_RSA_PUBLIC_FAILED -0x4280 /**< The public key operation failed. */
#include "bignum.h"
/* RSA public key plus working state. */
typedef struct {
    size_t len;      /* size of N in bytes, set by rsa_set_pubkey() */
    mbedtls_mpi N;   /* modulus */
    mbedtls_mpi E;   /* public exponent */
    mbedtls_mpi RN;  /* helper value passed to mbedtls_mpi_exp_mod() by rsa_public() */
} rsa_context_t;
/* Zero the context; call before any other function. */
void rsa_init(rsa_context_t *rsa);
/* Load modulus (n_buf) and public exponent (e_buf); returns 0 on success, non-zero on failure. */
int rsa_set_pubkey(rsa_context_t *rsa, const unsigned char * n_buf, size_t n_len,
    const unsigned char * e_buf, size_t e_len);
/* output = input ^ E mod N; both buffers are rsa->len bytes. Returns 0 on success. */
int rsa_public(rsa_context_t *rsa, const unsigned char *input, unsigned char *output);

323
arm9/source/crypto.c Normal file
View File

@ -0,0 +1,323 @@
#include <stdint.h>
#include "../mbedtls/aes.h"
#include "crypto.h"
//#include "ticket0.h"
#include "utils.h"
// more info:
// https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c
// https://github.com/Jimmy-Z/bfCL/blob/master/dsi.h
// ported back to 32 bit for ARM9
// 128-bit constants stored as four 32-bit words.
// Key_Y XORed into the console-derived key for the NAND and NAND_3DS modes
static const uint32_t DSi_NAND_KEY_Y[4] =
{0x0ab9dc76u, 0xbd4dc4d3u, 0x202ddd1du, 0xe1a00005u};
// Key_Y XORed into the console-derived key for the ES mode
static const uint32_t DSi_ES_KEY_Y[4] =
{0x8b5acce5u, 0x72c9d056u, 0xdce8179cu, 0xa9361239u};
// used directly as the AES key for boot2 (no console-specific derivation)
static const uint32_t DSi_BOOT2_KEY[4] =
{0x8080ee98u, 0xf6b46c00u, 0x626ec23au, 0xad34ecf9u};
// FFFEFB4E295902582A680F5F1A4F3E79h, least significant word first;
// added to (Key_X XOR Key_Y) during key derivation
static const uint32_t DSi_KEY_MAGIC[4] =
{0x1a4f3e79u, 0x2a680f5fu, 0x29590258u, 0xfffefb4eu};
// x = a XOR b over 128 bits (four 32-bit words); x may be the same array as a or b
static inline void xor_128(uint32_t *x, const uint32_t *a, const uint32_t *b){
    for (int w = 0; w < 4; ++w) {
        x[w] = a[w] ^ b[w];
    }
}
// a += b as 128-bit integers stored least significant word first;
// a carry out of the top word is discarded (arithmetic modulo 2^128)
static inline void add_128(uint32_t *a, const uint32_t *b){
    uint64_t acc = 0; // running sum; after each word its upper half is the carry
    for (int w = 0; w < 4; ++w) {
        acc += (uint64_t)a[w] + b[w];
        a[w] = (uint32_t)acc;
        acc >>= 32;
    }
}
// a += b, where a is a 128-bit integer (least significant word first) and b is 32 bits wide
static inline void add_128_32(uint32_t *a, uint32_t b){
    a[0] += b;
    if (a[0] >= b) {
        return; // lowest word did not wrap: no carry to propagate
    }
    // ripple the carry upwards until a word does not wrap to zero
    for (int w = 1; w < 4; ++w) {
        a[w] += 1;
        if (a[w] != 0) {
            return;
        }
    }
}
// Rotate a 128-bit value (least significant word first) left by 42 bits,
// i.e. by one whole word plus 10 bits.
static inline void rol42_128(uint32_t *a){
    const uint32_t old[4] = { a[0], a[1], a[2], a[3] };
    for (int w = 0; w < 4; ++w) {
        const uint32_t upper = old[(w + 3) & 3]; // word one position below w
        const uint32_t lower = old[(w + 2) & 3]; // word two positions below w
        a[w] = (upper << 10) | (lower >> 22);
    }
}
// Derive the AES-128 key for the given mode from the console ID and expand it
// into the round-key buffer rk.
//   rk:         receives the expanded key (written by aes_set_key_enc_128_be)
//   console_id: two 32-bit words of the console ID
//   mode:       selects how Key_X is built and which Key_Y is used
static void dsi_aes_set_key(uint32_t *rk, const uint32_t *console_id, key_mode_t mode) {
    // Zero-initialised so that an unexpected mode value yields a deterministic
    // Key_X instead of reading indeterminate stack contents below.
    uint32_t key[4] = {0, 0, 0, 0};
    switch (mode) {
    case NAND:
        key[0] = console_id[0];
        key[1] = console_id[0] ^ 0x24ee6906;
        key[2] = console_id[1] ^ 0xe65b601d;
        key[3] = console_id[1];
        break;
    case NAND_3DS:
        key[0] = (console_id[0] ^ 0xb358a6af) | 0x80000000;
        key[1] = 0x544e494e;
        key[2] = 0x4f444e45;
        key[3] = console_id[1] ^ 0x08c267b7;
        break;
    case ES:
        key[0] = 0x4e00004a;
        key[1] = 0x4a00004e;
        key[2] = console_id[1] ^ 0xc80c4b72;
        key[3] = console_id[0];
        break;
    default:
        break;
    }
    // Key = ((Key_X XOR Key_Y) + FFFEFB4E295902582A680F5F1A4F3E79h) ROL 42
    // equivalent to F_XY in twltool/f_xy.c
    xor_128(key, key, mode == ES ? DSi_ES_KEY_Y : DSi_NAND_KEY_Y);
    add_128(key, DSi_KEY_MAGIC);
    rol42_128(key);
    aes_set_key_enc_128_be(rk, (uint8_t*)key);
}
// Hash data[0..len) with the BIOS SHA-1 routine and compare the result with
// digest_verify. Returns the memcmp() result: 0 when the digests match.
// On a mismatch both digests are dumped (expected first, then computed).
int dsi_sha1_verify(const void *digest_verify, const void *data, unsigned len) {
    uint8_t computed[SHA1_LEN];
    swiSHA1Calc(computed, data, len);
    // swiSHA1Verify() is declared as returning void, so compare by hand
    const int diff = memcmp(computed, digest_verify, SHA1_LEN);
    if (diff == 0) {
        return diff;
    }
    print_bytes(digest_verify, SHA1_LEN);
    print_bytes(computed, SHA1_LEN);
    return diff;
}
// Module state, filled in by dsi_crypt_init().
// expanded AES key used by dsi_nand_crypt_1() / dsi_nand_crypt()
static uint32_t nand_rk[RK_LEN];
// initial CTR value for NAND: first four words of SHA-1(eMMC CID)
static uint32_t nand_ctr_iv[4];
// expanded AES key derived in ES mode
static uint32_t es_rk[RK_LEN];
// expanded AES key for boot2, from the fixed DSi_BOOT2_KEY
static uint32_t boot2_rk[RK_LEN];
// non-zero once aes_gen_tables() has run
static int tables_generated = 0;
void dsi_crypt_init(const uint8_t *console_id_be, const uint8_t *emmc_cid, int is3DS) {
if (tables_generated == 0) {
aes_gen_tables();
tables_generated = 1;
}
uint32_t console_id[2];
GET_UINT32_BE(console_id[0], console_id_be, 4);
GET_UINT32_BE(console_id[1], console_id_be, 0);
dsi_aes_set_key(nand_rk, console_id, is3DS ? NAND_3DS : NAND);
dsi_aes_set_key(es_rk, console_id, ES);
aes_set_key_enc_128_be(boot2_rk, (uint8_t*)DSi_BOOT2_KEY);
uint32_t digest[SHA1_LEN / sizeof(uint32_t)];
swiSHA1Calc(digest, emmc_cid, 16);
nand_ctr_iv[0] = digest[0];
nand_ctr_iv[1] = digest[1];
nand_ctr_iv[2] = digest[2];
nand_ctr_iv[3] = digest[3];
}
// CTR-mode step for one 16-byte block: encrypt the counter with rk and XOR
// the result with in, writing to out (in and out may be the same buffer).
static inline void aes_ctr(const uint32_t *rk, const uint32_t *ctr, uint32_t *in, uint32_t *out) {
    uint32_t keystream[4];
    aes_encrypt_128_be(rk, (uint8_t*)ctr, (uint8_t*)keystream);
    xor_128(out, in, keystream);
}
// Crypt a single AES block of NAND data.
// in/out must be 32-bit aligned (they are accessed as uint32_t by xor_128);
// offset is counted in AES blocks, not bytes.
void dsi_nand_crypt_1(uint8_t* out, const uint8_t* in, uint32_t offset) {
    uint32_t counter[4];
    for (int w = 0; w < 4; ++w) {
        counter[w] = nand_ctr_iv[w];
    }
    add_128_32(counter, offset);
    aes_ctr(nand_rk, counter, (uint32_t*)in, (uint32_t*)out);
}
// Crypt count consecutive AES blocks of NAND data, starting at block index
// offset; the counter advances by one per block.
void dsi_nand_crypt(uint8_t* out, const uint8_t* in, uint32_t offset, unsigned count) {
    uint32_t counter[4];
    for (int w = 0; w < 4; ++w) {
        counter[w] = nand_ctr_iv[w];
    }
    add_128_32(counter, offset);
    for (; count > 0; --count) {
        aes_ctr(nand_rk, counter, (uint32_t*)in, (uint32_t*)out);
        in += AES_BLOCK_SIZE;
        out += AES_BLOCK_SIZE;
        add_128_32(counter, 1);
    }
}
// running CTR value shared by dsi_boot2_crypt_set_ctr() and dsi_boot2_crypt()
static uint32_t boot2_ctr[4];
// Initialise the boot2 counter from size_r: { size_r, -size_r, ~size_r, 0 }.
void dsi_boot2_crypt_set_ctr(uint32_t size_r) {
    const uint32_t negated = (uint32_t)(0u - size_r);
    const uint32_t inverted = (uint32_t)~size_r;
    boot2_ctr[3] = 0;
    boot2_ctr[2] = inverted;
    boot2_ctr[1] = negated;
    boot2_ctr[0] = size_r;
}
// Crypt count consecutive AES blocks with the boot2 key. The module-level
// counter is advanced by one per block and keeps its value between calls.
void dsi_boot2_crypt(uint8_t* out, const uint8_t* in, unsigned count) {
    for (; count > 0; --count) {
        aes_ctr(boot2_rk, boot2_ctr, (uint32_t*)in, (uint32_t*)out);
        in += AES_BLOCK_SIZE;
        out += AES_BLOCK_SIZE;
        add_128_32(boot2_ctr, 1);
    }
}
// http://problemkaputt.de/gbatek.htm#dsiesblockencryption
// works in place, also must be aligned to 32 bit
// why is it called ES?
/*int dsi_es_block_crypt(uint8_t *buf, unsigned buf_len, crypt_mode_t mode) {
es_block_footer_t *footer;
footer = (es_block_footer_t*)(buf + buf_len - sizeof(es_block_footer_t));
// backup mac since it might be overwritten by padding
// and also nonce, it becomes garbage after decryption
uint8_t ccm_mac[AES_CCM_MAC_LEN];
uint8_t nonce[AES_CCM_NONCE_LEN];
memcpy(ccm_mac, footer->ccm_mac, AES_CCM_MAC_LEN);
memcpy(nonce, footer->nonce, AES_CCM_NONCE_LEN);
uint32_t ctr32[4], pad32[4], mac32[4];
// I'm too paranoid to use more stack variables
#define ctr ((uint8_t*)ctr32)
#define pad ((uint8_t*)pad32)
#define mac ((uint8_t*)mac32)
#define zero(a) static_assert(sizeof(a[0]) == 4, "invalid operand"); \
a[0] = 0; a[1] = 0; a[2] = 0; a[3] = 0
if (mode == DECRYPT) {
// decrypt footer
zero(ctr32);
memcpy(ctr + 1, nonce, AES_CCM_NONCE_LEN);
// footer might not be 32 bit aligned after all, so we copy it out to decrypt
memcpy(pad, footer->encrypted, AES_BLOCK_SIZE);
aes_ctr(es_rk, ctr32, pad32, pad32);
memcpy(footer->encrypted, pad, AES_BLOCK_SIZE);
}
// check decrypted footer
if (footer->fixed_3a != 0x3a) {
i//printff("ES block footer offset 0x10 should be 0x3a, got 0x%02x\n", footer->fixed_3a);
return 1;
}
uint32_t block_size;
GET_UINT32_BE(block_size, footer->len32be, 0);
block_size &= 0xffffff;
if (block_size + sizeof(es_block_footer_t) != buf_len) {
i//printff("block size in footer doesn't match, %06x != %06x\n",
(unsigned)block_size, (unsigned)(buf_len - sizeof(es_block_footer_t)));
return 1;
}
// padding to multiple of 16
uint32_t remainder = block_size & 0xf;
if (remainder != 0) {
zero(pad32);
if (mode == DECRYPT) {
ctr32[0] = (block_size >> 4) + 1;
memcpy(ctr + 3, nonce, AES_CCM_NONCE_LEN);
ctr[0xf] = 2;
aes_ctr(es_rk, ctr32, pad32, pad32);
}
memcpy(buf + block_size, pad + remainder, 16 - remainder);
block_size += 16 - remainder;
}
// AES-CCM MAC
mac32[0] = block_size;
memcpy(mac + 3, nonce, AES_CCM_NONCE_LEN);
mac[0xf] = 0x3a;
aes_encrypt_128_be(es_rk, mac, mac);
// AES-CCM CTR
ctr32[0] = 0;
memcpy(ctr + 3, nonce, AES_CCM_NONCE_LEN);
ctr[0xf] = 2;
// AES-CCM start
zero(pad32);
aes_ctr(es_rk, ctr32, pad32, pad32);
add_128_32(ctr32, 1);
// AES-CCM loop
if (mode == DECRYPT) {
for (unsigned i = 0; i < block_size; i += 16) {
aes_ctr(es_rk, ctr32, (uint32_t*)(buf + i), (uint32_t*)(buf + i));
add_128_32(ctr32, 1);
xor_128(mac32, mac32, (uint32_t*)(buf + i));
aes_encrypt_128_be(es_rk, mac, mac);
}
} else {
for (unsigned i = 0; i < block_size; i += 16) {
xor_128(mac32, mac32, (uint32_t*)(buf + i));
aes_encrypt_128_be(es_rk, mac, mac);
aes_ctr(es_rk, ctr32, (uint32_t*)(buf + i), (uint32_t*)(buf + i));
add_128_32(ctr32, 1);
}
}
// AES-CCM MAC final
xor_128(mac32, mac32, pad32);
if (mode == DECRYPT) {
if (memcmp(mac, ccm_mac, 16) == 0) {
if (remainder != 0) {
// restore mac
memcpy(footer->ccm_mac, ccm_mac, AES_CCM_MAC_LEN);
}
// restore nonce
memcpy(footer->nonce, nonce, AES_CCM_NONCE_LEN);
return 0;
} else {
//printf("MAC verification failed\n");
return 1;
}
} else {
memcpy(footer->ccm_mac, mac, AES_CCM_MAC_LEN);
// AES-CTR crypt later half of footer
zero(ctr32);
memcpy(ctr + 1, nonce, AES_CCM_NONCE_LEN);
memcpy(pad, footer->encrypted, AES_BLOCK_SIZE);
aes_ctr(es_rk, ctr32, pad32, pad32);
memcpy(footer->encrypted, pad, AES_BLOCK_SIZE);
// restore nonce
memcpy(footer->nonce, nonce, AES_CCM_NONCE_LEN);
return 0;
}
#undef ctr
#undef pad
#undef mac
#undef zero
}*/

35
arm9/source/crypto.h Normal file
View File

@ -0,0 +1,35 @@
#pragma once
#include <nds.h>
#define SHA1_LEN 20
#define AES_BLOCK_SIZE 16
typedef enum {
ENCRYPT,
DECRYPT
} crypt_mode_t;
typedef enum {
NAND,
NAND_3DS,
ES
} key_mode_t;
// SHA-1 routine used by crypto.c, redeclared here.
// NOTE(review): <nds.h> is already included at the top of this header, so this
// redeclaration is presumably redundant -- confirm and drop it
void swiSHA1Calc(void *digest, const void *buf, size_t len);
// SHA-1 of data[0..len) compared against digest_verify; returns 0 when they match
int dsi_sha1_verify(const void *digest_verify, const void *data, unsigned len);
// derive all keys and the NAND counter IV; must be called before the *_crypt functions
void dsi_crypt_init(const uint8_t *console_id_be, const uint8_t *emmc_cid, int is3DS);
// crypt one AES block; offset is a block index; in/out must be 32-bit aligned
void dsi_nand_crypt_1(uint8_t *out, const uint8_t* in, u32 offset);
// crypt count consecutive AES blocks starting at block index offset
void dsi_nand_crypt(uint8_t *out, const uint8_t* in, u32 offset, unsigned count);
// NOTE(review): the definition in crypto.c is currently commented out, so
// calling this function will fail at link time
int dsi_es_block_crypt(uint8_t *buf, unsigned buf_len, crypt_mode_t mode);
// initialise the boot2 counter from size_r
void dsi_boot2_crypt_set_ctr(uint32_t size_r);
// crypt count consecutive AES blocks with the boot2 key, advancing the boot2 counter
void dsi_boot2_crypt(uint8_t* out, const uint8_t* in, unsigned count);

View File

@ -222,10 +222,14 @@ void driveMenu (void) {
dmAssignedOp[i] = -1;
}
dmMaxCursors = -1;
if (isDSiMode() && sdMounted){
if (sdMounted){
dmMaxCursors++;
dmAssignedOp[dmMaxCursors] = 0;
}
if (nandMounted) {
dmMaxCursors++;
dmAssignedOp[dmMaxCursors] = 7;
}
if (flashcardMounted) {
dmMaxCursors++;
dmAssignedOp[dmMaxCursors] = 1;
@ -238,10 +242,6 @@ void driveMenu (void) {
dmMaxCursors++;
dmAssignedOp[dmMaxCursors] = 6;
}
if (nandMounted) {
dmMaxCursors++;
dmAssignedOp[dmMaxCursors] = 7;
}
if (expansionPakFound
|| (io_dldi_data->ioInterface.features & FEATURE_SLOT_GBA)
|| (isDSiMode() && !(REG_SCFG_MC & BIT(0)))) {

View File

@ -10,7 +10,7 @@
#include "lzss.h"
#include "ramd.h"
#include "ramdrive-include.h"
#include "nand.h"
#include "nandio.h"
#include "tonccpy.h"
static sNDSHeader nds;
@ -134,7 +134,7 @@ bool bothSDandFlashcard(void) {
}
TWL_CODE bool nandMount(void) {
fatMountSimple("nand", &io_nand);
fatMountSimple("nand", &io_dsi_nand);
if (nandFound()) {
nandMountedDone = true;
struct statvfs st;

199
arm9/source/f_xy.c Normal file
View File

@ -0,0 +1,199 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "types.h"
#include "utils.h"
//#define DEBUG
// Read each of the four 32-bit words of `in` through getbe32() and pack them
// pairwise into two 64-bit values:
//   out[0] = word0 (high half) : word1 (low half)
//   out[1] = word2 (high half) : word3 (low half)
void aes_flip_to_64(u32 *in, u64* out)
{
	u32 flipped[4];
	u32 k = 0;

	while (k < 4) {
		flipped[k] = getbe32((u8*)&in[k]);
		k++;
	}

	out[0] = ((u64)flipped[0] << 32) | (u64)flipped[1];
	out[1] = ((u64)flipped[2] << 32) | (u64)flipped[3];
}
// Inverse packing of aes_flip_to_64(): split two 64-bit values back into four
// 32-bit words, reading each 4-byte half through getbe32().
void aes_unflip_to_32(u64* in, u32* out)
{
	u8 *raw = (u8*)in;   // byte view of in[0] (offsets 0..7) and in[1] (offsets 8..15)

	out[0] = getbe32(raw + 4);
	out[1] = getbe32(raw);
	out[2] = getbe32(raw + 12);
	out[3] = getbe32(raw + 8);
}
// Rotate the 128-bit value in num[0..3] left by `shift` bits, working on the
// two 64-bit halves produced by aes_flip_to_64().
// `shift` must be in 1..63: the complementary shift (64 - shift) is used
// directly. The public wrapper n128_lrot_3ds() only passes 1..32.
void n128_lrot_3ds_internal(u32 *num, u32 shift)
{
	u64 src[2];
	u64 dst[2];
	const u32 wrap = 64 - shift;

	aes_flip_to_64(num, src);

	dst[0] = (src[0] << shift) | (src[1] >> wrap);
	dst[1] = (src[1] << shift) | (src[0] >> wrap);

	aes_unflip_to_32(dst, num);
}
// Rotate the 128-bit value in num[0..3] right by `shift` bits, working on the
// two 64-bit halves produced by aes_flip_to_64().
// `shift` must be in 1..63: the complementary shift (64 - shift) is used
// directly. The public wrapper n128_rrot_3ds() only passes 1..32.
void n128_rrot_3ds_internal(u32 *num, u32 shift)
{
	u64 src[2];
	u64 dst[2];
	const u32 wrap = 64 - shift;

	aes_flip_to_64(num, src);

	dst[0] = (src[0] >> shift) | (src[1] << wrap);
	dst[1] = (src[1] >> shift) | (src[0] << wrap);

	aes_unflip_to_32(dst, num);
}
// Rotate the 128-bit value in num[0..3] left by `shift` bits.
// The rotation is applied in steps of at most 32 bits, each delegated to
// n128_lrot_3ds_internal(); a shift of 0 leaves `num` untouched.
void n128_lrot_3ds(u32 *num, u32 shift)
{
	while (shift > 0) {
		const u32 step = (shift >= 32) ? 32 : shift;

		shift -= step;
		n128_lrot_3ds_internal(num, step);
	}
}
// Rotate the 128-bit value in num[0..3] right by `shift` bits.
// The rotation is applied in steps of at most 32 bits, each delegated to
// n128_rrot_3ds_internal(); a shift of 0 leaves `num` untouched.
void n128_rrot_3ds(u32 *num, u32 shift)
{
	while (shift > 0) {
		const u32 step = (shift >= 32) ? 32 : shift;

		shift -= step;
		n128_rrot_3ds_internal(num, step);
	}
}
// a += b, where both operands are 128-bit values stored as four 32-bit words.
// The words are first packed with aes_flip_to_64(); element [0] of the packed
// form is added first and its carry is propagated into element [1].
void n128_add_3ds(u32 *a, u32 *b)
{
	u64 lhs[2];
	u64 rhs[2];

	aes_flip_to_64(a, lhs);
	aes_flip_to_64(b, rhs);

	const u64 first = lhs[0] + rhs[0];
	const u64 carry = (first < lhs[0]) ? 1 : 0;   // unsigned wrap-around == carry out

	lhs[1] = lhs[1] + rhs[1] + carry;
	lhs[0] = first;

	aes_unflip_to_32(lhs, a);
}
// Rotate the 128-bit value {num[0] = low 64 bits, num[1] = high 64 bits}
// left by `shift` bits.
//
// Any shift count is accepted: it is reduced modulo 128, a rotation by 64 or
// more is performed as a half swap plus the remainder, and a remainder of 0
// returns early. The previous code evaluated `x >> (64 - shift)` directly,
// which is undefined for shift == 0 and for shift >= 64.
void n128_lrot(uint64_t *num, uint32_t shift)
{
	uint64_t lo = num[0];
	uint64_t hi = num[1];

	shift &= 127;
	if (shift >= 64) {
		// rotating by 64 just exchanges the halves
		const uint64_t swap = lo;
		lo = hi;
		hi = swap;
		shift -= 64;
	}

	if (shift != 0) {
		const uint64_t new_lo = (lo << shift) | (hi >> (64 - shift));
		const uint64_t new_hi = (hi << shift) | (lo >> (64 - shift));
		lo = new_lo;
		hi = new_hi;
	}

	num[0] = lo;
	num[1] = hi;
}
// Rotate the 128-bit value {num[0] = low 64 bits, num[1] = high 64 bits}
// right by `shift` bits.
//
// Any shift count is accepted: it is reduced modulo 128, a rotation by 64 or
// more is performed as a half swap plus the remainder, and a remainder of 0
// returns early. The previous code evaluated `x << (64 - shift)` directly,
// which is undefined for shift == 0 and for shift >= 64.
void n128_rrot(uint64_t *num, uint32_t shift)
{
	uint64_t lo = num[0];
	uint64_t hi = num[1];

	shift &= 127;
	if (shift >= 64) {
		// rotating by 64 just exchanges the halves
		const uint64_t swap = lo;
		lo = hi;
		hi = swap;
		shift -= 64;
	}

	if (shift != 0) {
		const uint64_t new_lo = (lo >> shift) | (hi << (64 - shift));
		const uint64_t new_hi = (hi >> shift) | (lo << (64 - shift));
		lo = new_lo;
		hi = new_hi;
	}

	num[0] = lo;
	num[1] = hi;
}
// a += b for 128-bit values stored as {[0] = low 64 bits, [1] = high 64 bits}.
// The carry out of the low half is propagated into the high half; overflow
// out of the high half is discarded.
void n128_add(uint64_t *a, uint64_t *b)
{
	const uint64_t low = a[0] + b[0];
	const uint64_t carry = (low < a[0]) ? 1 : 0;   // unsigned wrap-around == carry out

	a[1] = a[1] + b[1] + carry;
	a[0] = low;
}
// a -= b for 128-bit values stored as {[0] = low 64 bits, [1] = high 64 bits}.
//
// A borrow from the low half occurs exactly when a[0] < b[0]. The previous
// formula derived the borrow from the top bits of both operands, which missed
// real borrows (e.g. low halves 0 - 1) and reported spurious ones whenever
// both low halves had bit 63 set.
void n128_sub(uint64_t *a, uint64_t *b)
{
	const uint64_t borrow = (a[0] < b[0]) ? 1 : 0;

	// high half first so that a == b (aliased operands) still works
	a[1] = a[1] - b[1] - borrow;
	a[0] = a[0] - b[0];
}
// Derive a 16-byte key from key_x and key_y:
//   key = rotl128((key_x XOR key_y) + C, 42)
// where C is the fixed 128-bit constant below.
//
// key, key_x and key_y each point to 16 bytes. All 64-bit arithmetic is done
// on properly typed local uint64_t buffers filled with memcpy, instead of
// casting a byte array / uint32_t pointer to uint64_t* (which violated
// alignment and effective-type rules). The output is written last, so `key`
// may alias key_x or key_y.
void F_XY(uint32_t *key, uint32_t *key_x, uint32_t *key_y)
{
	static const uint32_t addend_words[4] = {
		0x1a4f3e79, 0x2a680f5f, 0x29590258, 0xfffefb4e
	};
	const unsigned char *x = (const unsigned char*)key_x;
	const unsigned char *y = (const unsigned char*)key_y;
	unsigned char xored_bytes[16];
	uint64_t acc[2];
	uint64_t xored[2];
	int i;

	for (i = 0; i < 16; i++) {
		xored_bytes[i] = x[i] ^ y[i];
	}

	// same memory layout the original obtained by writing key[0..3]
	memcpy(acc, addend_words, sizeof acc);
	memcpy(xored, xored_bytes, sizeof xored);

	n128_add(acc, xored);
	n128_lrot(acc, 42);

	memcpy(key, acc, sizeof acc);
}
// F_XY_reverse undoes F_XY: given the (normal) key it computes
//   key_xy = rotr128(key, 42) - C
// i.e. the original X^Y value, where C is the same fixed 128-bit constant
// that F_XY adds.
//
// key and key_xy each point to 16 bytes. The input is copied before anything
// is written to key_xy, so the two may alias (the previous code zeroed key_xy
// first, destroying the input when called in place). Arithmetic is done on
// local uint64_t buffers rather than on uint32_t arrays cast to uint64_t*.
void F_XY_reverse(uint32_t *key, uint32_t *key_xy)
{
	static const uint32_t addend_words[4] = {
		0x1a4f3e79, 0x2a680f5f, 0x29590258, 0xfffefb4e
	};
	uint64_t work[2];
	uint64_t addend[2];

	memcpy(work, key, sizeof work);
	memcpy(addend, addend_words, sizeof addend);

	n128_rrot(work, 42);
	n128_sub(work, addend);

	memcpy(key_xy, work, sizeof work);
}

20
arm9/source/f_xy.h Normal file
View File

@ -0,0 +1,20 @@
// Key derivation helpers (F_XY and its inverse) plus 128-bit rotate/add
// primitives operating on four 32-bit words.
// NOTE(review): this header uses uint32_t and u32 without including anything;
// it relies on the including file to have pulled in <stdint.h> and the u32 typedef.
#ifndef _H_F_XY
#define _H_F_XY
#ifdef __cplusplus
extern "C" {
#endif
// key = rotl128((key_x ^ key_y) + constant, 42); all arguments are 16 bytes.
void F_XY(uint32_t *key, uint32_t *key_x, uint32_t *key_y);
// Inverse of F_XY: recovers key_x ^ key_y from key; both arguments are 16 bytes.
void F_XY_reverse(uint32_t *key, uint32_t *key_xy);
// Rotate the 128-bit value in num[0..3] left / right by `shift` bits.
void n128_lrot_3ds(u32 *num, u32 shift);
void n128_rrot_3ds(u32 *num, u32 shift);
// a += b on 128-bit values held in a[0..3] and b[0..3].
void n128_add_3ds(u32 *a, u32 *b);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -207,6 +207,9 @@ int main(int argc, char **argv) {
}
nandMounted = nandMount();
is3DS = ((access("sd:/Nintendo 3DS", F_OK) == 0) && (*(vu32*)(0x0DFFFE0C) == 0x474D3969));
/*FILE* cidFile = fopen("sd:/gm9i/CID.bin", "wb");
fwrite((void*)0x2FFD7BC, 1, 16, cidFile);
fclose(cidFile);*/
} /*else if (isRegularDS) {
*(vu32*)(0x08240000) = 1;
expansionPakFound = ((*(vu32*)(0x08240000) == 1) && (io_dldi_data->ioInterface.features & FEATURE_SLOT_NDS));

View File

@ -1,55 +0,0 @@
#include <nds.h>
#include <nds/disc_io.h>
#include <stdio.h>
#define SECTOR_SIZE 512
static FILE* nandFile;
// Open the NAND image at sd:/nand.bin for binary reading and remember the
// handle in nandFile. Returns true when the file could be opened.
bool nand_startup() {
	nandFile = fopen("sd:/nand.bin", "rb");
	return nandFile != 0;
}
// The image counts as "inserted" exactly when nand_startup() left an open handle.
bool nand_is_inserted() {
	return nandFile != 0;
}
// Read numSectors 512-byte sectors starting at `sector` from the NAND image
// into `buffer`.
// Returns false when the image is not open, the seek fails, or fewer bytes
// than requested were read. Previously both results were ignored and a
// short or failed read was still reported as success.
bool nand_read_sectors(sec_t sector, sec_t numSectors, void *buffer) {
	if (!nandFile) return false;

	const size_t wanted = (size_t)(numSectors << 9);   // sectors -> bytes

	if (fseek(nandFile, (sector << 9), SEEK_SET) != 0) {
		return false;
	}
	return fread(buffer, 1, wanted, nandFile) == wanted;
}
// Writing is not supported by this backend: every request is rejected.
bool nand_write_sectors(sec_t sector, sec_t numSectors, const void *buffer) {
	const bool written = false;
	return written;
}
// Nothing to clear for a file-backed image; always reports success.
bool nand_clear_status() {
	const bool cleared = true;
	return cleared;
}
// Close the NAND image if it is open and forget the handle.
// Safe to call when nand_startup() failed or was never called (the previous
// code passed a null FILE* to fclose in that case) and safe to call twice.
bool nand_shutdown() {
	if (nandFile) {
		fclose(nandFile);
		nandFile = 0;
	}
	return true;
}
// Disc interface backed by the sd:/nand.bin image file.
// Initialisers, in order: four-character tag 'NAND', feature flags (read only),
// then the startup / is-inserted / read / write / clear-status / shutdown callbacks.
const DISC_INTERFACE io_nand = {
('N' << 24) | ('A' << 16) | ('N' << 8) | 'D',
FEATURE_MEDIUM_CANREAD,
nand_startup,
nand_is_inserted,
nand_read_sectors,
nand_write_sectors,
nand_clear_status,
nand_shutdown
};

View File

@ -1,6 +0,0 @@
#pragma once
#include <nds.h>
#include <nds/disc_io.h>
extern const DISC_INTERFACE io_nand;

117
arm9/source/nandio.c Normal file
View File

@ -0,0 +1,117 @@
#include <nds.h>
#include <nds/disc_io.h>
#include <malloc.h>
#include "crypto.h"
#include "sector0.h"
//#define SECTOR_SIZE 512
#define CRYPT_BUF_LEN 64
extern bool nand_Startup();
static u8* crypt_buf = 0;
static u32 fat_sig_fix_offset = 0;
static u32 sector_buf32[SECTOR_SIZE/sizeof(u32)];
static u8 *sector_buf = (u8*)sector_buf32;
// Select the sector whose bytes 0x36..0x38 read_sectors() patches to "FAT"
// when they are all zero. An offset of 0 disables the patch.
void nandio_set_fat_sig_fix(u32 offset) {
	const u32 sector = offset;
	fat_sig_fix_offset = sector;
}
bool nandio_startup() {
if (!nand_Startup()) return false;
nand_ReadSectors(0, 1, sector_buf);
int is3DS = parse_ncsd(sector_buf, 0) == 0;
if (is3DS) return false;
if (*(u32*)(0x2FFD7BC) == 0) {
// Get eMMC CID
*(u32*)(0x2FFFD0C) = 0x454D4D43;
while (*(u32*)(0x2FFFD0C) != 0);
}
// iprintf("sector 0 is %s\n", is3DS ? "3DS" : "DSi");
dsi_crypt_init((const u8*)0x2FFFD00, (const u8*)0x2FFD7BC, is3DS);
//dsi_nand_crypt(sector_buf, sector_buf, 0, SECTOR_SIZE / AES_BLOCK_SIZE);
//parse_mbr(sector_buf, is3DS, 0);
if (crypt_buf == 0) {
crypt_buf = (u8*)memalign(32, SECTOR_SIZE * CRYPT_BUF_LEN);
//if (crypt_buf == 0) {
//printf("nandio: failed to alloc buffer\n");
//}
}
return crypt_buf != 0;
}
// The internal NAND cannot be removed, so it is always reported as present.
bool nandio_is_inserted() {
	const bool present = true;
	return present;
}
// Read `len` sectors starting at `start` into crypt_buf, decrypt them into
// `buffer`, and apply the optional FAT signature patch.
// len is guaranteed <= CRYPT_BUF_LEN by the only caller, nandio_read_sectors().
// Returns false when the raw NAND read fails.
static bool read_sectors(sec_t start, sec_t len, void *buffer) {
	u8 *plain = (u8*)buffer;

	if (!nand_ReadSectors(start, len, crypt_buf)) {
		return false;
	}

	// offset and count are expressed in AES blocks, not sectors
	dsi_nand_crypt(plain, crypt_buf, start * SECTOR_SIZE / AES_BLOCK_SIZE, len * SECTOR_SIZE / AES_BLOCK_SIZE);

	// When a fix-up sector is configured and this read starts at it, fill in
	// a blank signature at 0x36..0x38 with "FAT".
	const bool is_fix_sector = fat_sig_fix_offset && start == fat_sig_fix_offset;
	if (is_fix_sector && plain[0x36] == 0 && plain[0x37] == 0 && plain[0x38] == 0) {
		plain[0x36] = 'F';
		plain[0x37] = 'A';
		plain[0x38] = 'T';
	}
	return true;
}
// Read and decrypt `len` sectors starting at sector `offset` into `buffer`.
// The request is split into chunks of at most CRYPT_BUF_LEN sectors because
// that is the size of the bounce buffer used by read_sectors().
// Returns false as soon as one chunk fails; a zero-length request succeeds.
bool nandio_read_sectors(sec_t offset, sec_t len, void *buffer) {
	u8 *dst = (u8*)buffer;

	while (len > 0) {
		const sec_t chunk = (len >= CRYPT_BUF_LEN) ? CRYPT_BUF_LEN : len;

		if (!read_sectors(offset, chunk, dst)) {
			return false;
		}
		offset += chunk;
		len -= chunk;
		dst += SECTOR_SIZE * chunk;
	}
	return true;
}
// NAND writing is deliberately not implemented: every request is rejected.
bool nandio_write_sectors(sec_t offset, sec_t len, const void *buffer) {
	const bool written = false;
	return written;
}
// No status to clear; always reports success.
bool nandio_clear_status() {
	const bool cleared = true;
	return cleared;
}
bool nandio_shutdown() {
free(crypt_buf);
crypt_buf = 0;
return true;
}
// Disc interface for the decrypted console NAND, mounted via fatMountSimple("nand", ...).
// Initialisers, in order: four-character tag 'NAND', feature flags (read only),
// then the startup / is-inserted / read / write / clear-status / shutdown callbacks.
const DISC_INTERFACE io_dsi_nand = {
('N' << 24) | ('A' << 16) | ('N' << 8) | 'D',
FEATURE_MEDIUM_CANREAD,
nandio_startup,
nandio_is_inserted,
nandio_read_sectors,
nandio_write_sectors,
nandio_clear_status,
nandio_shutdown
};

8
arm9/source/nandio.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include <nds.h>
#include <nds/disc_io.h>
void nandio_set_fat_sig_fix(u32 offset);
extern const DISC_INTERFACE io_dsi_nand;

100
arm9/source/sector0.c Normal file
View File

@ -0,0 +1,100 @@
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include "utils.h"
#include "sector0.h"
// Validate sector 0 as an NCSD header.
// Returns  0 for a valid NCSD header,
//         -1 when the magic word is not 0x4453434e,
//         -2 when a partition entry has an unknown filesystem type.
// `verbose` currently has no effect: all diagnostic output is disabled.
int parse_ncsd(const uint8_t sector0[SECTOR_SIZE], int verbose) {
	const ncsd_header_t *hdr = (ncsd_header_t *)sector0;

	(void)verbose;

	if (hdr->magic != 0x4453434e) {
		return -1;
	}

	// The partition list ends at the first entry whose type is 0.
	for (unsigned idx = 0; idx < NCSD_PARTITIONS; ++idx) {
		const unsigned fs_type = hdr->fs_types[idx];

		if (fs_type == 0) {
			break;
		}
		switch (fs_type) {
		case 1:   // "Normal"
		case 3:   // "FIRM"
		case 4:   // "AGB_FIRM save"
			break;
		default:
			return -2;
		}
	}
	return 0;
}
// Reference MBR partition table for DSi NAND, compared against sector 0 by parse_mbr().
// Each row follows mbr_partition_t: status, first CHS, type, last CHS, offset, length.
const mbr_partition_t ptable_DSi[MBR_PARTITIONS] = {
{0, {3, 24, 4}, 6, {15, 224, 59}, 0x00000877, 0x00066f89},
{0, {2, 206, 60}, 6, {15, 224, 190}, 0x0006784d, 0x000105b3},
{0, {2, 222, 191}, 1, {15, 224, 191}, 0x00077e5d, 0x000001a3},
{0, {0, 0, 0}, 0, {0, 0, 0}, 0, 0}
};
// Reference MBR partition table used by parse_mbr() when is3DS is set.
// Each row follows mbr_partition_t: status, first CHS, type, last CHS, offset, length.
const mbr_partition_t ptable_3DS[MBR_PARTITIONS] = {
{0, {4, 24, 0}, 6, {1, 160, 63}, 0x00000097, 0x00047da9},
{0, {4, 142, 64}, 6, {1, 160, 195}, 0x0004808d, 0x000105b3},
{0, {0, 0, 0}, 0, {0, 0, 0}, 0, 0},
{0, {0, 0, 0}, 0, {0, 0, 0}, 0, 0}
};
// Validate a decrypted sector 0 as an MBR.
// Returns  0 for a valid MBR,
//         -1 when the 0x55 0xaa boot signature is missing,
//         -2 when the first partition entry differs from the reference table
//            (ptable_3DS when is3DS is non-zero, ptable_DSi otherwise).
// When both checks fail, -2 is returned.
//
// A non-zero bootstrap area on DSi is tolerated and is no longer scanned: the
// old scan only executed `ret = 0`, whose sole effect was to erase a
// boot-signature error detected just before it.
// `verbose` has no effect: all diagnostic output is disabled.
int parse_mbr(const uint8_t sector0[SECTOR_SIZE], int is3DS, int verbose) {
	const mbr_t *m = (mbr_t*)sector0;
	const mbr_partition_t *ref_ptable = is3DS ? ptable_3DS : ptable_DSi; // reference partition table
	int ret = 0;

	(void)verbose;

	if (m->boot_signature_0 != 0x55 || m->boot_signature_1 != 0xaa) {
		ret = -1;
	}

	// only test the 1st partition now, we've seen variations on the 3rd partition
	// and after all we only care about the 1st partition
	if (memcmp(ref_ptable, m->partitions, sizeof(mbr_partition_t))) {
		ret = -2;
	}
	return ret;
}

72
arm9/source/sector0.h Normal file
View File

@ -0,0 +1,72 @@
#pragma once
#include <stdint.h>
#include <assert.h>
// https://3dbrew.org/wiki/NCSD#NCSD_header
#define SECTOR_SIZE 0x200
#define NCSD_PARTITIONS 8
#ifdef _MSC_VER
#pragma pack(push, 1)
#define __PACKED
#elif defined __GNUC__
#define __PACKED __attribute__ ((__packed__))
#endif
// One entry of the NCSD partition table: start offset and length.
// NOTE(review): units are presumably sectors (the disabled diagnostics multiply
// length by SECTOR_SIZE) - confirm.
typedef struct {
uint32_t offset;
uint32_t length;
} __PACKED ncsd_partition_t;
// NCSD header as laid out at the start of sector 0 (0x160 bytes, checked by
// the static_assert below). parse_ncsd() reads `magic` and `fs_types`.
typedef struct {
uint8_t signature[0x100];
uint32_t magic; // parse_ncsd() expects 0x4453434e here
uint32_t size;
uint32_t media_id_l;
uint32_t media_id_h;
uint8_t fs_types[NCSD_PARTITIONS]; // per-partition type; 0 ends the list, parse_ncsd() accepts 1, 3 and 4
uint8_t crypt_types[NCSD_PARTITIONS];
ncsd_partition_t partitions[NCSD_PARTITIONS];
} __PACKED ncsd_header_t;
// Three-byte cylinder/head/sector address as stored in an MBR partition entry.
typedef struct {
uint8_t head;
uint8_t sector;
uint8_t cylinder;
} __PACKED chs_t;
// One 16-byte MBR partition entry. parse_mbr() compares the first entry of
// sector 0 byte-for-byte against a reference table.
typedef struct {
uint8_t status;
chs_t chs_first;
uint8_t type;
chs_t chs_last;
uint32_t offset;
uint32_t length;
} __PACKED mbr_partition_t;
#define MBR_PARTITIONS 4
// or 446 in decimal, all zero on DSi in all my samples
#define MBR_BOOTSTRAP_SIZE 0x1be
// Whole MBR sector: bootstrap area, four partition entries and the two
// boot-signature bytes. Must be exactly SECTOR_SIZE bytes (static_assert below).
typedef struct {
uint8_t bootstrap[MBR_BOOTSTRAP_SIZE];
mbr_partition_t partitions[MBR_PARTITIONS];
uint8_t boot_signature_0; // parse_mbr() expects 0x55
uint8_t boot_signature_1; // parse_mbr() expects 0xaa
} __PACKED mbr_t;
#ifdef _MSC_VER
#pragma pack(pop)
#endif
#undef __PACKED
static_assert(sizeof(ncsd_header_t) == 0x160, "sizeof(ncsd_header_t) should equal 0x160");
static_assert(sizeof(mbr_t) == SECTOR_SIZE, "sizeof(mbr_t) should equal 0x200");
int parse_ncsd(const uint8_t sector0[SECTOR_SIZE], int verbose);
int parse_mbr(const uint8_t sector0[SECTOR_SIZE], int is3DS, int verbose);

242
arm9/source/sha1.c Normal file
View File

@ -0,0 +1,242 @@
/*
---------------------------------------------------------------------------
Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 01/08/2005
This is a byte oriented version of SHA1 that operates on arrays of bytes
stored in memory.
*/
#include <string.h> /* for memcpy() etc. */
#include "sha1.h"
#if defined(__cplusplus)
extern "C"
{
#endif
#define SHA1_BLOCK_SIZE 64
#define rotl32(x,n) (((x) << n) | ((x) >> (32 - n)))
#define rotr32(x,n) (((x) >> n) | ((x) << (32 - n)))
#define bswap_32(x) ((rotr32((x), 24) & 0x00ff00ff) | (rotr32((x), 8) & 0xff00ff00))
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define bsw_32(p,n) \
{ int _i = (n); while(_i--) ((uint32_t*)p)[_i] = bswap_32(((uint32_t*)p)[_i]); }
#else
#define bsw_32(p,n)
#endif
#define SHA1_MASK (SHA1_BLOCK_SIZE - 1)
#if 0
#define ch(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
#define parity(x,y,z) ((x) ^ (y) ^ (z))
#define maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#else /* Discovered by Rich Schroeppel and Colin Plumb */
#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define parity(x,y,z) ((x) ^ (y) ^ (z))
#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y))))
#endif
/* Compile 64 bytes of hash data into SHA1 context. Note */
/* that this routine assumes that the byte order in the */
/* ctx->wbuf[] at this point is in such an order that low */
/* address bytes in the ORIGINAL byte stream will go in */
/* this buffer to the high end of 32-bit words on BOTH big */
/* and little endian systems */
#ifdef ARRAY
#define q(v,n) v[n]
#else
#define q(v,n) v##n
#endif
#define one_cycle(v,a,b,c,d,e,f,k,h) \
q(v,e) += rotr32(q(v,a),27) + \
f(q(v,b),q(v,c),q(v,d)) + k + h; \
q(v,b) = rotr32(q(v,b), 2)
#define five_cycle(v,f,k,i) \
one_cycle(v, 0,1,2,3,4, f,k,hf(i )); \
one_cycle(v, 4,0,1,2,3, f,k,hf(i+1)); \
one_cycle(v, 3,4,0,1,2, f,k,hf(i+2)); \
one_cycle(v, 2,3,4,0,1, f,k,hf(i+3)); \
one_cycle(v, 1,2,3,4,0, f,k,hf(i+4))
/* Run the SHA1 compression function over the 16 words held in  */
/* ctx->wbuf and add the result into ctx->hash. The words in    */
/* ctx->wbuf are overwritten as the message schedule is         */
/* expanded in place.                                           */
/* The line-continuation backslash that used to follow the      */
/* hf(15) cycle has been removed: with no blank line after it,  */
/* it spliced the following '#undef hf' directive into the      */
/* statement.                                                   */
static void sha1_compile(sha1_ctx ctx[1])
{   uint32_t    *w = ctx->wbuf;

#ifdef ARRAY
    uint32_t    v[5];
    memcpy(v, ctx->hash, 5 * sizeof(uint32_t));
#else
    uint32_t    v0, v1, v2, v3, v4;
    v0 = ctx->hash[0]; v1 = ctx->hash[1];
    v2 = ctx->hash[2]; v3 = ctx->hash[3];
    v4 = ctx->hash[4];
#endif

    /* cycles 0..15 use the message words directly */
#define hf(i)   w[i]

    five_cycle(v, ch, 0x5a827999,  0);
    five_cycle(v, ch, 0x5a827999,  5);
    five_cycle(v, ch, 0x5a827999, 10);
    one_cycle(v,0,1,2,3,4, ch, 0x5a827999, hf(15));

    /* cycles 16..79 expand the schedule in the 16-word ring */
#undef  hf
#define hf(i) (w[(i) & 15] = rotl32(                    \
                 w[((i) + 13) & 15] ^ w[((i) + 8) & 15] \
               ^ w[((i) +  2) & 15] ^ w[(i) & 15], 1))

    one_cycle(v,4,0,1,2,3, ch, 0x5a827999, hf(16));
    one_cycle(v,3,4,0,1,2, ch, 0x5a827999, hf(17));
    one_cycle(v,2,3,4,0,1, ch, 0x5a827999, hf(18));
    one_cycle(v,1,2,3,4,0, ch, 0x5a827999, hf(19));

    five_cycle(v, parity, 0x6ed9eba1,  20);
    five_cycle(v, parity, 0x6ed9eba1,  25);
    five_cycle(v, parity, 0x6ed9eba1,  30);
    five_cycle(v, parity, 0x6ed9eba1,  35);

    five_cycle(v, maj, 0x8f1bbcdc,  40);
    five_cycle(v, maj, 0x8f1bbcdc,  45);
    five_cycle(v, maj, 0x8f1bbcdc,  50);
    five_cycle(v, maj, 0x8f1bbcdc,  55);

    five_cycle(v, parity, 0xca62c1d6,  60);
    five_cycle(v, parity, 0xca62c1d6,  65);
    five_cycle(v, parity, 0xca62c1d6,  70);
    five_cycle(v, parity, 0xca62c1d6,  75);

#ifdef ARRAY
    ctx->hash[0] += v[0]; ctx->hash[1] += v[1];
    ctx->hash[2] += v[2]; ctx->hash[3] += v[3];
    ctx->hash[4] += v[4];
#else
    ctx->hash[0] += v0; ctx->hash[1] += v1;
    ctx->hash[2] += v2; ctx->hash[3] += v3;
    ctx->hash[4] += v4;
#endif
}
/* Reset a SHA1 context: zero the byte counter and load the     */
/* five initial hash words.                                     */
void sha1_begin(sha1_ctx ctx[1])
{
    static const uint32_t initial_hash[5] =
    {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
    };

    ctx->count[0] = 0;
    ctx->count[1] = 0;
    memcpy(ctx->hash, initial_hash, sizeof(initial_hash));
}
/* Feed len bytes from data[] into the SHA1 context. Bytes are  */
/* gathered in ctx->wbuf; every time 64 bytes are available the */
/* block is byte-swapped and compiled into the hash. Any        */
/* remainder stays in ctx->wbuf for the next call or sha1_end.  */
void sha1_hash(const unsigned char data[], unsigned long len, sha1_ctx ctx[1])
{
    unsigned char *block = (unsigned char*)ctx->wbuf;
    const unsigned char *src = data;
    uint32_t fill = (uint32_t)(ctx->count[0] & SHA1_MASK);
    uint32_t room = SHA1_BLOCK_SIZE - fill;

    /* 64-bit byte count kept as two 32-bit words, low word first */
    ctx->count[0] += len;
    if(ctx->count[0] < len)
        ++(ctx->count[1]);

    while(len >= room)     /* transfer whole blocks if possible */
    {
        memcpy(block + fill, src, room);
        src += room;
        len -= room;
        room = SHA1_BLOCK_SIZE;
        fill = 0;
        bsw_32(ctx->wbuf, SHA1_BLOCK_SIZE >> 2);
        sha1_compile(ctx);
    }

    memcpy(block + fill, src, len);
}
/* SHA1 final padding and digest calculation */
/* Pads the data buffered in ctx->wbuf, appends the bit length  */
/* derived from ctx->count, runs the last compression(s) and    */
/* writes SHA1_DIGEST_SIZE bytes of digest to hval[]. The       */
/* context is modified and must be re-initialised with          */
/* sha1_begin before it is used for another message.            */
void sha1_end(unsigned char hval[], sha1_ctx ctx[1])
{ uint32_t i = (uint32_t)(ctx->count[0] & SHA1_MASK);
/* put bytes in the buffer in an order in which references to */
/* 32-bit words will put bytes with lower addresses into the */
/* top of 32 bit words on BOTH big and little endian machines */
bsw_32(ctx->wbuf, (i + 3) >> 2);
/* we now need to mask valid bytes and add the padding which is */
/* a single 1 bit and as many zero bits as necessary. Note that */
/* we can always add the first padding byte here because the */
/* buffer always has at least one empty slot */
ctx->wbuf[i >> 2] &= 0xffffff80 << 8 * (~i & 3);
ctx->wbuf[i >> 2] |= 0x00000080 << 8 * (~i & 3);
/* we need 9 or more empty positions, one for the padding byte */
/* (above) and eight for the length count. If there is not */
/* enough space, pad and empty the buffer */
if(i > SHA1_BLOCK_SIZE - 9)
{
if(i < 60) ctx->wbuf[15] = 0;
sha1_compile(ctx);
i = 0;
}
else /* compute a word index for the empty buffer positions */
i = (i >> 2) + 1;
while(i < 14) /* and zero pad all but last two positions */
ctx->wbuf[i++] = 0;
/* the following 32-bit length fields are assembled in the */
/* wrong byte order on little endian machines but this is */
/* corrected later since they are only ever used as 32-bit */
/* word values. */
/* count[] holds a byte count; the shifts by 3 turn it into a */
/* bit count spread over the last two words of the block */
ctx->wbuf[14] = (ctx->count[1] << 3) | (ctx->count[0] >> 29);
ctx->wbuf[15] = ctx->count[0] << 3;
sha1_compile(ctx);
/* extract the hash value as bytes in case the hash buffer is */
/* misaligned for 32-bit words */
for(i = 0; i < SHA1_DIGEST_SIZE; ++i)
hval[i] = (unsigned char)(ctx->hash[i >> 2] >> (8 * (~i & 3)));
}
/* One-shot helper: hash len bytes of data[] and write the      */
/* SHA1_DIGEST_SIZE-byte digest to hval[].                      */
void sha1(unsigned char hval[], const unsigned char data[], unsigned long len)
{
    sha1_ctx state[1];

    sha1_begin(state);
    sha1_hash(data, len, state);
    sha1_end(hval, state);
}
#if defined(__cplusplus)
}
#endif

67
arm9/source/sha1.h Normal file
View File

@ -0,0 +1,67 @@
/*
---------------------------------------------------------------------------
Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 01/08/2005
*/
#ifndef _SHA1_H
#define _SHA1_H
#if defined(__cplusplus)
extern "C"
{
#endif
#if 0
} /* Appleasing Emacs */
#endif
#include <stdint.h>
/* Size of SHA1 digest */
#define SHA1_DIGEST_SIZE 20
/* type to hold the SHA1 context */
/* count: number of bytes hashed so far, low word first */
/* hash: the five running hash words */
/* wbuf: 64-byte block buffer, also used as the message schedule */
typedef struct
{ uint32_t count[2];
uint32_t hash[5];
uint32_t wbuf[16];
} sha1_ctx;
void sha1_begin(sha1_ctx ctx[1]);
void sha1_hash(const unsigned char data[], unsigned long len, sha1_ctx ctx[1]);
void sha1_end(unsigned char hval[], sha1_ctx ctx[1]);
void sha1(unsigned char hval[], const unsigned char data[], unsigned long len);
#if defined(__cplusplus)
}
#endif
#endif

43
arm9/source/types.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef __TYPES_H__
#define __TYPES_H__
#include <stdint.h>
#include <stdbool.h>
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
// Option bit flags; each enumerator is a distinct single bit so values can be OR-ed together.
// NOTE(review): no user of these flags is visible in this commit.
enum flags
{
ExtractFlag = (1<<0),
InfoFlag = (1<<1),
PlainFlag = (1<<2),
VerboseFlag = (1<<3),
VerifyFlag = (1<<4),
RawFlag = (1<<5),
ShowKeysFlag = (1<<6),
DecompressCodeFlag = (1<<7)
};
// Three-state result of a validity check: not checked yet, passed, or failed.
enum validstate
{
Unchecked = 0,
Good = 1,
Fail = 2,
};
// Byte counts for the binary size units: 0x400 = 1024, 0x100000 = 1024 * 1024.
enum sizeunits
{
sizeKB = 0x400,
sizeMB = 0x100000,
};
#endif

194
arm9/source/utils.c Normal file
View File

@ -0,0 +1,194 @@
#include <stdio.h>
#include <sys/statvfs.h>
#include <nds.h>
#include "utils.h"
swiSHA1context_t sha1ctx;
// Convert one hexadecimal digit character to its value (0..15).
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns -1 for anything else.
static inline int htoi(char a){
	if ('0' <= a && a <= '9') {
		return a - '0';
	}
	if ('a' <= a && a <= 'f') {
		return 0xa + (a - 'a');
	}
	if ('A' <= a && a <= 'F') {
		return 0xa + (a - 'A');
	}
	return -1;
}
// Decode the first 2 * byte_len hex digits of `in` into byte_len bytes at `out`.
// Returns  0 on success,
//         -1 when `in` is shorter than 2 * byte_len characters,
//         -2 when a non-hex character is met (bytes decoded so far stay in `out`).
// A diagnostic is printed with iprintf in both error cases.
int hex2bytes(uint8_t *out, unsigned byte_len, const char *in){
	if (strlen(in) < byte_len << 1){
		iprintf("%s: invalid input length, expecting %u, got %u.\n",
			__FUNCTION__, (unsigned)byte_len << 1, (unsigned)strlen(in));
		return -1;
	}

	for (unsigned n = 0; n < byte_len; ++n, in += 2){
		const int hi = htoi(in[0]);
		const int lo = htoi(in[1]);

		if (hi == -1 || lo == -1){
			iprintf("%s: invalid input \"%c%c\"\n",
				__FUNCTION__, in[0], in[1]);
			return -2;
		}
		out[n] = (hi << 4) + lo;
	}
	return 0;
}
static char str_buf[0x10];
// Format a byte count as a number of MiB.
// Exact multiples of 1 MiB are printed as an integer, everything else with
// two decimals. The result lives in the shared static str_buf, so it is
// overwritten by the next call.
const char *to_mebi(size_t size) {
	const int whole_mib = (size % (1024 * 1024)) == 0;

	if (whole_mib) {
		siprintf(str_buf, "%u", (unsigned)(size >> 20));
	} else {
		sprintf(str_buf, "%.2f", (float)(((double)size) / 1024 / 1024));
	}
	return str_buf;
}
// Write `size` bytes from `buffer` to `filename`, replacing any existing file.
// When save_sha1 is non-zero, the SHA-1 of the buffer is computed into the
// global sha1ctx and a companion "<filename>.sha1" file is written through
// save_sha1_file() (its result is not propagated, as before).
// Returns  0 on success,
//         -1 when the file cannot be opened,
//         -2 when not all bytes were written or closing the file failed.
int save_file(const char *filename, const void *buffer, size_t size, int save_sha1) {
	FILE *f = fopen(filename, "wb");
	if (f == 0) {
		return -1;
	}

	size_t written = fwrite(buffer, 1, size, f);
	// fclose flushes buffered data, so a failure here means the file is
	// incomplete even when fwrite reported the full size.
	int close_failed = (fclose(f) != 0);

	if (written != size || close_failed) {
		return -2;
	}

	if (save_sha1) {
		sha1ctx.sha_block = 0;
		swiSHA1Init(&sha1ctx);
		swiSHA1Update(&sha1ctx, buffer, size);
		save_sha1_file(filename);
	}
	return 0;
}
// Load a whole file into a freshly allocated buffer.
//   pbuf        receives the buffer (caller frees it); set to 0 unless 0 is returned
//   psize       receives the file size in bytes (0 when it cannot be determined)
//   verify_sha1 accepted but not implemented yet
//   align       when non-zero, the buffer is allocated with memalign(align, size)
// Returns  0 on success,
//          1 when the file is empty,
//         -1 when the file cannot be opened or memory cannot be allocated,
//         -2 when seeking, sizing or reading the file fails.
// Seek/tell failures used to go unnoticed, turning ftell's -1 into a huge
// size_t allocation request.
int load_file(void **pbuf, size_t *psize, const char *filename, int verify_sha1, int align) {
	FILE *f = fopen(filename, "rb");
	if (f == 0) {
		return -1;
	}

	int ret;
	long end = -1;

	(void)verify_sha1; // TODO: SHA-1 verification is not implemented

	*pbuf = 0;
	*psize = 0;

	if (fseek(f, 0, SEEK_END) == 0) {
		end = ftell(f);
	}

	if (end < 0) {
		ret = -2;
	} else if (end == 0) {
		ret = 1;
	} else {
		*psize = (size_t)end;
		if (align) {
			*pbuf = memalign(align, *psize);
		} else {
			*pbuf = malloc(*psize);
		}
		if (*pbuf == 0) {
			ret = -1;
		} else {
			size_t got = 0;
			if (fseek(f, 0, SEEK_SET) == 0) {
				got = fread(*pbuf, 1, *psize, f);
			}
			if (got != *psize) {
				free(*pbuf);
				*pbuf = 0;
				ret = -2;
			} else {
				ret = 0;
			}
		}
	}

	fclose(f);
	return ret;
}
// Read exactly `size` bytes at byte `offset` of `filename` into `buf`.
// Returns 0 on success and -1 when the file cannot be opened, the seek fails
// or fewer than `size` bytes could be read.
int load_block_from_file(void *buf, const char *filename, unsigned offset, unsigned size) {
	FILE *f = fopen(filename, "rb");
	if (f == 0) {
		return -1;
	}

	int ret = 0;
	// no seek is issued for offset 0: the stream already starts there
	if (offset != 0 && fseek(f, offset, SEEK_SET) != 0) {
		ret = -1;
	} else if (fread(buf, 1, size, f) != size) {
		ret = -1;
	}

	fclose(f);
	return ret;
}
// Finalise the global sha1ctx and write the digest to "<filename>.sha1" as
// 40 upper-case hex digits followed by " *<filename>\n".
// you should have updated the sha1 context before calling save_sha1_file
// example: save_file() in this file and backup() in nand.c
// Returns the result of save_file() for the .sha1 file, or -1 when memory
// cannot be allocated (allocations were previously unchecked).
int save_sha1_file(const char *filename) {
	size_t len_fn = strlen(filename);
	// 20 bytes each use 2 chars, space, asterisk, filename, new line
	size_t len_buf = 2 * 20 + 1 + 1 + len_fn + 1;
	// unsigned bytes: with a signed char, "%02X" would print sign-extended
	// values and overrun the text buffer
	unsigned char digest[20];
	int ret = -1;

	char *sha1_fn = (char *)malloc(len_fn + 6);    // ".sha1" plus \0
	char *sha1_buf = (char *)malloc(len_buf + 1);  // extra for \0

	if (sha1_fn != 0 && sha1_buf != 0) {
		char *p = sha1_buf;

		siprintf(sha1_fn, "%s.sha1", filename);
		swiSHA1Final(digest, &sha1ctx);
		for (int i = 0; i < 20; ++i) {
			p += siprintf(p, "%02X", digest[i]);
		}
		siprintf(p, " *%s\n", filename);
		ret = save_file(sha1_fn, (u8*)sha1_buf, len_buf, false);
	}

	free(sha1_fn);
	free(sha1_buf);
	return ret;
}
// Print `len` bytes of `buf` as lower-case two-digit hex, with no separators
// and no trailing newline.
void print_bytes(const void *buf, size_t len) {
	const unsigned char *bytes = (const unsigned char *)buf;
	size_t left = len;

	while (left > 0) {
		iprintf("%02x", *bytes);
		++bytes;
		--left;
	}
}
// Copy the ASCII subset of a UTF-16 string to `out`.
// Code units below 0x80 are copied as single bytes; all others are dropped.
// Conversion stops at the first zero code unit, which is written as the
// terminator. If no zero unit appears within `len` units, `out` is NOT terminated.
// out must be big enough
// can work in place
void utf16_to_ascii(uint8_t *out, const uint16_t *in, unsigned len) {
	for (unsigned idx = 0; idx < len; ++idx) {
		const uint16_t unit = in[idx];

		if (unit == 0) {
			*out = 0;
			return;
		}
		if (unit < 0x80) {
			*out++ = (uint8_t)unit;
		}
	}
}
// Return the free space, in bytes, of the filesystem containing `path`
// (block size times free block count as reported by statvfs).
// Returns 0 when statvfs fails; previously the result was computed from an
// uninitialised struct in that case.
// `verbose` has no effect: the diagnostic output is disabled.
size_t df(const char *path, int verbose) {
	// it's amazing libfat even got this to work
	struct statvfs s;

	(void)verbose;

	if (statvfs(path, &s) != 0) {
		return 0;
	}

	// named free_bytes rather than `free` to avoid shadowing the libc function
	size_t free_bytes = s.f_bsize * s.f_bfree;
	return free_bytes;
}

23
arm9/source/utils.h Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include <nds.h>
#include <stdint.h>
int hex2bytes(uint8_t *out, unsigned byte_len, const char *in);
const char * to_mebi(size_t size);
int save_file(const char *filename, const void *buffer, size_t size, int save_sha1);
int load_file(void **pbuf, size_t *psize, const char *filename, int verify_sha1, int align);
int load_block_from_file(void *buf, const char *filename, unsigned offset, unsigned size);
int save_sha1_file(const char *filename);
void print_bytes(const void *buf, size_t len);
void utf16_to_ascii(uint8_t *out, const uint16_t *in, unsigned len);
size_t df(const char *path, int verbose);