mirror of
https://github.com/Jimmy-Z/bfCL.git
synced 2025-06-18 11:05:49 -04:00
some how it is broken now after some refactoring
This commit is contained in:
parent
e64a85470c
commit
ed7e0ccfd9
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,5 +2,5 @@
|
||||
*.o
|
||||
*.exe
|
||||
*.sln
|
||||
*.vcxproj*
|
||||
*.vc*
|
||||
x64/
|
||||
|
2
Makefile
2
Makefile
@ -1,6 +1,6 @@
|
||||
# only tested in mingw
|
||||
PNAME = bfcl
|
||||
OBJS = $(PNAME).o ocl_util.o utils.o sha1_16.o aes_128.o
|
||||
OBJS = $(PNAME).o ocl_util.o utils.o sha1_16.o aes_128.o ocl_test.o ocl_brute.o
|
||||
CFLAGS += -std=c11 -Wall -O2 -mrdrnd -I$(INTELOCLSDKROOT)/include
|
||||
LDFLAGS += -L$(INTELOCLSDKROOT)/lib/x64
|
||||
|
||||
|
276
bfcl.c
276
bfcl.c
@ -1,285 +1,19 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <immintrin.h>
|
||||
#include "ocl.h"
|
||||
#include "crypto.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <cpuid.h>
|
||||
#elif _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
void ocl_test();
|
||||
|
||||
int cpu_has_rdrand() {
|
||||
#if __GNUC__
|
||||
unsigned a = 0, b = 0, c = 0, d = 0;
|
||||
__get_cpuid(1, &a, &b, &c, &d);
|
||||
return c & bit_RDRND;
|
||||
#elif _MSC_VER
|
||||
int regs[4];
|
||||
__cpuid(regs, 1);
|
||||
return regs[2] & (1<<30);
|
||||
#else
|
||||
// ICL only?
|
||||
return _may_i_use_cpu_feature(_FEATURE_RDRND);
|
||||
#endif
|
||||
}
|
||||
|
||||
// CAUTION: caller is responsible to free the buf
|
||||
char * read_file(const char *file_name, size_t *p_size) {
|
||||
FILE * f = fopen(file_name, "rb");
|
||||
if (f == NULL) {
|
||||
printf("can't read file: %s", file_name);
|
||||
exit(-1);
|
||||
}
|
||||
fseek(f, 0, SEEK_END);
|
||||
*p_size = ftell(f);
|
||||
char * buf = malloc(*p_size);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
fread(buf, *p_size, 1, f);
|
||||
fclose(f);
|
||||
return buf;
|
||||
}
|
||||
|
||||
void read_files(unsigned num_files, const char *file_names[], char *ptrs[], size_t sizes[]) {
|
||||
for (unsigned i = 0; i < num_files; ++i) {
|
||||
ptrs[i] = read_file(file_names[i], &sizes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_to_file(const char *file_name, const void *buf, size_t len) {
|
||||
FILE *f = fopen(file_name, "wb");
|
||||
if (f == NULL) {
|
||||
printf("can't open file to write: %s\n", file_name);
|
||||
return;
|
||||
}
|
||||
fwrite(buf, len, 1, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
#define TEST_SHA1_16 1
|
||||
#define TEST_AES_128_ECB 2
|
||||
|
||||
#define BLOCK_SIZE 0x10
|
||||
#define NUM_BLOCKS (1 << 23)
|
||||
#define BLOCKS_PER_ITEM 1
|
||||
|
||||
void ocl_test(cl_device_id device_id, const cl_uchar *buf_in, int test_case) {
|
||||
cl_int err;
|
||||
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
||||
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
|
||||
HP_Time t0, t1;
|
||||
long long td;
|
||||
|
||||
const size_t num_items = NUM_BLOCKS / BLOCKS_PER_ITEM;
|
||||
const size_t io_buf_len = NUM_BLOCKS * BLOCK_SIZE;
|
||||
|
||||
const char *source_names[] = { "cl/sha1_16.cl", "cl/aes_tables.cl", "cl/aes_128.cl", "cl/kernels.cl" };
|
||||
const unsigned num_sources = sizeof(source_names) / sizeof(char *);
|
||||
char *sources[sizeof(source_names) / sizeof(char *)];
|
||||
size_t source_sizes[sizeof(source_names) / sizeof(char *)];
|
||||
read_files(num_sources, source_names, sources, source_sizes);
|
||||
|
||||
get_hp_time(&t0);
|
||||
// WTF? GCC complains if I pass char ** in to a function expecting const char **?
|
||||
cl_program program = OCL_ASSERT2(clCreateProgramWithSource(context, num_sources, (const char **)sources, source_sizes, &err));
|
||||
char options[0x100];
|
||||
sprintf(options, "-w -Werror -DBLOCKS_PER_ITEM=%d", BLOCKS_PER_ITEM);
|
||||
// printf("compiler options: %s\n", options);
|
||||
err = clBuildProgram(program, 0, NULL, options, NULL, NULL);
|
||||
get_hp_time(&t1);
|
||||
printf("%d microseconds for compile\n", (int)hp_time_diff(&t0, &t1));
|
||||
for (unsigned i = 0; i < num_sources; ++i) {
|
||||
free(sources[i]);
|
||||
}
|
||||
if (err != CL_SUCCESS) {
|
||||
fprintf(stderr, "failed to build program, error: %s, build log:\n", ocl_err_msg(err));
|
||||
size_t len;
|
||||
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
|
||||
char *buf_log = malloc(len + 1);
|
||||
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, buf_log, NULL);
|
||||
buf_log[len] = '\0';
|
||||
fprintf(stderr, "%s\n", buf_log);
|
||||
free(buf_log);
|
||||
exit(err);
|
||||
}
|
||||
|
||||
const char * test_name;
|
||||
switch (test_case) {
|
||||
case TEST_SHA1_16: test_name = "sha1_16_test"; break;
|
||||
case TEST_AES_128_ECB: test_name = "aes_128_ecb_test"; break;
|
||||
default: exit(-1);
|
||||
}
|
||||
|
||||
printf("%s on %u MB\n", test_name, (unsigned)io_buf_len >> 20);
|
||||
|
||||
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, test_name, &err));
|
||||
|
||||
cl_uchar key[16];
|
||||
unsigned int aes_rk[RK_LEN];
|
||||
if (test_case == TEST_AES_128_ECB) {
|
||||
aes_gen_tables();
|
||||
for (unsigned i = 0; i < 16; ++i) {
|
||||
key[i] = rand() & 0xff;
|
||||
}
|
||||
printf("Key: %s\n", hexdump(key, 16, 0));
|
||||
}
|
||||
|
||||
cl_mem mem_in = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_ONLY, io_buf_len, NULL, &err));
|
||||
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, io_buf_len, NULL, &err));
|
||||
cl_mem mem_key;
|
||||
if (test_case == TEST_AES_128_ECB) {
|
||||
mem_key = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_ONLY, 16, NULL, &err));
|
||||
}
|
||||
|
||||
get_hp_time(&t0);
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_in, CL_TRUE, 0, io_buf_len, buf_in, 0, NULL, NULL));
|
||||
if (test_case == TEST_AES_128_ECB) {
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_key, CL_TRUE, 0, 16, key, 0, NULL, NULL));
|
||||
}
|
||||
get_hp_time(&t1);
|
||||
td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for data upload, %.2f MB/s\n", (int)td, io_buf_len * 1.0f / td);
|
||||
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_in));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_out));
|
||||
if (test_case == TEST_AES_128_ECB) {
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_key));
|
||||
}
|
||||
|
||||
size_t local;
|
||||
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
||||
printf("local work size: %u\n", (unsigned)local);
|
||||
|
||||
get_hp_time(&t0);
|
||||
// apparently, setting local work size to NULL doesn't affect performance, at least in this kind of work
|
||||
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
||||
// OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, NULL, 0, NULL, NULL));
|
||||
clFinish(command_queue);
|
||||
get_hp_time(&t1);
|
||||
td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for OpenCL, %.2f MB/s\n", (int)td, io_buf_len * 1.0f / td);
|
||||
|
||||
cl_uchar *buf_out = malloc(io_buf_len);
|
||||
get_hp_time(&t0);
|
||||
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, io_buf_len, buf_out, 0, NULL, NULL));
|
||||
get_hp_time(&t1);
|
||||
td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for data download, %.2f MB/s\n", (int)td, io_buf_len * 1.0f / td);
|
||||
|
||||
/*
|
||||
if(test_case == TEST_AES_128_ECB){
|
||||
dump_to_file("r:/test_aes_in.bin", buf_in, io_buf_len);
|
||||
dump_to_file("r:/test_aes_out.bin", buf_out, io_buf_len);
|
||||
}
|
||||
*/
|
||||
|
||||
cl_uchar *buf_verify = malloc(io_buf_len);
|
||||
get_hp_time(&t0);
|
||||
if (test_case == TEST_SHA1_16) {
|
||||
for (unsigned offset = 0; offset < io_buf_len; offset += BLOCK_SIZE) {
|
||||
sha1_16(buf_in + offset, buf_verify + offset);
|
||||
}
|
||||
} else {
|
||||
for (unsigned offset = 0; offset < io_buf_len; offset += BLOCK_SIZE) {
|
||||
// setting the same key over and over is stupid
|
||||
// yet we still do it to keep it in line with the OpenCL port
|
||||
// otherwise we can't test the set key in OpenCL
|
||||
aes_set_key_enc_128(aes_rk, key);
|
||||
aes_encrypt_128(aes_rk, buf_in + offset, buf_verify + offset);
|
||||
}
|
||||
}
|
||||
get_hp_time(&t1);
|
||||
td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for C(single thread), %.2f MB/s\n", (int)td, io_buf_len * 1.0f / td);
|
||||
|
||||
if (memcmp(buf_verify, buf_out, io_buf_len)) {
|
||||
printf("%s: verification failed\n", test_name);
|
||||
for (unsigned offset = 0; offset < io_buf_len; offset += BLOCK_SIZE) {
|
||||
if (memcmp(buf_verify + offset, buf_out + offset, BLOCK_SIZE)) {
|
||||
printf("first difference @ 0x%08x/0x%08x:\n", offset, (unsigned)num_items );
|
||||
printf("\t%s\n", hexdump(buf_in + offset, BLOCK_SIZE, 0));
|
||||
printf("\t%s\n", hexdump(buf_verify + offset, BLOCK_SIZE, 0));
|
||||
printf("\t%s\n", hexdump(buf_out + offset, BLOCK_SIZE, 0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
printf("%s: succeed\n", test_name);
|
||||
}
|
||||
|
||||
free(buf_out);
|
||||
clReleaseMemObject(mem_in);
|
||||
clReleaseMemObject(mem_out);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseCommandQueue(command_queue);
|
||||
clReleaseContext(context);
|
||||
}
|
||||
void ocl_brute();
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
if (argc == 2 && !strcmp(argv[1], "info")) {
|
||||
cl_uint num_platforms;
|
||||
ocl_info(&num_platforms, 1);
|
||||
} else if (argc == 2 && !strcmp(argv[1], "console_id")){
|
||||
ocl_brute();
|
||||
} else if (argc == 1){
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
ocl_get_device(&platform_id, &device_id);
|
||||
if (platform_id == NULL || device_id == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_uchar *buf_in = malloc(BLOCK_SIZE * NUM_BLOCKS);
|
||||
srand(2501);
|
||||
HP_Time t0, t1; long long td;
|
||||
get_hp_time(&t0);
|
||||
if(cpu_has_rdrand()){
|
||||
// ~190 MB/s @ X230, ~200 without the success check
|
||||
printf("randomize source buffer using RDRAND\n");
|
||||
unsigned long long *p = (unsigned long long *)buf_in;
|
||||
unsigned long long *p_end = (unsigned long long *)(buf_in + BLOCK_SIZE * NUM_BLOCKS);
|
||||
int success = 1;
|
||||
while (p < p_end) {
|
||||
success &= _rdrand64_step(p++);
|
||||
}
|
||||
if (!success) {
|
||||
printf("RDRND failed\n");
|
||||
exit(-1);
|
||||
}
|
||||
}else {
|
||||
printf("randomize source buffer using AES OFB\n");
|
||||
// rand() & 0xff is about ~60 MB/s @ X230
|
||||
// it's worse than that AES single thread C, so OFB it is
|
||||
// ~240 MB/s, even faster than RDRAND
|
||||
srand(2501);
|
||||
unsigned int aes_rk[RK_LEN];
|
||||
unsigned char key_iv[16 * 2];
|
||||
for (unsigned i = 0; i < 16 * 2; ++i) {
|
||||
key_iv[i] = rand() & 0xff;
|
||||
}
|
||||
aes_set_key_enc_128(aes_rk, key_iv);
|
||||
aes_encrypt_128(aes_rk, key_iv + 16, buf_in);
|
||||
unsigned char *p_in = buf_in, *p_out = buf_in + 16,
|
||||
*p_end = buf_in + BLOCK_SIZE * NUM_BLOCKS;
|
||||
while (p_out < p_end) {
|
||||
aes_encrypt_128(aes_rk, p_in, p_out);
|
||||
p_in = p_out;
|
||||
p_out += 16;
|
||||
}
|
||||
}
|
||||
get_hp_time(&t1);
|
||||
td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for preparing test data, %.2f MB/s\n",
|
||||
(int)td, BLOCK_SIZE * NUM_BLOCKS * 1.0f / td);
|
||||
|
||||
ocl_test(device_id, buf_in, TEST_SHA1_16);
|
||||
ocl_test(device_id, buf_in, TEST_AES_128_ECB);
|
||||
|
||||
free(buf_in);
|
||||
ocl_test();
|
||||
#ifdef _WIN32
|
||||
system("pause");
|
||||
#endif
|
||||
|
@ -1,18 +1,9 @@
|
||||
|
||||
// OpenCL has these fancy address space qualifiers that can't be cast without
|
||||
#define GET_UINT32_LE(n, b, i) \
|
||||
(n) = *(uint32_t*)(b + i)
|
||||
#define GET_UINT32_LE_G(n, b, i) \
|
||||
(n) = *(__global uint32_t*)(b + i)
|
||||
#define GET_UINT32_LE_C(n, b, i) \
|
||||
(n) = *(__constant uint32_t*)(b + i)
|
||||
#define PUT_UINT32_LE(n, b, i) \
|
||||
*(uint32_t*)(b + i) = (n)
|
||||
#define PUT_UINT32_LE_G(n, b, i) \
|
||||
*(__global uint32_t*)(b + i) = (n)
|
||||
// AES 128 ECB adapted for OpenCL, see "aes_128.c" for more info
|
||||
|
||||
#define RK_LEN 44
|
||||
|
||||
// the caller is responsible to put the key in rk
|
||||
void aes_set_key_enc_128(uint32_t rk[RK_LEN])
|
||||
{
|
||||
uint32_t *RK = rk;
|
||||
|
@ -1,8 +1,11 @@
|
||||
|
||||
// I'm obsessed with the idea of generating this file in memory
|
||||
// on the other hand I know that's pointless...
|
||||
|
||||
/*
|
||||
* Forward S-box
|
||||
*/
|
||||
__constant static const unsigned char FSb[256] =
|
||||
static const unsigned char FSb[256] =
|
||||
{
|
||||
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
|
||||
0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
|
||||
@ -109,19 +112,19 @@ __constant static const unsigned char FSb[256] =
|
||||
V(CB,B0,B0,7B), V(FC,54,54,A8), V(D6,BB,BB,6D), V(3A,16,16,2C)
|
||||
|
||||
#define V(a,b,c,d) 0x##a##b##c##d
|
||||
__constant static const uint32_t FT0[256] = { FT };
|
||||
static const uint32_t FT0[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#define V(a,b,c,d) 0x##b##c##d##a
|
||||
__constant static const uint32_t FT1[256] = { FT };
|
||||
static const uint32_t FT1[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#define V(a,b,c,d) 0x##c##d##a##b
|
||||
__constant static const uint32_t FT2[256] = { FT };
|
||||
static const uint32_t FT2[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#define V(a,b,c,d) 0x##d##a##b##c
|
||||
__constant static const uint32_t FT3[256] = { FT };
|
||||
static const uint32_t FT3[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#undef FT
|
||||
@ -129,7 +132,7 @@ __constant static const uint32_t FT3[256] = { FT };
|
||||
/*
|
||||
* Round constants
|
||||
*/
|
||||
__constant static const uint32_t RCON[10] =
|
||||
static const uint32_t RCON[10] =
|
||||
{
|
||||
0x00000001, 0x00000002, 0x00000004, 0x00000008,
|
||||
0x00000010, 0x00000020, 0x00000040, 0x00000080,
|
||||
|
39
cl/common.h
Normal file
39
cl/common.h
Normal file
@ -0,0 +1,39 @@
|
||||
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
typedef unsigned char u8;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned long u64;
|
||||
|
||||
#ifndef GET_UINT32_BE
|
||||
#define GET_UINT32_BE(n,b,i) \
|
||||
{ \
|
||||
(n) = ( (uint32_t) (b)[(i) ] << 24 ) \
|
||||
| ( (uint32_t) (b)[(i) + 1] << 16 ) \
|
||||
| ( (uint32_t) (b)[(i) + 2] << 8 ) \
|
||||
| ( (uint32_t) (b)[(i) + 3] ); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef PUT_UINT32_BE
|
||||
#define PUT_UINT32_BE(n,b,i) \
|
||||
{ \
|
||||
(b)[(i) ] = (unsigned char) ( (n) >> 24 ); \
|
||||
(b)[(i) + 1] = (unsigned char) ( (n) >> 16 ); \
|
||||
(b)[(i) + 2] = (unsigned char) ( (n) >> 8 ); \
|
||||
(b)[(i) + 3] = (unsigned char) ( (n) ); \
|
||||
}
|
||||
#endif
|
||||
|
||||
// OpenCL has these fancy address space qualifiers that can't be cast without
|
||||
#define GET_UINT32_LE(n, b, i) \
|
||||
(n) = *(uint32_t*)(b + i)
|
||||
#define GET_UINT32_LE_G(n, b, i) \
|
||||
(n) = *(__global uint32_t*)(b + i)
|
||||
#define GET_UINT32_LE_C(n, b, i) \
|
||||
(n) = *(__constant uint32_t*)(b + i)
|
||||
#define PUT_UINT32_LE(n, b, i) \
|
||||
*(uint32_t*)(b + i) = (n)
|
||||
#define PUT_UINT32_LE_G(n, b, i) \
|
||||
*(__global uint32_t*)(b + i) = (n)
|
||||
|
67
cl/dsi.cl
Normal file
67
cl/dsi.cl
Normal file
@ -0,0 +1,67 @@
|
||||
|
||||
// more about this: https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c
|
||||
|
||||
static const u64 DSi_KEY_Y[2] =
|
||||
{0xbd4dc4d30ab9dc76ull, 0xe1a00005202ddd1dull};
|
||||
|
||||
static const u64 DSi_KEY_MAGIC[2] =
|
||||
{0x2a680f5f1a4f3e79ull, 0xfffefb4e29590258ull};
|
||||
|
||||
static inline void xor_128(u64 *x, const u64 *a, const u64 *b){
|
||||
x[0] = a[0] ^ b[0];
|
||||
x[1] = a[1] ^ b[1];
|
||||
}
|
||||
|
||||
static inline void add_128(u64 *a, const u64 *b){
|
||||
a[0] += b[0];
|
||||
if(a[0] < b[0]){
|
||||
a[1] += b[1] + 1;
|
||||
}else{
|
||||
a[1] += b[1];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void add_128_64(u64 *a, u64 b){
|
||||
a[0] += b;
|
||||
if(a[0] < b){
|
||||
a[1] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Answer to life, universe and everything.
|
||||
static inline void rol42_128(u64 *a){
|
||||
u64 t = a[1];
|
||||
a[1] = (t << 42 ) | (a[0] >> 22);
|
||||
a[0] = (a[0] << 42 ) | (t >> 22);
|
||||
}
|
||||
|
||||
// eMMC Encryption for MBR/Partitions (AES-CTR, with console-specific key)
|
||||
void dsi_make_key(u64 *key, u64 console_id){
|
||||
u32 h = console_id >> 32, l = (u32)console_id;
|
||||
u32 key_x[4] = {l, l ^ 0x24ee6906, h ^ 0xe65b601d, h};
|
||||
// Key = ((Key_X XOR Key_Y) + FFFEFB4E295902582A680F5F1A4F3E79h) ROL 42
|
||||
// equivalent to F_XY in twltool/f_xy.c
|
||||
xor_128(key, (u64*)key_x, DSi_KEY_Y);
|
||||
add_128(key, DSi_KEY_MAGIC);
|
||||
rol42_128(key);
|
||||
}
|
||||
|
||||
// CAUTION this one doesn't work in-place
|
||||
void byte_reverse_16(u8 *out, const u8 *in){
|
||||
out[0] = in[15];
|
||||
out[1] = in[14];
|
||||
out[2] = in[13];
|
||||
out[3] = in[12];
|
||||
out[4] = in[11];
|
||||
out[5] = in[10];
|
||||
out[6] = in[9];
|
||||
out[7] = in[8];
|
||||
out[8] = in[7];
|
||||
out[9] = in[6];
|
||||
out[10] = in[5];
|
||||
out[11] = in[4];
|
||||
out[12] = in[3];
|
||||
out[13] = in[2];
|
||||
out[14] = in[1];
|
||||
out[15] = in[0];
|
||||
}
|
31
cl/kernel_console_id.cl
Normal file
31
cl/kernel_console_id.cl
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
// the caller should feed the target xor pad byte reversed as two uint64_t
|
||||
// the ctr from emmc_cid_sha1 byte reversed as 4 uint32_t
|
||||
__kernel void test_console_id(
|
||||
u64 xor_l, u64 xor_h,
|
||||
u64 console_id_template,
|
||||
u32 ctr0, u32 ctr1, u32 ctr2, u32 ctr3,
|
||||
__global int *success,
|
||||
__global u64 *console_id_out)
|
||||
{
|
||||
if(success){
|
||||
return;
|
||||
}
|
||||
// TODO: BCD conversion
|
||||
u64 console_id = get_global_id(0) | console_id_template;
|
||||
u64 dsi_key[2];
|
||||
dsi_make_key(dsi_key, console_id);
|
||||
|
||||
u32 aes_rk[RK_LEN];
|
||||
byte_reverse_16((u8*)aes_rk, (u8*)dsi_key);
|
||||
aes_set_key_enc_128(aes_rk);
|
||||
|
||||
u32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
|
||||
u64 xor[2];
|
||||
aes_encrypt_128(aes_rk, ctr, (u32*)xor);
|
||||
|
||||
if(xor[0] == xor_l && xor[1] == xor_h){
|
||||
*success = 1;
|
||||
*console_id_out = console_id;
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
|
||||
__kernel void sha1_16_test(
|
||||
__global const unsigned char *in,
|
||||
__constant const unsigned char *in,
|
||||
__global unsigned char *out)
|
||||
{
|
||||
unsigned offset = get_global_id(0) * BLOCKS_PER_ITEM * 16;
|
||||
@ -26,9 +26,9 @@ __kernel void sha1_16_test(
|
||||
#define AES_BLOCK_SIZE 16
|
||||
|
||||
__kernel void aes_128_ecb_test(
|
||||
__global const uint32_t *in,
|
||||
__global uint32_t *out,
|
||||
__constant const uint32_t *key)
|
||||
__constant const uint32_t *key,
|
||||
__constant const uint32_t *in,
|
||||
__global uint32_t *out)
|
||||
{
|
||||
uint32_t rk[RK_LEN];
|
||||
rk[0] = key[0]; rk[1] = key[1]; rk[2] = key[2]; rk[3] = key[3];
|
||||
@ -43,7 +43,7 @@ __kernel void aes_128_ecb_test(
|
||||
aes_encrypt_128(rk, buf, buf);
|
||||
out[0] = buf[0]; out[1] = buf[1]; out[2] = buf[2]; out[3] = buf[3];
|
||||
#if BLOCKS_PER_ITEM != 1
|
||||
offset += AES_BLOCK_SIZE;
|
||||
offset += AES_BLOCK_SIZE / 4;
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,25 +1,5 @@
|
||||
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
#ifndef GET_UINT32_BE
|
||||
#define GET_UINT32_BE(n,b,i) \
|
||||
{ \
|
||||
(n) = ( (uint32_t) (b)[(i) ] << 24 ) \
|
||||
| ( (uint32_t) (b)[(i) + 1] << 16 ) \
|
||||
| ( (uint32_t) (b)[(i) + 2] << 8 ) \
|
||||
| ( (uint32_t) (b)[(i) + 3] ); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef PUT_UINT32_BE
|
||||
#define PUT_UINT32_BE(n,b,i) \
|
||||
{ \
|
||||
(b)[(i) ] = (unsigned char) ( (n) >> 24 ); \
|
||||
(b)[(i) + 1] = (unsigned char) ( (n) >> 16 ); \
|
||||
(b)[(i) + 2] = (unsigned char) ( (n) >> 8 ); \
|
||||
(b)[(i) + 3] = (unsigned char) ( (n) ); \
|
||||
}
|
||||
#endif
|
||||
// sha1_16 adapted for OpenCL, see "sha1_16.c" for more information
|
||||
|
||||
__constant const uint32_t
|
||||
h0 = 0x67452301,
|
||||
|
4
ocl.h
4
ocl.h
@ -46,3 +46,7 @@ void ocl_assert(cl_int ret, const char * code, const char * file,
|
||||
OCL_Platform *ocl_info(cl_uint *p_num_platforms, int verbose);
|
||||
|
||||
void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id);
|
||||
|
||||
cl_program ocl_build_from_sources(
|
||||
unsigned num_sources, const char *source_names[],
|
||||
cl_context context, cl_device_id device_id, const char * options);
|
||||
|
77
ocl_brute.c
Normal file
77
ocl_brute.c
Normal file
@ -0,0 +1,77 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include "utils.h"
|
||||
#include "crypto.h"
|
||||
#include "ocl.h"
|
||||
|
||||
void ocl_brute() {
|
||||
TimeHP t0, t1; long long td;
|
||||
|
||||
cl_int err;
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
ocl_get_device(&platform_id, &device_id);
|
||||
if (platform_id == NULL || device_id == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
||||
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
|
||||
const char *source_names[] = {
|
||||
"cl/common.h",
|
||||
"cl/aes_tables.cl",
|
||||
"cl/aes_128.cl",
|
||||
"cl/dsi.cl",
|
||||
"cl/kernel_console_id.cl" };
|
||||
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
||||
source_names, context, device_id, NULL /* "-w -Werror" */);
|
||||
|
||||
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, "test_console_id", &err));
|
||||
|
||||
cl_ulong xor_l, xor_h, console_id_template = 0x08a1522617110100ull, out;
|
||||
cl_uint ctr0, ctr1, ctr2, ctr3;
|
||||
cl_int success = 0;
|
||||
|
||||
cl_mem mem_success =
|
||||
OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err));
|
||||
cl_mem mem_out =
|
||||
OCL_ASSERT2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_ulong), NULL, &err));
|
||||
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_success, CL_TRUE, 0, sizeof(cl_int), &success, 0, NULL, NULL));
|
||||
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_ulong), &xor_l));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &xor_h));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_ulong), &console_id_template));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_uint), &ctr0));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_uint), &ctr1));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 5, sizeof(cl_uint), &ctr2));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 6, sizeof(cl_uint), &ctr3));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 7, sizeof(cl_mem), &mem_success));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 8, sizeof(cl_mem), &mem_out));
|
||||
|
||||
size_t local;
|
||||
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
||||
printf("local work size: %u\n", (unsigned)local);
|
||||
|
||||
size_t num_items = 0x100;
|
||||
|
||||
get_hp_time(&t0);
|
||||
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
||||
clFinish(command_queue);
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
|
||||
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_success, CL_TRUE, 0, sizeof(cl_int), &success, 0, NULL, NULL));
|
||||
if (success) {
|
||||
// if success, the speed measurement is invalid
|
||||
printf("got a hit in %d microseconds\n", (int)td);
|
||||
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL));
|
||||
printf("%08x%08x\n", (unsigned)(out >> 32), (unsigned)(out|0xffffffffu));
|
||||
} else {
|
||||
printf("%d microseconds, %.2f M/s\n", (int)td, num_items * 1.0f / td);
|
||||
printf("sorry, no hit\n");
|
||||
}
|
||||
|
||||
clReleaseKernel(kernel);
|
||||
}
|
197
ocl_test.c
Normal file
197
ocl_test.c
Normal file
@ -0,0 +1,197 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include "utils.h"
|
||||
#include "ocl.h"
|
||||
#include "crypto.h"
|
||||
|
||||
#define BLOCK_SIZE 0x10
|
||||
#define NUM_BLOCKS (1 << 23)
|
||||
#define BLOCKS_PER_ITEM 1
|
||||
#define NUM_ITEMS (NUM_BLOCKS / BLOCKS_PER_ITEM)
|
||||
#define BUF_SIZE (BLOCK_SIZE * NUM_BLOCKS)
|
||||
|
||||
void ocl_test_run_and_read(const char * test_name, cl_kernel kernel,
|
||||
cl_device_id device_id, cl_command_queue command_queue,
|
||||
cl_mem mem_out, cl_uchar *buf_out)
|
||||
{
|
||||
printf("%s on %u MB\n", test_name, (unsigned)BUF_SIZE >> 20);
|
||||
TimeHP t0, t1; long long td;
|
||||
size_t local;
|
||||
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
||||
printf("local work size: %u\n", (unsigned)local);
|
||||
|
||||
size_t num_items = NUM_ITEMS;
|
||||
|
||||
get_hp_time(&t0);
|
||||
// apparently, setting local work size to NULL doesn't affect performance, at least in this kind of work
|
||||
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
||||
// OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, NULL, 0, NULL, NULL));
|
||||
clFinish(command_queue);
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for OpenCL, %.2f MB/s\n", (int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
get_hp_time(&t0);
|
||||
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, BUF_SIZE, buf_out, 0, NULL, NULL));
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for data download, %.2f MB/s\n", (int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
clReleaseKernel(kernel);
|
||||
}
|
||||
|
||||
void verify(const char *test_name, cl_uchar *buf_in, cl_uchar *buf_out, cl_uchar *buf_verify){
|
||||
if (memcmp(buf_verify, buf_out, BUF_SIZE)) {
|
||||
printf("%s: verification failed\n", test_name);
|
||||
unsigned count = 5;
|
||||
for (unsigned offset = 0; offset < BUF_SIZE; offset += BLOCK_SIZE) {
|
||||
if (memcmp(buf_verify + offset, buf_out + offset, BLOCK_SIZE)) {
|
||||
printf("difference @ 0x%08x/0x%08x:\n", offset, (unsigned)NUM_BLOCKS);
|
||||
printf("\t%s\n", hexdump(buf_in + offset, BLOCK_SIZE, 0));
|
||||
printf("\t%s\n", hexdump(buf_out + offset, BLOCK_SIZE, 0));
|
||||
printf("\t%s\n", hexdump(buf_verify + offset, BLOCK_SIZE, 0));
|
||||
if (!--count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
printf("%s: succeed\n", test_name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ocl_test() {
|
||||
TimeHP t0, t1; long long td;
|
||||
|
||||
cl_int err;
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id device_id;
|
||||
ocl_get_device(&platform_id, &device_id);
|
||||
if (platform_id == NULL || device_id == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_uchar *buf_in = malloc(BUF_SIZE);
|
||||
cl_uchar *buf_out = malloc(BUF_SIZE);
|
||||
cl_uchar *buf_verify = malloc(BUF_SIZE);
|
||||
|
||||
srand(2501);
|
||||
cl_uchar key[16];
|
||||
unsigned int aes_rk[RK_LEN];
|
||||
aes_gen_tables();
|
||||
for (unsigned i = 0; i < 16; ++i) {
|
||||
key[i] = rand() & 0xff;
|
||||
}
|
||||
printf("AES Key: %s\n", hexdump(key, 16, 0));
|
||||
get_hp_time(&t0);
|
||||
if(cpu_has_rdrand()){
|
||||
// ~190 MB/s @ X230, ~200 without the success check
|
||||
printf("randomize source buffer using RDRAND\n");
|
||||
if (!rdrand_fill((cl_ulong*)buf_in, BUF_SIZE >> 3)) {
|
||||
printf("RDRND failed\n");
|
||||
exit(-1);
|
||||
}
|
||||
}else {
|
||||
printf("randomize source buffer using AES OFB\n");
|
||||
// rand() & 0xff is about ~60 MB/s @ X230
|
||||
// it's worse than that AES single thread C, so OFB it is
|
||||
// ~240 MB/s, even faster than RDRAND
|
||||
unsigned int aes_rk[RK_LEN];
|
||||
unsigned char key_iv[16 * 2];
|
||||
for (unsigned i = 0; i < 16 * 2; ++i) {
|
||||
key_iv[i] = rand() & 0xff;
|
||||
}
|
||||
aes_set_key_enc_128(aes_rk, key_iv);
|
||||
aes_encrypt_128(aes_rk, key_iv + 16, buf_in);
|
||||
unsigned char *p_in = buf_in, *p_out = buf_in + 16,
|
||||
*p_end = buf_in + BUF_SIZE;
|
||||
while (p_out < p_end) {
|
||||
aes_encrypt_128(aes_rk, p_in, p_out);
|
||||
p_in = p_out;
|
||||
p_out += 16;
|
||||
}
|
||||
}
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for preparing test data, %.2f MB/s\n",
|
||||
(int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
||||
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
|
||||
const char *source_names[] = {
|
||||
"cl/common.h",
|
||||
"cl/sha1_16.cl",
|
||||
"cl/aes_tables.cl",
|
||||
"cl/aes_128.cl",
|
||||
"cl/kernel_tests.cl" };
|
||||
char options[0x100];
|
||||
sprintf(options, "-w -Werror -DBLOCKS_PER_ITEM=%u", BLOCKS_PER_ITEM);
|
||||
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
||||
source_names, context, device_id, options);
|
||||
|
||||
// create buffer and upload data
|
||||
cl_mem mem_key = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_ONLY, 16, NULL, &err));
|
||||
cl_mem mem_in = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_ONLY, BUF_SIZE, NULL, &err));
|
||||
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, BUF_SIZE, NULL, &err));
|
||||
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_key, CL_TRUE, 0, 16, key, 0, NULL, NULL));
|
||||
get_hp_time(&t0);
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_in, CL_TRUE, 0, BUF_SIZE, buf_in, 0, NULL, NULL));
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for data upload, %.2f MB/s\n", (int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
// SHA1_16 test
|
||||
const char * test_name = "sha1_16_test";
|
||||
|
||||
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, test_name, &err));
|
||||
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_in));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_out));
|
||||
|
||||
ocl_test_run_and_read(test_name, kernel, device_id, command_queue, mem_out, buf_out);
|
||||
|
||||
get_hp_time(&t0);
|
||||
for (unsigned offset = 0; offset < BUF_SIZE; offset += BLOCK_SIZE) {
|
||||
sha1_16(buf_in + offset, buf_verify + offset);
|
||||
}
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for C(single thread), %.2f MB/s\n", (int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
verify(test_name, buf_in, buf_out, buf_verify);
|
||||
|
||||
// AES 128 ECB test
|
||||
test_name = "aes_128_ecb_test";
|
||||
|
||||
kernel = OCL_ASSERT2(clCreateKernel(program, test_name, &err));
|
||||
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_key));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_in));
|
||||
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_out));
|
||||
|
||||
ocl_test_run_and_read(test_name, kernel, device_id, command_queue, mem_out, buf_out);
|
||||
/*
|
||||
if(test_case == TEST_AES_128_ECB){
|
||||
dump_to_file("r:/test_aes_in.bin", buf_in, io_buf_len);
|
||||
dump_to_file("r:/test_aes_out.bin", buf_out, io_buf_len);
|
||||
}
|
||||
*/
|
||||
get_hp_time(&t0);
|
||||
for (unsigned offset = 0; offset < BUF_SIZE; offset += BLOCK_SIZE) {
|
||||
// setting the same key over and over is stupid
|
||||
// yet we still do it to make the results comparable
|
||||
aes_set_key_enc_128(aes_rk, key);
|
||||
aes_encrypt_128(aes_rk, buf_in + offset, buf_verify + offset);
|
||||
}
|
||||
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
||||
printf("%d microseconds for C(single thread), %.2f MB/s\n", (int)td, BUF_SIZE * 1.0f / td);
|
||||
|
||||
verify(test_name, buf_in, buf_out, buf_verify);
|
||||
|
||||
// cleanup
|
||||
free(buf_in); free(buf_out); free(buf_verify);
|
||||
clReleaseMemObject(mem_in);
|
||||
clReleaseMemObject(mem_out);
|
||||
clReleaseMemObject(mem_key);
|
||||
clReleaseProgram(program);
|
||||
clReleaseCommandQueue(command_queue);
|
||||
clReleaseContext(context);
|
||||
}
|
40
ocl_util.c
40
ocl_util.c
@ -4,6 +4,7 @@
|
||||
#include <malloc.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include "ocl.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define STATIC_ASSERT(c) static_assert(c, #c)
|
||||
STATIC_ASSERT(sizeof(char) == sizeof(cl_char));
|
||||
@ -192,3 +193,42 @@ void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id) {
|
||||
*p_device_id = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
cl_program ocl_build_from_sources(
|
||||
unsigned num_sources, const char *source_names[],
|
||||
cl_context context, cl_device_id device_id, const char * options)
|
||||
{
|
||||
TimeHP t0, t1;
|
||||
cl_int err;
|
||||
// read sources
|
||||
char **sources = malloc(sizeof(char*) * num_sources);
|
||||
size_t *source_sizes = malloc(sizeof(size_t) * num_sources);
|
||||
read_files(num_sources, source_names, sources, source_sizes);
|
||||
|
||||
// compile
|
||||
get_hp_time(&t0);
|
||||
// WTF? GCC complains if I pass char ** to a function expecting const char **?
|
||||
cl_program program = OCL_ASSERT2(clCreateProgramWithSource(context, num_sources,
|
||||
(const char **)sources, source_sizes, &err));
|
||||
// printf("compiler options: %s\n", options);
|
||||
err = clBuildProgram(program, 0, NULL, options, NULL, NULL);
|
||||
get_hp_time(&t1);
|
||||
printf("%d microseconds for compile\n", (int)hp_time_diff(&t0, &t1));
|
||||
if (err != CL_SUCCESS) {
|
||||
fprintf(stderr, "failed to build program, error: %s, build log:\n", ocl_err_msg(err));
|
||||
size_t len;
|
||||
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
|
||||
char *buf_log = malloc(len + 1);
|
||||
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, buf_log, NULL);
|
||||
buf_log[len] = '\0';
|
||||
fprintf(stderr, "%s\n", buf_log);
|
||||
free(buf_log);
|
||||
exit(err);
|
||||
}
|
||||
for (unsigned i = 0; i < num_sources; ++i) {
|
||||
free(sources[i]);
|
||||
}
|
||||
free(sources);
|
||||
free(source_sizes);
|
||||
return program;
|
||||
}
|
||||
|
68
utils.c
68
utils.c
@ -3,8 +3,15 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <cpuid.h>
|
||||
#elif _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
int htoi(char a){
|
||||
if(a >= '0' && a <= '9'){
|
||||
return a - '0';
|
||||
@ -42,7 +49,7 @@ int hex2bytes(unsigned char *out, unsigned byte_len, const char *in, int critica
|
||||
#endif
|
||||
|
||||
static char hexdump_buf[HEXDUMP_BUF_SIZE];
|
||||
// CAUTION, this always assume you have a buffer big enough
|
||||
// CAUTION, this always assume the buffer is big enough
|
||||
const char *hexdump(const void *b, unsigned l, int space){
|
||||
const unsigned char *p = (unsigned char*)b;
|
||||
char *out = hexdump_buf;
|
||||
@ -80,4 +87,61 @@ long long hp_time_diff(struct timeval *pt0, struct timeval *pt1) {
|
||||
return diff;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// CAUTION: caller is responsible to free the buf
|
||||
char * read_file(const char *file_name, size_t *p_size) {
|
||||
FILE * f = fopen(file_name, "rb");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "can't read file: %s", file_name);
|
||||
exit(-1);
|
||||
}
|
||||
fseek(f, 0, SEEK_END);
|
||||
*p_size = ftell(f);
|
||||
char * buf = malloc(*p_size);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
fread(buf, *p_size, 1, f);
|
||||
fclose(f);
|
||||
return buf;
|
||||
}
|
||||
|
||||
void read_files(unsigned num_files, const char *file_names[], char *ptrs[], size_t sizes[]) {
|
||||
for (unsigned i = 0; i < num_files; ++i) {
|
||||
ptrs[i] = read_file(file_names[i], &sizes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_to_file(const char *file_name, const void *buf, size_t len) {
|
||||
FILE *f = fopen(file_name, "wb");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "can't open file to write: %s\n", file_name);
|
||||
return;
|
||||
}
|
||||
fwrite(buf, len, 1, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
int cpu_has_rdrand() {
|
||||
#if __GNUC__
|
||||
unsigned a = 0, b = 0, c = 0, d = 0;
|
||||
__get_cpuid(1, &a, &b, &c, &d);
|
||||
return c & bit_RDRND;
|
||||
#elif _MSC_VER
|
||||
int regs[4];
|
||||
__cpuid(regs, 1);
|
||||
return regs[2] & (1<<30);
|
||||
#else
|
||||
// ICL only?
|
||||
return _may_i_use_cpu_feature(_FEATURE_RDRND);
|
||||
#endif
|
||||
}
|
||||
|
||||
// input must be multiple of uint64_t
|
||||
int rdrand_fill(unsigned long long *p, size_t size) {
|
||||
unsigned long long *p_end = p + size;
|
||||
int success = 1;
|
||||
while (p < p_end) {
|
||||
success &= _rdrand64_step(p++);
|
||||
}
|
||||
return success;
|
||||
}
|
18
utils.h
18
utils.h
@ -5,19 +5,29 @@
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <Windows.h>
|
||||
typedef LARGE_INTEGER HP_Time;
|
||||
typedef LARGE_INTEGER TimeHP;
|
||||
#define get_hp_time QueryPerformanceCounter
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/time.h>
|
||||
typedef struct timeval HP_Time;
|
||||
void get_hp_time(HP_Time *pt);
|
||||
typedef struct timeval TimeHP;
|
||||
void get_hp_time(TimeHP *pt);
|
||||
|
||||
#endif
|
||||
|
||||
long long hp_time_diff(HP_Time *pt0, HP_Time *pt1);
|
||||
long long hp_time_diff(TimeHP *pt0, TimeHP *pt1);
|
||||
|
||||
int hex2bytes(unsigned char *out, unsigned byte_len, const char *in, int critical);
|
||||
|
||||
const char * hexdump(const void *a, unsigned l, int space);
|
||||
|
||||
char * read_file(const char *file_name, size_t *p_size);
|
||||
|
||||
void read_files(unsigned num_files, const char *file_names[], char *ptrs[], size_t sizes[]);
|
||||
|
||||
void dump_to_file(const char *file_name, const void *buf, size_t len);
|
||||
|
||||
int cpu_has_rdrand();
|
||||
|
||||
int rdrand_fill(unsigned long long *p, size_t size);
|
||||
|
Loading…
Reference in New Issue
Block a user