mirror of
https://github.com/Jimmy-Z/bfCL.git
synced 2025-06-18 11:05:49 -04:00
498 lines
17 KiB
C
498 lines
17 KiB
C
|
|
#include <stdio.h>
|
|
#include "utils.h"
|
|
#include "crypto.h"
|
|
#include "ocl.h"
|
|
#include "dsi.h"
|
|
#include "ocl_brute.h"
|
|
|
|
// 123 -> 0x123
|
|
static cl_ulong to_bcd(cl_ulong i) {
|
|
cl_ulong o = 0;
|
|
unsigned shift = 0;
|
|
while (i) {
|
|
o |= (i % 10) << (shift++ << 2);
|
|
i /= 10;
|
|
}
|
|
return o;
|
|
}
|
|
|
|
// 0x123 -> 123
|
|
static cl_ulong from_bcd(cl_ulong i) {
|
|
cl_ulong o = 0;
|
|
cl_ulong pow = 1;
|
|
while (i) {
|
|
o += (i & 0xf) * pow;
|
|
i >>= 4;
|
|
pow *= 10;
|
|
}
|
|
return o;
|
|
}
|
|
|
|
int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid,
|
|
cl_uint offset0, const cl_uchar *src0, const cl_uchar *ver0,
|
|
cl_uint offset1, const cl_uchar *src1, const cl_uchar *ver1,
|
|
ocl_brute_mode mode)
|
|
{
|
|
// preparing args
|
|
cl_ulong console_id_template = u64be(console_id);
|
|
cl_ulong xor0[2] = { 0 }, xor1[2] = { 0 };
|
|
dsi_make_xor((u8*)xor0, src0, ver0);
|
|
if (src1 != 0) {
|
|
dsi_make_xor((u8*)xor1, src1, ver1);
|
|
}
|
|
cl_uint ctr[4] = { 0 };
|
|
if (emmc_cid != 0) {
|
|
dsi_make_ctr((u8*)ctr, emmc_cid, offset0);
|
|
}
|
|
cl_ulong out = 0;
|
|
#if DEBUG
|
|
{
|
|
printf("XOR : %s\n", hexdump(xor0, 16, 0));
|
|
u8 aes_key[16];
|
|
dsi_make_key(aes_key, u64be(console_id));
|
|
printf("AES KEY : %s\n", hexdump(aes_key, 16, 0));
|
|
aes_init();
|
|
aes_set_key_enc_128(aes_key);
|
|
printf("CTR : %s\n", hexdump(ctr, 16, 0));
|
|
aes_encrypt_128((u8*)ctr, (u8*)xor0);
|
|
printf("XOR TRY : %s\n", hexdump(xor0, 16, 0));
|
|
// exit(1);
|
|
}
|
|
#endif
|
|
|
|
TimeHP t0, t1; long long td = 0;
|
|
|
|
cl_int err;
|
|
cl_platform_id platform_id;
|
|
cl_device_id device_id;
|
|
ocl_get_device(&platform_id, &device_id);
|
|
if (platform_id == NULL || device_id == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
|
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
|
|
|
const char *source_names[] = {
|
|
"cl/common.h",
|
|
"cl/aes_tables.cl",
|
|
"cl/aes_128.cl",
|
|
"cl/dsi.h",
|
|
"cl/bcd.h",
|
|
emmc_cid != 0 ? "cl/kernel_console_id.cl" : "cl/kernel_console_id_ds3.cl" };
|
|
const char * options;
|
|
if (mode == NORMAL) {
|
|
options = "-w -Werror";
|
|
} else if (mode == BCD) {
|
|
options = "-w -Werror -DBCD";
|
|
} else if (mode == CTR) {
|
|
options = "-w -Werror -DCTR";
|
|
} else {
|
|
return -1;
|
|
}
|
|
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
|
source_names, context, device_id, options);
|
|
|
|
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, emmc_cid != 0 ? "test_console_id" : "test_console_id_ds3", &err));
|
|
|
|
size_t local;
|
|
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
|
printf("local work size: %u\n", (unsigned)local);
|
|
|
|
// there's no option to create it zero initialized
|
|
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_ulong), NULL, &err));
|
|
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL));
|
|
|
|
unsigned template_bits;
|
|
cl_ulong total;
|
|
unsigned group_bits;
|
|
size_t num_items;
|
|
unsigned loops;
|
|
if (mode == NORMAL || mode == CTR) {
|
|
// CTR mode is actually quite similar to NORMAL except we brute the lower 31 bits
|
|
// since bit 31 doesn't matter, got ORed with 1 when making key
|
|
template_bits = 32;
|
|
total = 1ull << (64 - template_bits);
|
|
group_bits = 28;
|
|
num_items = 1ull << group_bits; // 268435456
|
|
loops = 1 << (64 - template_bits - group_bits);
|
|
if (mode == CTR) {
|
|
loops >>= 1;
|
|
}
|
|
} else {
|
|
// 0x08a15????????1?? as example, 10 digit to brute, beware the known digit near the end
|
|
// 5 BCD digits, used to calculate template mask
|
|
template_bits = 20;
|
|
// I wish we could use 1e10 in C, counting 0 is not good to your eye
|
|
total = from_bcd(1ull << 40);
|
|
// work items variations on lower bits per enqueue, 8 + 1 digits, including the known digit
|
|
// reduced from 36 to 28 to make nvidia runtime happy
|
|
group_bits = 28;
|
|
// work items per enqueue, don't count the known digit here
|
|
num_items = from_bcd(1ull << (group_bits - 4));
|
|
// between the template bits and group bits, it's the loop bits
|
|
loops = from_bcd(1 << (64 - template_bits - group_bits));
|
|
}
|
|
// it needs to be aligned, a little overhead hurts nobody
|
|
if (num_items % local) {
|
|
// printf("alignment: %"LL"d -> ", num_items);
|
|
num_items = (num_items / local + 1) * local;
|
|
// printf("%"LL"d\n", num_items);
|
|
}
|
|
console_id_template &= ((cl_ulong)-1ll) << (64 - template_bits);
|
|
// printf("total: %I64u, 0x%I64x\n", total, total);
|
|
// printf("num_items: %I64u, 0x%I64x\n", num_items, num_items);
|
|
// printf("steps: %u, 0x%x\n", steps, steps);
|
|
OCL_ASSERT(clSetKernelArg(kernel, 7, sizeof(cl_mem), &mem_out));
|
|
if (emmc_cid != 0) {
|
|
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_uint), &ctr[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_uint), &ctr[1]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_uint), &ctr[2]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_uint), &ctr[3]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &xor0[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &xor0[1]));
|
|
} else {
|
|
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_uint), &offset0));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_ulong), &xor0[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &xor0[1]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_uint), &offset1));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &xor1[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &xor1[1]));
|
|
}
|
|
get_hp_time(&t0);
|
|
for (unsigned i = 0; i < loops; ++i) {
|
|
cl_ulong console_id = console_id_template;
|
|
if (mode == BCD) {
|
|
console_id |= to_bcd(i) << group_bits;
|
|
} else {
|
|
console_id |= (u64)i << group_bits;
|
|
}
|
|
printf("%016"LL"x\n", console_id);
|
|
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_ulong), &console_id));
|
|
|
|
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
|
clFinish(command_queue);
|
|
|
|
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL));
|
|
if (out) {
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
printf("got a hit: %016"LL"x\n", out);
|
|
// also write to a file
|
|
dump_to_file(emmc_cid ? hexdump(emmc_cid, 16, 0) : hexdump(src0, 16, 0), &out, 8);
|
|
break;
|
|
}
|
|
}
|
|
|
|
u64 tested = 0;
|
|
if (!out) {
|
|
tested = total;
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
} else {
|
|
tested = out - console_id_template;
|
|
if (mode == BCD) {
|
|
tested = from_bcd(((tested & ~0xfff) >> 4) | (tested & 0xff));
|
|
}
|
|
}
|
|
printf("%.2f seconds, %.2f M/s\n", td / 1000000.0, tested * 1.0 / td);
|
|
|
|
clReleaseKernel(kernel);
|
|
clReleaseMemObject(mem_out);
|
|
clReleaseProgram(program);
|
|
clReleaseCommandQueue(command_queue);
|
|
clReleaseContext(context);
|
|
return !out;
|
|
}
|
|
|
|
int ocl_brute_emmc_cid(const cl_uchar *console_id, cl_uchar *emmc_cid,
|
|
cl_uint offset, const cl_uchar *src, const cl_uchar *ver)
|
|
{
|
|
// preparing args
|
|
u8 aes_key[16];
|
|
dsi_make_key(aes_key, u64be(console_id));
|
|
aes_init();
|
|
aes_set_key_dec_128(aes_key);
|
|
cl_ulong xor[2];
|
|
dsi_make_xor((u8*)xor, src, ver);
|
|
cl_ulong ctr[2];
|
|
aes_decrypt_128((u8*)xor , (u8*)ctr);
|
|
cl_ulong emmc_cid_sha1_16[2];
|
|
byte_reverse_16((u8*)emmc_cid_sha1_16, (u8*)ctr);
|
|
sub_128_64(emmc_cid_sha1_16, offset);
|
|
cl_ulong out = 0;
|
|
#ifdef DEBUG
|
|
{
|
|
printf("SHA1 A: %s\n", hexdump(emmc_cid_sha1_16, 16, 0));
|
|
u8 sha1_verify[16];
|
|
sha1_16(emmc_cid, sha1_verify);
|
|
printf("SHA1 B: %s\n", hexdump(sha1_verify, 16, 0));
|
|
}
|
|
#endif
|
|
|
|
TimeHP t0, t1; long long td = 0;
|
|
|
|
cl_int err;
|
|
cl_platform_id platform_id;
|
|
cl_device_id device_id;
|
|
ocl_get_device(&platform_id, &device_id);
|
|
if (platform_id == NULL || device_id == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
|
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
|
|
|
const char *source_names[] = {
|
|
"cl/common.h",
|
|
"cl/sha1_16.cl",
|
|
"cl/kernel_emmc_cid.cl" };
|
|
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
|
source_names, context, device_id, "-w -Werror");
|
|
|
|
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, "test_emmc_cid", &err));
|
|
|
|
size_t local;
|
|
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
|
printf("local work size: %u\n", (unsigned)local);
|
|
|
|
// there's no option to create it zero initialized
|
|
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_ulong), NULL, &err));
|
|
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL));
|
|
|
|
unsigned brute_bits = 32;
|
|
unsigned group_bits = 28;
|
|
unsigned loop_bits = brute_bits - group_bits;
|
|
unsigned loops = 1ull << loop_bits;
|
|
size_t num_items = 1ull << group_bits;
|
|
// it needs to be aligned, a little overhead hurts nobody
|
|
if (num_items % local) {
|
|
num_items = (num_items / local + 1) * local;
|
|
}
|
|
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_ulong), emmc_cid_sha1_16));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_ulong), emmc_cid_sha1_16 + 1));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_mem), &mem_out));
|
|
get_hp_time(&t0);
|
|
for (unsigned i = 0; i < loops; ++i) {
|
|
*(u32*)(emmc_cid + 1) = i << group_bits;
|
|
puts(hexdump(emmc_cid, 16, 0));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_ulong), emmc_cid));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_ulong), emmc_cid + 8));
|
|
|
|
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
|
clFinish(command_queue);
|
|
|
|
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL));
|
|
if (out) {
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
*(u32*)(emmc_cid + 1) |= out;
|
|
printf("got a hit: %s\n", hexdump(emmc_cid, 16, 0));
|
|
// also write to a file
|
|
dump_to_file(hexdump(console_id, 8, 0), emmc_cid, 16);
|
|
break;
|
|
}
|
|
}
|
|
|
|
u64 tested = 0;
|
|
if (!out) {
|
|
tested = 1ull << 32;
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
} else {
|
|
tested = *(u32*)(emmc_cid + 1);
|
|
}
|
|
printf("%.2f seconds, %.2f M/s\n", td / 1000000.0, tested * 1.0 / td);
|
|
|
|
clReleaseKernel(kernel);
|
|
clReleaseMemObject(mem_out);
|
|
clReleaseProgram(program);
|
|
clReleaseCommandQueue(command_queue);
|
|
clReleaseContext(context);
|
|
return !out;
|
|
}
|
|
|
|
/* more info:
|
|
* https://zoogie.github.io/web/34%E2%85%95c3/#/22
|
|
* https://gbatemp.net/threads/eol-is-lol-the-34c3-talk-for-3ds-that-never-was.494698/
|
|
* what I'm doing here is simply brute the 3rd u32 of a u128 so that the first half of sha256 matches ver
|
|
*/
|
|
int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver)
|
|
{
|
|
TimeHP t0, t1; long long td = 0;
|
|
|
|
cl_int err;
|
|
cl_platform_id platform_id;
|
|
cl_device_id device_id;
|
|
ocl_get_device(&platform_id, &device_id);
|
|
if (platform_id == NULL || device_id == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
|
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
|
|
|
const char *source_names[] = {
|
|
"cl/common.h",
|
|
"cl/sha256_16.cl",
|
|
"cl/kernel_msky.cl" };
|
|
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
|
source_names, context, device_id, "-w -Werror -DSHA256_16");
|
|
|
|
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, "test_msky", &err));
|
|
|
|
size_t local;
|
|
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
|
printf("local work size: %u\n", (unsigned)local);
|
|
|
|
// there's no option to create it zero initialized
|
|
cl_uint out = 0;
|
|
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &err));
|
|
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
|
|
|
unsigned brute_bits = 32;
|
|
unsigned group_bits = 20;
|
|
unsigned loop_bits = brute_bits - group_bits;
|
|
unsigned loops = 1ull << loop_bits;
|
|
size_t num_items = 1ull << group_bits;
|
|
// it needs to be aligned, a little overhead hurts nobody
|
|
if (num_items % local) {
|
|
num_items = (num_items / local + 1) * local;
|
|
}
|
|
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_uint), &msky[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_uint), &msky[1]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_uint), &ver[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 5, sizeof(cl_uint), &ver[1]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 6, sizeof(cl_uint), &ver[2]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 7, sizeof(cl_uint), &ver[3]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 8, sizeof(cl_mem), &mem_out));
|
|
get_hp_time(&t0);
|
|
int msky3_range = 16384; // "fan out" +/-8192 on msky3
|
|
unsigned i, j;
|
|
for (j = 0; j < msky3_range; ++j) {
|
|
cl_uint msky3 = msky[3] + (j & 1 ? 1 : -1) * ((j + 1) >> 1);
|
|
printf("%08x\r", msky3);
|
|
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_uint), &msky3));
|
|
for (i = 0; i < loops; ++i) {
|
|
cl_uint msky2 = i << group_bits;
|
|
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_uint), &msky2));
|
|
|
|
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
|
clFinish(command_queue);
|
|
|
|
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
|
if (out) {
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
cl_uint msky_ret[4] = { msky[0], msky[1], out, msky3 };
|
|
printf("got a hit: %s\n", hexdump(msky_ret, 16, 0));
|
|
break;
|
|
}
|
|
}
|
|
if (out) {
|
|
break;
|
|
}
|
|
}
|
|
u64 tested = 0;
|
|
if (!out) {
|
|
tested = (1ull << brute_bits) * msky3_range;
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
} else {
|
|
tested = out + (1ull << brute_bits) * j;
|
|
}
|
|
printf("%.2f seconds, %.2f M/s\n", td / 1000000.0, tested * 1.0 / td);
|
|
|
|
clReleaseKernel(kernel);
|
|
clReleaseMemObject(mem_out);
|
|
clReleaseProgram(program);
|
|
clReleaseCommandQueue(command_queue);
|
|
clReleaseContext(context);
|
|
return !out;
|
|
}
|
|
|
|
// LFCS brute force, https://gist.github.com/zoogie/4046726878dba89eddfa1fc07c8a27da
|
|
int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver)
|
|
{
|
|
TimeHP t0, t1; long long td = 0;
|
|
|
|
cl_int err;
|
|
cl_platform_id platform_id;
|
|
cl_device_id device_id;
|
|
ocl_get_device(&platform_id, &device_id);
|
|
if (platform_id == NULL || device_id == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
cl_context context = OCL_ASSERT2(clCreateContext(0, 1, &device_id, NULL, NULL, &err));
|
|
cl_command_queue command_queue = OCL_ASSERT2(clCreateCommandQueue(context, device_id, 0, &err));
|
|
|
|
const char *source_names[] = {
|
|
"cl/common.h",
|
|
"cl/sha256_16.cl",
|
|
"cl/kernel_lfcs.cl" };
|
|
cl_program program = ocl_build_from_sources(sizeof(source_names) / sizeof(char *),
|
|
source_names, context, device_id, "-w -Werror -DSHA256_12");
|
|
|
|
cl_kernel kernel = OCL_ASSERT2(clCreateKernel(program, "test_lfcs", &err));
|
|
|
|
size_t local;
|
|
OCL_ASSERT(clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
|
|
printf("local work size: %u\n", (unsigned)local);
|
|
|
|
// there's no option to create it zero initialized
|
|
cl_uint out = 0;
|
|
cl_mem mem_out = OCL_ASSERT2(clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &err));
|
|
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
|
|
|
unsigned brute_bits = 32;
|
|
unsigned group_bits = 20;
|
|
unsigned loop_bits = brute_bits - group_bits;
|
|
unsigned loops = 1ull << loop_bits;
|
|
size_t num_items = 1ull << group_bits;
|
|
// it needs to be aligned, a little overhead hurts nobody
|
|
if (num_items % local) {
|
|
num_items = (num_items / local + 1) * local;
|
|
}
|
|
OCL_ASSERT(clSetKernelArg(kernel, 1, sizeof(cl_ushort), &newflag));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_uint), &ver[0]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 3, sizeof(cl_uint), &ver[1]));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 4, sizeof(cl_mem), &mem_out));
|
|
get_hp_time(&t0);
|
|
int fan_range = 0x10000; // "fan out" full 16 bits
|
|
unsigned i, j;
|
|
for (j = 0; j < fan_range; ++j) {
|
|
int fan = (j & 1 ? 1 : -1) * ((j + 1) >> 1);
|
|
printf("%d \r", fan);
|
|
for (i = 0; i < loops; ++i) {
|
|
cl_uint lfcs = lfcs_template + fan * 0x10000 + (i << (group_bits - 16));
|
|
OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_uint), &lfcs));
|
|
|
|
OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL));
|
|
clFinish(command_queue);
|
|
|
|
OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
|
if (out) {
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
lfcs += out >> 16;
|
|
printf("got a hit: %s (rand: 0x%04x)\n", hexdump(&lfcs, 4, 0), out & 0xffff);
|
|
break;
|
|
}
|
|
}
|
|
if (out) {
|
|
break;
|
|
}
|
|
}
|
|
u64 tested = 0;
|
|
if (!out) {
|
|
tested = (1ull << brute_bits) * fan_range;
|
|
get_hp_time(&t1); td = hp_time_diff(&t0, &t1);
|
|
} else {
|
|
tested = out + (1ull << brute_bits) * j;
|
|
}
|
|
printf("%.2f seconds, %.2f M/s\n", td / 1000000.0, tested * 1.0 / td);
|
|
|
|
clReleaseKernel(kernel);
|
|
clReleaseMemObject(mem_out);
|
|
clReleaseProgram(program);
|
|
clReleaseCommandQueue(command_queue);
|
|
clReleaseContext(context);
|
|
return !out;
|
|
}
|
|
|