mirror of
https://github.com/Jimmy-Z/bfCL.git
synced 2025-06-18 11:05:49 -04:00
Reduce work size to keep the NVIDIA runtime happy
Give NVIDIA GPUs more weight in device selection
This commit is contained in:
parent
5f7e13a770
commit
687acab0c8
4
cl/dsi.h
4
cl/dsi.h
@ -2,10 +2,10 @@
|
||||
// more about this: https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c
|
||||
|
||||
__constant static const u64 DSi_KEY_Y[2] =
|
||||
{0xbd4dc4d30ab9dc76ull, 0xe1a00005202ddd1dull};
|
||||
{0xbd4dc4d30ab9dc76ul, 0xe1a00005202ddd1dul};
|
||||
|
||||
__constant static const u64 DSi_KEY_MAGIC[2] =
|
||||
{0x2a680f5f1a4f3e79ull, 0xfffefb4e29590258ull};
|
||||
{0x2a680f5f1a4f3e79ul, 0xfffefb4e29590258ul};
|
||||
|
||||
// CAUTION this one doesn't work in-place
|
||||
inline void byte_reverse_16(u8 *out, const u8 *in){
|
||||
|
@ -127,7 +127,8 @@ int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid,
|
||||
// I wish we could use 1e10 in C, counting 0 is not good to your eye
|
||||
total = from_bcd(1ull << 40);
|
||||
// work items variations on lower bits per enqueue, 8 + 1 digits, including the known digit
|
||||
group_bits = 36;
|
||||
// reduced from 36 to 28 to make nvidia runtime happy
|
||||
group_bits = 28;
|
||||
// work items per enqueue, don't count the known digit here
|
||||
num_items = from_bcd(1ull << (group_bits - 4));
|
||||
// between the template bits and group bits, it's the loop bits
|
||||
@ -347,7 +348,7 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver)
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
||||
|
||||
unsigned brute_bits = 32;
|
||||
unsigned group_bits = 28;
|
||||
unsigned group_bits = 20;
|
||||
unsigned loop_bits = brute_bits - group_bits;
|
||||
unsigned loops = 1ull << loop_bits;
|
||||
size_t num_items = 1ull << group_bits;
|
||||
@ -440,7 +441,7 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver)
|
||||
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));
|
||||
|
||||
unsigned brute_bits = 32;
|
||||
unsigned group_bits = 28;
|
||||
unsigned group_bits = 20;
|
||||
unsigned loop_bits = brute_bits - group_bits;
|
||||
unsigned loops = 1ull << loop_bits;
|
||||
size_t num_items = 1ull << group_bits;
|
||||
|
@ -182,8 +182,10 @@ void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id) {
|
||||
&& devices[j].c_avail == CL_TRUE){
|
||||
cl_ulong cap = 1ull * devices[j].max_compute_units * devices[j].freq;
|
||||
// unfortunately that metric is not comparable between different vendors
|
||||
if (strstr((const char*)devices[j].name, "Intel") == 0) {
|
||||
if (strstr((const char*)devices[j].vendor, "Advanced Micro Devices") != 0) {
|
||||
cap *= 64;
|
||||
} else if(strstr((const char*)devices[j].vendor, "NVIDIA") != 0) {
|
||||
cap *= 128;
|
||||
}
|
||||
if (cap > maximum) {
|
||||
maximum = cap;
|
||||
|
Loading…
Reference in New Issue
Block a user