From 8cb4fc9d6e7bc1a7b1b7087e518b89a327184ebc Mon Sep 17 00:00:00 2001 From: troky Date: Tue, 10 Mar 2015 10:26:58 +0100 Subject: [PATCH] Added Pluck algo --- AUTHORS.md | 1 + Makefile.am | 1 + algorithm.c | 289 +++++++++--------- algorithm.h | 11 +- algorithm/pluck.c | 482 +++++++++++++++++++++++++++++++ algorithm/pluck.h | 10 + kernel/pluck.cl | 463 +++++++++++++++++++++++++++++ ocl.c | 236 ++++++++++----- sgminer.c | 2 +- winbuild/sgminer.vcxproj | 2 + winbuild/sgminer.vcxproj.filters | 6 + 11 files changed, 1299 insertions(+), 204 deletions(-) create mode 100644 algorithm/pluck.c create mode 100644 algorithm/pluck.h create mode 100644 kernel/pluck.cl diff --git a/AUTHORS.md b/AUTHORS.md index 51d70b36..ad42123c 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -53,6 +53,7 @@ updated by many others. * Perry Huang * Joseph Bruggeman * Badman74 +* djm34 ...and many others. See: diff --git a/Makefile.am b/Makefile.am index 9d272c96..39da01b1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -70,6 +70,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h +sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index fd9d9f07..d478b5f3 100644 --- a/algorithm.c +++ b/algorithm.c @@ -33,6 +33,7 @@ #include "algorithm/neoscrypt.h" #include "algorithm/whirlpoolx.h" #include "algorithm/lyra2re.h" +#include "algorithm/pluck.h" #include "compat.h" @@ -56,7 +57,8 @@ const char *algorithm_type_str[] = { "Whirlcoin", "Neoscrypt", "WhirlpoolX", - "Lyra2RE" + "Lyra2RE", + "Pluck" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -93,7 +95,7 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru { char buf[255]; sprintf(buf, " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%u -D NFACTOR=%d", - cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, algorithm->nfactor); + cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, algorithm->nfactor); strcat(data->compiler_options, buf); sprintf(buf, "lg%utc%unf%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, algorithm->nfactor); @@ -104,10 +106,10 @@ static void append_neoscrypt_compiler_options(struct _build_kernel_data *data, s { char buf[255]; sprintf(buf, " %s-D MAX_GLOBAL_THREADS=%lu ", - ((cgpu->lookup_gap > 0)?" -D LOOKUP_GAP=2 ":""), (unsigned long)cgpu->thread_concurrency); + ((cgpu->lookup_gap > 0) ? " -D LOOKUP_GAP=2 " : ""), (unsigned long)cgpu->thread_concurrency); strcat(data->compiler_options, buf); - sprintf(buf, "%stc%lu", ((cgpu->lookup_gap > 0)?"lg":""), (unsigned long)cgpu->thread_concurrency); + sprintf(buf, "%stc%lu", ((cgpu->lookup_gap > 0) ? "lg" : ""), (unsigned long)cgpu->thread_concurrency); strcat(data->binary_filename, buf); } @@ -115,10 +117,10 @@ static void append_x11_compiler_options(struct _build_kernel_data *data, struct { char buf[255]; sprintf(buf, " -D SPH_COMPACT_BLAKE_64=%d -D SPH_LUFFA_PARALLEL=%d -D SPH_KECCAK_UNROLL=%u ", - ((opt_blake_compact)?1:0), ((opt_luffa_parallel)?1:0), (unsigned int)opt_keccak_unroll); + ((opt_blake_compact) ? 1 : 0), ((opt_luffa_parallel) ? 1 : 0), (unsigned int)opt_keccak_unroll); strcat(data->compiler_options, buf); - sprintf(buf, "ku%u%s%s", (unsigned int)opt_keccak_unroll, ((opt_blake_compact)?"bc":""), ((opt_luffa_parallel)?"lp":"")); + sprintf(buf, "ku%u%s%s", (unsigned int)opt_keccak_unroll, ((opt_blake_compact) ? "bc" : ""), ((opt_luffa_parallel) ? "lp" : "")); strcat(data->binary_filename, buf); } @@ -130,10 +132,10 @@ static void append_x13_compiler_options(struct _build_kernel_data *data, struct append_x11_compiler_options(data, cgpu, algorithm); sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d -D SPH_HAMSI_SHORT=%d ", - (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?1:0)); + (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short) ? 1 : 0)); strcat(data->compiler_options, buf); - sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?"hs":"")); + sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short) ? "hs" : "")); strcat(data->binary_filename, buf); } @@ -147,7 +149,7 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx le_target = *(cl_uint *)(blk->work->device_target + 28); memcpy(clState->cldata, blk->work->data, 80); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); CL_SET_ARG(clState->CLbuffer0); CL_SET_ARG(clState->outputBuffer); @@ -172,7 +174,7 @@ static cl_int queue_neoscrypt_kernel(_clState *clState, dev_blk_ctx *blk, __mayb * The compiler will get rid of it anyway. */ le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); memcpy(clState->cldata, blk->work->data, 80); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); CL_SET_ARG(clState->CLbuffer0); CL_SET_ARG(clState->outputBuffer); @@ -189,7 +191,7 @@ static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ct cl_int status = 0; flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); CL_SET_ARG(clState->CLbuffer0); CL_SET_ARG(clState->outputBuffer); @@ -206,7 +208,7 @@ static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *b le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); CL_SET_ARG(clState->CLbuffer0); CL_SET_ARG(clState->outputBuffer); @@ -224,7 +226,7 @@ static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_b le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -268,7 +270,7 @@ static cl_int queue_bitblock_kernel(struct __clState *clState, struct _dev_blk_c le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -320,7 +322,7 @@ static cl_int queue_bitblockold_kernel(struct __clState *clState, struct _dev_bl le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -365,7 +367,7 @@ static cl_int queue_marucoin_mod_kernel(struct __clState *clState, struct _dev_b le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -413,7 +415,7 @@ static cl_int queue_marucoin_mod_old_kernel(struct __clState *clState, struct _d le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -457,7 +459,7 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -489,7 +491,7 @@ static cl_int queue_x14_kernel(struct __clState *clState, struct _dev_blk_ctx *b le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -539,7 +541,7 @@ static cl_int queue_x14_old_kernel(struct __clState *clState, struct _dev_blk_ct le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; @@ -583,7 +585,7 @@ static cl_int queue_fresh_kernel(struct __clState *clState, struct _dev_blk_ctx le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // shavite 1 - search kernel = &clState->kernel; @@ -614,22 +616,22 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_ le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); //clbuffer, hashes kernel = &clState->kernel; - CL_SET_ARG_N(0,clState->CLbuffer0); - CL_SET_ARG_N(1,clState->padbuffer8); + CL_SET_ARG_N(0, clState->CLbuffer0); + CL_SET_ARG_N(1, clState->padbuffer8); kernel = clState->extra_kernels; - CL_SET_ARG_N(0,clState->padbuffer8); + CL_SET_ARG_N(0, clState->padbuffer8); - CL_NEXTKERNEL_SET_ARG_N(0,clState->padbuffer8); + CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); //hashes, output, target - CL_NEXTKERNEL_SET_ARG_N(0,clState->padbuffer8); - CL_SET_ARG_N(1,clState->outputBuffer); - CL_SET_ARG_N(2,le_target); + CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); + CL_SET_ARG_N(1, clState->outputBuffer); + CL_SET_ARG_N(2, le_target); return status; } @@ -642,61 +644,80 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); //clbuffer, hashes kernel = &clState->kernel; - CL_SET_ARG_N(0,clState->CLbuffer0); - CL_SET_ARG_N(1,clState->padbuffer8); + CL_SET_ARG_N(0, clState->CLbuffer0); + CL_SET_ARG_N(1, clState->padbuffer8); - CL_SET_ARG_N(2,clState->outputBuffer); - CL_SET_ARG_N(3,le_target); + CL_SET_ARG_N(2, clState->outputBuffer); + CL_SET_ARG_N(3, le_target); return status; } static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { - cl_kernel *kernel; - unsigned int num; - cl_int status = 0; - cl_ulong le_target; - - le_target = *(cl_ulong *)(blk->work->device_target + 24); - flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); - - // blake - search - kernel = &clState->kernel; - num = 0; - - CL_SET_ARG(clState->padbuffer8); - CL_SET_ARG(blk->work->blk.ctx_a); - CL_SET_ARG(blk->work->blk.ctx_b); - CL_SET_ARG(blk->work->blk.ctx_c); - CL_SET_ARG(blk->work->blk.ctx_d); - CL_SET_ARG(blk->work->blk.ctx_e); - CL_SET_ARG(blk->work->blk.ctx_f); - CL_SET_ARG(blk->work->blk.ctx_g); - CL_SET_ARG(blk->work->blk.ctx_h); - CL_SET_ARG(blk->work->blk.cty_a); - CL_SET_ARG(blk->work->blk.cty_b); - CL_SET_ARG(blk->work->blk.cty_c); - - // bmw - search1 - kernel = clState->extra_kernels; - CL_SET_ARG_0(clState->padbuffer8); - // groestl - search2 - CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); - // skein - search3 - CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); - // jh - search4 - num = 0; - CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(le_target); - - return status; + cl_kernel *kernel; + unsigned int num; + cl_int status = 0; + cl_ulong le_target; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + + CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(blk->work->blk.ctx_a); + CL_SET_ARG(blk->work->blk.ctx_b); + CL_SET_ARG(blk->work->blk.ctx_c); + CL_SET_ARG(blk->work->blk.ctx_d); + CL_SET_ARG(blk->work->blk.ctx_e); + CL_SET_ARG(blk->work->blk.ctx_f); + CL_SET_ARG(blk->work->blk.ctx_g); + CL_SET_ARG(blk->work->blk.ctx_h); + CL_SET_ARG(blk->work->blk.cty_a); + CL_SET_ARG(blk->work->blk.cty_b); + CL_SET_ARG(blk->work->blk.cty_c); + + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + +static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_uint le_target; + cl_int status = 0; + + le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(le_target); + + return status; } typedef struct _algorithm_settings_t { @@ -714,81 +735,85 @@ typedef struct _algorithm_settings_t { size_t n_extra_kernels; long rw_buffer_size; cl_command_queue_properties cq_properties; - void (*regenhash)(struct work *); - cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); - void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *); - void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); + void(*regenhash)(struct work *); + cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); + void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *); + void(*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); } algorithm_settings_t; static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ - { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options} - A_SCRYPT( "ckolivas" ), - A_SCRYPT( "alexkarnew" ), - A_SCRYPT( "alexkarnold" ), - A_SCRYPT( "bufius" ), - A_SCRYPT( "psw" ), - A_SCRYPT( "zuikkis" ), - A_SCRYPT( "arebyp" ), + { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options } + A_SCRYPT("ckolivas"), + A_SCRYPT("alexkarnew"), + A_SCRYPT("alexkarnold"), + A_SCRYPT("bufius"), + A_SCRYPT("psw"), + A_SCRYPT("zuikkis"), + A_SCRYPT("arebyp"), #undef A_SCRYPT #define A_NEOSCRYPT(a) \ - { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options} + { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options } A_NEOSCRYPT("neoscrypt"), #undef A_NEOSCRYPT +#define A_PLUCK(a) \ + { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } + A_PLUCK("pluck"), +#undef A_PLUCK // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ - { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options} - A_QUARK( "quarkcoin", quarkcoin_regenhash), - A_QUARK( "qubitcoin", qubitcoin_regenhash), - A_QUARK( "animecoin", animecoin_regenhash), - A_QUARK( "sifcoin", sifcoin_regenhash), + { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } + A_QUARK("quarkcoin", quarkcoin_regenhash), + A_QUARK("qubitcoin", qubitcoin_regenhash), + A_QUARK("animecoin", animecoin_regenhash), + A_QUARK("sifcoin", sifcoin_regenhash), #undef A_QUARK // kernels starting from this will have difficulty calculated by using bitcoin algorithm #define A_DARK(a, b) \ - { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options} - A_DARK( "darkcoin", darkcoin_regenhash), - A_DARK( "inkcoin", inkcoin_regenhash), - A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash), + { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } + A_DARK("darkcoin", darkcoin_regenhash), + A_DARK("inkcoin", inkcoin_regenhash), + A_DARK("myriadcoin-groestl", myriadcoin_groestl_regenhash), #undef A_DARK - { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL}, - { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL}, + { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL }, + { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL }, - { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options}, + { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_x13_compiler_options}, - { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options}, - { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options}, + { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, - { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_x13_compiler_options}, - { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_x13_compiler_options}, + { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_x13_compiler_options }, + { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_x13_compiler_options }, - { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_x13_compiler_options}, - { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options}, + { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_x13_compiler_options }, + { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options }, - { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options}, + { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL}, + { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL}, + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ - { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL} + { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL } A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256), A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256), A_FUGUE("diamond", groestlcoin_regenhash, gen_hash), - #undef A_FUGUE +#undef A_FUGUE - { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL}, + { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, { "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL }, // Terminator (do not remove) - { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL} + { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } }; void copy_algorithm_settings(algorithm_t* dest, const char* algo) @@ -833,10 +858,10 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfactor) { - #define ALGO_ALIAS_NF(alias, name, nf) \ - if (strcasecmp(alias, lookup_alias) == 0) { *nfactor = nf; return name; } - #define ALGO_ALIAS(alias, name) \ - if (strcasecmp(alias, lookup_alias) == 0) return name; +#define ALGO_ALIAS_NF(alias, name, nf) \ + if (strcasecmp(alias, lookup_alias) == 0) { *nfactor = nf; return name; } +#define ALGO_ALIAS(alias, name) \ + if (strcasecmp(alias, lookup_alias) == 0) return name; ALGO_ALIAS_NF("scrypt", "ckolivas", 10); ALGO_ALIAS_NF("scrypt", "ckolivas", 10); @@ -862,8 +887,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS("Lyra2RE", "lyra2re"); ALGO_ALIAS("lyra2", "lyra2re"); - #undef ALGO_ALIAS - #undef ALGO_ALIAS_NF +#undef ALGO_ALIAS +#undef ALGO_ALIAS_NF return NULL; } @@ -873,7 +898,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias) const char *newname; //load previous algorithm nfactor in case nfactor was applied before algorithm... or default to 10 - uint8_t old_nfactor = ((algo->nfactor)?algo->nfactor:0); + uint8_t old_nfactor = ((algo->nfactor) ? algo->nfactor : 0); //load previous kernel file name if was applied before algorithm... const char *kernelfile = algo->kernelfile; uint8_t nfactor = 10; @@ -903,20 +928,20 @@ void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor) //adjust algo type accordingly switch (algo->type) { - case ALGO_SCRYPT: - //if nfactor isnt 10, switch to NSCRYPT - if(algo->nfactor != 10) - algo->type = ALGO_NSCRYPT; - break; + case ALGO_SCRYPT: + //if nfactor isnt 10, switch to NSCRYPT + if (algo->nfactor != 10) + algo->type = ALGO_NSCRYPT; + break; //nscrypt - case ALGO_NSCRYPT: - //if nfactor is 10, switch to SCRYPT - if(algo->nfactor == 10) - algo->type = ALGO_SCRYPT; - break; + case ALGO_NSCRYPT: + //if nfactor is 10, switch to SCRYPT + if (algo->nfactor == 10) + algo->type = ALGO_SCRYPT; + break; //ignore rest - default: - break; + default: + break; } } diff --git a/algorithm.h b/algorithm.h index a121be19..cc6ce346 100644 --- a/algorithm.h +++ b/algorithm.h @@ -26,7 +26,8 @@ typedef enum { ALGO_FRESH, ALGO_WHIRL, ALGO_NEOSCRYPT, - ALGO_LYRA2RE + ALGO_LYRA2RE, + ALGO_PLUCK } algorithm_type_t; extern const char *algorithm_type_str[]; @@ -59,10 +60,10 @@ typedef struct _algorithm_t { size_t n_extra_kernels; long rw_buffer_size; cl_command_queue_properties cq_properties; - void (*regenhash)(struct work *); - cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); - void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *); - void (*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); + void(*regenhash)(struct work *); + cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); + void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *); + void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); } algorithm_t; /* Set default parameters based on name. */ diff --git a/algorithm/pluck.c b/algorithm/pluck.c new file mode 100644 index 00000000..093dd68e --- /dev/null +++ b/algorithm/pluck.c @@ -0,0 +1,482 @@ +/*- + * Copyright 2014 James Lovejoy + * Copyright 2014 phm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + + + +static const uint32_t sha256_h[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +static const uint32_t sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +void sha256_init(uint32_t *state) +{ + memcpy(state, sha256_h, 32); +} + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + do { \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; \ + } while (0) + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + sha256_k[i]) + + +/* +* SHA256 block compression function. The 256-bit state is transformed via +* the 512-bit input block to produce a new state. +*/ +void sha256_transform(uint32_t *state, const uint32_t *block, int swap) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + if (swap) { + for (i = 0; i < 16; i++) + W[i] = swab32(block[i]); + } + else + memcpy(W, block, 64); + for (i = 16; i < 64; i += 2) { + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; + } + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + RNDr(S, W, 0); + RNDr(S, W, 1); + RNDr(S, W, 2); + RNDr(S, W, 3); + RNDr(S, W, 4); + RNDr(S, W, 5); + RNDr(S, W, 6); + RNDr(S, W, 7); + RNDr(S, W, 8); + RNDr(S, W, 9); + RNDr(S, W, 10); + RNDr(S, W, 11); + RNDr(S, W, 12); + RNDr(S, W, 13); + RNDr(S, W, 14); + RNDr(S, W, 15); + RNDr(S, W, 16); + RNDr(S, W, 17); + RNDr(S, W, 18); + RNDr(S, W, 19); + RNDr(S, W, 20); + RNDr(S, W, 21); + RNDr(S, W, 22); + RNDr(S, W, 23); + RNDr(S, W, 24); + RNDr(S, W, 25); + RNDr(S, W, 26); + RNDr(S, W, 27); + RNDr(S, W, 28); + RNDr(S, W, 29); + RNDr(S, W, 30); + RNDr(S, W, 31); + RNDr(S, W, 32); + RNDr(S, W, 33); + RNDr(S, W, 34); + RNDr(S, W, 35); + RNDr(S, W, 36); + RNDr(S, W, 37); + RNDr(S, W, 38); + RNDr(S, W, 39); + RNDr(S, W, 40); + RNDr(S, W, 41); + RNDr(S, W, 42); + RNDr(S, W, 43); + RNDr(S, W, 44); + RNDr(S, W, 45); + RNDr(S, W, 46); + RNDr(S, W, 47); + RNDr(S, W, 48); + RNDr(S, W, 49); + RNDr(S, W, 50); + RNDr(S, W, 51); + RNDr(S, W, 52); + RNDr(S, W, 53); + RNDr(S, W, 54); + RNDr(S, W, 55); + RNDr(S, W, 56); + RNDr(S, W, 57); + RNDr(S, W, 58); + RNDr(S, W, 59); + RNDr(S, W, 60); + RNDr(S, W, 61); + RNDr(S, W, 62); + RNDr(S, W, 63); + + /* 4. Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} +static inline void be32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} +static inline uint32_t be32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + + ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); +} +#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) +//note, this is 64 bytes +static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) +{ +#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) + uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15; + int i; + + x00 = (B[0] ^= Bx[0]); + x01 = (B[1] ^= Bx[1]); + x02 = (B[2] ^= Bx[2]); + x03 = (B[3] ^= Bx[3]); + x04 = (B[4] ^= Bx[4]); + x05 = (B[5] ^= Bx[5]); + x06 = (B[6] ^= Bx[6]); + x07 = (B[7] ^= Bx[7]); + x08 = (B[8] ^= Bx[8]); + x09 = (B[9] ^= Bx[9]); + x10 = (B[10] ^= Bx[10]); + x11 = (B[11] ^= Bx[11]); + x12 = (B[12] ^= Bx[12]); + x13 = (B[13] ^= Bx[13]); + x14 = (B[14] ^= Bx[14]); + x15 = (B[15] ^= Bx[15]); + for (i = 0; i < 8; i += 2) { + /* Operate on columns. */ + x04 ^= ROTL(x00 + x12, 7); x09 ^= ROTL(x05 + x01, 7); + x14 ^= ROTL(x10 + x06, 7); x03 ^= ROTL(x15 + x11, 7); + + x08 ^= ROTL(x04 + x00, 9); x13 ^= ROTL(x09 + x05, 9); + x02 ^= ROTL(x14 + x10, 9); x07 ^= ROTL(x03 + x15, 9); + + x12 ^= ROTL(x08 + x04, 13); x01 ^= ROTL(x13 + x09, 13); + x06 ^= ROTL(x02 + x14, 13); x11 ^= ROTL(x07 + x03, 13); + + x00 ^= ROTL(x12 + x08, 18); x05 ^= ROTL(x01 + x13, 18); + x10 ^= ROTL(x06 + x02, 18); x15 ^= ROTL(x11 + x07, 18); + + /* Operate on rows. */ + x01 ^= ROTL(x00 + x03, 7); x06 ^= ROTL(x05 + x04, 7); + x11 ^= ROTL(x10 + x09, 7); x12 ^= ROTL(x15 + x14, 7); + + x02 ^= ROTL(x01 + x00, 9); x07 ^= ROTL(x06 + x05, 9); + x08 ^= ROTL(x11 + x10, 9); x13 ^= ROTL(x12 + x15, 9); + + x03 ^= ROTL(x02 + x01, 13); x04 ^= ROTL(x07 + x06, 13); + x09 ^= ROTL(x08 + x11, 13); x14 ^= ROTL(x13 + x12, 13); + + x00 ^= ROTL(x03 + x02, 18); x05 ^= ROTL(x04 + x07, 18); + x10 ^= ROTL(x09 + x08, 18); x15 ^= ROTL(x14 + x13, 18); + } + B[0] += x00; + B[1] += x01; + B[2] += x02; + B[3] += x03; + B[4] += x04; + B[5] += x05; + B[6] += x06; + B[7] += x07; + B[8] += x08; + B[9] += x09; + B[10] += x10; + B[11] += x11; + B[12] += x12; + B[13] += x13; + B[14] += x14; + B[15] += x15; +#undef ROTL +} + +void sha256_hash(unsigned char *hash, const unsigned char *data, int len) +{ + uint32_t S[16], T[16]; + int i, r; + + sha256_init(S); + for (r = len; r > -9; r -= 64) { + if (r < 64) + memset(T, 0, 64); + memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r)); + if (r >= 0 && r < 64) + ((unsigned char *)T)[r] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + + if (r < 56) + T[15] = 8 * len; + sha256_transform(S, T, 0); + } + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, S[i]); +} + +void sha256_hash512(unsigned char *hash, const unsigned char *data) +{ + uint32_t S[16], T[16]; + int i; + + sha256_init(S); + + memcpy(T, data, 64); + + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + sha256_transform(S, T, 0); + + memset(T, 0, 64); + //memcpy(T, data + 64, 0); + ((unsigned char *)T)[0] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + T[15] = 8 * 64; + sha256_transform(S, T, 0); + + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, S[i]); +} + +inline void pluckrehash(void *state, const void *input) +{ + + int i,j; + uint32_t data[20]; + + const int HASH_MEMORY = 128 * 1024; + uint8_t * scratchbuf = (uint8_t*)malloc(HASH_MEMORY); + memcpy(data,input,80); + + uint8_t hashbuffer[128*1024]; //don't allocate this on stack, since it's huge.. + int size = HASH_MEMORY; + memset(hashbuffer, 0, 64); + sha256_hash(&hashbuffer[0], (uint8_t*)data, 80); + for (i = 64; i < size - 32; i += 32) + { + int randmax = i - 4; //we could use size here, but then it's probable to use 0 as the value in most cases + uint32_t joint[16]; + uint32_t randbuffer[16]; + + uint32_t randseed[16]; + memcpy(randseed, &hashbuffer[i - 64], 64); + if (i>128) + { + memcpy(randbuffer, &hashbuffer[i - 128], 64); + } + else + { + memset(&randbuffer, 0, 64); + } + + xor_salsa8(randbuffer, randseed); + + memcpy(joint, &hashbuffer[i - 32], 32); + //use the last hash value as the seed + for (j = 32; j < 64; j += 4) + { + uint32_t rand = randbuffer[(j - 32) / 4] % (randmax - 32); + joint[j / 4] = *((uint32_t*)&hashbuffer[rand]); + + } + sha256_hash512(&hashbuffer[i], (uint8_t*)joint); + + memcpy(randseed, &hashbuffer[i - 32], 64); + if (i>128) + { + memcpy(randbuffer, &hashbuffer[i - 128], 64); + } + else + { + memset(randbuffer, 0, 64); + } + xor_salsa8(randbuffer, randseed); + for (j = 0; j < 32; j += 2) + { + uint32_t rand = randbuffer[j / 2] % randmax; + *((uint32_t*)&hashbuffer[rand]) = *((uint32_t*)&hashbuffer[j + i - 4]); + } + } + + + //printf("cpu hashbuffer %08x nonce %08x\n", ((uint32_t*)hashbuffer)[7],data[19]); + + memcpy(state, hashbuffer, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int pluck_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + pluckrehash(ohash, data); + + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + + if (tmp_hash7 > diff1targ) + return -1; + + if (tmp_hash7 > Htarg) + return 0; + + return 1; +} + +void pluck_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + + pluckrehash(ohash, data); +} + + +bool scanhash_pluck(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while (1) + { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + pluckrehash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) + { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) + { + *last_nonce = n; + break; + } + } + + return ret; +} \ No newline at end of file diff --git a/algorithm/pluck.h b/algorithm/pluck.h new file mode 100644 index 00000000..7582554e --- /dev/null +++ b/algorithm/pluck.h @@ -0,0 +1,10 @@ +#ifndef PLUCK_H +#define PLUCK_H + +#include "miner.h" +#define PLUCK_SCRATCHBUF_SIZE (128 * 1024) +extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void pluck_regenhash(struct work *work); + +#endif /* PLUCK_H */ diff --git a/kernel/pluck.cl b/kernel/pluck.cl new file mode 100644 index 00000000..4fa501c7 --- /dev/null +++ b/kernel/pluck.cl @@ -0,0 +1,463 @@ +/* +* "pluck" kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +#if !defined(cl_khr_byte_addressable_store) +#error "Device does not support unaligned stores" +#endif +#define ROL32(x, n) rotate(x, (uint) n) +//#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define HASH_MEMORY 4096 + + +#define SALSA(a,b,c,d) do { \ + t =a+d; b^=rotate(t, 7U); \ + t =b+a; c^=rotate(t, 9U); \ + t =c+b; d^=rotate(t, 13U); \ + t =d+c; a^=rotate(t, 18U); \ +} while(0) + + +#define SALSA_CORE(state) do { \ +\ +SALSA(state.s0,state.s4,state.s8,state.sc); \ +SALSA(state.s5,state.s9,state.sd,state.s1); \ +SALSA(state.sa,state.se,state.s2,state.s6); \ +SALSA(state.sf,state.s3,state.s7,state.sb); \ +SALSA(state.s0,state.s1,state.s2,state.s3); \ +SALSA(state.s5,state.s6,state.s7,state.s4); \ +SALSA(state.sa,state.sb,state.s8,state.s9); \ +SALSA(state.sf,state.sc,state.sd,state.se); \ + } while(0) + +/* +#define SALSA_CORE(state) do { \ + state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \ + state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \ + state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \ + state.s3 ^= rotate(state.sf + state.sb, 7U); state.s7 ^= rotate(state.s3 + state.sf, 9U); state.sb ^= rotate(state.s7 + state.s3, 13U); state.sf ^= rotate(state.sb + state.s7, 18U); \ + state.s1 ^= rotate(state.s0 + state.s3, 7U); state.s2 ^= rotate(state.s1 + state.s0, 9U); state.s3 ^= rotate(state.s2 + state.s1, 13U); state.s0 ^= rotate(state.s3 + state.s2, 18U); \ + state.s6 ^= rotate(state.s5 + state.s4, 7U); state.s7 ^= rotate(state.s6 + state.s5, 9U); state.s4 ^= rotate(state.s7 + state.s6, 13U); state.s5 ^= rotate(state.s4 + state.s7, 18U); \ + state.sb ^= rotate(state.sa + state.s9, 7U); state.s8 ^= rotate(state.sb + state.sa, 9U); state.s9 ^= rotate(state.s8 + state.sb, 13U); state.sa ^= rotate(state.s9 + state.s8, 18U); \ + state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \ +} while(0) +*/ +uint16 xor_salsa8(uint16 Bx) +{ +uint t; + uint16 st = Bx; + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + return(st + Bx); +} + + + +#define SHR(x, n) ((x) >> n) +#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} + +#define PLAST(a,b,c,d,e,f,g,h,x,K) \ +{ \ + d += h + S3(e) + F1(e,f,g) + (x + K); \ +} + +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) + +inline uint8 sha256_round1(uint16 data) +{ + uint temp1; + uint8 res; + uint W0 = SWAP32(data.s0); + uint W1 = SWAP32(data.s1); + uint W2 = SWAP32(data.s2); + uint W3 = SWAP32(data.s3); + uint W4 = SWAP32(data.s4); + uint W5 = SWAP32(data.s5); + uint W6 = SWAP32(data.s6); + uint W7 = SWAP32(data.s7); + uint W8 = SWAP32(data.s8); + uint W9 = SWAP32(data.s9); + uint W10 = SWAP32(data.sA); + uint W11 = SWAP32(data.sB); + uint W12 = SWAP32(data.sC); + uint W13 = SWAP32(data.sD); + uint W14 = SWAP32(data.sE); + uint W15 = SWAP32(data.sF); + + uint v0 = 0x6A09E667; + uint v1 = 0xBB67AE85; + uint v2 = 0x3C6EF372; + uint v3 = 0xA54FF53A; + uint v4 = 0x510E527F; + uint v5 = 0x9B05688C; + uint v6 = 0x1F83D9AB; + uint v7 = 0x5BE0CD19; + + P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = v0 + 0x6A09E667; + res.s1 = v1 + 0xBB67AE85; + res.s2 = v2 + 0x3C6EF372; + res.s3 = v3 + 0xA54FF53A; + res.s4 = v4 + 0x510E527F; + res.s5 = v5 + 0x9B05688C; + res.s6 = v6 + 0x1F83D9AB; + res.s7 = v7 + 0x5BE0CD19; + return (res); +} + + +inline uint8 sha256_round2(uint16 data,uint8 buf) +{ + uint temp1; + uint8 res; + uint W0 = data.s0; + uint W1 = data.s1; + uint W2 = data.s2; + uint W3 = data.s3; + uint W4 = data.s4; + uint W5 = data.s5; + uint W6 = data.s6; + uint W7 = data.s7; + uint W8 = data.s8; + uint W9 = data.s9; + uint W10 = data.sA; + uint W11 = data.sB; + uint W12 = data.sC; + uint W13 = data.sD; + uint W14 = data.sE; + uint W15 = data.sF; + + uint v0 = buf.s0; + uint v1 = buf.s1; + uint v2 = buf.s2; + uint v3 = buf.s3; + uint v4 = buf.s4; + uint v5 = buf.s5; + uint v6 = buf.s6; + uint v7 = buf.s7; + + P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = SWAP32(v0 + buf.s0); + res.s1 = SWAP32(v1 + buf.s1); + res.s2 = SWAP32(v2 + buf.s2); + res.s3 = SWAP32(v3 + buf.s3); + res.s4 = SWAP32(v4 + buf.s4); + res.s5 = SWAP32(v5 + buf.s5); + res.s6 = SWAP32(v6 + buf.s6); + res.s7 = SWAP32(v7 + buf.s7); + return (res); +} + +inline uint8 sha256_80(uint* data,uint nonce) +{ + +uint8 buf = sha256_round1( ((uint16*)data)[0]); +uint in[16]; +for (int i = 0; i<3; i++) { in[i] = SWAP32(data[i + 16]); } +in[3] = SWAP32(nonce); +in[4] = 0x80000000; +in[15] = 0x280; +for (int i = 5; i<15; i++) { in[i] = 0; } + +return(sha256_round2(((uint16*)in)[0], buf)); +} + +inline uint8 sha256_64(uint* data) +{ + +uint8 buf=sha256_round1(((uint16*)data)[0]); +uint in[16]; +for (int i = 1; i<15; i++) { in[i] = 0; } +in[0] = 0x80000000; +in[15] = 0x200; + + return(sha256_round2(((uint16*)in)[0],buf)); +} + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target) +{ + + __global uchar *hashbuffer = (__global uchar *)(padcache + (1024*128 * (get_global_id(0) % MAX_GLOBAL_THREADS))); + + uint data[20]; + + ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; + ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; + + ((__global uint8*)hashbuffer)[0] = sha256_80(data,get_global_id(0)); + ((__global uint8*)hashbuffer)[1] = 0; + + for (int i = 2; i < 4096 - 1; i++) + { + uint randmax = i * 32 - 4; + uint randseed[16]; + uint randbuffer[16]; + uint joint[16]; + + ((uint8*)randseed)[0] = ((__global uint8*)hashbuffer)[i - 2]; + ((uint8*)randseed)[1] = ((__global uint8*)hashbuffer)[i - 1]; + + if (i>4) + { + + ((uint8*)randseed)[0] ^= ((__global uint8*)hashbuffer)[i - 4]; + ((uint8*)randseed)[1] ^= ((__global uint8*)hashbuffer)[i - 3]; + } + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + + + ((uint8*)joint)[0] = ((__global uint8*)hashbuffer)[i - 1]; + for (int j = 0; j < 8; j++) + { + uint rand = randbuffer[j] % (randmax - 32); + + ((uchar4*)joint)[(j + 8)].x =((__global uchar*)(hashbuffer))[0+rand]; + ((uchar4*)joint)[(j + 8)].y =((__global uchar*)(hashbuffer))[1+rand]; + ((uchar4*)joint)[(j + 8)].z =((__global uchar*)(hashbuffer))[2+rand]; + ((uchar4*)joint)[(j + 8)].w =((__global uchar*)(hashbuffer))[3+rand]; +} + ((__global uint8*)(hashbuffer))[i] = sha256_64(joint); + + + + (( uint8*)randseed)[0] = ((__global uint8*)(hashbuffer))[i - 1]; + (( uint8*)randseed)[1] = ((__global uint8*)(hashbuffer))[i]; + + + if (i>4) + { + + ((uint8*)randseed)[0] ^= ((__global uint8*)(hashbuffer))[i - 4]; + ((uint8*)randseed)[1] ^= ((__global uint8*)(hashbuffer))[i - 3]; + + } + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + for (int j = 0; j < 32; j += 2) + { + uint rand = randbuffer[j / 2] % randmax; + uchar4 Tohere; + + Tohere.x = ((__global uchar*)(hashbuffer))[randmax + j]; + Tohere.y = ((__global uchar*)(hashbuffer))[randmax + j + 1]; + Tohere.z = ((__global uchar*)(hashbuffer))[randmax + j + 2]; + Tohere.w = ((__global uchar*)(hashbuffer))[randmax + j + 3]; + ((__global uchar*)(hashbuffer))[rand] = Tohere.x; + ((__global uchar*)(hashbuffer))[rand+1] = Tohere.y; + ((__global uchar*)(hashbuffer))[rand+2] = Tohere.z; + ((__global uchar*)(hashbuffer))[rand+3] = Tohere.w; + + } + + } // main loop + + + if( ((__global uint *)hashbuffer)[7] <= (target)) {output[atomic_inc(output + 0xFF)] = SWAP32(get_global_id(0)); +//printf("gpu hashbuffer %08x nonce %08x\n",((__global uint *)hashbuffer)[7] ,SWAP32(get_global_id(0))); +} + + + +///////////////////////////////////////////////////////////////// + +} \ No newline at end of file diff --git a/ocl.c b/ocl.c index 1d624e31..1ea198a2 100644 --- a/ocl.c +++ b/ocl.c @@ -17,11 +17,11 @@ #include #ifdef WIN32 - #include +#include #else - #include - #include - #include +#include +#include +#include #endif #include @@ -35,6 +35,7 @@ #include "ocl/build_kernel.h" #include "ocl/binary_kernel.h" #include "algorithm/neoscrypt.h" +#include "algorithm/pluck.h" /* FIXME: only here for global config vars, replace with configuration.h * or similar as soon as config is in a struct instead of littered all @@ -198,7 +199,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg struct cgpu_info *cgpu = &gpus[gpu]; cl_platform_id platform = NULL; char pbuff[256]; - build_kernel_data *build_data = (build_kernel_data *) alloca(sizeof(struct _build_kernel_data)); + build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data)); cl_uint preferred_vwidth; cl_device_id *devices; cl_uint numDevices; @@ -210,7 +211,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg numDevices = clDevicesNum(); - if (numDevices <= 0 ) return NULL; + if (numDevices <= 0) return NULL; devices = (cl_device_id *)alloca(numDevices*sizeof(cl_device_id)); @@ -284,7 +285,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg clState->compute_shaders = compute_units * 64; applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders)); - status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL); + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL); if (status != CL_SUCCESS) { applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status); return NULL; @@ -299,7 +300,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg char filename[255]; char strbuf[32]; - sprintf(strbuf, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile)?cgpu->algorithm.kernelfile:cgpu->algorithm.name)); + sprintf(strbuf, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name)); strcpy(filename, strbuf); applog(LOG_DEBUG, "Using source file %s", filename); @@ -333,7 +334,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg if (!cgpu->opt_lg) { applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu); cgpu->lookup_gap = 2; - } else + } + else cgpu->lookup_gap = cgpu->opt_lg; if ((strcmp(cgpu->algorithm.name, "zuikkis") == 0) && (cgpu->lookup_gap != 2)) { @@ -359,16 +361,16 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg type = 2; } else if (cgpu->xintensity > 0) { - glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift)?(1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)):cgpu->xintensity); + glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); max_int = cgpu->xintensity; type = 1; } else { glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); - max_int = ((cgpu->dynamic)?MAX_INTENSITY:cgpu->intensity); + max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); } - glob_thread_count = ((glob_thread_count < cgpu->work_size)?cgpu->work_size:glob_thread_count); + glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count); // if TC * scratchbuf size is too big for memory... reduce to max if ((glob_thread_count * NEOSCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { @@ -378,49 +380,49 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc switch (type) { //raw intensity - case 2: - while ((glob_thread_count * NEOSCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { - --glob_thread_count; - } + case 2: + while ((glob_thread_count * NEOSCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { + --glob_thread_count; + } - max_int = glob_thread_count; - cgpu->rawintensity = glob_thread_count; - break; + max_int = glob_thread_count; + cgpu->rawintensity = glob_thread_count; + break; //x intensity - case 1: - glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE; - max_int = glob_thread_count / clState->compute_shaders; - - while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_XINTENSITY) { - applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); - max_int = MIN_XINTENSITY; - } - - cgpu->xintensity = max_int; - glob_thread_count = clState->compute_shaders * (1UL << max_int); - break; - - default: - glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE; - while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_INTENSITY) { - applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); - max_int = MIN_INTENSITY; - } - - cgpu->intensity = max_int; - glob_thread_count = 1UL << max_int; - break; + case 1: + glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE; + max_int = glob_thread_count / clState->compute_shaders; + + while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_XINTENSITY) { + applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); + max_int = MIN_XINTENSITY; + } + + cgpu->xintensity = max_int; + glob_thread_count = clState->compute_shaders * (1UL << max_int); + break; + + default: + glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE; + while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_INTENSITY) { + applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); + max_int = MIN_INTENSITY; + } + + cgpu->intensity = max_int; + glob_thread_count = 1UL << max_int; + break; } } @@ -429,10 +431,97 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); - } else if (!cgpu->opt_tc) { + } + + /////////////////////////////////// pluck + // neoscrypt TC + else if (!safe_cmp(cgpu->algorithm.name, "pluck") && !cgpu->opt_tc) { + size_t glob_thread_count; + long max_int; + unsigned char type = 0; + + // determine which intensity type to use + if (cgpu->rawintensity > 0) { + glob_thread_count = cgpu->rawintensity; + max_int = glob_thread_count; + type = 2; + } + else if (cgpu->xintensity > 0) { + glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); + max_int = cgpu->xintensity; + type = 1; + } + else { + glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); + max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); + } + + glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count); + + // if TC * scratchbuf size is too big for memory... reduce to max + if ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { + + /* Selected intensity will not run on this GPU. Not enough memory. + * Adapt the memory setting. */ + // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc + switch (type) { + //raw intensity + case 2: + while ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { + --glob_thread_count; + } + + max_int = glob_thread_count; + cgpu->rawintensity = glob_thread_count; + break; + + //x intensity + case 1: + glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE; + max_int = glob_thread_count / clState->compute_shaders; + + while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_XINTENSITY) { + applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); + max_int = MIN_XINTENSITY; + } + + cgpu->xintensity = max_int; + glob_thread_count = clState->compute_shaders * (1UL << max_int); + break; + + default: + glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE; + while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_INTENSITY) { + applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); + max_int = MIN_INTENSITY; + } + + cgpu->intensity = max_int; + glob_thread_count = 1UL << max_int; + break; + } + } + + // TC is glob thread count + cgpu->thread_concurrency = glob_thread_count; + + applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); + + } + else if (!cgpu->opt_tc) { unsigned int sixtyfours; - sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1; + sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n / 1024) - 1; cgpu->thread_concurrency = sixtyfours * 64; if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) { cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders; @@ -442,8 +531,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } } applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency)); - } else { - cgpu->thread_concurrency = cgpu->opt_tc; + } + else { + cgpu->thread_concurrency = cgpu->opt_tc; } cl_uint slot, cpnd; @@ -470,7 +560,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg build_data->opencl_version = get_opencl_version(devices[gpu]); build_data->patch_bfi = needs_bfi_patch(build_data); - strcpy(build_data->binary_filename, (!empty_string(cgpu->algorithm.kernelfile)?cgpu->algorithm.kernelfile:cgpu->algorithm.name)); + strcpy(build_data->binary_filename, (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name)); strcat(build_data->binary_filename, name); if (clState->goffset) strcat(build_data->binary_filename, "g"); @@ -495,7 +585,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg clReleaseProgram(clState->program); clState->program = load_opencl_binary_kernel(build_data); } - } else { + } + else { if (build_data->patch_bfi) quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch"); } @@ -503,8 +594,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg // Load kernels applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d", - filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un", - algorithm->nfactor, algorithm->n); + filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un", + algorithm->nfactor, algorithm->n); /* get a kernel object handle for a kernel with the given name */ clState->kernel = clCreateKernel(clState->program, "search", &status); @@ -519,7 +610,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg unsigned int i; char kernel_name[9]; // max: search99 + 0x0 - clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel) * clState->n_extra_kernels); + clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel)* clState->n_extra_kernels); for (i = 0; i < clState->n_extra_kernels; i++) { snprintf(kernel_name, 9, "%s%d", "search", i + 1); @@ -545,14 +636,27 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg readbufsize = 80; applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); - // scrypt/n-scrypt - } else { + // scrypt/n-scrypt + } + else if (!safe_cmp(algorithm->name, "pluck")) { + /* The scratch/pad-buffer needs 32kBytes memory per thread. */ + bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency; + + /* This is the input buffer. For pluck this is guaranteed to be + * 80 bytes only. */ + readbufsize = 80; + + applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); + // scrypt/n-scrypt + } + else { size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0)); bufsize = 128 * ipt * cgpu->thread_concurrency; applog(LOG_DEBUG, "Scrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); } - } else { - bufsize = (size_t) algorithm->rw_buffer_size; + } + else { + bufsize = (size_t)algorithm->rw_buffer_size; applog(LOG_DEBUG, "Buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); } @@ -564,7 +668,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg * 2 greater >= required amount earlier */ if (bufsize > cgpu->max_alloc) { applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", - gpu, (unsigned long)(cgpu->max_alloc)); + gpu, (unsigned long)(cgpu->max_alloc)); applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); } diff --git a/sgminer.c b/sgminer.c index 67ce4291..9c0ab0e3 100644 --- a/sgminer.c +++ b/sgminer.c @@ -7080,7 +7080,7 @@ bool test_nonce(struct work *work, uint32_t nonce) rebuild_nonce(work, nonce); // for Neoscrypt, the diff1targ value is in work->target - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")) { diff1targ = ((uint32_t *)work->target)[7]; } else { diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index a2a8e8f3..5557e019 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -266,6 +266,7 @@ + @@ -331,6 +332,7 @@ + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index feeb8090..837392cd 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -218,6 +218,9 @@ Source Files\algorithm + + Source Files\algorithm + @@ -415,6 +418,9 @@ Header Files\algorithm + + Header Files\algorithm +