diff --git a/README.md b/README.md index 6f193ec9f..69bdfc437 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this - [Stellite](https://stellite.cash/) - [TurtleCoin](https://turtlecoin.lol) - [Zelerius](https://zelerius.org/) +- [X-CASH](https://x-network.io/) Ryo currency is a way for us to implement the ideas that we were unable to in Monero. See [here](https://github.com/fireice-uk/cryptonote-speedup-demo/) for details. @@ -78,6 +79,7 @@ If your prefered coin is not listed, you can choose one of the following algorit - cryptonight_v7 - cryptonight_v7_stellite - cryptonight_v8 + - cryptonight_v8_double (used by X-CASH) - cryptonight_v8_half (used by masari and stellite) - cryptonight_v8_reversewaltz (used by graft) - cryptonight_v8_zelerius diff --git a/doc/usage.md b/doc/usage.md index e36e91cad..6607cad0f 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -52,7 +52,6 @@ The miner will automatically detect if CUDA (for NVIDIA GPUs) or OpenCL (for AMD ``` xmr-stak --noCPU ``` -**CUDA** is currently not supported. I am currently try to get some performance out it. ### NVIDIA via OpenCL diff --git a/scripts/build_xmr-stak_docker/build_xmr-stak_docker.sh b/scripts/build_xmr-stak_docker/build_xmr-stak_docker.sh index e3338294b..3cabf1d7b 100755 --- a/scripts/build_xmr-stak_docker/build_xmr-stak_docker.sh +++ b/scripts/build_xmr-stak_docker/build_xmr-stak_docker.sh @@ -8,7 +8,7 @@ fi if [ -d xmr-stak ]; then git -C xmr-stak clean -fd else - git clone https://github.com/rapid821/xmr-stak-hide.git + git clone https://github.com/fireice-uk/xmr-stak.git fi wget -c https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda_9.0.176_384.81_linux-run diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.cpp b/xmrstak/backend/amd/OclCryptonightR_gen.cpp index 13785d64b..7358e9857 100644 --- a/xmrstak/backend/amd/OclCryptonightR_gen.cpp +++ b/xmrstak/backend/amd/OclCryptonightR_gen.cpp @@ -134,6 +134,7 @@ static cl_program CryptonightR_build_program( const GpuContext* ctx, xmrstak_algo algo, uint64_t height, + uint32_t precompile_count, cl_kernel old_kernel, std::string source_code, std::string options) @@ -151,7 +152,7 @@ static cl_program CryptonightR_build_program( for(size_t i = 0; i < CryptonightR_cache.size();) { const CacheEntry& entry = CryptonightR_cache[i]; - if ((entry.algo == algo) && (entry.height + 2 < height)) + if ((entry.algo == algo) && (entry.height + 2 + precompile_count < height)) { printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu released (old program)", entry.height); old_programs.push_back(entry.program); @@ -252,10 +253,12 @@ static cl_program CryptonightR_build_program( return program; } -cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height, bool background, cl_kernel old_kernel) +cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height, uint32_t precompile_count, bool background, cl_kernel old_kernel) { + printer::inst()->print_msg(LDEBUG, "CryptonightR: start %llu released",height); + if (background) { - background_exec([=](){ CryptonightR_get_program(ctx, algo, height, false, old_kernel); }); + background_exec([=](){ CryptonightR_get_program(ctx, algo, height, precompile_count, false, old_kernel); }); return nullptr; } @@ -347,7 +350,7 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t } - return CryptonightR_build_program(ctx, algo, height, old_kernel, source, options); + return CryptonightR_build_program(ctx, algo, height, precompile_count, old_kernel, source, options); } } // namespace amd diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.hpp b/xmrstak/backend/amd/OclCryptonightR_gen.hpp index a69df9074..5f97d1e51 100644 --- a/xmrstak/backend/amd/OclCryptonightR_gen.hpp +++ b/xmrstak/backend/amd/OclCryptonightR_gen.hpp @@ -20,7 +20,7 @@ namespace amd { cl_program CryptonightR_get_program(GpuContext* ctx, const xmrstak_algo algo, - uint64_t height, bool background = false, cl_kernel old_kernel = nullptr); + uint64_t height, uint32_t precompile_count, bool background = false, cl_kernel old_kernel = nullptr); } // namespace amd } // namespace xmrstak diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index ace1c34bb..9c9db2ee3 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -199,7 +199,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 88, NULL, &ret); + ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 128, NULL, &ret); if(ret != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret)); @@ -334,6 +334,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ */ options += " -DOPENCL_DRIVER_MAJOR=" + std::to_string(std::stoi(openCLDriverVer.data()) / 100); + uint32_t isWindowsOs = 0; +#ifdef _WIN32 + isWindowsOs = 1; +#endif + options += " -DIS_WINDOWS_OS=" + std::to_string(isWindowsOs); + if(miner_algo == cryptonight_gpu) options += " -cl-fp32-correctly-rounded-divide-sqrt"; @@ -889,15 +895,15 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar cl_int ret; - if(input_len > 84) + if(input_len > 124) return ERR_STUPID_PARAMS; input[input_len] = 0x01; - memset(input + input_len + 1, 0, 88 - input_len - 1); + memset(input + input_len + 1, 0, 128 - input_len - 1); cl_uint numThreads = ctx->rawIntensity; - if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS) + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 128, input, 0, NULL, NULL)) != CL_SUCCESS) { printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret)); return ERR_OCL_API; @@ -952,8 +958,10 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar if ((miner_algo == cryptonight_r) || (miner_algo == cryptonight_r_wow)) { + uint32_t PRECOMPILATION_DEPTH = 4; + // Get new kernel - cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height); + cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height, PRECOMPILATION_DEPTH); if (program != ctx->ProgramCryptonightR) { cl_int ret; @@ -969,12 +977,10 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar } ctx->ProgramCryptonightR = program; - uint32_t PRECOMPILATION_DEPTH = 4; - // Precompile next program in background - xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + 1, true, old_kernel); + xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + 1, PRECOMPILATION_DEPTH, true, old_kernel); for (int i = 2; i <= PRECOMPILATION_DEPTH; ++i) - xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + i, true, nullptr); + xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + i, PRECOMPILATION_DEPTH, true, nullptr); printer::inst()->print_msg(LDEBUG, "Thread #%zu updated CryptonightR", ctx->deviceIdx); } diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index b78f2bcf7..12478aefb 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -32,69 +32,6 @@ R"===( #define cryptonight_conceal 14 #define cryptonight_v8_reversewaltz 17 -/* For Mesa clover support */ -#ifdef cl_clang_storage_class_specifiers -# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable -#endif - -#ifdef cl_amd_media_ops -#pragma OPENCL EXTENSION cl_amd_media_ops : enable -#else -/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt - * Build-in Function - * uintn amd_bitalign (uintn src0, uintn src1, uintn src2) - * Description - * dst.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2.s0 & 31)) - * similar operation applied to other components of the vectors. - * - * The implemented function is modified because the last is in our case always a scalar. - * We can ignore the bitwise AND operation. - */ -inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2) -{ - uint2 result; - result.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2)); - result.s1 = (uint) (((((ulong)src0.s1) << 32) | (ulong)src1.s1) >> (src2)); - return result; -} -#endif - -#ifdef cl_amd_media_ops2 -#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable -#else -/* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt - * Built-in Function: - * uintn amd_bfe (uintn src0, uintn src1, uintn src2) - * Description - * NOTE: operator >> below represent logical right shift - * offset = src1.s0 & 31; - * width = src2.s0 & 31; - * if width = 0 - * dst.s0 = 0; - * else if (offset + width) < 32 - * dst.s0 = (src0.s0 << (32 - offset - width)) >> (32 - width); - * else - * dst.s0 = src0.s0 >> offset; - * similar operation applied to other components of the vectors - */ -inline int amd_bfe(const uint src0, const uint offset, const uint width) -{ - /* casts are removed because we can implement everything as uint - * int offset = src1; - * int width = src2; - * remove check for edge case, this function is always called with - * `width==8` - * @code - * if ( width == 0 ) - * return 0; - * @endcode - */ - if ( (offset + width) < 32u ) - return (src0 << (32u - offset - width)) >> (32u - width); - - return src0 >> offset; -} -#endif static const __constant ulong keccakf_rndc[24] = { @@ -128,6 +65,8 @@ static const __constant uchar sbox[256] = 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 }; +//#include "opencl/wolf-aes.cl" +XMRSTAK_INCLUDE_WOLF_AES void keccakf1600(ulong *s) { @@ -355,8 +294,6 @@ inline uint getIdx() XMRSTAK_INCLUDE_FAST_INT_MATH_V2 //#include "fast_div_heavy.cl" XMRSTAK_INCLUDE_FAST_DIV_HEAVY -//#include "opencl/wolf-aes.cl" -XMRSTAK_INCLUDE_WOLF_AES //#include "opencl/wolf-skein.cl" XMRSTAK_INCLUDE_WOLF_SKEIN //#include "opencl/jh.cl" @@ -461,8 +398,6 @@ void CNKeccak(ulong *output, ulong *input) static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 }; -#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U)) - #define SubWord(inw) ((sbox[BYTE(inw, 3)] << 24) | (sbox[BYTE(inw, 2)] << 16) | (sbox[BYTE(inw, 1)] << 8) | sbox[BYTE(inw, 0)]) void AESExpandKey256(uint *keybuf) @@ -539,6 +474,11 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, State[8] = input[8]; State[9] = input[9]; State[10] = input[10]; + State[11] = input[11]; + State[12] = input[12]; + State[13] = input[13]; + State[14] = input[14]; + State[15] = input[15]; ((__local uint *)State)[9] &= 0x00FFFFFFU; ((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24; @@ -550,13 +490,13 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, */ ((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8)); - for (int i = 11; i < 25; ++i) { - State[i] = 0x00UL; - } - // Last bit of padding State[16] = 0x8000000000000000UL; + for (int i = 17; i < 25; ++i) { + State[i] = 0x00UL; + } + keccakf1600_2(State); #pragma unroll @@ -1361,7 +1301,7 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global states += 25 * BranchBuf[idx]; ulong State[8] = { 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0x0001000000000000UL }; -#if defined(__clang__) && !defined(__NV_CL_C_VERSION) +#if defined(__clang__) && !defined(__NV_CL_C_VERSION) && (IS_WINDOWS_OS != 1) // on ROCM we need volatile for AMD RX5xx cards to avoid invalid shares volatile #endif diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl index b99b62d5c..f1457c0dc 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl @@ -2,6 +2,70 @@ R"===( #ifndef WOLF_AES_CL #define WOLF_AES_CL +/* For Mesa clover support */ +#ifdef cl_clang_storage_class_specifiers +# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable +#endif + +#ifdef cl_amd_media_ops +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#else +/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt + * Build-in Function + * uintn amd_bitalign (uintn src0, uintn src1, uintn src2) + * Description + * dst.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2.s0 & 31)) + * similar operation applied to other components of the vectors. + * + * The implemented function is modified because the last is in our case always a scalar. + * We can ignore the bitwise AND operation. + */ +inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2) +{ + uint2 result; + result.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2)); + result.s1 = (uint) (((((ulong)src0.s1) << 32) | (ulong)src1.s1) >> (src2)); + return result; +} +#endif + +#ifdef cl_amd_media_ops2 +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable +#else +/* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt + * Built-in Function: + * uintn amd_bfe (uintn src0, uintn src1, uintn src2) + * Description + * NOTE: operator >> below represent logical right shift + * offset = src1.s0 & 31; + * width = src2.s0 & 31; + * if width = 0 + * dst.s0 = 0; + * else if (offset + width) < 32 + * dst.s0 = (src0.s0 << (32 - offset - width)) >> (32 - width); + * else + * dst.s0 = src0.s0 >> offset; + * similar operation applied to other components of the vectors + */ +inline int amd_bfe(const uint src0, const uint offset, const uint width) +{ + /* casts are removed because we can implement everything as uint + * int offset = src1; + * int width = src2; + * remove check for edge case, this function is always called with + * `width==8` + * @code + * if ( width == 0 ) + * return 0; + * @endcode + */ + if ( (offset + width) < 32u ) + return (src0 << (32u - offset - width)) >> (32u - width); + + return src0 >> offset; +} +#endif + // AES table - the other three are generated on the fly static const __constant uint AES0_C[256] = diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp index ea688e053..120fb6898 100644 --- a/xmrstak/backend/amd/autoAdjust.hpp +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -187,8 +187,8 @@ class autoAdjust memPerThread = std::min(memPerThread, memDoubleThread); } - // 224byte extra memory is used per thread for meta data - size_t perThread = hashMemSize + 224u; + // 240byte extra memory is used per thread for meta data + size_t perThread = hashMemSize + 240u; size_t maxIntensity = memPerThread / perThread; size_t possibleIntensity = std::min( maxThreads , maxIntensity ); // map intensity to a multiple of the compute unit count, 8 is the number of threads per work group diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp index 09e030e66..3be593175 100644 --- a/xmrstak/backend/amd/minethd.cpp +++ b/xmrstak/backend/amd/minethd.cpp @@ -273,7 +273,7 @@ void minethd::work_main() for(size_t i = 0; i < results[0xFF]; i++) { - uint8_t bWorkBlob[112]; + uint8_t bWorkBlob[128]; uint8_t bResult[32]; memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); diff --git a/xmrstak/backend/cpu/cpuType.cpp b/xmrstak/backend/cpu/cpuType.cpp index 5959b75cc..c85682d4f 100644 --- a/xmrstak/backend/cpu/cpuType.cpp +++ b/xmrstak/backend/cpu/cpuType.cpp @@ -37,9 +37,9 @@ namespace cpu { int32_t mask = 1 << bit; return (val & mask) != 0u; - + } - + Model getModel() { int32_t cpu_info[4]; @@ -53,7 +53,7 @@ namespace cpu Model result; cpuid(1, 0, cpu_info); - + result.family = get_masked(cpu_info[0], 12, 8); result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4; result.type_name = cpustr; @@ -63,8 +63,8 @@ namespace cpu result.sse2 = has_feature(cpu_info[3], 26); // aes-ni result.aes = has_feature(cpu_info[2], 25); - // avx - result.avx = has_feature(cpu_info[2], 28); + // avx - 27 is the check if the OS overwrote cpu features + result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27) ; if(strcmp(cpustr, "AuthenticAMD") == 0) { diff --git a/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp index a289ac559..2fc1a8baa 100644 --- a/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp +++ b/xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp @@ -74,7 +74,7 @@ static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int } } -void v4_compile_code(cryptonight_ctx* ctx, int code_size) +void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size) { printer::inst()->print_msg(LDEBUG, "CryptonightR update ASM code"); const int allocation_size = 65536; @@ -89,12 +89,24 @@ void v4_compile_code(cryptonight_ctx* ctx, int code_size) if(ctx->fun_data != nullptr) { - add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); - add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); - add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); - add_code(p, CryptonightR_template_part3, CryptonightR_template_end); - + if(N == 2) + { + add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4); + *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); + add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end); + } + else + { + add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); + add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version); + add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); + *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); + add_code(p, CryptonightR_template_part3, CryptonightR_template_end); + } ctx->loop_fn = reinterpret_cast(ctx->fun_data); protectExecutableMemory(ctx->fun_data, allocation_size); diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h index bd0c4967e..488805ec0 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight.h +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -16,7 +16,7 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx); typedef void (*cn_double_mainloop_fun)(cryptonight_ctx*, cryptonight_ctx*); typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx**, const xmrstak_algo&); -void v4_compile_code(cryptonight_ctx* ctx, int code_size); +void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size); struct extra_ctx_r { diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 22fd0f481..d7316b25e 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -744,7 +744,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) __m128i bx1; \ __m128i division_result_xmm; \ __m128 conc_var; \ - if(ALGO == cryptonight_conceal || ALGO == cryptonight_gpu) \ + if(ALGO == cryptonight_conceal) \ {\ set_float_rounding_mode_nearest(); \ conc_var = _mm_setzero_ps(); \ @@ -1143,7 +1143,15 @@ struct Cryptonight_hash_asm<2, 0> cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state, algo); } - reinterpret_cast(ctx[0]->loop_fn)(ctx[0], ctx[1]); + if(ALGO == cryptonight_r) + { + typedef void ABI_ATTRIBUTE (*cn_r_double_mainloop_fun)(cryptonight_ctx*, cryptonight_ctx*); + reinterpret_cast(ctx[0]->loop_fn)(ctx[0], ctx[1]); + } + else + { + reinterpret_cast(ctx[0]->loop_fn)(ctx[0], ctx[1]); + } for(size_t i = 0; i < N; ++i) { @@ -1298,6 +1306,7 @@ struct Cryptonight_hash_gpu template static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx, const xmrstak_algo& algo) { + set_float_rounding_mode_nearest(); keccak((const uint8_t *)input, len, ctx[0]->hash_state, 200); cn_explode_scratchpad_gpu(ctx[0]->hash_state, ctx[0]->long_state, algo); @@ -1318,7 +1327,10 @@ struct Cryptonight_R_generator template static void cn_on_new_job(const xmrstak::miner_work& work, cryptonight_ctx** ctx) { - if(ctx[0]->cn_r_ctx.height == work.iBlockHeight && ctx[0]->last_algo == POW(cryptonight_r)) + if(ctx[0]->cn_r_ctx.height == work.iBlockHeight && + ctx[0]->last_algo == POW(cryptonight_r) && + reinterpret_cast(ctx[0]->hash_fn) == ctx[0]->fun_data + ) return; ctx[0]->last_algo = POW(cryptonight_r); @@ -1327,8 +1339,11 @@ struct Cryptonight_R_generator int code_size = v4_random_math_init(ctx[0]->cn_r_ctx.code, work.iBlockHeight); if(ctx[0]->asm_version != 0) { - v4_compile_code(ctx[0], code_size); - ctx[0]->hash_fn = Cryptonight_hash_asm::template hash; + v4_compile_code(N, ctx[0], code_size); + if(N == 2) + ctx[0]->hash_fn = Cryptonight_hash_asm<2u, 0u>::template hash; + else + ctx[0]->hash_fn = Cryptonight_hash_asm::template hash; } for(size_t i=1; i < N; i++) diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 2b8b0e18d..e90b59500 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -473,11 +473,21 @@ bool minethd::self_test() { func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); - bResult = memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + bResult = bResult && memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); - bResult &= memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + bResult = bResult && memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + } + else if(algo == POW(cryptonight_v8_double)) + { + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), false, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult = bResult && memcmp(out, "\x63\x43\x8e\xd\x5c\x18\xff\xca\xd5\xb5\xdf\xe0\x26\x8a\x5b\x3f\xe9\xbc\x1\xef\xe6\x3a\xd3\x4f\x2c\x57\x1c\xda\xb2\xc\x32\x31", 32) == 0; + + func_selector(ctx, ::jconf::inst()->HaveHardwareAes(), true, algo); + ctx[0]->hash_fn("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult = bResult && memcmp(out, "\x63\x43\x8e\xd\x5c\x18\xff\xca\xd5\xb5\xdf\xe0\x26\x8a\x5b\x3f\xe9\xbc\x1\xef\xe6\x3a\xd3\x4f\x2c\x57\x1c\xda\xb2\xc\x32\x31", 32) == 0; } else printer::inst()->print_msg(L0, @@ -738,9 +748,17 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job& std::string selected_asm = asm_version_str; if(selected_asm == "auto") selected_asm = cpu::getAsmName(N); - printer::inst()->print_msg(L0, "enable cryptonight_r asm '%s' cpu's", selected_asm.c_str()); - for(int h = 0; h < N; ++h) - ctx[h]->asm_version = selected_asm == "intel_avx" ? 1 : 2; // 1 == Intel; 2 == AMD + if(selected_asm == "off") + { + for(int h = 0; h < N; ++h) + ctx[h]->asm_version = 0; + } + else + { + printer::inst()->print_msg(L0, "enable cryptonight_r asm '%s' cpu's", selected_asm.c_str()); + for(int h = 0; h < N; ++h) + ctx[h]->asm_version = selected_asm == "intel_avx" ? 1 : 2; // 1 == Intel; 2 == AMD + } } for(int h = 1; h < N; ++h) diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 4f5d88dea..e58665922 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -30,7 +30,8 @@ enum xmrstak_algo_id cryptonight_turtle = start_derived_algo_id, cryptonight_v8_half = (start_derived_algo_id + 1), - cryptonight_v8_zelerius = (start_derived_algo_id + 2) + cryptonight_v8_zelerius = (start_derived_algo_id + 2), + cryptonight_v8_double = (start_derived_algo_id + 3) // please add the algorithm name to get_algo_name() }; @@ -62,11 +63,12 @@ inline std::string get_algo_name(xmrstak_algo_id algo_id) "cryptonight_v8_reversewaltz" // used by graft }}; - static std::array derived_algo_names = + static std::array derived_algo_names = {{ "cryptonight_turtle", "cryptonight_v8_half", // used by masari and stellite - "cryptonight_v8_zelerius" + "cryptonight_v8_zelerius", + "cryptonight_v8_double" }}; @@ -181,6 +183,8 @@ constexpr uint32_t CN_ZELERIUS_ITER = 0x60000; constexpr uint32_t CN_WALTZ_ITER = 0x60000; +constexpr uint32_t CN_DOUBLE_ITER = 0x100000; + inline xmrstak_algo POW(xmrstak_algo_id algo_id) { static std::array pow = {{ @@ -204,11 +208,12 @@ inline xmrstak_algo POW(xmrstak_algo_id algo_id) {cryptonight_v8_reversewaltz, cryptonight_v8_reversewaltz, CN_WALTZ_ITER, CN_MEMORY} }}; - static std::array derived_pow = + static std::array derived_pow = {{ {cryptonight_turtle, cryptonight_monero_v8, CN_ITER/8, CN_MEMORY/8, CN_TURTLE_MASK}, {cryptonight_v8_half, cryptonight_monero_v8, CN_ITER/2, CN_MEMORY}, - {cryptonight_v8_zelerius, cryptonight_monero_v8, CN_ZELERIUS_ITER, CN_MEMORY} + {cryptonight_v8_zelerius, cryptonight_monero_v8, CN_ZELERIUS_ITER, CN_MEMORY}, + {cryptonight_v8_double, cryptonight_monero_v8, CN_DOUBLE_ITER, CN_MEMORY} // {cryptonight_derived} }}; diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp index c8174df32..d0e5237f2 100644 --- a/xmrstak/backend/miner_work.hpp +++ b/xmrstak/backend/miner_work.hpp @@ -15,7 +15,7 @@ namespace xmrstak struct miner_work { char sJobID[64]; - uint8_t bWorkBlob[112]; + uint8_t bWorkBlob[128]; uint32_t iWorkSize; uint64_t iTarget; bool bNiceHash; @@ -28,7 +28,7 @@ namespace xmrstak miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint64_t iTarget, bool bNiceHash, size_t iPoolId, uint64_t iBlockHeiht) : iWorkSize(iWorkSize), - iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId), iBlockHeight(iBlockHeiht), ref_ptr((uint8_t*)&iBlockHeight) + iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId), iBlockHeight(iBlockHeiht), ref_ptr((uint8_t*)&iBlockHeight) { assert(iWorkSize <= sizeof(bWorkBlob)); memcpy(this->bWorkBlob, bWork, iWorkSize); @@ -36,7 +36,7 @@ namespace xmrstak } miner_work(miner_work&& from) : iWorkSize(from.iWorkSize), iTarget(from.iTarget), - bStall(from.bStall), iPoolId(from.iPoolId), iBlockHeight(from.iBlockHeight), ref_ptr((uint8_t*)&iBlockHeight) + bStall(from.bStall), iPoolId(from.iPoolId), iBlockHeight(from.iBlockHeight), ref_ptr((uint8_t*)&iBlockHeight) { assert(iWorkSize <= sizeof(bWorkBlob)); memcpy(bWorkBlob, from.bWorkBlob, iWorkSize); diff --git a/xmrstak/backend/nvidia/CudaCryptonightR_gen.cpp b/xmrstak/backend/nvidia/CudaCryptonightR_gen.cpp index 87eb05540..f1bf75819 100644 --- a/xmrstak/backend/nvidia/CudaCryptonightR_gen.cpp +++ b/xmrstak/backend/nvidia/CudaCryptonightR_gen.cpp @@ -153,6 +153,7 @@ static void CryptonightR_build_program( std::string& lowered_name, const xmrstak_algo& algo, uint64_t height, + uint32_t precompile_count, int arch_major, int arch_minor, std::string source) @@ -164,7 +165,7 @@ static void CryptonightR_build_program( for (size_t i = 0; i < CryptonightR_cache.size();) { const CacheEntry& entry = CryptonightR_cache[i]; - if ((entry.algo == algo) && (entry.height + 2 < height)) + if ((entry.algo == algo) && (entry.height + 2 + precompile_count < height)) { printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu released (old program)", entry.height); CryptonightR_cache[i] = std::move(CryptonightR_cache.back()); @@ -273,10 +274,10 @@ static void CryptonightR_build_program( CryptonightR_cache_mutex.UnLock(); } -void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, const xmrstak_algo algo, uint64_t height, int arch_major, int arch_minor, bool background) +void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, const xmrstak_algo algo, uint64_t height, uint32_t precompile_count, int arch_major, int arch_minor, bool background) { if (background) { - background_exec([=]() { std::vector tmp; std::string s; CryptonightR_get_program(tmp, s, algo, height, arch_major, arch_minor, false); }); + background_exec([=]() { std::vector tmp; std::string s; CryptonightR_get_program(tmp, s, algo, height, precompile_count, arch_major, arch_minor, false); }); return; } @@ -329,7 +330,7 @@ void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, CryptonightR_cache_mutex.UnLock(); } - CryptonightR_build_program(ptx, lowered_name, algo, height, arch_major, arch_minor, source_code); + CryptonightR_build_program(ptx, lowered_name, algo, height, precompile_count, arch_major, arch_minor, source_code); } } // namespace xmrstak diff --git a/xmrstak/backend/nvidia/CudaCryptonightR_gen.hpp b/xmrstak/backend/nvidia/CudaCryptonightR_gen.hpp index e214647b9..c3d8827b0 100644 --- a/xmrstak/backend/nvidia/CudaCryptonightR_gen.hpp +++ b/xmrstak/backend/nvidia/CudaCryptonightR_gen.hpp @@ -29,7 +29,7 @@ namespace nvidia { void CryptonightR_get_program(std::vector& ptx, std::string& lowered_name, - const xmrstak_algo algo, uint64_t height, int arch_major, int arch_minor, bool background = false); + const xmrstak_algo algo, uint64_t height, uint32_t precompile_count, int arch_major, int arch_minor, bool background = false); } // namespace xmrstak diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp index a50dd30cc..80615d7a3 100644 --- a/xmrstak/backend/nvidia/minethd.cpp +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -285,7 +285,7 @@ void minethd::work_main() for(size_t i = 0; i < foundCount; i++) { - uint8_t bWorkBlob[112]; + uint8_t bWorkBlob[128]; uint8_t bResult[32]; memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 3c62bd090..718cff0c7 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -1033,9 +1033,11 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, ui if(ctx->module) cuModuleUnload(ctx->module); + uint32_t PRECOMPILATION_DEPTH = 4; + std::vector ptx; std::string lowered_name; - xmrstak::nvidia::CryptonightR_get_program(ptx, lowered_name, miner_algo, chain_height, ctx->device_arch[0], ctx->device_arch[1]); + xmrstak::nvidia::CryptonightR_get_program(ptx, lowered_name, miner_algo, chain_height, PRECOMPILATION_DEPTH, ctx->device_arch[0], ctx->device_arch[1]); CU_CHECK(ctx->device_id, cuModuleLoadDataEx(&ctx->module, ptx.data(), 0, 0, 0)); CU_CHECK(ctx->device_id, cuModuleGetFunction(&ctx->kernel, ctx->module, lowered_name.c_str())); @@ -1043,7 +1045,9 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, ui ctx->kernel_height = chain_height; ctx->cached_algo = miner_algo; - xmrstak::nvidia::CryptonightR_get_program(ptx, lowered_name, miner_algo, chain_height + 1, ctx->device_arch[0], ctx->device_arch[1], true); + for (int i = 1; i <= PRECOMPILATION_DEPTH; ++i) + xmrstak::nvidia::CryptonightR_get_program(ptx, lowered_name, miner_algo, + chain_height + i, PRECOMPILATION_DEPTH, ctx->device_arch[0], ctx->device_arch[1], true); } } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index e909e2fa3..b6e41c619 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -112,7 +112,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric uint32_t ctx_b[4]; uint32_t ctx_key1[40]; uint32_t ctx_key2[40]; - uint32_t input[21]; + uint32_t input[32]; memcpy( input, d_input, len ); //*((uint32_t *)(((char *)input) + 39)) = startNonce + thread; @@ -349,7 +349,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize)); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, ctx_b_size)); // POW block format http://monero.wikia.com/wiki/PoW_Block_Header_Format - CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 32 * sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); CUDA_CHECK_MSG( diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp index c75c74964..3f535631d 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp @@ -103,54 +103,7 @@ __device__ __forceinline__ void cn_keccakf(uint64_t *s) { uint64_t bc[5], tmpxor[5], tmp1, tmp2; - tmpxor[0] = s[0] ^ s[5]; - tmpxor[1] = s[1] ^ s[6] ^ 0x8000000000000000ULL; - tmpxor[2] = s[2] ^ s[7]; - tmpxor[3] = s[3] ^ s[8]; - tmpxor[4] = s[4] ^ s[9]; - - bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1); - bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1); - bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1); - bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1); - bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1); - - tmp1 = s[1] ^ bc[0]; - - s[0] ^= bc[4]; - s[1] = rotl64_2(s[6] ^ bc[0], 12); - s[6] = rotl64_1(s[9] ^ bc[3], 20); - s[9] = rotl64_2(bc[1], 29); - s[22] = rotl64_2(bc[3], 7); - s[14] = rotl64_1(bc[4], 18); - s[20] = rotl64_2(s[2] ^ bc[1], 30); - s[2] = rotl64_2(bc[1], 11); - s[12] = rotl64_1(bc[2], 25); - s[13] = rotl64_1(bc[3], 8); - s[19] = rotl64_2(bc[2], 24); - s[23] = rotl64_2(bc[4], 9); - s[15] = rotl64_1(s[4] ^ bc[3], 27); - s[4] = rotl64_1(bc[3], 14); - s[24] = rotl64_1(bc[0], 2); - s[21] = rotl64_2(s[8] ^ bc[2], 23); - s[8] = rotl64_2(0x8000000000000000ULL ^ bc[0], 13); - s[16] = rotl64_2(s[5] ^ bc[4], 4); - s[5] = rotl64_1(s[3] ^ bc[2], 28); - s[3] = rotl64_1(bc[2], 21); - s[18] = rotl64_1(bc[1], 15); - s[17] = rotl64_1(bc[0], 10); - s[11] = rotl64_1(s[7] ^ bc[1], 6); - s[7] = rotl64_1(bc[4], 3); - s[10] = rotl64_1(tmp1, 1); - - tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); - tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); - tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); - tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); - tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); - s[0] ^= 0x0000000000000001; - - for(int i = 1; i < 24; ++i) + for(int i = 0; i < 24; ++i) { tmpxor[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; tmpxor[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp index 5a8a51703..8335f6fe8 100644 --- a/xmrstak/cli/cli-miner.cpp +++ b/xmrstak/cli/cli-miner.cpp @@ -860,8 +860,8 @@ int do_benchmark(int block_version, int wait_sec, int work_sec) printer::inst()->print_msg(L0, "Prepare benchmark for block version %d", block_version); - uint8_t work[112]; - memset(work,0,112); + uint8_t work[128]; + memset(work,0,128); work[0] = static_cast(block_version); xmrstak::pool_data dat; @@ -872,15 +872,14 @@ int do_benchmark(int block_version, int wait_sec, int work_sec) printer::inst()->print_msg(L0, "Wait %d sec until all backends are initialized",wait_sec); std::this_thread::sleep_for(std::chrono::seconds(wait_sec)); - /* AMD and NVIDIA is currently only supporting work sizes up to 84byte - * \todo fix this issue + /* AMD and NVIDIA is currently only supporting work sizes up to 128byte */ printer::inst()->print_msg(L0, "Start a %d second benchmark...",work_sec); - xmrstak::globalStates::inst().switch_work(xmrstak::miner_work("", work, 84, 0, false, 0, 0), dat); + xmrstak::globalStates::inst().switch_work(xmrstak::miner_work("", work, 128, 0, false, 0, 0), dat); uint64_t iStartStamp = get_timestamp_ms(); std::this_thread::sleep_for(std::chrono::seconds(work_sec)); - xmrstak::globalStates::inst().switch_work(xmrstak::miner_work("", work, 84, 0, false, 0, 0), dat); + xmrstak::globalStates::inst().switch_work(xmrstak::miner_work("", work, 128, 0, false, 0, 0), dat); double fTotalHps = 0.0; for (uint32_t i = 0; i < pvThreads->size(); i++) diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index 2b22a2fb9..5e3384a63 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -98,11 +98,12 @@ xmrstak::coin_selection coins[] = { { "cryptonight_lite", {POW(cryptonight_lite)}, {POW(cryptonight_aeon)}, nullptr }, { "cryptonight_lite_v7", {POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, { "cryptonight_lite_v7_xor", {POW(cryptonight_ipbc)}, {POW(cryptonight_aeon)}, nullptr }, - { "cryptonight_r", {POW(cryptonight_r)}, {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_r", {POW(cryptonight_r)}, {POW(cryptonight_r)}, nullptr }, { "cryptonight_superfast", {POW(cryptonight_superfast)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_turtle", {POW(cryptonight_turtle)}, {POW(cryptonight_turtle)}, nullptr }, { "cryptonight_v7", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, - { "cryptonight_v8", {POW(cryptonight_monero_v8)}, {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, nullptr }, + { "cryptonight_v8", {POW(cryptonight_monero_v8)}, {POW(cryptonight_r)}, nullptr }, + { "cryptonight_v8_double", {POW(cryptonight_v8_double)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8_half", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8_reversewaltz", {POW(cryptonight_v8_reversewaltz)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8_zelerius", {POW(cryptonight_v8_zelerius)},{POW(cryptonight_gpu)}, nullptr }, @@ -114,13 +115,14 @@ xmrstak::coin_selection coins[] = { { "haven", {POW(cryptonight_haven)}, {POW(cryptonight_gpu)}, nullptr }, { "lethean", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "masari", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, - { "monero", {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, "pool.usxmrpool.com:3333" }, + { "monero", {POW(cryptonight_r)}, {POW(cryptonight_r)}, "pool.usxmrpool.com:3333" }, { "qrl", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "ryo", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, "pool.ryo-currency.com:3333" }, { "stellite", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, { "turtlecoin", {POW(cryptonight_turtle), 6u,POW(cryptonight_aeon)}, {POW(cryptonight_aeon)}, nullptr }, { "plenteum", {POW(cryptonight_turtle)}, {POW(cryptonight_turtle)}, nullptr }, - { "zelerius", {POW(cryptonight_v8_zelerius), 7, POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr } + { "zelerius", {POW(cryptonight_v8_zelerius), 7, POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr }, + { "xcash", {POW(cryptonight_v8_double)}, {POW(cryptonight_gpu)}, nullptr } }; constexpr size_t coin_algo_size = (sizeof(coins)/sizeof(coins[0])); diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 5eb3c20a4..83c92e058 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -1118,6 +1118,7 @@ void executor::http_result_report(std::string& out) } snprintf(buffer, sizeof(buffer), sHtmlResultBodyHigh, + jconf::inst()->GetMiningCoin().c_str(), iPoolDiff, iGoodRes, iTotalRes, fGoodResPrc, fAvgResTime, iPoolHashes, int_port(iTopDiff[0]), int_port(iTopDiff[1]), int_port(iTopDiff[2]), int_port(iTopDiff[3]), int_port(iTopDiff[4]), int_port(iTopDiff[5]), int_port(iTopDiff[6]), int_port(iTopDiff[7]), @@ -1163,7 +1164,7 @@ void executor::http_connection_report(std::string& out) } snprintf(buffer, sizeof(buffer), sHtmlConnectionBodyHigh, - jconf::inst()->GetMiningCoin().c_str(), + pool != nullptr ? pool->get_rigid() : "", pool != nullptr ? pool->get_pool_addr() : "not connected", cdate, ping_time); out.append(buffer); diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp index 813fc7d06..33980bf42 100644 --- a/xmrstak/net/msgstruct.hpp +++ b/xmrstak/net/msgstruct.hpp @@ -12,7 +12,7 @@ struct pool_job { char sJobID[64]; - uint8_t bWorkBlob[112]; + uint8_t bWorkBlob[128]; uint64_t iTarget; uint32_t iWorkLen; uint32_t iSavedNonce; diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index b1fd0e70b..ea3a276aa 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -32,6 +32,7 @@ POOLCONF], * ryo * turtlecoin * plenteum + * xcash * * Native algorithms which do not depend on any block versions: * @@ -47,6 +48,7 @@ POOLCONF], * cryptonight_superfast * cryptonight_v7 * cryptonight_v8 + * cryptonight_v8_double (used by xcash) * cryptonight_v8_half (used by masari and stellite) * cryptonight_v8_reversewaltz (used by graft) * cryptonight_v8_zelerius diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index 01a0dcd60..644f82b19 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.10.0-hide-3.1.0" +#define XMR_STAK_VERSION "2.10.1-hide-3.1.1" #if defined(_WIN32) #define OS_TYPE "win"