diff --git a/cpu-miner.c b/cpu-miner.c index 6948e52..e118b9e 100755 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -1320,8 +1320,9 @@ static void *miner_thread(void *userdata) hashes_done = 0; gettimeofday(&tv_start, NULL); + uint32_t results[2]; /* scan nonces for a proof-of-work hash */ - rc = scanhash_cryptonight(thr_id, work.data, work.target, max_nonce, &hashes_done); + rc = scanhash_cryptonight(thr_id, work.data, work.target, max_nonce, &hashes_done, results); /* record scanhash elapsed time */ gettimeofday(&tv_end, NULL); @@ -1367,8 +1368,18 @@ static void *miner_thread(void *userdata) } /* if nonce found, submit work */ - if(rc && !opt_benchmark && !submit_work(mythr, &work)) - break; + if(rc && !opt_benchmark) + { + uint32_t backup = *nonceptr; + *nonceptr = results[0]; + submit_work(mythr, &work); + if(rc > 1) + { + *nonceptr = results[1]; + submit_work(mythr, &work); + } + *nonceptr = backup; + } } out: diff --git a/cryptonight/cryptonight.cu b/cryptonight/cryptonight.cu index c75fb3a..aa94193 100755 --- a/cryptonight/cryptonight.cu +++ b/cryptonight/cryptonight.cu @@ -138,9 +138,10 @@ extern "C" void cryptonight_hash(void* output, const void* input, size_t len); extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, - unsigned long *hashes_done) + unsigned long *hashes_done, uint32_t *results) { cudaError_t err; + int res; uint32_t *nonceptr = (uint32_t*)(((char*)pdata) + 39); const uint32_t first_nonce = *nonceptr; uint32_t nonce = *nonceptr; @@ -190,31 +191,48 @@ extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata, do { - uint32_t foundNonce; + uint32_t foundNonce[2]; cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx[thr_id]); cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx[thr_id]); - cryptonight_extra_cpu_final(thr_id, throughput, nonce, &foundNonce, d_ctx[thr_id]); + cryptonight_extra_cpu_final(thr_id, throughput, nonce, foundNonce, 
d_ctx[thr_id]); - if(foundNonce < 0xffffffff) + if(foundNonce[0] < 0xffffffff) { uint32_t vhash64[8] = {0, 0, 0, 0, 0, 0, 0, 0}; uint32_t tempdata[19]; memcpy(tempdata, pdata, 76); uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39); - *tempnonceptr = foundNonce; + *tempnonceptr = foundNonce[0]; #if !defined _WIN64 && !defined _LP64 /* hash is broken in 64bit builds */ cryptonight_hash(vhash64, tempdata, 76); #endif if((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) { - *nonceptr = foundNonce; + res = 1; + results[0] = foundNonce[0]; *hashes_done = nonce - first_nonce + throughput; - return 1; + if(foundNonce[1] < 0xffffffff) + { + *tempnonceptr = foundNonce[1]; +#if !defined _WIN64 && !defined _LP64 /* hash is broken in 64bit builds */ + cryptonight_hash(vhash64, tempdata, 76); +#endif + if((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) + { + res++; + results[1] = foundNonce[1]; + } + else + { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce[1]); + } + } + return res; } else { - applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce); + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce[0]); } } if(nonce > 0xffffffff - throughput) diff --git a/cryptonight/cuda_cryptonight_extra.cu b/cryptonight/cuda_cryptonight_extra.cu index e4dfbb0..b5c25bc 100755 --- a/cryptonight/cuda_cryptonight_extra.cu +++ b/cryptonight/cuda_cryptonight_extra.cu @@ -95,14 +95,14 @@ __global__ void cryptonight_extra_gpu_prepare(int threads, uint32_t * __restrict } } -__global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, uint32_t * __restrict__ d_target, uint32_t * __restrict__ resNonce, struct cryptonight_gpu_ctx * __restrict__ d_ctx) +__global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, const uint32_t * __restrict__ d_target, uint32_t * __restrict__ 
resNonce, struct cryptonight_gpu_ctx * __restrict__ d_ctx) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + const int thread = blockDim.x * blockIdx.x + threadIdx.x; if(thread < threads) { int i; - uint32_t nonce = startNonce + thread; + const uint32_t nonce = startNonce + thread; struct cryptonight_gpu_ctx *ctx = &d_ctx[thread]; uint32_t hash[8]; uint32_t state[50]; @@ -153,7 +153,11 @@ __global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, ui } if(rc == true) - resNonce[0] = nonce; + { + uint32_t tmp = atomicExch(resNonce, nonce); + if(tmp != 0xffffffff) + resNonce[1] = tmp; + } } } @@ -168,7 +172,7 @@ __host__ void cryptonight_extra_cpu_init(int thr_id) { cudaMalloc(&d_input[thr_id], 19 * sizeof(uint32_t)); cudaMalloc(&d_target[thr_id], 8 * sizeof(uint32_t)); - cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); + cudaMalloc(&d_resultNonce[thr_id], 2*sizeof(uint32_t)); exit_if_cudaerror(thr_id, __FILE__, __LINE__); } @@ -190,13 +194,13 @@ __host__ void cryptonight_extra_cpu_final(int thr_id, int threads, uint32_t star dim3 grid((threads + threadsperblock - 1) / threadsperblock); dim3 block(threadsperblock); - cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t)); + cudaMemset(d_resultNonce[thr_id], 0xFF, 2*sizeof(uint32_t)); exit_if_cudaerror(thr_id, __FILE__, __LINE__); cryptonight_extra_gpu_final<<<grid, block>>>(threads, startNonce, d_target[thr_id], d_resultNonce[thr_id], d_ctx); exit_if_cudaerror(thr_id, __FILE__, __LINE__); - cudaMemcpy(resnonce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + cudaMemcpy(resnonce, d_resultNonce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost); exit_if_cudaerror(thr_id, __FILE__, __LINE__); } diff --git a/miner.h b/miner.h index f190614..a4274a3 100755 --- a/miner.h +++ b/miner.h @@ -196,7 +196,7 @@ void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap); extern int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t 
max_nonce, - unsigned long *hashes_done); + unsigned long *hashes_done, uint32_t *results); extern void cryptonight_hash(void* output, const void* input, size_t len);