diff --git a/cpu-miner.c b/cpu-miner.c
index 6948e52..e118b9e 100755
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -1320,8 +1320,9 @@ static void *miner_thread(void *userdata)
 		hashes_done = 0;
 		gettimeofday(&tv_start, NULL);
 
+		uint32_t results[2];
 		/* scan nonces for a proof-of-work hash */
-		rc = scanhash_cryptonight(thr_id, work.data, work.target,	max_nonce, &hashes_done);
+		rc = scanhash_cryptonight(thr_id, work.data, work.target,	max_nonce, &hashes_done, results);
 
 		/* record scanhash elapsed time */
 		gettimeofday(&tv_end, NULL);
@@ -1367,8 +1368,28 @@ static void *miner_thread(void *userdata)
 		}
 
 		/* if nonce found, submit work */
-		if(rc && !opt_benchmark && !submit_work(mythr, &work))
-			break;
+		if(rc && !opt_benchmark)
+		{
+			/*
+			 * rc is the number of nonces scanhash stored in results[] (1 or 2).
+			 * Each one is written into the work's nonce slot for submission and
+			 * the previous nonce value is put back afterwards.
+			 */
+			const uint32_t backup = *nonceptr;
+			int submit_failed;
+
+			*nonceptr = results[0];
+			submit_failed = !submit_work(mythr, &work);
+			if(!submit_failed && rc > 1)
+			{
+				*nonceptr = results[1];
+				submit_failed = !submit_work(mythr, &work);
+			}
+			*nonceptr = backup;
+
+			/* leave the mining loop on a failed submission, as the code did before */
+			if(submit_failed)
+				break;
+		}
 	}
 
 out:
diff --git a/cryptonight/cryptonight.cu b/cryptonight/cryptonight.cu
index c75fb3a..aa94193 100755
--- a/cryptonight/cryptonight.cu
+++ b/cryptonight/cryptonight.cu
@@ -138,9 +138,10 @@ extern "C" void cryptonight_hash(void* output, const void* input, size_t len);
 
 extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata,
 																		const uint32_t *ptarget, uint32_t max_nonce,
-																		unsigned long *hashes_done)
+																		unsigned long *hashes_done, uint32_t *results)
 {
 	cudaError_t err;
+	int res;
 	uint32_t *nonceptr = (uint32_t*)(((char*)pdata) + 39);
 	const uint32_t first_nonce = *nonceptr;
 	uint32_t nonce = *nonceptr;
@@ -190,31 +191,39 @@ extern "C" int scanhash_cryptonight(int thr_id, uint32_t *pdata,
 
 	do
 	{
-		uint32_t foundNonce;
+		uint32_t foundNonce[2];
+		int i;
 
 		cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx[thr_id]);
 		cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx[thr_id]);
-		cryptonight_extra_cpu_final(thr_id, throughput, nonce, &foundNonce, d_ctx[thr_id]);
+		cryptonight_extra_cpu_final(thr_id, throughput, nonce, foundNonce, d_ctx[thr_id]);
 
-		if(foundNonce < 0xffffffff)
+		/* re-check every reported candidate on the CPU; 0xffffffff marks an unused slot */
+		res = 0;
+		for(i = 0; i < 2; i++)
 		{
+			if(foundNonce[i] == 0xffffffff)
+				continue;
 			uint32_t vhash64[8] = {0, 0, 0, 0, 0, 0, 0, 0};
 			uint32_t tempdata[19];
 			memcpy(tempdata, pdata, 76);
 			uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39);
-			*tempnonceptr = foundNonce;
+			*tempnonceptr = foundNonce[i];
 #if !defined _WIN64 && !defined _LP64 /* hash is broken in 64bit builds */
 			cryptonight_hash(vhash64, tempdata, 76);
 #endif
 			if((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget))
 			{
-				*nonceptr = foundNonce;
-				*hashes_done = nonce - first_nonce + throughput;
-				return 1;
+				results[res++] = foundNonce[i];
 			}
 			else
 			{
-				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce);
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce[i]);
 			}
 		}
+		if(res > 0)
+		{
+			*hashes_done = nonce - first_nonce + throughput;
+			return res;
+		}
 		if(nonce > 0xffffffff - throughput)
diff --git a/cryptonight/cuda_cryptonight_extra.cu b/cryptonight/cuda_cryptonight_extra.cu
index e4dfbb0..b5c25bc 100755
--- a/cryptonight/cuda_cryptonight_extra.cu
+++ b/cryptonight/cuda_cryptonight_extra.cu
@@ -95,14 +95,14 @@ __global__ void cryptonight_extra_gpu_prepare(int threads, uint32_t * __restrict
 	}
 }
 
-__global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, uint32_t * __restrict__ d_target, uint32_t * __restrict__ resNonce, struct cryptonight_gpu_ctx * __restrict__ d_ctx)
+__global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, const uint32_t * __restrict__ d_target, uint32_t * __restrict__ resNonce, struct cryptonight_gpu_ctx * __restrict__ d_ctx)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	const int thread = blockDim.x * blockIdx.x + threadIdx.x;
 
 	if(thread < threads)
 	{
 		int i;
-		uint32_t nonce = startNonce + thread;
+		const uint32_t nonce = startNonce + thread;
 		struct cryptonight_gpu_ctx *ctx = &d_ctx[thread];
 		uint32_t hash[8];
 		uint32_t state[50];
@@ -153,7 +153,11 @@ __global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, ui
 		}
 
 		if(rc == true)
-			resNonce[0] = nonce;
+		{
+			uint32_t tmp = atomicExch(resNonce, nonce);
+			if(tmp != 0xffffffff)
+				resNonce[1] = tmp;
+		}
 	}
 }
 
@@ -168,7 +172,7 @@ __host__ void cryptonight_extra_cpu_init(int thr_id)
 {
 	cudaMalloc(&d_input[thr_id], 19 * sizeof(uint32_t));
 	cudaMalloc(&d_target[thr_id], 8 * sizeof(uint32_t));
-	cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
+	cudaMalloc(&d_resultNonce[thr_id], 2*sizeof(uint32_t));
 	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
 }
 
@@ -190,13 +194,13 @@ __host__ void cryptonight_extra_cpu_final(int thr_id, int threads, uint32_t star
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
 
-	cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
+	cudaMemset(d_resultNonce[thr_id], 0xFF, 2*sizeof(uint32_t));
 	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
 
 	cryptonight_extra_gpu_final << <grid, block >> >(threads, startNonce, d_target[thr_id], d_resultNonce[thr_id], d_ctx);
 	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
 
-	cudaMemcpy(resnonce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	cudaMemcpy(resnonce, d_resultNonce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost);
 	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
 }
 
diff --git a/miner.h b/miner.h
index f190614..a4274a3 100755
--- a/miner.h
+++ b/miner.h
@@ -196,7 +196,7 @@ void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
 
 extern int scanhash_cryptonight(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
-	unsigned long *hashes_done);
+	unsigned long *hashes_done, uint32_t *results);
 
 extern void cryptonight_hash(void* output, const void* input, size_t len);