Skip to content

Commit

Permalink
avoid unnecessary memcpy
Browse files Browse the repository at this point in the history
  • Loading branch information
KlausT committed Sep 17, 2016
1 parent 80c0909 commit bfe8a1b
Show file tree
Hide file tree
Showing 5 changed files with 749 additions and 752 deletions.
46 changes: 22 additions & 24 deletions cryptonight/cuda_cryptonight_blake.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,29 @@ typedef struct {
v[c] += v[d]; \
v[b] = BLAKE_ROT(v[b] ^ v[c], 7);

__constant__ uint8_t d_blake_sigma[14][16];
__constant__ uint32_t d_blake_cst[16];

const uint8_t h_blake_sigma[14][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
{ 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
{ 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
{12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
{13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
{ 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
{10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
__constant__ uint8_t d_blake_sigma[14][16] =
{
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}
};

const uint32_t h_blake_cst[16] = {
0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
__constant__ uint32_t d_blake_cst[16]
= {
0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
};

__device__ void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block)
Expand Down
12 changes: 2 additions & 10 deletions cryptonight/cuda_cryptonight_extra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ static uint32_t *d_resultNonce[8];
#include "cuda_cryptonight_jh.cu"
#include "cuda_cryptonight_skein.cu"

__constant__ uint8_t d_sub_byte[16][16];

const uint8_t sub_byte[16][16] = {
__constant__ uint8_t d_sub_byte[16][16] =
{
{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
{0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
{0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
Expand Down Expand Up @@ -172,13 +171,6 @@ __host__ void cryptonight_extra_cpu_init(int thr_id)
cudaMalloc(&d_input[thr_id], 19 * sizeof(uint32_t));
cudaMalloc(&d_target[thr_id], 8 * sizeof(uint32_t));
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
cudaMemcpyToSymbol(keccakf_rndc, h_keccakf_rndc, sizeof(h_keccakf_rndc), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_sub_byte, sub_byte, sizeof(sub_byte), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_blake_sigma, h_blake_sigma, sizeof(h_blake_sigma), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_blake_cst, h_blake_cst, sizeof(h_blake_cst), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_groestl_T, h_groestl_T, sizeof(h_groestl_T), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_JH256_H0, h_JH256_H0, sizeof(h_JH256_H0), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_E8_rc, h_E8_rc, sizeof(h_E8_rc), 0, cudaMemcpyHostToDevice);
exit_if_cudaerror(thr_id, __FILE__, __LINE__);
}

Expand Down
Loading

0 comments on commit bfe8a1b

Please sign in to comment.