-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeneraterandom.cu
72 lines (61 loc) · 1.98 KB
/
generaterandom.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <curand.h>
#include <cstdio>
#include <time.h>
#include <ctime>
#include <curand_kernel.h>
#include <cuda.h>
#include "errorchecking.cu"
/*
 * Kernel: seeds one cuRAND generator state per launched thread.
 *
 * Every thread passes the same `seed` to curand_init and uses its own flat
 * global thread index as the sequence number (offset 0), storing the result
 * in state[index].
 *
 * There is no bounds guard: `state` must hold at least
 * gridDim.x * blockDim.x elements, because every launched thread writes one.
 *
 * Expects a 1D grid of 1D blocks.
 */
__global__ void initialise_curand_on_kernels3(curandState * state, unsigned long seed)
{
    const int tid = threadIdx.x + blockDim.x * blockIdx.x;
    curandState* slot = state + tid;
    curand_init(seed, tid, 0, slot);
}
/*
 * Draws one uniformly distributed float from the generator stored at
 * globalState[ind] and writes the advanced generator back to the same slot,
 * so the next call with the same index continues the sequence.
 *
 * globalState : array of generator states (must already be initialised
 *               with curand_init)
 * ind         : index of the state this call should use
 * returns     : the value produced by curand_uniform
 */
__device__ float generate3(curandState* globalState, int ind)
{
    curandState* slot = globalState + ind;
    // Work on a private copy of the state while generating.
    curandState scratch = *slot;
    const float sample = curand_uniform(&scratch);
    // Persist the advanced state for subsequent draws.
    *slot = scratch;
    return sample;
}
/*
 * Kernel: fills _ptr[0 .. _points) with uniform random floats, one element
 * per thread, using the generator state that shares the thread's flat index.
 *
 * _ptr        : output array, at least _points floats
 * globalState : generator states; entry i is read and updated by thread i
 * _points     : number of elements to produce; threads whose index is not
 *               below this value do nothing
 *
 * Each active thread also prints its value and block index via device
 * printf. Expects a 1D grid of 1D blocks.
 */
__global__ void set_random_number_from_kernels3(float* _ptr, curandState* globalState, const unsigned int _points)
{
    const int tid = threadIdx.x + blockDim.x * blockIdx.x;
    // Threads beyond the requested element count have nothing to do.
    if (!(tid < _points))
        return;

    const float sample = generate3(globalState, tid);
    printf("float %f block %d\n", sample, blockIdx.x);
    _ptr[tid] = sample;
}
/*
 * Host driver: initialises per-thread cuRAND states on the device, then
 * generates one uniform random float per element into a device buffer
 * (each value is printed by the generating kernel).
 *
 * A single element count drives the grid size, both allocations and the
 * kernel's bounds guard. Previously the grid was sized for 300 elements while
 * the buffer and guard used 50000, and the state array held only one entry
 * per block, so both kernels accessed generator states out of bounds.
 *
 * Returns 0. Error reporting is delegated to CudaCheckError().
 */
int tee() {
    const time_t now = time(NULL);
    // Host RNG seeding kept from the original; nothing in this function
    // consumes rand().
    srand(static_cast<unsigned int>(now));

    // Number of random values to generate (the launch geometry of the
    // original code was derived from this value).
    const unsigned int numPoints = 300;
    const int threadsPerBlock = 512;
    // Ceil-division: the smallest grid that covers numPoints elements.
    const int nBlocks = static_cast<int>((numPoints + threadsPerBlock - 1) / threadsPerBlock);
    printf("# of blocks: %d\n", nBlocks);

    // initialise_curand_on_kernels3 has no bounds guard, so every launched
    // thread writes a state: allocate one per thread in the grid, not one
    // per block.
    const size_t numStates = static_cast<size_t>(nBlocks) * threadsPerBlock;
    curandState* deviceStates = NULL;
    cudaMalloc(&deviceStates, numStates * sizeof(curandState));
    CudaCheckError();

    // Same seed for every thread; the kernel gives each thread its own
    // sequence number.
    initialise_curand_on_kernels3<<<nBlocks, threadsPerBlock>>>(deviceStates, unsigned(now));
    CudaCheckError();

    // Device container for the generated values.
    float* d_random_floats = NULL;
    cudaMalloc((void**)&d_random_floats, sizeof(float) * numPoints);
    CudaCheckError();

    // One random number per container element; the kernel's guard now
    // matches both the buffer size and the grid.
    set_random_number_from_kernels3<<<nBlocks, threadsPerBlock>>>(d_random_floats, deviceStates, numPoints);
    CudaCheckError();

    // Wait for the kernels to finish so execution errors surface here and
    // the device-side printf output is flushed before returning.
    cudaDeviceSynchronize();
    CudaCheckError();

    // Release device memory (previously leaked).
    cudaFree(d_random_floats);
    cudaFree(deviceStates);
    CudaCheckError();
    return 0;
}