Skip to content

Commit 1702509

Browse files
committed
consistency
1 parent 7f36347 commit 1702509

File tree

3 files changed

+68
-64
lines changed

3 files changed

+68
-64
lines changed

examples/cuda/getting_started.cpp

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <math.h>
2525
#include <stdio.h>
2626
#include <stdlib.h>
27+
#include <vector>
2728

2829
static const double PI = 3.141592653589793238462643383279502884;
2930

@@ -35,9 +36,9 @@ int main() {
3536
int64_t modes[1] = {N};
3637

3738
// Host arrays: frequencies (x), coefficients (c), and output (f).
38-
float *x;
39-
float _Complex *c;
40-
float _Complex *f;
39+
std::vector<float> x(M);
40+
std::vector<float _Complex> c(M);
41+
std::vector<float _Complex> f(N);
4142

4243
// Device pointers.
4344
float *d_x;
@@ -51,10 +52,6 @@ int main() {
5152
float _Complex f0;
5253

5354
// The host arrays were already allocated above by the std::vector constructors.
54-
x = (float *)malloc(M * sizeof(float));
55-
c = (float _Complex *)malloc(M * sizeof(float _Complex));
56-
f = (float _Complex *)malloc(N * sizeof(float _Complex));
57-
5855
// Fill with random numbers. Frequencies must be in the interval [-pi, pi)
5956
// while strengths can be any value.
6057
srand(0);
@@ -70,8 +67,8 @@ int main() {
7067
cudaMalloc(&d_c, M * sizeof(float _Complex));
7168
cudaMalloc(&d_f, N * sizeof(float _Complex));
7269

73-
cudaMemcpy(d_x, x, M * sizeof(float), cudaMemcpyHostToDevice);
74-
cudaMemcpy(d_c, c, M * sizeof(float _Complex), cudaMemcpyHostToDevice);
70+
cudaMemcpy(d_x, x.data(), M * sizeof(float), cudaMemcpyHostToDevice);
71+
cudaMemcpy(d_c, c.data(), M * sizeof(float _Complex), cudaMemcpyHostToDevice);
7572

7673
// Make the cufinufft plan for a 1D type-1 transform with six digits of
7774
// tolerance.
@@ -85,7 +82,7 @@ int main() {
8582
cufinufftf_execute(plan, d_c, d_f);
8683

8784
// Copy the result back onto the host.
88-
cudaMemcpy(f, d_f, N * sizeof(float _Complex), cudaMemcpyDeviceToHost);
85+
cudaMemcpy(f.data(), d_f, N * sizeof(float _Complex), cudaMemcpyDeviceToHost);
8986

9087
// Destroy the plan and free the device arrays after we're done.
9188
cufinufftf_destroy(plan);
@@ -109,10 +106,5 @@ int main() {
109106

110107
printf("f0[%d] = %lf + %lfi\n", idx, crealf(f0), cimagf(f0));
111108

112-
// Finally free the host arrays.
113-
free(x);
114-
free(c);
115-
free(f);
116-
117109
return 0;
118110
}

perftest/guru_timing_test.cpp

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "finufft/finufft_utils.hpp"
22
#include <finufft/test_defs.h>
3+
#include <memory>
34

45
// for sleep call
56
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
@@ -93,58 +94,73 @@ int main(int argc, char *argv[])
9394
N3 = (N3 == 0) ? 1 : N3;
9495
BIGINT N = N1 * N2 * N3;
9596

96-
FLT *s = nullptr;
97-
FLT *t = nullptr;
98-
FLT *u = nullptr;
97+
std::unique_ptr<FLT[]> s;
98+
std::unique_ptr<FLT[]> t;
99+
std::unique_ptr<FLT[]> u;
99100
if (type == 3) { // make target freq NU pts for type 3 (N of them)...
100-
s = static_cast<FLT *>(std::malloc(sizeof(FLT) * N)); // targ freqs (1-cmpt)
101-
FLT S1 = (FLT)N1 / 2;
101+
s = std::make_unique<FLT[]>(N); // targ freqs (1-cmpt)
102+
FLT *s_ptr = s.get();
103+
FLT S1 = (FLT)N1 / 2;
102104
#pragma omp parallel
103105
{
104106
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
105107
#pragma omp for schedule(dynamic, TEST_RANDCHUNK)
106108
for (BIGINT k = 0; k < N; ++k) {
107-
s[k] = S1 * (1.7 + randm11r(&se)); // note the offset, to test type 3.
109+
s_ptr[k] = S1 * (1.7 + randm11r(&se)); // note the offset, to test type 3.
108110
}
109-
if (ndim > 1) {
110-
t = static_cast<FLT *>(std::malloc(sizeof(FLT) * N)); // targ freqs (2-cmpt)
111-
FLT S2 = (FLT)N2 / 2;
111+
}
112+
if (ndim > 1) {
113+
t = std::make_unique<FLT[]>(N); // targ freqs (2-cmpt)
114+
FLT *t_ptr = t.get();
115+
FLT S2 = (FLT)N2 / 2;
116+
#pragma omp parallel
117+
{
118+
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
112119
#pragma omp for schedule(dynamic, TEST_RANDCHUNK)
113120
for (BIGINT k = 0; k < N; ++k) {
114-
t[k] = S2 * (-0.5 + randm11r(&se));
121+
t_ptr[k] = S2 * (-0.5 + randm11r(&se));
115122
}
116123
}
117-
if (ndim > 2) {
118-
u = static_cast<FLT *>(std::malloc(sizeof(FLT) * N)); // targ freqs (3-cmpt)
119-
FLT S3 = (FLT)N3 / 2;
124+
}
125+
if (ndim > 2) {
126+
u = std::make_unique<FLT[]>(N); // targ freqs (3-cmpt)
127+
FLT *u_ptr = u.get();
128+
FLT S3 = (FLT)N3 / 2;
129+
#pragma omp parallel
130+
{
131+
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
120132
#pragma omp for schedule(dynamic, TEST_RANDCHUNK)
121133
for (BIGINT k = 0; k < N; ++k) {
122-
u[k] = S3 * (0.9 + randm11r(&se));
134+
u_ptr[k] = S3 * (0.9 + randm11r(&se));
123135
}
124136
}
125137
}
126138
}
127139

128-
CPX *c = static_cast<CPX *>(std::malloc(sizeof(CPX) * M * ntransf)); // strengths
129-
CPX *F = static_cast<CPX *>(std::malloc(sizeof(CPX) * N * ntransf)); // mode ampls
130-
131-
FLT *x = static_cast<FLT *>(std::malloc(sizeof(FLT) * M)),
132-
*y = nullptr,
133-
*z = nullptr; // NU pts x coords
134-
if (ndim > 1) y = static_cast<FLT *>(std::malloc(sizeof(FLT) * M)); // NU pts y coords
135-
if (ndim > 2) z = static_cast<FLT *>(std::malloc(sizeof(FLT) * M)); // NU pts z coords
140+
auto c = std::make_unique<CPX[]>(M * ntransf); // strengths
141+
auto F = std::make_unique<CPX[]>(N * ntransf); // mode ampls
142+
143+
auto x = std::make_unique<FLT[]>(M);
144+
std::unique_ptr<FLT[]> y;
145+
std::unique_ptr<FLT[]> z; // NU pts coords
146+
if (ndim > 1) y = std::make_unique<FLT[]>(M); // NU pts y coords
147+
if (ndim > 2) z = std::make_unique<FLT[]>(M); // NU pts z coords
148+
FLT *x_ptr = x.get();
149+
FLT *y_ptr = y.get();
150+
FLT *z_ptr = z.get();
151+
CPX *c_ptr = c.get();
136152
#pragma omp parallel
137153
{
138154
unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
139155
#pragma omp for schedule(dynamic, TEST_RANDCHUNK)
140156
for (BIGINT j = 0; j < M; ++j) {
141-
x[j] = PI * randm11r(&se);
142-
if (y) y[j] = PI * randm11r(&se);
143-
if (z) z[j] = PI * randm11r(&se);
157+
x_ptr[j] = PI * randm11r(&se);
158+
if (y_ptr) y_ptr[j] = PI * randm11r(&se);
159+
if (z_ptr) z_ptr[j] = PI * randm11r(&se);
144160
}
145161
#pragma omp for schedule(dynamic, TEST_RANDCHUNK)
146162
for (BIGINT i = 0; i < ntransf * M; i++) // random strengths
147-
c[i] = crandm11r(&se);
163+
c_ptr[i] = crandm11r(&se);
148164
}
149165

150166
// Andrea found the following are needed to get reliable independent timings:
@@ -177,7 +193,15 @@ int main(int argc, char *argv[])
177193
}
178194

179195
timer.restart(); // Guru Step 2
180-
ier = FINUFFT_SETPTS(plan, M, x, y, z, N, s, t, u); //(t1,2: N,s,t,u ignored)
196+
ier = FINUFFT_SETPTS(plan,
197+
M,
198+
x.get(),
199+
y.get(),
200+
z.get(),
201+
N,
202+
s.get(),
203+
t.get(),
204+
u.get()); //(t1,2: N,s,t,u ignored)
181205
double sort_t = timer.elapsedsec();
182206
if (ier) {
183207
std::printf("error (ier=%d)!\n", ier);
@@ -193,7 +217,7 @@ int main(int argc, char *argv[])
193217
}
194218

195219
timer.restart(); // Guru Step 3
196-
ier = FINUFFT_EXECUTE(plan, c, F);
220+
ier = FINUFFT_EXECUTE(plan, c.get(), F.get());
197221
double exec_t = timer.elapsedsec();
198222
if (ier) {
199223
std::printf("error (ier=%d)!\n", ier);
@@ -235,7 +259,8 @@ int main(int argc, char *argv[])
235259
// this used to actually call Alex's old (v1.1) src/finufft?d.cpp routines.
236260
// Since we don't want to ship those, we now call the simple interfaces.
237261

238-
double simpleTime = many_simple_calls(c, F, x, y, z, plan);
262+
double simpleTime =
263+
many_simple_calls(c.get(), F.get(), x.get(), y.get(), z.get(), plan);
239264
if (std::isnan(simpleTime)) return 1;
240265

241266
if (type != 3)
@@ -263,15 +288,6 @@ int main(int argc, char *argv[])
263288
// (must be done *after* many_simple_calls, which sneaks a look at the plan!)
264289
// however, segfaults, maybe because plan->opts.debug changed?
265290

266-
//---------------------------- Free Memory (no need to test if NULL)
267-
std::free(F);
268-
std::free(c);
269-
std::free(x);
270-
std::free(y);
271-
std::free(z);
272-
std::free(s);
273-
std::free(t);
274-
std::free(u);
275291
return 0;
276292
}
277293

perftest/manysmallprobs.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <complex>
2+
#include <vector>
23

34
// public header
45
#include "finufft.h"
@@ -35,17 +36,15 @@ int main(int argc, char *argv[])
3536
int ier;
3637

3738
// generate some random nonuniform points (x) and complex strengths (c):
38-
double *x = static_cast<double *>(std::malloc(sizeof(double) * M));
39-
std::complex<double> *c =
40-
static_cast<std::complex<double> *>(std::malloc(sizeof(std::complex<double>) * M));
39+
std::vector<double> x(M);
40+
std::vector<std::complex<double>> c(M);
4141
for (int j = 0; j < M; ++j) {
4242
x[j] = PI * (2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1); // uniform random in [-pi,pi]
4343
c[j] = 2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1 +
4444
I * (2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1);
4545
}
4646
// allocate output array for the Fourier modes:
47-
std::complex<double> *F = static_cast<std::complex<double> *>(
48-
std::malloc(sizeof(std::complex<double>) * N));
47+
std::vector<std::complex<double>> F(N);
4948

5049
std::printf("repeatedly calling the simple interface: --------------------- \n");
5150
CNTime timer;
@@ -54,7 +53,7 @@ int main(int argc, char *argv[])
5453
// printf("rep %d\n",r);
5554
x[0] = PI * (-1.0 + 2 * static_cast<double>(r) / static_cast<double>(reps)); // one source jiggles around
5655
c[0] = (1.0 + I) * static_cast<double>(r) / static_cast<double>(reps); // one coeff also jiggles
57-
ier = finufft1d1(M, x, c, +1, acc, N, F, nullptr);
56+
ier = finufft1d1(M, x.data(), c.data(), +1, acc, N, F.data(), nullptr);
5857
}
5958
// (note this can't use the many-vectors interface, since the NU points change between calls)
6059
std::complex<double> y = F[0]; // actually use the data so not optimized away
@@ -79,9 +78,9 @@ int main(int argc, char *argv[])
7978
for (int r = 0; r < reps; ++r) { // set the pts and execute
8079
x[0] = PI * (-1.0 + 2 * static_cast<double>(r) / static_cast<double>(reps)); // one source jiggles around
8180
// (of course if most sources *were* in fact fixed, use ZGEMM for them!)
82-
finufft_setpts(plan, M, x, nullptr, nullptr, 0, nullptr, nullptr, nullptr);
81+
finufft_setpts(plan, M, x.data(), nullptr, nullptr, 0, nullptr, nullptr, nullptr);
8382
c[0] = (1.0 + I) * static_cast<double>(r) / static_cast<double>(reps); // one coeff also jiggles
84-
ier = finufft_execute(plan, c, F);
83+
ier = finufft_execute(plan, c.data(), F.data());
8584
}
8685
finufft_destroy(plan);
8786
y = F[0];
@@ -93,8 +92,5 @@ int main(int argc, char *argv[])
9392
ier,
9493
std::real(y),
9594
std::imag(y));
96-
std::free(x);
97-
std::free(c);
98-
std::free(F);
9995
return ier;
10096
}

0 commit comments

Comments
 (0)