Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance the speed of whitenoise; #33

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
88 changes: 79 additions & 9 deletions pmesh/_whitenoise_generics.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
static int
mkname(_has_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs)
{
ptrdiff_t irel[3];
int d;
irel[2] = iabs[2] - self->start[2];
if(irel[2] >= 0 && irel[2] < self->size[2]) return 1;
return 0;
}

static void
mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delta_k, FLOAT re, FLOAT im)
{
Expand All @@ -19,10 +29,46 @@ mkname(_set_mode)(PMeshWhiteNoiseGenerator * self, ptrdiff_t * iabs, char * delt
static void
mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
{

clock_t start, end;
double cpu_time_used;

start = clock();

/* Fill delta_k with gadget scheme */
int d;
int i, j, k;

int signs[3];

{
int compressed = 1;
ptrdiff_t iabs[3] = {self->start[0], self->start[1], 0};

/* if no negative k modes are requested, do not work with negative sign;
* this saves half of the computing time. */

for(k = self->Nmesh[2] / 2 + 1; k < self->Nmesh[2]; k ++) {
iabs[2] = k;
if (mkname(_has_mode)(self, iabs)) {
compressed = 0;
break;
}
}
printf("compressed = %d\n", compressed);
if (compressed) {
/* only half of the fourier space is requested, ignore the conjugates */
signs[0] = 1;
signs[1] = 0;
signs[2] = 0;
} else {
/* full fourier space field is requested */
/* do negative then positive. ordering is import to makesure the positive overwrites nyquist. */
signs[0] = -1;
signs[1] = 1;
signs[2] = 0;
}
}

gsl_rng * rng = gsl_rng_alloc(gsl_rng_ranlxd1);
gsl_rng_set(rng, seed);

Expand All @@ -46,6 +92,14 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
}
gsl_rng_free(rng);

end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("time used in seeds = %g\n", cpu_time_used);

start = end;
ptrdiff_t skipped = 0;
ptrdiff_t used = 0;

for(i = self->start[0];
i < self->start[0] + self->size[0];
i ++) {
Expand Down Expand Up @@ -73,8 +127,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
d2 = 1;
}

int sign; /* sign in the k plane */
for(sign = -1; sign <= 1; sign += 2) {
int isign;
for(isign = 0; signs[isign] != 0; isign ++) {
int sign = signs[isign];

unsigned int seed_lower, seed_this;

/* the lower quadrant generator */
Expand Down Expand Up @@ -103,14 +159,24 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
SAMPLE(this_rng, &ampl, &phase);
}

/* we want two numbers that are of std ~ 1/sqrt(2) */
ampl = sqrt(- log(ampl));
ptrdiff_t iabs[3] = {i, j, k};

/* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */
if(self->unitary)
ampl = 1.0;
/* mode is not there, skip it */
if(!mkname(_has_mode)(self, iabs)) {
skipped ++;
continue;
} else {
used ++;
}

ptrdiff_t iabs[3] = {i, j, k};
/* we want two numbers that are of std ~ 1/sqrt(2) */
if(self->unitary) {
/* Unitary gaussian, the norm of real and imag is fixed to 1/sqrt(2) */
ampl = 1.0;
} else {
/* box-mueller */
ampl = sqrt(- log(ampl));
}

FLOAT re = ampl * cos(phase);
FLOAT im = ampl * sin(phase);
Expand Down Expand Up @@ -154,6 +220,10 @@ mkname(_generic_fill)(PMeshWhiteNoiseGenerator * self, void * delta_k, int seed)
gsl_rng_free(lower_rng);
gsl_rng_free(this_rng);
}
end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("time used in fill = %g\n", cpu_time_used);
printf("skipped = %td used = %td\n", skipped, used);
}

/* Footnotes */
Expand Down
1 change: 1 addition & 0 deletions pmesh/_whitenoise_imp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <time.h>

#include <gsl/config.h>
#include <gsl/gsl_rng.h>
Expand Down
80 changes: 61 additions & 19 deletions pmesh/pm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numbers # for testing Numbers
import warnings

import functools
def is_inplace(out):
return out is Ellipsis

Expand Down Expand Up @@ -412,21 +412,21 @@ def resample(self, out):

# ensure the down sample is real
for i, slab in zip(complex.slabs.i, complex.slabs):
mask = numpy.bitwise_and.reduce(
mask = functools.reduce(numpy.bitwise_and,
[(n - ii) % n == ii
for ii, n in zip(i, complex.Nmesh)])
slab.imag[mask] = 0

# remove the nyquist of the output
# FIXME: the nyquist is messy due to hermitian constraints
# let's do not touch them till we know they are important.
mask = numpy.bitwise_or.reduce(
mask = functools.reduce(numpy.bitwise_or,
[ ii == n // 2
for ii, n in zip(i, complex.Nmesh)])
slab[mask] = 0

# also remove the nyquist of the input
mask = numpy.bitwise_or.reduce(
mask = functools.reduce(numpy.bitwise_or,
[ ii == n // 2
for ii, n in zip(i, self.Nmesh)])
slab[mask] = 0
Expand Down Expand Up @@ -474,7 +474,7 @@ def preview(self, Nmesh=None, axes=None, resampler=None, method=None):
if all(Nmesh == self.pm.Nmesh): Nmesh = None

if Nmesh is not None:
pm = self.pm.resize(Nmesh)
pm = self.pm.reshape(Nmesh)
if method is None:
if any(Nmesh < self.pm.Nmesh): method = 'downsample'
else : method = 'upsample'
Expand Down Expand Up @@ -1005,8 +1005,26 @@ class ParticleMesh(object):

"""

def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_method='estimate', resampler='cic'):
""" create a PM object. """
def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8',
plan_method='estimate', resampler='cic', transposed=True):
""" create a PM object.

Parameters
----------
transposed : bool
if True, use the transposed data decomposition in fourier space;
transposed data decomposition results faster transforms,
but makes the whitenoise generation in 3d very slow.
plan_method : string
method for planning, `estimate`, `exhaustive`, `measure`.
resampler : string or ResampleWindow
used to determine the default size of the domain decomposition
np : the process mesh
if None, automatically infer -- (n-1)d decomposition on (n)d mesh,
Nmesh : tuple or alike
size of the mesh. len(Nmesh) is the dimension of the system.

"""
if comm is None:
comm = MPI.COMM_WORLD

Expand All @@ -1020,6 +1038,8 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
elif len(Nmesh) == 1:
np = []

self.np = np

if len(np) == len(Nmesh):
# only implemented for non-padded and destroy input
self._use_padded = False
Expand Down Expand Up @@ -1065,7 +1085,7 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth

_cache_args = (tuple(self.Nmesh), tuple(self.BoxSize),
MPI._addressof(comm), comm.rank, comm.size,
tuple(np), self.dtype, plan_method)
tuple(np), self.dtype, plan_method, paddedflag, transposed)

template = _pm_cache.get(_cache_args, None)

Expand All @@ -1084,10 +1104,20 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
else:
self.procmesh = pfft.ProcMesh(np, comm=comm)

if transposed:
partition_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag
forward_flags = pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag
backward_flags = pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag
else:
partition_flags = paddedflag
forward_flags = paddedflag
backward_flags = paddedflag

self.transposed = transposed
self.partition = pfft.Partition(forward,
self.Nmesh,
self.procmesh,
pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
partition_flags)

bufferin = pfft.LocalBuffer(self.partition)
bufferout = pfft.LocalBuffer(self.partition)
Expand All @@ -1106,17 +1136,17 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
else:
self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
bufferin, bufferout, forward,
plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
plan_method | forward_flags)
self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
bufferout, bufferin, backward,
plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag)
plan_method | backward_flags)

self.ipforward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
bufferin, bufferin, forward,
plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | paddedflag)
plan_method | forward_flags)
self.ipbackward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
bufferout, bufferout, backward,
plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | paddedflag)
plan_method | backward_flags)

self.domain = domain.GridND(self.partition.i_edges, comm=self.comm)

Expand Down Expand Up @@ -1177,27 +1207,39 @@ def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_meth
_pm_cache[_cache_args] = self

def resize(self, Nmesh):
warnings.warn("ParticleMesh.resize method is deprecated. Use reshape method", DeprecationWarning)
return self.reshape(Nmesh=Nmesh)

def reshape(self, Nmesh=None, transposed=None):
"""
Create a resized ParticleMesh object, changing the resolution Nmesh.
Create a reshaped ParticleMesh object, changing the resolution Nmesh, or even
dimension.

Parameters
----------
Nmesh : int or array_like or None
The new resolution
transposed : boolean
The new transposed format.

Returns
-------
A ParticleMesh of the given resolution. If Nmesh is None
or the same as ``self.Nmesh``, a reference of ``self`` is returned.
A ParticleMesh of the given resolution and transpose property
"""
if Nmesh is None: Nmesh = self.Nmesh
Nmesh_ = self.Nmesh.copy()
Nmesh_[...] = Nmesh
if all(self.Nmesh == Nmesh_): return self

if transposed is None:
transposed = self.transposed

return ParticleMesh(BoxSize=self.BoxSize,
Nmesh=Nmesh_,
dtype=self.dtype, comm=self.comm)
dtype=self.dtype,
comm=self.comm,
resampler=self.resampler,
np=self.np,
transposed=self.transposed)

def create(self, mode, base=None, value=None, zeros=False):
"""
Expand Down Expand Up @@ -1271,7 +1313,7 @@ def generate_whitenoise(self, seed, unitary=False, mean=0, mode='complex', base=

# add mean
def filter(k, v):
mask = numpy.bitwise_and.reduce([ki == 0 for ki in k])
mask = functools.reduce(numpy.bitwise_and, [ki == 0 for ki in k])
v[mask] = mean
return v

Expand Down
22 changes: 22 additions & 0 deletions pmesh/tests/test_pm.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,28 @@ def test_fft(comm):
real2 = complex.c2r()
assert_almost_equal(numpy.asarray(real), numpy.asarray(real2), decimal=7)

@MPITest(commsize=(1,4))
def test_whitenoise_untransposed(comm):
pm_ut = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=False)
pm_t = ParticleMesh(BoxSize=8.0, Nmesh=[4, 4], comm=comm, dtype='f4', transposed=True)

f1 = pm_ut.generate_whitenoise(seed=3333, mode='complex')
f2 = pm_t.generate_whitenoise(seed=3333, mode='complex')

f1r = numpy.concatenate(comm.allgather(numpy.array(f1.ravel())))
f2r = numpy.concatenate(comm.allgather(numpy.array(f2.ravel())))

assert_array_equal(f1r, f2r)

# this should have asserted r2c transforms as well.
r1 = pm_ut.generate_whitenoise(seed=3333, mode='real')
r2 = pm_t.generate_whitenoise(seed=3333, mode='real')

r1r = numpy.concatenate(comm.allgather(numpy.array(r1.ravel())))
r2r = numpy.concatenate(comm.allgather(numpy.array(r2.ravel())))

assert_array_equal(r1r, r2r)

@MPITest(commsize=(1,4))
def test_inplace_fft(comm):
pm = ParticleMesh(BoxSize=8.0, Nmesh=[8, 8], comm=comm, dtype='f8')
Expand Down