diff --git a/inc/owOpenCLConstant.h b/inc/owOpenCLConstant.h index 3cf2bb89..ba0bf4f6 100644 --- a/inc/owOpenCLConstant.h +++ b/inc/owOpenCLConstant.h @@ -50,7 +50,7 @@ #define INTEL_OPENCL_DEBUG 0 -const int local_NDRange_size = 256; +const int local_NDRange_size = MAX_NEIGHBOR_COUNT; enum DEVICE { CPU = 0, GPU = 1, ALL = 2}; enum INTEGRATOR { EULER = 0, LEAPFROG = 1 }; diff --git a/src/owOpenCLSolver.cpp b/src/owOpenCLSolver.cpp index 4a468ce9..b599f284 100644 --- a/src/owOpenCLSolver.cpp +++ b/src/owOpenCLSolver.cpp @@ -37,6 +37,7 @@ #include #include "owOpenCLSolver.h" +#include "owOpenCLConstant.h" int comparator(const void *v1, const void *v2); @@ -731,11 +732,11 @@ owOpenCLSolver::_run_pcisph_computeDensity(owConfigProperty *config) { pcisph_computeDensity.setArg(5, config->getParticleCount()); int err = queue.enqueueNDRangeKernel( pcisph_computeDensity, cl::NullRange, - cl::NDRange((int)(config->getParticleCount_RoundUp())), + cl::NDRange((int)(config->getParticleCount_RoundUp()*local_NDRange_size/2)), #if defined(__APPLE__) cl::NullRange, nullptr, nullptr); #else - cl::NDRange((int)(local_NDRange_size)), nullptr, nullptr); + cl::NDRange((int)(local_NDRange_size/2)), nullptr, nullptr); #endif #if QUEUE_EACH_KERNEL queue.finish(); diff --git a/src/sphFluid.cl b/src/sphFluid.cl index a7b4f942..75d72db4 100644 --- a/src/sphFluid.cl +++ b/src/sphFluid.cl @@ -478,7 +478,7 @@ __kernel void pcisph_computeDensity( __global uint * particleIndexBack, uint PARTICLE_COUNT ) { - int id = get_global_id( 0 ); + int id = get_group_id( 0 ); if( id >= PARTICLE_COUNT ) return; id = particleIndexBack[id]; //track selected particle (indices are not shuffled anymore) int idx = id * MAX_NEIGHBOR_COUNT; @@ -488,25 +488,41 @@ __kernel void pcisph_computeDensity( float hScaled6 = hScaled2*hScaled2*hScaled2; int real_nc = 0; - do // gather density contribution from all neighbors (if they exist) - { - if( NEIGHBOR_MAP_ID( neighborMap[ idx + nc ] ) != NO_PARTICLE_ID ) - { + int lid = get_local_id(0); + __local float l_density[MAX_NEIGHBOR_COUNT/2]; + + l_density[lid] = 0.0; + barrier(CLK_LOCAL_MEM_FENCE); + + nc = lid; + int offset = MAX_NEIGHBOR_COUNT/2; + + if( NEIGHBOR_MAP_ID( neighborMap[ idx + nc ] ) != NO_PARTICLE_ID ) { r_ij2= NEIGHBOR_MAP_DISTANCE( neighborMap[ idx + nc ] ); // distance is already scaled here r_ij2 *= r_ij2; - if(r_ij2hScaled2) printf("=Error: r_ij/h = %f\n", NEIGHBOR_MAP_DISTANCE( neighborMap[ idx + nc ] ) / hScaled); - real_nc++; + if(r_ij2