// Patch summary (commentary only; the patch text below is unchanged).
//
// Adds an opt-in "stable redistribute" switch and threads it through four files:
//
// * Src/Particle/AMReX_ParticleBufferMap.H
//     - GetBucket's constructor now takes two `const int*` arguments and stores
//       them directly, instead of taking two Gpu::DeviceVector references and
//       calling dataPtr() on them.
//     - getBucketFunctor() passes d_lev_gid_to_bucket.data() / d_lev_offsets.data().
//     - New getHostBucketFunctor() builds the same functor type from
//       m_lev_gid_to_bucket.data() / m_lev_offsets.data()
//       (presumably the host-side copies of the same tables -- confirm in the class).
//
// * Src/Particle/AMReX_ParticleCommunication.H (struct ParticleCopyPlan)
//     - getBucket is chosen from pc.stableRedistribute(): the host functor when it
//       is nonzero, the existing functor otherwise.
//     - When the flag is nonzero, h_box_counts is resized to m_box_counts_d.size()
//       with fill value 0. For each (lev, gid) with num_copies != 0, op.m_boxes and
//       op.m_levels are copied device-to-host, and a plain sequential `for` loop
//       assigns index = h_box_counts[bucket]++ to every entry with dst_box >= 0, so
//       indices within a bucket are handed out in increasing i order. The result is
//       copied host-to-device into m_dst_indices[lev][gid].
//     - After the level/grid loops, h_box_counts is copied into m_box_counts_d
//       before the exclusive_scan.
//     - When the flag is zero, the original AMREX_FOR_1D + Gpu::Atomic::Add code is
//       kept, moved into the `else` branch.
//     - NOTE(review): in the host loop, h_dst_indices[i] is only written when
//       dst_box >= 0, yet the whole vector is copied to the device. Whether
//       HostVector(n) initializes its elements is not visible here -- confirm
//       those slots are never read.
//     - NOTE(review): the flag-on path does two device-to-host copies and one
//       host-to-device copy per (lev, gid); presumably an accepted cost of the
//       option -- confirm.
//
// * Src/Particle/AMReX_ParticleContainerBase.H
//     - New public accessors stableRedistribute() / setStableRedistribute(int) and
//       a new protected member `int m_stable_redistribute = 0`.
//
// * Tests/Particles/Redistribute/main.cpp
//     - TestParams gains `int stable_redistribute = 0`, read with
//       pp.query("stable_redistribute", ...) and forwarded to
//       pc.setStableRedistribute(...) right after the container is constructed.
//
// NOTE(review): this copy of the patch has its line breaks collapsed, and template
// argument lists look stripped (e.g. `static_cast(`, `Gpu::HostVector h_boxes`,
// `std::make_unique()`), presumably an extraction artifact. Confirm against the
// upstream change before applying.
diff --git a/Src/Particle/AMReX_ParticleBufferMap.H b/Src/Particle/AMReX_ParticleBufferMap.H index 476d3d53439..33cec4b8236 100644 --- a/Src/Particle/AMReX_ParticleBufferMap.H +++ b/Src/Particle/AMReX_ParticleBufferMap.H @@ -36,10 +36,10 @@ struct GetBucket const int* m_lev_gid_to_bucket; const int* m_lev_offsets; - GetBucket (const Gpu::DeviceVector& lev_gid_to_bucket, - const Gpu::DeviceVector& lev_offsets) - : m_lev_gid_to_bucket(lev_gid_to_bucket.dataPtr()), - m_lev_offsets(lev_offsets.dataPtr()) + GetBucket (const int* lev_gid_to_bucket_ptr, + const int* lev_offsets_ptr) + : m_lev_gid_to_bucket(lev_gid_to_bucket_ptr), + m_lev_offsets(lev_offsets_ptr) {} AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -154,7 +154,9 @@ public: } [[nodiscard]] GetPID getPIDFunctor () const noexcept { return GetPID(d_bucket_to_pid, d_lev_gid_to_bucket, d_lev_offsets);} - [[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket, d_lev_offsets);} + [[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket.data(), d_lev_offsets.data());} + [[nodiscard]] GetBucket getHostBucketFunctor () const noexcept { return GetBucket(m_lev_gid_to_bucket.data(), m_lev_offsets.data());} + }; } // namespace amrex diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H index 91933c75e5b..00bf423478c 100644 --- a/Src/Particle/AMReX_ParticleCommunication.H +++ b/Src/Particle/AMReX_ParticleCommunication.H @@ -154,7 +154,12 @@ struct ParticleCopyPlan m_box_counts_d.resize(num_buckets+1, 0); m_box_offsets.resize(num_buckets+1); auto* p_dst_box_counts = m_box_counts_d.dataPtr(); - auto getBucket = pc.BufferMap().getBucketFunctor(); + auto getBucket = pc.stableRedistribute() ? 
pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor(); + + Gpu::HostVector h_box_counts; + if (pc.stableRedistribute() ) { + h_box_counts.resize(m_box_counts_d.size(), 0); + } m_dst_indices.resize(num_levels); for (int lev = 0; lev < num_levels; ++lev) @@ -166,24 +171,49 @@ struct ParticleCopyPlan if (num_copies == 0) { continue; } m_dst_indices[lev][gid].resize(num_copies); - const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr(); - const auto* p_levs = op.m_levels[lev].at(gid).dataPtr(); - auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr(); - - AMREX_FOR_1D ( num_copies, i, - { - int dst_box = p_boxes[i]; - if (dst_box >= 0) - { - int dst_lev = p_levs[i]; - int index = static_cast(Gpu::Atomic::Add( - &p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U)); - p_dst_indices[i] = index; + if (pc.stableRedistribute()) { + const Gpu::DeviceVector& d_boxes = op.m_boxes[lev].at(gid); + Gpu::HostVector h_boxes(d_boxes.size()); + Gpu::copy(Gpu::deviceToHost,d_boxes.begin(),d_boxes.end(),h_boxes.begin()); + + const Gpu::DeviceVector& d_levs = op.m_levels[lev].at(gid); + Gpu::HostVector h_levs(d_levs.size()); + Gpu::copy(Gpu::deviceToHost,d_levs.begin(),d_levs.end(),h_levs.begin()); + + Gpu::HostVector h_dst_indices(num_copies); + for (int i = 0; i < num_copies; ++i) { + int dst_box = h_boxes[i]; + if (dst_box >= 0) { + int dst_lev = h_levs[i]; + int index = static_cast(h_box_counts[getBucket(dst_lev, dst_box)]++); + h_dst_indices[i] = index; + } } - }); + Gpu::copy(Gpu::hostToDevice,h_dst_indices.begin(),h_dst_indices.end(),m_dst_indices[lev][gid].begin()); + } + else { + const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr(); + const auto* p_levs = op.m_levels[lev].at(gid).dataPtr(); + auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr(); + AMREX_FOR_1D ( num_copies, i, + { + int dst_box = p_boxes[i]; + if (dst_box >= 0) + { + int dst_lev = p_levs[i]; + int index = static_cast(Gpu::Atomic::Add( + &p_dst_box_counts[getBucket(dst_lev, dst_box)], 
1U)); + p_dst_indices[i] = index; + } + }); + } } } + if (pc.stableRedistribute()) { + Gpu::copy(Gpu::hostToDevice,h_box_counts.begin(),h_box_counts.end(),m_box_counts_d.begin()); + } + amrex::Gpu::exclusive_scan(m_box_counts_d.begin(), m_box_counts_d.end(), m_box_offsets.begin()); diff --git a/Src/Particle/AMReX_ParticleContainerBase.H b/Src/Particle/AMReX_ParticleContainerBase.H index 64adf750f0f..433890b1579 100644 --- a/Src/Particle/AMReX_ParticleContainerBase.H +++ b/Src/Particle/AMReX_ParticleContainerBase.H @@ -231,6 +231,10 @@ public: void SetVerbose (int verbose) { m_verbose = verbose; } + [[nodiscard]] int stableRedistribute () const {return m_stable_redistribute; } + + void setStableRedistribute (int stable) { m_stable_redistribute = stable; } + const ParticleBufferMap& BufferMap () const {return m_buffer_map;} Vector NeighborProcs(int ngrow) const @@ -260,6 +264,7 @@ protected: void defineBufferMap () const; int m_verbose{0}; + int m_stable_redistribute = 0; std::unique_ptr m_gdb_object = std::make_unique(); ParGDBBase* m_gdb{nullptr}; Vector > m_dummy_mf; diff --git a/Tests/Particles/Redistribute/main.cpp b/Tests/Particles/Redistribute/main.cpp index b08c7515909..a9314cce4f1 100644 --- a/Tests/Particles/Redistribute/main.cpp +++ b/Tests/Particles/Redistribute/main.cpp @@ -328,6 +328,7 @@ struct TestParams int do_regrid; int sort; int test_level_lost = 0; + int stable_redistribute = 0; }; void testRedistribute(); @@ -358,6 +359,7 @@ void get_test_params(TestParams& params, const std::string& prefix) pp.query("num_runtime_real", num_runtime_real); pp.query("num_runtime_int", num_runtime_int); pp.query("remove_negative", remove_negative); + pp.query("stable_redistribute", params.stable_redistribute); params.sort = 0; pp.query("sort", params.sort); @@ -410,6 +412,7 @@ void testRedistribute () } TestParticleContainer pc(geom, dm, ba, rr); + pc.setStableRedistribute(params.stable_redistribute); IntVect nppc(params.num_ppc);