Skip to content

Commit

Permalink
Add option to do stable redistribute with GPUs (AMReX-Codes#4200)
Browse files Browse the repository at this point in the history
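Currently, the order of particles in memory is not reproducible if you
run the same simulation twice on the GPU, because destination indices are
assigned with atomic adds whose ordering can vary from run to run. This
usually doesn't matter, but for cases where it does, this adds a slower,
stable option that assigns the indices serially on the host.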

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
  • Loading branch information
atmyers authored Oct 31, 2024
1 parent 47108f0 commit b9d549b
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 20 deletions.
12 changes: 7 additions & 5 deletions Src/Particle/AMReX_ParticleBufferMap.H
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ struct GetBucket
const int* m_lev_gid_to_bucket;
const int* m_lev_offsets;

GetBucket (const Gpu::DeviceVector<int>& lev_gid_to_bucket,
const Gpu::DeviceVector<int>& lev_offsets)
: m_lev_gid_to_bucket(lev_gid_to_bucket.dataPtr()),
m_lev_offsets(lev_offsets.dataPtr())
// Builds the functor from raw pointers to the two lookup tables.
// The pointers are stored as-is (nothing is copied), so the pointed-to
// arrays must outlive this object and must be readable from wherever
// operator() is invoked.
// NOTE(review): callers appear to pass device pointers for use inside GPU
// kernels and host pointers for host-side loops -- confirm at each call site.
GetBucket (const int* lev_gid_to_bucket_ptr,
const int* lev_offsets_ptr)
: m_lev_gid_to_bucket(lev_gid_to_bucket_ptr),
m_lev_offsets(lev_offsets_ptr)
{}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
Expand Down Expand Up @@ -154,7 +154,9 @@ public:
}

// Returns a GetPID functor constructed from d_bucket_to_pid,
// d_lev_gid_to_bucket and d_lev_offsets.
// NOTE(review): the d_ prefix presumably marks device-resident vectors -- confirm.
[[nodiscard]] GetPID getPIDFunctor () const noexcept { return GetPID(d_bucket_to_pid, d_lev_gid_to_bucket, d_lev_offsets);}
[[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket, d_lev_offsets);}
// Returns a GetBucket functor that reads the d_lev_gid_to_bucket and
// d_lev_offsets tables through their data() pointers.
// The functor holds raw pointers only, so it is valid only while those
// vectors are neither reallocated nor destroyed.
// NOTE(review): presumably these are device pointers, for use in GPU kernels -- confirm.
[[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket.data(), d_lev_offsets.data());}
// Returns a GetBucket functor that reads the m_lev_gid_to_bucket and
// m_lev_offsets tables through their data() pointers.
// The functor holds raw pointers only, so it is valid only while those
// vectors are neither reallocated nor destroyed.
// NOTE(review): presumably these are the host-side copies of the tables,
// for use in code that runs on the CPU -- confirm.
[[nodiscard]] GetBucket getHostBucketFunctor () const noexcept { return GetBucket(m_lev_gid_to_bucket.data(), m_lev_offsets.data());}

};

} // namespace amrex
Expand Down
60 changes: 45 additions & 15 deletions Src/Particle/AMReX_ParticleCommunication.H
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,12 @@ struct ParticleCopyPlan
m_box_counts_d.resize(num_buckets+1, 0);
m_box_offsets.resize(num_buckets+1);
auto* p_dst_box_counts = m_box_counts_d.dataPtr();
auto getBucket = pc.BufferMap().getBucketFunctor();
auto getBucket = pc.stableRedistribute() ? pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor();

Gpu::HostVector<unsigned int> h_box_counts;
if (pc.stableRedistribute() ) {
h_box_counts.resize(m_box_counts_d.size(), 0);
}

m_dst_indices.resize(num_levels);
for (int lev = 0; lev < num_levels; ++lev)
Expand All @@ -166,24 +171,49 @@ struct ParticleCopyPlan
if (num_copies == 0) { continue; }
m_dst_indices[lev][gid].resize(num_copies);

const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();

AMREX_FOR_1D ( num_copies, i,
{
int dst_box = p_boxes[i];
if (dst_box >= 0)
{
int dst_lev = p_levs[i];
int index = static_cast<int>(Gpu::Atomic::Add(
&p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
p_dst_indices[i] = index;
if (pc.stableRedistribute()) {
const Gpu::DeviceVector<int>& d_boxes = op.m_boxes[lev].at(gid);
Gpu::HostVector<int> h_boxes(d_boxes.size());
Gpu::copy(Gpu::deviceToHost,d_boxes.begin(),d_boxes.end(),h_boxes.begin());

const Gpu::DeviceVector<int>& d_levs = op.m_levels[lev].at(gid);
Gpu::HostVector<int> h_levs(d_levs.size());
Gpu::copy(Gpu::deviceToHost,d_levs.begin(),d_levs.end(),h_levs.begin());

Gpu::HostVector<int> h_dst_indices(num_copies);
for (int i = 0; i < num_copies; ++i) {
int dst_box = h_boxes[i];
if (dst_box >= 0) {
int dst_lev = h_levs[i];
int index = static_cast<int>(h_box_counts[getBucket(dst_lev, dst_box)]++);
h_dst_indices[i] = index;
}
}
});
Gpu::copy(Gpu::hostToDevice,h_dst_indices.begin(),h_dst_indices.end(),m_dst_indices[lev][gid].begin());
}
else {
const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();
AMREX_FOR_1D ( num_copies, i,
{
int dst_box = p_boxes[i];
if (dst_box >= 0)
{
int dst_lev = p_levs[i];
int index = static_cast<int>(Gpu::Atomic::Add(
&p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
p_dst_indices[i] = index;
}
});
}
}
}

if (pc.stableRedistribute()) {
Gpu::copy(Gpu::hostToDevice,h_box_counts.begin(),h_box_counts.end(),m_box_counts_d.begin());
}

amrex::Gpu::exclusive_scan(m_box_counts_d.begin(), m_box_counts_d.end(),
m_box_offsets.begin());

Expand Down
5 changes: 5 additions & 0 deletions Src/Particle/AMReX_ParticleContainerBase.H
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ public:

void SetVerbose (int verbose) { m_verbose = verbose; }

// Returns the stable-redistribute flag (m_stable_redistribute).
// It is stored as an int; nonzero means the stable option is requested.
[[nodiscard]] int stableRedistribute () const {return m_stable_redistribute; }

// Sets the stable-redistribute flag to `stable`; the value is stored
// unchanged in m_stable_redistribute (no validation or clamping).
void setStableRedistribute (int stable) { m_stable_redistribute = stable; }

const ParticleBufferMap& BufferMap () const {return m_buffer_map;}

Vector<int> NeighborProcs(int ngrow) const
Expand Down Expand Up @@ -260,6 +264,7 @@ protected:
void defineBufferMap () const;

int m_verbose{0};
int m_stable_redistribute = 0;
std::unique_ptr<ParGDB> m_gdb_object = std::make_unique<ParGDB>();
ParGDBBase* m_gdb{nullptr};
Vector<std::unique_ptr<MultiFab> > m_dummy_mf;
Expand Down
3 changes: 3 additions & 0 deletions Tests/Particles/Redistribute/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ struct TestParams
int do_regrid;
int sort;
int test_level_lost = 0;
int stable_redistribute = 0;
};

void testRedistribute();
Expand Down Expand Up @@ -358,6 +359,7 @@ void get_test_params(TestParams& params, const std::string& prefix)
pp.query("num_runtime_real", num_runtime_real);
pp.query("num_runtime_int", num_runtime_int);
pp.query("remove_negative", remove_negative);
pp.query("stable_redistribute", params.stable_redistribute);

params.sort = 0;
pp.query("sort", params.sort);
Expand Down Expand Up @@ -410,6 +412,7 @@ void testRedistribute ()
}

TestParticleContainer pc(geom, dm, ba, rr);
pc.setStableRedistribute(params.stable_redistribute);

IntVect nppc(params.num_ppc);

Expand Down

0 comments on commit b9d549b

Please sign in to comment.