Initial import

Guilhem Lavaux 2023-05-29 10:41:03 +02:00
commit 56a50eead3
820 changed files with 192077 additions and 0 deletions


@@ -0,0 +1,333 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/hermiticity_fixup.cpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2019 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#include <set>
#include <array>
#include <algorithm>
#include "libLSS/tools/console.hpp"
#include "libLSS/tools/errors.hpp"
#include "libLSS/tools/mpi_fftw_helper.hpp"
#include "libLSS/samplers/core/types_samplers.hpp"
#include "libLSS/tools/mpi/ghost_planes.hpp"
#include "libLSS/tools/hermiticity_fixup.hpp"
static constexpr bool ULTRA_VERBOSE = true;
using namespace LibLSS;
template <typename T, size_t Nd>
Hermiticity_fixer<T, Nd>::Hermiticity_fixer(Mgr_p mgr_)
: comm(mgr_->getComm()), mgr(mgr_) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::set<ssize_t> wanted_planes, owned_planes;
std::array<ssize_t, Nd - 1> dims;
std::copy(mgr->N.begin() + 1, mgr->N.end(), dims.begin());
dims[Nd - 2] = dims[Nd - 2] / 2 + 1;
{
size_t i_min = mgr->startN0;
size_t i_max = mgr->startN0 + mgr->localN0;
ctx.format("own: i_min=%d, i_max=%d", i_min, i_max);
for (size_t i = i_min; i < i_max; i++) {
owned_planes.insert(i);
}
}
{
size_t i_min = std::max(size_t(mgr->startN0), mgr->N[0] / 2 + 1);
size_t i_max = mgr->startN0 + mgr->localN0;
ctx.format("want: i_min=%d, i_max=%d", i_min, i_max);
for (size_t i = i_min; i < i_max; i++) {
size_t conj_plane = mgr->N[0] - i;
if (!mgr->on_core(conj_plane)) {
wanted_planes.insert(conj_plane);
}
}
}
ghosts.setup(comm, wanted_planes, owned_planes, dims, mgr->N[0]);
}
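// Helpers for the fixers below: encode_index/decode_index map between a flat
// index and an Nd-dimensional index, find_conjugate computes the
// frequency-reversed index k -> N - k (component-wise, with 0 mapped to 0),
// and has_nyquist tells whether an index touches a self-conjugate (DC or
// Nyquist) mode.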
template <size_t Nd>
static ssize_t encode_index(
std::array<ssize_t, Nd> const &index, std::array<size_t, Nd> const &N) {
ssize_t ret = 0;
for (size_t i = 0; i < Nd; i++)
ret = ret * N[i] + index[i];
return ret;
}
template <size_t Nd>
static void decode_index(
ssize_t coded_index, std::array<ssize_t, Nd> &decoded,
std::array<size_t, Nd> const &N) {
for (size_t i = Nd; i > 0; i--) {
size_t j = i - 1;
ssize_t tmp = coded_index / N[j];
ssize_t tmp2 = coded_index - tmp * N[j];
decoded[j] = tmp2;
coded_index = tmp;
}
}
template <size_t Nd>
static void find_conjugate(
std::array<ssize_t, Nd> &reversed_index,
std::array<ssize_t, Nd> const &index, std::array<size_t, Nd> const &N) {
for (size_t i = 0; i < Nd; i++) {
if (index[i] == 0)
reversed_index[i] = 0;
else
reversed_index[i] = N[i] - index[i];
}
}
template <size_t Nd>
static bool
has_nyquist(std::array<ssize_t, Nd> &index, std::array<size_t, Nd> const &N) {
for (size_t i = 0; i < Nd; i++) {
if (index[i] == N[i] / 2 || index[i] == 0)
return true;
}
return false;
}
// ---------------------------------------------------------------------------
// Forward hermiticity fixer
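// fix_plane() enforces a(k) = conj(a(-k)) plane by plane: the (Dim != 1)
// overload handles the upper half of the slowest axis, reading conjugate
// planes either locally or from the ghost-plane cache, then recurses on the
// self-conjugate hyperplanes k0 = 0 and k0 = N0/2; the 1-D overload
// terminates the recursion.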
template <
size_t rank, typename Mgr, typename Ghosts, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim == 1, void>::type
fix_plane(Mgr &mgr, Ghosts &&ghosts, CArray &&a, size_t *N) {
std::array<size_t, 1> current_N = {N[0]};
size_t Ntot = N[0];
size_t N0_HC = N[0] / 2;
#pragma omp parallel for
for (size_t i = 1; i < N0_HC; i++) {
size_t current, conj_current;
current = i;
conj_current = current_N[0] - i;
a[conj_current] = std::conj(a[current]);
}
for (size_t i : {size_t(0), N0_HC}) {
a[i].imag(0);
}
}
template <bool full, size_t Nd, typename AccessDirect, typename AccessConj>
static void direct_fix(
std::array<size_t, Nd> const &current_N, AccessDirect &&direct_access,
AccessConj &&conj_access) {
size_t Ntot =
full ? array::product(current_N) / 2 : array::product(current_N);
#pragma omp parallel for
for (size_t i = 0; i < Ntot; i++) {
std::array<ssize_t, Nd> current, conj_current;
decode_index(i, current, current_N);
//if (!has_nyquist(current, current_N))
{
find_conjugate(conj_current, current, current_N);
direct_access(current) = std::conj(conj_access(conj_current));
}
}
}
template <
size_t rank, typename Mgr, typename Ghosts, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim != 1, void>::type
fix_plane(Mgr &mgr, Ghosts &&ghosts, CArray &&a, size_t *N) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::array<size_t, Dim> current_N;
std::copy(N, N + Dim, current_N.begin());
size_t N0_HC = N[0] / 2;
if (rank != 0) {
auto accessor = [&a](auto &&x) -> auto & { return a(x); };
direct_fix<true>(current_N, accessor, accessor);
} else if (mgr.startN0 + mgr.localN0 > N0_HC) {
size_t i_min = std::max(N0_HC, size_t(mgr.startN0));
size_t i_max = mgr.startN0 + mgr.localN0;
std::array<size_t, Dim - 1> sub_N;
std::copy(current_N.begin() + 1, current_N.end(), sub_N.begin());
ctx.format("i_min = %d, i_max = %d", i_min, i_max);
for (size_t i0 = i_min; i0 < i_max; i0++) {
size_t i0_conj = N[0] - i0;
auto this_plane = a[i0];
auto direct_access = [&this_plane](auto &&x) -> auto & {
return this_plane(x);
};
if (mgr.on_core(i0_conj)) {
auto conj_plane = a[i0_conj];
auto conj_direct_access = [&conj_plane](auto &&x) -> auto & {
return conj_plane(x);
};
direct_fix<false>(sub_N, direct_access, conj_direct_access);
} else {
ctx.format(" Fix plane %d using i0_conj=%d from remote", i0, i0_conj);
auto conj_plane = ghosts(i0_conj);
direct_fix<false>(
sub_N, direct_access, [&conj_plane](auto &&x) -> auto & {
return conj_plane(x);
});
}
}
}
if (rank != 0 || mgr.on_core(0))
fix_plane<rank + 1>(mgr, ghosts, a[0], N + 1);
if (rank != 0 || mgr.on_core(N0_HC))
fix_plane<rank + 1>(mgr, ghosts, a[N0_HC], N + 1);
}
template <typename T, size_t Nd>
void Hermiticity_fixer<T, Nd>::forward(CArrayRef &a) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
// Grab the planes that are required to build the Nyquist correction
ghosts.synchronize(a);
std::array<size_t, Nd> N = mgr->N;
size_t N_HC = mgr->N_HC;
auto idx = array::make_star_indices<Nd - 1>(boost::indices);
auto idx_g = array::make_star_indices<Nd - 2>(boost::indices);
fix_plane<0>(
*mgr,
[this, idx_g, N_HC](ssize_t plane) {
return array::slice_array(ghosts.getPlane(plane), idx_g[0]);
},
array::slice_array(a, idx[0]), N.data());
fix_plane<0>(
*mgr,
[this, idx_g, N_HC](ssize_t plane) {
return array::slice_array(ghosts.getPlane(plane), idx_g[N_HC - 1]);
},
array::slice_array(a, idx[N_HC - 1]), N.data());
ghosts.release();
}
// ---------------------------------------------------------------------------
// Adjoint gradient of hermiticity fixer
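// adjoint_fix_plane() mirrors fix_plane(): entries that forward() overwrote
// with conjugates get a zero adjoint, while the self-conjugate entries on the
// k0 = 0 and Nyquist hyperplanes keep half of their (previously doubled) real
// part and a zero imaginary part.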
template <
size_t rank, typename Mgr, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim == 1, void>::type
adjoint_fix_plane(Mgr &mgr, CArray &&a, size_t *N) {
std::array<size_t, 1> current_N = {N[0]};
size_t Ntot = N[0];
size_t N0_HC = N[0] / 2;
#pragma omp parallel for
for (size_t i = 1; i < N0_HC; i++) {
size_t current, conj_current;
current = i;
conj_current = current_N[0] - i;
a[conj_current] = 0;
}
for (size_t i : {size_t(0), N0_HC}) {
auto &a0 = a[i];
a0.real(a0.real() * 0.5);
a0.imag(0);
}
}
template <bool full, size_t Nd, typename AccessDirect>
static void adjoint_direct_fix(
std::array<size_t, Nd> const &current_N, AccessDirect &&direct_access) {
size_t const Ntot =
full ? array::product(current_N) / 2 : array::product(current_N);
#pragma omp parallel for
for (size_t i = 0; i < Ntot; i++) {
std::array<ssize_t, Nd> current, conj_current;
decode_index(i, current, current_N);
//if (!has_nyquist(current, current_N))
{
find_conjugate(conj_current, current, current_N);
direct_access(conj_current) = 0;
}
}
}
template <
size_t rank, typename Mgr, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim != 1, void>::type
adjoint_fix_plane(Mgr &mgr, CArray &&a, size_t *N) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::array<size_t, Dim> current_N;
std::copy(N, N + Dim, current_N.begin());
size_t N0_HC = N[0] / 2;
if (rank != 0) {
auto accessor = [&a](auto &&x) -> auto & { return a(x); };
adjoint_direct_fix<true>(current_N, accessor);
} else if (mgr.startN0 + mgr.localN0 > N0_HC) {
size_t i_min = std::max(N0_HC, size_t(mgr.startN0));
size_t i_max = mgr.startN0 + mgr.localN0;
std::array<size_t, Dim - 1> sub_N;
std::copy(current_N.begin() + 1, current_N.end(), sub_N.begin());
for (size_t i0 = i_min; i0 < i_max; i0++) {
auto this_plane = a[i0];
auto direct_access = [&this_plane](auto &&x) -> auto & {
return this_plane(x);
};
adjoint_direct_fix<false>(sub_N, direct_access);
}
}
if (rank != 0 || mgr.on_core(0))
adjoint_fix_plane<rank + 1>(mgr, a[0], N + 1);
if (rank != 0 || mgr.on_core(N0_HC))
adjoint_fix_plane<rank + 1>(mgr, a[N0_HC], N + 1);
}
template <typename T, size_t Nd>
void Hermiticity_fixer<T, Nd>::adjoint(CArrayRef &a) {
// Grab the planes that are required to build the Nyquist correction
std::array<size_t, Nd> N = mgr->N;
size_t N_HC = mgr->N_HC;
fwrap(a) = fwrap(a) * 2.0;
// if (mgr->on_core(0))
// a[0][0][0] *= 0.5;
auto idx = array::make_star_indices<Nd - 1>(boost::indices);
adjoint_fix_plane<0>(*mgr, array::slice_array(a, idx[0]), N.data());
adjoint_fix_plane<0>(*mgr, array::slice_array(a, idx[N_HC - 1]), N.data());
}
//template struct LibLSS::Hermiticity_fixer<double, 1>;
//template struct LibLSS::Hermiticity_fixer<double, 2>;
template struct LibLSS::Hermiticity_fixer<double, 3>;
// ARES TAG: authors_num = 2
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
// ARES TAG: year(0) = 2014-2020
// ARES TAG: name(1) = Jens Jasche
// ARES TAG: email(1) = jens.jasche@fysik.su.se
// ARES TAG: year(1) = 2009-2019
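
For reference, the symmetry enforced by the fixer above is the one that makes the field real in configuration space, a(k) = conj(a(N - k)). The following minimal standalone sketch applies the same operation to a plain 1-D spectrum; it is independent of libLSS, and the function name and pointer interface are illustrative only.

#include <complex>
#include <cstddef>

// Toy counterpart of the 1-D fix_plane(): overwrite the upper half of the
// spectrum with the conjugate of the lower half and zero the imaginary part
// of the self-conjugate modes (k = 0 and k = N0/2).
void toy_fix_hermiticity_1d(std::complex<double> *a, std::size_t N0) {
  std::size_t const N0_HC = N0 / 2;
  for (std::size_t i = 1; i < N0_HC; i++)
    a[N0 - i] = std::conj(a[i]);
  a[0].imag(0);
  a[N0_HC].imag(0);
}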


@@ -0,0 +1,47 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/hermiticity_fixup.hpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2019 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#ifndef __LIBLSS_TOOLS_HERMITICITY_FIXUP_HPP
# define __LIBLSS_TOOLS_HERMITICITY_FIXUP_HPP
# include <complex>
# include <boost/format.hpp>
# include "libLSS/tools/mpi_fftw_helper.hpp"
# include "libLSS/tools/mpi/ghost_planes.hpp"
namespace LibLSS {
template <typename T, size_t Nd>
struct Hermiticity_fixer {
typedef FFTW_Manager<T, Nd> Mgr;
typedef std::shared_ptr<Mgr> Mgr_p;
typedef typename Mgr::U_ArrayFourier::array_type CArrayRef;
MPI_Communication *comm;
Mgr_p mgr;
GhostPlanes<std::complex<T>, Nd - 1> ghosts;
Hermiticity_fixer(Mgr_p mgr);
void forward(CArrayRef &a);
void adjoint(CArrayRef &a);
};
} // namespace LibLSS
#endif
// ARES TAG: authors_num = 2
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
// ARES TAG: year(0) = 2014-2020
// ARES TAG: name(1) = Jens Jasche
// ARES TAG: email(1) = jens.jasche@fysik.su.se
// ARES TAG: year(1) = 2009-2019
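
A hypothetical usage sketch for the class declared above. It relies only on the members shown in this header; the manager mgr and the Fourier-space arrays are assumed to be created elsewhere with the usual libLSS machinery.

#include <memory>
#include "libLSS/tools/hermiticity_fixup.hpp"

using namespace LibLSS;

void make_hermitian(
    std::shared_ptr<FFTW_Manager<double, 3>> mgr,
    Hermiticity_fixer<double, 3>::CArrayRef &a_hat,
    Hermiticity_fixer<double, 3>::CArrayRef &adjoint_grad) {
  Hermiticity_fixer<double, 3> fixer(mgr);
  fixer.forward(a_hat);        // enforce a(k) = conj(a(-k)) across MPI tasks
  fixer.adjoint(adjoint_grad); // propagate a gradient through the same fix
}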


@@ -0,0 +1,231 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/mpi/ghost_array.hpp
Copyright (C) 2018-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#pragma once
#ifndef __LIBLSS_TOOLS_MPI_GHOST_ARRAY_HPP
# define __LIBLSS_TOOLS_MPI_GHOST_ARRAY_HPP
# include <set>
# include <map>
# include <memory>
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/tools/uninitialized_type.hpp"
# include "libLSS/mpi/generic_mpi.hpp"
# include "libLSS/tools/array_tools.hpp"
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/samplers/core/types_samplers.hpp"
namespace LibLSS {
template <typename T>
struct GhostArrayTypes {
typedef LibLSS::U_Array<T, 1> U_ArrayType;
typedef typename U_ArrayType::array_type ArrayType;
typedef std::map<size_t, std::shared_ptr<U_ArrayType>> MapGhosts;
};
template <typename T>
class GhostArray : public GhostArrayTypes<T> {
protected:
static constexpr bool SUPER_VERBOSE = false;
typedef GhostArrayTypes<T> super;
typedef typename super::ArrayType ArrayType;
typedef typename super::U_ArrayType U_ArrayType;
typedef typename super::MapGhosts MapGhosts;
MPI_Communication *comm;
std::vector<boost::multi_array<T, 1>> exchangeIndexes;
public:
GhostArray() {}
/**
* @brief Set up the exchange pattern from the indexes owned by each task.
*
* We assume that localIndexes are unique.
*
* @param comm_ MPI communicator shared by all participating tasks
* @param localIndexes container of the indexes handled by the local task
*/
template <typename IndexSet>
void setup(MPI_Communication *comm_, IndexSet &&localIndexes) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
typedef typename std::remove_reference_t<IndexSet>::value_type indexType;
static_assert(
std::is_same<indexType, T>::value, "Index list must be of type T");
int localKeys = localIndexes.size();
comm = comm_;
// Serialize and send to peers
auto commSize = boost::extents[comm->size()];
boost::multi_array<indexType, 1> linearIndexes(boost::extents[localKeys]);
boost::multi_array<int, 1> allIndexCounts(commSize);
boost::multi_array<int, 1> displIndexes(commSize);
ctx.print("Transfer indexes to linear array");
std::copy(
localIndexes.begin(), localIndexes.end(), linearIndexes.begin());
ctx.print("Sort");
std::sort(linearIndexes.begin(), linearIndexes.end());
comm->all_gather_t(&localKeys, 1, allIndexCounts.data(), 1);
ctx.print("Compute global displacements");
int totalIndexes = 0, previousDispl = 0;
for (int i = 0; i < comm->size(); i++) {
totalIndexes += allIndexCounts[i];
displIndexes[i] = previousDispl;
previousDispl += allIndexCounts[i];
}
boost::multi_array<indexType, 1> allIndexes(boost::extents[totalIndexes]);
// FIXME: Try to reduce memory/bandwidth consumption with a better distributed algorithm
ctx.print("Gather all relevant indexes");
comm->all_gatherv_t(
linearIndexes.data(), localKeys, allIndexes.data(),
allIndexCounts.data(), displIndexes.data());
std::set<indexType> localSet;
if (SUPER_VERBOSE)
ctx.format("Local indexes: %s", LibLSS::to_string(localIndexes));
ctx.print("Transfer local indexes to set for better intersection");
std::copy(
localIndexes.begin(), localIndexes.end(),
std::inserter(localSet, localSet.begin()));
exchangeIndexes.resize(comm->size());
for (int i = 0; i < comm->size(); i++) {
// Compute intersections with remote nodes
std::set<indexType> otherIndexes, interIndexes;
if (i == comm->rank())
continue;
for (int j = 0; j < allIndexCounts[i]; j++) {
otherIndexes.insert(allIndexes[j + displIndexes[i]]);
}
if (SUPER_VERBOSE)
ctx.format(
"Other indexes (count=%d): %s", allIndexCounts[i],
LibLSS::to_string(otherIndexes));
ctx.format("Intersect with rank=%d", i);
std::set_intersection(
localSet.begin(), localSet.end(), otherIndexes.begin(),
otherIndexes.end(),
std::inserter(interIndexes, interIndexes.begin()));
ctx.format("%d indexes in common", interIndexes.size());
exchangeIndexes[i].resize(boost::extents[interIndexes.size()]);
std::copy(
interIndexes.begin(), interIndexes.end(),
exchangeIndexes[i].begin());
}
}
/**
* @brief Synchronize the entries shared with other tasks, merging remote
* contributions into the local array with the reduction operation.
*
* @tparam U element type of the data array
* @param data local 1-d array holding the values to be reduced
* @param mapper how to map an index (from setup) to an index in the provided array
* @param op binary reduction, called as op(local_value, remote_value)
*/
template <typename U, typename ReductionOperation, typename IndexMapper>
void synchronize(
boost::multi_array_ref<U, 1> &data, IndexMapper &&mapper,
ReductionOperation op) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
typedef LibLSS::U_Array<U, 1> TmpU;
std::vector<std::shared_ptr<TmpU>> allTmpSend;
std::vector<std::shared_ptr<TmpU>> allTmpRecv;
std::vector<MPICC_Request> allReq;
allTmpRecv.resize(comm->size());
for (int i = 0; i < comm->size(); i++) {
int numExchange = exchangeIndexes[i].size();
if (numExchange == 0) {
continue;
}
ctx.format("Send %d data -> %d", numExchange, i);
{
auto thisTmp = std::make_shared<TmpU>(boost::extents[numExchange]);
auto &tmpData = thisTmp->get_array();
allTmpSend.push_back(thisTmp);
# pragma omp parallel for
for (int j = 0; j < numExchange; j++) {
tmpData[j] = data[mapper(exchangeIndexes[i][j])];
}
allReq.push_back(comm->IsendT(tmpData.data(), tmpData.size(), i, i));
}
ctx.format("Recv %d data <- %d", numExchange, i);
{
auto thisTmp = std::make_shared<TmpU>(boost::extents[numExchange]);
auto &tmpData = thisTmp->get_array();
allTmpRecv[i] = thisTmp;
allReq.push_back(
comm->IrecvT(tmpData.data(), tmpData.size(), i, comm->rank()));
}
}
ctx.print("Wait IO completion");
comm->WaitAll(allReq);
allTmpSend.clear();
{
ConsoleContext<LOG_DEBUG> ctx("GhostArray local reduction");
// Now all data are in place, we must do partial reductions
for (int i = 0; i < comm->size(); i++) {
if (i == comm->rank())
continue;
int numExchange = exchangeIndexes[i].size();
if (numExchange == 0)
continue;
auto &inData = allTmpRecv[i]->get_array();
# pragma omp parallel for
for (int j = 0; j < numExchange; j++) {
op(data[mapper(exchangeIndexes[i][j])], inData[j]);
}
}
}
}
template <typename U, typename IndexMapper>
void synchronize(
boost::multi_array_ref<U, 1> &data, IndexMapper &&indexToIndex) {
synchronize<U>(
data, indexToIndex, [](auto &x, auto const &y) { x += y; });
}
};
} // namespace LibLSS
#endif
// ARES TAG: num_authors = 1
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: year(0) = 2018-2020
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
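
A hypothetical usage sketch of GhostArray, using only the setup() and synchronize() members defined above: each task accumulates partial sums into bins, declares which global bin indexes it touches, and lets the ghost array add in the contributions from peers that share some of those bins. The communicator, the bin set and the index offset are assumptions of this example.

#include <set>
#include <boost/multi_array.hpp>
#include "libLSS/tools/mpi/ghost_array.hpp"

using namespace LibLSS;

void reduce_shared_bins(
    MPI_Communication *comm, std::set<ssize_t> const &myBins,
    boost::multi_array_ref<double, 1> &partialSums, ssize_t firstBin) {
  GhostArray<ssize_t> ghost;
  // Establish, once, which bins are shared with which peer.
  ghost.setup(comm, myBins);
  // Add remote contributions into the local array (the default reduction is +=).
  ghost.synchronize(
      partialSums, [firstBin](ssize_t bin) { return bin - firstBin; });
}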


@@ -0,0 +1,606 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/mpi/ghost_planes.hpp
Copyright (C) 2018-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#pragma once
#ifndef __LIBLSS_TOOLS_MPI_GHOST_PLANES_HPP
# define __LIBLSS_TOOLS_MPI_GHOST_PLANES_HPP
# include <map>
# include <memory>
# include "libLSS/tools/uninitialized_type.hpp"
# include "libLSS/mpi/generic_mpi.hpp"
# include "libLSS/tools/array_tools.hpp"
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/samplers/core/types_samplers.hpp"
namespace LibLSS {
/**
* This class provides some types to abbreviate the long array specification
* for ghost planes.
*/
template <typename T, size_t Nd>
struct GhostPlaneTypes {
typedef boost::multi_array_ref<T, Nd> ArrayType;
typedef UninitializedArray<ArrayType> U_ArrayType;
typedef std::map<size_t, std::shared_ptr<U_ArrayType>> MapGhosts;
};
/**
* @file
* This enumeration allows choosing between different kinds of "ghosts".
*/
enum GhostMethod {
GHOST_COPY, ///< in synchronize mode, the plane is copied. In AG mode, it is accumulated.
GHOST_ACCUMULATE ///< in synchronize mode, the plane is accumulated. In AG mode, it is copied.
};
/**
* This class handles the generic problem of ghost-plane management with MPI.
* The concept of ghost planes (and of ghost particles in another module) comes
* from the distinction between the MPI task that owns a plane and the tasks
* that need it for further computation. A ghost plane is not designed to be an
* "active" plane on the node that needs it, though a slight variant may allow
* such a use at the cost of a final synchronization.
*
* The workflow for using GhostPlanes is the following:
*   - create the GhostPlanes object;
*   - call setup() to declare which planes are provided and which are required;
*   - do the computation;
*   - call synchronize() before the ghost planes are needed;
*   - access the ghost planes with getPlane();
*   - repeat synchronize() as needed.
*
* There is an adjoint gradient variant of the synchronization step which
* does sum reduction of the adjoint gradient arrays corresponding to the
* ghost planes.
*
*/
template <typename T, size_t Nd>
struct GhostPlanes : GhostPlaneTypes<T, Nd> {
typedef GhostPlaneTypes<T, Nd> super;
typedef typename super::ArrayType ArrayType;
typedef typename super::U_ArrayType U_ArrayType;
typedef typename super::MapGhosts MapGhosts;
private:
static constexpr bool CHECK_DIMENSIONS = true;
static constexpr bool ULTRA_VERBOSE = false;
MPI_Communication *comm;
MapGhosts ghosts, ag_ghosts;
size_t maxPlaneId;
std::map<size_t, size_t> plane_peer;
std::array<size_t, Nd> setupDims;
typedef LibLSS::multi_array<int, 1> int_array;
typedef LibLSS::multi_array<int, 1> size_array;
typedef std::set<int> size_set;
LibLSS::multi_array<int, 1> other_requested_planes, other_requested_count,
other_requested_displ;
std::map<size_t, std::shared_ptr<MPI_Communication>> owned_plane_dispatch;
size_set req_plane_set;
template <typename PlaneSet>
inline size_array fill_with_planes(PlaneSet &&owned_planes) {
size_array plane_set(boost::extents[owned_planes.size()]);
size_t i = 0;
for (auto plane : owned_planes) {
plane_set[i] = plane;
i++;
}
return plane_set;
}
template <typename Array>
inline std::string array_to_str(Array const &s, char const *sep) {
std::ostringstream oss;
auto iter = s.begin();
if (iter == s.end())
return "";
oss << *iter;
++iter;
while (iter != s.end()) {
oss << sep << *iter;
++iter;
}
return oss.str();
}
template <typename PlaneSet>
inline void dispatch_plane_map(
PlaneSet &&owned_planes, int_array &other_planes,
int_array &other_planes_count, int_array &other_planes_displ) {
size_t cSize = comm->size();
auto e_cSize = boost::extents[cSize];
ConsoleContext<LOG_DEBUG> ctx("dispatch_plane_map");
int_array tmp_data(e_cSize), send_displ(e_cSize), send_count(e_cSize);
// Now find out which rank has the planes.
// Everybody sends their plane set for that.
auto plane_set = fill_with_planes(owned_planes);
size_t Nplanes = plane_set.size();
array::fill(tmp_data, Nplanes);
array::fill(send_count, 1);
// Costly but we hopefully do it only once in a while.
// Get all the plane number count from everybody.
ctx.print("Dispatch our planeset, number is " + to_string(tmp_data));
comm->all2allT(tmp_data.data(), 1, other_planes_count.data(), 1);
for (size_t i = 1; i < comm->size(); i++) {
other_planes_displ[i] =
other_planes_displ[i - 1] + other_planes_count[i - 1];
}
size_t total_planes =
other_planes_displ[cSize - 1] + other_planes_count[cSize - 1];
ctx.print(boost::format("Total planes = %d") % total_planes);
other_planes.resize(boost::extents[total_planes]);
ctx.print(
boost::format("Now gather plane ids send_count=%s; send_displ=%s; "
"other_planes_count=%s; other_planes_displ=%s") %
array_to_str(tmp_data, ",") % array_to_str(send_displ, ",") %
array_to_str(other_planes_count, ",") %
array_to_str(other_planes_displ, ","));
// Get plane id from everybody
comm->all2allv_t(
plane_set.data(), tmp_data.data(), send_displ.data(),
other_planes.data(), other_planes_count.data(),
other_planes_displ.data());
ctx.print(
boost::format("Got other task planeset: %s") %
array_to_str(other_planes, ","));
}
typedef std::map<size_t, std::list<size_t>> MapPlaneToPeer;
inline MapPlaneToPeer gather_peer_by_plane(
int_array const &required_planes,
int_array const &required_planes_count,
int_array const &required_planes_displ) {
MapPlaneToPeer plane_to_peer;
int peer = 0;
size_t cSize = comm->size();
size_t cRank = comm->rank();
ConsoleContext<LOG_DEBUG> ctx("gather_peer_by_plane");
for (size_t i = 0; i < required_planes.num_elements(); i++) {
if (peer + 1 < cSize && i >= required_planes_displ[peer + 1]) {
peer++;
}
ctx.print(
boost::format("Peer %d provides %d") % peer % required_planes[i]);
if (peer != cRank) {
plane_to_peer[required_planes[i]].push_back(peer);
}
}
return plane_to_peer;
}
static inline void null_destroy(void *) {}
std::map<
GhostMethod,
std::function<MPICC_Request(MPI_Communication *, T const *, int)>>
ghost_methods;
std::map<
GhostMethod,
std::function<MPICC_Request(MPI_Communication *, T *, T *, int)>>
ghost_methods_ag;
static MPICC_Request
ghost_copy_method(MPI_Communication *c, T const *data, int num) {
return c->IbroadcastT((T *)data, num, 0);
}
static MPICC_Request
ghost_accumulate_method(MPI_Communication *c, T const *data, int num) {
return c->IallReduceT((T *)MPI_IN_PLACE, (T *)data, num, MPI_SUM);
}
static MPICC_Request ghost_accumulate_method_ag(
MPI_Communication *c, T *indata, T const *data, int num) {
return c->IgatherT((T *)indata, num, (T *)data, num, 0);
}
static MPICC_Request
ghost_copy_method_ag(MPI_Communication *c, T *indata, T *data, int num) {
return c->IreduceT(indata, data, num, MPI_SUM, 0);
}
public:
/**
* Constructor.
*/
GhostPlanes() {
ghost_methods[GHOST_COPY] = &ghost_copy_method;
ghost_methods[GHOST_ACCUMULATE] = &ghost_accumulate_method;
ghost_methods_ag[GHOST_COPY] = &ghost_copy_method_ag;
ghost_methods_ag[GHOST_ACCUMULATE] = &ghost_accumulate_method_ag;
std::fill(setupDims.begin(), setupDims.end(), 0);
}
/**
* Return the current dimensions of the planes.
*
* @return A container with the dimensions.
*/
auto const &dims() const { return setupDims; }
/**
* This function allows the user to change the dimensions of the planes.
*
* @param dims (N-1)-d dimensions of each plane.
*/
template <typename DimList>
void updatePlaneDims(DimList &&dims) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
auto i1 = dims.begin();
auto i2 = setupDims.begin();
int d = 0;
for (d = 0; d < Nd; d++) {
if (*i1 != *i2)
break;
++i1;
++i2;
}
// Everything already correct. Exit now.
if (d == Nd) {
ctx.print("No change needed.");
return;
}
ctx.format("New shape is %dx%d", dims[0], dims[1]);
for (auto &g : ghosts) {
if (g.second)
g.second.reset(); //reshape(dims);
}
for (auto &g : ag_ghosts) {
if (g.second)
g.second.reset(); //reshape(dims);
}
std::copy(dims.begin(), dims.end(), setupDims.begin());
}
/**
* This function sets up the ghost plane object for use. It can be called
* several times; in that case the previous setup is forgotten and a new
* one is initiated.
*
* @param comm_ MPI communicator with the same topology as the planes
* @param planes a list of planes that are required from other nodes. The
* list must be some sort of container.
* @param owned_planes a list of the planes that are owned by the current
* node.
* @param dims dimensions of the planes (barring the first one, i.e. 2D if
* the entire set is 3D)
* @param maxPlaneId_ a convenience argument to avoid a global communication
* to figure out the maximum id of the considered
* planes.
*/
template <typename PlaneList, typename PlaneSet, typename DimList>
void setup(
MPI_Communication *comm_, PlaneList &&planes, PlaneSet &&owned_planes,
DimList &&dims, size_t maxPlaneId_) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
size_t cSize = comm_->size();
auto e_cSize = boost::extents[cSize];
int_array other_planes, other_planes_count(e_cSize),
other_planes_displ(e_cSize);
size_set owned_plane_set;
// required_planes, required_planes_count,
// required_planes_displ;
maxPlaneId = maxPlaneId_;
req_plane_set = size_set(planes.begin(), planes.end());
owned_plane_set = size_set(owned_planes.begin(), owned_planes.end());
ghosts.clear();
comm = comm_;
std::copy(dims.begin(), dims.end(), setupDims.begin());
// Create a map between requested planes and peers.
dispatch_plane_map(
owned_planes, other_planes, other_planes_count, other_planes_displ);
// Now we know which of our own planes the other peers require
auto plane_to_peer = gather_peer_by_plane(
// required_planes, required_planes_count, required_planes_displ
other_planes, other_planes_count, other_planes_displ);
ctx.print("Required planes: " + to_string(req_plane_set));
ctx.print("Owned planes: " + to_string(owned_plane_set));
for (size_t plane = 0; plane < maxPlaneId; plane++) {
std::shared_ptr<MPI_Communication> cptr;
auto peer = plane_to_peer.find(plane);
if (owned_plane_set.count(plane) > 0) {
// Mark this task as root (key==0)
cptr = std::shared_ptr<MPI_Communication>(comm->split(plane, 0));
if (ULTRA_VERBOSE)
ctx.format("Data for plane %d is present here.", plane);
} else if (req_plane_set.find(plane) != req_plane_set.end()) {
// Mark this task as non root (key!=0)
cptr = std::shared_ptr<MPI_Communication>(comm->split(plane, 1));
if (ULTRA_VERBOSE)
ctx.format("Data for plane %d is NEEDED here.", plane);
} else {
// Ignore this one, but we have to run it nonetheless as split is a collective operation.
comm->split();
if (ULTRA_VERBOSE)
ctx.format("Ignore this process for plane %d.", plane);
}
if (cptr &&
cptr->size() <=
1) { // Could even be 2, but then we would have a rank problem later.
// We do not need a new communicator in that case.
// This resets cptr and frees the communicator we have just
// created.
cptr.reset();
if (ULTRA_VERBOSE)
ctx.format(
"Communicator has only one process for plane %d, reset.",
plane);
}
owned_plane_dispatch[plane] = cptr;
}
}
/**
* @brief Pre-allocate memory for synchronization.
*
* Warning! Previous memory is freed.
*
*/
void allocate() {
// Allocate memory for the ghost planes
for (auto plane : req_plane_set) {
if (!ghosts[plane])
ghosts[plane] = std::make_shared<U_ArrayType>(setupDims);
if (!ag_ghosts[plane])
ag_ghosts[plane] = std::make_shared<U_ArrayType>(setupDims);
}
}
/**
* @brief Release memory for synchronization
*
*/
void release() {
for (auto plane : req_plane_set) {
ghosts[plane].reset();
ag_ghosts[plane].reset();
}
}
/**
* Clear the internal ghost cache for the computation
* of the adjoint gradient.
*/
void clear_ghosts() {
for (auto &ag : ag_ghosts) {
array::fill(ag.second->get_array(), 0);
}
}
/**
* This creates a virtual contiguous array of all the planes that are
* requested and owned by the current task. There is a bit of overhead for
* each plane lookup, so use it wisely and cache plane accesses.
*
* @param planes contiguous multi_array of planes to be synchronized. The
* multi_array is assumed to range from min_local_plane
* to max_local_plane (according to the list given in
* setup).
* @param method a method to compute the synchronization
*/
void synchronize(
boost::multi_array_ref<T, (Nd + 1)> const &planes,
GhostMethod method = GHOST_COPY) {
// Synchronize operations with other members of comm
ConsoleContext<LOG_DEBUG> ctx("ghost synchronize");
RequestArray requests(boost::extents[maxPlaneId]);
StatusArray statuses(boost::extents[maxPlaneId]);
allocate();
// Check that the planes do have the correct shape
if (CHECK_DIMENSIONS) {
auto shape_in = planes.shape();
auto iter = ghosts.begin();
if (iter != ghosts.end()) {
auto shape_out = iter->second->get_array().shape();
for (size_t i = 1; i < Nd; i++) {
if (shape_in[i] != shape_out[i - 1]) {
error_helper<ErrorBadState>(
"Invalid dimensions of the array to synchronize (" +
to_string(shape_in[i]) +
" != " + to_string(shape_out[i - 1]) + ")");
}
}
}
}
for (size_t plane = 0; plane < maxPlaneId; plane++) {
auto iter = owned_plane_dispatch.find(plane);
if (iter != owned_plane_dispatch.end()) {
int num;
if (!iter->second) {
if (ULTRA_VERBOSE)
ctx.print("Empty communicator. Skip.");
continue;
}
if (req_plane_set.count(plane) == 0) {
//Console::instance().c_assert(plane >= idMin && plane < idMax, "Missing plane id for broadcasting");
auto one_plane = planes[plane];
T const *data =
one_plane
.origin(); // This assumes that index_bases is zero for dims > 1
num = one_plane.num_elements();
ctx.format("Send our plane %d (num=%d)", plane, num);
requests[plane] =
ghost_methods[method](iter->second.get(), data, num);
} else {
auto &one_plane = (ghosts[plane]->get_array());
auto data = one_plane.data();
num = one_plane.num_elements();
ctx.format(
"Receive some plane %d (num=%d), ptr=%p", plane, num,
(void *)data);
requests[plane] =
ghost_methods[method](iter->second.get(), data, num);
}
}
// If we do not have anything to exchange just skip the communication.
}
if (ULTRA_VERBOSE)
ctx.print("Wait for completion");
MPI_Communication::WaitAll(requests, statuses);
}
/**
* This function computes an "adjoint gradient" of the ghost
* plane algorithm.
*
* @param ag_planes similar to synchronize, except that ag_planes is
* modified through communication with sibling nodes.
* @param method a method to compute the synchronization
* @see GhostMethod
*/
void synchronize_ag(
boost::multi_array_ref<T, (Nd + 1)> &ag_planes,
GhostMethod method = GHOST_COPY) {
// Synchronize operations with other members of comm
ConsoleContext<LOG_DEBUG> ctx(
"ghost synchronize_ag, maxPlaneId=" + to_string(maxPlaneId));
RequestArray requests(boost::extents[maxPlaneId]);
StatusArray statuses(boost::extents[maxPlaneId]);
std::vector<std::unique_ptr<T[]>> all_tmps;
for (size_t plane = 0; plane < maxPlaneId; plane++) {
auto iter = owned_plane_dispatch.find(plane);
if (iter != owned_plane_dispatch.end()) {
int num;
if (!iter->second) {
if (ULTRA_VERBOSE)
ctx.print("Empty communicator. Skip.");
continue;
}
if (req_plane_set.count(plane) == 0) {
//Console::instance().c_assert(plane >= idMin && plane < idMax, "Missing plane id for broadcasting");
auto one_plane = ag_planes[plane];
T *tmp_buf;
T *data =
one_plane
.origin(); // WARNING: This assumes that index_bases is zero for dims > 1
num = one_plane.num_elements();
ctx.format("Receive and reduce our plane %d (num=%d)", plane, num);
Console::instance().c_assert(
iter->second->rank() == 0,
"For reception, local rank has to be zero.");
all_tmps.push_back(std::unique_ptr<T[]>(tmp_buf = new T[num]));
LibLSS::copy_array_rv(
boost::multi_array_ref<T, 2>(
tmp_buf,
boost::extents[one_plane.shape()[0]][one_plane.shape()[1]]),
one_plane);
requests[plane] = ghost_methods_ag[method](
iter->second.get(), tmp_buf, data, num);
} else {
auto &one_plane = (ag_ghosts[plane]->get_array());
auto data = one_plane.data();
T *tmp_buf;
num = one_plane.num_elements();
ctx.format(
"Send and reduce some plane %d (num=%d), ptr=%p", plane, num,
(void *)data);
Console::instance().c_assert(
iter->second->rank() != 0,
"For sending, local rank must not be zero.");
all_tmps.push_back(std::unique_ptr<T[]>(tmp_buf = new T[num]));
LibLSS::copy_array_rv(
boost::multi_array_ref<T, 2>(
tmp_buf,
boost::extents[one_plane.shape()[0]][one_plane.shape()[1]]),
one_plane);
requests[plane] = ghost_methods_ag[method](
iter->second.get(), tmp_buf, data, num);
}
}
// If we do not have anything to exchange just skip the communication.
}
MPI_Communication::WaitAll(requests, statuses);
}
/**
* Return the adjoint gradient plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType &ag_getPlane(size_t i) {
auto iter = ag_ghosts.find(i);
Console::instance().c_assert(
iter != ag_ghosts.end(), "Invalid ag ghost plane access");
return iter->second->get_array();
}
/**
* Return the ghost plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType &getPlane(size_t i) {
auto iter = ghosts.find(i);
if (iter == ghosts.end()) {
Console::instance().print<LOG_ERROR>(
boost::format("no such ghost plane %d") % i);
error_helper<ErrorBadState>("Invalid ghost plane access");
}
return iter->second->get_array();
}
/**
* Return the ghost plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType const &getPlane(size_t i) const {
auto iter = ghosts.find(i);
if (iter == ghosts.end()) {
Console::instance().print<LOG_ERROR>(
boost::format("no such ghost plane %d") % i);
error_helper<ErrorBadState>("Invalid ghost plane access");
}
return iter->second->get_array();
}
};
} // namespace LibLSS
#endif
// ARES TAG: num_authors = 1
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: year(0) = 2018-2020
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
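
A hypothetical end-to-end sketch of the workflow described in the class documentation (setup, synchronize, getPlane, release). The slab decomposition, the wanted plane set and the field layout are assumptions of this example; in particular, the first dimension of field is assumed to be indexed by global plane ids (index base equal to start), as the synchronize() documentation requires.

#include <array>
#include <set>
#include <boost/multi_array.hpp>
#include "libLSS/tools/mpi/ghost_planes.hpp"

using namespace LibLSS;

// This task owns planes [start, start + local) of an N0 x N1 x N2 box and
// additionally needs the planes listed in `wanted` from other tasks.
void exchange_planes(
    MPI_Communication *comm, size_t start, size_t local, size_t N0, size_t N1,
    size_t N2, std::set<size_t> const &wanted,
    boost::multi_array_ref<double, 3> &field) {
  GhostPlanes<double, 2> ghosts; // each ghost plane is a 2-D (N1 x N2) slice
  std::set<size_t> owned;
  for (size_t i = start; i < start + local; i++)
    owned.insert(i);
  // Declare what we own and what we need; this is a collective operation.
  ghosts.setup(comm, wanted, owned, std::array<size_t, 2>{N1, N2}, N0);
  // Broadcast the owned planes to whoever requested them.
  ghosts.synchronize(field);
  for (size_t plane : wanted) {
    auto &p = ghosts.getPlane(plane); // 2-D view of a remote plane
    (void)p;                          // ... use p in the actual computation
  }
  ghosts.release(); // free ghost buffers until the next synchronize()
}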


@@ -0,0 +1,224 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/symplectic_integrator.hpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#ifndef __LIBLSS_SYMPLECTIC_INTEGRATOR_HPP
#define __LIBLSS_SYMPLECTIC_INTEGRATOR_HPP
#include <boost/multi_array.hpp>
#include "libLSS/tools/console.hpp"
#include "libLSS/tools/array_tools.hpp"
#include "libLSS/tools/fused_array.hpp"
#include "libLSS/tools/fusewrapper.hpp"
namespace LibLSS {
namespace SymplecticOption {
enum IntegratorScheme {
SI_2A,
SI_2B,
SI_2C,
SI_3A,
SI_4B,
SI_4C,
SI_4D,
SI_6A,
CG_89
};
typedef boost::multi_array<double, 2> IntegratorCoefficients;
template <int N>
static inline void
pushScheme(double coefs[2][N], IntegratorCoefficients &I_coefs) {
int Ncoefs = N;
I_coefs.resize(boost::extents[2][Ncoefs]);
for (int i = 0; i < Ncoefs; i++) {
I_coefs[0][i] = coefs[0][i];
I_coefs[1][i] = coefs[1][i];
}
}
}; // namespace SymplecticOption
struct SymplecticIntegrators {
typedef SymplecticOption::IntegratorCoefficients IntegratorCoefficients;
typedef SymplecticOption::IntegratorScheme IntegratorScheme;
IntegratorCoefficients I_coefs;
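// Convention used by integrate_dense() below: row 0 holds the position-update
// (drift) weights a_n and row 1 the momentum-update (kick) weights b_n; both
// get multiplied by the step size epsilon at every stage.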
SymplecticIntegrators() { setIntegratorScheme(SymplecticOption::SI_2A); }
void setIntegratorScheme(IntegratorScheme scheme) {
using namespace SymplecticOption;
switch (scheme) {
case SI_2A: {
//si2a : standard leapfrog
double coefs[2][2] = {{0.5, 0.5}, {0.0, 1.0}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_2B: {
//si2b : pseudo leapfrog
double coefs[2][2] = {{1.0, 0.0}, {0.5, 0.5}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_2C: {
//si2c : optimal 2-stage
double coefs[2][2] = {{1.0 / sqrt(2.), 1.0 - 1.0 / sqrt(2.0)},
{1.0 - 1.0 / sqrt(2.0), 1.0 / sqrt(2.0)}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_3A: {
//si3a : Ruth's third order method
double coefs[2][3] = {{2.0 / 3.0, -2.0 / 3.0, 1.0},
{7.0 / 24.0, 0.75, -1.0 / 24.0}};
pushScheme<3>(coefs, I_coefs);
break;
}
case SI_4B: {
//si4b : Calvo and Sanz-Serna's fourth order method
double coeffs[2][4] = {{0.515352837431122936, -0.085782019412973646,
0.441583023616466524, 0.128846158365384185},
{0.134496199277431089, -0.224819803079420806,
0.756320000515668291, 0.334003603286321425}};
pushScheme<4>(coeffs, I_coefs);
break;
}
case SI_4C: {
//si4c : McLachlan and Atela's optimal third order method
double coeffs[2][5] = {{0.205177661542290, 0.403021281604210,
-0.12092087633891, 0.512721933192410, 0.0},
{0.061758858135626, 0.33897802655364,
0.61479130717558, -0.14054801465937,
0.12501982279453}};
pushScheme<5>(coeffs, I_coefs);
break;
}
case SI_4D: {
//si4d : fourth-order composition scheme based on the 2^(1/3) splitting (Forest-Ruth-like)
double caux = pow(2., 1. / 3.);
double coeffs[2][4] = {
{0.5 / (2. - caux), 0.5 * (1.0 - caux) / (2. - caux),
0.5 * (1.0 - caux) / (2. - caux), 0.5 / (2. - caux)},
{0.0, 1.0 / (2. - caux), -caux / (2. - caux), 1.0 / (2. - caux)}};
pushScheme<4>(coeffs, I_coefs);
break;
}
case SI_6A: {
//si6a : Yoshida's sixth-order method
double caux = pow(2., 1. / 3.);
double coeffs[2][8] = {
{0.78451361047756, 0.23557321335936, -1.1776799841789,
1.3151863206839, 0., 0., 0., 0.},
{0.39225680523878, 0.51004341191846, -0.47105338540976,
0.068753168252520, 0., 0., 0., 0.}};
coeffs[0][4] = coeffs[0][2];
coeffs[0][5] = coeffs[0][1];
coeffs[0][6] = coeffs[0][0];
coeffs[1][4] = coeffs[1][3];
coeffs[1][5] = coeffs[1][2];
coeffs[1][6] = coeffs[1][1];
coeffs[1][7] = coeffs[1][0];
pushScheme<8>(coeffs, I_coefs);
break;
}
case CG_89: {
constexpr int const i = 4;
constexpr double const n = 2.;
double s = std::pow(2*i, 1/(n+1.));
double coeffs[2][4*i+2];
for (int j = 0; j < i; j++) {
coeffs[0][2*j] = 0.5;
coeffs[0][2*j+1] = 0.5;
coeffs[1][2*j] = 0.;
coeffs[1][2*j+1] = 1.;
}
coeffs[0][2*i] = -0.5*s;
coeffs[0][2*i+1] = -0.5*s;
coeffs[1][2*i] = 0;
coeffs[1][2*i+1] = -s;
int const base = 2*i+2;
for (int j = 0; j < i; j++) {
coeffs[0][base+2*j] = 0.5;
coeffs[0][base+2*j+1] = 0.5;
coeffs[1][base+2*j] = 0.;
coeffs[1][base+2*j+1] = 1.;
}
pushScheme<4*i+2>(coeffs, I_coefs);
break;
}
default:
error_helper<ErrorBadState>("Unknown integration scheme");
break;
}
}
template <
typename MassMatrix, typename GradientVector, typename MomentumVector,
typename PositionVector, typename GradientFunction>
void integrate_dense(
const GradientFunction &gradient, MassMatrix &&masses, double epsilon,
int Ntime, PositionVector &position, MomentumVector &momentum,
GradientVector &tmp_gradient) {
using boost::lambda::_1;
using boost::lambda::_2;
using boost::lambda::_3;
Console &cons = Console::instance();
Progress<LOG_INFO_SINGLE> &progress =
cons.start_progress<LOG_INFO_SINGLE>(
"doing Symplectic integration", Ntime, 10);
int Ncoefs = I_coefs.shape()[1];
for (int i_Time = 0; i_Time < Ntime; i_Time++) {
///the scheme depends on the chosen integrator order
for (int n = 0; n < Ncoefs; n++) {
double an = I_coefs[0][n] * epsilon;
double bn = I_coefs[1][n] * epsilon;
if (bn != 0) {
gradient(position, tmp_gradient);
// This is momentum update
fwrap(momentum) = fwrap(momentum) - fwrap(tmp_gradient) * bn;
}
// This is position update
fwrap(position) = fwrap(position) + masses(momentum, tmp_gradient) * an;
}
progress.update(i_Time);
}
progress.destroy();
}
template <
typename MassMatrix, typename GradientVector, typename MomentumVector,
typename PositionVector, typename GradientFunction>
void integrate(
const GradientFunction &gradient, MassMatrix &&masses, double epsilon,
int Ntime, PositionVector &position, MomentumVector &momentum,
GradientVector &tmp_gradient) {
auto mass_op = [&masses](MomentumVector const &m, auto&) {
return fwrap(m) * fwrap(masses);
};
integrate_dense(
gradient, mass_op, epsilon, Ntime, position, momentum, tmp_gradient);
}
};
}; // namespace LibLSS
#endif
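
To make the coefficient convention concrete, here is a small standalone program (not part of the package) that applies the SI_2A table {{0.5, 0.5}, {0.0, 1.0}} to a unit-mass harmonic oscillator, following the same kick/drift loop as integrate_dense().

#include <cmath>
#include <cstdio>

// Toy drift/kick loop for H = p^2/2 + q^2/2: a_n (row 0) weights the position
// update, b_n (row 1) the momentum update, as in integrate_dense().
int main() {
  double const a[2] = {0.5, 0.5}; // SI_2A position (drift) coefficients
  double const b[2] = {0.0, 1.0}; // SI_2A momentum (kick) coefficients
  double q = 1.0, p = 0.0, epsilon = 0.01;
  for (int step = 0; step < 1000; step++) {
    for (int n = 0; n < 2; n++) {
      if (b[n] != 0)
        p -= q * b[n] * epsilon; // gradient of the potential is q
      q += p * a[n] * epsilon;   // unit mass matrix
    }
  }
  // Energy should stay close to its initial value 0.5 for a symplectic scheme.
  std::printf("q=%g p=%g E=%g\n", q, p, 0.5 * (p * p + q * q));
  return 0;
}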