Initial import

Guilhem Lavaux 2023-05-29 10:41:03 +02:00
commit 56a50eead3
820 changed files with 192077 additions and 0 deletions


@@ -0,0 +1,333 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/hermiticity_fixup.cpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2019 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#include <set>
#include <array>
#include <algorithm>
#include "libLSS/tools/console.hpp"
#include "libLSS/tools/errors.hpp"
#include "libLSS/tools/mpi_fftw_helper.hpp"
#include "libLSS/samplers/core/types_samplers.hpp"
#include "libLSS/tools/mpi/ghost_planes.hpp"
#include "libLSS/tools/hermiticity_fixup.hpp"
static constexpr bool ULTRA_VERBOSE = true;
using namespace LibLSS;
template <typename T, size_t Nd>
Hermiticity_fixer<T, Nd>::Hermiticity_fixer(Mgr_p mgr_)
: comm(mgr_->getComm()), mgr(mgr_) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::set<ssize_t> wanted_planes, owned_planes;
std::array<ssize_t, Nd - 1> dims;
std::copy(mgr->N.begin() + 1, mgr->N.end(), dims.begin());
dims[Nd - 2] = dims[Nd - 2] / 2 + 1;
{
size_t i_min = mgr->startN0;
size_t i_max = mgr->startN0 + mgr->localN0;
ctx.format("own: i_min=%d, i_max=%d", i_min, i_max);
for (size_t i = i_min; i < i_max; i++) {
owned_planes.insert(i);
}
}
{
size_t i_min = std::max(size_t(mgr->startN0), mgr->N[0] / 2 + 1);
size_t i_max = mgr->startN0 + mgr->localN0;
ctx.format("want: i_min=%d, i_max=%d", i_min, i_max);
for (size_t i = i_min; i < i_max; i++) {
size_t conj_plane = mgr->N[0] - i;
if (!mgr->on_core(conj_plane)) {
wanted_planes.insert(conj_plane);
}
}
}
ghosts.setup(comm, wanted_planes, owned_planes, dims, mgr->N[0]);
}
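// Helpers for the fixers below: encode_index/decode_index map between a flat
// index and an Nd-dimensional index, find_conjugate computes the
// frequency-reversed index k -> N - k (component-wise, with 0 mapped to 0),
// and has_nyquist tells whether an index touches a self-conjugate (DC or
// Nyquist) mode.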
template <size_t Nd>
static ssize_t encode_index(
std::array<ssize_t, Nd> const &index, std::array<size_t, Nd> const &N) {
ssize_t ret = 0;
for (size_t i = 0; i < Nd; i++)
ret = ret * N[i] + index[i];
return ret;
}
template <size_t Nd>
static void decode_index(
ssize_t coded_index, std::array<ssize_t, Nd> &decoded,
std::array<size_t, Nd> const &N) {
for (size_t i = Nd; i > 0; i--) {
size_t j = i - 1;
ssize_t tmp = coded_index / N[j];
ssize_t tmp2 = coded_index - tmp * N[j];
decoded[j] = tmp2;
coded_index = tmp;
}
}
template <size_t Nd>
static void find_conjugate(
std::array<ssize_t, Nd> &reversed_index,
std::array<ssize_t, Nd> const &index, std::array<size_t, Nd> const &N) {
for (size_t i = 0; i < Nd; i++) {
if (index[i] == 0)
reversed_index[i] = 0;
else
reversed_index[i] = N[i] - index[i];
}
}
template <size_t Nd>
static bool
has_nyquist(std::array<ssize_t, Nd> &index, std::array<size_t, Nd> const &N) {
for (size_t i = 0; i < Nd; i++) {
if (index[i] == N[i] / 2 || index[i] == 0)
return true;
}
return false;
}
// ---------------------------------------------------------------------------
// Forward hermiticity fixer
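// fix_plane() enforces a(k) = conj(a(-k)) plane by plane: the (Dim != 1)
// overload handles the upper half of the slowest axis, reading conjugate
// planes either locally or from the ghost-plane cache, then recurses on the
// self-conjugate hyperplanes k0 = 0 and k0 = N0/2; the 1-D overload
// terminates the recursion.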
template <
size_t rank, typename Mgr, typename Ghosts, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim == 1, void>::type
fix_plane(Mgr &mgr, Ghosts &&ghosts, CArray &&a, size_t *N) {
std::array<size_t, 1> current_N = {N[0]};
size_t Ntot = N[0];
size_t N0_HC = N[0] / 2;
#pragma omp parallel for
for (size_t i = 1; i < N0_HC; i++) {
size_t current, conj_current;
current = i;
conj_current = current_N[0] - i;
a[conj_current] = std::conj(a[current]);
}
for (size_t i : {size_t(0), N0_HC}) {
a[i].imag(0);
}
}
template <bool full, size_t Nd, typename AccessDirect, typename AccessConj>
static void direct_fix(
std::array<size_t, Nd> const &current_N, AccessDirect &&direct_access,
AccessConj &&conj_access) {
size_t Ntot =
full ? array::product(current_N) / 2 : array::product(current_N);
#pragma omp parallel for
for (size_t i = 0; i < Ntot; i++) {
std::array<ssize_t, Nd> current, conj_current;
decode_index(i, current, current_N);
//if (!has_nyquist(current, current_N))
{
find_conjugate(conj_current, current, current_N);
direct_access(current) = std::conj(conj_access(conj_current));
}
}
}
template <
size_t rank, typename Mgr, typename Ghosts, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim != 1, void>::type
fix_plane(Mgr &mgr, Ghosts &&ghosts, CArray &&a, size_t *N) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::array<size_t, Dim> current_N;
std::copy(N, N + Dim, current_N.begin());
size_t N0_HC = N[0] / 2;
if (rank != 0) {
auto accessor = [&a](auto &&x) -> auto & { return a(x); };
direct_fix<true>(current_N, accessor, accessor);
} else if (mgr.startN0 + mgr.localN0 > N0_HC) {
size_t i_min = std::max(N0_HC, size_t(mgr.startN0));
size_t i_max = mgr.startN0 + mgr.localN0;
std::array<size_t, Dim - 1> sub_N;
std::copy(current_N.begin() + 1, current_N.end(), sub_N.begin());
ctx.format("i_min = %d, i_max = %d", i_min, i_max);
for (size_t i0 = i_min; i0 < i_max; i0++) {
size_t i0_conj = N[0] - i0;
auto this_plane = a[i0];
auto direct_access = [&this_plane](auto &&x) -> auto & {
return this_plane(x);
};
if (mgr.on_core(i0_conj)) {
auto conj_plane = a[i0_conj];
auto conj_direct_access = [&conj_plane](auto &&x) -> auto & {
return conj_plane(x);
};
direct_fix<false>(sub_N, direct_access, conj_direct_access);
} else {
ctx.format(" Fix plane %d using i0_conj=%d from remote", i0, i0_conj);
auto conj_plane = ghosts(i0_conj);
direct_fix<false>(
sub_N, direct_access, [&conj_plane](auto &&x) -> auto & {
return conj_plane(x);
});
}
}
}
if (rank != 0 || mgr.on_core(0))
fix_plane<rank + 1>(mgr, ghosts, a[0], N + 1);
if (rank != 0 || mgr.on_core(N0_HC))
fix_plane<rank + 1>(mgr, ghosts, a[N0_HC], N + 1);
}
template <typename T, size_t Nd>
void Hermiticity_fixer<T, Nd>::forward(CArrayRef &a) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
// Grab the planes that are required to build the Nyquist correction
ghosts.synchronize(a);
std::array<size_t, Nd> N = mgr->N;
size_t N_HC = mgr->N_HC;
auto idx = array::make_star_indices<Nd - 1>(boost::indices);
auto idx_g = array::make_star_indices<Nd - 2>(boost::indices);
fix_plane<0>(
*mgr,
[this, idx_g, N_HC](ssize_t plane) {
return array::slice_array(ghosts.getPlane(plane), idx_g[0]);
},
array::slice_array(a, idx[0]), N.data());
fix_plane<0>(
*mgr,
[this, idx_g, N_HC](ssize_t plane) {
return array::slice_array(ghosts.getPlane(plane), idx_g[N_HC - 1]);
},
array::slice_array(a, idx[N_HC - 1]), N.data());
ghosts.release();
}
// ---------------------------------------------------------------------------
// Adjoint gradient of hermiticity fixer
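// adjoint_fix_plane() mirrors fix_plane(): entries that forward() overwrote
// with conjugates get a zero adjoint, while the self-conjugate entries on the
// k0 = 0 and Nyquist hyperplanes keep half of their (previously doubled) real
// part and a zero imaginary part.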
template <
size_t rank, typename Mgr, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim == 1, void>::type
adjoint_fix_plane(Mgr &mgr, CArray &&a, size_t *N) {
std::array<size_t, 1> current_N = {N[0]};
size_t Ntot = N[0];
size_t N0_HC = N[0] / 2;
#pragma omp parallel for
for (size_t i = 1; i < N0_HC; i++) {
size_t current, conj_current;
current = i;
conj_current = current_N[0] - i;
a[conj_current] = 0;
}
for (size_t i : {size_t(0), N0_HC}) {
auto &a0 = a[i];
a0.real(a0.real() * 0.5);
a0.imag(0);
}
}
template <bool full, size_t Nd, typename AccessDirect>
static void adjoint_direct_fix(
std::array<size_t, Nd> const &current_N, AccessDirect &&direct_access) {
size_t const Ntot =
full ? array::product(current_N) / 2 : array::product(current_N);
#pragma omp parallel for
for (size_t i = 0; i < Ntot; i++) {
std::array<ssize_t, Nd> current, conj_current;
decode_index(i, current, current_N);
//if (!has_nyquist(current, current_N))
{
find_conjugate(conj_current, current, current_N);
direct_access(conj_current) = 0;
}
}
}
template <
size_t rank, typename Mgr, typename CArray,
size_t Dim = CArray::dimensionality>
static typename std::enable_if<Dim != 1, void>::type
adjoint_fix_plane(Mgr &mgr, CArray &&a, size_t *N) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
std::array<size_t, Dim> current_N;
std::copy(N, N + Dim, current_N.begin());
size_t N0_HC = N[0] / 2;
if (rank != 0) {
auto accessor = [&a](auto &&x) -> auto & { return a(x); };
adjoint_direct_fix<true>(current_N, accessor);
} else if (mgr.startN0 + mgr.localN0 > N0_HC) {
size_t i_min = std::max(N0_HC, size_t(mgr.startN0));
size_t i_max = mgr.startN0 + mgr.localN0;
std::array<size_t, Dim - 1> sub_N;
std::copy(current_N.begin() + 1, current_N.end(), sub_N.begin());
for (size_t i0 = i_min; i0 < i_max; i0++) {
auto this_plane = a[i0];
auto direct_access = [&this_plane](auto &&x) -> auto & {
return this_plane(x);
};
adjoint_direct_fix<false>(sub_N, direct_access);
}
}
if (rank != 0 || mgr.on_core(0))
adjoint_fix_plane<rank + 1>(mgr, a[0], N + 1);
if (rank != 0 || mgr.on_core(N0_HC))
adjoint_fix_plane<rank + 1>(mgr, a[N0_HC], N + 1);
}
template <typename T, size_t Nd>
void Hermiticity_fixer<T, Nd>::adjoint(CArrayRef &a) {
// Grab the planes that are required to build the Nyquist correction
std::array<size_t, Nd> N = mgr->N;
size_t N_HC = mgr->N_HC;
fwrap(a) = fwrap(a) * 2.0;
// if (mgr->on_core(0))
// a[0][0][0] *= 0.5;
auto idx = array::make_star_indices<Nd - 1>(boost::indices);
adjoint_fix_plane<0>(*mgr, array::slice_array(a, idx[0]), N.data());
adjoint_fix_plane<0>(*mgr, array::slice_array(a, idx[N_HC - 1]), N.data());
}
//template struct LibLSS::Hermiticity_fixer<double, 1>;
//template struct LibLSS::Hermiticity_fixer<double, 2>;
template struct LibLSS::Hermiticity_fixer<double, 3>;
// ARES TAG: authors_num = 2
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
// ARES TAG: year(0) = 2014-2020
// ARES TAG: name(1) = Jens Jasche
// ARES TAG: email(1) = jens.jasche@fysik.su.se
// ARES TAG: year(1) = 2009-2019
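
For reference, the symmetry enforced by the fixer above is the one that makes the field real in configuration space, a(k) = conj(a(N - k)). The following minimal standalone sketch applies the same operation to a plain 1-D spectrum; it is independent of libLSS, and the function name and pointer interface are illustrative only.

#include <complex>
#include <cstddef>

// Toy counterpart of the 1-D fix_plane(): overwrite the upper half of the
// spectrum with the conjugate of the lower half and zero the imaginary part
// of the self-conjugate modes (k = 0 and k = N0/2).
void toy_fix_hermiticity_1d(std::complex<double> *a, std::size_t N0) {
  std::size_t const N0_HC = N0 / 2;
  for (std::size_t i = 1; i < N0_HC; i++)
    a[N0 - i] = std::conj(a[i]);
  a[0].imag(0);
  a[N0_HC].imag(0);
}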


@@ -0,0 +1,47 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/hermiticity_fixup.hpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2019 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#ifndef __LIBLSS_TOOLS_HERMITICITY_FIXUP_HPP
# define __LIBLSS_TOOLS_HERMITICITY_FIXUP_HPP
# include <complex>
# include <boost/format.hpp>
# include "libLSS/tools/mpi_fftw_helper.hpp"
# include "libLSS/tools/mpi/ghost_planes.hpp"
namespace LibLSS {
template <typename T, size_t Nd>
struct Hermiticity_fixer {
typedef FFTW_Manager<T, Nd> Mgr;
typedef std::shared_ptr<Mgr> Mgr_p;
typedef typename Mgr::U_ArrayFourier::array_type CArrayRef;
MPI_Communication *comm;
Mgr_p mgr;
GhostPlanes<std::complex<T>, Nd - 1> ghosts;
Hermiticity_fixer(Mgr_p mgr);
void forward(CArrayRef &a);
void adjoint(CArrayRef &a);
};
} // namespace LibLSS
#endif
// ARES TAG: authors_num = 2
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
// ARES TAG: year(0) = 2014-2020
// ARES TAG: name(1) = Jens Jasche
// ARES TAG: email(1) = jens.jasche@fysik.su.se
// ARES TAG: year(1) = 2009-2019
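
A hypothetical usage sketch for the class declared above. It relies only on the members shown in this header; the manager mgr and the Fourier-space arrays are assumed to be created elsewhere with the usual libLSS machinery.

#include <memory>
#include "libLSS/tools/hermiticity_fixup.hpp"

using namespace LibLSS;

void make_hermitian(
    std::shared_ptr<FFTW_Manager<double, 3>> mgr,
    Hermiticity_fixer<double, 3>::CArrayRef &a_hat,
    Hermiticity_fixer<double, 3>::CArrayRef &adjoint_grad) {
  Hermiticity_fixer<double, 3> fixer(mgr);
  fixer.forward(a_hat);        // enforce a(k) = conj(a(-k)) across MPI tasks
  fixer.adjoint(adjoint_grad); // propagate a gradient through the same fix
}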


@@ -0,0 +1,231 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/mpi/ghost_array.hpp
Copyright (C) 2018-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#pragma once
#ifndef __LIBLSS_TOOLS_MPI_GHOST_ARRAY_HPP
# define __LIBLSS_TOOLS_MPI_GHOST_ARRAY_HPP
# include <set>
# include <map>
# include <memory>
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/tools/uninitialized_type.hpp"
# include "libLSS/mpi/generic_mpi.hpp"
# include "libLSS/tools/array_tools.hpp"
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/samplers/core/types_samplers.hpp"
namespace LibLSS {
template <typename T>
struct GhostArrayTypes {
typedef LibLSS::U_Array<T, 1> U_ArrayType;
typedef typename U_ArrayType::array_type ArrayType;
typedef std::map<size_t, std::shared_ptr<U_ArrayType>> MapGhosts;
};
template <typename T>
class GhostArray : public GhostArrayTypes<T> {
protected:
static constexpr bool SUPER_VERBOSE = false;
typedef GhostArrayTypes<T> super;
typedef typename super::ArrayType ArrayType;
typedef typename super::U_ArrayType U_ArrayType;
typedef typename super::MapGhosts MapGhosts;
MPI_Communication *comm;
std::vector<boost::multi_array<T, 1>> exchangeIndexes;
public:
GhostArray() {}
/**
* @brief Set up the exchange pattern from the indexes owned by each task.
*
* We assume that localIndexes are unique.
*
* @param comm_ MPI communicator shared by all participating tasks
* @param localIndexes container of the indexes handled by the local task
*/
template <typename IndexSet>
void setup(MPI_Communication *comm_, IndexSet &&localIndexes) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
typedef typename std::remove_reference_t<IndexSet>::value_type indexType;
static_assert(
std::is_same<indexType, T>::value, "Index list must be of type T");
int localKeys = localIndexes.size();
comm = comm_;
// Serialize and send to peers
auto commSize = boost::extents[comm->size()];
boost::multi_array<indexType, 1> linearIndexes(boost::extents[localKeys]);
boost::multi_array<int, 1> allIndexCounts(commSize);
boost::multi_array<int, 1> displIndexes(commSize);
ctx.print("Transfer indexes to linear array");
std::copy(
localIndexes.begin(), localIndexes.end(), linearIndexes.begin());
ctx.print("Sort");
std::sort(linearIndexes.begin(), linearIndexes.end());
comm->all_gather_t(&localKeys, 1, allIndexCounts.data(), 1);
ctx.print("Compute global displacements");
int totalIndexes = 0, previousDispl = 0;
for (int i = 0; i < comm->size(); i++) {
totalIndexes += allIndexCounts[i];
displIndexes[i] = previousDispl;
previousDispl += allIndexCounts[i];
}
boost::multi_array<indexType, 1> allIndexes(boost::extents[totalIndexes]);
// FIXME: Try to reduce memory/bandwidth consumption with a better distributed algorithm
ctx.print("Gather all relevant indexes");
comm->all_gatherv_t(
linearIndexes.data(), localKeys, allIndexes.data(),
allIndexCounts.data(), displIndexes.data());
std::set<indexType> localSet;
if (SUPER_VERBOSE)
ctx.format("Local indexes: %s", LibLSS::to_string(localIndexes));
ctx.print("Transfer local indexes to set for better intersection");
std::copy(
localIndexes.begin(), localIndexes.end(),
std::inserter(localSet, localSet.begin()));
exchangeIndexes.resize(comm->size());
for (int i = 0; i < comm->size(); i++) {
// Compute intersections with remote nodes
std::set<indexType> otherIndexes, interIndexes;
if (i == comm->rank())
continue;
for (int j = 0; j < allIndexCounts[i]; j++) {
otherIndexes.insert(allIndexes[j + displIndexes[i]]);
}
if (SUPER_VERBOSE)
ctx.format(
"Other indexes (count=%d): %s", allIndexCounts[i],
LibLSS::to_string(otherIndexes));
ctx.format("Intersect with rank=%d", i);
std::set_intersection(
localSet.begin(), localSet.end(), otherIndexes.begin(),
otherIndexes.end(),
std::inserter(interIndexes, interIndexes.begin()));
ctx.format("%d indexes in common", interIndexes.size());
exchangeIndexes[i].resize(boost::extents[interIndexes.size()]);
std::copy(
interIndexes.begin(), interIndexes.end(),
exchangeIndexes[i].begin());
}
}
/**
* @brief Synchronize the entries shared with other tasks, merging remote
* contributions into the local array with the reduction operation.
*
* @tparam U element type of the data array
* @param data local 1-d array holding the values to be reduced
* @param mapper how to map an index (from setup) to an index in the provided array
* @param op binary reduction, called as op(local_value, remote_value)
*/
template <typename U, typename ReductionOperation, typename IndexMapper>
void synchronize(
boost::multi_array_ref<U, 1> &data, IndexMapper &&mapper,
ReductionOperation op) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
typedef LibLSS::U_Array<U, 1> TmpU;
std::vector<std::shared_ptr<TmpU>> allTmpSend;
std::vector<std::shared_ptr<TmpU>> allTmpRecv;
std::vector<MPICC_Request> allReq;
allTmpRecv.resize(comm->size());
for (int i = 0; i < comm->size(); i++) {
int numExchange = exchangeIndexes[i].size();
if (numExchange == 0) {
continue;
}
ctx.format("Send %d data -> %d", numExchange, i);
{
auto thisTmp = std::make_shared<TmpU>(boost::extents[numExchange]);
auto &tmpData = thisTmp->get_array();
allTmpSend.push_back(thisTmp);
# pragma omp parallel for
for (int j = 0; j < numExchange; j++) {
tmpData[j] = data[mapper(exchangeIndexes[i][j])];
}
allReq.push_back(comm->IsendT(tmpData.data(), tmpData.size(), i, i));
}
ctx.format("Recv %d data <- %d", numExchange, i);
{
auto thisTmp = std::make_shared<TmpU>(boost::extents[numExchange]);
auto &tmpData = thisTmp->get_array();
allTmpRecv[i] = thisTmp;
allReq.push_back(
comm->IrecvT(tmpData.data(), tmpData.size(), i, comm->rank()));
}
}
ctx.print("Wait IO completion");
comm->WaitAll(allReq);
allTmpSend.clear();
{
ConsoleContext<LOG_DEBUG> ctx("GhostArray local reduction");
// Now all data are in place, we must do partial reductions
for (int i = 0; i < comm->size(); i++) {
if (i == comm->rank())
continue;
int numExchange = exchangeIndexes[i].size();
if (numExchange == 0)
continue;
auto &inData = allTmpRecv[i]->get_array();
# pragma omp parallel for
for (int j = 0; j < numExchange; j++) {
op(data[mapper(exchangeIndexes[i][j])], inData[j]);
}
}
}
}
template <typename U, typename IndexMapper>
void synchronize(
boost::multi_array_ref<U, 1> &data, IndexMapper &&indexToIndex) {
synchronize<U>(
data, indexToIndex, [](auto &x, auto const &y) { x += y; });
}
};
} // namespace LibLSS
#endif
// ARES TAG: num_authors = 1
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: year(0) = 2018-2020
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
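
A hypothetical usage sketch of GhostArray, using only the setup() and synchronize() members defined above: each task accumulates partial sums into bins, declares which global bin indexes it touches, and lets the ghost array add in the contributions from peers that share some of those bins. The communicator, the bin set and the index offset are assumptions of this example.

#include <set>
#include <boost/multi_array.hpp>
#include "libLSS/tools/mpi/ghost_array.hpp"

using namespace LibLSS;

void reduce_shared_bins(
    MPI_Communication *comm, std::set<ssize_t> const &myBins,
    boost::multi_array_ref<double, 1> &partialSums, ssize_t firstBin) {
  GhostArray<ssize_t> ghost;
  // Establish, once, which bins are shared with which peer.
  ghost.setup(comm, myBins);
  // Add remote contributions into the local array (the default reduction is +=).
  ghost.synchronize(
      partialSums, [firstBin](ssize_t bin) { return bin - firstBin; });
}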


@@ -0,0 +1,606 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/mpi/ghost_planes.hpp
Copyright (C) 2018-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#pragma once
#ifndef __LIBLSS_TOOLS_MPI_GHOST_PLANES_HPP
# define __LIBLSS_TOOLS_MPI_GHOST_PLANES_HPP
# include <map>
# include <memory>
# include "libLSS/tools/uninitialized_type.hpp"
# include "libLSS/mpi/generic_mpi.hpp"
# include "libLSS/tools/array_tools.hpp"
# include "libLSS/tools/string_tools.hpp"
# include "libLSS/samplers/core/types_samplers.hpp"
namespace LibLSS {
/**
* This class provides some types to abbreviate the long array specification
* for ghost planes.
*/
template <typename T, size_t Nd>
struct GhostPlaneTypes {
typedef boost::multi_array_ref<T, Nd> ArrayType;
typedef UninitializedArray<ArrayType> U_ArrayType;
typedef std::map<size_t, std::shared_ptr<U_ArrayType>> MapGhosts;
};
/**
* @file
* This enumeration allows choosing between different kinds of "ghosts".
*/
enum GhostMethod {
GHOST_COPY, ///< in synchronize mode, the plane is copied. In AG mode, it is accumulated.
GHOST_ACCUMULATE ///< in synchronize mode, the plane is accumulated. In AG mode, it is copied.
};
/**
* This class handles the generic problem of ghost-plane management with MPI.
* The concept of ghost planes (and of ghost particles in another module) comes
* from the distinction between the MPI task that owns a plane and the tasks
* that need it for further computation. A ghost plane is not designed to be an
* "active" plane on the node that needs it, though a slight variant may allow
* such a use at the cost of a final synchronization.
*
* The workflow for using GhostPlanes is the following:
*   - create the GhostPlanes object;
*   - call setup() to declare which planes are provided and which are required;
*   - do the computation;
*   - call synchronize() before the ghost planes are needed;
*   - access the ghost planes with getPlane();
*   - repeat synchronize() as needed.
*
* There is an adjoint gradient variant of the synchronization step which
* does sum reduction of the adjoint gradient arrays corresponding to the
* ghost planes.
*
*/
template <typename T, size_t Nd>
struct GhostPlanes : GhostPlaneTypes<T, Nd> {
typedef GhostPlaneTypes<T, Nd> super;
typedef typename super::ArrayType ArrayType;
typedef typename super::U_ArrayType U_ArrayType;
typedef typename super::MapGhosts MapGhosts;
private:
static constexpr bool CHECK_DIMENSIONS = true;
static constexpr bool ULTRA_VERBOSE = false;
MPI_Communication *comm;
MapGhosts ghosts, ag_ghosts;
size_t maxPlaneId;
std::map<size_t, size_t> plane_peer;
std::array<size_t, Nd> setupDims;
typedef LibLSS::multi_array<int, 1> int_array;
typedef LibLSS::multi_array<int, 1> size_array;
typedef std::set<int> size_set;
LibLSS::multi_array<int, 1> other_requested_planes, other_requested_count,
other_requested_displ;
std::map<size_t, std::shared_ptr<MPI_Communication>> owned_plane_dispatch;
size_set req_plane_set;
template <typename PlaneSet>
inline size_array fill_with_planes(PlaneSet &&owned_planes) {
size_array plane_set(boost::extents[owned_planes.size()]);
size_t i = 0;
for (auto plane : owned_planes) {
plane_set[i] = plane;
i++;
}
return plane_set;
}
template <typename Array>
inline std::string array_to_str(Array const &s, char const *sep) {
std::ostringstream oss;
auto iter = s.begin();
if (iter == s.end())
return "";
oss << *iter;
++iter;
while (iter != s.end()) {
oss << sep << *iter;
++iter;
}
return oss.str();
}
template <typename PlaneSet>
inline void dispatch_plane_map(
PlaneSet &&owned_planes, int_array &other_planes,
int_array &other_planes_count, int_array &other_planes_displ) {
size_t cSize = comm->size();
auto e_cSize = boost::extents[cSize];
ConsoleContext<LOG_DEBUG> ctx("dispatch_plane_map");
int_array tmp_data(e_cSize), send_displ(e_cSize), send_count(e_cSize);
// Now find out which rank has the planes.
// Everybody sends their plane set for that.
auto plane_set = fill_with_planes(owned_planes);
size_t Nplanes = plane_set.size();
array::fill(tmp_data, Nplanes);
array::fill(send_count, 1);
// Costly but we hopefully do it only once in a while.
// Get all the plane number count from everybody.
ctx.print("Dispatch our planeset, number is " + to_string(tmp_data));
comm->all2allT(tmp_data.data(), 1, other_planes_count.data(), 1);
for (size_t i = 1; i < comm->size(); i++) {
other_planes_displ[i] =
other_planes_displ[i - 1] + other_planes_count[i - 1];
}
size_t total_planes =
other_planes_displ[cSize - 1] + other_planes_count[cSize - 1];
ctx.print(boost::format("Total planes = %d") % total_planes);
other_planes.resize(boost::extents[total_planes]);
ctx.print(
boost::format("Now gather plane ids send_count=%s; send_displ=%s; "
"other_planes_count=%s; other_planes_displ=%s") %
array_to_str(tmp_data, ",") % array_to_str(send_displ, ",") %
array_to_str(other_planes_count, ",") %
array_to_str(other_planes_displ, ","));
// Get plane id from everybody
comm->all2allv_t(
plane_set.data(), tmp_data.data(), send_displ.data(),
other_planes.data(), other_planes_count.data(),
other_planes_displ.data());
ctx.print(
boost::format("Got other task planeset: %s") %
array_to_str(other_planes, ","));
}
typedef std::map<size_t, std::list<size_t>> MapPlaneToPeer;
inline MapPlaneToPeer gather_peer_by_plane(
int_array const &required_planes,
int_array const &required_planes_count,
int_array const &required_planes_displ) {
MapPlaneToPeer plane_to_peer;
int peer = 0;
size_t cSize = comm->size();
size_t cRank = comm->rank();
ConsoleContext<LOG_DEBUG> ctx("gather_peer_by_plane");
for (size_t i = 0; i < required_planes.num_elements(); i++) {
if (peer + 1 < cSize && i >= required_planes_displ[peer + 1]) {
peer++;
}
ctx.print(
boost::format("Peer %d provides %d") % peer % required_planes[i]);
if (peer != cRank) {
plane_to_peer[required_planes[i]].push_back(peer);
}
}
return plane_to_peer;
}
static inline void null_destroy(void *) {}
std::map<
GhostMethod,
std::function<MPICC_Request(MPI_Communication *, T const *, int)>>
ghost_methods;
std::map<
GhostMethod,
std::function<MPICC_Request(MPI_Communication *, T *, T *, int)>>
ghost_methods_ag;
static MPICC_Request
ghost_copy_method(MPI_Communication *c, T const *data, int num) {
return c->IbroadcastT((T *)data, num, 0);
}
static MPICC_Request
ghost_accumulate_method(MPI_Communication *c, T const *data, int num) {
return c->IallReduceT((T *)MPI_IN_PLACE, (T *)data, num, MPI_SUM);
}
static MPICC_Request ghost_accumulate_method_ag(
MPI_Communication *c, T *indata, T const *data, int num) {
return c->IgatherT((T *)indata, num, (T *)data, num, 0);
}
static MPICC_Request
ghost_copy_method_ag(MPI_Communication *c, T *indata, T *data, int num) {
return c->IreduceT(indata, data, num, MPI_SUM, 0);
}
public:
/**
* Constructor.
*/
GhostPlanes() {
ghost_methods[GHOST_COPY] = &ghost_copy_method;
ghost_methods[GHOST_ACCUMULATE] = &ghost_accumulate_method;
ghost_methods_ag[GHOST_COPY] = &ghost_copy_method_ag;
ghost_methods_ag[GHOST_ACCUMULATE] = &ghost_accumulate_method_ag;
std::fill(setupDims.begin(), setupDims.end(), 0);
}
/**
* Return the current dimensions of the planes.
*
* @return A container with the dimensions.
*/
auto const &dims() const { return setupDims; }
/**
* This function allows the user to change the dimensions of the planes.
*
* @param dims (N-1)-d dimensions of each plane.
*/
template <typename DimList>
void updatePlaneDims(DimList &&dims) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
auto i1 = dims.begin();
auto i2 = setupDims.begin();
int d = 0;
for (d = 0; d < Nd; d++) {
if (*i1 != *i2)
break;
++i1;
++i2;
}
// Everything already correct. Exit now.
if (d == Nd) {
ctx.print("No change needed.");
return;
}
ctx.format("New shape is %dx%d", dims[0], dims[1]);
for (auto &g : ghosts) {
if (g.second)
g.second.reset(); //reshape(dims);
}
for (auto &g : ag_ghosts) {
if (g.second)
g.second.reset(); //reshape(dims);
}
std::copy(dims.begin(), dims.end(), setupDims.begin());
}
/**
* This function sets up the ghost plane object for use. It can be called
* several times; in that case the previous setup is forgotten and a new
* one is initiated.
*
* @param comm_ MPI communicator with the same topology as the planes
* @param planes a list of planes that are required from other nodes. The
* list must be some sort of container.
* @param owned_planes a list of the planes that are owned by the current
* node.
* @param dims dimensions of the planes (barring the first one, i.e. 2D if
* the entire set is 3D)
* @param maxPlaneId_ a convenience argument to avoid a global communication
* to figure out the maximum id of the considered
* planes.
*/
template <typename PlaneList, typename PlaneSet, typename DimList>
void setup(
MPI_Communication *comm_, PlaneList &&planes, PlaneSet &&owned_planes,
DimList &&dims, size_t maxPlaneId_) {
LIBLSS_AUTO_DEBUG_CONTEXT(ctx);
size_t cSize = comm_->size();
auto e_cSize = boost::extents[cSize];
int_array other_planes, other_planes_count(e_cSize),
other_planes_displ(e_cSize);
size_set owned_plane_set;
// required_planes, required_planes_count,
// required_planes_displ;
maxPlaneId = maxPlaneId_;
req_plane_set = size_set(planes.begin(), planes.end());
owned_plane_set = size_set(owned_planes.begin(), owned_planes.end());
ghosts.clear();
comm = comm_;
std::copy(dims.begin(), dims.end(), setupDims.begin());
// Create a map between requested planes and peers.
dispatch_plane_map(
owned_planes, other_planes, other_planes_count, other_planes_displ);
// Now we know which of our own planes the other peers require
auto plane_to_peer = gather_peer_by_plane(
// required_planes, required_planes_count, required_planes_displ
other_planes, other_planes_count, other_planes_displ);
ctx.print("Required planes: " + to_string(req_plane_set));
ctx.print("Owned planes: " + to_string(owned_plane_set));
for (size_t plane = 0; plane < maxPlaneId; plane++) {
std::shared_ptr<MPI_Communication> cptr;
auto peer = plane_to_peer.find(plane);
if (owned_plane_set.count(plane) > 0) {
// Mark this task as root (key==0)
cptr = std::shared_ptr<MPI_Communication>(comm->split(plane, 0));
if (ULTRA_VERBOSE)
ctx.format("Data for plane %d is present here.", plane);
} else if (req_plane_set.find(plane) != req_plane_set.end()) {
// Mark this task as non root (key!=0)
cptr = std::shared_ptr<MPI_Communication>(comm->split(plane, 1));
if (ULTRA_VERBOSE)
ctx.format("Data for plane %d is NEEDED here.", plane);
} else {
// Ignore this one, but we have to run it nonetheless as split is a collective operation.
comm->split();
if (ULTRA_VERBOSE)
ctx.format("Ignore this process for plane %d.", plane);
}
if (cptr &&
cptr->size() <=
1) { // Could even be 2, but then we would have a rank problem later.
// We do not need a new communicator in that case.
// This resets cptr and frees the communicator we have just
// created.
cptr.reset();
if (ULTRA_VERBOSE)
ctx.format(
"Communicator has only one process for plane %d, reset.",
plane);
}
owned_plane_dispatch[plane] = cptr;
}
}
/**
* @brief Pre-allocate memory for synchronization.
*
* Warning! Previous memory is freed.
*
*/
void allocate() {
// Allocate memory for the ghost planes
for (auto plane : req_plane_set) {
if (!ghosts[plane])
ghosts[plane] = std::make_shared<U_ArrayType>(setupDims);
if (!ag_ghosts[plane])
ag_ghosts[plane] = std::make_shared<U_ArrayType>(setupDims);
}
}
/**
* @brief Release memory for synchronization
*
*/
void release() {
for (auto plane : req_plane_set) {
ghosts[plane].reset();
ag_ghosts[plane].reset();
}
}
/**
* Clear the internal ghost cache for the computation
* of the adjoint gradient.
*/
void clear_ghosts() {
for (auto &ag : ag_ghosts) {
array::fill(ag.second->get_array(), 0);
}
}
/**
* This creates a virtual contiguous array of all the planes that are
* requested and owned by the current task. There is a bit of overhead for
* each plane lookup, so use it wisely and cache plane accesses.
*
* @param planes contiguous multi_array of planes to be synchronized. The
* multi_array is assumed to range from min_local_plane
* to max_local_plane (according to the list given in
* setup).
* @param method a method to compute the synchronization
*/
void synchronize(
boost::multi_array_ref<T, (Nd + 1)> const &planes,
GhostMethod method = GHOST_COPY) {
// Synchronize operations with other members of comm
ConsoleContext<LOG_DEBUG> ctx("ghost synchronize");
RequestArray requests(boost::extents[maxPlaneId]);
StatusArray statuses(boost::extents[maxPlaneId]);
allocate();
// Check that the planes do have the correct shape
if (CHECK_DIMENSIONS) {
auto shape_in = planes.shape();
auto iter = ghosts.begin();
if (iter != ghosts.end()) {
auto shape_out = iter->second->get_array().shape();
for (size_t i = 1; i < Nd; i++) {
if (shape_in[i] != shape_out[i - 1]) {
error_helper<ErrorBadState>(
"Invalid dimensions of the array to synchronize (" +
to_string(shape_in[i]) +
" != " + to_string(shape_out[i - 1]) + ")");
}
}
}
}
for (size_t plane = 0; plane < maxPlaneId; plane++) {
auto iter = owned_plane_dispatch.find(plane);
if (iter != owned_plane_dispatch.end()) {
int num;
if (!iter->second) {
if (ULTRA_VERBOSE)
ctx.print("Empty communicator. Skip.");
continue;
}
if (req_plane_set.count(plane) == 0) {
//Console::instance().c_assert(plane >= idMin && plane < idMax, "Missing plane id for broadcasting");
auto one_plane = planes[plane];
T const *data =
one_plane
.origin(); // This assumes that index_bases is zero for dims > 1
num = one_plane.num_elements();
ctx.format("Send our plane %d (num=%d)", plane, num);
requests[plane] =
ghost_methods[method](iter->second.get(), data, num);
} else {
auto &one_plane = (ghosts[plane]->get_array());
auto data = one_plane.data();
num = one_plane.num_elements();
ctx.format(
"Receive some plane %d (num=%d), ptr=%p", plane, num,
(void *)data);
requests[plane] =
ghost_methods[method](iter->second.get(), data, num);
}
}
// If we do not have anything to exchange just skip the communication.
}
if (ULTRA_VERBOSE)
ctx.print("Wait for completion");
MPI_Communication::WaitAll(requests, statuses);
}
/**
* This function computes an "adjoint gradient" of the ghost
* plane algorithm.
*
* @param ag_planes similar to synchronize, except that ag_planes is
* modified through communication with sibling nodes.
* @param method a method to compute the synchronization
* @see GhostMethod
*/
void synchronize_ag(
boost::multi_array_ref<T, (Nd + 1)> &ag_planes,
GhostMethod method = GHOST_COPY) {
// Synchronize operations with other members of comm
ConsoleContext<LOG_DEBUG> ctx(
"ghost synchronize_ag, maxPlaneId=" + to_string(maxPlaneId));
RequestArray requests(boost::extents[maxPlaneId]);
StatusArray statuses(boost::extents[maxPlaneId]);
std::vector<std::unique_ptr<T[]>> all_tmps;
for (size_t plane = 0; plane < maxPlaneId; plane++) {
auto iter = owned_plane_dispatch.find(plane);
if (iter != owned_plane_dispatch.end()) {
int num;
if (!iter->second) {
if (ULTRA_VERBOSE)
ctx.print("Empty communicator. Skip.");
continue;
}
if (req_plane_set.count(plane) == 0) {
//Console::instance().c_assert(plane >= idMin && plane < idMax, "Missing plane id for broadcasting");
auto one_plane = ag_planes[plane];
T *tmp_buf;
T *data =
one_plane
.origin(); // WARNING: This assumes that index_bases is zero for dims > 1
num = one_plane.num_elements();
ctx.format("Receive and reduce our plane %d (num=%d)", plane, num);
Console::instance().c_assert(
iter->second->rank() == 0,
"For reception, local rank has to be zero.");
all_tmps.push_back(std::unique_ptr<T[]>(tmp_buf = new T[num]));
LibLSS::copy_array_rv(
boost::multi_array_ref<T, 2>(
tmp_buf,
boost::extents[one_plane.shape()[0]][one_plane.shape()[1]]),
one_plane);
requests[plane] = ghost_methods_ag[method](
iter->second.get(), tmp_buf, data, num);
} else {
auto &one_plane = (ag_ghosts[plane]->get_array());
auto data = one_plane.data();
T *tmp_buf;
num = one_plane.num_elements();
ctx.format(
"Send and reduce some plane %d (num=%d), ptr=%p", plane, num,
(void *)data);
Console::instance().c_assert(
iter->second->rank() != 0,
"For sending, local rank must not be zero.");
all_tmps.push_back(std::unique_ptr<T[]>(tmp_buf = new T[num]));
LibLSS::copy_array_rv(
boost::multi_array_ref<T, 2>(
tmp_buf,
boost::extents[one_plane.shape()[0]][one_plane.shape()[1]]),
one_plane);
requests[plane] = ghost_methods_ag[method](
iter->second.get(), tmp_buf, data, num);
}
}
// If we do not have anything to exchange just skip the communication.
}
MPI_Communication::WaitAll(requests, statuses);
}
/**
* Return the adjoint gradient plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType &ag_getPlane(size_t i) {
auto iter = ag_ghosts.find(i);
Console::instance().c_assert(
iter != ag_ghosts.end(), "Invalid ag ghost plane access");
return iter->second->get_array();
}
/**
* Return the ghost plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType &getPlane(size_t i) {
auto iter = ghosts.find(i);
if (iter == ghosts.end()) {
Console::instance().print<LOG_ERROR>(
boost::format("no such ghost plane %d") % i);
error_helper<ErrorBadState>("Invalid ghost plane access");
}
return iter->second->get_array();
}
/**
* Return the ghost plane indicated by the parameter i.
* @param i plane of interest.
*/
ArrayType const &getPlane(size_t i) const {
auto iter = ghosts.find(i);
if (iter == ghosts.end()) {
Console::instance().print<LOG_ERROR>(
boost::format("no such ghost plane %d") % i);
error_helper<ErrorBadState>("Invalid ghost plane access");
}
return iter->second->get_array();
}
};
} // namespace LibLSS
#endif
// ARES TAG: num_authors = 1
// ARES TAG: name(0) = Guilhem Lavaux
// ARES TAG: year(0) = 2018-2020
// ARES TAG: email(0) = guilhem.lavaux@iap.fr
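
A hypothetical end-to-end sketch of the workflow described in the class documentation (setup, synchronize, getPlane, release). The slab decomposition, the wanted plane set and the field layout are assumptions of this example; in particular, the first dimension of field is assumed to be indexed by global plane ids (index base equal to start), as the synchronize() documentation requires.

#include <array>
#include <set>
#include <boost/multi_array.hpp>
#include "libLSS/tools/mpi/ghost_planes.hpp"

using namespace LibLSS;

// This task owns planes [start, start + local) of an N0 x N1 x N2 box and
// additionally needs the planes listed in `wanted` from other tasks.
void exchange_planes(
    MPI_Communication *comm, size_t start, size_t local, size_t N0, size_t N1,
    size_t N2, std::set<size_t> const &wanted,
    boost::multi_array_ref<double, 3> &field) {
  GhostPlanes<double, 2> ghosts; // each ghost plane is a 2-D (N1 x N2) slice
  std::set<size_t> owned;
  for (size_t i = start; i < start + local; i++)
    owned.insert(i);
  // Declare what we own and what we need; this is a collective operation.
  ghosts.setup(comm, wanted, owned, std::array<size_t, 2>{N1, N2}, N0);
  // Broadcast the owned planes to whoever requested them.
  ghosts.synchronize(field);
  for (size_t plane : wanted) {
    auto &p = ghosts.getPlane(plane); // 2-D view of a remote plane
    (void)p;                          // ... use p in the actual computation
  }
  ghosts.release(); // free ghost buffers until the next synchronize()
}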


@@ -0,0 +1,224 @@
/*+
ARES/HADES/BORG Package -- ./extra/hades/libLSS/tools/symplectic_integrator.hpp
Copyright (C) 2014-2020 Guilhem Lavaux <guilhem.lavaux@iap.fr>
Copyright (C) 2009-2020 Jens Jasche <jens.jasche@fysik.su.se>
Additional contributions from:
Guilhem Lavaux <guilhem.lavaux@iap.fr> (2023)
+*/
#ifndef __LIBLSS_SYMPLECTIC_INTEGRATOR_HPP
#define __LIBLSS_SYMPLECTIC_INTEGRATOR_HPP
#include <boost/multi_array.hpp>
#include "libLSS/tools/console.hpp"
#include "libLSS/tools/array_tools.hpp"
#include "libLSS/tools/fused_array.hpp"
#include "libLSS/tools/fusewrapper.hpp"
namespace LibLSS {
namespace SymplecticOption {
enum IntegratorScheme {
SI_2A,
SI_2B,
SI_2C,
SI_3A,
SI_4B,
SI_4C,
SI_4D,
SI_6A,
CG_89
};
typedef boost::multi_array<double, 2> IntegratorCoefficients;
template <int N>
static inline void
pushScheme(double coefs[2][N], IntegratorCoefficients &I_coefs) {
int Ncoefs = N;
I_coefs.resize(boost::extents[2][Ncoefs]);
for (int i = 0; i < Ncoefs; i++) {
I_coefs[0][i] = coefs[0][i];
I_coefs[1][i] = coefs[1][i];
}
}
}; // namespace SymplecticOption
struct SymplecticIntegrators {
typedef SymplecticOption::IntegratorCoefficients IntegratorCoefficients;
typedef SymplecticOption::IntegratorScheme IntegratorScheme;
IntegratorCoefficients I_coefs;
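// Convention used by integrate_dense() below: row 0 holds the position-update
// (drift) weights a_n and row 1 the momentum-update (kick) weights b_n; both
// get multiplied by the step size epsilon at every stage.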
SymplecticIntegrators() { setIntegratorScheme(SymplecticOption::SI_2A); }
void setIntegratorScheme(IntegratorScheme scheme) {
using namespace SymplecticOption;
switch (scheme) {
case SI_2A: {
//si2a : standard leapfrog
double coefs[2][2] = {{0.5, 0.5}, {0.0, 1.0}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_2B: {
//si2b : pseudo leapfrog
double coefs[2][2] = {{1.0, 0.0}, {0.5, 0.5}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_2C: {
//si2c : optimal 2-stage
double coefs[2][2] = {{1.0 / sqrt(2.), 1.0 - 1.0 / sqrt(2.0)},
{1.0 - 1.0 / sqrt(2.0), 1.0 / sqrt(2.0)}};
pushScheme<2>(coefs, I_coefs);
break;
}
case SI_3A: {
//si3a : Ruth's third order method
double coefs[2][3] = {{2.0 / 3.0, -2.0 / 3.0, 1.0},
{7.0 / 24.0, 0.75, -1.0 / 24.0}};
pushScheme<3>(coefs, I_coefs);
break;
}
case SI_4B: {
//si4b : Calvo and Sanz-Serna's fourth order method
double coeffs[2][4] = {{0.515352837431122936, -0.085782019412973646,
0.441583023616466524, 0.128846158365384185},
{0.134496199277431089, -0.224819803079420806,
0.756320000515668291, 0.334003603286321425}};
pushScheme<4>(coeffs, I_coefs);
break;
}
case SI_4C: {
//si4c : McLachlan and Atela's optimal third order method
double coeffs[2][5] = {{0.205177661542290, 0.403021281604210,
-0.12092087633891, 0.512721933192410, 0.0},
{0.061758858135626, 0.33897802655364,
0.61479130717558, -0.14054801465937,
0.12501982279453}};
pushScheme<5>(coeffs, I_coefs);
break;
}
case SI_4D: {
//si4d : fourth-order composition scheme based on the 2^(1/3) splitting (Forest-Ruth-like)
double caux = pow(2., 1. / 3.);
double coeffs[2][4] = {
{0.5 / (2. - caux), 0.5 * (1.0 - caux) / (2. - caux),
0.5 * (1.0 - caux) / (2. - caux), 0.5 / (2. - caux)},
{0.0, 1.0 / (2. - caux), -caux / (2. - caux), 1.0 / (2. - caux)}};
pushScheme<4>(coeffs, I_coefs);
break;
}
case SI_6A: {
//si6a : Yoshida's sixth-order method
double caux = pow(2., 1. / 3.);
double coeffs[2][8] = {
{0.78451361047756, 0.23557321335936, -1.1776799841789,
1.3151863206839, 0., 0., 0., 0.},
{0.39225680523878, 0.51004341191846, -0.47105338540976,
0.068753168252520, 0., 0., 0., 0.}};
coeffs[0][4] = coeffs[0][2];
coeffs[0][5] = coeffs[0][1];
coeffs[0][6] = coeffs[0][0];
coeffs[1][4] = coeffs[1][3];
coeffs[1][5] = coeffs[1][2];
coeffs[1][6] = coeffs[1][1];
coeffs[1][7] = coeffs[1][0];
pushScheme<8>(coeffs, I_coefs);
break;
}
case CG_89: {
constexpr int const i = 4;
constexpr double const n = 2.;
double s = std::pow(2*i, 1/(n+1.));
double coeffs[2][4*i+2];
for (int j = 0; j < i; j++) {
coeffs[0][2*j] = 0.5;
coeffs[0][2*j+1] = 0.5;
coeffs[1][2*j] = 0.;
coeffs[1][2*j+1] = 1.;
}
coeffs[0][2*i] = -0.5*s;
coeffs[0][2*i+1] = -0.5*s;
coeffs[1][2*i] = 0;
coeffs[1][2*i+1] = -s;
int const base = 2*i+2;
for (int j = 0; j < i; j++) {
coeffs[0][base+2*j] = 0.5;
coeffs[0][base+2*j+1] = 0.5;
coeffs[1][base+2*j] = 0.;
coeffs[1][base+2*j+1] = 1.;
}
pushScheme<4*i+2>(coeffs, I_coefs);
break;
}
default:
error_helper<ErrorBadState>("Unknown integration scheme");
break;
}
}
template <
typename MassMatrix, typename GradientVector, typename MomentumVector,
typename PositionVector, typename GradientFunction>
void integrate_dense(
const GradientFunction &gradient, MassMatrix &&masses, double epsilon,
int Ntime, PositionVector &position, MomentumVector &momentum,
GradientVector &tmp_gradient) {
using boost::lambda::_1;
using boost::lambda::_2;
using boost::lambda::_3;
Console &cons = Console::instance();
Progress<LOG_INFO_SINGLE> &progress =
cons.start_progress<LOG_INFO_SINGLE>(
"doing Symplectic integration", Ntime, 10);
int Ncoefs = I_coefs.shape()[1];
for (int i_Time = 0; i_Time < Ntime; i_Time++) {
///the scheme depends on the chosen integrator order
for (int n = 0; n < Ncoefs; n++) {
double an = I_coefs[0][n] * epsilon;
double bn = I_coefs[1][n] * epsilon;
if (bn != 0) {
gradient(position, tmp_gradient);
// This is momentum update
fwrap(momentum) = fwrap(momentum) - fwrap(tmp_gradient) * bn;
}
// This is position update
fwrap(position) = fwrap(position) + masses(momentum, tmp_gradient) * an;
}
progress.update(i_Time);
}
progress.destroy();
}
template <
typename MassMatrix, typename GradientVector, typename MomentumVector,
typename PositionVector, typename GradientFunction>
void integrate(
const GradientFunction &gradient, MassMatrix &&masses, double epsilon,
int Ntime, PositionVector &position, MomentumVector &momentum,
GradientVector &tmp_gradient) {
auto mass_op = [&masses](MomentumVector const &m, auto&) {
return fwrap(m) * fwrap(masses);
};
integrate_dense(
gradient, mass_op, epsilon, Ntime, position, momentum, tmp_gradient);
}
};
}; // namespace LibLSS
#endif
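
To make the coefficient convention concrete, here is a small standalone program (not part of the package) that applies the SI_2A table {{0.5, 0.5}, {0.0, 1.0}} to a unit-mass harmonic oscillator, following the same kick/drift loop as integrate_dense().

#include <cmath>
#include <cstdio>

// Toy drift/kick loop for H = p^2/2 + q^2/2: a_n (row 0) weights the position
// update, b_n (row 1) the momentum update, as in integrate_dense().
int main() {
  double const a[2] = {0.5, 0.5}; // SI_2A position (drift) coefficients
  double const b[2] = {0.0, 1.0}; // SI_2A momentum (kick) coefficients
  double q = 1.0, p = 0.0, epsilon = 0.01;
  for (int step = 0; step < 1000; step++) {
    for (int n = 0; n < 2; n++) {
      if (b[n] != 0)
        p -= q * b[n] * epsilon; // gradient of the potential is q
      q += p * a[n] * epsilon;   // unit mass matrix
    }
  }
  // Energy should stay close to its initial value 0.5 for a symplectic scheme.
  std::printf("q=%g p=%g E=%g\n", q, p, 0.5 * (p * p + q * q));
  return 0;
}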